1//===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "HexagonISelLowering.h"
10#include "HexagonRegisterInfo.h"
11#include "HexagonSubtarget.h"
12#include "llvm/ADT/SetVector.h"
13#include "llvm/ADT/SmallVector.h"
14#include "llvm/Analysis/MemoryLocation.h"
15#include "llvm/CodeGen/MachineBasicBlock.h"
16#include "llvm/CodeGen/MachineFunction.h"
17#include "llvm/CodeGen/MachineInstr.h"
18#include "llvm/CodeGen/MachineOperand.h"
19#include "llvm/CodeGen/MachineRegisterInfo.h"
20#include "llvm/CodeGen/TargetInstrInfo.h"
21#include "llvm/IR/IntrinsicsHexagon.h"
22#include "llvm/Support/CommandLine.h"
23
24#include <algorithm>
25#include <string>
26#include <utility>
27
28using namespace llvm;
29
// Lower threshold (in bytes) below which short vectors are left to the
// default legalization instead of being widened to full HVX vectors.
static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen",
  cl::Hidden, cl::init(16),
  cl::desc("Lower threshold (in bytes) for widening to HVX vectors"));

// Opt-in for the faster (but less precise) FP conversion sequences.
static cl::opt<bool>
    EnableFpFastConvert("hexagon-fp-fast-convert", cl::Hidden, cl::init(false),
                        cl::desc("Enable FP fast conversion routine."));

// Legal single-vector (V) and vector-pair (W) integer types for the
// 64-byte and 128-byte HVX modes, respectively.
static const MVT LegalV64[] =  { MVT::v64i8,  MVT::v32i16,  MVT::v16i32 };
static const MVT LegalW64[] =  { MVT::v128i8, MVT::v64i16,  MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16,  MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
42
43static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) {
44 // For a float scalar type, return (exp-bits, exp-bias, fraction-bits)
45 MVT ElemTy = Ty.getScalarType();
46 switch (ElemTy.SimpleTy) {
47 case MVT::f16:
48 return std::make_tuple(args: 5, args: 15, args: 10);
49 case MVT::f32:
50 return std::make_tuple(args: 8, args: 127, args: 23);
51 case MVT::f64:
52 return std::make_tuple(args: 11, args: 1023, args: 52);
53 default:
54 break;
55 }
56 llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str());
57}
58
59void
60HexagonTargetLowering::initializeHVXLowering() {
61 if (Subtarget.useHVX64BOps()) {
62 addRegisterClass(VT: MVT::v64i8, RC: &Hexagon::HvxVRRegClass);
63 addRegisterClass(VT: MVT::v32i16, RC: &Hexagon::HvxVRRegClass);
64 addRegisterClass(VT: MVT::v16i32, RC: &Hexagon::HvxVRRegClass);
65 addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxWRRegClass);
66 addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxWRRegClass);
67 addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxWRRegClass);
68 // These "short" boolean vector types should be legal because
69 // they will appear as results of vector compares. If they were
70 // not legal, type legalization would try to make them legal
71 // and that would require using operations that do not use or
72 // produce such types. That, in turn, would imply using custom
73 // nodes, which would be unoptimizable by the DAG combiner.
74 // The idea is to rely on target-independent operations as much
75 // as possible.
76 addRegisterClass(VT: MVT::v16i1, RC: &Hexagon::HvxQRRegClass);
77 addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass);
78 addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass);
79 } else if (Subtarget.useHVX128BOps()) {
80 addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxVRRegClass);
81 addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxVRRegClass);
82 addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxVRRegClass);
83 addRegisterClass(VT: MVT::v256i8, RC: &Hexagon::HvxWRRegClass);
84 addRegisterClass(VT: MVT::v128i16, RC: &Hexagon::HvxWRRegClass);
85 addRegisterClass(VT: MVT::v64i32, RC: &Hexagon::HvxWRRegClass);
86 addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass);
87 addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass);
88 addRegisterClass(VT: MVT::v128i1, RC: &Hexagon::HvxQRRegClass);
89 if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) {
90 addRegisterClass(VT: MVT::v32f32, RC: &Hexagon::HvxVRRegClass);
91 addRegisterClass(VT: MVT::v64f16, RC: &Hexagon::HvxVRRegClass);
92 addRegisterClass(VT: MVT::v64f32, RC: &Hexagon::HvxWRRegClass);
93 addRegisterClass(VT: MVT::v128f16, RC: &Hexagon::HvxWRRegClass);
94 }
95 if (Subtarget.useHVXV81Ops()) {
96 addRegisterClass(VT: MVT::v64bf16, RC: &Hexagon::HvxVRRegClass);
97 addRegisterClass(VT: MVT::v128bf16, RC: &Hexagon::HvxWRRegClass);
98 }
99 }
100
101 // Set up operation actions.
102
103 bool Use64b = Subtarget.useHVX64BOps();
104 ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128;
105 ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128;
106 MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8;
107 MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32;
108 MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8;
109
110 auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) {
111 setOperationAction(Op: Opc, VT: FromTy, Action: Promote);
112 AddPromotedToType(Opc, OrigVT: FromTy, DestVT: ToTy);
113 };
114
115 // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32).
116 // Note: v16i1 -> i16 is handled in type legalization instead of op
117 // legalization.
118 setOperationAction(Op: ISD::BITCAST, VT: MVT::i16, Action: Custom);
119 setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom);
120 setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom);
121 setOperationAction(Op: ISD::BITCAST, VT: MVT::v16i1, Action: Custom);
122 setOperationAction(Op: ISD::BITCAST, VT: MVT::v128i1, Action: Custom);
123 setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom);
124 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteV, Action: Legal);
125 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteW, Action: Legal);
126 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
127
128 if (Subtarget.useHVX128BOps()) {
129 setOperationAction(Op: ISD::BITCAST, VT: MVT::v32i1, Action: Custom);
130 setOperationAction(Op: ISD::BITCAST, VT: MVT::v64i1, Action: Custom);
131 }
132 if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() &&
133 Subtarget.useHVXFloatingPoint()) {
134
135 static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 };
136 static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 };
137
138 for (MVT T : FloatV) {
139 setOperationAction(Op: ISD::FADD, VT: T, Action: Legal);
140 setOperationAction(Op: ISD::FSUB, VT: T, Action: Legal);
141 setOperationAction(Op: ISD::FMUL, VT: T, Action: Legal);
142 setOperationAction(Op: ISD::FMINIMUMNUM, VT: T, Action: Legal);
143 setOperationAction(Op: ISD::FMAXIMUMNUM, VT: T, Action: Legal);
144
145 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom);
146 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom);
147
148 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
149 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
150
151 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
152 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
153 // Custom-lower BUILD_VECTOR. The standard (target-independent)
154 // handling of it would convert it to a load, which is not always
155 // the optimal choice.
156 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
157 }
158
159
160 // BUILD_VECTOR with f16 operands cannot be promoted without
161 // promoting the result, so lower the node to vsplat or constant pool
162 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom);
163 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::f16, Action: Custom);
164 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::f16, Action: Custom);
165
166 // Vector shuffle is always promoted to ByteV and a bitcast to f16 is
167 // generated.
168 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW);
169 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV);
170 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW);
171 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV);
172
173 if (Subtarget.useHVXV81Ops()) {
174 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128bf16, ByteW);
175 setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64bf16, ByteV);
176 setPromoteTo(ISD::SETCC, MVT::v64bf16, MVT::v64f32);
177 setPromoteTo(ISD::FADD, MVT::v64bf16, MVT::v64f32);
178 setPromoteTo(ISD::FSUB, MVT::v64bf16, MVT::v64f32);
179 setPromoteTo(ISD::FMUL, MVT::v64bf16, MVT::v64f32);
180 setPromoteTo(ISD::FMINNUM, MVT::v64bf16, MVT::v64f32);
181 setPromoteTo(ISD::FMAXNUM, MVT::v64bf16, MVT::v64f32);
182
183 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::v64bf16, Action: Legal);
184 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: MVT::v64bf16, Action: Custom);
185 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: MVT::v64bf16, Action: Custom);
186
187 setOperationAction(Op: ISD::LOAD, VT: MVT::v128bf16, Action: Custom);
188 setOperationAction(Op: ISD::STORE, VT: MVT::v128bf16, Action: Custom);
189
190 setOperationAction(Op: ISD::MLOAD, VT: MVT::v64bf16, Action: Custom);
191 setOperationAction(Op: ISD::MSTORE, VT: MVT::v64bf16, Action: Custom);
192 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v64bf16, Action: Custom);
193 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: MVT::v64bf16, Action: Custom);
194
195 setOperationAction(Op: ISD::MLOAD, VT: MVT::v128bf16, Action: Custom);
196 setOperationAction(Op: ISD::MSTORE, VT: MVT::v128bf16, Action: Custom);
197 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::v128bf16, Action: Custom);
198 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: MVT::v128bf16, Action: Custom);
199
200 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::bf16, Action: Custom);
201 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::bf16, Action: Custom);
202 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::bf16, Action: Custom);
203 }
204
205 for (MVT P : FloatW) {
206 setOperationAction(Op: ISD::LOAD, VT: P, Action: Custom);
207 setOperationAction(Op: ISD::STORE, VT: P, Action: Custom);
208 setOperationAction(Op: ISD::FADD, VT: P, Action: Custom);
209 setOperationAction(Op: ISD::FSUB, VT: P, Action: Custom);
210 setOperationAction(Op: ISD::FMUL, VT: P, Action: Custom);
211 setOperationAction(Op: ISD::FMINIMUMNUM, VT: P, Action: Custom);
212 setOperationAction(Op: ISD::FMAXIMUMNUM, VT: P, Action: Custom);
213 setOperationAction(Op: ISD::SETCC, VT: P, Action: Custom);
214 setOperationAction(Op: ISD::VSELECT, VT: P, Action: Custom);
215
216 // Custom-lower BUILD_VECTOR. The standard (target-independent)
217 // handling of it would convert it to a load, which is not always
218 // the optimal choice.
219 setOperationAction(Op: ISD::BUILD_VECTOR, VT: P, Action: Custom);
220 // Make concat-vectors custom to handle concats of more than 2 vectors.
221 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: P, Action: Custom);
222
223 setOperationAction(Op: ISD::MLOAD, VT: P, Action: Custom);
224 setOperationAction(Op: ISD::MSTORE, VT: P, Action: Custom);
225 }
226
227 if (Subtarget.useHVXQFloatOps()) {
228 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Custom);
229 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal);
230 } else if (Subtarget.useHVXIEEEFPOps()) {
231 setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Legal);
232 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal);
233 }
234 }
235
236 for (MVT T : LegalV) {
237 setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal);
238 setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal);
239
240 setOperationAction(Op: ISD::ABS, VT: T, Action: Legal);
241 setOperationAction(Op: ISD::AND, VT: T, Action: Legal);
242 setOperationAction(Op: ISD::OR, VT: T, Action: Legal);
243 setOperationAction(Op: ISD::XOR, VT: T, Action: Legal);
244 setOperationAction(Op: ISD::ADD, VT: T, Action: Legal);
245 setOperationAction(Op: ISD::SUB, VT: T, Action: Legal);
246 setOperationAction(Op: ISD::MUL, VT: T, Action: Legal);
247 setOperationAction(Op: ISD::CTPOP, VT: T, Action: Legal);
248 setOperationAction(Op: ISD::CTLZ, VT: T, Action: Legal);
249 setOperationAction(Op: ISD::SELECT, VT: T, Action: Legal);
250 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
251 setOperationAction(Op: ISD::UADDSAT, VT: T, Action: Legal);
252 setOperationAction(Op: ISD::SADDSAT, VT: T, Action: Legal);
253 setOperationAction(Op: ISD::USUBSAT, VT: T, Action: Legal);
254 setOperationAction(Op: ISD::SSUBSAT, VT: T, Action: Legal);
255 if (T != ByteV) {
256 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
257 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
258 setOperationAction(Op: ISD::BSWAP, VT: T, Action: Legal);
259 }
260
261 setOperationAction(Op: ISD::SMIN, VT: T, Action: Legal);
262 setOperationAction(Op: ISD::SMAX, VT: T, Action: Legal);
263 if (T.getScalarType() != MVT::i32) {
264 setOperationAction(Op: ISD::UMIN, VT: T, Action: Legal);
265 setOperationAction(Op: ISD::UMAX, VT: T, Action: Legal);
266 }
267
268 setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom);
269 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
270 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
271 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
272 if (T.getScalarType() != MVT::i32) {
273 setOperationAction(Op: ISD::MULHS, VT: T, Action: Legal);
274 setOperationAction(Op: ISD::MULHU, VT: T, Action: Legal);
275 }
276
277 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
278 // Make concat-vectors custom to handle concats of more than 2 vectors.
279 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom);
280 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom);
281 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: T, Action: Custom);
282 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom);
283 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: T, Action: Custom);
284 setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom);
285 setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom);
286 setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom);
287 setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom);
288 setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom);
289 if (T != ByteV) {
290 setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
291 // HVX only has shifts of words and halfwords.
292 setOperationAction(Op: ISD::SRA, VT: T, Action: Custom);
293 setOperationAction(Op: ISD::SHL, VT: T, Action: Custom);
294 setOperationAction(Op: ISD::SRL, VT: T, Action: Custom);
295
296 // Promote all shuffles to operate on vectors of bytes.
297 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV);
298 }
299
300 if (Subtarget.useHVXFloatingPoint()) {
301 // Same action for both QFloat and IEEE.
302 setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom);
303 setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom);
304 setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom);
305 setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom);
306 }
307
308 setCondCodeAction(CCs: ISD::SETNE, VT: T, Action: Expand);
309 setCondCodeAction(CCs: ISD::SETLE, VT: T, Action: Expand);
310 setCondCodeAction(CCs: ISD::SETGE, VT: T, Action: Expand);
311 setCondCodeAction(CCs: ISD::SETLT, VT: T, Action: Expand);
312 setCondCodeAction(CCs: ISD::SETULE, VT: T, Action: Expand);
313 setCondCodeAction(CCs: ISD::SETUGE, VT: T, Action: Expand);
314 setCondCodeAction(CCs: ISD::SETULT, VT: T, Action: Expand);
315 }
316
317 for (MVT T : LegalW) {
318 // Custom-lower BUILD_VECTOR for vector pairs. The standard (target-
319 // independent) handling of it would convert it to a load, which is
320 // not always the optimal choice.
321 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
322 // Make concat-vectors custom to handle concats of more than 2 vectors.
323 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom);
324
325 // Custom-lower these operations for pairs. Expand them into a concat
326 // of the corresponding operations on individual vectors.
327 setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom);
328 setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom);
329 setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom);
330 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Custom);
331 setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
332 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
333 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal);
334 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Custom);
335
336 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
337 setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
338 setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom);
339 setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom);
340 setOperationAction(Op: ISD::ABS, VT: T, Action: Custom);
341 setOperationAction(Op: ISD::CTLZ, VT: T, Action: Custom);
342 setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom);
343 setOperationAction(Op: ISD::CTPOP, VT: T, Action: Custom);
344
345 setOperationAction(Op: ISD::ADD, VT: T, Action: Legal);
346 setOperationAction(Op: ISD::UADDSAT, VT: T, Action: Legal);
347 setOperationAction(Op: ISD::SADDSAT, VT: T, Action: Legal);
348 setOperationAction(Op: ISD::SUB, VT: T, Action: Legal);
349 setOperationAction(Op: ISD::USUBSAT, VT: T, Action: Legal);
350 setOperationAction(Op: ISD::SSUBSAT, VT: T, Action: Legal);
351 setOperationAction(Op: ISD::MUL, VT: T, Action: Custom);
352 setOperationAction(Op: ISD::MULHS, VT: T, Action: Custom);
353 setOperationAction(Op: ISD::MULHU, VT: T, Action: Custom);
354 setOperationAction(Op: ISD::AND, VT: T, Action: Custom);
355 setOperationAction(Op: ISD::OR, VT: T, Action: Custom);
356 setOperationAction(Op: ISD::XOR, VT: T, Action: Custom);
357 setOperationAction(Op: ISD::SETCC, VT: T, Action: Custom);
358 setOperationAction(Op: ISD::VSELECT, VT: T, Action: Custom);
359 if (T != ByteW) {
360 setOperationAction(Op: ISD::SRA, VT: T, Action: Custom);
361 setOperationAction(Op: ISD::SHL, VT: T, Action: Custom);
362 setOperationAction(Op: ISD::SRL, VT: T, Action: Custom);
363
364 // Promote all shuffles to operate on vectors of bytes.
365 setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW);
366 }
367 setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom);
368 setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom);
369
370 setOperationAction(Op: ISD::SMIN, VT: T, Action: Custom);
371 setOperationAction(Op: ISD::SMAX, VT: T, Action: Custom);
372 if (T.getScalarType() != MVT::i32) {
373 setOperationAction(Op: ISD::UMIN, VT: T, Action: Custom);
374 setOperationAction(Op: ISD::UMAX, VT: T, Action: Custom);
375 }
376
377 if (Subtarget.useHVXFloatingPoint()) {
378 // Same action for both QFloat and IEEE.
379 setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom);
380 setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom);
381 setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom);
382 setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom);
383 }
384 }
385
386 // Legalize all of these to HexagonISD::[SU]MUL_LOHI.
387 setOperationAction(Op: ISD::MULHS, VT: WordV, Action: Custom); // -> _LOHI
388 setOperationAction(Op: ISD::MULHU, VT: WordV, Action: Custom); // -> _LOHI
389 setOperationAction(Op: ISD::SMUL_LOHI, VT: WordV, Action: Custom);
390 setOperationAction(Op: ISD::UMUL_LOHI, VT: WordV, Action: Custom);
391
392 setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v64f16, Action: Expand);
393 setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v64f16, Action: Expand);
394 setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v64f16, Action: Expand);
395 setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v64f16, Action: Expand);
396 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v64f16, Action: Expand);
397 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v64f16, Action: Expand);
398 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v64f16, Action: Expand);
399 setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v64f16, Action: Expand);
400 setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v64f16, Action: Expand);
401 setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v64f16, Action: Expand);
402 setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v64f16, Action: Expand);
403 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v64f16, Action: Expand);
404 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v64f16, Action: Expand);
405 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v64f16, Action: Expand);
406
407 setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v32f32, Action: Expand);
408 setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v32f32, Action: Expand);
409 setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v32f32, Action: Expand);
410 setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v32f32, Action: Expand);
411 setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v32f32, Action: Expand);
412 setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v32f32, Action: Expand);
413 setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v32f32, Action: Expand);
414 setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v32f32, Action: Expand);
415 setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v32f32, Action: Expand);
416 setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v32f32, Action: Expand);
417 setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v32f32, Action: Expand);
418 setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v32f32, Action: Expand);
419 setCondCodeAction(CCs: ISD::SETUO, VT: MVT::v32f32, Action: Expand);
420 setCondCodeAction(CCs: ISD::SETO, VT: MVT::v32f32, Action: Expand);
421
422 // Boolean vectors.
423
424 for (MVT T : LegalW) {
425 // Boolean types for vector pairs will overlap with the boolean
426 // types for single vectors, e.g.
427 // v64i8 -> v64i1 (single)
428 // v64i16 -> v64i1 (pair)
429 // Set these actions first, and allow the single actions to overwrite
430 // any duplicates.
431 MVT BoolW = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements());
432 setOperationAction(Op: ISD::SETCC, VT: BoolW, Action: Custom);
433 setOperationAction(Op: ISD::AND, VT: BoolW, Action: Custom);
434 setOperationAction(Op: ISD::OR, VT: BoolW, Action: Custom);
435 setOperationAction(Op: ISD::XOR, VT: BoolW, Action: Custom);
436 // Masked load/store takes a mask that may need splitting.
437 setOperationAction(Op: ISD::MLOAD, VT: BoolW, Action: Custom);
438 setOperationAction(Op: ISD::MSTORE, VT: BoolW, Action: Custom);
439 }
440
441 for (MVT T : LegalV) {
442 MVT BoolV = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements());
443 setOperationAction(Op: ISD::BUILD_VECTOR, VT: BoolV, Action: Custom);
444 setOperationAction(Op: ISD::CONCAT_VECTORS, VT: BoolV, Action: Custom);
445 setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: BoolV, Action: Custom);
446 setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: BoolV, Action: Custom);
447 setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: BoolV, Action: Custom);
448 setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: BoolV, Action: Custom);
449 setOperationAction(Op: ISD::SELECT, VT: BoolV, Action: Custom);
450 setOperationAction(Op: ISD::AND, VT: BoolV, Action: Legal);
451 setOperationAction(Op: ISD::OR, VT: BoolV, Action: Legal);
452 setOperationAction(Op: ISD::XOR, VT: BoolV, Action: Legal);
453 }
454
455 if (Use64b) {
456 for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32})
457 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal);
458 } else {
459 for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32})
460 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal);
461 }
462
463 // Handle store widening for short vectors.
464 unsigned HwLen = Subtarget.getVectorLength();
465 for (MVT ElemTy : Subtarget.getHVXElementTypes()) {
466 if (ElemTy == MVT::i1)
467 continue;
468 int ElemWidth = ElemTy.getFixedSizeInBits();
469 int MaxElems = (8*HwLen) / ElemWidth;
470 for (int N = 2; N < MaxElems; N *= 2) {
471 MVT VecTy = MVT::getVectorVT(VT: ElemTy, NumElements: N);
472 auto Action = getPreferredVectorAction(VT: VecTy);
473 if (Action == TargetLoweringBase::TypeWidenVector) {
474 setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Custom);
475 setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Custom);
476 setOperationAction(Op: ISD::SETCC, VT: VecTy, Action: Custom);
477 setOperationAction(Op: ISD::TRUNCATE, VT: VecTy, Action: Custom);
478 setOperationAction(Op: ISD::ANY_EXTEND, VT: VecTy, Action: Custom);
479 setOperationAction(Op: ISD::SIGN_EXTEND, VT: VecTy, Action: Custom);
480 setOperationAction(Op: ISD::ZERO_EXTEND, VT: VecTy, Action: Custom);
481 if (Subtarget.useHVXFloatingPoint()) {
482 setOperationAction(Op: ISD::FP_TO_SINT, VT: VecTy, Action: Custom);
483 setOperationAction(Op: ISD::FP_TO_UINT, VT: VecTy, Action: Custom);
484 setOperationAction(Op: ISD::SINT_TO_FP, VT: VecTy, Action: Custom);
485 setOperationAction(Op: ISD::UINT_TO_FP, VT: VecTy, Action: Custom);
486 }
487
488 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: N);
489 if (!isTypeLegal(VT: BoolTy))
490 setOperationAction(Op: ISD::SETCC, VT: BoolTy, Action: Custom);
491 }
492 }
493 }
494
495 // Include cases which are not hander earlier
496 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v32i1, Action: Custom);
497 setOperationAction(Op: ISD::UINT_TO_FP, VT: MVT::v64i1, Action: Custom);
498 setOperationAction(Op: ISD::SINT_TO_FP, VT: MVT::v32i1, Action: Custom);
499
500 setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT});
501}
502
503unsigned
504HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const {
505 // Early exit for invalid input types
506 if (!VecTy.isVector())
507 return ~0u;
508
509 MVT ElemTy = VecTy.getVectorElementType();
510 unsigned VecLen = VecTy.getVectorNumElements();
511 unsigned HwLen = Subtarget.getVectorLength();
512
513 // Split vectors of i1 that exceed byte vector length.
514 if (ElemTy == MVT::i1 && VecLen > HwLen)
515 return TargetLoweringBase::TypeSplitVector;
516
517 ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes();
518 // For shorter vectors of i1, widen them if any of the corresponding
519 // vectors of integers needs to be widened.
520 if (ElemTy == MVT::i1) {
521 for (MVT T : Tys) {
522 assert(T != MVT::i1);
523 auto A = getPreferredHvxVectorAction(VecTy: MVT::getVectorVT(VT: T, NumElements: VecLen));
524 if (A != ~0u)
525 return A;
526 }
527 return ~0u;
528 }
529
530 // If the size of VecTy is at least half of the vector length,
531 // widen the vector. Note: the threshold was not selected in
532 // any scientific way.
533 if (llvm::is_contained(Range&: Tys, Element: ElemTy)) {
534 unsigned VecWidth = VecTy.getSizeInBits();
535 unsigned HwWidth = 8*HwLen;
536 if (VecWidth > 2*HwWidth)
537 return TargetLoweringBase::TypeSplitVector;
538
539 bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0;
540 if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth)
541 return TargetLoweringBase::TypeWidenVector;
542 if (VecWidth >= HwWidth/2 && VecWidth < HwWidth)
543 return TargetLoweringBase::TypeWidenVector;
544 }
545
546 // Defer to default.
547 return ~0u;
548}
549
550unsigned
551HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const {
552 unsigned Opc = Op.getOpcode();
553 switch (Opc) {
554 case HexagonISD::SMUL_LOHI:
555 case HexagonISD::UMUL_LOHI:
556 case HexagonISD::USMUL_LOHI:
557 return TargetLoweringBase::Custom;
558 }
559 return TargetLoweringBase::Legal;
560}
561
562SDValue
563HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops,
564 const SDLoc &dl, SelectionDAG &DAG) const {
565 SmallVector<SDValue,4> IntOps;
566 IntOps.push_back(Elt: DAG.getConstant(Val: IntId, DL: dl, VT: MVT::i32));
567 append_range(C&: IntOps, R&: Ops);
568 return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: ResTy, Ops: IntOps);
569}
570
571MVT
572HexagonTargetLowering::typeJoin(const TypePair &Tys) const {
573 assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType());
574
575 MVT ElemTy = Tys.first.getVectorElementType();
576 return MVT::getVectorVT(VT: ElemTy, NumElements: Tys.first.getVectorNumElements() +
577 Tys.second.getVectorNumElements());
578}
579
580HexagonTargetLowering::TypePair
581HexagonTargetLowering::typeSplit(MVT VecTy) const {
582 assert(VecTy.isVector());
583 unsigned NumElem = VecTy.getVectorNumElements();
584 assert((NumElem % 2) == 0 && "Expecting even-sized vector type");
585 MVT HalfTy = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: NumElem/2);
586 return { HalfTy, HalfTy };
587}
588
589MVT
590HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const {
591 MVT ElemTy = VecTy.getVectorElementType();
592 MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() * Factor);
593 return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements());
594}
595
596MVT
597HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const {
598 MVT ElemTy = VecTy.getVectorElementType();
599 MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() / Factor);
600 return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements());
601}
602
603SDValue
604HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy,
605 SelectionDAG &DAG) const {
606 if (ty(Op: Vec).getVectorElementType() == ElemTy)
607 return Vec;
608 MVT CastTy = tyVector(Ty: Vec.getValueType().getSimpleVT(), ElemTy);
609 return DAG.getBitcast(VT: CastTy, V: Vec);
610}
611
612SDValue
613HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl,
614 SelectionDAG &DAG) const {
615 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: typeJoin(Tys: ty(Ops)),
616 N1: Ops.first, N2: Ops.second);
617}
618
619HexagonTargetLowering::VectorPair
620HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl,
621 SelectionDAG &DAG) const {
622 TypePair Tys = typeSplit(VecTy: ty(Op: Vec));
623 if (Vec.getOpcode() == HexagonISD::QCAT)
624 return VectorPair(Vec.getOperand(i: 0), Vec.getOperand(i: 1));
625 return DAG.SplitVector(N: Vec, DL: dl, LoVT: Tys.first, HiVT: Tys.second);
626}
627
628bool
629HexagonTargetLowering::isHvxSingleTy(MVT Ty) const {
630 return Subtarget.isHVXVectorType(VecTy: Ty) &&
631 Ty.getSizeInBits() == 8 * Subtarget.getVectorLength();
632}
633
634bool
635HexagonTargetLowering::isHvxPairTy(MVT Ty) const {
636 return Subtarget.isHVXVectorType(VecTy: Ty) &&
637 Ty.getSizeInBits() == 16 * Subtarget.getVectorLength();
638}
639
640bool
641HexagonTargetLowering::isHvxBoolTy(MVT Ty) const {
642 return Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true) &&
643 Ty.getVectorElementType() == MVT::i1;
644}
645
646bool HexagonTargetLowering::allowsHvxMemoryAccess(
647 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
648 // Bool vectors are excluded by default, but make it explicit to
649 // emphasize that bool vectors cannot be loaded or stored.
650 // Also, disallow double vector stores (to prevent unnecessary
651 // store widening in DAG combiner).
652 if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength())
653 return false;
654 if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false))
655 return false;
656 if (Fast)
657 *Fast = 1;
658 return true;
659}
660
661bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses(
662 MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const {
663 if (!Subtarget.isHVXVectorType(VecTy))
664 return false;
665 // XXX Should this be false? vmemu are a bit slower than vmem.
666 if (Fast)
667 *Fast = 1;
668 return true;
669}
670
// Expand the PS_vsplat* pseudo instructions (produced during instruction
// selection) into real HVX splat instructions. On HVX v62+ the native
// byte/half splats (V6_lvsplatb/V6_lvsplath) are used directly; on older
// architectures the value is first replicated across a full 32-bit scalar
// register and then splatted with V6_lvsplatw.
void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
    MachineInstr &MI, SDNode *Node) const {
  unsigned Opc = MI.getOpcode();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock &MB = *MI.getParent();
  MachineFunction &MF = *MB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  auto At = MI.getIterator();

  switch (Opc) {
  case Hexagon::PS_vsplatib:   // Splat of an immediate byte.
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplatb SplatV
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .add(MO: MI.getOperand(i: 1));
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV)
          .addReg(RegNo: SplatV);
    } else {
      // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(i: 1);
      assert(InpOp.isImm());
      uint32_t V = InpOp.getImm() & 0xFF;
      // Replicate the low byte into all four byte lanes of the word.
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .addImm(Val: V << 24 | V << 16 | V << 8 | V);
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
    }
    // The pseudo has been fully replaced; remove it.
    MB.erase(I: At);
    break;
  case Hexagon::PS_vsplatrb:   // Splat of a byte held in a register.
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplatb Inp
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV)
          .add(MO: MI.getOperand(i: 1));
    } else {
      // SplatV = S2_vsplatrb Inp  (replicate byte across the word)
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(i: 1);
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::S2_vsplatrb), DestReg: SplatV)
          .addReg(RegNo: InpOp.getReg(), Flags: {}, SubReg: InpOp.getSubReg());
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV)
          .addReg(RegNo: SplatV);
    }
    MB.erase(I: At);
    break;
  case Hexagon::PS_vsplatih:   // Splat of an immediate halfword.
    if (Subtarget.useHVXV62Ops()) {
      // SplatV = A2_tfrsi #imm
      // OutV = V6_lvsplath SplatV
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .add(MO: MI.getOperand(i: 1));
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV)
          .addReg(RegNo: SplatV);
    } else {
      // SplatV = A2_tfrsi #imm:#imm
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(i: 1);
      assert(InpOp.isImm());
      uint32_t V = InpOp.getImm() & 0xFFFF;
      // Replicate the low halfword into both halves of the word.
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .addImm(Val: V << 16 | V);
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
    }
    MB.erase(I: At);
    break;
  case Hexagon::PS_vsplatrh:   // Splat of a halfword held in a register.
    if (Subtarget.useHVXV62Ops()) {
      // OutV = V6_lvsplath Inp
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV)
          .add(MO: MI.getOperand(i: 1));
    } else {
      // SplatV = A2_combine_ll Inp, Inp
      // OutV = V6_lvsplatw SplatV
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      const MachineOperand &InpOp = MI.getOperand(i: 1);
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_combine_ll), DestReg: SplatV)
          .addReg(RegNo: InpOp.getReg(), Flags: {}, SubReg: InpOp.getSubReg())
          .addReg(RegNo: InpOp.getReg(), Flags: {}, SubReg: InpOp.getSubReg());
      Register OutV = MI.getOperand(i: 0).getReg();
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
    }
    MB.erase(I: At);
    break;
  case Hexagon::PS_vsplatiw:
  case Hexagon::PS_vsplatrw:
    // Word splats: for the immediate form, first move the immediate into
    // a scalar register and rewrite operand 1 to use it; then morph the
    // pseudo in place into V6_lvsplatw (no erase needed).
    if (Opc == Hexagon::PS_vsplatiw) {
      // SplatV = A2_tfrsi #imm
      Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
      BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .add(MO: MI.getOperand(i: 1));
      MI.getOperand(i: 1).ChangeToRegister(Reg: SplatV, isDef: false);
    }
    // OutV = V6_lvsplatw SplatV/Inp
    MI.setDesc(TII.get(Opcode: Hexagon::V6_lvsplatw));
    break;
  }
}
780
781SDValue
782HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy,
783 SelectionDAG &DAG) const {
784 if (ElemIdx.getValueType().getSimpleVT() != MVT::i32)
785 ElemIdx = DAG.getBitcast(VT: MVT::i32, V: ElemIdx);
786
787 unsigned ElemWidth = ElemTy.getSizeInBits();
788 if (ElemWidth == 8)
789 return ElemIdx;
790
791 unsigned L = Log2_32(Value: ElemWidth/8);
792 const SDLoc &dl(ElemIdx);
793 return DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i32,
794 Ops: {ElemIdx, DAG.getConstant(Val: L, DL: dl, VT: MVT::i32)});
795}
796
797SDValue
798HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy,
799 SelectionDAG &DAG) const {
800 unsigned ElemWidth = ElemTy.getSizeInBits();
801 assert(ElemWidth >= 8 && ElemWidth <= 32);
802 if (ElemWidth == 32)
803 return Idx;
804
805 if (ty(Op: Idx) != MVT::i32)
806 Idx = DAG.getBitcast(VT: MVT::i32, V: Idx);
807 const SDLoc &dl(Idx);
808 SDValue Mask = DAG.getConstant(Val: 32/ElemWidth - 1, DL: dl, VT: MVT::i32);
809 SDValue SubIdx = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, Ops: {Idx, Mask});
810 return SubIdx;
811}
812
813SDValue
814HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0,
815 SDValue Op1, ArrayRef<int> Mask,
816 SelectionDAG &DAG) const {
817 MVT OpTy = ty(Op: Op0);
818 assert(OpTy == ty(Op1));
819
820 MVT ElemTy = OpTy.getVectorElementType();
821 if (ElemTy == MVT::i8)
822 return DAG.getVectorShuffle(VT: OpTy, dl, N1: Op0, N2: Op1, Mask);
823 assert(ElemTy.getSizeInBits() >= 8);
824
825 MVT ResTy = tyVector(Ty: OpTy, ElemTy: MVT::i8);
826 unsigned ElemSize = ElemTy.getSizeInBits() / 8;
827
828 SmallVector<int,128> ByteMask;
829 for (int M : Mask) {
830 if (M < 0) {
831 for (unsigned I = 0; I != ElemSize; ++I)
832 ByteMask.push_back(Elt: -1);
833 } else {
834 int NewM = M*ElemSize;
835 for (unsigned I = 0; I != ElemSize; ++I)
836 ByteMask.push_back(Elt: NewM+I);
837 }
838 }
839 assert(ResTy.getVectorNumElements() == ByteMask.size());
840 return DAG.getVectorShuffle(VT: ResTy, dl, N1: opCastElem(Vec: Op0, ElemTy: MVT::i8, DAG),
841 N2: opCastElem(Vec: Op1, ElemTy: MVT::i8, DAG), Mask: ByteMask);
842}
843
SDValue
HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values,
                                         const SDLoc &dl, MVT VecTy,
                                         SelectionDAG &DAG) const {
  // Build a single HVX vector register of type VecTy from the scalar
  // elements in Values. Strategies are tried in order of cost: splat,
  // all-constant load from the constant pool, shuffle of a single source
  // vector, and finally word-by-word construction via rotate + insert.
  unsigned VecLen = Values.size();
  MachineFunction &MF = DAG.getMachineFunction();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();
  unsigned HwLen = Subtarget.getVectorLength();

  unsigned ElemSize = ElemWidth / 8;
  assert(ElemSize*VecLen == HwLen);
  SmallVector<SDValue,32> Words;

  // Pack the elements into 32-bit words: sub-word elements are combined
  // via buildVector32, word-sized elements are just bitcast to i32.
  if (VecTy.getVectorElementType() != MVT::i32 &&
      !(Subtarget.useHVXFloatingPoint() &&
        VecTy.getVectorElementType() == MVT::f32)) {
    assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size");
    unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2;
    MVT PartVT = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: OpsPerWord);
    for (unsigned i = 0; i != VecLen; i += OpsPerWord) {
      SDValue W = buildVector32(Elem: Values.slice(N: i, M: OpsPerWord), dl, VecTy: PartVT, DAG);
      Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V: W));
    }
  } else {
    for (SDValue V : Values)
      Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V));
  }
  // Returns true if all non-undef values are identical; SplatV receives
  // the common value (or Values[0] when everything is undef).
  auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) {
    unsigned NumValues = Values.size();
    assert(NumValues > 0);
    bool IsUndef = true;
    for (unsigned i = 0; i != NumValues; ++i) {
      if (Values[i].isUndef())
        continue;
      IsUndef = false;
      if (!SplatV.getNode())
        SplatV = Values[i];
      else if (SplatV != Values[i])
        return false;
    }
    if (IsUndef)
      SplatV = Values[0];
    return true;
  };

  unsigned NumWords = Words.size();
  SDValue SplatV;
  bool IsSplat = isSplat(Words, SplatV);
  if (IsSplat && isUndef(Op: SplatV))
    return DAG.getUNDEF(VT: VecTy);
  if (IsSplat) {
    assert(SplatV.getNode());
    if (isNullConstant(V: SplatV))
      return getZero(dl, Ty: VecTy, DAG);
    // Splat the common 32-bit word and bitcast back to the element type.
    MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4);
    SDValue S = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: WordTy, Operand: SplatV);
    return DAG.getBitcast(VT: VecTy, V: S);
  }

  // Delay recognizing constant vectors until here, so that we can generate
  // a vsplat.
  SmallVector<ConstantInt*, 128> Consts(VecLen);
  bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts);
  if (AllConst) {
    // All elements are constants: materialize the vector with a load from
    // the constant pool, aligned to the vector length.
    ArrayRef<Constant*> Tmp((Constant**)Consts.begin(),
                            (Constant**)Consts.end());
    Constant *CV = ConstantVector::get(V: Tmp);
    Align Alignment(HwLen);
    SDValue CP = LowerConstantPool(
        Op: DAG.getConstantPool(C: CV, VT: getPointerTy(DL: DAG.getDataLayout()), Align: Alignment),
        DAG);
    return DAG.getLoad(VT: VecTy, dl, Chain: DAG.getEntryNode(), Ptr: CP,
                       PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment);
  }

  // A special case is a situation where the vector is built entirely from
  // elements extracted from another vector. This could be done via a shuffle
  // more efficiently, but typically, the size of the source vector will not
  // match the size of the vector being built (which precludes the use of a
  // shuffle directly).
  // This only handles a single source vector, and the vector being built
  // should be of a sub-vector type of the source vector type.
  auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec,
                                             SmallVectorImpl<int> &SrcIdx) {
    SDValue Vec;
    for (SDValue V : Values) {
      if (isUndef(Op: V)) {
        SrcIdx.push_back(Elt: -1);
        continue;
      }
      if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
        return false;
      // All extracts should come from the same vector.
      SDValue T = V.getOperand(i: 0);
      if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode())
        return false;
      Vec = T;
      // Only constant extraction indexes can become shuffle-mask entries.
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1));
      if (C == nullptr)
        return false;
      int I = C->getSExtValue();
      assert(I >= 0 && "Negative element index");
      SrcIdx.push_back(Elt: I);
    }
    SrcVec = Vec;
    return true;
  };

  SmallVector<int,128> ExtIdx;
  SDValue ExtVec;
  if (IsBuildFromExtracts(ExtVec, ExtIdx)) {
    MVT ExtTy = ty(Op: ExtVec);
    unsigned ExtLen = ExtTy.getVectorNumElements();
    if (ExtLen == VecLen || ExtLen == 2*VecLen) {
      // Construct a new shuffle mask that will produce a vector with the same
      // number of elements as the input vector, and such that the vector we
      // want will be the initial subvector of it.
      SmallVector<int,128> Mask;
      BitVector Used(ExtLen);

      for (int M : ExtIdx) {
        Mask.push_back(Elt: M);
        if (M >= 0)
          Used.set(M);
      }
      // Fill the rest of the mask with the unused elements of ExtVec in hopes
      // that it will result in a permutation of ExtVec's elements. It's still
      // fine if it doesn't (e.g. if undefs are present, or elements are
      // repeated), but permutations can always be done efficiently via vdelta
      // and vrdelta.
      for (unsigned I = 0; I != ExtLen; ++I) {
        if (Mask.size() == ExtLen)
          break;
        if (!Used.test(Idx: I))
          Mask.push_back(Elt: I);
      }

      SDValue S = DAG.getVectorShuffle(VT: ExtTy, dl, N1: ExtVec,
                                       N2: DAG.getUNDEF(VT: ExtTy), Mask);
      // If the source was a vector pair, the wanted result is its low half.
      return ExtLen == VecLen ? S : LoHalf(V: S, DAG);
    }
  }

  // Find most common element to initialize vector with. This is to avoid
  // unnecessary vinsert/valign for cases where the same value is present
  // many times. Creates a histogram of the vector's elements to find the
  // most common element n.
  assert(4*Words.size() == Subtarget.getVectorLength());
  int VecHist[32];
  int n = 0;
  for (unsigned i = 0; i != NumWords; ++i) {
    VecHist[i] = 0;
    if (Words[i].isUndef())
      continue;
    // Count occurrences of Words[i] from position i onward; the first
    // occurrence of each value thus carries its full count.
    for (unsigned j = i; j != NumWords; ++j)
      if (Words[i] == Words[j])
        VecHist[i]++;

    if (VecHist[i] > VecHist[n])
      n = i;
  }

  SDValue HalfV = getZero(dl, Ty: VecTy, DAG);
  if (VecHist[n] > 1) {
    // Pre-fill with the most common word: positions already holding that
    // word then need no explicit insertion in the loop below.
    SDValue SplatV = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Words[n]);
    HalfV = DAG.getNode(Opcode: HexagonISD::VALIGN, DL: dl, VT: VecTy,
                        Ops: {HalfV, SplatV, DAG.getConstant(Val: HwLen/2, DL: dl, VT: MVT::i32)});
  }
  SDValue HalfV0 = HalfV;
  SDValue HalfV1 = HalfV;

  // Construct two halves in parallel, then or them together. Rn and Rm count
  // number of rotations needed before the next element. One last rotation is
  // performed post-loop to position the last element.
  int Rn = 0, Rm = 0;
  SDValue Sn, Sm;
  SDValue N = HalfV0;
  SDValue M = HalfV1;
  for (unsigned i = 0; i != NumWords/2; ++i) {
    // Rotate by element count since last insertion.
    if (Words[i] != Words[n] || VecHist[n] <= 1) {
      Sn = DAG.getConstant(Val: Rn, DL: dl, VT: MVT::i32);
      HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn});
      N = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy,
                      Ops: {HalfV0, Words[i]});
      Rn = 0;
    }
    if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) {
      Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32);
      HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm});
      M = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy,
                      Ops: {HalfV1, Words[i+NumWords/2]});
      Rm = 0;
    }
    Rn += 4;
    Rm += 4;
  }
  // Perform last rotation.
  Sn = DAG.getConstant(Val: Rn+HwLen/2, DL: dl, VT: MVT::i32);
  Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32);
  HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn});
  HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm});

  // Combine the two halves: bitcast to i32 vectors and OR them together.
  SDValue T0 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV0);
  SDValue T1 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV1);

  SDValue DstV = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ty(Op: T0), Ops: {T0, T1});

  // Cast back to the requested element type.
  SDValue OutV =
      DAG.getBitcast(VT: tyVector(Ty: ty(Op: DstV), ElemTy: VecTy.getVectorElementType()), V: DstV);
  return OutV;
}
1057
SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  // Create a byte vector whose initial bytes encode the elements of the
  // predicate PredV, with each i1 element represented by BitBytes bytes.
  // If ZeroFill is set, the bytes past that prefix are cleared to zero.
  MVT PredTy = ty(Op: PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);

  if (Subtarget.isHVXVectorType(VecTy: PredTy, IncludeBool: true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    // Build a shuffle mask that gathers every Scale-th byte into the
    // first BlockLen positions of the result.
    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(VT: ByteTy, dl, N1: T, N2: DAG.getUNDEF(VT: ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
    SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                         Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG);
    SDValue M = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Q);
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ByteTy, N1: S, N2: M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  // Bytes of representation each i1 gets in the 8-byte scalar form.
  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  // Two word lists used in ping-pong fashion while resizing.
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  // Expand the predicate into a 64-bit scalar (one byte-ish lane per bit).
  SDValue W0 = isUndef(Op: PredV)
                  ? DAG.getUNDEF(VT: MVT::i64)
                  : DAG.getNode(Opcode: HexagonISD::P2D, DL: dl, VT: MVT::i64, Operand: PredV);
  if (Bytes < BitBytes) {
    Words[IdxW].push_back(Elt: HiHalf(V: W0, DAG));
    Words[IdxW].push_back(Elt: LoHalf(V: W0, DAG));
  } else
    Words[IdxW].push_back(Elt: W0);

  // Widen: each pass doubles the per-element byte count until it reaches
  // the requested BitBytes.
  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(Vec32: W, dl, DAG);
        Words[IdxW].push_back(Elt: HiHalf(V: T, DAG));
        Words[IdxW].push_back(Elt: LoHalf(V: T, DAG));
      }
    } else {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(Elt: W);
        Words[IdxW].push_back(Elt: W);
      }
    }
    Bytes *= 2;
  }

  // Narrow: each pass halves the per-element byte count until it reaches
  // the requested BitBytes.
  while (Bytes > BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes <= 4) {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = contractPredicate(Vec64: W, dl, DAG);
        Words[IdxW].push_back(Elt: T);
      }
    } else {
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(Elt: W);
      }
    }
    Bytes /= 2;
  }

  assert(Bytes == BitBytes);
  if (BitBytes == 1 && PredTy == MVT::v2i1)
    ByteTy = MVT::getVectorVT(VT: MVT::i16, NumElements: HwLen);

  // Insert the words at the front of the vector, using rotate-by-(HwLen-4)
  // followed by insert-word so earlier words end up at lower addresses.
  SDValue Vec = ZeroFill ? getZero(dl, Ty: ByteTy, DAG) : DAG.getUNDEF(VT: ByteTy);
  SDValue S4 = DAG.getConstant(Val: HwLen-4, DL: dl, VT: MVT::i32);
  for (const SDValue &W : Words[IdxW]) {
    Vec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Vec, N2: S4);
    Vec = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: ByteTy, N1: Vec, N2: W);
  }

  return Vec;
}
1164
SDValue
HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
                                          const SDLoc &dl, MVT VecTy,
                                          SelectionDAG &DAG) const {
  // Construct a vector V of bytes, such that a comparison V >u 0 would
  // produce the required vector predicate.
  unsigned VecLen = Values.size();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(VecLen <= HwLen || VecLen == 8*HwLen);
  SmallVector<SDValue,128> Bytes;
  // Track whether every element is constant-true / constant-false so the
  // all-true/all-false cases can be emitted as QTRUE/QFALSE.
  bool AllT = true, AllF = true;

  // Constant-true: a non-zero integer constant.
  auto IsTrue = [] (SDValue V) {
    if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
      return !N->isZero();
    return false;
  };
  // Constant-false: an integer constant equal to zero.
  auto IsFalse = [] (SDValue V) {
    if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
      return N->isZero();
    return false;
  };

  if (VecLen <= HwLen) {
    // In the hardware, each bit of a vector predicate corresponds to a byte
    // of a vector register. Calculate how many bytes does a bit of VecTy
    // correspond to.
    assert(HwLen % VecLen == 0);
    unsigned BitBytes = HwLen / VecLen;
    for (SDValue V : Values) {
      AllT &= IsTrue(V);
      AllF &= IsFalse(V);

      // Replicate each i1 value across its BitBytes bytes.
      SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(Op: V, DL: dl, VT: MVT::i8)
                                 : DAG.getUNDEF(VT: MVT::i8);
      for (unsigned B = 0; B != BitBytes; ++B)
        Bytes.push_back(Elt: Ext);
    }
  } else {
    // There are as many i1 values, as there are bits in a vector register.
    // Divide the values into groups of 8 and check that each group consists
    // of the same value (ignoring undefs).
    for (unsigned I = 0; I != VecLen; I += 8) {
      unsigned B = 0;
      // Find the first non-undef value in this group.
      for (; B != 8; ++B) {
        if (!Values[I+B].isUndef())
          break;
      }
      SDValue F = Values[I+B];
      AllT &= IsTrue(F);
      AllF &= IsFalse(F);

      // B == 8 means the whole group was undef.
      SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(Op: F, DL: dl, VT: MVT::i8)
                            : DAG.getUNDEF(VT: MVT::i8);
      Bytes.push_back(Elt: Ext);
      // Verify that the rest of values in the group are the same as the
      // first.
      for (; B != 8; ++B)
        assert(Values[I+B].isUndef() || Values[I+B] == F);
    }
  }

  // Uniform predicates have dedicated nodes.
  if (AllT)
    return DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: VecTy);
  if (AllF)
    return DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: VecTy);

  // Build the byte vector and convert it to a predicate (V2Q tests each
  // byte for non-zero).
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue ByteVec = buildHvxVectorReg(Values: Bytes, dl, VecTy: ByteTy, DAG);
  return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
}
1237
1238SDValue
1239HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV,
1240 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1241 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1242
1243 unsigned ElemWidth = ElemTy.getSizeInBits();
1244 assert(ElemWidth >= 8 && ElemWidth <= 32);
1245 (void)ElemWidth;
1246
1247 SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG);
1248 SDValue ExWord = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32,
1249 Ops: {VecV, ByteIdx});
1250 if (ElemTy == MVT::i32)
1251 return ExWord;
1252
1253 // Have an extracted word, need to extract the smaller element out of it.
1254 // 1. Extract the bits of (the original) IdxV that correspond to the index
1255 // of the desired element in the 32-bit word.
1256 SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG);
1257 // 2. Extract the element from the word.
1258 SDValue ExVec = DAG.getBitcast(VT: tyVector(Ty: ty(Op: ExWord), ElemTy), V: ExWord);
1259 return extractVector(VecV: ExVec, IdxV: SubIdx, dl, ValTy: ElemTy, ResTy: MVT::i32, DAG);
1260}
1261
1262SDValue
1263HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV,
1264 const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
1265 // Implement other return types if necessary.
1266 assert(ResTy == MVT::i1);
1267
1268 unsigned HwLen = Subtarget.getVectorLength();
1269 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1270 SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1271
1272 unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements();
1273 SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32);
1274 IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV);
1275
1276 SDValue ExtB = extractHvxElementReg(VecV: ByteVec, IdxV, dl, ResTy: MVT::i32, DAG);
1277 SDValue Zero = DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32);
1278 return getInstr(MachineOpc: Hexagon::C2_cmpgtui, dl, Ty: MVT::i1, Ops: {ExtB, Zero}, DAG);
1279}
1280
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert a scalar element into an HVX vector at a (possibly variable)
  // index, returning the updated vector.
  MVT ElemTy = ty(Op: VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // Insert ValV as the 32-bit word at byte offset ByteIdxV: rotate that
  // word to position 0, insert it with VINSERTW0, then rotate back.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(Op: VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    // Round the byte index down to a word boundary (clear low 2 bits).
    SDValue MaskV =
        DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                    Ops: {ByteIdxV, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)});
    SDValue RotV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {VecV, MaskV});
    SDValue InsV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, Ops: {RotV, ValV});
    // Rotate by HwLen - offset to undo the initial rotation.
    SDValue SubV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
                               Ops: {DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG);
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
  SDValue WordIdx = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32,
                                Ops: {ByteIdx, DAG.getConstant(Val: 2, DL: dl, VT: MVT::i32)});
  SDValue Ext = extractHvxElementReg(VecV: opCastElem(Vec: VecV, ElemTy: MVT::i32, DAG), IdxV: WordIdx,
                                     dl, ResTy: MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(Ty: ty(Op: Ext), ElemTy);
  SDValue Ins = insertVector(VecV: DAG.getBitcast(VT: SubVecTy, V: Ext),
                             ValV, IdxV: SubIdx, dl, ValTy: ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}
1326
1327SDValue
1328HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV,
1329 SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
1330 unsigned HwLen = Subtarget.getVectorLength();
1331 MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
1332 SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
1333
1334 unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements();
1335 SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32);
1336 IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV);
1337 ValV = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i32, Operand: ValV);
1338
1339 SDValue InsV = insertHvxElementReg(VecV: ByteVec, IdxV, ValV, dl, DAG);
1340 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ty(Op: VecV), Operand: InsV);
1341}
1342
SDValue
HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
      SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  // Extract the subvector of type ResTy starting at constant element
  // index IdxV from the HVX vector (or vector pair) VecV.
  MVT VecTy = ty(Op: VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  // IdxV is required to be a constant here.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  // If the source vector is a vector pair, get the single vector containing
  // the subvector of interest. The subvector will never overlap two single
  // vectors.
  if (isHvxPairTy(Ty: VecTy)) {
    unsigned SubIdx = Hexagon::vsub_lo;
    // The subvector lies in the high half if its starting bit offset is
    // at or past one full vector register (8*HwLen bits).
    if (Idx * ElemWidth >= 8 * HwLen) {
      SubIdx = Hexagon::vsub_hi;
      Idx -= VecTy.getVectorNumElements() / 2;
    }

    VecTy = typeSplit(VecTy).first;
    VecV = DAG.getTargetExtractSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV);
    // The requested subvector may be the entire single vector.
    if (VecTy == ResTy)
      return VecV;
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64);

  // View the vector as i32 words and extract the word(s) covering the
  // requested subvector.
  MVT WordTy = tyVector(Ty: VecTy, ElemTy: MVT::i32);
  SDValue WordVec = DAG.getBitcast(VT: WordTy, V: VecV);
  unsigned WordIdx = (Idx*ElemWidth) / 32;

  SDValue W0Idx = DAG.getConstant(Val: WordIdx, DL: dl, VT: MVT::i32);
  SDValue W0 = extractHvxElementReg(VecV: WordVec, IdxV: W0Idx, dl, ResTy: MVT::i32, DAG);
  if (ResTy.getSizeInBits() == 32)
    return DAG.getBitcast(VT: ResTy, V: W0);

  // 64-bit result: extract the adjacent word and combine the two.
  SDValue W1Idx = DAG.getConstant(Val: WordIdx+1, DL: dl, VT: MVT::i32);
  SDValue W1 = extractHvxElementReg(VecV: WordVec, IdxV: W1Idx, dl, ResTy: MVT::i32, DAG);
  SDValue WW = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::i64, DAG);
  return DAG.getBitcast(VT: ResTy, V: WW);
}
1386
SDValue
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
      const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const {
  // Extract the subvector of predicate type ResTy starting at constant
  // element index IdxV from the HVX predicate VecV. Works on the
  // byte-vector (Q2V) form of the predicate.
  MVT VecTy = ty(Op: VecV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
  // IdxV is required to be a constant.
  unsigned Idx = IdxV.getNode()->getAsZExtVal();

  unsigned ResLen = ResTy.getVectorNumElements();
  // Bytes of the byte vector that correspond to one i1 of VecTy.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();
  unsigned Offset = Idx * BitBytes;
  SDValue Undef = DAG.getUNDEF(VT: ByteTy);
  SmallVector<int,128> Mask;

  if (Subtarget.isHVXVectorType(VecTy: ResTy, IncludeBool: true)) {
    // Converting between two vector predicates. Since the result is shorter
    // than the source, it will correspond to a vector predicate with the
    // relevant bits replicated. The replication count is the ratio of the
    // source and target vector lengths.
    unsigned Rep = VecTy.getVectorNumElements() / ResLen;
    assert(isPowerOf2_32(Rep) && HwLen % Rep == 0);
    for (unsigned i = 0; i != HwLen/Rep; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Elt: i + Offset);
    }
    SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask);
    return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: ShuffV);
  }

  // Converting between a vector predicate and a scalar predicate. In the
  // vector predicate, a group of BitBytes bits will correspond to a single
  // i1 element of the source vector type. Those bits will all have the same
  // value. The same will be true for ByteVec, where each byte corresponds
  // to a bit in the vector predicate.
  // The algorithm is to traverse the ByteVec, going over the i1 values from
  // the source vector, and generate the corresponding representation in an
  // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the
  // elements so that the interesting 8 bytes will be in the low end of the
  // vector.
  unsigned Rep = 8 / ResLen;
  // Make sure the output fill the entire vector register, so repeat the
  // 8-byte groups as many times as necessary.
  for (unsigned r = 0; r != HwLen/ResLen; ++r) {
    // This will generate the indexes of the 8 interesting bytes.
    for (unsigned i = 0; i != ResLen; ++i) {
      for (unsigned j = 0; j != Rep; ++j)
        Mask.push_back(Elt: Offset + i*BitBytes);
    }
  }

  SDValue Zero = getZero(dl, Ty: MVT::i32, DAG);
  SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask);
  // Combine the two low words from ShuffV into a v8i8, and byte-compare
  // them against 0.
  SDValue W0 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, Ops: {ShuffV, Zero});
  SDValue W1 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32,
                           Ops: {ShuffV, DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32)});
  SDValue Vec64 = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::v8i8, DAG);
  return getInstr(MachineOpc: Hexagon::A4_vcmpbgtui, dl, Ty: ResTy,
                  Ops: {Vec64, DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32)}, DAG);
}
1450
1451SDValue
1452HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
1453 SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
1454 MVT VecTy = ty(Op: VecV);
1455 MVT SubTy = ty(Op: SubV);
1456 unsigned HwLen = Subtarget.getVectorLength();
1457 MVT ElemTy = VecTy.getVectorElementType();
1458 unsigned ElemWidth = ElemTy.getSizeInBits();
1459
1460 bool IsPair = isHvxPairTy(Ty: VecTy);
1461 MVT SingleTy = MVT::getVectorVT(VT: ElemTy, NumElements: (8*HwLen)/ElemWidth);
1462 // The two single vectors that VecV consists of, if it's a pair.
1463 SDValue V0, V1;
1464 SDValue SingleV = VecV;
1465 SDValue PickHi;
1466
1467 if (IsPair) {
1468 V0 = LoHalf(V: VecV, DAG);
1469 V1 = HiHalf(V: VecV, DAG);
1470
1471 SDValue HalfV = DAG.getConstant(Val: SingleTy.getVectorNumElements(),
1472 DL: dl, VT: MVT::i32);
1473 PickHi = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: IdxV, RHS: HalfV, Cond: ISD::SETUGT);
1474 if (isHvxSingleTy(Ty: SubTy)) {
1475 if (const auto *CN = dyn_cast<const ConstantSDNode>(Val: IdxV.getNode())) {
1476 unsigned Idx = CN->getZExtValue();
1477 assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
1478 unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
1479 return DAG.getTargetInsertSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV, Subreg: SubV);
1480 }
1481 // If IdxV is not a constant, generate the two variants: with the
1482 // SubV as the high and as the low subregister, and select the right
1483 // pair based on the IdxV.
1484 SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SubV, V1});
1485 SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SubV});
1486 return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
1487 }
1488 // The subvector being inserted must be entirely contained in one of
1489 // the vectors V0 or V1. Set SingleV to the correct one, and update
1490 // IdxV to be the index relative to the beginning of that vector.
1491 SDValue S = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: IdxV, N2: HalfV);
1492 IdxV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: PickHi, N2: S, N3: IdxV);
1493 SingleV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: SingleTy, N1: PickHi, N2: V1, N3: V0);
1494 }
1495
1496 // The only meaningful subvectors of a single HVX vector are those that
1497 // fit in a scalar register.
1498 assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
1499 // Convert IdxV to be index in bytes.
1500 auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
1501 if (!IdxN || !IdxN->isZero()) {
1502 IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
1503 N2: DAG.getConstant(Val: ElemWidth/8, DL: dl, VT: MVT::i32));
1504 SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: IdxV);
1505 }
1506 // When inserting a single word, the rotation back to the original position
1507 // would be by HwLen-Idx, but if two words are inserted, it will need to be
1508 // by (HwLen-4)-Idx.
1509 unsigned RolBase = HwLen;
1510 if (SubTy.getSizeInBits() == 32) {
1511 SDValue V = DAG.getBitcast(VT: MVT::i32, V: SubV);
1512 SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: V);
1513 } else {
1514 SDValue V = DAG.getBitcast(VT: MVT::i64, V: SubV);
1515 SDValue R0 = LoHalf(V, DAG);
1516 SDValue R1 = HiHalf(V, DAG);
1517 SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R0);
1518 SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV,
1519 N2: DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32));
1520 SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R1);
1521 RolBase = HwLen-4;
1522 }
1523 // If the vector wasn't ror'ed, don't ror it back.
1524 if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
1525 SDValue RolV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
1526 N1: DAG.getConstant(Val: RolBase, DL: dl, VT: MVT::i32), N2: IdxV);
1527 SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: RolV);
1528 }
1529
1530 if (IsPair) {
1531 SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SingleV, V1});
1532 SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SingleV});
1533 return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
1534 }
1535 return SingleV;
1536}
1537
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert the predicate subvector SubV into the HVX vector predicate
  // VecV at element index IdxV. The work is done in the byte-vector
  // domain (Q2V/V2Q), with a rotate / vmux / rotate-back sequence.
  MVT VecTy = ty(Op: VecV);
  MVT SubTy = ty(Op: SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type");

  // Scale: how many SubTy-sized pieces fit into VecTy.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  // BitBytes: bytes in the byte-vector form per single i1 element of VecTy.
  unsigned BitBytes = HwLen / VecLen;
  // BlockLen: bytes occupied by the subvector in the byte-vector form.
  unsigned BlockLen = HwLen / Scale;

  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
  SDValue ByteSub = createHvxPrefixPred(PredV: SubV, dl, BitBytes, ZeroFill: false, DAG);
  SDValue ByteIdx;

  // Rotate the target vector so the insertion point moves to byte 0.
  auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    ByteIdx = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
                          N2: DAG.getConstant(Val: BitBytes, DL: dl, VT: MVT::i32));
    ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  // Q is true in the first BlockLen byte lanes (the insertion region).
  SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                       Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG);
  ByteVec = getInstr(MachineOpc: Hexagon::V6_vmux, dl, Ty: ByteTy, Ops: {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32);
    SDValue ByteXdi = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: HwLenV, N2: ByteIdx);
    ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteXdi);
  }
  return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
}
1583
1584SDValue
1585HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl,
1586 MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const {
1587 // Sign- and any-extending of a vector predicate to a vector register is
1588 // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and
1589 // a vector of 1s (where the 1s are of type matching the vector type).
1590 assert(Subtarget.isHVXVectorType(ResTy));
1591 if (!ZeroExt)
1592 return DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ResTy, Operand: VecV);
1593
1594 assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements());
1595 SDValue True = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
1596 Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
1597 SDValue False = getZero(dl, Ty: ResTy, DAG);
1598 return DAG.getSelect(DL: dl, VT: ResTy, Cond: VecV, LHS: True, RHS: False);
1599}
1600
SDValue
HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
      MVT ResTy, SelectionDAG &DAG) const {
  // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
  // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
  // vector register. The remaining bits of the vector register are
  // unspecified.

  MachineFunction &MF = DAG.getMachineFunction();
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  MVT PredTy = ty(Op: VecQ);
  unsigned PredLen = PredTy.getVectorNumElements();
  assert(HwLen % PredLen == 0);
  // VecTy covers the same HwLen bytes, with one (possibly multi-byte)
  // integer element per predicate element.
  MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 8*HwLen/PredLen), NumElements: PredLen);

  Type *Int8Ty = Type::getInt8Ty(C&: *DAG.getContext());
  SmallVector<Constant*, 128> Tmp;
  // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
  // These are bytes with the LSB rotated left with respect to their index.
  for (unsigned i = 0; i != HwLen/8; ++i) {
    for (unsigned j = 0; j != 8; ++j)
      Tmp.push_back(Elt: ConstantInt::get(Ty: Int8Ty, V: 1ull << j));
  }
  Constant *CV = ConstantVector::get(V: Tmp);
  Align Alignment(HwLen);
  // Materialize the bit-mask table through the constant pool and load it.
  SDValue CP = LowerConstantPool(
      Op: DAG.getConstantPool(C: CV, VT: getPointerTy(DL: DAG.getDataLayout()), Align: Alignment),
      DAG);
  SDValue Bytes =
      DAG.getLoad(VT: ByteTy, dl, Chain: DAG.getEntryNode(), Ptr: CP,
                  PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment);

  // Select the bytes that correspond to true bits in the vector predicate.
  SDValue Sel = DAG.getSelect(DL: dl, VT: VecTy, Cond: VecQ, LHS: DAG.getBitcast(VT: VecTy, V: Bytes),
                              RHS: getZero(dl, Ty: VecTy, DAG));
  // Calculate the OR of all bytes in each group of 8. That will compress
  // all the individual bits into a single byte.
  // First, OR groups of 4, via vrmpy with 0x01010101.
  SDValue All1 =
      DAG.getSplatBuildVector(VT: MVT::v4i8, DL: dl, Op: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
  SDValue Vrmpy = getInstr(MachineOpc: Hexagon::V6_vrmpyub, dl, Ty: ByteTy, Ops: {Sel, All1}, DAG);
  // Then rotate the accumulated vector by 4 bytes, and do the final OR.
  SDValue Rot = getInstr(MachineOpc: Hexagon::V6_valignbi, dl, Ty: ByteTy,
                         Ops: {Vrmpy, Vrmpy, DAG.getTargetConstant(Val: 4, DL: dl, VT: MVT::i32)}, DAG);
  SDValue Vor = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, Ops: {Vrmpy, Rot});

  // Pick every 8th byte and coalesce them at the beginning of the output.
  // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
  // byte and so on.
  SmallVector<int,128> Mask;
  for (unsigned i = 0; i != HwLen; ++i)
    Mask.push_back(Elt: (8*i) % HwLen + i/(HwLen/8));
  SDValue Collect =
      DAG.getVectorShuffle(VT: ByteTy, dl, N1: Vor, N2: DAG.getUNDEF(VT: ByteTy), Mask);
  return DAG.getBitcast(VT: ResTy, V: Collect);
}
1658
1659SDValue
1660HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed,
1661 const SDLoc &dl, SelectionDAG &DAG) const {
1662 // Take a vector and resize the element type to match the given type.
1663 MVT InpTy = ty(Op: VecV);
1664 if (InpTy == ResTy)
1665 return VecV;
1666
1667 unsigned InpWidth = InpTy.getSizeInBits();
1668 unsigned ResWidth = ResTy.getSizeInBits();
1669
1670 if (InpTy.isFloatingPoint()) {
1671 return InpWidth < ResWidth
1672 ? DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: ResTy, Operand: VecV)
1673 : DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: ResTy, N1: VecV,
1674 N2: DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32));
1675 }
1676
1677 assert(InpTy.isInteger());
1678
1679 if (InpWidth < ResWidth) {
1680 unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
1681 return DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ResTy, Operand: VecV);
1682 } else {
1683 unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT;
1684 return DAG.getNode(Opcode: NarOpc, DL: dl, VT: ResTy, N1: VecV, N2: DAG.getValueType(ResTy));
1685 }
1686}
1687
1688SDValue
1689HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1690 SelectionDAG &DAG) const {
1691 assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0);
1692
1693 const SDLoc &dl(Vec);
1694 unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements();
1695 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: SubTy,
1696 Ops: {Vec, DAG.getConstant(Val: ElemIdx, DL: dl, VT: MVT::i32)});
1697}
1698
1699SDValue
1700HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG)
1701 const {
1702 const SDLoc &dl(Op);
1703 MVT VecTy = ty(Op);
1704
1705 unsigned Size = Op.getNumOperands();
1706 SmallVector<SDValue,128> Ops;
1707 for (unsigned i = 0; i != Size; ++i)
1708 Ops.push_back(Elt: Op.getOperand(i));
1709
1710 if (VecTy.getVectorElementType() == MVT::i1)
1711 return buildHvxVectorPred(Values: Ops, dl, VecTy, DAG);
1712
1713 // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is
1714 // not a legal type, just bitcast the node to use i16
1715 // types and bitcast the result back to f16
1716 if (VecTy.getVectorElementType() == MVT::f16 ||
1717 VecTy.getVectorElementType() == MVT::bf16) {
1718 SmallVector<SDValue, 64> NewOps;
1719 for (unsigned i = 0; i != Size; i++)
1720 NewOps.push_back(Elt: DAG.getBitcast(VT: MVT::i16, V: Ops[i]));
1721
1722 SDValue T0 =
1723 DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl, VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), Ops: NewOps);
1724 return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: VecTy.getVectorElementType()), V: T0);
1725 }
1726
1727 // First, split the BUILD_VECTOR for vector pairs. We could generate
1728 // some pairs directly (via splat), but splats should be generated
1729 // by the combiner prior to getting here.
1730 if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) {
1731 ArrayRef<SDValue> A(Ops);
1732 MVT SingleTy = typeSplit(VecTy).first;
1733 SDValue V0 = buildHvxVectorReg(Values: A.take_front(N: Size / 2), dl, VecTy: SingleTy, DAG);
1734 SDValue V1 = buildHvxVectorReg(Values: A.drop_front(N: Size / 2), dl, VecTy: SingleTy, DAG);
1735 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, N1: V0, N2: V1);
1736 }
1737
1738 return buildHvxVectorReg(Values: Ops, dl, VecTy, DAG);
1739}
1740
1741SDValue
1742HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG)
1743 const {
1744 const SDLoc &dl(Op);
1745 MVT VecTy = ty(Op);
1746 MVT ArgTy = ty(Op: Op.getOperand(i: 0));
1747
1748 if (ArgTy == MVT::f16 || ArgTy == MVT::bf16) {
1749 MVT SplatTy = MVT::getVectorVT(VT: MVT::i16, NumElements: VecTy.getVectorNumElements());
1750 SDValue ToInt16 = DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: 0));
1751 SDValue ToInt32 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: ToInt16);
1752 SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: SplatTy, Operand: ToInt32);
1753 return DAG.getBitcast(VT: VecTy, V: Splat);
1754 }
1755
1756 return SDValue();
1757}
1758
SDValue
HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
      const {
  // Vector concatenation of two integer (non-bool) vectors does not need
  // special lowering. Custom-lower concats of bool vectors and expand
  // concats of more than 2 vectors.
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  unsigned NumOp = Op.getNumOperands();
  if (VecTy.getVectorElementType() != MVT::i1) {
    if (NumOp == 2)
      return Op;
    // Expand the other cases into a build-vector.
    SmallVector<SDValue,8> Elems;
    for (SDValue V : Op.getNode()->ops())
      DAG.ExtractVectorElements(Op: V, Args&: Elems);
    // A vector of i16 will be broken up into a build_vector of i16's.
    // This is a problem, since at the time of operation legalization,
    // all operations are expected to be type-legalized, and i16 is not
    // a legal type. If any of the extracted elements is not of a valid
    // type, sign-extend it to a valid one.
    for (SDValue &V : Elems) {
      MVT Ty = ty(Op: V);
      if (!isTypeLegal(VT: Ty)) {
        MVT NTy = typeLegalize(Ty, DAG);
        if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
          // Re-extract at the legal width, then sign-extend-in-reg from
          // the original (illegal) width.
          V = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: NTy,
                          N1: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: NTy,
                                         N1: V.getOperand(i: 0), N2: V.getOperand(i: 1)),
                          N2: DAG.getValueType(Ty));
          continue;
        }
        // A few less complicated cases.
        switch (V.getOpcode()) {
          case ISD::Constant:
            V = DAG.getSExtOrTrunc(Op: V, DL: dl, VT: NTy);
            break;
          case ISD::UNDEF:
            V = DAG.getUNDEF(VT: NTy);
            break;
          case ISD::TRUNCATE:
            // Drop the truncate; the pre-truncation value already has a
            // wider (legal) type.
            V = V.getOperand(i: 0);
            break;
          default:
            llvm_unreachable("Unexpected vector element");
        }
      }
    }
    return DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Elems);
  }

  assert(VecTy.getVectorElementType() == MVT::i1);
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);

  SDValue Op0 = Op.getOperand(i: 0);

  // If the operands are HVX types (i.e. not scalar predicates), then
  // defer the concatenation, and create QCAT instead.
  if (Subtarget.isHVXVectorType(VecTy: ty(Op: Op0), IncludeBool: true)) {
    if (NumOp == 2)
      return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: Op0, N2: Op.getOperand(i: 1));

    // More than 2 operands: recursively QCAT each half.
    ArrayRef<SDUse> U(Op.getNode()->ops());
    SmallVector<SDValue, 4> SV(U);
    ArrayRef<SDValue> Ops(SV);

    MVT HalfTy = typeSplit(VecTy).first;
    SDValue V0 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
                             Ops: Ops.take_front(N: NumOp/2));
    SDValue V1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
                             Ops: Ops.take_back(N: NumOp/2));
    return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: V0, N2: V1);
  }

  // Count how many bytes (in a vector register) each bit in VecTy
  // corresponds to.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();

  // Convert each operand to byte-vector form, zero-filled.
  SmallVector<SDValue,8> Prefixes;
  for (SDValue V : Op.getNode()->op_values()) {
    SDValue P = createHvxPrefixPred(PredV: V, dl, BitBytes, ZeroFill: true, DAG);
    Prefixes.push_back(Elt: P);
  }

  // Accumulate the prefixes from last to first: rotate the accumulator to
  // make room, then OR in the next prefix.
  unsigned InpLen = ty(Op: Op.getOperand(i: 0)).getVectorNumElements();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue S = DAG.getConstant(Val: HwLen - InpLen*BitBytes, DL: dl, VT: MVT::i32);
  SDValue Res = getZero(dl, Ty: ByteTy, DAG);
  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
    Res = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Res, N2: S);
    Res = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, N1: Res, N2: Prefixes[e-i-1]);
  }
  return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: Res);
}
1854
1855SDValue
1856HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1857 const {
1858 // Change the type of the extracted element to i32.
1859 SDValue VecV = Op.getOperand(i: 0);
1860 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1861 const SDLoc &dl(Op);
1862 SDValue IdxV = Op.getOperand(i: 1);
1863 if (ElemTy == MVT::i1)
1864 return extractHvxElementPred(VecV, IdxV, dl, ResTy: ty(Op), DAG);
1865
1866 return extractHvxElementReg(VecV, IdxV, dl, ResTy: ty(Op), DAG);
1867}
1868
1869SDValue
1870HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG)
1871 const {
1872 const SDLoc &dl(Op);
1873 MVT VecTy = ty(Op);
1874 SDValue VecV = Op.getOperand(i: 0);
1875 SDValue ValV = Op.getOperand(i: 1);
1876 SDValue IdxV = Op.getOperand(i: 2);
1877 MVT ElemTy = ty(Op: VecV).getVectorElementType();
1878 if (ElemTy == MVT::i1)
1879 return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG);
1880
1881 if (ElemTy == MVT::f16 || ElemTy == MVT::bf16) {
1882 SDValue T0 = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl,
1883 VT: tyVector(Ty: VecTy, ElemTy: MVT::i16),
1884 N1: DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), V: VecV),
1885 N2: DAG.getBitcast(VT: MVT::i16, V: ValV), N3: IdxV);
1886 return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy), V: T0);
1887 }
1888
1889 return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG);
1890}
1891
1892SDValue
1893HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1894 const {
1895 SDValue SrcV = Op.getOperand(i: 0);
1896 MVT SrcTy = ty(Op: SrcV);
1897 MVT DstTy = ty(Op);
1898 SDValue IdxV = Op.getOperand(i: 1);
1899 unsigned Idx = IdxV.getNode()->getAsZExtVal();
1900 assert(Idx % DstTy.getVectorNumElements() == 0);
1901 (void)Idx;
1902 const SDLoc &dl(Op);
1903
1904 MVT ElemTy = SrcTy.getVectorElementType();
1905 if (ElemTy == MVT::i1)
1906 return extractHvxSubvectorPred(VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG);
1907
1908 return extractHvxSubvectorReg(OrigOp: Op, VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG);
1909}
1910
1911SDValue
1912HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG)
1913 const {
1914 // Idx does not need to be a constant.
1915 SDValue VecV = Op.getOperand(i: 0);
1916 SDValue ValV = Op.getOperand(i: 1);
1917 SDValue IdxV = Op.getOperand(i: 2);
1918
1919 const SDLoc &dl(Op);
1920 MVT VecTy = ty(Op: VecV);
1921 MVT ElemTy = VecTy.getVectorElementType();
1922 if (ElemTy == MVT::i1)
1923 return insertHvxSubvectorPred(VecV, SubV: ValV, IdxV, dl, DAG);
1924
1925 return insertHvxSubvectorReg(VecV, SubV: ValV, IdxV, dl, DAG);
1926}
1927
1928SDValue
1929HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const {
1930 // Lower any-extends of boolean vectors to sign-extends, since they
1931 // translate directly to Q2V. Zero-extending could also be done equally
1932 // fast, but Q2V is used/recognized in more places.
1933 // For all other vectors, use zero-extend.
1934 MVT ResTy = ty(Op);
1935 SDValue InpV = Op.getOperand(i: 0);
1936 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1937 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
1938 return LowerHvxSignExt(Op, DAG);
1939 return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Op), VT: ResTy, Operand: InpV);
1940}
1941
1942SDValue
1943HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const {
1944 MVT ResTy = ty(Op);
1945 SDValue InpV = Op.getOperand(i: 0);
1946 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1947 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
1948 return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: false, DAG);
1949 return Op;
1950}
1951
1952SDValue
1953HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const {
1954 MVT ResTy = ty(Op);
1955 SDValue InpV = Op.getOperand(i: 0);
1956 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1957 if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy))
1958 return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: true, DAG);
1959 return Op;
1960}
1961
1962SDValue
1963HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const {
1964 // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight):
1965 // cttz(x) = bitwidth(x) - ctlz(~x & (x-1))
1966 const SDLoc &dl(Op);
1967 MVT ResTy = ty(Op);
1968 SDValue InpV = Op.getOperand(i: 0);
1969 assert(ResTy == ty(InpV));
1970
1971 // Calculate the vectors of 1 and bitwidth(x).
1972 MVT ElemTy = ty(Op: InpV).getVectorElementType();
1973 unsigned ElemWidth = ElemTy.getSizeInBits();
1974
1975 SDValue Vec1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
1976 Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
1977 SDValue VecW = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
1978 Operand: DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32));
1979 SDValue VecN1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy,
1980 Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32));
1981
1982 // Do not use DAG.getNOT, because that would create BUILD_VECTOR with
1983 // a BITCAST. Here we can skip the BITCAST (so we don't have to handle
1984 // it separately in custom combine or selection).
1985 SDValue A = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy,
1986 Ops: {DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {InpV, VecN1}),
1987 DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {InpV, Vec1})});
1988 return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy,
1989 Ops: {VecW, DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: ResTy, Operand: A)});
1990}
1991
1992SDValue
1993HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const {
1994 const SDLoc &dl(Op);
1995 MVT ResTy = ty(Op);
1996 assert(ResTy.getVectorElementType() == MVT::i32);
1997
1998 SDValue Vs = Op.getOperand(i: 0);
1999 SDValue Vt = Op.getOperand(i: 1);
2000
2001 SDVTList ResTys = DAG.getVTList(VT1: ResTy, VT2: ResTy);
2002 unsigned Opc = Op.getOpcode();
2003
2004 // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI.
2005 if (Opc == ISD::MULHU)
2006 return DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1);
2007 if (Opc == ISD::MULHS)
2008 return DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1);
2009
2010#ifndef NDEBUG
2011 Op.dump(&DAG);
2012#endif
2013 llvm_unreachable("Unexpected mulh operation");
2014}
2015
SDValue
HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
  // Lower the multi-valued [SU]MUL_LOHI nodes (wide multiply producing the
  // low and high halves of the product as two results).
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();
  SDValue Vu = Op.getOperand(i: 0);
  SDValue Vv = Op.getOperand(i: 1);

  // If the HI part is not used, convert it to a regular MUL.
  if (auto HiVal = Op.getValue(R: 1); HiVal.use_empty()) {
    // Need to preserve the types and the number of values.
    SDValue Hi = DAG.getUNDEF(VT: ty(Op: HiVal));
    SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: ty(Op), Ops: {Vu, Vv});
    return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
  }

  // Operand signedness: SMUL is signed*signed, USMUL is unsigned*signed,
  // UMUL is unsigned*unsigned.
  bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
  bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;

  // Legal on HVX v62+, but lower it here because patterns can't handle multi-
  // valued nodes.
  if (Subtarget.useHVXV62Ops())
    return emitHvxMulLoHiV62(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);

  if (Opc == HexagonISD::SMUL_LOHI) {
    // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
    // for other signedness LOHI is cheaper.
    if (auto LoVal = Op.getValue(R: 0); LoVal.use_empty()) {
      SDValue Hi = emitHvxMulHsV60(A: Vu, B: Vv, dl, DAG);
      SDValue Lo = DAG.getUNDEF(VT: ty(Op: LoVal));
      return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
    }
  }

  return emitHvxMulLoHiV60(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);
}
2051
SDValue
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
  // Custom-lower BITCASTs that cross between HVX predicate types and
  // scalar integers / small vectors; all other bitcasts pass through.
  SDValue Val = Op.getOperand(i: 0);
  MVT ResTy = ty(Op);
  MVT ValTy = ty(Op: Val);
  const SDLoc &dl(Op);

  // === Case: vector predicate -> scalar integer ===
  if (isHvxBoolTy(Ty: ValTy) && ResTy.isScalarInteger()) {
    unsigned HwLen = Subtarget.getVectorLength();
    MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4);
    // Compress the HwLen predicate bits into the low bytes of a vector,
    // then extract them as 32-bit words.
    SDValue VQ = compressHvxPred(VecQ: Val, dl, ResTy: WordTy, DAG);
    unsigned BitWidth = ResTy.getSizeInBits();

    if (BitWidth < 64) {
      SDValue W0 = extractHvxElementReg(VecV: VQ, IdxV: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
                                        dl, ResTy: MVT::i32, DAG);
      if (BitWidth == 32)
        return W0;
      assert(BitWidth < 32u);
      return DAG.getZExtOrTrunc(Op: W0, DL: dl, VT: ResTy);
    }

    // The result is >= 64 bits. The only options are 64 or 128.
    assert(BitWidth == 64 || BitWidth == 128);
    SmallVector<SDValue,4> Words;
    for (unsigned i = 0; i != BitWidth/32; ++i) {
      SDValue W = extractHvxElementReg(
          VecV: VQ, IdxV: DAG.getConstant(Val: i, DL: dl, VT: MVT::i32), dl, ResTy: MVT::i32, DAG);
      Words.push_back(Elt: W);
    }
    // Pair up the words into 64-bit values.
    SmallVector<SDValue,2> Combines;
    assert(Words.size() % 2 == 0);
    for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
      SDValue C = getCombine(Hi: Words[i+1], Lo: Words[i], dl, ResTy: MVT::i64, DAG);
      Combines.push_back(Elt: C);
    }

    if (BitWidth == 64)
      return Combines[0];

    return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: ResTy, Ops: Combines);
  }

  // Handle bitcast from i32, v2i16, and v4i8 to v32i1.
  // Splat the input into a 32-element i32 vector, then AND each element
  // with a unique bitmask to isolate individual bits.
  auto bitcastI32ToV32I1 = [&](SDValue Val32) {
    assert(Val32.getValueType().getSizeInBits() == 32 &&
           "Input must be 32 bits");
    MVT VecTy = MVT::getVectorVT(VT: MVT::i32, NumElements: 32);
    SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Val32);
    SmallVector<SDValue, 32> Mask;
    for (unsigned i = 0; i < 32; ++i)
      Mask.push_back(Elt: DAG.getConstant(Val: 1ull << i, DL: dl, VT: MVT::i32));

    SDValue MaskVec = DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Mask);
    SDValue Anded = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: VecTy, N1: Splat, N2: MaskVec);
    // V2Q turns each nonzero element into a true predicate bit.
    return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: MVT::v32i1, Operand: Anded);
  };
  // === Case: v32i1 ===
  if (ResTy == MVT::v32i1 &&
      (ValTy == MVT::i32 || ValTy == MVT::v2i16 || ValTy == MVT::v4i8) &&
      Subtarget.useHVX128BOps()) {
    SDValue Val32 = Val;
    if (ValTy == MVT::v2i16 || ValTy == MVT::v4i8)
      Val32 = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: Val);
    return bitcastI32ToV32I1(Val32);
  }
  // === Case: v64i1 ===
  if (ResTy == MVT::v64i1 && ValTy == MVT::i64 && Subtarget.useHVX128BOps()) {
    // Split i64 into lo/hi 32-bit halves.
    SDValue Lo = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: Val);
    SDValue HiShifted = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i64, N1: Val,
                                    N2: DAG.getConstant(Val: 32, DL: dl, VT: MVT::i64));
    SDValue Hi = DAG.getNode(Opcode: ISD::TRUNCATE, DL: dl, VT: MVT::i32, Operand: HiShifted);

    // Reuse the same 32-bit logic twice.
    SDValue LoRes = bitcastI32ToV32I1(Lo);
    SDValue HiRes = bitcastI32ToV32I1(Hi);

    // Concatenate into a v64i1 predicate.
    return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MVT::v64i1, N1: LoRes, N2: HiRes);
  }

  // === Case: scalar integer -> vector predicate (full register width) ===
  if (isHvxBoolTy(Ty: ResTy) && ValTy.isScalarInteger()) {
    // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
    unsigned BitWidth = ValTy.getSizeInBits();
    unsigned HwLen = Subtarget.getVectorLength();
    assert(BitWidth == HwLen);

    MVT ValAsVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: BitWidth / 8);
    SDValue ValAsVec = DAG.getBitcast(VT: ValAsVecTy, V: Val);
    // Splat each byte of Val 8 times.
    // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
    // where b0, b1,..., b15 are least to most significant bytes of I.
    SmallVector<SDValue, 128> Bytes;
    // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
    // These are bytes with the LSB rotated left with respect to their index.
    SmallVector<SDValue, 128> Tmp;
    for (unsigned I = 0; I != HwLen / 8; ++I) {
      SDValue Idx = DAG.getConstant(Val: I, DL: dl, VT: MVT::i32);
      SDValue Byte =
          DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i8, N1: ValAsVec, N2: Idx);
      for (unsigned J = 0; J != 8; ++J) {
        Bytes.push_back(Elt: Byte);
        Tmp.push_back(Elt: DAG.getConstant(Val: 1ull << J, DL: dl, VT: MVT::i8));
      }
    }

    MVT ConstantVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
    SDValue ConstantVec = DAG.getBuildVector(VT: ConstantVecTy, DL: dl, Ops: Tmp);
    SDValue I2V = buildHvxVectorReg(Values: Bytes, dl, VecTy: ConstantVecTy, DAG);

    // Each Byte in the I2V will be set iff corresponding bit is set in Val.
    I2V = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ConstantVecTy, Ops: {I2V, ConstantVec});
    return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: I2V);
  }

  return Op;
}
2172
2173SDValue
2174HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const {
2175 // Sign- and zero-extends are legal.
2176 assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG);
2177 return DAG.getNode(Opcode: ISD::ZERO_EXTEND_VECTOR_INREG, DL: SDLoc(Op), VT: ty(Op),
2178 Operand: Op.getOperand(i: 0));
2179}
2180
2181SDValue
2182HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const {
2183 MVT ResTy = ty(Op);
2184 if (ResTy.getVectorElementType() != MVT::i1)
2185 return Op;
2186
2187 const SDLoc &dl(Op);
2188 unsigned HwLen = Subtarget.getVectorLength();
2189 unsigned VecLen = ResTy.getVectorNumElements();
2190 assert(HwLen % VecLen == 0);
2191 unsigned ElemSize = HwLen / VecLen;
2192
2193 MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ElemSize * 8), NumElements: VecLen);
2194 SDValue S =
2195 DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: Op.getOperand(i: 0),
2196 N2: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 1)),
2197 N3: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 2)));
2198 return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: S);
2199}
2200
2201SDValue
2202HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const {
2203 if (SDValue S = getVectorShiftByInt(Op, DAG))
2204 return S;
2205 return Op;
2206}
2207
SDValue
HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
                                           SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::FSHL || Opc == ISD::FSHR);

  // Funnel shift of the pair A:B by S (per element, with BW = element bit
  // width, shift amount taken modulo BW):
  //   FSHL(A, B, S) == (A << S) | (B >> (BW - S))
  //   FSHR(A, B, S) == (A << (BW - S)) | (B >> S)
  SDValue A = Op.getOperand(i: 0);
  SDValue B = Op.getOperand(i: 1);
  SDValue S = Op.getOperand(i: 2);

  MVT InpTy = ty(Op: A);
  MVT ElemTy = InpTy.getVectorElementType();

  const SDLoc &dl(Op);
  unsigned ElemWidth = ElemTy.getSizeInBits();
  bool IsLeft = Opc == ISD::FSHL;

  // The expansion into regular shifts produces worse code for i8 and for
  // right shift of i32 on v65+.
  bool UseShifts = ElemTy != MVT::i8;
  if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
    UseShifts = false;

  if (SDValue SplatV = getSplatValue(Op: S, DAG); SplatV && UseShifts) {
    // If this is a funnel shift by a scalar, lower it into regular shifts.
    // ModS = S % ElemWidth (ElemWidth is a power of two, so AND works).
    SDValue Mask = DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: MVT::i32);
    SDValue ModS =
        DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                    Ops: {DAG.getZExtOrTrunc(Op: SplatV, DL: dl, VT: MVT::i32), Mask});
    // NegS = ElemWidth - ModS: the complementary shift amount.
    SDValue NegS =
        DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
                    Ops: {DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32), ModS});
    SDValue IsZero =
        DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: ModS, RHS: getZero(dl, Ty: MVT::i32, DAG), Cond: ISD::SETEQ);
    // FSHL A, B => (A << ModS) | (B >> NegS)
    // FSHR A, B => (A << NegS) | (B >> ModS)
    SDValue Part1 =
        DAG.getNode(Opcode: HexagonISD::VASL, DL: dl, VT: InpTy, Ops: {A, IsLeft ? ModS : NegS});
    SDValue Part2 =
        DAG.getNode(Opcode: HexagonISD::VLSR, DL: dl, VT: InpTy, Ops: {B, IsLeft ? NegS : ModS});
    SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Part1, Part2});
    // If the shift amount was 0, pick A or B, depending on the direction.
    // The opposite shift will also be by 0 (not by ElemWidth), so the "Or"
    // would be incorrect in that case.
    return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: InpTy, Ops: {IsZero, (IsLeft ? A : B), Or});
  }

  // General (non-splat) case: use the masked funnel-shift nodes, reducing
  // the per-element shift amount modulo the element width first.
  SDValue Mask = DAG.getSplatBuildVector(
      VT: InpTy, DL: dl, Op: DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: ElemTy));

  unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
  return DAG.getNode(Opcode: MOpc, DL: dl, VT: ty(Op),
                     Ops: {A, B, DAG.getNode(Opcode: ISD::AND, DL: dl, VT: InpTy, Ops: {S, Mask})});
}
2263
2264SDValue
2265HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
2266 const SDLoc &dl(Op);
2267 unsigned IntNo = Op.getConstantOperandVal(i: 0);
2268 SmallVector<SDValue> Ops(Op->ops());
2269
2270 auto Swap = [&](SDValue P) {
2271 return DAG.getMergeValues(Ops: {P.getValue(R: 1), P.getValue(R: 0)}, dl);
2272 };
2273
2274 switch (IntNo) {
2275 case Intrinsic::hexagon_V6_pred_typecast:
2276 case Intrinsic::hexagon_V6_pred_typecast_128B: {
2277 MVT ResTy = ty(Op), InpTy = ty(Op: Ops[1]);
2278 if (isHvxBoolTy(Ty: ResTy) && isHvxBoolTy(Ty: InpTy)) {
2279 if (ResTy == InpTy)
2280 return Ops[1];
2281 return DAG.getNode(Opcode: HexagonISD::TYPECAST, DL: dl, VT: ResTy, Operand: Ops[1]);
2282 }
2283 break;
2284 }
2285 case Intrinsic::hexagon_V6_vmpyss_parts:
2286 case Intrinsic::hexagon_V6_vmpyss_parts_128B:
2287 return Swap(DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2288 Ops: {Ops[1], Ops[2]}));
2289 case Intrinsic::hexagon_V6_vmpyuu_parts:
2290 case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
2291 return Swap(DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2292 Ops: {Ops[1], Ops[2]}));
2293 case Intrinsic::hexagon_V6_vmpyus_parts:
2294 case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
2295 return Swap(DAG.getNode(Opcode: HexagonISD::USMUL_LOHI, DL: dl, VTList: Op->getVTList(),
2296 Ops: {Ops[1], Ops[2]}));
2297 }
2298 } // switch
2299
2300 return Op;
2301}
2302
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  // Lower masked loads/stores (MLOAD/MSTORE) of HVX vectors.
  // Loads become a full-width load plus a VSELECT against the pass-through
  // value. Stores use the HVX predicated store, which only supports aligned
  // accesses; unaligned stores are split into two aligned predicated stores.
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Val: Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  auto *MemOp = MF.getMachineMemOperand(MMO: MaskN->getMemOperand(), Offset: 0, Size: HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    MVT ValTy = ty(Op);
    // Load the whole vector unconditionally, then blend with the
    // pass-through value according to the mask (if one is given).
    SDValue Load = DAG.getLoad(VT: ValTy, dl, Chain, Ptr: Base, MMO: MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(Val: MaskN)->getPassThru();
    if (isUndef(Op: Thru))
      return Load;
    SDValue VSel = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ValTy, N1: Mask, N2: Load, N3: Thru);
    return DAG.getMergeValues(Ops: {VSel, Load.getValue(R: 1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(Val: MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(Val: 0, DL: dl, VT: ty(Op: Base));

  if (MaskN->getAlign().value() % HwLen == 0) {
    // Fully aligned: one predicated store suffices.
    SDValue Store = getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
                             Ops: {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Store.getNode()), NewMemRefs: {MemOp});
    return Store;
  }

  // Unaligned case.
  // Rotate a vector so its lanes line up with the two aligned vectors
  // covering the store location; returns the {low, high} aligned parts.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, Ty: ty(Op: V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {V, Z, A}, DAG);
    SDValue HiV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {Z, V, A}, DAG);
    return std::make_pair(x&: LoV, y&: HiV);
  };

  // Align both the mask and the value the same way (the mask is expanded to
  // a byte vector for the rotation, then converted back to a predicate),
  // and emit two aligned predicated stores at Base and Base+HwLen.
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  SDValue MaskV = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.first),
                      DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  SDValue Offset1 = DAG.getTargetConstant(Val: HwLen, DL: dl, VT: MVT::i32);
  SDValue StoreLo =
      getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
               Ops: {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
               Ops: {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreLo.getNode()), NewMemRefs: {MemOp});
  DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreHi.getNode()), NewMemRefs: {MemOp});
  // Join the two store chains.
  return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: {StoreLo, StoreHi});
}
2372
SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
                                                SelectionDAG &DAG) const {
  // This conversion only applies to QFloat. IEEE extension from f16 to f32
  // is legal (done via a pattern).
  assert(Subtarget.useHVXQFloatOps());

  assert(Op->getOpcode() == ISD::FP_EXTEND);

  MVT VecTy = ty(Op);
  MVT ArgTy = ty(Op: Op.getOperand(i: 0));
  const SDLoc &dl(Op);

  if (ArgTy == MVT::v64bf16) {
    MVT HalfTy = typeSplit(VecTy).first;
    SDValue BF16Vec = Op.getOperand(i: 0);
    // Zero vector of the input width, formed by xor-ing the input with
    // itself.
    SDValue Zeroes =
        getInstr(MachineOpc: Hexagon::V6_vxor, dl, Ty: HalfTy, Ops: {BF16Vec, BF16Vec}, DAG);
    // Interleave zero vector with the bf16 vector, with zeroes in the lower
    // half of each 32 bit lane, effectively extending the bf16 values to fp32
    // values.
    SDValue ShuffVec =
        getInstr(MachineOpc: Hexagon::V6_vshufoeh, dl, Ty: VecTy, Ops: {BF16Vec, Zeroes}, DAG);
    // Re-shuffle the two halves so the widened elements come out in the
    // natural element order.
    VectorPair VecPair = opSplit(Vec: ShuffVec, dl, DAG);
    SDValue Result = getInstr(MachineOpc: Hexagon::V6_vshuffvdd, dl, Ty: VecTy,
                              Ops: {VecPair.second, VecPair.first,
                               DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)},
                              DAG);
    return Result;
  }

  assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);

  SDValue F16Vec = Op.getOperand(i: 0);

  // Widen f16 -> qf32 by multiplying with a splat of 1.0 (converted to
  // half precision), then convert the qf32 halves to IEEE single precision.
  APFloat FloatVal = APFloat(1.0f);
  bool Ignored;
  FloatVal.convert(ToSemantics: APFloat::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, losesInfo: &Ignored);
  SDValue Fp16Ones = DAG.getConstantFP(Val: FloatVal, DL: dl, VT: ArgTy);
  SDValue VmpyVec =
      getInstr(MachineOpc: Hexagon::V6_vmpy_qf32_hf, dl, Ty: VecTy, Ops: {F16Vec, Fp16Ones}, DAG);

  MVT HalfTy = typeSplit(VecTy).first;
  VectorPair Pair = opSplit(Vec: VmpyVec, dl, DAG);
  SDValue LoVec =
      getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.first}, DAG);
  SDValue HiVec =
      getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.second}, DAG);

  // Shuffle the converted halves back into a single register pair.
  SDValue ShuffVec =
      getInstr(MachineOpc: Hexagon::V6_vshuffvdd, dl, Ty: VecTy,
               Ops: {HiVec, LoVec, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)}, DAG);

  return ShuffVec;
}
2427
2428SDValue
2429HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
2430 // Catch invalid conversion ops (just in case).
2431 assert(Op.getOpcode() == ISD::FP_TO_SINT ||
2432 Op.getOpcode() == ISD::FP_TO_UINT);
2433
2434 MVT ResTy = ty(Op);
2435 MVT FpTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType();
2436 MVT IntTy = ResTy.getVectorElementType();
2437
2438 if (Subtarget.useHVXIEEEFPOps()) {
2439 // There are only conversions from f16.
2440 if (FpTy == MVT::f16) {
2441 // Other int types aren't legal in HVX, so we shouldn't see them here.
2442 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2443 // Conversions to i8 and i16 are legal.
2444 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2445 return Op;
2446 }
2447 }
2448
2449 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2450 return EqualizeFpIntConversion(Op, DAG);
2451
2452 return ExpandHvxFpToInt(Op, DAG);
2453}
2454
// For vector type v32i1 uint_to_fp/sint_to_fp to v32f32:
// R1 = #1, R2 holds the v32i1 param
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V2 = vand(V2,V1)
// V2.sf = V2.w
SDValue HexagonTargetLowering::LowerHvxPred32ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {
  // Convert a v32i1 predicate to v32f32 following the scheme above:
  // broadcast the 32-bit predicate word to every lane, shift lane i right
  // by i, mask to the low bit (0 or 1), and convert the words to float.
  // Each element is 0 or 1, so signed and unsigned conversion coincide.
  MVT ResTy = ty(Op: PredOp);
  const SDLoc &dl(PredOp);

  // R1 = #1; V1 = vsplat(R1).
  SDValue Const = DAG.getTargetConstant(Val: 0x1, DL: dl, VT: MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Opcode: Hexagon::A2_tfrsi, dl, VT: MVT::i32, Op1: Const);
  SDNode *SplatConst = DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
                                          Op1: SDValue(RegConst, 0));
  // Q0 = vand(V1,R1): predicate used to seed the prefix sum.
  SDNode *PredTransfer =
      DAG.getMachineNode(Opcode: Hexagon::V6_vandvrt, dl, VT: MVT::v32i1,
                         Op1: SDValue(SplatConst, 0), Op2: SDValue(RegConst, 0));
  // V0.w = prefixsum(Q0).
  SDNode *PrefixSum = DAG.getMachineNode(Opcode: Hexagon::V6_vprefixqw, dl, VT: MVT::v32i32,
                                         Op1: SDValue(PredTransfer, 0));
  // V2 = vsplat(R2): the predicate word broadcast to all lanes.
  SDNode *SplatParam = DAG.getMachineNode(
      Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
      Op1: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: PredOp.getOperand(i: 0)));
  // V0.w = vsub(V0.w,V1.w): per-lane shift amounts (lane index).
  SDNode *Vsub =
      DAG.getMachineNode(Opcode: Hexagon::V6_vsubw, dl, VT: MVT::v32i32,
                         Op1: SDValue(PrefixSum, 0), Op2: SDValue(SplatConst, 0));
  // V2.w = vlsr(V2.w,V0.w): bring bit i of the predicate to lane i's LSB.
  SDNode *IndexShift =
      DAG.getMachineNode(Opcode: Hexagon::V6_vlsrwv, dl, VT: MVT::v32i32,
                         Op1: SDValue(SplatParam, 0), Op2: SDValue(Vsub, 0));
  // V2 = vand(V2,V1): isolate the LSB, leaving 0 or 1 per lane.
  SDNode *MaskOff =
      DAG.getMachineNode(Opcode: Hexagon::V6_vand, dl, VT: MVT::v32i32,
                         Op1: SDValue(IndexShift, 0), Op2: SDValue(SplatConst, 0));
  // V2.sf = V2.w: convert the 0/1 words to single precision.
  SDNode *Convert = DAG.getMachineNode(Opcode: Hexagon::V6_vconv_sf_w, dl, VT: ResTy,
                                       Op1: SDValue(MaskOff, 0));
  return SDValue(Convert, 0);
}
2496
// For vector type v64i1 uint_to_fp/sint_to_fp to v64f16:
// i64 R32 = bitcast v64i1 R3:2 (R3:2 holds v64i1)
// R3 = subreg_high (R32)
// R2 = subreg_low (R32)
// R1 = #1
// V1 = vsplat(R1)
// V2 = vsplat(R2)
// V3 = vsplat(R3)
// Q0 = vand(V1,R1)
// V0.w=prefixsum(Q0)
// V0.w=vsub(V0.w,V1.w)
// V2.w = vlsr(V2.w,V0.w)
// V3.w = vlsr(V3.w,V0.w)
// V2 = vand(V2,V1)
// V3 = vand(V3,V1)
// V2.h = vpacke(V3.w,V2.w)
// V2.hf = V2.h
SDValue HexagonTargetLowering::LowerHvxPred64ToFp(SDValue PredOp,
                                                  SelectionDAG &DAG) const {
  // Convert a v64i1 predicate to v64f16: same per-lane bit-extraction trick
  // as LowerHvxPred32ToFp, applied to the low and high 32-bit halves of the
  // 64-bit predicate separately, then packed into halfwords and converted.
  // Each element is 0 or 1, so signed and unsigned conversion coincide.
  MVT ResTy = ty(Op: PredOp);
  const SDLoc &dl(PredOp);

  SDValue Inp = DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i64, Operand: PredOp.getOperand(i: 0));
  // Get the hi and lo regs
  SDValue HiReg =
      DAG.getTargetExtractSubreg(SRIdx: Hexagon::isub_hi, DL: dl, VT: MVT::i32, Operand: Inp);
  SDValue LoReg =
      DAG.getTargetExtractSubreg(SRIdx: Hexagon::isub_lo, DL: dl, VT: MVT::i32, Operand: Inp);
  // Get constant #1 and splat into vector V1
  SDValue Const = DAG.getTargetConstant(Val: 0x1, DL: dl, VT: MVT::i32);
  SDNode *RegConst = DAG.getMachineNode(Opcode: Hexagon::A2_tfrsi, dl, VT: MVT::i32, Op1: Const);
  SDNode *SplatConst = DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
                                          Op1: SDValue(RegConst, 0));
  // Splat the hi and lo args
  SDNode *SplatHi =
      DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
                         Op1: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: HiReg));
  SDNode *SplatLo =
      DAG.getMachineNode(Opcode: Hexagon::V6_lvsplatw, dl, VT: MVT::v32i32,
                         Op1: DAG.getNode(Opcode: ISD::BITCAST, DL: dl, VT: MVT::i32, Operand: LoReg));
  // vand between splatted const and const
  SDNode *PredTransfer =
      DAG.getMachineNode(Opcode: Hexagon::V6_vandvrt, dl, VT: MVT::v32i1,
                         Op1: SDValue(SplatConst, 0), Op2: SDValue(RegConst, 0));
  // Get the prefixsum
  SDNode *PrefixSum = DAG.getMachineNode(Opcode: Hexagon::V6_vprefixqw, dl, VT: MVT::v32i32,
                                         Op1: SDValue(PredTransfer, 0));
  // Get the vsub: per-lane shift amounts (lane index)
  SDNode *Vsub =
      DAG.getMachineNode(Opcode: Hexagon::V6_vsubw, dl, VT: MVT::v32i32,
                         Op1: SDValue(PrefixSum, 0), Op2: SDValue(SplatConst, 0));
  // Get vlsr for hi and lo: bit i of each half moves to lane i's LSB
  SDNode *IndexShift_hi =
      DAG.getMachineNode(Opcode: Hexagon::V6_vlsrwv, dl, VT: MVT::v32i32,
                         Op1: SDValue(SplatHi, 0), Op2: SDValue(Vsub, 0));
  SDNode *IndexShift_lo =
      DAG.getMachineNode(Opcode: Hexagon::V6_vlsrwv, dl, VT: MVT::v32i32,
                         Op1: SDValue(SplatLo, 0), Op2: SDValue(Vsub, 0));
  // Get vand of hi and lo: isolate the LSB, leaving 0 or 1 per lane
  SDNode *MaskOff_hi =
      DAG.getMachineNode(Opcode: Hexagon::V6_vand, dl, VT: MVT::v32i32,
                         Op1: SDValue(IndexShift_hi, 0), Op2: SDValue(SplatConst, 0));
  SDNode *MaskOff_lo =
      DAG.getMachineNode(Opcode: Hexagon::V6_vand, dl, VT: MVT::v32i32,
                         Op1: SDValue(IndexShift_lo, 0), Op2: SDValue(SplatConst, 0));
  // Pack them into halfwords (hi half provides the upper 32 lanes)
  SDNode *Pack =
      DAG.getMachineNode(Opcode: Hexagon::V6_vpackeh, dl, VT: MVT::v64i16,
                         Op1: SDValue(MaskOff_hi, 0), Op2: SDValue(MaskOff_lo, 0));
  // Convert the 0/1 halfwords to half-precision float
  SDNode *Convert =
      DAG.getMachineNode(Opcode: Hexagon::V6_vconv_hf_h, dl, VT: ResTy, Op1: SDValue(Pack, 0));
  return SDValue(Convert, 0);
}
2571
2572SDValue
2573HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
2574 // Catch invalid conversion ops (just in case).
2575 assert(Op.getOpcode() == ISD::SINT_TO_FP ||
2576 Op.getOpcode() == ISD::UINT_TO_FP);
2577
2578 MVT ResTy = ty(Op);
2579 MVT IntTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType();
2580 MVT FpTy = ResTy.getVectorElementType();
2581
2582 if (Op.getOpcode() == ISD::UINT_TO_FP || Op.getOpcode() == ISD::SINT_TO_FP) {
2583 if (ResTy == MVT::v32f32 && ty(Op: Op.getOperand(i: 0)) == MVT::v32i1)
2584 return LowerHvxPred32ToFp(PredOp: Op, DAG);
2585 if (ResTy == MVT::v64f16 && ty(Op: Op.getOperand(i: 0)) == MVT::v64i1)
2586 return LowerHvxPred64ToFp(PredOp: Op, DAG);
2587 }
2588
2589 if (Subtarget.useHVXIEEEFPOps()) {
2590 // There are only conversions to f16.
2591 if (FpTy == MVT::f16) {
2592 // Other int types aren't legal in HVX, so we shouldn't see them here.
2593 assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32);
2594 // i8, i16 -> f16 is legal.
2595 if (IntTy == MVT::i8 || IntTy == MVT::i16)
2596 return Op;
2597 }
2598 }
2599
2600 if (IntTy.getSizeInBits() != FpTy.getSizeInBits())
2601 return EqualizeFpIntConversion(Op, DAG);
2602
2603 return ExpandHvxIntToFp(Op, DAG);
2604}
2605
2606HexagonTargetLowering::TypePair
2607HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const {
2608 // Compare the widths of elements of the two types, and extend the narrower
2609 // type to match the with of the wider type. For vector types, apply this
2610 // to the element type.
2611 assert(Ty0.isVector() == Ty1.isVector());
2612
2613 MVT ElemTy0 = Ty0.getScalarType();
2614 MVT ElemTy1 = Ty1.getScalarType();
2615
2616 unsigned Width0 = ElemTy0.getSizeInBits();
2617 unsigned Width1 = ElemTy1.getSizeInBits();
2618 unsigned MaxWidth = std::max(a: Width0, b: Width1);
2619
2620 auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) {
2621 if (ScalarTy.isInteger())
2622 return MVT::getIntegerVT(BitWidth: Width);
2623 assert(ScalarTy.isFloatingPoint());
2624 return MVT::getFloatingPointVT(BitWidth: Width);
2625 };
2626
2627 MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth);
2628 MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth);
2629
2630 if (!Ty0.isVector()) {
2631 // Both types are scalars.
2632 return {WideETy0, WideETy1};
2633 }
2634
2635 // Vector types.
2636 unsigned NumElem = Ty0.getVectorNumElements();
2637 assert(NumElem == Ty1.getVectorNumElements());
2638
2639 return {MVT::getVectorVT(VT: WideETy0, NumElements: NumElem),
2640 MVT::getVectorVT(VT: WideETy1, NumElements: NumElem)};
2641}
2642
2643HexagonTargetLowering::TypePair
2644HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const {
2645 // Compare the numbers of elements of two vector types, and widen the
2646 // narrower one to match the number of elements in the wider one.
2647 assert(Ty0.isVector() && Ty1.isVector());
2648
2649 unsigned Len0 = Ty0.getVectorNumElements();
2650 unsigned Len1 = Ty1.getVectorNumElements();
2651 if (Len0 == Len1)
2652 return {Ty0, Ty1};
2653
2654 unsigned MaxLen = std::max(a: Len0, b: Len1);
2655 return {MVT::getVectorVT(VT: Ty0.getVectorElementType(), NumElements: MaxLen),
2656 MVT::getVectorVT(VT: Ty1.getVectorElementType(), NumElements: MaxLen)};
2657}
2658
2659MVT
2660HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const {
2661 EVT LegalTy = getTypeToTransformTo(Context&: *DAG.getContext(), VT: Ty);
2662 assert(LegalTy.isSimple());
2663 return LegalTy.getSimpleVT();
2664}
2665
2666MVT
2667HexagonTargetLowering::typeWidenToHvx(MVT Ty) const {
2668 unsigned HwWidth = 8 * Subtarget.getVectorLength();
2669 assert(Ty.getSizeInBits() <= HwWidth);
2670 if (Ty.getSizeInBits() == HwWidth)
2671 return Ty;
2672
2673 MVT ElemTy = Ty.getScalarType();
2674 return MVT::getVectorVT(VT: ElemTy, NumElements: HwWidth / ElemTy.getSizeInBits());
2675}
2676
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
      const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
  // Compute A+B, return {A+B, O}, where O = vector predicate indicating
  // whether an overflow has occurred.
  MVT ResTy = ty(Op: A);
  assert(ResTy == ty(B));
  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: ResTy.getVectorNumElements());

  if (!Signed) {
    // Unsigned overflow occurred iff the sum wrapped, i.e. A+B <u A.
    // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
    // save any instructions.
    SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
    SDValue Ovf = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Add, RHS: A, Cond: ISD::SETULT);
    return {Add, Ovf};
  }

  // Signed overflow has happened, if:
  // (A, B have the same sign) and (A+B has a different sign from either)
  // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
  SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
  SDValue NotA =
      DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {A, DAG.getAllOnesConstant(DL: dl, VT: ResTy)});
  SDValue Xor0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {NotA, B});
  SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {Add, B});
  SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy, Ops: {Xor0, Xor1});
  // Extract the sign bit of And (set iff signed overflow occurred) with a
  // signed compare against zero.
  SDValue MSB =
      DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: getZero(dl, Ty: ResTy, DAG), Cond: ISD::SETLT);
  return {Add, MSB};
}
2707
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.
  // Returns {rounded result, overflow flag of the rounding addition}.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Op: Val);

  // This should also work for signed integers.
  //
  // uint tmp0 = inp + ((1 << (Amt-1)) - 1);  // add "one half minus 1 ulp"
  // bool ovf = (inp > tmp0);
  // uint rup = inp & (1 << Amt);             // result's LSB: breaks ties
  //                                          // toward the even value
  //
  // uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff
  // uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0
  // uint tmp3 = tmp2 + rup;
  // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(BitWidth: ElemWidth);
  MVT IntTy = tyVector(Ty: ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: IntTy.getVectorNumElements());
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  SDValue Inp = DAG.getBitcast(VT: IntTy, V: Val);
  SDValue LowBits = DAG.getConstant(Val: (1ull << (Amt - 1)) - 1, DL: dl, VT: IntTy);

  // NOTE(review): despite the name, AmtP1 holds the bit mask 1 << Amt
  // (the LSB of the final result), which is what tie-to-even needs.
  SDValue AmtP1 = DAG.getConstant(Val: 1ull << Amt, DL: dl, VT: IntTy);
  SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntTy, Ops: {Inp, AmtP1});
  SDValue Zero = getZero(dl, Ty: IntTy, DAG);
  SDValue Bit = DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: Zero, Cond: ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Op: Bit, DL: dl, VT: IntTy);
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(A: Inp, B: LowBits, dl, Signed, DAG);

  // Shift in two steps (Amt-1, then 1) so the tie detection and the "rup"
  // adjustment can be applied in between.
  SDValue AmtM1 = DAG.getConstant(Val: Amt - 1, DL: dl, VT: IntTy);
  SDValue Tmp1 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Inp, N2: AmtM1);
  SDValue Tmp2 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Tmp0, N2: AmtM1);
  SDValue Tmp3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntTy, N1: Tmp2, N2: Rup);

  SDValue Eq = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Tmp1, RHS: Tmp2, Cond: ISD::SETEQ);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: IntTy);
  SDValue Tmp4 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp3, One});
  SDValue Mux = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: IntTy, Ops: {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2755
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  // Emit the high half of the signed 32x32 element-wise product of A and B
  // (mulhs) using the HVX V60 instruction set.
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32);

  // mulhs(A,B) =
  // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //                             + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //                Hi(B) Lo(B)
  //                Hi(A) Lo(A)
  //               --------------
  //                Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //         Hi(B)*Lo(A)         | + dropping the low 16 bits
  //         Hi(A)*Lo(B)         | T2
  //  Hi(B)*Hi(A)

  SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh, dl, Ty: VecTy, Ops: {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyhus, dl, Ty: PairTy, Ops: {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(V: P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vaddhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy,
                        Ops: {HiHalf(V: P2, DAG), LoHalf(V: P1, DAG), S16}, DAG);
  // T4 = get Hi(B) into low halves (same as T1 for A).
  SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(MachineOpc: Hexagon::V6_vmpyhv, dl, Ty: PairTy, Ops: {T1, T4}, DAG);
  SDValue T5 = LoHalf(V: P3, DAG);
  // Add: the final result is the sum of the two partial high halves.
  SDValue T6 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {T3, T5});
  return T6;
}
2812
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
                                         bool SignedB, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Emit the full 64-bit element-wise product of two i32 vectors for HVX
  // V60, returning merged values {Lo, Hi} of the product. SignedA/SignedB
  // select the signedness of the respective operand.
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(a&: A, b&: B);
    std::swap(a&: SignedA, b&: SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, B}, DAG);

  // Swap low/high halves in B
  SDValue T0 = getInstr(MachineOpc: Hexagon::V6_lvsplatw, dl, Ty: VecTy,
                        Ops: {DAG.getConstant(Val: 0x02020202, DL: dl, VT: MVT::i32)}, DAG);
  SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vdelta, dl, Ty: VecTy, Ops: {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy,
                        Ops: {HiHalf(V: P1, DAG), LoHalf(V: P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(MachineOpc: Hexagon::V6_vlsrw, dl, Ty: VecTy, Ops: {LoHalf(V: P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {LoHalf(V: P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy,
                        Ops: {HiHalf(V: P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(MachineOpc: Hexagon::V6_vaslw_acc, dl, Ty: VecTy,
                Ops: {LoHalf(V: P0, DAG), LoHalf(V: P2, DAG), S16}, DAG);
  Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {HiHalf(V: P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted");

    // Signed*signed correction: subtract (B if A<0) + (A if B<0) from the
    // high half of the unsigned product.
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    SDValue X0 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: VecTy, Ops: {Q0, B, Zero});
    SDValue X1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, X0, A}, DAG);
    Hi = getInstr(MachineOpc: Hexagon::V6_vsubw, dl, Ty: VecTy, Ops: {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    Hi = getInstr(MachineOpc: Hexagon::V6_vsubwq, dl, Ty: VecTy, Ops: {Q1, Hi, A}, DAG);
  } else {
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
}
2888
// Emit the (Lo, Hi) 32-bit halves of a full 32x32 vector multiply using the
// wide-multiply instructions V6_vmpyewuh_64 / V6_vmpyowh_64_acc (the "V62"
// variant of this expansion — presumably selected by the caller based on the
// subtarget; confirm against the dispatch site). The product is computed
// with signed*signed semantics first, then the high half is corrected for
// unsigned operands.
SDValue
HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
                                         SDValue B, bool SignedB,
                                         const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed. Multiplication is commutative, so the
    // correction code below only needs to handle the (unsigned, signed)
    // ordering.
    std::swap(a&: A, b&: B);
    std::swap(a&: SignedA, b&: SignedB);
  }

  // Do S*S first, then make corrections for U*S or U*U if needed.
  // P1 is the 64-bit product as a vector pair (low vector, high vector).
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh_64, dl, Ty: PairTy, Ops: {A, B}, DAG);
  SDValue P1 =
      getInstr(MachineOpc: Hexagon::V6_vmpyowh_64_acc, dl, Ty: PairTy, Ops: {P0, A, B}, DAG);
  SDValue Lo = LoHalf(V: P1, DAG);
  SDValue Hi = HiHalf(V: P1, DAG);

  if (!SignedB) {
    // U*U case: both operands are unsigned (A would have been swapped to
    // the unsigned slot above otherwise).
    assert(!SignedA && "Signed A and unsigned B should have been inverted");
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());

    // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
    // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
    //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
    //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
    //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
    //                               $A))>;
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vandvqv, dl, Ty: VecTy, Ops: {Q0, B}, DAG);
    SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, T0, A}, DAG);
    Hi = getInstr(MachineOpc: Hexagon::V6_vaddw, dl, Ty: VecTy, Ops: {Hi, T1}, DAG);
  } else if (!SignedA) {
    // U*S case: A is unsigned, B is signed.
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());

    // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
    // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
    //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
    //                     (HiHalf (Muls64O $A, $B)),
    //                     $B)>;
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    Hi = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q0, Hi, B}, DAG);
  }

  // The low half is identical for all signedness combinations; only Hi
  // required correction.
  return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
}
2942
2943SDValue
2944HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG)
2945 const {
2946 // Rewrite conversion between integer and floating-point in such a way that
2947 // the integer type is extended/narrowed to match the bitwidth of the
2948 // floating-point type, combined with additional integer-integer extensions
2949 // or narrowings to match the original input/result types.
2950 // E.g. f32 -> i8 ==> f32 -> i32 -> i8
2951 //
2952 // The input/result types are not required to be legal, but if they are
2953 // legal, this function should not introduce illegal types.
2954
2955 unsigned Opc = Op.getOpcode();
2956 assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT ||
2957 Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);
2958
2959 SDValue Inp = Op.getOperand(i: 0);
2960 MVT InpTy = ty(Op: Inp);
2961 MVT ResTy = ty(Op);
2962
2963 if (InpTy == ResTy)
2964 return Op;
2965
2966 const SDLoc &dl(Op);
2967 bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP;
2968
2969 auto [WInpTy, WResTy] = typeExtendToWider(Ty0: InpTy, Ty1: ResTy);
2970 SDValue WInp = resizeToWidth(VecV: Inp, ResTy: WInpTy, Signed, dl, DAG);
2971 SDValue Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: WResTy, Operand: WInp);
2972 SDValue Res = resizeToWidth(VecV: Conv, ResTy, Signed, dl, DAG);
2973 return Res;
2974}
2975
// Expand FP_TO_SINT / FP_TO_UINT on HVX float vectors by decoding the IEEE
// bit pattern with integer vector operations. Two special cases bypass the
// expansion: the HVX V81 rounding-convert instruction, and the
// (flag-gated) fast-convert instructions.
SDValue
HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT);

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(i: 0);
  MVT InpTy = ty(Op: Op0);
  MVT ResTy = ty(Op);
  assert(InpTy.changeTypeToInteger() == ResTy);

  // At this point this is an experiment under a flag.
  // In arch before V81 the rounding mode is towards nearest value.
  // The C/C++ standard requires rounding towards zero:
  // C (C99 and later): ISO/IEC 9899:2018 (C18), section 6.3.1.4 — "When a
  // finite value of real floating type is converted to an integer type, the
  // fractional part is discarded (i.e., the value is truncated toward zero)."
  // C++: ISO/IEC 14882:2020 (C++20), section 7.3.7 — "A prvalue of a
  // floating-point type can be converted to a prvalue of an integer type. The
  // conversion truncates; that is, the fractional part is discarded."
  if (InpTy == MVT::v64f16) {
    if (Subtarget.useHVXV81Ops()) {
      // This is c/c++ compliant
      SDValue ConvVec =
          getInstr(MachineOpc: Hexagon::V6_vconv_h_hf_rnd, dl, Ty: ResTy, Ops: {Op0}, DAG);
      return ConvVec;
    } else if (EnableFpFastConvert) {
      // Vd32.h=Vu32.hf same as Q6_Vh_equals_Vhf
      // NOTE: not C/C++ compliant rounding (nearest, not toward zero) —
      // only enabled under the experimental flag.
      SDValue ConvVec = getInstr(MachineOpc: Hexagon::V6_vconv_h_hf, dl, Ty: ResTy, Ops: {Op0}, DAG);
      return ConvVec;
    }
  } else if (EnableFpFastConvert && InpTy == MVT::v32f32) {
    // Vd32.w=Vu32.sf same as Q6_Vw_equals_Vsf
    SDValue ConvVec = getInstr(MachineOpc: Hexagon::V6_vconv_w_sf, dl, Ty: ResTy, Ops: {Op0}, DAG);
    return ConvVec;
  }

  // Reference scalar implementation of the expansion emitted below
  // (shown for f32 -> i32; the code generalizes via getIEEEProperties):
  //
  // int32_t conv_f32_to_i32(uint32_t inp) {
  //   // s | exp8 | frac23
  //
  //   int neg = (int32_t)inp < 0;
  //
  //   // "expm1" is the actual exponent minus 1: instead of "bias", subtract
  //   // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
  //   // produce a large positive "expm1", which will result in max u/int.
  //   // In all IEEE formats, bias is the largest positive number that can be
  //   // represented in bias-width bits (i.e. 011..1).
  //   int32_t expm1 = (inp << 1) - 0x80000000;
  //   expm1 >>= 24;
  //
  //   // Always insert the "implicit 1". Subnormal numbers will become 0
  //   // regardless.
  //   uint32_t frac = (inp << 8) | 0x80000000;
  //
  //   // "frac" is the fraction part represented as Q1.31. If it was
  //   // interpreted as uint32_t, it would be the fraction part multiplied
  //   // by 2^31.
  //
  //   // Calculate the amount of right shift, since shifting further to the
  //   // left would lose significant bits. Limit it to 32, because we want
  //   // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
  //   // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
  //   // left by 31). "rsh" can be negative.
  //   int32_t rsh = min(31 - (expm1 + 1), 32);
  //
  //   frac >>= rsh;   // rsh == 32 will produce 0
  //
  //   // Everything up to this point is the same for conversion to signed
  //   // unsigned integer.
  //
  //   if (neg)                 // Only for signed int
  //     frac = -frac;          //
  //   if (rsh <= 0 && neg)     //   bound = neg ? 0x80000000 : 0x7fffffff
  //     frac = 0x80000000;     //   frac = rsh <= 0 ? bound : frac
  //   if (rsh <= 0 && !neg)    //
  //     frac = 0x7fffffff;     //
  //
  //   if (neg)                 // Only for unsigned int
  //     frac = 0;              //
  //   if (rsh < 0 && !neg)     //   frac = rsh < 0 ? 0x7fffffff : frac;
  //     frac = 0x7fffffff;     //   frac = neg ? 0 : frac;
  //
  //   return frac;
  // }

  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: ResTy.getVectorElementCount());

  // Instruction-level sketch of the DAG built below:
  //
  // Zero = V6_vd0();
  // Neg = V6_vgtw(Zero, Inp);
  // One = V6_lvsplatw(1);
  // M80 = V6_lvsplatw(0x80000000);
  // Exp00 = V6_vaslwv(Inp, One);
  // Exp01 = V6_vsubw(Exp00, M80);
  // ExpM1 = V6_vasrw(Exp01, 24);
  // Frc00 = V6_vaslw(Inp, 8);
  // Frc01 = V6_vor(Frc00, M80);
  // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
  // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
  // Frc02 = V6_vlsrwv(Frc01, Rsh01);

  // if signed int:
  // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
  // Pos = V6_vgtw(Rsh01, Zero);
  // Frc13 = V6_vsubw(Zero, Frc02);
  // Frc14 = V6_vmux(Neg, Frc13, Frc02);
  // Int = V6_vmux(Pos, Frc14, Bnd);
  //
  // if unsigned int:
  // Rsn = V6_vgtw(Zero, Rsh01)
  // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
  // Int = V6_vmux(Neg, Zero, Frc23)

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: InpTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;
  // Sanity check: bias must be 011..1 in ExpWidth-1 bits (see above).
  assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));

  // Reinterpret the float input as same-width integers; all arithmetic
  // below operates on the raw IEEE encoding.
  SDValue Inp = DAG.getBitcast(VT: ResTy, V: Op0);
  SDValue Zero = getZero(dl, Ty: ResTy, DAG);
  SDValue Neg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Inp, RHS: Zero, Cond: ISD::SETLT);
  SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: ResTy);
  SDValue M7F = DAG.getConstant(Val: (1ull << (ElemWidth - 1)) - 1, DL: dl, VT: ResTy);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: ResTy);
  SDValue Exp00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, One});
  SDValue Exp01 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Exp00, M80});
  SDValue MNE = DAG.getConstant(Val: ElemWidth - ExpWidth, DL: dl, VT: ResTy);
  SDValue ExpM1 = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: ResTy, Ops: {Exp01, MNE});

  SDValue ExpW = DAG.getConstant(Val: ExpWidth, DL: dl, VT: ResTy);
  SDValue Frc00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, ExpW});
  SDValue Frc01 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ResTy, Ops: {Frc00, M80});

  SDValue MN2 = DAG.getConstant(Val: ElemWidth - 2, DL: dl, VT: ResTy);
  SDValue Rsh00 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {MN2, ExpM1});
  SDValue MW = DAG.getConstant(Val: ElemWidth, DL: dl, VT: ResTy);
  SDValue Rsh01 = DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT: ResTy, Ops: {Rsh00, MW});
  SDValue Frc02 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ResTy, Ops: {Frc01, Rsh01});

  SDValue Int;

  if (Opc == ISD::FP_TO_SINT) {
    // Negate for negative inputs, and clamp to INT_MIN/INT_MAX when the
    // shift amount indicates overflow (see reference code above).
    SDValue Bnd = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, M80, M7F});
    SDValue Pos = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETGT);
    SDValue Frc13 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Zero, Frc02});
    SDValue Frc14 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, Frc13, Frc02});
    Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Pos, Frc14, Bnd});
  } else {
    assert(Opc == ISD::FP_TO_UINT);
    // Negative inputs map to 0; overflow saturates (see reference code).
    SDValue Rsn = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETLT);
    SDValue Frc23 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Rsn, N2: M7F, N3: Frc02);
    Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Neg, N2: Zero, N3: Frc23);
  }

  return Int;
}
3130
// Expand SINT_TO_FP / UINT_TO_FP on HVX integer vectors by constructing
// the IEEE encoding (sign | exponent | fraction) with integer vector
// operations, then bitcasting to the float result type.
SDValue
HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(i: 0);
  MVT InpTy = ty(Op: Op0);
  MVT ResTy = ty(Op);
  assert(ResTy.changeTypeToInteger() == InpTy);

  // Reference scalar implementation (i32 -> f32 with round-to-nearest;
  // the code generalizes via getIEEEProperties):
  //
  // uint32_t vnoc1_rnd(int32_t w) {
  //   int32_t iszero = w == 0;
  //   int32_t isneg = w < 0;
  //   uint32_t u = __builtin_HEXAGON_A2_abs(w);
  //
  //   uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
  //   uint32_t frac0 = (uint64_t)u << norm_left;
  //
  //   // Rounding:
  //   uint32_t frac1 = frac0 + ((1 << 8) - 1);
  //   uint32_t renorm = (frac0 > frac1);
  //   uint32_t rup = (int)(frac0 << 22) < 0;
  //
  //   uint32_t frac2 = frac0 >> 8;
  //   uint32_t frac3 = frac1 >> 8;
  //   uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
  //
  //   int32_t exp = 32 - norm_left + renorm + 127;
  //   exp <<= 23;
  //
  //   uint32_t sign = 0x80000000 * isneg;
  //   uint32_t f = sign | exp | frac;
  //   return iszero ? 0 : f;
  // }

  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: InpTy.getVectorElementCount());
  bool Signed = Opc == ISD::SINT_TO_FP;

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: ResTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;

  // Normalize |Op0| so its leading 1 is shifted out: Frac0 holds the
  // fraction bits left-aligned ("frac0" in the reference code above).
  SDValue Zero = getZero(dl, Ty: InpTy, DAG);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: InpTy);
  SDValue IsZero = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ);
  SDValue Abs = Signed ? DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: InpTy, Operand: Op0) : Op0;
  SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: InpTy, Operand: Abs);
  SDValue NLeft = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Clz, One});
  SDValue Frac0 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy, Ops: {Abs, NLeft});

  // Round the fraction into position; Ovf is the carry out of rounding
  // ("renorm" in the reference code above).
  auto [Frac, Ovf] = emitHvxShiftRightRnd(Val: Frac0, Amt: ExpWidth + 1, Signed: false, DAG);
  if (Signed) {
    // Fold the sign bit into the fraction word.
    SDValue IsNeg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETLT);
    SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: InpTy);
    SDValue Sign = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsNeg, M80, Zero});
    Frac = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Sign, Frac});
  }

  // Exponent: ElemWidth - NLeft + renorm + bias, shifted into position.
  SDValue Rnrm = DAG.getZExtOrTrunc(Op: Ovf, DL: dl, VT: InpTy);
  SDValue Exp0 = DAG.getConstant(Val: ElemWidth + ExpBias, DL: dl, VT: InpTy);
  SDValue Exp1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Rnrm, Exp0});
  SDValue Exp2 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: InpTy, Ops: {Exp1, NLeft});
  SDValue Exp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy,
                             Ops: {Exp2, DAG.getConstant(Val: FracWidth, DL: dl, VT: InpTy)});
  // Combine the fields; a zero input maps to all-zero encoding (+0.0).
  SDValue Flt0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Frac, Exp3});
  SDValue Flt1 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsZero, Zero, Flt0});
  SDValue Flt = DAG.getBitcast(VT: ResTy, V: Flt1);

  return Flt;
}
3201
3202SDValue
3203HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3204 unsigned Opc = Op.getOpcode();
3205 unsigned TLOpc;
3206 switch (Opc) {
3207 case ISD::ANY_EXTEND:
3208 case ISD::SIGN_EXTEND:
3209 case ISD::ZERO_EXTEND:
3210 TLOpc = HexagonISD::TL_EXTEND;
3211 break;
3212 case ISD::TRUNCATE:
3213 TLOpc = HexagonISD::TL_TRUNCATE;
3214 break;
3215#ifndef NDEBUG
3216 Op.dump(&DAG);
3217#endif
3218 llvm_unreachable("Unexpected operator");
3219 }
3220
3221 const SDLoc &dl(Op);
3222 return DAG.getNode(Opcode: TLOpc, DL: dl, VT: ty(Op), N1: Op.getOperand(i: 0),
3223 N2: DAG.getUNDEF(VT: MVT::i128), // illegal type
3224 N3: DAG.getConstant(Val: Opc, DL: dl, VT: MVT::i32));
3225}
3226
3227SDValue
3228HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const {
3229 assert(Op.getOpcode() == HexagonISD::TL_EXTEND ||
3230 Op.getOpcode() == HexagonISD::TL_TRUNCATE);
3231 unsigned Opc = Op.getConstantOperandVal(i: 2);
3232 return DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: ty(Op), Operand: Op.getOperand(i: 0));
3233}
3234
3235HexagonTargetLowering::VectorPair
3236HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
3237 assert(!Op.isMachineOpcode());
3238 SmallVector<SDValue, 2> OpsL, OpsH;
3239 const SDLoc &dl(Op);
3240
3241 auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
3242 MVT Ty = typeSplit(VecTy: N->getVT().getSimpleVT()).first;
3243 SDValue TV = DAG.getValueType(Ty);
3244 return std::make_pair(x&: TV, y&: TV);
3245 };
3246
3247 for (SDValue A : Op.getNode()->ops()) {
3248 auto [Lo, Hi] =
3249 ty(Op: A).isVector() ? opSplit(Vec: A, dl, DAG) : std::make_pair(x&: A, y&: A);
3250 // Special case for type operand.
3251 switch (Op.getOpcode()) {
3252 case ISD::SIGN_EXTEND_INREG:
3253 case HexagonISD::SSAT:
3254 case HexagonISD::USAT:
3255 if (const auto *N = dyn_cast<const VTSDNode>(Val: A.getNode()))
3256 std::tie(args&: Lo, args&: Hi) = SplitVTNode(N);
3257 break;
3258 }
3259 OpsL.push_back(Elt: Lo);
3260 OpsH.push_back(Elt: Hi);
3261 }
3262
3263 MVT ResTy = ty(Op);
3264 MVT HalfTy = typeSplit(VecTy: ResTy).first;
3265 SDValue L = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsL);
3266 SDValue H = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsH);
3267 return {L, H};
3268}
3269
// Split a memory operation on an HVX vector pair into two single-vector
// memory operations (plain or masked load/store) at offsets 0 and HwLen,
// and stitch the results/chains back together.
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  auto *MemN = cast<MemSDNode>(Val: Op.getNode());

  // Only simple (MVT-representable) pair types are split here; anything
  // else is returned unchanged.
  if (!MemN->getMemoryVT().isSimple())
    return Op;

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  if (!isHvxPairTy(Ty: MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(VecTy: MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  // The second half lives one HVX vector (HwLen bytes) past the first.
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base: Base0, Offset: TypeSize::getFixed(ExactSize: HwLen), DL: dl);
  unsigned MemOpc = MemN->getOpcode();

  // Derive per-half memory operands from the original MMO. For masked
  // ops the number of bytes actually touched depends on the mask, so the
  // size is marked unknown.
  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, Offset: 0, Size: MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, Offset: HwLen, Size: MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base0, MMO: MOp0);
    SDValue Load1 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base1, MMO: MOp1);
    // Return the concatenated value plus the combined chain.
    return DAG.getMergeValues(
        Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: Load0, N2: Load1),
              DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
                          N1: Load0.getValue(R: 1), N2: Load1.getValue(R: 1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(Vec: cast<StoreSDNode>(Val&: Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Val: Vals.first, Ptr: Base0, MMO: MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Val: Vals.second, Ptr: Base1, MMO: MOp1);
    return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store0, N2: Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  // Masked load/store: split the mask (and pass-through value for loads)
  // the same way as the data.
  auto MaskN = cast<MaskedLoadStoreSDNode>(Val&: Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(Vec: MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(VT: MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(Vec: cast<MaskedLoadSDNode>(Val&: Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base0, Offset, Mask: Masks.first,
                          Src0: Thru.first, MemVT: SingleTy, MMO: MOp0, AM: ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, IsExpanding: false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base1, Offset, Mask: Masks.second,
                          Src0: Thru.second, MemVT: SingleTy, MMO: MOp1, AM: ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, IsExpanding: false);
    return DAG.getMergeValues(
        Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: MLoad0, N2: MLoad1),
              DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
                          N1: MLoad0.getValue(R: 1), N2: MLoad1.getValue(R: 1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(Vec: cast<MaskedStoreSDNode>(Val&: Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Val: Vals.first, Base: Base0, Offset,
                                         Mask: Masks.first, MemVT: SingleTy, MMO: MOp0,
                                         AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Val: Vals.second, Base: Base1, Offset,
                                         Mask: Masks.second, MemVT: SingleTy, MMO: MOp1,
                                         AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
    return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: MStore0, N2: MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(G: &DAG);
  llvm_unreachable(Name.c_str());
}
3354
3355SDValue
3356HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
3357 const SDLoc &dl(Op);
3358 auto *LoadN = cast<LoadSDNode>(Val: Op.getNode());
3359 assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
3360 assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3361 "Not widening loads of i1 yet");
3362
3363 SDValue Chain = LoadN->getChain();
3364 SDValue Base = LoadN->getBasePtr();
3365 SDValue Offset = DAG.getUNDEF(VT: MVT::i32);
3366
3367 MVT ResTy = ty(Op);
3368 unsigned HwLen = Subtarget.getVectorLength();
3369 unsigned ResLen = ResTy.getStoreSize();
3370 assert(ResLen < HwLen && "vsetq(v1) prerequisite");
3371
3372 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
3373 SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
3374 Ops: {DAG.getConstant(Val: ResLen, DL: dl, VT: MVT::i32)}, DAG);
3375
3376 MVT LoadTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
3377 MachineFunction &MF = DAG.getMachineFunction();
3378 auto *MemOp = MF.getMachineMemOperand(MMO: LoadN->getMemOperand(), Offset: 0, Size: HwLen);
3379
3380 SDValue Load = DAG.getMaskedLoad(VT: LoadTy, dl, Chain, Base, Offset, Mask,
3381 Src0: DAG.getUNDEF(VT: LoadTy), MemVT: LoadTy, MMO: MemOp,
3382 AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding: false);
3383 SDValue Value = opCastElem(Vec: Load, ElemTy: ResTy.getVectorElementType(), DAG);
3384 return DAG.getMergeValues(Ops: {Value, Load.getValue(R: 1)}, dl);
3385}
3386
3387SDValue
3388HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
3389 const SDLoc &dl(Op);
3390 auto *StoreN = cast<StoreSDNode>(Val: Op.getNode());
3391 assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
3392 assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
3393 "Not widening stores of i1 yet");
3394
3395 SDValue Chain = StoreN->getChain();
3396 SDValue Base = StoreN->getBasePtr();
3397 SDValue Offset = DAG.getUNDEF(VT: MVT::i32);
3398
3399 SDValue Value = opCastElem(Vec: StoreN->getValue(), ElemTy: MVT::i8, DAG);
3400 MVT ValueTy = ty(Op: Value);
3401 unsigned ValueLen = ValueTy.getVectorNumElements();
3402 unsigned HwLen = Subtarget.getVectorLength();
3403 assert(isPowerOf2_32(ValueLen));
3404
3405 for (unsigned Len = ValueLen; Len < HwLen; ) {
3406 Value = opJoin(Ops: {Value, DAG.getUNDEF(VT: ty(Op: Value))}, dl, DAG);
3407 Len = ty(Op: Value).getVectorNumElements(); // This is Len *= 2
3408 }
3409 assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia
3410
3411 assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
3412 MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
3413 SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
3414 Ops: {DAG.getConstant(Val: ValueLen, DL: dl, VT: MVT::i32)}, DAG);
3415 MachineFunction &MF = DAG.getMachineFunction();
3416 auto *MemOp = MF.getMachineMemOperand(MMO: StoreN->getMemOperand(), Offset: 0, Size: HwLen);
3417 return DAG.getMaskedStore(Chain, dl, Val: Value, Base, Offset, Mask, MemVT: ty(Op: Value),
3418 MMO: MemOp, AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
3419}
3420
3421SDValue
3422HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
3423 const SDLoc &dl(Op);
3424 SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
3425 MVT ElemTy = ty(Op: Op0).getVectorElementType();
3426 unsigned HwLen = Subtarget.getVectorLength();
3427
3428 unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
3429 assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
3430 MVT WideOpTy = MVT::getVectorVT(VT: ElemTy, NumElements: WideOpLen);
3431 if (!Subtarget.isHVXVectorType(VecTy: WideOpTy, IncludeBool: true))
3432 return SDValue();
3433
3434 SDValue WideOp0 = appendUndef(Val: Op0, ResTy: WideOpTy, DAG);
3435 SDValue WideOp1 = appendUndef(Val: Op1, ResTy: WideOpTy, DAG);
3436 EVT ResTy =
3437 getSetCCResultType(DAG.getDataLayout(), C&: *DAG.getContext(), VT: WideOpTy);
3438 SDValue SetCC = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: ResTy,
3439 Ops: {WideOp0, WideOp1, Op.getOperand(i: 2)});
3440
3441 EVT RetTy = typeLegalize(Ty: ty(Op), DAG);
3442 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: RetTy,
3443 Ops: {SetCC, getZero(dl, Ty: MVT::i32, DAG)});
3444}
3445
// Main custom-lowering hook for HVX operations. Operations involving HVX
// vector pairs are first split into single-vector halves where that is
// legal; remaining operations are dispatched to the per-opcode LowerHvx*
// routines.
SDValue
HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
  unsigned Opc = Op.getOpcode();
  // A "pair op" is one whose result or any operand is an HVX vector pair.
  bool IsPairOp = isHvxPairTy(Ty: ty(Op)) ||
                  llvm::any_of(Range: Op.getNode()->ops(), P: [this] (SDValue V) {
                    return isHvxPairTy(Ty: ty(Op: V));
                  });

  if (IsPairOp) {
    switch (Opc) {
    default:
      break;
    case ISD::LOAD:
    case ISD::STORE:
    case ISD::MLOAD:
    case ISD::MSTORE:
      return SplitHvxMemOp(Op, DAG);
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:
      // Only split same-width conversions; conversions that change the
      // overall bit width are handled by the non-split paths below.
      if (ty(Op).getSizeInBits() == ty(Op: Op.getOperand(i: 0)).getSizeInBits())
        return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
      break;
    // Elementwise operations: always safe to split halves independently.
    case ISD::ABS:
    case ISD::CTPOP:
    case ISD::CTLZ:
    case ISD::CTTZ:
    case ISD::MUL:
    case ISD::FADD:
    case ISD::FSUB:
    case ISD::FMUL:
    case ISD::FMINIMUMNUM:
    case ISD::FMAXIMUMNUM:
    case ISD::MULHS:
    case ISD::MULHU:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:
    case ISD::FSHL:
    case ISD::FSHR:
    case ISD::SMIN:
    case ISD::SMAX:
    case ISD::UMIN:
    case ISD::UMAX:
    case ISD::SETCC:
    case ISD::VSELECT:
    case ISD::SIGN_EXTEND_INREG:
    case ISD::SPLAT_VECTOR:
      return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
      // In general, sign- and zero-extends can't be split and still
      // be legal. The only exception is extending bool vectors.
      if (ty(Op: Op.getOperand(i: 0)).getVectorElementType() == MVT::i1)
        return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
      break;
    }
  }

  // Single-vector (or unsplittable) operations: dispatch by opcode.
  switch (Opc) {
  default:
    break;
  case ISD::BUILD_VECTOR:            return LowerHvxBuildVector(Op, DAG);
  case ISD::SPLAT_VECTOR:            return LowerHvxSplatVector(Op, DAG);
  case ISD::CONCAT_VECTORS:          return LowerHvxConcatVectors(Op, DAG);
  case ISD::INSERT_SUBVECTOR:        return LowerHvxInsertSubvector(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:       return LowerHvxInsertElement(Op, DAG);
  case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG);
  case ISD::BITCAST:                 return LowerHvxBitcast(Op, DAG);
  case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);
  case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);
  case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG);
  case ISD::CTTZ:                    return LowerHvxCttz(Op, DAG);
  case ISD::SELECT:                  return LowerHvxSelect(Op, DAG);
  case ISD::SRA:
  case ISD::SHL:
  case ISD::SRL:                     return LowerHvxShift(Op, DAG);
  case ISD::FSHL:
  case ISD::FSHR:                    return LowerHvxFunnelShift(Op, DAG);
  case ISD::MULHS:
  case ISD::MULHU:                   return LowerHvxMulh(Op, DAG);
  case ISD::SMUL_LOHI:
  case ISD::UMUL_LOHI:               return LowerHvxMulLoHi(Op, DAG);
  case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
  // These are returned unchanged — no HVX-specific lowering is applied.
  case ISD::SETCC:
  case ISD::INTRINSIC_VOID:          return Op;
  case ISD::INTRINSIC_WO_CHAIN:      return LowerHvxIntrinsic(Op, DAG);
  case ISD::MLOAD:
  case ISD::MSTORE:                  return LowerHvxMaskedOp(Op, DAG);
  // Unaligned loads will be handled by the default lowering.
  case ISD::LOAD:                    return SDValue();
  case ISD::FP_EXTEND:               return LowerHvxFpExtend(Op, DAG);
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:              return LowerHvxFpToInt(Op, DAG);
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:              return LowerHvxIntToFp(Op, DAG);

  // Special nodes:
  case HexagonISD::SMUL_LOHI:
  case HexagonISD::UMUL_LOHI:
  case HexagonISD::USMUL_LOHI:       return LowerHvxMulLoHi(Op, DAG);
  }
#ifndef NDEBUG
  Op.dumpr(&DAG);
#endif
  llvm_unreachable("Unhandled HVX operation");
}
3558
3559SDValue
3560HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG)
3561 const {
3562 // Rewrite the extension/truncation/saturation op into steps where each
3563 // step changes the type widths by a factor of 2.
3564 // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32.
3565 //
3566 // Some of the vector types in Op may not be legal.
3567
3568 unsigned Opc = Op.getOpcode();
3569 switch (Opc) {
3570 case HexagonISD::SSAT:
3571 case HexagonISD::USAT:
3572 case HexagonISD::TL_EXTEND:
3573 case HexagonISD::TL_TRUNCATE:
3574 break;
3575 case ISD::ANY_EXTEND:
3576 case ISD::ZERO_EXTEND:
3577 case ISD::SIGN_EXTEND:
3578 case ISD::TRUNCATE:
3579 llvm_unreachable("ISD:: ops will be auto-folded");
3580 break;
3581#ifndef NDEBUG
3582 Op.dump(&DAG);
3583#endif
3584 llvm_unreachable("Unexpected operation");
3585 }
3586
3587 SDValue Inp = Op.getOperand(i: 0);
3588 MVT InpTy = ty(Op: Inp);
3589 MVT ResTy = ty(Op);
3590
3591 unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits();
3592 unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits();
3593 assert(InpWidth != ResWidth);
3594
3595 if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth)
3596 return Op;
3597
3598 const SDLoc &dl(Op);
3599 unsigned NumElems = InpTy.getVectorNumElements();
3600 assert(NumElems == ResTy.getVectorNumElements());
3601
3602 auto repeatOp = [&](unsigned NewWidth, SDValue Arg) {
3603 MVT Ty = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NewWidth), NumElements: NumElems);
3604 switch (Opc) {
3605 case HexagonISD::SSAT:
3606 case HexagonISD::USAT:
3607 return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, DAG.getValueType(Ty)});
3608 case HexagonISD::TL_EXTEND:
3609 case HexagonISD::TL_TRUNCATE:
3610 return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, Op.getOperand(i: 1), Op.getOperand(i: 2)});
3611 default:
3612 llvm_unreachable("Unexpected opcode");
3613 }
3614 };
3615
3616 SDValue S = Inp;
3617 if (InpWidth < ResWidth) {
3618 assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth));
3619 while (InpWidth * 2 <= ResWidth)
3620 S = repeatOp(InpWidth *= 2, S);
3621 } else {
3622 // InpWidth > ResWidth
3623 assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth));
3624 while (InpWidth / 2 >= ResWidth)
3625 S = repeatOp(InpWidth /= 2, S);
3626 }
3627 return S;
3628}
3629
3630SDValue
3631HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
3632 SDValue Inp0 = Op.getOperand(i: 0);
3633 MVT InpTy = ty(Op: Inp0);
3634 MVT ResTy = ty(Op);
3635 unsigned InpWidth = InpTy.getSizeInBits();
3636 unsigned ResWidth = ResTy.getSizeInBits();
3637 unsigned Opc = Op.getOpcode();
3638
3639 if (shouldWidenToHvx(Ty: InpTy, DAG) || shouldWidenToHvx(Ty: ResTy, DAG)) {
3640 // First, make sure that the narrower type is widened to HVX.
3641 // This may cause the result to be wider than what the legalizer
3642 // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
3643 // desired type.
3644 auto [WInpTy, WResTy] =
3645 InpWidth < ResWidth ? typeWidenToWider(Ty0: typeWidenToHvx(Ty: InpTy), Ty1: ResTy)
3646 : typeWidenToWider(Ty0: InpTy, Ty1: typeWidenToHvx(Ty: ResTy));
3647 SDValue W = appendUndef(Val: Inp0, ResTy: WInpTy, DAG);
3648 SDValue S;
3649 if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) {
3650 S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: Op.getOperand(i: 1),
3651 N3: Op.getOperand(i: 2));
3652 } else {
3653 S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: DAG.getValueType(WResTy));
3654 }
3655 SDValue T = ExpandHvxResizeIntoSteps(Op: S, DAG);
3656 return extractSubvector(Vec: T, SubTy: typeLegalize(Ty: ResTy, DAG), SubIdx: 0, DAG);
3657 } else if (shouldSplitToHvx(Ty: InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
3658 return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
3659 } else {
3660 assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
3661 return RemoveTLWrapper(Op, DAG);
3662 }
3663 llvm_unreachable("Unexpected situation");
3664}
3665
3666void
3667HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
3668 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3669 unsigned Opc = N->getOpcode();
3670 SDValue Op(N, 0);
3671 SDValue Inp0; // Optional first argument.
3672 if (N->getNumOperands() > 0)
3673 Inp0 = Op.getOperand(i: 0);
3674
3675 switch (Opc) {
3676 case ISD::ANY_EXTEND:
3677 case ISD::SIGN_EXTEND:
3678 case ISD::ZERO_EXTEND:
3679 case ISD::TRUNCATE:
3680 if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
3681 Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
3682 Results.push_back(Elt: CreateTLWrapper(Op, DAG));
3683 }
3684 break;
3685 case ISD::SETCC:
3686 if (shouldWidenToHvx(Ty: ty(Op: Inp0), DAG)) {
3687 if (SDValue T = WidenHvxSetCC(Op, DAG))
3688 Results.push_back(Elt: T);
3689 }
3690 break;
3691 case ISD::STORE: {
3692 if (shouldWidenToHvx(Ty: ty(Op: cast<StoreSDNode>(Val: N)->getValue()), DAG)) {
3693 SDValue Store = WidenHvxStore(Op, DAG);
3694 Results.push_back(Elt: Store);
3695 }
3696 break;
3697 }
3698 case ISD::MLOAD:
3699 if (isHvxPairTy(Ty: ty(Op))) {
3700 SDValue S = SplitHvxMemOp(Op, DAG);
3701 assert(S->getOpcode() == ISD::MERGE_VALUES);
3702 Results.push_back(Elt: S.getOperand(i: 0));
3703 Results.push_back(Elt: S.getOperand(i: 1));
3704 }
3705 break;
3706 case ISD::MSTORE:
3707 if (isHvxPairTy(Ty: ty(Op: Op->getOperand(Num: 1)))) { // Stored value
3708 SDValue S = SplitHvxMemOp(Op, DAG);
3709 Results.push_back(Elt: S);
3710 }
3711 break;
3712 case ISD::SINT_TO_FP:
3713 case ISD::UINT_TO_FP:
3714 case ISD::FP_TO_SINT:
3715 case ISD::FP_TO_UINT:
3716 if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
3717 SDValue T = EqualizeFpIntConversion(Op, DAG);
3718 Results.push_back(Elt: T);
3719 }
3720 break;
3721 case HexagonISD::SSAT:
3722 case HexagonISD::USAT:
3723 case HexagonISD::TL_EXTEND:
3724 case HexagonISD::TL_TRUNCATE:
3725 Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
3726 break;
3727 default:
3728 break;
3729 }
3730}
3731
3732void
3733HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
3734 SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
3735 unsigned Opc = N->getOpcode();
3736 SDValue Op(N, 0);
3737 SDValue Inp0; // Optional first argument.
3738 if (N->getNumOperands() > 0)
3739 Inp0 = Op.getOperand(i: 0);
3740
3741 switch (Opc) {
3742 case ISD::ANY_EXTEND:
3743 case ISD::SIGN_EXTEND:
3744 case ISD::ZERO_EXTEND:
3745 case ISD::TRUNCATE:
3746 if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
3747 Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
3748 Results.push_back(Elt: CreateTLWrapper(Op, DAG));
3749 }
3750 break;
3751 case ISD::SETCC:
3752 if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
3753 if (SDValue T = WidenHvxSetCC(Op, DAG))
3754 Results.push_back(Elt: T);
3755 }
3756 break;
3757 case ISD::LOAD: {
3758 if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
3759 SDValue Load = WidenHvxLoad(Op, DAG);
3760 assert(Load->getOpcode() == ISD::MERGE_VALUES);
3761 Results.push_back(Elt: Load.getOperand(i: 0));
3762 Results.push_back(Elt: Load.getOperand(i: 1));
3763 }
3764 break;
3765 }
3766 case ISD::BITCAST:
3767 if (isHvxBoolTy(Ty: ty(Op: Inp0))) {
3768 SDValue C = LowerHvxBitcast(Op, DAG);
3769 Results.push_back(Elt: C);
3770 }
3771 break;
3772 case ISD::FP_TO_SINT:
3773 case ISD::FP_TO_UINT:
3774 if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
3775 SDValue T = EqualizeFpIntConversion(Op, DAG);
3776 Results.push_back(Elt: T);
3777 }
3778 break;
3779 case HexagonISD::SSAT:
3780 case HexagonISD::USAT:
3781 case HexagonISD::TL_EXTEND:
3782 case HexagonISD::TL_TRUNCATE:
3783 Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
3784 break;
3785 default:
3786 break;
3787 }
3788}
3789
3790SDValue
3791HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op,
3792 DAGCombinerInfo &DCI) const {
3793 // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB
3794 // to extract-subvector (shuffle V, pick even, pick odd)
3795
3796 assert(Op.getOpcode() == ISD::TRUNCATE);
3797 SelectionDAG &DAG = DCI.DAG;
3798 const SDLoc &dl(Op);
3799
3800 if (Op.getOperand(i: 0).getOpcode() == ISD::BITCAST)
3801 return SDValue();
3802 SDValue Cast = Op.getOperand(i: 0);
3803 SDValue Src = Cast.getOperand(i: 0);
3804
3805 EVT TruncTy = Op.getValueType();
3806 EVT CastTy = Cast.getValueType();
3807 EVT SrcTy = Src.getValueType();
3808 if (SrcTy.isSimple())
3809 return SDValue();
3810 if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType())
3811 return SDValue();
3812 unsigned SrcLen = SrcTy.getVectorNumElements();
3813 unsigned CastLen = CastTy.getVectorNumElements();
3814 if (2 * CastLen != SrcLen)
3815 return SDValue();
3816
3817 SmallVector<int, 128> Mask(SrcLen);
3818 for (int i = 0; i != static_cast<int>(CastLen); ++i) {
3819 Mask[i] = 2 * i;
3820 Mask[i + CastLen] = 2 * i + 1;
3821 }
3822 SDValue Deal =
3823 DAG.getVectorShuffle(VT: SrcTy, dl, N1: Src, N2: DAG.getUNDEF(VT: SrcTy), Mask);
3824 return opSplit(Vec: Deal, dl, DAG).first;
3825}
3826
SDValue
HexagonTargetLowering::combineConcatVectorsBeforeLegal(
    SDValue Op, DAGCombinerInfo &DCI) const {
  // Fold
  //   concat (shuffle x, y, m1), (shuffle x, y, m2)
  // into
  //   shuffle (concat x, y), undef, m3
  // i.e. express the concatenation of two shuffles as a single shuffle of
  // one (double-width) concatenated input.
  if (Op.getNumOperands() != 2)
    return SDValue();

  SelectionDAG &DAG = DCI.DAG;
  const SDLoc &dl(Op);
  SDValue V0 = Op.getOperand(i: 0);
  SDValue V1 = Op.getOperand(i: 1);

  // Both concat operands must themselves be shuffles.
  if (V0.getOpcode() != ISD::VECTOR_SHUFFLE)
    return SDValue();
  if (V1.getOpcode() != ISD::VECTOR_SHUFFLE)
    return SDValue();

  // Collect the distinct inputs of both shuffles, in first-seen order.
  SetVector<SDValue> Order;
  Order.insert(X: V0.getOperand(i: 0));
  Order.insert(X: V0.getOperand(i: 1));
  Order.insert(X: V1.getOperand(i: 0));
  Order.insert(X: V1.getOperand(i: 1));

  // The combined shuffle has only two inputs (C0 and C1 below), so at most
  // two distinct sources can be handled.
  if (Order.size() > 2)
    return SDValue();

  // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the
  // result must be the same.
  EVT InpTy = V0.getValueType();
  assert(InpTy.isVector());
  unsigned InpLen = InpTy.getVectorNumElements();

  // Build the combined mask: indices into (concat Order[0], Order[1]).
  SmallVector<int, 128> LongMask;
  auto AppendToMask = [&](SDValue Shuffle) {
    auto *SV = cast<ShuffleVectorSDNode>(Val: Shuffle.getNode());
    ArrayRef<int> Mask = SV->getMask();
    SDValue X = Shuffle.getOperand(i: 0);
    SDValue Y = Shuffle.getOperand(i: 1);
    for (int M : Mask) {
      if (M == -1) {
        // Undef lanes stay undef.
        LongMask.push_back(Elt: M);
        continue;
      }
      // Determine which source this index refers to, and normalize M to an
      // index within that source vector.
      SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y;
      if (static_cast<unsigned>(M) >= InpLen)
        M -= InpLen;

      // Rebase into the concatenated vector: lanes from Order[0] keep their
      // index, lanes from the other source are offset by one input length.
      int OutOffset = Order[0] == Src ? 0 : InpLen;
      LongMask.push_back(Elt: M + OutOffset);
    }
  };

  AppendToMask(V0);
  AppendToMask(V1);

  SDValue C0 = Order.front();
  SDValue C1 = Order.back(); // Can be same as front
  EVT LongTy = InpTy.getDoubleNumVectorElementsVT(Context&: *DAG.getContext());

  SDValue Cat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: LongTy, Ops: {C0, C1});
  return DAG.getVectorShuffle(VT: LongTy, dl, N1: Cat, N2: DAG.getUNDEF(VT: LongTy), Mask: LongMask);
}
3892
3893SDValue
3894HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
3895 const {
3896 const SDLoc &dl(N);
3897 SelectionDAG &DAG = DCI.DAG;
3898 SDValue Op(N, 0);
3899 unsigned Opc = Op.getOpcode();
3900
3901 SmallVector<SDValue, 4> Ops(N->ops());
3902
3903 if (Opc == ISD::TRUNCATE)
3904 return combineTruncateBeforeLegal(Op, DCI);
3905 if (Opc == ISD::CONCAT_VECTORS)
3906 return combineConcatVectorsBeforeLegal(Op, DCI);
3907
3908 if (DCI.isBeforeLegalizeOps())
3909 return SDValue();
3910
3911 switch (Opc) {
3912 case ISD::VSELECT: {
3913 // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
3914 SDValue Cond = Ops[0];
3915 if (Cond->getOpcode() == ISD::XOR) {
3916 SDValue C0 = Cond.getOperand(i: 0), C1 = Cond.getOperand(i: 1);
3917 if (C1->getOpcode() == HexagonISD::QTRUE)
3918 return DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ty(Op), N1: C0, N2: Ops[2], N3: Ops[1]);
3919 }
3920 break;
3921 }
3922 case HexagonISD::V2Q:
3923 if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
3924 if (const auto *C = dyn_cast<ConstantSDNode>(Val: Ops[0].getOperand(i: 0)))
3925 return C->isZero() ? DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: ty(Op))
3926 : DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: ty(Op));
3927 }
3928 break;
3929 case HexagonISD::Q2V:
3930 if (Ops[0].getOpcode() == HexagonISD::QTRUE)
3931 return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ty(Op),
3932 Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32));
3933 if (Ops[0].getOpcode() == HexagonISD::QFALSE)
3934 return getZero(dl, Ty: ty(Op), DAG);
3935 break;
3936 case HexagonISD::VINSERTW0:
3937 if (isUndef(Op: Ops[1]))
3938 return Ops[0];
3939 break;
3940 case HexagonISD::VROR: {
3941 if (Ops[0].getOpcode() == HexagonISD::VROR) {
3942 SDValue Vec = Ops[0].getOperand(i: 0);
3943 SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(i: 1);
3944 SDValue Rot = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ty(Op: Rot0), Ops: {Rot0, Rot1});
3945 return DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ty(Op), Ops: {Vec, Rot});
3946 }
3947 break;
3948 }
3949 }
3950
3951 return SDValue();
3952}
3953
3954bool
3955HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const {
3956 if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true))
3957 return false;
3958 auto Action = getPreferredHvxVectorAction(VecTy: Ty);
3959 if (Action == TargetLoweringBase::TypeSplitVector)
3960 return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true);
3961 return false;
3962}
3963
3964bool
3965HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const {
3966 if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true))
3967 return false;
3968 auto Action = getPreferredHvxVectorAction(VecTy: Ty);
3969 if (Action == TargetLoweringBase::TypeWidenVector)
3970 return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true);
3971 return false;
3972}
3973
3974bool
3975HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const {
3976 if (!Subtarget.useHVXOps())
3977 return false;
3978 // If the type of any result, or any operand type are HVX vector types,
3979 // this is an HVX operation.
3980 auto IsHvxTy = [this](EVT Ty) {
3981 return Ty.isSimple() && Subtarget.isHVXVectorType(VecTy: Ty.getSimpleVT(), IncludeBool: true);
3982 };
3983 auto IsHvxOp = [this](SDValue Op) {
3984 return Op.getValueType().isSimple() &&
3985 Subtarget.isHVXVectorType(VecTy: ty(Op), IncludeBool: true);
3986 };
3987 if (llvm::any_of(Range: N->values(), P: IsHvxTy) || llvm::any_of(Range: N->ops(), P: IsHvxOp))
3988 return true;
3989
3990 // Check if this could be an HVX operation after type widening.
3991 auto IsWidenedToHvx = [this, &DAG](SDValue Op) {
3992 if (!Op.getValueType().isSimple())
3993 return false;
3994 MVT ValTy = ty(Op);
3995 return ValTy.isVector() && shouldWidenToHvx(Ty: ValTy, DAG);
3996 };
3997
3998 for (int i = 0, e = N->getNumValues(); i != e; ++i) {
3999 if (IsWidenedToHvx(SDValue(N, i)))
4000 return true;
4001 }
4002 return llvm::any_of(Range: N->ops(), P: IsWidenedToHvx);
4003}
4004