1 | //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "HexagonISelLowering.h" |
10 | #include "HexagonRegisterInfo.h" |
11 | #include "HexagonSubtarget.h" |
12 | #include "llvm/ADT/SetVector.h" |
13 | #include "llvm/ADT/SmallVector.h" |
14 | #include "llvm/Analysis/MemoryLocation.h" |
15 | #include "llvm/CodeGen/MachineBasicBlock.h" |
16 | #include "llvm/CodeGen/MachineFunction.h" |
17 | #include "llvm/CodeGen/MachineInstr.h" |
18 | #include "llvm/CodeGen/MachineOperand.h" |
19 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
20 | #include "llvm/CodeGen/TargetInstrInfo.h" |
21 | #include "llvm/IR/IntrinsicsHexagon.h" |
22 | #include "llvm/Support/CommandLine.h" |
23 | |
24 | #include <algorithm> |
25 | #include <string> |
26 | #include <utility> |
27 | |
28 | using namespace llvm; |
29 | |
30 | static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen", |
31 | cl::Hidden, cl::init(Val: 16), |
32 | cl::desc("Lower threshold (in bytes) for widening to HVX vectors")); |
33 | |
34 | static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 }; |
35 | static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; |
36 | static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 }; |
37 | static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 }; |
38 | |
39 | static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) { |
40 | // For a float scalar type, return (exp-bits, exp-bias, fraction-bits) |
41 | MVT ElemTy = Ty.getScalarType(); |
42 | switch (ElemTy.SimpleTy) { |
43 | case MVT::f16: |
44 | return std::make_tuple(args: 5, args: 15, args: 10); |
45 | case MVT::f32: |
46 | return std::make_tuple(args: 8, args: 127, args: 23); |
47 | case MVT::f64: |
48 | return std::make_tuple(args: 11, args: 1023, args: 52); |
49 | default: |
50 | break; |
51 | } |
52 | llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str()); |
53 | } |
54 | |
55 | void |
56 | HexagonTargetLowering::initializeHVXLowering() { |
57 | if (Subtarget.useHVX64BOps()) { |
58 | addRegisterClass(VT: MVT::v64i8, RC: &Hexagon::HvxVRRegClass); |
59 | addRegisterClass(VT: MVT::v32i16, RC: &Hexagon::HvxVRRegClass); |
60 | addRegisterClass(VT: MVT::v16i32, RC: &Hexagon::HvxVRRegClass); |
61 | addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxWRRegClass); |
62 | addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxWRRegClass); |
63 | addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxWRRegClass); |
64 | // These "short" boolean vector types should be legal because |
65 | // they will appear as results of vector compares. If they were |
66 | // not legal, type legalization would try to make them legal |
67 | // and that would require using operations that do not use or |
68 | // produce such types. That, in turn, would imply using custom |
69 | // nodes, which would be unoptimizable by the DAG combiner. |
70 | // The idea is to rely on target-independent operations as much |
71 | // as possible. |
72 | addRegisterClass(VT: MVT::v16i1, RC: &Hexagon::HvxQRRegClass); |
73 | addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass); |
74 | addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass); |
75 | } else if (Subtarget.useHVX128BOps()) { |
76 | addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxVRRegClass); |
77 | addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxVRRegClass); |
78 | addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxVRRegClass); |
79 | addRegisterClass(VT: MVT::v256i8, RC: &Hexagon::HvxWRRegClass); |
80 | addRegisterClass(VT: MVT::v128i16, RC: &Hexagon::HvxWRRegClass); |
81 | addRegisterClass(VT: MVT::v64i32, RC: &Hexagon::HvxWRRegClass); |
82 | addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass); |
83 | addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass); |
84 | addRegisterClass(VT: MVT::v128i1, RC: &Hexagon::HvxQRRegClass); |
85 | if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { |
86 | addRegisterClass(VT: MVT::v32f32, RC: &Hexagon::HvxVRRegClass); |
87 | addRegisterClass(VT: MVT::v64f16, RC: &Hexagon::HvxVRRegClass); |
88 | addRegisterClass(VT: MVT::v64f32, RC: &Hexagon::HvxWRRegClass); |
89 | addRegisterClass(VT: MVT::v128f16, RC: &Hexagon::HvxWRRegClass); |
90 | } |
91 | } |
92 | |
93 | // Set up operation actions. |
94 | |
95 | bool Use64b = Subtarget.useHVX64BOps(); |
96 | ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128; |
97 | ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128; |
98 | MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; |
99 | MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32; |
100 | MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8; |
101 | |
102 | auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) { |
103 | setOperationAction(Op: Opc, VT: FromTy, Action: Promote); |
104 | AddPromotedToType(Opc, OrigVT: FromTy, DestVT: ToTy); |
105 | }; |
106 | |
107 | // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32). |
108 | // Note: v16i1 -> i16 is handled in type legalization instead of op |
109 | // legalization. |
110 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i16, Action: Custom); |
111 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom); |
112 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom); |
113 | setOperationAction(Op: ISD::BITCAST, VT: MVT::v16i1, Action: Custom); |
114 | setOperationAction(Op: ISD::BITCAST, VT: MVT::v128i1, Action: Custom); |
115 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom); |
116 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteV, Action: Legal); |
117 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteW, Action: Legal); |
118 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom); |
119 | |
120 | if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && |
121 | Subtarget.useHVXFloatingPoint()) { |
122 | |
123 | static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 }; |
124 | static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 }; |
125 | |
126 | for (MVT T : FloatV) { |
127 | setOperationAction(Op: ISD::FADD, VT: T, Action: Legal); |
128 | setOperationAction(Op: ISD::FSUB, VT: T, Action: Legal); |
129 | setOperationAction(Op: ISD::FMUL, VT: T, Action: Legal); |
130 | setOperationAction(Op: ISD::FMINIMUMNUM, VT: T, Action: Legal); |
131 | setOperationAction(Op: ISD::FMAXIMUMNUM, VT: T, Action: Legal); |
132 | |
133 | setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom); |
134 | setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom); |
135 | |
136 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal); |
137 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal); |
138 | |
139 | setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom); |
140 | setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom); |
141 | // Custom-lower BUILD_VECTOR. The standard (target-independent) |
142 | // handling of it would convert it to a load, which is not always |
143 | // the optimal choice. |
144 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom); |
145 | } |
146 | |
147 | |
148 | // BUILD_VECTOR with f16 operands cannot be promoted without |
149 | // promoting the result, so lower the node to vsplat or constant pool |
150 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom); |
151 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::f16, Action: Custom); |
152 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::f16, Action: Custom); |
153 | |
154 | // Vector shuffle is always promoted to ByteV and a bitcast to f16 is |
155 | // generated. |
156 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW); |
157 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV); |
158 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW); |
159 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV); |
160 | |
161 | for (MVT P : FloatW) { |
162 | setOperationAction(Op: ISD::LOAD, VT: P, Action: Custom); |
163 | setOperationAction(Op: ISD::STORE, VT: P, Action: Custom); |
164 | setOperationAction(Op: ISD::FADD, VT: P, Action: Custom); |
165 | setOperationAction(Op: ISD::FSUB, VT: P, Action: Custom); |
166 | setOperationAction(Op: ISD::FMUL, VT: P, Action: Custom); |
167 | setOperationAction(Op: ISD::FMINIMUMNUM, VT: P, Action: Custom); |
168 | setOperationAction(Op: ISD::FMAXIMUMNUM, VT: P, Action: Custom); |
169 | setOperationAction(Op: ISD::SETCC, VT: P, Action: Custom); |
170 | setOperationAction(Op: ISD::VSELECT, VT: P, Action: Custom); |
171 | |
172 | // Custom-lower BUILD_VECTOR. The standard (target-independent) |
173 | // handling of it would convert it to a load, which is not always |
174 | // the optimal choice. |
175 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: P, Action: Custom); |
176 | // Make concat-vectors custom to handle concats of more than 2 vectors. |
177 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: P, Action: Custom); |
178 | |
179 | setOperationAction(Op: ISD::MLOAD, VT: P, Action: Custom); |
180 | setOperationAction(Op: ISD::MSTORE, VT: P, Action: Custom); |
181 | } |
182 | |
183 | if (Subtarget.useHVXQFloatOps()) { |
184 | setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Custom); |
185 | setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal); |
186 | } else if (Subtarget.useHVXIEEEFPOps()) { |
187 | setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Legal); |
188 | setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal); |
189 | } |
190 | } |
191 | |
192 | for (MVT T : LegalV) { |
193 | setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal); |
194 | setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal); |
195 | |
196 | setOperationAction(Op: ISD::ABS, VT: T, Action: Legal); |
197 | setOperationAction(Op: ISD::AND, VT: T, Action: Legal); |
198 | setOperationAction(Op: ISD::OR, VT: T, Action: Legal); |
199 | setOperationAction(Op: ISD::XOR, VT: T, Action: Legal); |
200 | setOperationAction(Op: ISD::ADD, VT: T, Action: Legal); |
201 | setOperationAction(Op: ISD::SUB, VT: T, Action: Legal); |
202 | setOperationAction(Op: ISD::MUL, VT: T, Action: Legal); |
203 | setOperationAction(Op: ISD::CTPOP, VT: T, Action: Legal); |
204 | setOperationAction(Op: ISD::CTLZ, VT: T, Action: Legal); |
205 | setOperationAction(Op: ISD::SELECT, VT: T, Action: Legal); |
206 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal); |
207 | if (T != ByteV) { |
208 | setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
209 | setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
210 | setOperationAction(Op: ISD::BSWAP, VT: T, Action: Legal); |
211 | } |
212 | |
213 | setOperationAction(Op: ISD::SMIN, VT: T, Action: Legal); |
214 | setOperationAction(Op: ISD::SMAX, VT: T, Action: Legal); |
215 | if (T.getScalarType() != MVT::i32) { |
216 | setOperationAction(Op: ISD::UMIN, VT: T, Action: Legal); |
217 | setOperationAction(Op: ISD::UMAX, VT: T, Action: Legal); |
218 | } |
219 | |
220 | setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom); |
221 | setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom); |
222 | setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom); |
223 | setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom); |
224 | if (T.getScalarType() != MVT::i32) { |
225 | setOperationAction(Op: ISD::MULHS, VT: T, Action: Legal); |
226 | setOperationAction(Op: ISD::MULHU, VT: T, Action: Legal); |
227 | } |
228 | |
229 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom); |
230 | // Make concat-vectors custom to handle concats of more than 2 vectors. |
231 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom); |
232 | setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom); |
233 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: T, Action: Custom); |
234 | setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom); |
235 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: T, Action: Custom); |
236 | setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom); |
237 | setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom); |
238 | setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom); |
239 | setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom); |
240 | setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom); |
241 | if (T != ByteV) { |
242 | setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom); |
243 | // HVX only has shifts of words and halfwords. |
244 | setOperationAction(Op: ISD::SRA, VT: T, Action: Custom); |
245 | setOperationAction(Op: ISD::SHL, VT: T, Action: Custom); |
246 | setOperationAction(Op: ISD::SRL, VT: T, Action: Custom); |
247 | |
248 | // Promote all shuffles to operate on vectors of bytes. |
249 | setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); |
250 | } |
251 | |
252 | if (Subtarget.useHVXFloatingPoint()) { |
253 | // Same action for both QFloat and IEEE. |
254 | setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom); |
255 | setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom); |
256 | setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom); |
257 | setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom); |
258 | } |
259 | |
260 | setCondCodeAction(CCs: ISD::SETNE, VT: T, Action: Expand); |
261 | setCondCodeAction(CCs: ISD::SETLE, VT: T, Action: Expand); |
262 | setCondCodeAction(CCs: ISD::SETGE, VT: T, Action: Expand); |
263 | setCondCodeAction(CCs: ISD::SETLT, VT: T, Action: Expand); |
264 | setCondCodeAction(CCs: ISD::SETULE, VT: T, Action: Expand); |
265 | setCondCodeAction(CCs: ISD::SETUGE, VT: T, Action: Expand); |
266 | setCondCodeAction(CCs: ISD::SETULT, VT: T, Action: Expand); |
267 | } |
268 | |
269 | for (MVT T : LegalW) { |
270 | // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- |
271 | // independent) handling of it would convert it to a load, which is |
272 | // not always the optimal choice. |
273 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom); |
274 | // Make concat-vectors custom to handle concats of more than 2 vectors. |
275 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom); |
276 | |
277 | // Custom-lower these operations for pairs. Expand them into a concat |
278 | // of the corresponding operations on individual vectors. |
279 | setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom); |
280 | setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom); |
281 | setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom); |
282 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Custom); |
283 | setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom); |
284 | setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
285 | setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
286 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Custom); |
287 | |
288 | setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom); |
289 | setOperationAction(Op: ISD::STORE, VT: T, Action: Custom); |
290 | setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom); |
291 | setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom); |
292 | setOperationAction(Op: ISD::ABS, VT: T, Action: Custom); |
293 | setOperationAction(Op: ISD::CTLZ, VT: T, Action: Custom); |
294 | setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom); |
295 | setOperationAction(Op: ISD::CTPOP, VT: T, Action: Custom); |
296 | |
297 | setOperationAction(Op: ISD::ADD, VT: T, Action: Legal); |
298 | setOperationAction(Op: ISD::SUB, VT: T, Action: Legal); |
299 | setOperationAction(Op: ISD::MUL, VT: T, Action: Custom); |
300 | setOperationAction(Op: ISD::MULHS, VT: T, Action: Custom); |
301 | setOperationAction(Op: ISD::MULHU, VT: T, Action: Custom); |
302 | setOperationAction(Op: ISD::AND, VT: T, Action: Custom); |
303 | setOperationAction(Op: ISD::OR, VT: T, Action: Custom); |
304 | setOperationAction(Op: ISD::XOR, VT: T, Action: Custom); |
305 | setOperationAction(Op: ISD::SETCC, VT: T, Action: Custom); |
306 | setOperationAction(Op: ISD::VSELECT, VT: T, Action: Custom); |
307 | if (T != ByteW) { |
308 | setOperationAction(Op: ISD::SRA, VT: T, Action: Custom); |
309 | setOperationAction(Op: ISD::SHL, VT: T, Action: Custom); |
310 | setOperationAction(Op: ISD::SRL, VT: T, Action: Custom); |
311 | |
312 | // Promote all shuffles to operate on vectors of bytes. |
313 | setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); |
314 | } |
315 | setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom); |
316 | setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom); |
317 | |
318 | setOperationAction(Op: ISD::SMIN, VT: T, Action: Custom); |
319 | setOperationAction(Op: ISD::SMAX, VT: T, Action: Custom); |
320 | if (T.getScalarType() != MVT::i32) { |
321 | setOperationAction(Op: ISD::UMIN, VT: T, Action: Custom); |
322 | setOperationAction(Op: ISD::UMAX, VT: T, Action: Custom); |
323 | } |
324 | |
325 | if (Subtarget.useHVXFloatingPoint()) { |
326 | // Same action for both QFloat and IEEE. |
327 | setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom); |
328 | setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom); |
329 | setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom); |
330 | setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom); |
331 | } |
332 | } |
333 | |
334 | // Legalize all of these to HexagonISD::[SU]MUL_LOHI. |
335 | setOperationAction(Op: ISD::MULHS, VT: WordV, Action: Custom); // -> _LOHI |
336 | setOperationAction(Op: ISD::MULHU, VT: WordV, Action: Custom); // -> _LOHI |
337 | setOperationAction(Op: ISD::SMUL_LOHI, VT: WordV, Action: Custom); |
338 | setOperationAction(Op: ISD::UMUL_LOHI, VT: WordV, Action: Custom); |
339 | |
340 | setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v64f16, Action: Expand); |
341 | setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v64f16, Action: Expand); |
342 | setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v64f16, Action: Expand); |
343 | setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v64f16, Action: Expand); |
344 | setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v64f16, Action: Expand); |
345 | setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v64f16, Action: Expand); |
346 | setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v64f16, Action: Expand); |
347 | setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v64f16, Action: Expand); |
348 | setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v64f16, Action: Expand); |
349 | setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v64f16, Action: Expand); |
350 | setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v64f16, Action: Expand); |
351 | setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v64f16, Action: Expand); |
352 | |
353 | setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v32f32, Action: Expand); |
354 | setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v32f32, Action: Expand); |
355 | setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v32f32, Action: Expand); |
356 | setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v32f32, Action: Expand); |
357 | setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v32f32, Action: Expand); |
358 | setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v32f32, Action: Expand); |
359 | setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v32f32, Action: Expand); |
360 | setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v32f32, Action: Expand); |
361 | setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v32f32, Action: Expand); |
362 | setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v32f32, Action: Expand); |
363 | setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v32f32, Action: Expand); |
364 | setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v32f32, Action: Expand); |
365 | |
366 | // Boolean vectors. |
367 | |
368 | for (MVT T : LegalW) { |
369 | // Boolean types for vector pairs will overlap with the boolean |
370 | // types for single vectors, e.g. |
371 | // v64i8 -> v64i1 (single) |
372 | // v64i16 -> v64i1 (pair) |
373 | // Set these actions first, and allow the single actions to overwrite |
374 | // any duplicates. |
375 | MVT BoolW = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements()); |
376 | setOperationAction(Op: ISD::SETCC, VT: BoolW, Action: Custom); |
377 | setOperationAction(Op: ISD::AND, VT: BoolW, Action: Custom); |
378 | setOperationAction(Op: ISD::OR, VT: BoolW, Action: Custom); |
379 | setOperationAction(Op: ISD::XOR, VT: BoolW, Action: Custom); |
380 | // Masked load/store takes a mask that may need splitting. |
381 | setOperationAction(Op: ISD::MLOAD, VT: BoolW, Action: Custom); |
382 | setOperationAction(Op: ISD::MSTORE, VT: BoolW, Action: Custom); |
383 | } |
384 | |
385 | for (MVT T : LegalV) { |
386 | MVT BoolV = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements()); |
387 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: BoolV, Action: Custom); |
388 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: BoolV, Action: Custom); |
389 | setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: BoolV, Action: Custom); |
390 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: BoolV, Action: Custom); |
391 | setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: BoolV, Action: Custom); |
392 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: BoolV, Action: Custom); |
393 | setOperationAction(Op: ISD::SELECT, VT: BoolV, Action: Custom); |
394 | setOperationAction(Op: ISD::AND, VT: BoolV, Action: Legal); |
395 | setOperationAction(Op: ISD::OR, VT: BoolV, Action: Legal); |
396 | setOperationAction(Op: ISD::XOR, VT: BoolV, Action: Legal); |
397 | } |
398 | |
399 | if (Use64b) { |
400 | for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32}) |
401 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal); |
402 | } else { |
403 | for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32}) |
404 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal); |
405 | } |
406 | |
407 | // Handle store widening for short vectors. |
408 | unsigned HwLen = Subtarget.getVectorLength(); |
409 | for (MVT ElemTy : Subtarget.getHVXElementTypes()) { |
410 | if (ElemTy == MVT::i1) |
411 | continue; |
412 | int ElemWidth = ElemTy.getFixedSizeInBits(); |
413 | int MaxElems = (8*HwLen) / ElemWidth; |
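// Consider every power-of-2 element count that is shorter than a full
// HVX vector; these are the "short" vector types that may get widened.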
414 | for (int N = 2; N < MaxElems; N *= 2) { |
415 | MVT VecTy = MVT::getVectorVT(VT: ElemTy, NumElements: N); |
416 | auto Action = getPreferredVectorAction(VT: VecTy); |
417 | if (Action == TargetLoweringBase::TypeWidenVector) { |
418 | setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Custom); |
419 | setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Custom); |
420 | setOperationAction(Op: ISD::SETCC, VT: VecTy, Action: Custom); |
421 | setOperationAction(Op: ISD::TRUNCATE, VT: VecTy, Action: Custom); |
422 | setOperationAction(Op: ISD::ANY_EXTEND, VT: VecTy, Action: Custom); |
423 | setOperationAction(Op: ISD::SIGN_EXTEND, VT: VecTy, Action: Custom); |
424 | setOperationAction(Op: ISD::ZERO_EXTEND, VT: VecTy, Action: Custom); |
425 | if (Subtarget.useHVXFloatingPoint()) { |
426 | setOperationAction(Op: ISD::FP_TO_SINT, VT: VecTy, Action: Custom); |
427 | setOperationAction(Op: ISD::FP_TO_UINT, VT: VecTy, Action: Custom); |
428 | setOperationAction(Op: ISD::SINT_TO_FP, VT: VecTy, Action: Custom); |
429 | setOperationAction(Op: ISD::UINT_TO_FP, VT: VecTy, Action: Custom); |
430 | } |
431 | |
432 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: N); |
433 | if (!isTypeLegal(VT: BoolTy)) |
434 | setOperationAction(Op: ISD::SETCC, VT: BoolTy, Action: Custom); |
435 | } |
436 | } |
437 | } |
438 | |
439 | setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT}); |
440 | } |
441 | |
442 | unsigned |
443 | HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const { |
444 | MVT ElemTy = VecTy.getVectorElementType(); |
445 | unsigned VecLen = VecTy.getVectorNumElements(); |
446 | unsigned HwLen = Subtarget.getVectorLength(); |
447 | |
448 | // Split vectors of i1 that exceed byte vector length. |
449 | if (ElemTy == MVT::i1 && VecLen > HwLen) |
450 | return TargetLoweringBase::TypeSplitVector; |
451 | |
452 | ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes(); |
453 | // For shorter vectors of i1, widen them if any of the corresponding |
454 | // vectors of integers needs to be widened. |
455 | if (ElemTy == MVT::i1) { |
456 | for (MVT T : Tys) { |
457 | assert(T != MVT::i1); |
458 | auto A = getPreferredHvxVectorAction(VecTy: MVT::getVectorVT(VT: T, NumElements: VecLen)); |
459 | if (A != ~0u) |
460 | return A; |
461 | } |
462 | return ~0u; |
463 | } |
464 | |
465 | // If the size of VecTy is at least half of the vector length, |
466 | // widen the vector. Note: the threshold was not selected in |
467 | // any scientific way. |
468 | if (llvm::is_contained(Range&: Tys, Element: ElemTy)) { |
469 | unsigned VecWidth = VecTy.getSizeInBits(); |
470 | unsigned HwWidth = 8*HwLen; |
471 | if (VecWidth > 2*HwWidth) |
472 | return TargetLoweringBase::TypeSplitVector; |
473 | |
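// If an explicit -hexagon-hvx-widen=<bytes> threshold was given, widen
// anything at least that wide, in addition to the default heuristic below.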
474 | bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0; |
475 | if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth) |
476 | return TargetLoweringBase::TypeWidenVector; |
477 | if (VecWidth >= HwWidth/2 && VecWidth < HwWidth) |
478 | return TargetLoweringBase::TypeWidenVector; |
479 | } |
480 | |
481 | // Defer to default. |
482 | return ~0u; |
483 | } |
484 | |
485 | unsigned |
486 | HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const { |
487 | unsigned Opc = Op.getOpcode(); |
488 | switch (Opc) { |
489 | case HexagonISD::SMUL_LOHI: |
490 | case HexagonISD::UMUL_LOHI: |
491 | case HexagonISD::USMUL_LOHI: |
492 | return TargetLoweringBase::Custom; |
493 | } |
494 | return TargetLoweringBase::Legal; |
495 | } |
496 | |
497 | SDValue |
498 | HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops, |
499 | const SDLoc &dl, SelectionDAG &DAG) const { |
500 | SmallVector<SDValue,4> IntOps; |
501 | IntOps.push_back(Elt: DAG.getConstant(Val: IntId, DL: dl, VT: MVT::i32)); |
502 | append_range(C&: IntOps, R&: Ops); |
503 | return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: ResTy, Ops: IntOps); |
504 | } |
505 | |
506 | MVT |
507 | HexagonTargetLowering::typeJoin(const TypePair &Tys) const { |
508 | assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType()); |
509 | |
510 | MVT ElemTy = Tys.first.getVectorElementType(); |
511 | return MVT::getVectorVT(VT: ElemTy, NumElements: Tys.first.getVectorNumElements() + |
512 | Tys.second.getVectorNumElements()); |
513 | } |
514 | |
515 | HexagonTargetLowering::TypePair |
516 | HexagonTargetLowering::typeSplit(MVT VecTy) const { |
517 | assert(VecTy.isVector()); |
518 | unsigned NumElem = VecTy.getVectorNumElements(); |
519 | assert((NumElem % 2) == 0 && "Expecting even-sized vector type"); |
520 | MVT HalfTy = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: NumElem/2); |
521 | return { HalfTy, HalfTy }; |
522 | } |
523 | |
524 | MVT |
525 | HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const { |
526 | MVT ElemTy = VecTy.getVectorElementType(); |
527 | MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() * Factor); |
528 | return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements()); |
529 | } |
530 | |
531 | MVT |
532 | HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const { |
533 | MVT ElemTy = VecTy.getVectorElementType(); |
534 | MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() / Factor); |
535 | return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements()); |
536 | } |
537 | |
538 | SDValue |
539 | HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy, |
540 | SelectionDAG &DAG) const { |
541 | if (ty(Op: Vec).getVectorElementType() == ElemTy) |
542 | return Vec; |
543 | MVT CastTy = tyVector(Ty: Vec.getValueType().getSimpleVT(), ElemTy); |
544 | return DAG.getBitcast(VT: CastTy, V: Vec); |
545 | } |
546 | |
547 | SDValue |
548 | HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl, |
549 | SelectionDAG &DAG) const { |
550 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: typeJoin(Tys: ty(Ops)), |
551 | N1: Ops.first, N2: Ops.second); |
552 | } |
553 | |
554 | HexagonTargetLowering::VectorPair |
555 | HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl, |
556 | SelectionDAG &DAG) const { |
557 | TypePair Tys = typeSplit(VecTy: ty(Op: Vec)); |
558 | if (Vec.getOpcode() == HexagonISD::QCAT) |
559 | return VectorPair(Vec.getOperand(i: 0), Vec.getOperand(i: 1)); |
560 | return DAG.SplitVector(N: Vec, DL: dl, LoVT: Tys.first, HiVT: Tys.second); |
561 | } |
562 | |
563 | bool |
564 | HexagonTargetLowering::isHvxSingleTy(MVT Ty) const { |
565 | return Subtarget.isHVXVectorType(VecTy: Ty) && |
566 | Ty.getSizeInBits() == 8 * Subtarget.getVectorLength(); |
567 | } |
568 | |
569 | bool |
570 | HexagonTargetLowering::isHvxPairTy(MVT Ty) const { |
571 | return Subtarget.isHVXVectorType(VecTy: Ty) && |
572 | Ty.getSizeInBits() == 16 * Subtarget.getVectorLength(); |
573 | } |
574 | |
575 | bool |
576 | HexagonTargetLowering::isHvxBoolTy(MVT Ty) const { |
577 | return Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true) && |
578 | Ty.getVectorElementType() == MVT::i1; |
579 | } |
580 | |
581 | bool HexagonTargetLowering::allowsHvxMemoryAccess( |
582 | MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const { |
583 | // Bool vectors are excluded by default, but make it explicit to |
584 | // emphasize that bool vectors cannot be loaded or stored. |
585 | // Also, disallow double vector stores (to prevent unnecessary |
586 | // store widening in DAG combiner). |
587 | if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength()) |
588 | return false; |
589 | if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false)) |
590 | return false; |
591 | if (Fast) |
592 | *Fast = 1; |
593 | return true; |
594 | } |
595 | |
596 | bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses( |
597 | MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const { |
598 | if (!Subtarget.isHVXVectorType(VecTy)) |
599 | return false; |
600 | // XXX Should this be false? vmemu is a bit slower than vmem. |
601 | if (Fast) |
602 | *Fast = 1; |
603 | return true; |
604 | } |
605 | |
606 | void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection( |
607 | MachineInstr &MI, SDNode *Node) const { |
608 | unsigned Opc = MI.getOpcode(); |
609 | const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); |
610 | MachineBasicBlock &MB = *MI.getParent(); |
611 | MachineFunction &MF = *MB.getParent(); |
612 | MachineRegisterInfo &MRI = MF.getRegInfo(); |
613 | DebugLoc DL = MI.getDebugLoc(); |
614 | auto At = MI.getIterator(); |
615 | |
616 | switch (Opc) { |
617 | case Hexagon::PS_vsplatib: |
618 | if (Subtarget.useHVXV62Ops()) { |
619 | // SplatV = A2_tfrsi #imm |
620 | // OutV = V6_lvsplatb SplatV |
621 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
622 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV) |
623 | .add(MO: MI.getOperand(i: 1)); |
624 | Register OutV = MI.getOperand(i: 0).getReg(); |
625 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV) |
626 | .addReg(RegNo: SplatV); |
627 | } else { |
628 | // SplatV = A2_tfrsi #imm:#imm:#imm:#imm |
629 | // OutV = V6_lvsplatw SplatV |
630 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
631 | const MachineOperand &InpOp = MI.getOperand(i: 1); |
632 | assert(InpOp.isImm()); |
633 | uint32_t V = InpOp.getImm() & 0xFF; |
634 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV) |
635 | .addImm(Val: V << 24 | V << 16 | V << 8 | V); |
636 | Register OutV = MI.getOperand(i: 0).getReg(); |
637 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV); |
638 | } |
639 | MB.erase(I: At); |
640 | break; |
641 | case Hexagon::PS_vsplatrb: |
642 | if (Subtarget.useHVXV62Ops()) { |
643 | // OutV = V6_lvsplatb Inp |
644 | Register OutV = MI.getOperand(i: 0).getReg(); |
645 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV) |
646 | .add(MO: MI.getOperand(i: 1)); |
647 | } else { |
648 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
649 | const MachineOperand &InpOp = MI.getOperand(i: 1); |
650 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::S2_vsplatrb), DestReg: SplatV) |
651 | .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg()); |
652 | Register OutV = MI.getOperand(i: 0).getReg(); |
653 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV) |
654 | .addReg(RegNo: SplatV); |
655 | } |
656 | MB.erase(I: At); |
657 | break; |
658 | case Hexagon::PS_vsplatih: |
659 | if (Subtarget.useHVXV62Ops()) { |
660 | // SplatV = A2_tfrsi #imm |
661 | // OutV = V6_lvsplath SplatV |
662 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
663 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV) |
664 | .add(MO: MI.getOperand(i: 1)); |
665 | Register OutV = MI.getOperand(i: 0).getReg(); |
666 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV) |
667 | .addReg(RegNo: SplatV); |
668 | } else { |
669 | // SplatV = A2_tfrsi #imm:#imm |
670 | // OutV = V6_lvsplatw SplatV |
671 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
672 | const MachineOperand &InpOp = MI.getOperand(i: 1); |
673 | assert(InpOp.isImm()); |
674 | uint32_t V = InpOp.getImm() & 0xFFFF; |
675 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV) |
676 | .addImm(Val: V << 16 | V); |
677 | Register OutV = MI.getOperand(i: 0).getReg(); |
678 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV); |
679 | } |
680 | MB.erase(I: At); |
681 | break; |
682 | case Hexagon::PS_vsplatrh: |
683 | if (Subtarget.useHVXV62Ops()) { |
684 | // OutV = V6_lvsplath Inp |
685 | Register OutV = MI.getOperand(i: 0).getReg(); |
686 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV) |
687 | .add(MO: MI.getOperand(i: 1)); |
688 | } else { |
689 | // SplatV = A2_combine_ll Inp, Inp |
690 | // OutV = V6_lvsplatw SplatV |
691 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
692 | const MachineOperand &InpOp = MI.getOperand(i: 1); |
693 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_combine_ll), DestReg: SplatV) |
694 | .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg()) |
695 | .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg()); |
696 | Register OutV = MI.getOperand(i: 0).getReg(); |
697 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV); |
698 | } |
699 | MB.erase(I: At); |
700 | break; |
701 | case Hexagon::PS_vsplatiw: |
702 | case Hexagon::PS_vsplatrw: |
703 | if (Opc == Hexagon::PS_vsplatiw) { |
704 | // SplatV = A2_tfrsi #imm |
705 | Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass); |
706 | BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV) |
707 | .add(MO: MI.getOperand(i: 1)); |
708 | MI.getOperand(i: 1).ChangeToRegister(Reg: SplatV, isDef: false); |
709 | } |
710 | // OutV = V6_lvsplatw SplatV/Inp |
711 | MI.setDesc(TII.get(Opcode: Hexagon::V6_lvsplatw)); |
712 | break; |
713 | } |
714 | } |
715 | |
716 | SDValue |
717 | HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy, |
718 | SelectionDAG &DAG) const { |
719 | if (ElemIdx.getValueType().getSimpleVT() != MVT::i32) |
720 | ElemIdx = DAG.getBitcast(VT: MVT::i32, V: ElemIdx); |
721 | |
722 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
723 | if (ElemWidth == 8) |
724 | return ElemIdx; |
725 | |
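// Convert the element index to a byte index by shifting left by
// log2(bytes per element); ElemWidth is a power of 2.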
726 | unsigned L = Log2_32(Value: ElemWidth/8); |
727 | const SDLoc &dl(ElemIdx); |
728 | return DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i32, |
729 | Ops: {ElemIdx, DAG.getConstant(Val: L, DL: dl, VT: MVT::i32)}); |
730 | } |
731 | |
732 | SDValue |
733 | HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy, |
734 | SelectionDAG &DAG) const { |
735 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
736 | assert(ElemWidth >= 8 && ElemWidth <= 32); |
737 | if (ElemWidth == 32) |
738 | return Idx; |
739 | |
740 | if (ty(Op: Idx) != MVT::i32) |
741 | Idx = DAG.getBitcast(VT: MVT::i32, V: Idx); |
742 | const SDLoc &dl(Idx); |
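// The position of the element within its containing 32-bit word is given
// by the low log2(32/ElemWidth) bits of the index.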
743 | SDValue Mask = DAG.getConstant(Val: 32/ElemWidth - 1, DL: dl, VT: MVT::i32); |
744 | SDValue SubIdx = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, Ops: {Idx, Mask}); |
745 | return SubIdx; |
746 | } |
747 | |
748 | SDValue |
749 | HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0, |
750 | SDValue Op1, ArrayRef<int> Mask, |
751 | SelectionDAG &DAG) const { |
752 | MVT OpTy = ty(Op: Op0); |
753 | assert(OpTy == ty(Op1)); |
754 | |
755 | MVT ElemTy = OpTy.getVectorElementType(); |
756 | if (ElemTy == MVT::i8) |
757 | return DAG.getVectorShuffle(VT: OpTy, dl, N1: Op0, N2: Op1, Mask); |
758 | assert(ElemTy.getSizeInBits() >= 8); |
759 | |
760 | MVT ResTy = tyVector(Ty: OpTy, ElemTy: MVT::i8); |
761 | unsigned ElemSize = ElemTy.getSizeInBits() / 8; |
762 | |
763 | SmallVector<int,128> ByteMask; |
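// Expand each index in the element-granularity mask into ElemSize
// consecutive byte indices (keeping -1 for undef lanes).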
764 | for (int M : Mask) { |
765 | if (M < 0) { |
766 | for (unsigned I = 0; I != ElemSize; ++I) |
767 | ByteMask.push_back(Elt: -1); |
768 | } else { |
769 | int NewM = M*ElemSize; |
770 | for (unsigned I = 0; I != ElemSize; ++I) |
771 | ByteMask.push_back(Elt: NewM+I); |
772 | } |
773 | } |
774 | assert(ResTy.getVectorNumElements() == ByteMask.size()); |
775 | return DAG.getVectorShuffle(VT: ResTy, dl, N1: opCastElem(Vec: Op0, ElemTy: MVT::i8, DAG), |
776 | N2: opCastElem(Vec: Op1, ElemTy: MVT::i8, DAG), Mask: ByteMask); |
777 | } |
778 | |
779 | SDValue |
780 | HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values, |
781 | const SDLoc &dl, MVT VecTy, |
782 | SelectionDAG &DAG) const { |
783 | unsigned VecLen = Values.size(); |
784 | MachineFunction &MF = DAG.getMachineFunction(); |
785 | MVT ElemTy = VecTy.getVectorElementType(); |
786 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
787 | unsigned HwLen = Subtarget.getVectorLength(); |
788 | |
789 | unsigned ElemSize = ElemWidth / 8; |
790 | assert(ElemSize*VecLen == HwLen); |
791 | SmallVector<SDValue,32> Words; |
792 | |
793 | if (VecTy.getVectorElementType() != MVT::i32 && |
794 | !(Subtarget.useHVXFloatingPoint() && |
795 | VecTy.getVectorElementType() == MVT::f32)) { |
796 | assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size"); |
797 | unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2; |
798 | MVT PartVT = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: OpsPerWord); |
799 | for (unsigned i = 0; i != VecLen; i += OpsPerWord) { |
800 | SDValue W = buildVector32(Elem: Values.slice(N: i, M: OpsPerWord), dl, VecTy: PartVT, DAG); |
801 | Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V: W)); |
802 | } |
803 | } else { |
804 | for (SDValue V : Values) |
805 | Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V)); |
806 | } |
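// Check whether all defined values are the same; undefs are ignored. If so,
// report the common value (or Values[0] if everything was undef) in SplatV.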
807 | auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) { |
808 | unsigned NumValues = Values.size(); |
809 | assert(NumValues > 0); |
810 | bool IsUndef = true; |
811 | for (unsigned i = 0; i != NumValues; ++i) { |
812 | if (Values[i].isUndef()) |
813 | continue; |
814 | IsUndef = false; |
815 | if (!SplatV.getNode()) |
816 | SplatV = Values[i]; |
817 | else if (SplatV != Values[i]) |
818 | return false; |
819 | } |
820 | if (IsUndef) |
821 | SplatV = Values[0]; |
822 | return true; |
823 | }; |
824 | |
825 | unsigned NumWords = Words.size(); |
826 | SDValue SplatV; |
827 | bool IsSplat = isSplat(Words, SplatV); |
828 | if (IsSplat && isUndef(Op: SplatV)) |
829 | return DAG.getUNDEF(VT: VecTy); |
830 | if (IsSplat) { |
831 | assert(SplatV.getNode()); |
832 | if (isNullConstant(V: SplatV)) |
833 | return getZero(dl, Ty: VecTy, DAG); |
834 | MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4); |
835 | SDValue S = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: WordTy, Operand: SplatV); |
836 | return DAG.getBitcast(VT: VecTy, V: S); |
837 | } |
838 | |
839 | // Delay recognizing constant vectors until here, so that we can generate |
840 | // a vsplat. |
841 | SmallVector<ConstantInt*, 128> Consts(VecLen); |
842 | bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts); |
843 | if (AllConst) { |
844 | ArrayRef<Constant*> Tmp((Constant**)Consts.begin(), |
845 | (Constant**)Consts.end()); |
846 | Constant *CV = ConstantVector::get(V: Tmp); |
847 | Align Alignment(HwLen); |
848 | SDValue CP = |
849 | LowerConstantPool(Op: DAG.getConstantPool(C: CV, VT: VecTy, Align: Alignment), DAG); |
850 | return DAG.getLoad(VT: VecTy, dl, Chain: DAG.getEntryNode(), Ptr: CP, |
851 | PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment); |
852 | } |
853 | |
854 | // A special case is a situation where the vector is built entirely from |
855 | // elements extracted from another vector. This could be done via a shuffle |
856 | // more efficiently, but typically, the size of the source vector will not |
857 | // match the size of the vector being built (which precludes the use of a |
858 | // shuffle directly). |
859 | // This only handles a single source vector, and the vector being built |
860 | // should be of a sub-vector type of the source vector type. |
861 | auto IsBuildFromExtracts = [this,&Values] (SDValue &SrcVec, |
862 | SmallVectorImpl<int> &SrcIdx) { |
863 | SDValue Vec; |
864 | for (SDValue V : Values) { |
865 | if (isUndef(Op: V)) { |
866 | SrcIdx.push_back(Elt: -1); |
867 | continue; |
868 | } |
869 | if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
870 | return false; |
871 | // All extracts should come from the same vector. |
872 | SDValue T = V.getOperand(i: 0); |
873 | if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode()) |
874 | return false; |
875 | Vec = T; |
876 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1)); |
877 | if (C == nullptr) |
878 | return false; |
879 | int I = C->getSExtValue(); |
880 | assert(I >= 0 && "Negative element index"); |
881 | SrcIdx.push_back(Elt: I); |
882 | } |
883 | SrcVec = Vec; |
884 | return true; |
885 | }; |
886 | |
887 | SmallVector<int,128> ExtIdx; |
888 | SDValue ExtVec; |
889 | if (IsBuildFromExtracts(ExtVec, ExtIdx)) { |
890 | MVT ExtTy = ty(Op: ExtVec); |
891 | unsigned ExtLen = ExtTy.getVectorNumElements(); |
892 | if (ExtLen == VecLen || ExtLen == 2*VecLen) { |
893 | // Construct a new shuffle mask that will produce a vector with the same |
894 | // number of elements as the input vector, and such that the vector we |
895 | // want will be the initial subvector of it. |
896 | SmallVector<int,128> Mask; |
897 | BitVector Used(ExtLen); |
898 | |
899 | for (int M : ExtIdx) { |
900 | Mask.push_back(Elt: M); |
901 | if (M >= 0) |
902 | Used.set(M); |
903 | } |
904 | // Fill the rest of the mask with the unused elements of ExtVec in hopes |
905 | // that it will result in a permutation of ExtVec's elements. It's still |
906 | // fine if it doesn't (e.g. if undefs are present, or elements are |
907 | // repeated), but permutations can always be done efficiently via vdelta |
908 | // and vrdelta. |
909 | for (unsigned I = 0; I != ExtLen; ++I) { |
910 | if (Mask.size() == ExtLen) |
911 | break; |
912 | if (!Used.test(Idx: I)) |
913 | Mask.push_back(Elt: I); |
914 | } |
915 | |
916 | SDValue S = DAG.getVectorShuffle(VT: ExtTy, dl, N1: ExtVec, |
917 | N2: DAG.getUNDEF(VT: ExtTy), Mask); |
918 | return ExtLen == VecLen ? S : LoHalf(V: S, DAG); |
919 | } |
920 | } |
921 | |
922 | // Find most common element to initialize vector with. This is to avoid |
923 | // unnecessary vinsert/valign for cases where the same value is present |
924 | // many times. Creates a histogram of the vector's elements to find the |
925 | // most common element n. |
926 | assert(4*Words.size() == Subtarget.getVectorLength()); |
927 | int VecHist[32]; |
928 | int n = 0; |
929 | for (unsigned i = 0; i != NumWords; ++i) { |
930 | VecHist[i] = 0; |
931 | if (Words[i].isUndef()) |
932 | continue; |
933 | for (unsigned j = i; j != NumWords; ++j) |
934 | if (Words[i] == Words[j]) |
935 | VecHist[i]++; |
936 | |
937 | if (VecHist[i] > VecHist[n]) |
938 | n = i; |
939 | } |
940 | |
941 | SDValue HalfV = getZero(dl, Ty: VecTy, DAG); |
942 | if (VecHist[n] > 1) { |
943 | SDValue SplatV = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Words[n]); |
944 | HalfV = DAG.getNode(Opcode: HexagonISD::VALIGN, DL: dl, VT: VecTy, |
945 | Ops: {HalfV, SplatV, DAG.getConstant(Val: HwLen/2, DL: dl, VT: MVT::i32)}); |
946 | } |
947 | SDValue HalfV0 = HalfV; |
948 | SDValue HalfV1 = HalfV; |
949 | |
950 | // Construct two halves in parallel, then or them together. Rn and Rm count |
951 | // number of rotations needed before the next element. One last rotation is |
952 | // performed post-loop to position the last element. |
953 | int Rn = 0, Rm = 0; |
954 | SDValue Sn, Sm; |
955 | SDValue N = HalfV0; |
956 | SDValue M = HalfV1; |
957 | for (unsigned i = 0; i != NumWords/2; ++i) { |
958 | // Rotate by element count since last insertion. |
959 | if (Words[i] != Words[n] || VecHist[n] <= 1) { |
960 | Sn = DAG.getConstant(Val: Rn, DL: dl, VT: MVT::i32); |
961 | HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn}); |
962 | N = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, |
963 | Ops: {HalfV0, Words[i]}); |
964 | Rn = 0; |
965 | } |
966 | if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) { |
967 | Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32); |
968 | HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm}); |
969 | M = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, |
970 | Ops: {HalfV1, Words[i+NumWords/2]}); |
971 | Rm = 0; |
972 | } |
973 | Rn += 4; |
974 | Rm += 4; |
975 | } |
976 | // Perform last rotation. |
977 | Sn = DAG.getConstant(Val: Rn+HwLen/2, DL: dl, VT: MVT::i32); |
978 | Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32); |
979 | HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn}); |
980 | HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm}); |
981 | |
982 | SDValue T0 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV0); |
983 | SDValue T1 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV1); |
984 | |
985 | SDValue DstV = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ty(Op: T0), Ops: {T0, T1}); |
986 | |
987 | SDValue OutV = |
988 | DAG.getBitcast(VT: tyVector(Ty: ty(Op: DstV), ElemTy: VecTy.getVectorElementType()), V: DstV); |
989 | return OutV; |
990 | } |
991 | |
992 | SDValue |
993 | HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl, |
994 | unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const { |
995 | MVT PredTy = ty(Op: PredV); |
996 | unsigned HwLen = Subtarget.getVectorLength(); |
997 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
998 | |
999 | if (Subtarget.isHVXVectorType(VecTy: PredTy, IncludeBool: true)) { |
1000 | // Move the vector predicate SubV to a vector register, and scale it |
1001 | // down to match the representation (bytes per type element) that VecV |
1002 | // uses. The scaling down will pick every 2nd or 4th (every Scale-th |
1003 | // in general) element and put them at the front of the resulting |
1004 | // vector. This subvector will then be inserted into the Q2V of VecV. |
1005 | // To avoid having an operation that generates an illegal type (short |
1006 | // vector), generate a full size vector. |
1007 | // |
1008 | SDValue T = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: PredV); |
1009 | SmallVector<int,128> Mask(HwLen); |
1010 | // Scale = BitBytes(PredV) / Given BitBytes. |
1011 | unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes); |
1012 | unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes; |
1013 | |
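// Build a shuffle mask that gathers every Scale-th byte of the expanded
// predicate into the first BlockLen bytes of the result; the remaining
// bytes are filled with the other input bytes in BlockLen-sized groups.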
1014 | for (unsigned i = 0; i != HwLen; ++i) { |
1015 | unsigned Num = i % Scale; |
1016 | unsigned Off = i / Scale; |
1017 | Mask[BlockLen*Num + Off] = i; |
1018 | } |
1019 | SDValue S = DAG.getVectorShuffle(VT: ByteTy, dl, N1: T, N2: DAG.getUNDEF(VT: ByteTy), Mask); |
1020 | if (!ZeroFill) |
1021 | return S; |
1022 | // Fill the bytes beyond BlockLen with 0s. |
1023 | // V6_pred_scalar2 cannot fill the entire predicate, so it only works |
1024 | // when BlockLen < HwLen. |
1025 | assert(BlockLen < HwLen && "vsetq(v1) prerequisite"); |
1026 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen); |
1027 | SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy, |
1028 | Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG); |
1029 | SDValue M = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Q); |
1030 | return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ByteTy, N1: S, N2: M); |
1031 | } |
1032 | |
1033 | // Make sure that this is a valid scalar predicate. |
1034 | assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1); |
1035 | |
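// Number of bytes in the 64-bit P2D expansion that correspond to a single
// predicate element before any widening.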
1036 | unsigned Bytes = 8 / PredTy.getVectorNumElements(); |
1037 | SmallVector<SDValue,4> Words[2]; |
1038 | unsigned IdxW = 0; |
1039 | |
1040 | SDValue W0 = isUndef(Op: PredV) |
1041 | ? DAG.getUNDEF(VT: MVT::i64) |
1042 | : DAG.getNode(Opcode: HexagonISD::P2D, DL: dl, VT: MVT::i64, Operand: PredV); |
1043 | Words[IdxW].push_back(Elt: HiHalf(V: W0, DAG)); |
1044 | Words[IdxW].push_back(Elt: LoHalf(V: W0, DAG)); |
1045 | |
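// Repeatedly double the number of bytes covered by each predicate element
// until it reaches BitBytes: below word size expand via expandPredicate,
// from a word up simply duplicate each word.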
1046 | while (Bytes < BitBytes) { |
1047 | IdxW ^= 1; |
1048 | Words[IdxW].clear(); |
1049 | |
1050 | if (Bytes < 4) { |
1051 | for (const SDValue &W : Words[IdxW ^ 1]) { |
1052 | SDValue T = expandPredicate(Vec32: W, dl, DAG); |
1053 | Words[IdxW].push_back(Elt: HiHalf(V: T, DAG)); |
1054 | Words[IdxW].push_back(Elt: LoHalf(V: T, DAG)); |
1055 | } |
1056 | } else { |
1057 | for (const SDValue &W : Words[IdxW ^ 1]) { |
1058 | Words[IdxW].push_back(Elt: W); |
1059 | Words[IdxW].push_back(Elt: W); |
1060 | } |
1061 | } |
1062 | Bytes *= 2; |
1063 | } |
1064 | |
1065 | assert(Bytes == BitBytes); |
1066 | |
1067 | SDValue Vec = ZeroFill ? getZero(dl, Ty: ByteTy, DAG) : DAG.getUNDEF(VT: ByteTy); |
1068 | SDValue S4 = DAG.getConstant(Val: HwLen-4, DL: dl, VT: MVT::i32); |
1069 | for (const SDValue &W : Words[IdxW]) { |
1070 | Vec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Vec, N2: S4); |
1071 | Vec = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: ByteTy, N1: Vec, N2: W); |
1072 | } |
1073 | |
1074 | return Vec; |
1075 | } |
1076 | |
1077 | SDValue |
1078 | HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values, |
1079 | const SDLoc &dl, MVT VecTy, |
1080 | SelectionDAG &DAG) const { |
1081 | // Construct a vector V of bytes, such that a comparison V >u 0 would |
1082 | // produce the required vector predicate. |
1083 | unsigned VecLen = Values.size(); |
1084 | unsigned HwLen = Subtarget.getVectorLength(); |
1085 | assert(VecLen <= HwLen || VecLen == 8*HwLen); |
1086 | SmallVector<SDValue,128> Bytes; |
1087 | bool AllT = true, AllF = true; |
1088 | |
1089 | auto IsTrue = [] (SDValue V) { |
1090 | if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode())) |
1091 | return !N->isZero(); |
1092 | return false; |
1093 | }; |
1094 | auto IsFalse = [] (SDValue V) { |
1095 | if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode())) |
1096 | return N->isZero(); |
1097 | return false; |
1098 | }; |
1099 | |
1100 | if (VecLen <= HwLen) { |
1101 | // In the hardware, each bit of a vector predicate corresponds to a byte |
1102 | // of a vector register. Calculate how many bytes a bit of VecTy |
1103 | // corresponds to. |
1104 | assert(HwLen % VecLen == 0); |
1105 | unsigned BitBytes = HwLen / VecLen; |
1106 | for (SDValue V : Values) { |
1107 | AllT &= IsTrue(V); |
1108 | AllF &= IsFalse(V); |
1109 | |
1110 | SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(Op: V, DL: dl, VT: MVT::i8) |
1111 | : DAG.getUNDEF(VT: MVT::i8); |
1112 | for (unsigned B = 0; B != BitBytes; ++B) |
1113 | Bytes.push_back(Elt: Ext); |
1114 | } |
1115 | } else { |
1116 | // There are as many i1 values as there are bits in a vector register. |
1117 | // Divide the values into groups of 8 and check that each group consists |
1118 | // of the same value (ignoring undefs). |
1119 | for (unsigned I = 0; I != VecLen; I += 8) { |
1120 | unsigned B = 0; |
1121 | // Find the first non-undef value in this group. |
1122 | for (; B != 8; ++B) { |
1123 | if (!Values[I+B].isUndef()) |
1124 | break; |
1125 | } |
1126 | SDValue F = Values[I+B]; |
1127 | AllT &= IsTrue(F); |
1128 | AllF &= IsFalse(F); |
1129 | |
1130 | SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(Op: F, DL: dl, VT: MVT::i8) |
1131 | : DAG.getUNDEF(VT: MVT::i8); |
1132 | Bytes.push_back(Elt: Ext); |
1133 | // Verify that the rest of values in the group are the same as the |
1134 | // first. |
1135 | for (; B != 8; ++B) |
1136 | assert(Values[I+B].isUndef() || Values[I+B] == F); |
1137 | } |
1138 | } |
1139 | |
1140 | if (AllT) |
1141 | return DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: VecTy); |
1142 | if (AllF) |
1143 | return DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: VecTy); |
1144 | |
1145 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1146 | SDValue ByteVec = buildHvxVectorReg(Values: Bytes, dl, VecTy: ByteTy, DAG); |
1147 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec); |
1148 | } |
1149 | |
1150 | SDValue |
1151 | HexagonTargetLowering::extractHvxElementReg(SDValue VecV, SDValue IdxV, |
1152 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1153 | MVT ElemTy = ty(Op: VecV).getVectorElementType(); |
1154 | |
1155 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1156 | assert(ElemWidth >= 8 && ElemWidth <= 32); |
1157 | (void)ElemWidth; |
1158 | |
1159 | SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG); |
1160 | SDValue ExWord = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, |
1161 | Ops: {VecV, ByteIdx}); |
1162 | if (ElemTy == MVT::i32) |
1163 | return ExWord; |
1164 | |
1165 | // Have an extracted word, need to extract the smaller element out of it. |
1166 | // 1. Extract the bits of (the original) IdxV that correspond to the index |
1167 | // of the desired element in the 32-bit word. |
1168 | SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG); |
1169 | // 2. Extract the element from the word. |
1170 | SDValue ExVec = DAG.getBitcast(VT: tyVector(Ty: ty(Op: ExWord), ElemTy), V: ExWord); |
1171 | return extractVector(VecV: ExVec, IdxV: SubIdx, dl, ValTy: ElemTy, ResTy: MVT::i32, DAG); |
1172 | } |
1173 | |
1174 | SDValue |
1175 | HexagonTargetLowering::extractHvxElementPred(SDValue VecV, SDValue IdxV, |
1176 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1177 | // Implement other return types if necessary. |
1178 | assert(ResTy == MVT::i1); |
1179 | |
1180 | unsigned HwLen = Subtarget.getVectorLength(); |
1181 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1182 | SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV); |
1183 | |
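// Each bit of the predicate covers Scale bytes of its Q2V expansion, so
// scale the element index to a byte index before extracting.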
1184 | unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements(); |
1185 | SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32); |
1186 | IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV); |
1187 | |
1188 | SDValue ExtB = extractHvxElementReg(VecV: ByteVec, IdxV, dl, ResTy: MVT::i32, DAG); |
1189 | SDValue Zero = DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32); |
1190 | return getInstr(MachineOpc: Hexagon::C2_cmpgtui, dl, Ty: MVT::i1, Ops: {ExtB, Zero}, DAG); |
1191 | } |
1192 | |
1193 | SDValue |
1194 | HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV, |
1195 | SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { |
1196 | MVT ElemTy = ty(Op: VecV).getVectorElementType(); |
1197 | |
1198 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1199 | assert(ElemWidth >= 8 && ElemWidth <= 32); |
1200 | (void)ElemWidth; |
1201 | |
1202 | auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV, |
1203 | SDValue ByteIdxV) { |
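// A sketch of the idiom used here: rotate the vector right so that the
// word-aligned target byte offset lands at position 0, overwrite word 0
// with VINSERTW0, then rotate back by HwLen minus that offset. E.g. for
// ByteIdxV == 20 (assuming a 128-byte vector): VROR by 20, insert, then
// VROR by 108.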
1204 | MVT VecTy = ty(Op: VecV); |
1205 | unsigned HwLen = Subtarget.getVectorLength(); |
1206 | SDValue MaskV = |
1207 | DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, |
1208 | Ops: {ByteIdxV, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)}); |
1209 | SDValue RotV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {VecV, MaskV}); |
1210 | SDValue InsV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, Ops: {RotV, ValV}); |
1211 | SDValue SubV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, |
1212 | Ops: {DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32), MaskV}); |
1213 | SDValue TorV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {InsV, SubV}); |
1214 | return TorV; |
1215 | }; |
1216 | |
1217 | SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG); |
1218 | if (ElemTy == MVT::i32) |
1219 | return InsertWord(VecV, ValV, ByteIdx); |
1220 | |
1221 | // If this is not inserting a 32-bit word, convert it into such a thing. |
1222 | // 1. Extract the existing word from the target vector. |
1223 | SDValue WordIdx = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32, |
1224 | Ops: {ByteIdx, DAG.getConstant(Val: 2, DL: dl, VT: MVT::i32)}); |
1225 | SDValue Ext = extractHvxElementReg(VecV: opCastElem(Vec: VecV, ElemTy: MVT::i32, DAG), IdxV: WordIdx, |
1226 | dl, ResTy: MVT::i32, DAG); |
1227 | |
1228 | // 2. Treating the extracted word as a 32-bit vector, insert the given |
1229 | // value into it. |
1230 | SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG); |
1231 | MVT SubVecTy = tyVector(Ty: ty(Op: Ext), ElemTy); |
1232 | SDValue Ins = insertVector(VecV: DAG.getBitcast(VT: SubVecTy, V: Ext), |
1233 | ValV, IdxV: SubIdx, dl, ValTy: ElemTy, DAG); |
1234 | |
1235 | // 3. Insert the 32-bit word back into the original vector. |
1236 | return InsertWord(VecV, Ins, ByteIdx); |
1237 | } |
1238 | |
1239 | SDValue |
1240 | HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV, |
1241 | SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { |
1242 | unsigned HwLen = Subtarget.getVectorLength(); |
1243 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1244 | SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV); |
1245 | |
1246 | unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements(); |
1247 | SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32); |
1248 | IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV); |
1249 | ValV = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i32, Operand: ValV); |
1250 | |
1251 | SDValue InsV = insertHvxElementReg(VecV: ByteVec, IdxV, ValV, dl, DAG); |
1252 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ty(Op: VecV), Operand: InsV); |
1253 | } |
1254 | |
1255 | SDValue |
HexagonTargetLowering::extractHvxSubvectorReg(SDValue OrigOp, SDValue VecV,
1257 | SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1258 | MVT VecTy = ty(Op: VecV); |
1259 | unsigned HwLen = Subtarget.getVectorLength(); |
1260 | unsigned Idx = IdxV.getNode()->getAsZExtVal(); |
1261 | MVT ElemTy = VecTy.getVectorElementType(); |
1262 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1263 | |
1264 | // If the source vector is a vector pair, get the single vector containing |
1265 | // the subvector of interest. The subvector will never overlap two single |
1266 | // vectors. |
1267 | if (isHvxPairTy(Ty: VecTy)) { |
1268 | unsigned SubIdx = Hexagon::vsub_lo; |
1269 | if (Idx * ElemWidth >= 8 * HwLen) { |
1270 | SubIdx = Hexagon::vsub_hi; |
1271 | Idx -= VecTy.getVectorNumElements() / 2; |
1272 | } |
1273 | |
1274 | VecTy = typeSplit(VecTy).first; |
1275 | VecV = DAG.getTargetExtractSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV); |
1276 | if (VecTy == ResTy) |
1277 | return VecV; |
1278 | } |
1279 | |
1280 | // The only meaningful subvectors of a single HVX vector are those that |
1281 | // fit in a scalar register. |
1282 | assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64); |
1283 | |
1284 | MVT WordTy = tyVector(Ty: VecTy, ElemTy: MVT::i32); |
1285 | SDValue WordVec = DAG.getBitcast(VT: WordTy, V: VecV); |
1286 | unsigned WordIdx = (Idx*ElemWidth) / 32; |
1287 | |
1288 | SDValue W0Idx = DAG.getConstant(Val: WordIdx, DL: dl, VT: MVT::i32); |
1289 | SDValue W0 = extractHvxElementReg(VecV: WordVec, IdxV: W0Idx, dl, ResTy: MVT::i32, DAG); |
1290 | if (ResTy.getSizeInBits() == 32) |
1291 | return DAG.getBitcast(VT: ResTy, V: W0); |
1292 | |
1293 | SDValue W1Idx = DAG.getConstant(Val: WordIdx+1, DL: dl, VT: MVT::i32); |
1294 | SDValue W1 = extractHvxElementReg(VecV: WordVec, IdxV: W1Idx, dl, ResTy: MVT::i32, DAG); |
1295 | SDValue WW = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::i64, DAG); |
1296 | return DAG.getBitcast(VT: ResTy, V: WW); |
1297 | } |
1298 | |
1299 | SDValue |
HexagonTargetLowering::extractHvxSubvectorPred(SDValue VecV, SDValue IdxV,
1301 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1302 | MVT VecTy = ty(Op: VecV); |
1303 | unsigned HwLen = Subtarget.getVectorLength(); |
1304 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1305 | SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV); |
1306 | // IdxV is required to be a constant. |
1307 | unsigned Idx = IdxV.getNode()->getAsZExtVal(); |
1308 | |
1309 | unsigned ResLen = ResTy.getVectorNumElements(); |
1310 | unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); |
1311 | unsigned Offset = Idx * BitBytes; |
1312 | SDValue Undef = DAG.getUNDEF(VT: ByteTy); |
1313 | SmallVector<int,128> Mask; |
1314 | |
1315 | if (Subtarget.isHVXVectorType(VecTy: ResTy, IncludeBool: true)) { |
1316 | // Converting between two vector predicates. Since the result is shorter |
1317 | // than the source, it will correspond to a vector predicate with the |
1318 | // relevant bits replicated. The replication count is the ratio of the |
1319 | // source and target vector lengths. |
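// E.g. (a sketch, assuming HwLen == 128): extracting v32i1 from v64i1 at
// Idx == 32 gives BitBytes == 2, Offset == 64, Rep == 2, and the mask
// 64,64,65,65,...,127,127, so each of the high 64 bytes is duplicated to
// fill the 4-bytes-per-element layout of the result.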
1320 | unsigned Rep = VecTy.getVectorNumElements() / ResLen; |
1321 | assert(isPowerOf2_32(Rep) && HwLen % Rep == 0); |
1322 | for (unsigned i = 0; i != HwLen/Rep; ++i) { |
1323 | for (unsigned j = 0; j != Rep; ++j) |
1324 | Mask.push_back(Elt: i + Offset); |
1325 | } |
1326 | SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask); |
1327 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: ShuffV); |
1328 | } |
1329 | |
1330 | // Converting between a vector predicate and a scalar predicate. In the |
1331 | // vector predicate, a group of BitBytes bits will correspond to a single |
1332 | // i1 element of the source vector type. Those bits will all have the same |
1333 | // value. The same will be true for ByteVec, where each byte corresponds |
1334 | // to a bit in the vector predicate. |
1335 | // The algorithm is to traverse the ByteVec, going over the i1 values from |
1336 | // the source vector, and generate the corresponding representation in an |
1337 | // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the |
1338 | // elements so that the interesting 8 bytes will be in the low end of the |
1339 | // vector. |
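// E.g. (a sketch, assuming HwLen == 128, VecTy == v32i1, ResTy == v8i1,
// Idx == 8): BitBytes == 4, Offset == 32, Rep == 1, and the mask selects
// bytes 32,36,...,60 into the low 8 lanes (repeated to fill the register);
// the two low words are then combined into a v8i8 and compared against 0.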
1340 | unsigned Rep = 8 / ResLen; |
// Make sure the output fills the entire vector register, so repeat the
1342 | // 8-byte groups as many times as necessary. |
1343 | for (unsigned r = 0; r != HwLen/ResLen; ++r) { |
1344 | // This will generate the indexes of the 8 interesting bytes. |
1345 | for (unsigned i = 0; i != ResLen; ++i) { |
1346 | for (unsigned j = 0; j != Rep; ++j) |
1347 | Mask.push_back(Elt: Offset + i*BitBytes); |
1348 | } |
1349 | } |
1350 | |
1351 | SDValue Zero = getZero(dl, Ty: MVT::i32, DAG); |
1352 | SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask); |
1353 | // Combine the two low words from ShuffV into a v8i8, and byte-compare |
1354 | // them against 0. |
1355 | SDValue W0 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, Ops: {ShuffV, Zero}); |
1356 | SDValue W1 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, |
1357 | Ops: {ShuffV, DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32)}); |
1358 | SDValue Vec64 = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::v8i8, DAG); |
1359 | return getInstr(MachineOpc: Hexagon::A4_vcmpbgtui, dl, Ty: ResTy, |
1360 | Ops: {Vec64, DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32)}, DAG); |
1361 | } |
1362 | |
1363 | SDValue |
1364 | HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV, |
1365 | SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { |
1366 | MVT VecTy = ty(Op: VecV); |
1367 | MVT SubTy = ty(Op: SubV); |
1368 | unsigned HwLen = Subtarget.getVectorLength(); |
1369 | MVT ElemTy = VecTy.getVectorElementType(); |
1370 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1371 | |
1372 | bool IsPair = isHvxPairTy(Ty: VecTy); |
1373 | MVT SingleTy = MVT::getVectorVT(VT: ElemTy, NumElements: (8*HwLen)/ElemWidth); |
1374 | // The two single vectors that VecV consists of, if it's a pair. |
1375 | SDValue V0, V1; |
1376 | SDValue SingleV = VecV; |
1377 | SDValue PickHi; |
1378 | |
1379 | if (IsPair) { |
1380 | V0 = LoHalf(V: VecV, DAG); |
1381 | V1 = HiHalf(V: VecV, DAG); |
1382 | |
1383 | SDValue HalfV = DAG.getConstant(Val: SingleTy.getVectorNumElements(), |
1384 | DL: dl, VT: MVT::i32); |
1385 | PickHi = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: IdxV, RHS: HalfV, Cond: ISD::SETUGT); |
1386 | if (isHvxSingleTy(Ty: SubTy)) { |
1387 | if (const auto *CN = dyn_cast<const ConstantSDNode>(Val: IdxV.getNode())) { |
1388 | unsigned Idx = CN->getZExtValue(); |
1389 | assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2); |
1390 | unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi; |
1391 | return DAG.getTargetInsertSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV, Subreg: SubV); |
1392 | } |
1393 | // If IdxV is not a constant, generate the two variants: with the |
1394 | // SubV as the high and as the low subregister, and select the right |
1395 | // pair based on the IdxV. |
1396 | SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SubV, V1}); |
1397 | SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SubV}); |
1398 | return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo); |
1399 | } |
1400 | // The subvector being inserted must be entirely contained in one of |
1401 | // the vectors V0 or V1. Set SingleV to the correct one, and update |
1402 | // IdxV to be the index relative to the beginning of that vector. |
1403 | SDValue S = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: IdxV, N2: HalfV); |
1404 | IdxV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: PickHi, N2: S, N3: IdxV); |
1405 | SingleV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: SingleTy, N1: PickHi, N2: V1, N3: V0); |
1406 | } |
1407 | |
1408 | // The only meaningful subvectors of a single HVX vector are those that |
1409 | // fit in a scalar register. |
1410 | assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64); |
1411 | // Convert IdxV to be index in bytes. |
1412 | auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode()); |
1413 | if (!IdxN || !IdxN->isZero()) { |
1414 | IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, |
1415 | N2: DAG.getConstant(Val: ElemWidth/8, DL: dl, VT: MVT::i32)); |
1416 | SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: IdxV); |
1417 | } |
1418 | // When inserting a single word, the rotation back to the original position |
1419 | // would be by HwLen-Idx, but if two words are inserted, it will need to be |
1420 | // by (HwLen-4)-Idx. |
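// E.g. (a sketch, HwLen == 128, inserting a 64-bit subvector at byte
// offset 16): rotate right by 16, insert word 0, rotate by 4, insert
// word 1, then rotate by (128-4)-16 == 108; the rotations sum to 128,
// i.e. back to the original position.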
1421 | unsigned RolBase = HwLen; |
1422 | if (SubTy.getSizeInBits() == 32) { |
1423 | SDValue V = DAG.getBitcast(VT: MVT::i32, V: SubV); |
1424 | SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: V); |
1425 | } else { |
1426 | SDValue V = DAG.getBitcast(VT: MVT::i64, V: SubV); |
1427 | SDValue R0 = LoHalf(V, DAG); |
1428 | SDValue R1 = HiHalf(V, DAG); |
1429 | SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R0); |
1430 | SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, |
1431 | N2: DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32)); |
1432 | SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R1); |
1433 | RolBase = HwLen-4; |
1434 | } |
1435 | // If the vector wasn't ror'ed, don't ror it back. |
1436 | if (RolBase != 4 || !IdxN || !IdxN->isZero()) { |
1437 | SDValue RolV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, |
1438 | N1: DAG.getConstant(Val: RolBase, DL: dl, VT: MVT::i32), N2: IdxV); |
1439 | SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: RolV); |
1440 | } |
1441 | |
1442 | if (IsPair) { |
1443 | SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SingleV, V1}); |
1444 | SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SingleV}); |
1445 | return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo); |
1446 | } |
1447 | return SingleV; |
1448 | } |
1449 | |
1450 | SDValue |
1451 | HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV, |
1452 | SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const { |
1453 | MVT VecTy = ty(Op: VecV); |
1454 | MVT SubTy = ty(Op: SubV); |
1455 | assert(Subtarget.isHVXVectorType(VecTy, true)); |
1456 | // VecV is an HVX vector predicate. SubV may be either an HVX vector |
1457 | // predicate as well, or it can be a scalar predicate. |
1458 | |
1459 | unsigned VecLen = VecTy.getVectorNumElements(); |
1460 | unsigned HwLen = Subtarget.getVectorLength(); |
1461 | assert(HwLen % VecLen == 0 && "Unexpected vector type" ); |
1462 | |
1463 | unsigned Scale = VecLen / SubTy.getVectorNumElements(); |
1464 | unsigned BitBytes = HwLen / VecLen; |
1465 | unsigned BlockLen = HwLen / Scale; |
1466 | |
1467 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1468 | SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV); |
1469 | SDValue ByteSub = createHvxPrefixPred(PredV: SubV, dl, BitBytes, ZeroFill: false, DAG); |
1470 | SDValue ByteIdx; |
1471 | |
1472 | auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode()); |
1473 | if (!IdxN || !IdxN->isZero()) { |
1474 | ByteIdx = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, |
1475 | N2: DAG.getConstant(Val: BitBytes, DL: dl, VT: MVT::i32)); |
1476 | ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteIdx); |
1477 | } |
1478 | |
1479 | // ByteVec is the target vector VecV rotated in such a way that the |
1480 | // subvector should be inserted at index 0. Generate a predicate mask |
1481 | // and use vmux to do the insertion. |
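// E.g. (a sketch, HwLen == 128, inserting v16i1 into v32i1): BitBytes == 4
// and BlockLen == 64, so the mask covers the first 64 byte lanes and vmux
// takes those from ByteSub and the remaining 64 from the rotated ByteVec.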
1482 | assert(BlockLen < HwLen && "vsetq(v1) prerequisite" ); |
1483 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen); |
1484 | SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy, |
1485 | Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG); |
1486 | ByteVec = getInstr(MachineOpc: Hexagon::V6_vmux, dl, Ty: ByteTy, Ops: {Q, ByteSub, ByteVec}, DAG); |
1487 | // Rotate ByteVec back, and convert to a vector predicate. |
1488 | if (!IdxN || !IdxN->isZero()) { |
1489 | SDValue HwLenV = DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32); |
1490 | SDValue ByteXdi = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: HwLenV, N2: ByteIdx); |
1491 | ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteXdi); |
1492 | } |
1493 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec); |
1494 | } |
1495 | |
1496 | SDValue |
1497 | HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl, |
1498 | MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const { |
1499 | // Sign- and any-extending of a vector predicate to a vector register is |
1500 | // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and |
1501 | // a vector of 1s (where the 1s are of type matching the vector type). |
1502 | assert(Subtarget.isHVXVectorType(ResTy)); |
1503 | if (!ZeroExt) |
1504 | return DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ResTy, Operand: VecV); |
1505 | |
1506 | assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements()); |
1507 | SDValue True = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1508 | Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32)); |
1509 | SDValue False = getZero(dl, Ty: ResTy, DAG); |
1510 | return DAG.getSelect(DL: dl, VT: ResTy, Cond: VecV, LHS: True, RHS: False); |
1511 | } |
1512 | |
1513 | SDValue |
1514 | HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl, |
1515 | MVT ResTy, SelectionDAG &DAG) const { |
1516 | // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1] |
1517 | // (i.e. the entire predicate register) to bits [0..HwLen-1] of a |
1518 | // vector register. The remaining bits of the vector register are |
1519 | // unspecified. |
1520 | |
1521 | MachineFunction &MF = DAG.getMachineFunction(); |
1522 | unsigned HwLen = Subtarget.getVectorLength(); |
1523 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1524 | MVT PredTy = ty(Op: VecQ); |
1525 | unsigned PredLen = PredTy.getVectorNumElements(); |
1526 | assert(HwLen % PredLen == 0); |
1527 | MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 8*HwLen/PredLen), NumElements: PredLen); |
1528 | |
1529 | Type *Int8Ty = Type::getInt8Ty(C&: *DAG.getContext()); |
1530 | SmallVector<Constant*, 128> Tmp; |
1531 | // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,... |
1532 | // These are bytes with the LSB rotated left with respect to their index. |
1533 | for (unsigned i = 0; i != HwLen/8; ++i) { |
1534 | for (unsigned j = 0; j != 8; ++j) |
1535 | Tmp.push_back(Elt: ConstantInt::get(Ty: Int8Ty, V: 1ull << j)); |
1536 | } |
1537 | Constant *CV = ConstantVector::get(V: Tmp); |
1538 | Align Alignment(HwLen); |
1539 | SDValue CP = |
1540 | LowerConstantPool(Op: DAG.getConstantPool(C: CV, VT: ByteTy, Align: Alignment), DAG); |
1541 | SDValue Bytes = |
1542 | DAG.getLoad(VT: ByteTy, dl, Chain: DAG.getEntryNode(), Ptr: CP, |
1543 | PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment); |
1544 | |
1545 | // Select the bytes that correspond to true bits in the vector predicate. |
1546 | SDValue Sel = DAG.getSelect(DL: dl, VT: VecTy, Cond: VecQ, LHS: DAG.getBitcast(VT: VecTy, V: Bytes), |
1547 | RHS: getZero(dl, Ty: VecTy, DAG)); |
1548 | // Calculate the OR of all bytes in each group of 8. That will compress |
1549 | // all the individual bits into a single byte. |
1550 | // First, OR groups of 4, via vrmpy with 0x01010101. |
1551 | SDValue All1 = |
1552 | DAG.getSplatBuildVector(VT: MVT::v4i8, DL: dl, Op: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32)); |
1553 | SDValue Vrmpy = getInstr(MachineOpc: Hexagon::V6_vrmpyub, dl, Ty: ByteTy, Ops: {Sel, All1}, DAG); |
1554 | // Then rotate the accumulated vector by 4 bytes, and do the final OR. |
1555 | SDValue Rot = getInstr(MachineOpc: Hexagon::V6_valignbi, dl, Ty: ByteTy, |
1556 | Ops: {Vrmpy, Vrmpy, DAG.getTargetConstant(Val: 4, DL: dl, VT: MVT::i32)}, DAG); |
1557 | SDValue Vor = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, Ops: {Vrmpy, Rot}); |
1558 | |
1559 | // Pick every 8th byte and coalesce them at the beginning of the output. |
1560 | // For symmetry, coalesce every 1+8th byte after that, then every 2+8th |
1561 | // byte and so on. |
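// E.g. (a sketch, assuming HwLen == 128): the mask is 0,8,...,120, then
// 1,9,...,121, and so on, so the 16 compressed bytes (sitting at byte
// offsets 0,8,...,120 of Vor) end up in the first 16 lanes of the result.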
1562 | SmallVector<int,128> Mask; |
1563 | for (unsigned i = 0; i != HwLen; ++i) |
1564 | Mask.push_back(Elt: (8*i) % HwLen + i/(HwLen/8)); |
1565 | SDValue Collect = |
1566 | DAG.getVectorShuffle(VT: ByteTy, dl, N1: Vor, N2: DAG.getUNDEF(VT: ByteTy), Mask); |
1567 | return DAG.getBitcast(VT: ResTy, V: Collect); |
1568 | } |
1569 | |
1570 | SDValue |
1571 | HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed, |
1572 | const SDLoc &dl, SelectionDAG &DAG) const { |
1573 | // Take a vector and resize the element type to match the given type. |
1574 | MVT InpTy = ty(Op: VecV); |
1575 | if (InpTy == ResTy) |
1576 | return VecV; |
1577 | |
1578 | unsigned InpWidth = InpTy.getSizeInBits(); |
1579 | unsigned ResWidth = ResTy.getSizeInBits(); |
1580 | |
1581 | if (InpTy.isFloatingPoint()) { |
1582 | return InpWidth < ResWidth |
1583 | ? DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: ResTy, Operand: VecV) |
1584 | : DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: ResTy, N1: VecV, |
1585 | N2: DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32)); |
1586 | } |
1587 | |
1588 | assert(InpTy.isInteger()); |
1589 | |
1590 | if (InpWidth < ResWidth) { |
1591 | unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
1592 | return DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ResTy, Operand: VecV); |
1593 | } else { |
1594 | unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT; |
1595 | return DAG.getNode(Opcode: NarOpc, DL: dl, VT: ResTy, N1: VecV, N2: DAG.getValueType(ResTy)); |
1596 | } |
1597 | } |
1598 | |
1599 | SDValue |
HexagonTargetLowering::extractSubvector(SDValue Vec, MVT SubTy, unsigned SubIdx,
1601 | SelectionDAG &DAG) const { |
1602 | assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0); |
1603 | |
1604 | const SDLoc &dl(Vec); |
1605 | unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements(); |
1606 | return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: SubTy, |
1607 | Ops: {Vec, DAG.getConstant(Val: ElemIdx, DL: dl, VT: MVT::i32)}); |
1608 | } |
1609 | |
1610 | SDValue |
1611 | HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) |
1612 | const { |
1613 | const SDLoc &dl(Op); |
1614 | MVT VecTy = ty(Op); |
1615 | |
1616 | unsigned Size = Op.getNumOperands(); |
1617 | SmallVector<SDValue,128> Ops; |
1618 | for (unsigned i = 0; i != Size; ++i) |
1619 | Ops.push_back(Elt: Op.getOperand(i)); |
1620 | |
1621 | if (VecTy.getVectorElementType() == MVT::i1) |
1622 | return buildHvxVectorPred(Values: Ops, dl, VecTy, DAG); |
1623 | |
// For an MVT::f16 BUILD_VECTOR, since MVT::f16 is not a legal type,
// bitcast the operands to i16, build the vector with i16 elements,
// and bitcast the result back to f16.
1627 | if (VecTy.getVectorElementType() == MVT::f16) { |
1628 | SmallVector<SDValue,64> NewOps; |
1629 | for (unsigned i = 0; i != Size; i++) |
1630 | NewOps.push_back(Elt: DAG.getBitcast(VT: MVT::i16, V: Ops[i])); |
1631 | |
1632 | SDValue T0 = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl, |
1633 | VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), Ops: NewOps); |
1634 | return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::f16), V: T0); |
1635 | } |
1636 | |
1637 | // First, split the BUILD_VECTOR for vector pairs. We could generate |
1638 | // some pairs directly (via splat), but splats should be generated |
1639 | // by the combiner prior to getting here. |
1640 | if (VecTy.getSizeInBits() == 16 * Subtarget.getVectorLength()) { |
1641 | ArrayRef<SDValue> A(Ops); |
1642 | MVT SingleTy = typeSplit(VecTy).first; |
1643 | SDValue V0 = buildHvxVectorReg(Values: A.take_front(N: Size / 2), dl, VecTy: SingleTy, DAG); |
1644 | SDValue V1 = buildHvxVectorReg(Values: A.drop_front(N: Size / 2), dl, VecTy: SingleTy, DAG); |
1645 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, N1: V0, N2: V1); |
1646 | } |
1647 | |
1648 | return buildHvxVectorReg(Values: Ops, dl, VecTy, DAG); |
1649 | } |
1650 | |
1651 | SDValue |
1652 | HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG) |
1653 | const { |
1654 | const SDLoc &dl(Op); |
1655 | MVT VecTy = ty(Op); |
1656 | MVT ArgTy = ty(Op: Op.getOperand(i: 0)); |
1657 | |
1658 | if (ArgTy == MVT::f16) { |
1659 | MVT SplatTy = MVT::getVectorVT(VT: MVT::i16, NumElements: VecTy.getVectorNumElements()); |
1660 | SDValue ToInt16 = DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: 0)); |
1661 | SDValue ToInt32 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: ToInt16); |
1662 | SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: SplatTy, Operand: ToInt32); |
1663 | return DAG.getBitcast(VT: VecTy, V: Splat); |
1664 | } |
1665 | |
1666 | return SDValue(); |
1667 | } |
1668 | |
1669 | SDValue |
1670 | HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG) |
1671 | const { |
1672 | // Vector concatenation of two integer (non-bool) vectors does not need |
1673 | // special lowering. Custom-lower concats of bool vectors and expand |
1674 | // concats of more than 2 vectors. |
1675 | MVT VecTy = ty(Op); |
1676 | const SDLoc &dl(Op); |
1677 | unsigned NumOp = Op.getNumOperands(); |
1678 | if (VecTy.getVectorElementType() != MVT::i1) { |
1679 | if (NumOp == 2) |
1680 | return Op; |
1681 | // Expand the other cases into a build-vector. |
1682 | SmallVector<SDValue,8> Elems; |
1683 | for (SDValue V : Op.getNode()->ops()) |
1684 | DAG.ExtractVectorElements(Op: V, Args&: Elems); |
1685 | // A vector of i16 will be broken up into a build_vector of i16's. |
1686 | // This is a problem, since at the time of operation legalization, |
1687 | // all operations are expected to be type-legalized, and i16 is not |
1688 | // a legal type. If any of the extracted elements is not of a valid |
1689 | // type, sign-extend it to a valid one. |
1690 | for (unsigned i = 0, e = Elems.size(); i != e; ++i) { |
1691 | SDValue V = Elems[i]; |
1692 | MVT Ty = ty(Op: V); |
1693 | if (!isTypeLegal(VT: Ty)) { |
1694 | MVT NTy = typeLegalize(Ty, DAG); |
1695 | if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) { |
1696 | Elems[i] = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: NTy, |
1697 | N1: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: NTy, |
1698 | N1: V.getOperand(i: 0), N2: V.getOperand(i: 1)), |
1699 | N2: DAG.getValueType(Ty)); |
1700 | continue; |
1701 | } |
1702 | // A few less complicated cases. |
1703 | switch (V.getOpcode()) { |
1704 | case ISD::Constant: |
1705 | Elems[i] = DAG.getSExtOrTrunc(Op: V, DL: dl, VT: NTy); |
1706 | break; |
1707 | case ISD::UNDEF: |
1708 | Elems[i] = DAG.getUNDEF(VT: NTy); |
1709 | break; |
1710 | case ISD::TRUNCATE: |
1711 | Elems[i] = V.getOperand(i: 0); |
1712 | break; |
1713 | default: |
1714 | llvm_unreachable("Unexpected vector element" ); |
1715 | } |
1716 | } |
1717 | } |
1718 | return DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Elems); |
1719 | } |
1720 | |
1721 | assert(VecTy.getVectorElementType() == MVT::i1); |
1722 | unsigned HwLen = Subtarget.getVectorLength(); |
1723 | assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0); |
1724 | |
1725 | SDValue Op0 = Op.getOperand(i: 0); |
1726 | |
1727 | // If the operands are HVX types (i.e. not scalar predicates), then |
1728 | // defer the concatenation, and create QCAT instead. |
1729 | if (Subtarget.isHVXVectorType(VecTy: ty(Op: Op0), IncludeBool: true)) { |
1730 | if (NumOp == 2) |
1731 | return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: Op0, N2: Op.getOperand(i: 1)); |
1732 | |
1733 | ArrayRef<SDUse> U(Op.getNode()->ops()); |
1734 | SmallVector<SDValue, 4> SV(U); |
1735 | ArrayRef<SDValue> Ops(SV); |
1736 | |
1737 | MVT HalfTy = typeSplit(VecTy).first; |
1738 | SDValue V0 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy, |
1739 | Ops: Ops.take_front(N: NumOp/2)); |
1740 | SDValue V1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy, |
1741 | Ops: Ops.take_back(N: NumOp/2)); |
1742 | return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: V0, N2: V1); |
1743 | } |
1744 | |
1745 | // Count how many bytes (in a vector register) each bit in VecTy |
1746 | // corresponds to. |
1747 | unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); |
1748 | |
1749 | SmallVector<SDValue,8> Prefixes; |
1750 | for (SDValue V : Op.getNode()->op_values()) { |
1751 | SDValue P = createHvxPrefixPred(PredV: V, dl, BitBytes, ZeroFill: true, DAG); |
1752 | Prefixes.push_back(Elt: P); |
1753 | } |
1754 | |
1755 | unsigned InpLen = ty(Op: Op.getOperand(i: 0)).getVectorNumElements(); |
1756 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1757 | SDValue S = DAG.getConstant(Val: HwLen - InpLen*BitBytes, DL: dl, VT: MVT::i32); |
1758 | SDValue Res = getZero(dl, Ty: ByteTy, DAG); |
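// Stitch the prefixes together from the last operand to the first: each
// VROR by S (equivalent to a rotate left by the prefix size) moves what
// has been accumulated so far up by one prefix, so operand 0 ends up at
// byte offset 0, operand 1 right after it, and so on.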
1759 | for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) { |
1760 | Res = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Res, N2: S); |
1761 | Res = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, N1: Res, N2: Prefixes[e-i-1]); |
1762 | } |
1763 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: Res); |
1764 | } |
1765 | |
1766 | SDValue |
HexagonTargetLowering::LowerHvxExtractElement(SDValue Op, SelectionDAG &DAG)
1768 | const { |
1769 | // Change the type of the extracted element to i32. |
1770 | SDValue VecV = Op.getOperand(i: 0); |
1771 | MVT ElemTy = ty(Op: VecV).getVectorElementType(); |
1772 | const SDLoc &dl(Op); |
1773 | SDValue IdxV = Op.getOperand(i: 1); |
1774 | if (ElemTy == MVT::i1) |
1775 | return extractHvxElementPred(VecV, IdxV, dl, ResTy: ty(Op), DAG); |
1776 | |
1777 | return extractHvxElementReg(VecV, IdxV, dl, ResTy: ty(Op), DAG); |
1778 | } |
1779 | |
1780 | SDValue |
1781 | HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) |
1782 | const { |
1783 | const SDLoc &dl(Op); |
1784 | MVT VecTy = ty(Op); |
1785 | SDValue VecV = Op.getOperand(i: 0); |
1786 | SDValue ValV = Op.getOperand(i: 1); |
1787 | SDValue IdxV = Op.getOperand(i: 2); |
1788 | MVT ElemTy = ty(Op: VecV).getVectorElementType(); |
1789 | if (ElemTy == MVT::i1) |
1790 | return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG); |
1791 | |
1792 | if (ElemTy == MVT::f16) { |
1793 | SDValue T0 = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl, |
1794 | VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), |
1795 | N1: DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), V: VecV), |
1796 | N2: DAG.getBitcast(VT: MVT::i16, V: ValV), N3: IdxV); |
1797 | return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::f16), V: T0); |
1798 | } |
1799 | |
1800 | return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG); |
1801 | } |
1802 | |
1803 | SDValue |
HexagonTargetLowering::LowerHvxExtractSubvector(SDValue Op, SelectionDAG &DAG)
1805 | const { |
1806 | SDValue SrcV = Op.getOperand(i: 0); |
1807 | MVT SrcTy = ty(Op: SrcV); |
1808 | MVT DstTy = ty(Op); |
1809 | SDValue IdxV = Op.getOperand(i: 1); |
1810 | unsigned Idx = IdxV.getNode()->getAsZExtVal(); |
1811 | assert(Idx % DstTy.getVectorNumElements() == 0); |
1812 | (void)Idx; |
1813 | const SDLoc &dl(Op); |
1814 | |
1815 | MVT ElemTy = SrcTy.getVectorElementType(); |
1816 | if (ElemTy == MVT::i1) |
1817 | return extractHvxSubvectorPred(VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG); |
1818 | |
1819 | return extractHvxSubvectorReg(OrigOp: Op, VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG); |
1820 | } |
1821 | |
1822 | SDValue |
1823 | HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) |
1824 | const { |
1825 | // Idx does not need to be a constant. |
1826 | SDValue VecV = Op.getOperand(i: 0); |
1827 | SDValue ValV = Op.getOperand(i: 1); |
1828 | SDValue IdxV = Op.getOperand(i: 2); |
1829 | |
1830 | const SDLoc &dl(Op); |
1831 | MVT VecTy = ty(Op: VecV); |
1832 | MVT ElemTy = VecTy.getVectorElementType(); |
1833 | if (ElemTy == MVT::i1) |
1834 | return insertHvxSubvectorPred(VecV, SubV: ValV, IdxV, dl, DAG); |
1835 | |
1836 | return insertHvxSubvectorReg(VecV, SubV: ValV, IdxV, dl, DAG); |
1837 | } |
1838 | |
1839 | SDValue |
1840 | HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const { |
1841 | // Lower any-extends of boolean vectors to sign-extends, since they |
1842 | // translate directly to Q2V. Zero-extending could also be done equally |
1843 | // fast, but Q2V is used/recognized in more places. |
1844 | // For all other vectors, use zero-extend. |
1845 | MVT ResTy = ty(Op); |
1846 | SDValue InpV = Op.getOperand(i: 0); |
1847 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1848 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy)) |
1849 | return LowerHvxSignExt(Op, DAG); |
1850 | return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Op), VT: ResTy, Operand: InpV); |
1851 | } |
1852 | |
1853 | SDValue |
1854 | HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const { |
1855 | MVT ResTy = ty(Op); |
1856 | SDValue InpV = Op.getOperand(i: 0); |
1857 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1858 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy)) |
1859 | return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: false, DAG); |
1860 | return Op; |
1861 | } |
1862 | |
1863 | SDValue |
1864 | HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const { |
1865 | MVT ResTy = ty(Op); |
1866 | SDValue InpV = Op.getOperand(i: 0); |
1867 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1868 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy)) |
1869 | return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: true, DAG); |
1870 | return Op; |
1871 | } |
1872 | |
1873 | SDValue |
1874 | HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const { |
1875 | // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight): |
1876 | // cttz(x) = bitwidth(x) - ctlz(~x & (x-1)) |
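// E.g. x = 0b0100 (as i8): ~x & (x-1) == 0b0011, ctlz of that is 6, and
// cttz(x) == 8 - 6 == 2.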
1877 | const SDLoc &dl(Op); |
1878 | MVT ResTy = ty(Op); |
1879 | SDValue InpV = Op.getOperand(i: 0); |
1880 | assert(ResTy == ty(InpV)); |
1881 | |
1882 | // Calculate the vectors of 1 and bitwidth(x). |
1883 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1884 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1885 | |
1886 | SDValue Vec1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1887 | Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32)); |
1888 | SDValue VecW = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1889 | Operand: DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32)); |
1890 | SDValue VecN1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1891 | Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32)); |
1892 | |
1893 | // Do not use DAG.getNOT, because that would create BUILD_VECTOR with |
1894 | // a BITCAST. Here we can skip the BITCAST (so we don't have to handle |
1895 | // it separately in custom combine or selection). |
1896 | SDValue A = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy, |
1897 | Ops: {DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {InpV, VecN1}), |
1898 | DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {InpV, Vec1})}); |
1899 | return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, |
1900 | Ops: {VecW, DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: ResTy, Operand: A)}); |
1901 | } |
1902 | |
1903 | SDValue |
1904 | HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const { |
1905 | const SDLoc &dl(Op); |
1906 | MVT ResTy = ty(Op); |
1907 | assert(ResTy.getVectorElementType() == MVT::i32); |
1908 | |
1909 | SDValue Vs = Op.getOperand(i: 0); |
1910 | SDValue Vt = Op.getOperand(i: 1); |
1911 | |
1912 | SDVTList ResTys = DAG.getVTList(VT1: ResTy, VT2: ResTy); |
1913 | unsigned Opc = Op.getOpcode(); |
1914 | |
1915 | // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI. |
1916 | if (Opc == ISD::MULHU) |
1917 | return DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1); |
1918 | if (Opc == ISD::MULHS) |
1919 | return DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1); |
1920 | |
1921 | #ifndef NDEBUG |
1922 | Op.dump(&DAG); |
1923 | #endif |
1924 | llvm_unreachable("Unexpected mulh operation" ); |
1925 | } |
1926 | |
1927 | SDValue |
1928 | HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const { |
1929 | const SDLoc &dl(Op); |
1930 | unsigned Opc = Op.getOpcode(); |
1931 | SDValue Vu = Op.getOperand(i: 0); |
1932 | SDValue Vv = Op.getOperand(i: 1); |
1933 | |
1934 | // If the HI part is not used, convert it to a regular MUL. |
1935 | if (auto HiVal = Op.getValue(R: 1); HiVal.use_empty()) { |
1936 | // Need to preserve the types and the number of values. |
1937 | SDValue Hi = DAG.getUNDEF(VT: ty(Op: HiVal)); |
1938 | SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: ty(Op), Ops: {Vu, Vv}); |
1939 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl); |
1940 | } |
1941 | |
1942 | bool SignedVu = Opc == HexagonISD::SMUL_LOHI; |
1943 | bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI; |
1944 | |
1945 | // Legal on HVX v62+, but lower it here because patterns can't handle multi- |
1946 | // valued nodes. |
1947 | if (Subtarget.useHVXV62Ops()) |
1948 | return emitHvxMulLoHiV62(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG); |
1949 | |
1950 | if (Opc == HexagonISD::SMUL_LOHI) { |
// Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI;
// for the other signedness combinations LOHI is cheaper.
1953 | if (auto LoVal = Op.getValue(R: 0); LoVal.use_empty()) { |
1954 | SDValue Hi = emitHvxMulHsV60(A: Vu, B: Vv, dl, DAG); |
1955 | SDValue Lo = DAG.getUNDEF(VT: ty(Op: LoVal)); |
1956 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl); |
1957 | } |
1958 | } |
1959 | |
1960 | return emitHvxMulLoHiV60(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG); |
1961 | } |
1962 | |
1963 | SDValue |
1964 | HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const { |
1965 | SDValue Val = Op.getOperand(i: 0); |
1966 | MVT ResTy = ty(Op); |
1967 | MVT ValTy = ty(Op: Val); |
1968 | const SDLoc &dl(Op); |
1969 | |
1970 | if (isHvxBoolTy(Ty: ValTy) && ResTy.isScalarInteger()) { |
1971 | unsigned HwLen = Subtarget.getVectorLength(); |
1972 | MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4); |
1973 | SDValue VQ = compressHvxPred(VecQ: Val, dl, ResTy: WordTy, DAG); |
1974 | unsigned BitWidth = ResTy.getSizeInBits(); |
1975 | |
1976 | if (BitWidth < 64) { |
1977 | SDValue W0 = extractHvxElementReg(VecV: VQ, IdxV: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32), |
1978 | dl, ResTy: MVT::i32, DAG); |
1979 | if (BitWidth == 32) |
1980 | return W0; |
1981 | assert(BitWidth < 32u); |
1982 | return DAG.getZExtOrTrunc(Op: W0, DL: dl, VT: ResTy); |
1983 | } |
1984 | |
1985 | // The result is >= 64 bits. The only options are 64 or 128. |
1986 | assert(BitWidth == 64 || BitWidth == 128); |
1987 | SmallVector<SDValue,4> Words; |
1988 | for (unsigned i = 0; i != BitWidth/32; ++i) { |
1989 | SDValue W = extractHvxElementReg( |
1990 | VecV: VQ, IdxV: DAG.getConstant(Val: i, DL: dl, VT: MVT::i32), dl, ResTy: MVT::i32, DAG); |
1991 | Words.push_back(Elt: W); |
1992 | } |
1993 | SmallVector<SDValue,2> Combines; |
1994 | assert(Words.size() % 2 == 0); |
1995 | for (unsigned i = 0, e = Words.size(); i < e; i += 2) { |
1996 | SDValue C = getCombine(Hi: Words[i+1], Lo: Words[i], dl, ResTy: MVT::i64, DAG); |
1997 | Combines.push_back(Elt: C); |
1998 | } |
1999 | |
2000 | if (BitWidth == 64) |
2001 | return Combines[0]; |
2002 | |
2003 | return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: ResTy, Ops: Combines); |
2004 | } |
2005 | if (isHvxBoolTy(Ty: ResTy) && ValTy.isScalarInteger()) { |
2006 | // Handle bitcast from i128 -> v128i1 and i64 -> v64i1. |
2007 | unsigned BitWidth = ValTy.getSizeInBits(); |
2008 | unsigned HwLen = Subtarget.getVectorLength(); |
2009 | assert(BitWidth == HwLen); |
2010 | |
2011 | MVT ValAsVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: BitWidth / 8); |
2012 | SDValue ValAsVec = DAG.getBitcast(VT: ValAsVecTy, V: Val); |
2013 | // Splat each byte of Val 8 times. |
2014 | // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8] |
// where b0, b1, ..., b15 are the least to most significant bytes of Val.
2016 | SmallVector<SDValue, 128> Bytes; |
2017 | // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,... |
2018 | // These are bytes with the LSB rotated left with respect to their index. |
2019 | SmallVector<SDValue, 128> Tmp; |
2020 | for (unsigned I = 0; I != HwLen / 8; ++I) { |
2021 | SDValue Idx = DAG.getConstant(Val: I, DL: dl, VT: MVT::i32); |
2022 | SDValue Byte = |
2023 | DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i8, N1: ValAsVec, N2: Idx); |
2024 | for (unsigned J = 0; J != 8; ++J) { |
2025 | Bytes.push_back(Elt: Byte); |
2026 | Tmp.push_back(Elt: DAG.getConstant(Val: 1ull << J, DL: dl, VT: MVT::i8)); |
2027 | } |
2028 | } |
2029 | |
2030 | MVT ConstantVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
2031 | SDValue ConstantVec = DAG.getBuildVector(VT: ConstantVecTy, DL: dl, Ops: Tmp); |
2032 | SDValue I2V = buildHvxVectorReg(Values: Bytes, dl, VecTy: ConstantVecTy, DAG); |
2033 | |
// After the AND, each byte of I2V is nonzero iff the corresponding bit of Val is set.
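// E.g. bit 10 of Val lives in byte b1: lane 10 of I2V holds b1, and
// AND-ing it with Tmp[10] == 0x04 leaves a nonzero byte exactly when
// bit 10 is set.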
2035 | I2V = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ConstantVecTy, Ops: {I2V, ConstantVec}); |
2036 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: I2V); |
2037 | } |
2038 | |
2039 | return Op; |
2040 | } |
2041 | |
2042 | SDValue |
2043 | HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const { |
2044 | // Sign- and zero-extends are legal. |
2045 | assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG); |
2046 | return DAG.getNode(Opcode: ISD::ZERO_EXTEND_VECTOR_INREG, DL: SDLoc(Op), VT: ty(Op), |
2047 | Operand: Op.getOperand(i: 0)); |
2048 | } |
2049 | |
2050 | SDValue |
2051 | HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const { |
2052 | MVT ResTy = ty(Op); |
2053 | if (ResTy.getVectorElementType() != MVT::i1) |
2054 | return Op; |
2055 | |
2056 | const SDLoc &dl(Op); |
2057 | unsigned HwLen = Subtarget.getVectorLength(); |
2058 | unsigned VecLen = ResTy.getVectorNumElements(); |
2059 | assert(HwLen % VecLen == 0); |
2060 | unsigned ElemSize = HwLen / VecLen; |
2061 | |
2062 | MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ElemSize * 8), NumElements: VecLen); |
2063 | SDValue S = |
2064 | DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: Op.getOperand(i: 0), |
2065 | N2: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 1)), |
2066 | N3: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 2))); |
2067 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: S); |
2068 | } |
2069 | |
2070 | SDValue |
2071 | HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const { |
2072 | if (SDValue S = getVectorShiftByInt(Op, DAG)) |
2073 | return S; |
2074 | return Op; |
2075 | } |
2076 | |
2077 | SDValue |
2078 | HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op, |
2079 | SelectionDAG &DAG) const { |
2080 | unsigned Opc = Op.getOpcode(); |
2081 | assert(Opc == ISD::FSHL || Opc == ISD::FSHR); |
2082 | |
2083 | // Make sure the shift amount is within the range of the bitwidth |
2084 | // of the element type. |
2085 | SDValue A = Op.getOperand(i: 0); |
2086 | SDValue B = Op.getOperand(i: 1); |
2087 | SDValue S = Op.getOperand(i: 2); |
2088 | |
2089 | MVT InpTy = ty(Op: A); |
2090 | MVT ElemTy = InpTy.getVectorElementType(); |
2091 | |
2092 | const SDLoc &dl(Op); |
2093 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
2094 | bool IsLeft = Opc == ISD::FSHL; |
2095 | |
2096 | // The expansion into regular shifts produces worse code for i8 and for |
2097 | // right shift of i32 on v65+. |
2098 | bool UseShifts = ElemTy != MVT::i8; |
2099 | if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32) |
2100 | UseShifts = false; |
2101 | |
2102 | if (SDValue SplatV = getSplatValue(Op: S, DAG); SplatV && UseShifts) { |
2103 | // If this is a funnel shift by a scalar, lower it into regular shifts. |
2104 | SDValue Mask = DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: MVT::i32); |
2105 | SDValue ModS = |
2106 | DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, |
2107 | Ops: {DAG.getZExtOrTrunc(Op: SplatV, DL: dl, VT: MVT::i32), Mask}); |
2108 | SDValue NegS = |
2109 | DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, |
2110 | Ops: {DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32), ModS}); |
2111 | SDValue IsZero = |
2112 | DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: ModS, RHS: getZero(dl, Ty: MVT::i32, DAG), Cond: ISD::SETEQ); |
// FSHL A, B  =>  (A << ModS) | (B >> NegS)
// FSHR A, B  =>  (A << NegS) | (B >> ModS)
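// E.g. for i16 elements and a splat shift amount of 3: ModS == 3 and
// NegS == 13, so FSHL yields (A << 3) | (B >> 13).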
2115 | SDValue Part1 = |
2116 | DAG.getNode(Opcode: HexagonISD::VASL, DL: dl, VT: InpTy, Ops: {A, IsLeft ? ModS : NegS}); |
2117 | SDValue Part2 = |
2118 | DAG.getNode(Opcode: HexagonISD::VLSR, DL: dl, VT: InpTy, Ops: {B, IsLeft ? NegS : ModS}); |
2119 | SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Part1, Part2}); |
2120 | // If the shift amount was 0, pick A or B, depending on the direction. |
2121 | // The opposite shift will also be by 0, so the "Or" will be incorrect. |
2122 | return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: InpTy, Ops: {IsZero, (IsLeft ? A : B), Or}); |
2123 | } |
2124 | |
2125 | SDValue Mask = DAG.getSplatBuildVector( |
2126 | VT: InpTy, DL: dl, Op: DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: ElemTy)); |
2127 | |
2128 | unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR; |
2129 | return DAG.getNode(Opcode: MOpc, DL: dl, VT: ty(Op), |
2130 | Ops: {A, B, DAG.getNode(Opcode: ISD::AND, DL: dl, VT: InpTy, Ops: {S, Mask})}); |
2131 | } |
2132 | |
2133 | SDValue |
2134 | HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const { |
2135 | const SDLoc &dl(Op); |
2136 | unsigned IntNo = Op.getConstantOperandVal(i: 0); |
2137 | SmallVector<SDValue> Ops(Op->ops()); |
2138 | |
2139 | auto Swap = [&](SDValue P) { |
2140 | return DAG.getMergeValues(Ops: {P.getValue(R: 1), P.getValue(R: 0)}, dl); |
2141 | }; |
2142 | |
2143 | switch (IntNo) { |
2144 | case Intrinsic::hexagon_V6_pred_typecast: |
2145 | case Intrinsic::hexagon_V6_pred_typecast_128B: { |
2146 | MVT ResTy = ty(Op), InpTy = ty(Op: Ops[1]); |
2147 | if (isHvxBoolTy(Ty: ResTy) && isHvxBoolTy(Ty: InpTy)) { |
2148 | if (ResTy == InpTy) |
2149 | return Ops[1]; |
2150 | return DAG.getNode(Opcode: HexagonISD::TYPECAST, DL: dl, VT: ResTy, Operand: Ops[1]); |
2151 | } |
2152 | break; |
2153 | } |
2154 | case Intrinsic::hexagon_V6_vmpyss_parts: |
2155 | case Intrinsic::hexagon_V6_vmpyss_parts_128B: |
2156 | return Swap(DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: Op->getVTList(), |
2157 | Ops: {Ops[1], Ops[2]})); |
2158 | case Intrinsic::hexagon_V6_vmpyuu_parts: |
2159 | case Intrinsic::hexagon_V6_vmpyuu_parts_128B: |
2160 | return Swap(DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: Op->getVTList(), |
2161 | Ops: {Ops[1], Ops[2]})); |
2162 | case Intrinsic::hexagon_V6_vmpyus_parts: |
2163 | case Intrinsic::hexagon_V6_vmpyus_parts_128B: { |
2164 | return Swap(DAG.getNode(Opcode: HexagonISD::USMUL_LOHI, DL: dl, VTList: Op->getVTList(), |
2165 | Ops: {Ops[1], Ops[2]})); |
2166 | } |
2167 | } // switch |
2168 | |
2169 | return Op; |
2170 | } |
2171 | |
2172 | SDValue |
2173 | HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const { |
2174 | const SDLoc &dl(Op); |
2175 | unsigned HwLen = Subtarget.getVectorLength(); |
2176 | MachineFunction &MF = DAG.getMachineFunction(); |
2177 | auto *MaskN = cast<MaskedLoadStoreSDNode>(Val: Op.getNode()); |
2178 | SDValue Mask = MaskN->getMask(); |
2179 | SDValue Chain = MaskN->getChain(); |
2180 | SDValue Base = MaskN->getBasePtr(); |
2181 | auto *MemOp = MF.getMachineMemOperand(MMO: MaskN->getMemOperand(), Offset: 0, Size: HwLen); |
2182 | |
2183 | unsigned Opc = Op->getOpcode(); |
2184 | assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE); |
2185 | |
2186 | if (Opc == ISD::MLOAD) { |
2187 | MVT ValTy = ty(Op); |
2188 | SDValue Load = DAG.getLoad(VT: ValTy, dl, Chain, Ptr: Base, MMO: MemOp); |
2189 | SDValue Thru = cast<MaskedLoadSDNode>(Val: MaskN)->getPassThru(); |
2190 | if (isUndef(Op: Thru)) |
2191 | return Load; |
2192 | SDValue VSel = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ValTy, N1: Mask, N2: Load, N3: Thru); |
2193 | return DAG.getMergeValues(Ops: {VSel, Load.getValue(R: 1)}, dl); |
2194 | } |
2195 | |
2196 | // MSTORE |
2197 | // HVX only has aligned masked stores. |
2198 | |
2199 | // TODO: Fold negations of the mask into the store. |
2200 | unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai; |
2201 | SDValue Value = cast<MaskedStoreSDNode>(Val: MaskN)->getValue(); |
2202 | SDValue Offset0 = DAG.getTargetConstant(Val: 0, DL: dl, VT: ty(Op: Base)); |
2203 | |
2204 | if (MaskN->getAlign().value() % HwLen == 0) { |
2205 | SDValue Store = getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other, |
2206 | Ops: {Mask, Base, Offset0, Value, Chain}, DAG); |
2207 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Store.getNode()), NewMemRefs: {MemOp}); |
2208 | return Store; |
2209 | } |
2210 | |
2211 | // Unaligned case. |
2212 | auto StoreAlign = [&](SDValue V, SDValue A) { |
2213 | SDValue Z = getZero(dl, Ty: ty(Op: V), DAG); |
2214 | // TODO: use funnel shifts? |
2215 | // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the |
2216 | // upper half. |
2217 | SDValue LoV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {V, Z, A}, DAG); |
2218 | SDValue HiV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {Z, V, A}, DAG); |
2219 | return std::make_pair(x&: LoV, y&: HiV); |
2220 | }; |
2221 | |
2222 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
2223 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen); |
2224 | SDValue MaskV = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Mask); |
2225 | VectorPair Tmp = StoreAlign(MaskV, Base); |
2226 | VectorPair MaskU = {DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.first), |
2227 | DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.second)}; |
2228 | VectorPair ValueU = StoreAlign(Value, Base); |
2229 | |
2230 | SDValue Offset1 = DAG.getTargetConstant(Val: HwLen, DL: dl, VT: MVT::i32); |
2231 | SDValue StoreLo = |
2232 | getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other, |
2233 | Ops: {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG); |
2234 | SDValue StoreHi = |
2235 | getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other, |
2236 | Ops: {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG); |
2237 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreLo.getNode()), NewMemRefs: {MemOp}); |
2238 | DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreHi.getNode()), NewMemRefs: {MemOp}); |
2239 | return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: {StoreLo, StoreHi}); |
2240 | } |
2241 | |
2242 | SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op, |
2243 | SelectionDAG &DAG) const { |
2244 | // This conversion only applies to QFloat. IEEE extension from f16 to f32 |
2245 | // is legal (done via a pattern). |
2246 | assert(Subtarget.useHVXQFloatOps()); |
2247 | |
2248 | assert(Op->getOpcode() == ISD::FP_EXTEND); |
2249 | |
2250 | MVT VecTy = ty(Op); |
2251 | MVT ArgTy = ty(Op: Op.getOperand(i: 0)); |
2252 | const SDLoc &dl(Op); |
2253 | assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16); |
2254 | |
2255 | SDValue F16Vec = Op.getOperand(i: 0); |
2256 | |
2257 | APFloat FloatVal = APFloat(1.0f); |
2258 | bool Ignored; |
2259 | FloatVal.convert(ToSemantics: APFloat::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, losesInfo: &Ignored); |
2260 | SDValue Fp16Ones = DAG.getConstantFP(Val: FloatVal, DL: dl, VT: ArgTy); |
2261 | SDValue VmpyVec = |
2262 | getInstr(MachineOpc: Hexagon::V6_vmpy_qf32_hf, dl, Ty: VecTy, Ops: {F16Vec, Fp16Ones}, DAG); |
2263 | |
2264 | MVT HalfTy = typeSplit(VecTy).first; |
2265 | VectorPair Pair = opSplit(Vec: VmpyVec, dl, DAG); |
2266 | SDValue LoVec = |
2267 | getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.first}, DAG); |
2268 | SDValue HiVec = |
2269 | getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.second}, DAG); |
2270 | |
2271 | SDValue ShuffVec = |
2272 | getInstr(MachineOpc: Hexagon::V6_vshuffvdd, dl, Ty: VecTy, |
2273 | Ops: {HiVec, LoVec, DAG.getSignedConstant(Val: -4, DL: dl, VT: MVT::i32)}, DAG); |
2274 | |
2275 | return ShuffVec; |
2276 | } |
2277 | |
2278 | SDValue |
2279 | HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const { |
2280 | // Catch invalid conversion ops (just in case). |
2281 | assert(Op.getOpcode() == ISD::FP_TO_SINT || |
2282 | Op.getOpcode() == ISD::FP_TO_UINT); |
2283 | |
2284 | MVT ResTy = ty(Op); |
2285 | MVT FpTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType(); |
2286 | MVT IntTy = ResTy.getVectorElementType(); |
2287 | |
2288 | if (Subtarget.useHVXIEEEFPOps()) { |
2289 | // There are only conversions from f16. |
2290 | if (FpTy == MVT::f16) { |
2291 | // Other int types aren't legal in HVX, so we shouldn't see them here. |
2292 | assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32); |
2293 | // Conversions to i8 and i16 are legal. |
2294 | if (IntTy == MVT::i8 || IntTy == MVT::i16) |
2295 | return Op; |
2296 | } |
2297 | } |
2298 | |
2299 | if (IntTy.getSizeInBits() != FpTy.getSizeInBits()) |
2300 | return EqualizeFpIntConversion(Op, DAG); |
2301 | |
2302 | return ExpandHvxFpToInt(Op, DAG); |
2303 | } |
2304 | |
2305 | SDValue |
2306 | HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const { |
2307 | // Catch invalid conversion ops (just in case). |
2308 | assert(Op.getOpcode() == ISD::SINT_TO_FP || |
2309 | Op.getOpcode() == ISD::UINT_TO_FP); |
2310 | |
2311 | MVT ResTy = ty(Op); |
2312 | MVT IntTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType(); |
2313 | MVT FpTy = ResTy.getVectorElementType(); |
2314 | |
2315 | if (Subtarget.useHVXIEEEFPOps()) { |
2316 | // There are only conversions to f16. |
2317 | if (FpTy == MVT::f16) { |
2318 | // Other int types aren't legal in HVX, so we shouldn't see them here. |
2319 | assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32); |
2320 | // i8, i16 -> f16 is legal. |
2321 | if (IntTy == MVT::i8 || IntTy == MVT::i16) |
2322 | return Op; |
2323 | } |
2324 | } |
2325 | |
2326 | if (IntTy.getSizeInBits() != FpTy.getSizeInBits()) |
2327 | return EqualizeFpIntConversion(Op, DAG); |
2328 | |
2329 | return ExpandHvxIntToFp(Op, DAG); |
2330 | } |
2331 | |
2332 | HexagonTargetLowering::TypePair |
2333 | HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const { |
2334 | // Compare the widths of elements of the two types, and extend the narrower |
// type to match the width of the wider type. For vector types, apply this
2336 | // to the element type. |
2337 | assert(Ty0.isVector() == Ty1.isVector()); |
2338 | |
2339 | MVT ElemTy0 = Ty0.getScalarType(); |
2340 | MVT ElemTy1 = Ty1.getScalarType(); |
2341 | |
2342 | unsigned Width0 = ElemTy0.getSizeInBits(); |
2343 | unsigned Width1 = ElemTy1.getSizeInBits(); |
2344 | unsigned MaxWidth = std::max(a: Width0, b: Width1); |
2345 | |
2346 | auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) { |
2347 | if (ScalarTy.isInteger()) |
2348 | return MVT::getIntegerVT(BitWidth: Width); |
2349 | assert(ScalarTy.isFloatingPoint()); |
2350 | return MVT::getFloatingPointVT(BitWidth: Width); |
2351 | }; |
2352 | |
2353 | MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth); |
2354 | MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth); |
2355 | |
2356 | if (!Ty0.isVector()) { |
2357 | // Both types are scalars. |
2358 | return {WideETy0, WideETy1}; |
2359 | } |
2360 | |
2361 | // Vector types. |
2362 | unsigned NumElem = Ty0.getVectorNumElements(); |
2363 | assert(NumElem == Ty1.getVectorNumElements()); |
2364 | |
2365 | return {MVT::getVectorVT(VT: WideETy0, NumElements: NumElem), |
2366 | MVT::getVectorVT(VT: WideETy1, NumElements: NumElem)}; |
2367 | } |
2368 | |
2369 | HexagonTargetLowering::TypePair |
2370 | HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const { |
2371 | // Compare the numbers of elements of two vector types, and widen the |
2372 | // narrower one to match the number of elements in the wider one. |
2373 | assert(Ty0.isVector() && Ty1.isVector()); |
2374 | |
2375 | unsigned Len0 = Ty0.getVectorNumElements(); |
2376 | unsigned Len1 = Ty1.getVectorNumElements(); |
2377 | if (Len0 == Len1) |
2378 | return {Ty0, Ty1}; |
2379 | |
2380 | unsigned MaxLen = std::max(a: Len0, b: Len1); |
2381 | return {MVT::getVectorVT(VT: Ty0.getVectorElementType(), NumElements: MaxLen), |
2382 | MVT::getVectorVT(VT: Ty1.getVectorElementType(), NumElements: MaxLen)}; |
2383 | } |
2384 | |
2385 | MVT |
2386 | HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const { |
2387 | EVT LegalTy = getTypeToTransformTo(Context&: *DAG.getContext(), VT: Ty); |
2388 | assert(LegalTy.isSimple()); |
2389 | return LegalTy.getSimpleVT(); |
2390 | } |
2391 | |
2392 | MVT |
2393 | HexagonTargetLowering::typeWidenToHvx(MVT Ty) const { |
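  // Return the type obtained by widening Ty to the full HVX register width,
  // keeping the element type. For example (illustrative), with 128-byte
  // vectors, v16i32 (512 bits) widens to v32i32 (1024 bits).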
2394 | unsigned HwWidth = 8 * Subtarget.getVectorLength(); |
2395 | assert(Ty.getSizeInBits() <= HwWidth); |
2396 | if (Ty.getSizeInBits() == HwWidth) |
2397 | return Ty; |
2398 | |
2399 | MVT ElemTy = Ty.getScalarType(); |
2400 | return MVT::getVectorVT(VT: ElemTy, NumElements: HwWidth / ElemTy.getSizeInBits()); |
2401 | } |
2402 | |
2403 | HexagonTargetLowering::VectorPair |
2404 | HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B, |
2405 | const SDLoc &dl, bool Signed, SelectionDAG &DAG) const { |
2406 | // Compute A+B, return {A+B, O}, where O = vector predicate indicating |
2407 | // whether an overflow has occurred. |
2408 | MVT ResTy = ty(Op: A); |
2409 | assert(ResTy == ty(B)); |
2410 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: ResTy.getVectorNumElements()); |
2411 | |
2412 | if (!Signed) { |
2413 | // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't |
2414 | // save any instructions. |
2415 | SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B}); |
2416 | SDValue Ovf = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Add, RHS: A, Cond: ISD::SETULT); |
2417 | return {Add, Ovf}; |
2418 | } |
2419 | |
  // Signed overflow has happened if
  // (A and B have the same sign) and (A+B has a different sign from either),
  // i.e. the sign bit of (~A xor B) & ((A+B) xor B) is set.
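  // A scalar sketch of the check below (illustrative only):
  //   int32_t add = a + b;
  //   bool ovf = ((~a ^ b) & (add ^ b)) < 0;   // MSB set => signed overflow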
2423 | SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B}); |
2424 | SDValue NotA = |
2425 | DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {A, DAG.getAllOnesConstant(DL: dl, VT: ResTy)}); |
2426 | SDValue Xor0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {NotA, B}); |
2427 | SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {Add, B}); |
2428 | SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy, Ops: {Xor0, Xor1}); |
2429 | SDValue MSB = |
2430 | DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: getZero(dl, Ty: ResTy, DAG), Cond: ISD::SETLT); |
2431 | return {Add, MSB}; |
2432 | } |
2433 | |
2434 | HexagonTargetLowering::VectorPair |
2435 | HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt, |
2436 | bool Signed, SelectionDAG &DAG) const { |
2437 | // Shift Val right by Amt bits, round the result to the nearest integer, |
  // breaking ties by rounding halves to the even integer.
2439 | |
2440 | const SDLoc &dl(Val); |
2441 | MVT ValTy = ty(Op: Val); |
2442 | |
2443 | // This should also work for signed integers. |
2444 | // |
2445 | // uint tmp0 = inp + ((1 << (Amt-1)) - 1); |
2446 | // bool ovf = (inp > tmp0); |
  //   bool rup = (inp & (1 << Amt)) != 0;   // bit Amt: LSB of the result
2448 | // |
2449 | // uint tmp1 = inp >> (Amt-1); // tmp1 == tmp2 iff |
2450 | // uint tmp2 = tmp0 >> (Amt-1); // the Amt-1 lower bits were all 0 |
2451 | // uint tmp3 = tmp2 + rup; |
2452 | // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1; |
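  //
  // Worked example (illustrative): Amt = 3, inp = 20 (binary 10100), i.e.
  // 20/8 = 2.5:
  //   tmp0 = 20 + 3 = 23,  rup = bit 3 of 20 = 0,
  //   tmp1 = 20 >> 2 = 5,  tmp2 = 23 >> 2 = 5,  tmp3 = 5 + 0 = 5,
  //   tmp1 == tmp2, so frac = tmp3 >> 1 = 2 (the 2.5 tie rounds to even).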
2453 | unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits(); |
2454 | MVT ElemTy = MVT::getIntegerVT(BitWidth: ElemWidth); |
2455 | MVT IntTy = tyVector(Ty: ValTy, ElemTy); |
2456 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: IntTy.getVectorNumElements()); |
2457 | unsigned ShRight = Signed ? ISD::SRA : ISD::SRL; |
2458 | |
2459 | SDValue Inp = DAG.getBitcast(VT: IntTy, V: Val); |
2460 | SDValue LowBits = DAG.getConstant(Val: (1ull << (Amt - 1)) - 1, DL: dl, VT: IntTy); |
2461 | |
2462 | SDValue AmtP1 = DAG.getConstant(Val: 1ull << Amt, DL: dl, VT: IntTy); |
2463 | SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntTy, Ops: {Inp, AmtP1}); |
2464 | SDValue Zero = getZero(dl, Ty: IntTy, DAG); |
2465 | SDValue Bit = DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: Zero, Cond: ISD::SETNE); |
2466 | SDValue Rup = DAG.getZExtOrTrunc(Op: Bit, DL: dl, VT: IntTy); |
2467 | auto [Tmp0, Ovf] = emitHvxAddWithOverflow(A: Inp, B: LowBits, dl, Signed, DAG); |
2468 | |
2469 | SDValue AmtM1 = DAG.getConstant(Val: Amt - 1, DL: dl, VT: IntTy); |
2470 | SDValue Tmp1 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Inp, N2: AmtM1); |
2471 | SDValue Tmp2 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Tmp0, N2: AmtM1); |
2472 | SDValue Tmp3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntTy, N1: Tmp2, N2: Rup); |
2473 | |
2474 | SDValue Eq = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Tmp1, RHS: Tmp2, Cond: ISD::SETEQ); |
2475 | SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: IntTy); |
2476 | SDValue Tmp4 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp2, One}); |
2477 | SDValue Tmp5 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp3, One}); |
2478 | SDValue Mux = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: IntTy, Ops: {Eq, Tmp5, Tmp4}); |
2479 | return {Mux, Ovf}; |
2480 | } |
2481 | |
2482 | SDValue |
2483 | HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl, |
2484 | SelectionDAG &DAG) const { |
2485 | MVT VecTy = ty(Op: A); |
2486 | MVT PairTy = typeJoin(Tys: {VecTy, VecTy}); |
2487 | assert(VecTy.getVectorElementType() == MVT::i32); |
2488 | |
2489 | SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32); |
2490 | |
2491 | // mulhs(A,B) = |
2492 | // = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32 |
2493 | // = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16 |
2494 | // + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32 |
2495 | // = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32 |
2496 | // The low half of Lo(A)*Lo(B) will be discarded (it's not added to |
2497 | // anything, so it cannot produce any carry over to higher bits), |
2498 | // so everything in [] can be shifted by 16 without loss of precision. |
2499 | // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16 |
2500 | // = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16 |
  // The final additions must properly propagate any carry-out bits.
2503 | // |
2504 | // Hi(B) Lo(B) |
2505 | // Hi(A) Lo(A) |
2506 | // -------------- |
2507 | // Lo(B)*Lo(A) | T0 = V6_vmpyewuh(B,A) does this, |
2508 | // Hi(B)*Lo(A) | + dropping the low 16 bits |
2509 | // Hi(A)*Lo(B) | T2 |
2510 | // Hi(B)*Hi(A) |
2511 | |
2512 | SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh, dl, Ty: VecTy, Ops: {B, A}, DAG); |
2513 | // T1 = get Hi(A) into low halves. |
2514 | SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {A, S16}, DAG); |
2515 | // P0 = interleaved T1.h*B.uh (full precision product) |
2516 | SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyhus, dl, Ty: PairTy, Ops: {T1, B}, DAG); |
2517 | // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B) |
2518 | SDValue T2 = LoHalf(V: P0, DAG); |
2519 | // We need to add T0+T2, recording the carry-out, which will be 1<<16 |
2520 | // added to the final sum. |
2521 | // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves |
2522 | SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG); |
2523 | // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves |
2524 | SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vaddhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG); |
2525 | // T3 = full-precision(T0+T2) >> 16 |
2526 | // The low halves are added-unsigned, the high ones are added-signed. |
2527 | SDValue T3 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy, |
2528 | Ops: {HiHalf(V: P2, DAG), LoHalf(V: P1, DAG), S16}, DAG); |
2529 | SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {B, S16}, DAG); |
2530 | // P3 = interleaved Hi(B)*Hi(A) (full precision), |
2531 | // which is now Lo(T1)*Lo(T4), so we want to keep the even product. |
2532 | SDValue P3 = getInstr(MachineOpc: Hexagon::V6_vmpyhv, dl, Ty: PairTy, Ops: {T1, T4}, DAG); |
2533 | SDValue T5 = LoHalf(V: P3, DAG); |
2534 | // Add: |
2535 | SDValue T6 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {T3, T5}); |
2536 | return T6; |
2537 | } |
2538 | |
2539 | SDValue |
2540 | HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B, |
2541 | bool SignedB, const SDLoc &dl, |
2542 | SelectionDAG &DAG) const { |
2543 | MVT VecTy = ty(Op: A); |
2544 | MVT PairTy = typeJoin(Tys: {VecTy, VecTy}); |
2545 | assert(VecTy.getVectorElementType() == MVT::i32); |
2546 | |
2547 | SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32); |
2548 | |
2549 | if (SignedA && !SignedB) { |
2550 | // Make A:unsigned, B:signed. |
2551 | std::swap(a&: A, b&: B); |
2552 | std::swap(a&: SignedA, b&: SignedB); |
2553 | } |
2554 | |
2555 | // Do halfword-wise multiplications for unsigned*unsigned product, then |
2556 | // add corrections for signed and unsigned*signed. |
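  // With A = Ahi*2^16 + Alo and B = Bhi*2^16 + Blo (halves treated as
  // unsigned), the 64-bit product decomposes as (illustrative):
  //   A*B = Ahi*Bhi*2^32 + (Ahi*Blo + Alo*Bhi)*2^16 + Alo*Blo
  // P0 below supplies Alo*Blo and Ahi*Bhi, P1 supplies the two cross terms.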
2557 | |
2558 | SDValue Lo, Hi; |
2559 | |
2560 | // P0:lo = (uu) products of low halves of A and B, |
2561 | // P0:hi = (uu) products of high halves. |
2562 | SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, B}, DAG); |
2563 | |
2564 | // Swap low/high halves in B |
2565 | SDValue T0 = getInstr(MachineOpc: Hexagon::V6_lvsplatw, dl, Ty: VecTy, |
2566 | Ops: {DAG.getConstant(Val: 0x02020202, DL: dl, VT: MVT::i32)}, DAG); |
2567 | SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vdelta, dl, Ty: VecTy, Ops: {B, T0}, DAG); |
2568 | // P1 = products of even/odd halfwords. |
2569 | // P1:lo = (uu) products of even(A.uh) * odd(B.uh) |
2570 | // P1:hi = (uu) products of odd(A.uh) * even(B.uh) |
2571 | SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, T1}, DAG); |
2572 | |
2573 | // P2:lo = low halves of P1:lo + P1:hi, |
2574 | // P2:hi = high halves of P1:lo + P1:hi. |
2575 | SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy, |
2576 | Ops: {HiHalf(V: P1, DAG), LoHalf(V: P1, DAG)}, DAG); |
2577 | // Still need to add the high halves of P0:lo to P2:lo |
2578 | SDValue T2 = |
2579 | getInstr(MachineOpc: Hexagon::V6_vlsrw, dl, Ty: VecTy, Ops: {LoHalf(V: P0, DAG), S16}, DAG); |
2580 | SDValue T3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {LoHalf(V: P2, DAG), T2}); |
2581 | |
2582 | // The high halves of T3 will contribute to the HI part of LOHI. |
2583 | SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy, |
2584 | Ops: {HiHalf(V: P2, DAG), T3, S16}, DAG); |
2585 | |
2586 | // The low halves of P2 need to be added to high halves of the LO part. |
2587 | Lo = getInstr(MachineOpc: Hexagon::V6_vaslw_acc, dl, Ty: VecTy, |
2588 | Ops: {LoHalf(V: P0, DAG), LoHalf(V: P2, DAG), S16}, DAG); |
2589 | Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {HiHalf(V: P0, DAG), T4}); |
2590 | |
2591 | if (SignedA) { |
2592 | assert(SignedB && "Signed A and unsigned B should have been inverted" ); |
2593 | |
2594 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements()); |
2595 | SDValue Zero = getZero(dl, Ty: VecTy, DAG); |
2596 | SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT); |
2597 | SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT); |
2598 | SDValue X0 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: VecTy, Ops: {Q0, B, Zero}); |
2599 | SDValue X1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, X0, A}, DAG); |
2600 | Hi = getInstr(MachineOpc: Hexagon::V6_vsubw, dl, Ty: VecTy, Ops: {Hi, X1}, DAG); |
2601 | } else if (SignedB) { |
2602 | // Same correction as for mulhus: |
2603 | // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0) |
2604 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements()); |
2605 | SDValue Zero = getZero(dl, Ty: VecTy, DAG); |
2606 | SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT); |
2607 | Hi = getInstr(MachineOpc: Hexagon::V6_vsubwq, dl, Ty: VecTy, Ops: {Q1, Hi, A}, DAG); |
2608 | } else { |
2609 | assert(!SignedA && !SignedB); |
2610 | } |
2611 | |
2612 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl); |
2613 | } |
2614 | |
2615 | SDValue |
2616 | HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA, |
2617 | SDValue B, bool SignedB, |
2618 | const SDLoc &dl, |
2619 | SelectionDAG &DAG) const { |
2620 | MVT VecTy = ty(Op: A); |
2621 | MVT PairTy = typeJoin(Tys: {VecTy, VecTy}); |
2622 | assert(VecTy.getVectorElementType() == MVT::i32); |
2623 | |
2624 | if (SignedA && !SignedB) { |
2625 | // Make A:unsigned, B:signed. |
2626 | std::swap(a&: A, b&: B); |
2627 | std::swap(a&: SignedA, b&: SignedB); |
2628 | } |
2629 | |
2630 | // Do S*S first, then make corrections for U*S or U*U if needed. |
2631 | SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh_64, dl, Ty: PairTy, Ops: {A, B}, DAG); |
2632 | SDValue P1 = |
2633 | getInstr(MachineOpc: Hexagon::V6_vmpyowh_64_acc, dl, Ty: PairTy, Ops: {P0, A, B}, DAG); |
2634 | SDValue Lo = LoHalf(V: P1, DAG); |
2635 | SDValue Hi = HiHalf(V: P1, DAG); |
2636 | |
2637 | if (!SignedB) { |
2638 | assert(!SignedA && "Signed A and unsigned B should have been inverted" ); |
2639 | SDValue Zero = getZero(dl, Ty: VecTy, DAG); |
2640 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements()); |
2641 | |
2642 | // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0). |
2643 | // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)), |
2644 | // (V6_vaddw (HiHalf (Muls64O $A, $B)), |
2645 | // (V6_vaddwq (V6_vgtw (V6_vd0), $B), |
2646 | // (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B), |
2647 | // $A))>; |
2648 | SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT); |
2649 | SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT); |
2650 | SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vandvqv, dl, Ty: VecTy, Ops: {Q0, B}, DAG); |
2651 | SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, T0, A}, DAG); |
2652 | Hi = getInstr(MachineOpc: Hexagon::V6_vaddw, dl, Ty: VecTy, Ops: {Hi, T1}, DAG); |
2653 | } else if (!SignedA) { |
2654 | SDValue Zero = getZero(dl, Ty: VecTy, DAG); |
2655 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements()); |
2656 | |
2657 | // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0). |
2658 | // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)), |
2659 | // (V6_vaddwq (V6_vgtw (V6_vd0), $A), |
2660 | // (HiHalf (Muls64O $A, $B)), |
2661 | // $B)>; |
2662 | SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT); |
2663 | Hi = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q0, Hi, B}, DAG); |
2664 | } |
2665 | |
2666 | return DAG.getMergeValues(Ops: {Lo, Hi}, dl); |
2667 | } |
2668 | |
2669 | SDValue |
2670 | HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG) |
2671 | const { |
2672 | // Rewrite conversion between integer and floating-point in such a way that |
2673 | // the integer type is extended/narrowed to match the bitwidth of the |
2674 | // floating-point type, combined with additional integer-integer extensions |
2675 | // or narrowings to match the original input/result types. |
2676 | // E.g. f32 -> i8 ==> f32 -> i32 -> i8 |
2677 | // |
2678 | // The input/result types are not required to be legal, but if they are |
2679 | // legal, this function should not introduce illegal types. |
2680 | |
2681 | unsigned Opc = Op.getOpcode(); |
2682 | assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT || |
2683 | Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP); |
2684 | |
2685 | SDValue Inp = Op.getOperand(i: 0); |
2686 | MVT InpTy = ty(Op: Inp); |
2687 | MVT ResTy = ty(Op); |
2688 | |
2689 | if (InpTy == ResTy) |
2690 | return Op; |
2691 | |
2692 | const SDLoc &dl(Op); |
2693 | bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP; |
2694 | |
2695 | auto [WInpTy, WResTy] = typeExtendToWider(Ty0: InpTy, Ty1: ResTy); |
2696 | SDValue WInp = resizeToWidth(VecV: Inp, ResTy: WInpTy, Signed, dl, DAG); |
2697 | SDValue Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: WResTy, Operand: WInp); |
2698 | SDValue Res = resizeToWidth(VecV: Conv, ResTy, Signed, dl, DAG); |
2699 | return Res; |
2700 | } |
2701 | |
2702 | SDValue |
2703 | HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const { |
2704 | unsigned Opc = Op.getOpcode(); |
2705 | assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT); |
2706 | |
2707 | const SDLoc &dl(Op); |
2708 | SDValue Op0 = Op.getOperand(i: 0); |
2709 | MVT InpTy = ty(Op: Op0); |
2710 | MVT ResTy = ty(Op); |
2711 | assert(InpTy.changeTypeToInteger() == ResTy); |
2712 | |
2713 | // int32_t conv_f32_to_i32(uint32_t inp) { |
2714 | // // s | exp8 | frac23 |
2715 | // |
2716 | // int neg = (int32_t)inp < 0; |
2717 | // |
2718 | // // "expm1" is the actual exponent minus 1: instead of "bias", subtract |
2719 | // // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will |
2720 | // // produce a large positive "expm1", which will result in max u/int. |
  //   // In all IEEE formats, the bias is the largest positive number that
  //   // can be represented in ExpWidth-1 bits (i.e. 011..1).
2723 | // int32_t expm1 = (inp << 1) - 0x80000000; |
2724 | // expm1 >>= 24; |
2725 | // |
2726 | // // Always insert the "implicit 1". Subnormal numbers will become 0 |
2727 | // // regardless. |
2728 | // uint32_t frac = (inp << 8) | 0x80000000; |
2729 | // |
2730 | // // "frac" is the fraction part represented as Q1.31. If it was |
2731 | // // interpreted as uint32_t, it would be the fraction part multiplied |
2732 | // // by 2^31. |
2733 | // |
2734 | // // Calculate the amount of right shift, since shifting further to the |
2735 | // // left would lose significant bits. Limit it to 32, because we want |
2736 | // // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift |
2737 | // // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift |
2738 | // // left by 31). "rsh" can be negative. |
2739 | // int32_t rsh = min(31 - (expm1 + 1), 32); |
2740 | // |
2741 | // frac >>= rsh; // rsh == 32 will produce 0 |
2742 | // |
  //   // Everything up to this point is the same for conversion to signed
  //   // or unsigned integer.
2745 | // |
2746 | // if (neg) // Only for signed int |
2747 | // frac = -frac; // |
2748 | // if (rsh <= 0 && neg) // bound = neg ? 0x80000000 : 0x7fffffff |
2749 | // frac = 0x80000000; // frac = rsh <= 0 ? bound : frac |
2750 | // if (rsh <= 0 && !neg) // |
2751 | // frac = 0x7fffffff; // |
2752 | // |
2753 | // if (neg) // Only for unsigned int |
2754 | // frac = 0; // |
2755 | // if (rsh < 0 && !neg) // frac = rsh < 0 ? 0x7fffffff : frac; |
2756 | // frac = 0x7fffffff; // frac = neg ? 0 : frac; |
2757 | // |
2758 | // return frac; |
2759 | // } |
2760 | |
2761 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: ResTy.getVectorElementCount()); |
2762 | |
2763 | // Zero = V6_vd0(); |
2764 | // Neg = V6_vgtw(Zero, Inp); |
2765 | // One = V6_lvsplatw(1); |
2766 | // M80 = V6_lvsplatw(0x80000000); |
2767 | // Exp00 = V6_vaslwv(Inp, One); |
2768 | // Exp01 = V6_vsubw(Exp00, M80); |
2769 | // ExpM1 = V6_vasrw(Exp01, 24); |
2770 | // Frc00 = V6_vaslw(Inp, 8); |
2771 | // Frc01 = V6_vor(Frc00, M80); |
2772 | // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1); |
2773 | // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32)); |
2774 | // Frc02 = V6_vlsrwv(Frc01, Rsh01); |
2775 | |
2776 | // if signed int: |
2777 | // Bnd = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff)) |
2778 | // Pos = V6_vgtw(Rsh01, Zero); |
2779 | // Frc13 = V6_vsubw(Zero, Frc02); |
2780 | // Frc14 = V6_vmux(Neg, Frc13, Frc02); |
2781 | // Int = V6_vmux(Pos, Frc14, Bnd); |
2782 | // |
2783 | // if unsigned int: |
2784 | // Rsn = V6_vgtw(Zero, Rsh01) |
2785 | // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02) |
2786 | // Int = V6_vmux(Neg, Zero, Frc23) |
2787 | |
2788 | auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: InpTy); |
2789 | unsigned ElemWidth = 1 + ExpWidth + FracWidth; |
2790 | assert((1ull << (ExpWidth - 1)) == (1 + ExpBias)); |
2791 | |
2792 | SDValue Inp = DAG.getBitcast(VT: ResTy, V: Op0); |
2793 | SDValue Zero = getZero(dl, Ty: ResTy, DAG); |
2794 | SDValue Neg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Inp, RHS: Zero, Cond: ISD::SETLT); |
2795 | SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: ResTy); |
2796 | SDValue M7F = DAG.getConstant(Val: (1ull << (ElemWidth - 1)) - 1, DL: dl, VT: ResTy); |
2797 | SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: ResTy); |
2798 | SDValue Exp00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, One}); |
2799 | SDValue Exp01 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Exp00, M80}); |
2800 | SDValue MNE = DAG.getConstant(Val: ElemWidth - ExpWidth, DL: dl, VT: ResTy); |
2801 | SDValue ExpM1 = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: ResTy, Ops: {Exp01, MNE}); |
2802 | |
2803 | SDValue ExpW = DAG.getConstant(Val: ExpWidth, DL: dl, VT: ResTy); |
2804 | SDValue Frc00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, ExpW}); |
2805 | SDValue Frc01 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ResTy, Ops: {Frc00, M80}); |
2806 | |
2807 | SDValue MN2 = DAG.getConstant(Val: ElemWidth - 2, DL: dl, VT: ResTy); |
2808 | SDValue Rsh00 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {MN2, ExpM1}); |
2809 | SDValue MW = DAG.getConstant(Val: ElemWidth, DL: dl, VT: ResTy); |
2810 | SDValue Rsh01 = DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT: ResTy, Ops: {Rsh00, MW}); |
2811 | SDValue Frc02 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ResTy, Ops: {Frc01, Rsh01}); |
2812 | |
2813 | SDValue Int; |
2814 | |
2815 | if (Opc == ISD::FP_TO_SINT) { |
2816 | SDValue Bnd = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, M80, M7F}); |
2817 | SDValue Pos = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETGT); |
2818 | SDValue Frc13 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Zero, Frc02}); |
2819 | SDValue Frc14 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, Frc13, Frc02}); |
2820 | Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Pos, Frc14, Bnd}); |
2821 | } else { |
2822 | assert(Opc == ISD::FP_TO_UINT); |
2823 | SDValue Rsn = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETLT); |
2824 | SDValue Frc23 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Rsn, N2: M7F, N3: Frc02); |
2825 | Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Neg, N2: Zero, N3: Frc23); |
2826 | } |
2827 | |
2828 | return Int; |
2829 | } |
2830 | |
2831 | SDValue |
2832 | HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const { |
2833 | unsigned Opc = Op.getOpcode(); |
2834 | assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP); |
2835 | |
2836 | const SDLoc &dl(Op); |
2837 | SDValue Op0 = Op.getOperand(i: 0); |
2838 | MVT InpTy = ty(Op: Op0); |
2839 | MVT ResTy = ty(Op); |
2840 | assert(ResTy.changeTypeToInteger() == InpTy); |
2841 | |
2842 | // uint32_t vnoc1_rnd(int32_t w) { |
2843 | // int32_t iszero = w == 0; |
2844 | // int32_t isneg = w < 0; |
2845 | // uint32_t u = __builtin_HEXAGON_A2_abs(w); |
2846 | // |
2847 | // uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1; |
2848 | // uint32_t frac0 = (uint64_t)u << norm_left; |
2849 | // |
2850 | // // Rounding: |
2851 | // uint32_t frac1 = frac0 + ((1 << 8) - 1); |
2852 | // uint32_t renorm = (frac0 > frac1); |
2853 | // uint32_t rup = (int)(frac0 << 22) < 0; |
2854 | // |
2855 | // uint32_t frac2 = frac0 >> 8; |
2856 | // uint32_t frac3 = frac1 >> 8; |
2857 | // uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1; |
2858 | // |
2859 | // int32_t exp = 32 - norm_left + renorm + 127; |
2860 | // exp <<= 23; |
2861 | // |
2862 | // uint32_t sign = 0x80000000 * isneg; |
2863 | // uint32_t f = sign | exp | frac; |
2864 | // return iszero ? 0 : f; |
2865 | // } |
2866 | |
2867 | MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: InpTy.getVectorElementCount()); |
2868 | bool Signed = Opc == ISD::SINT_TO_FP; |
2869 | |
2870 | auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: ResTy); |
2871 | unsigned ElemWidth = 1 + ExpWidth + FracWidth; |
2872 | |
2873 | SDValue Zero = getZero(dl, Ty: InpTy, DAG); |
2874 | SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: InpTy); |
2875 | SDValue IsZero = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ); |
2876 | SDValue Abs = Signed ? DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: InpTy, Operand: Op0) : Op0; |
2877 | SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: InpTy, Operand: Abs); |
2878 | SDValue NLeft = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Clz, One}); |
2879 | SDValue Frac0 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy, Ops: {Abs, NLeft}); |
2880 | |
2881 | auto [Frac, Ovf] = emitHvxShiftRightRnd(Val: Frac0, Amt: ExpWidth + 1, Signed: false, DAG); |
2882 | if (Signed) { |
2883 | SDValue IsNeg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETLT); |
2884 | SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: InpTy); |
2885 | SDValue Sign = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsNeg, M80, Zero}); |
2886 | Frac = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Sign, Frac}); |
2887 | } |
2888 | |
2889 | SDValue Rnrm = DAG.getZExtOrTrunc(Op: Ovf, DL: dl, VT: InpTy); |
2890 | SDValue Exp0 = DAG.getConstant(Val: ElemWidth + ExpBias, DL: dl, VT: InpTy); |
2891 | SDValue Exp1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Rnrm, Exp0}); |
2892 | SDValue Exp2 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: InpTy, Ops: {Exp1, NLeft}); |
2893 | SDValue Exp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy, |
2894 | Ops: {Exp2, DAG.getConstant(Val: FracWidth, DL: dl, VT: InpTy)}); |
2895 | SDValue Flt0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Frac, Exp3}); |
2896 | SDValue Flt1 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsZero, Zero, Flt0}); |
2897 | SDValue Flt = DAG.getBitcast(VT: ResTy, V: Flt1); |
2898 | |
2899 | return Flt; |
2900 | } |
2901 | |
2902 | SDValue |
2903 | HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const { |
2904 | unsigned Opc = Op.getOpcode(); |
2905 | unsigned TLOpc; |
2906 | switch (Opc) { |
2907 | case ISD::ANY_EXTEND: |
2908 | case ISD::SIGN_EXTEND: |
2909 | case ISD::ZERO_EXTEND: |
2910 | TLOpc = HexagonISD::TL_EXTEND; |
2911 | break; |
2912 | case ISD::TRUNCATE: |
2913 | TLOpc = HexagonISD::TL_TRUNCATE; |
    break;
  default:
#ifndef NDEBUG
2916 | Op.dump(&DAG); |
2917 | #endif |
2918 | llvm_unreachable("Unexpected operator" ); |
2919 | } |
2920 | |
2921 | const SDLoc &dl(Op); |
2922 | return DAG.getNode(Opcode: TLOpc, DL: dl, VT: ty(Op), N1: Op.getOperand(i: 0), |
2923 | N2: DAG.getUNDEF(VT: MVT::i128), // illegal type |
2924 | N3: DAG.getConstant(Val: Opc, DL: dl, VT: MVT::i32)); |
2925 | } |
2926 | |
2927 | SDValue |
2928 | HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const { |
2929 | assert(Op.getOpcode() == HexagonISD::TL_EXTEND || |
2930 | Op.getOpcode() == HexagonISD::TL_TRUNCATE); |
2931 | unsigned Opc = Op.getConstantOperandVal(i: 2); |
2932 | return DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: ty(Op), Operand: Op.getOperand(i: 0)); |
2933 | } |
2934 | |
2935 | HexagonTargetLowering::VectorPair |
2936 | HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const { |
2937 | assert(!Op.isMachineOpcode()); |
2938 | SmallVector<SDValue, 2> OpsL, OpsH; |
2939 | const SDLoc &dl(Op); |
2940 | |
2941 | auto SplitVTNode = [&DAG, this](const VTSDNode *N) { |
2942 | MVT Ty = typeSplit(VecTy: N->getVT().getSimpleVT()).first; |
2943 | SDValue TV = DAG.getValueType(Ty); |
2944 | return std::make_pair(x&: TV, y&: TV); |
2945 | }; |
2946 | |
2947 | for (SDValue A : Op.getNode()->ops()) { |
2948 | auto [Lo, Hi] = |
2949 | ty(Op: A).isVector() ? opSplit(Vec: A, dl, DAG) : std::make_pair(x&: A, y&: A); |
2950 | // Special case for type operand. |
2951 | switch (Op.getOpcode()) { |
2952 | case ISD::SIGN_EXTEND_INREG: |
2953 | case HexagonISD::SSAT: |
2954 | case HexagonISD::USAT: |
2955 | if (const auto *N = dyn_cast<const VTSDNode>(Val: A.getNode())) |
2956 | std::tie(args&: Lo, args&: Hi) = SplitVTNode(N); |
2957 | break; |
2958 | } |
2959 | OpsL.push_back(Elt: Lo); |
2960 | OpsH.push_back(Elt: Hi); |
2961 | } |
2962 | |
2963 | MVT ResTy = ty(Op); |
2964 | MVT HalfTy = typeSplit(VecTy: ResTy).first; |
2965 | SDValue L = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsL); |
2966 | SDValue H = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsH); |
2967 | return {L, H}; |
2968 | } |
2969 | |
2970 | SDValue |
2971 | HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const { |
2972 | auto *MemN = cast<MemSDNode>(Val: Op.getNode()); |
2973 | |
2974 | MVT MemTy = MemN->getMemoryVT().getSimpleVT(); |
2975 | if (!isHvxPairTy(Ty: MemTy)) |
2976 | return Op; |
2977 | |
2978 | const SDLoc &dl(Op); |
2979 | unsigned HwLen = Subtarget.getVectorLength(); |
2980 | MVT SingleTy = typeSplit(VecTy: MemTy).first; |
2981 | SDValue Chain = MemN->getChain(); |
2982 | SDValue Base0 = MemN->getBasePtr(); |
2983 | SDValue Base1 = |
2984 | DAG.getMemBasePlusOffset(Base: Base0, Offset: TypeSize::getFixed(ExactSize: HwLen), DL: dl); |
2985 | unsigned MemOpc = MemN->getOpcode(); |
2986 | |
2987 | MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr; |
2988 | if (MachineMemOperand *MMO = MemN->getMemOperand()) { |
2989 | MachineFunction &MF = DAG.getMachineFunction(); |
2990 | uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE) |
2991 | ? (uint64_t)MemoryLocation::UnknownSize |
2992 | : HwLen; |
2993 | MOp0 = MF.getMachineMemOperand(MMO, Offset: 0, Size: MemSize); |
2994 | MOp1 = MF.getMachineMemOperand(MMO, Offset: HwLen, Size: MemSize); |
2995 | } |
2996 | |
2997 | if (MemOpc == ISD::LOAD) { |
2998 | assert(cast<LoadSDNode>(Op)->isUnindexed()); |
2999 | SDValue Load0 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base0, MMO: MOp0); |
3000 | SDValue Load1 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base1, MMO: MOp1); |
3001 | return DAG.getMergeValues( |
3002 | Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: Load0, N2: Load1), |
3003 | DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, |
3004 | N1: Load0.getValue(R: 1), N2: Load1.getValue(R: 1)) }, dl); |
3005 | } |
3006 | if (MemOpc == ISD::STORE) { |
3007 | assert(cast<StoreSDNode>(Op)->isUnindexed()); |
3008 | VectorPair Vals = opSplit(Vec: cast<StoreSDNode>(Val&: Op)->getValue(), dl, DAG); |
3009 | SDValue Store0 = DAG.getStore(Chain, dl, Val: Vals.first, Ptr: Base0, MMO: MOp0); |
3010 | SDValue Store1 = DAG.getStore(Chain, dl, Val: Vals.second, Ptr: Base1, MMO: MOp1); |
3011 | return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store0, N2: Store1); |
3012 | } |
3013 | |
3014 | assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE); |
3015 | |
3016 | auto MaskN = cast<MaskedLoadStoreSDNode>(Val&: Op); |
3017 | assert(MaskN->isUnindexed()); |
3018 | VectorPair Masks = opSplit(Vec: MaskN->getMask(), dl, DAG); |
3019 | SDValue Offset = DAG.getUNDEF(VT: MVT::i32); |
3020 | |
3021 | if (MemOpc == ISD::MLOAD) { |
3022 | VectorPair Thru = |
3023 | opSplit(Vec: cast<MaskedLoadSDNode>(Val&: Op)->getPassThru(), dl, DAG); |
3024 | SDValue MLoad0 = |
3025 | DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base0, Offset, Mask: Masks.first, |
3026 | Src0: Thru.first, MemVT: SingleTy, MMO: MOp0, AM: ISD::UNINDEXED, |
3027 | ISD::NON_EXTLOAD, IsExpanding: false); |
3028 | SDValue MLoad1 = |
3029 | DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base1, Offset, Mask: Masks.second, |
3030 | Src0: Thru.second, MemVT: SingleTy, MMO: MOp1, AM: ISD::UNINDEXED, |
3031 | ISD::NON_EXTLOAD, IsExpanding: false); |
3032 | return DAG.getMergeValues( |
3033 | Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: MLoad0, N2: MLoad1), |
3034 | DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, |
3035 | N1: MLoad0.getValue(R: 1), N2: MLoad1.getValue(R: 1)) }, dl); |
3036 | } |
3037 | if (MemOpc == ISD::MSTORE) { |
3038 | VectorPair Vals = opSplit(Vec: cast<MaskedStoreSDNode>(Val&: Op)->getValue(), dl, DAG); |
3039 | SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Val: Vals.first, Base: Base0, Offset, |
3040 | Mask: Masks.first, MemVT: SingleTy, MMO: MOp0, |
3041 | AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false); |
3042 | SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Val: Vals.second, Base: Base1, Offset, |
3043 | Mask: Masks.second, MemVT: SingleTy, MMO: MOp1, |
3044 | AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false); |
3045 | return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: MStore0, N2: MStore1); |
3046 | } |
3047 | |
3048 | std::string Name = "Unexpected operation: " + Op->getOperationName(G: &DAG); |
3049 | llvm_unreachable(Name.c_str()); |
3050 | } |
3051 | |
3052 | SDValue |
3053 | HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const { |
3054 | const SDLoc &dl(Op); |
3055 | auto *LoadN = cast<LoadSDNode>(Val: Op.getNode()); |
3056 | assert(LoadN->isUnindexed() && "Not widening indexed loads yet" ); |
3057 | assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 && |
3058 | "Not widening loads of i1 yet" ); |
3059 | |
3060 | SDValue Chain = LoadN->getChain(); |
3061 | SDValue Base = LoadN->getBasePtr(); |
3062 | SDValue Offset = DAG.getUNDEF(VT: MVT::i32); |
3063 | |
3064 | MVT ResTy = ty(Op); |
3065 | unsigned HwLen = Subtarget.getVectorLength(); |
3066 | unsigned ResLen = ResTy.getStoreSize(); |
3067 | assert(ResLen < HwLen && "vsetq(v1) prerequisite" ); |
3068 | |
3069 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen); |
3070 | SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy, |
3071 | Ops: {DAG.getConstant(Val: ResLen, DL: dl, VT: MVT::i32)}, DAG); |
3072 | |
3073 | MVT LoadTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
3074 | MachineFunction &MF = DAG.getMachineFunction(); |
3075 | auto *MemOp = MF.getMachineMemOperand(MMO: LoadN->getMemOperand(), Offset: 0, Size: HwLen); |
3076 | |
3077 | SDValue Load = DAG.getMaskedLoad(VT: LoadTy, dl, Chain, Base, Offset, Mask, |
3078 | Src0: DAG.getUNDEF(VT: LoadTy), MemVT: LoadTy, MMO: MemOp, |
3079 | AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding: false); |
3080 | SDValue Value = opCastElem(Vec: Load, ElemTy: ResTy.getVectorElementType(), DAG); |
3081 | return DAG.getMergeValues(Ops: {Value, Load.getValue(R: 1)}, dl); |
3082 | } |
3083 | |
3084 | SDValue |
3085 | HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const { |
3086 | const SDLoc &dl(Op); |
3087 | auto *StoreN = cast<StoreSDNode>(Val: Op.getNode()); |
3088 | assert(StoreN->isUnindexed() && "Not widening indexed stores yet" ); |
3089 | assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 && |
3090 | "Not widening stores of i1 yet" ); |
3091 | |
3092 | SDValue Chain = StoreN->getChain(); |
3093 | SDValue Base = StoreN->getBasePtr(); |
3094 | SDValue Offset = DAG.getUNDEF(VT: MVT::i32); |
3095 | |
3096 | SDValue Value = opCastElem(Vec: StoreN->getValue(), ElemTy: MVT::i8, DAG); |
3097 | MVT ValueTy = ty(Op: Value); |
3098 | unsigned ValueLen = ValueTy.getVectorNumElements(); |
3099 | unsigned HwLen = Subtarget.getVectorLength(); |
3100 | assert(isPowerOf2_32(ValueLen)); |
3101 | |
3102 | for (unsigned Len = ValueLen; Len < HwLen; ) { |
3103 | Value = opJoin(Ops: {Value, DAG.getUNDEF(VT: ty(Op: Value))}, dl, DAG); |
3104 | Len = ty(Op: Value).getVectorNumElements(); // This is Len *= 2 |
3105 | } |
3106 | assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia |
3107 | |
3108 | assert(ValueLen < HwLen && "vsetq(v1) prerequisite" ); |
3109 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen); |
3110 | SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy, |
3111 | Ops: {DAG.getConstant(Val: ValueLen, DL: dl, VT: MVT::i32)}, DAG); |
3112 | MachineFunction &MF = DAG.getMachineFunction(); |
3113 | auto *MemOp = MF.getMachineMemOperand(MMO: StoreN->getMemOperand(), Offset: 0, Size: HwLen); |
3114 | return DAG.getMaskedStore(Chain, dl, Val: Value, Base, Offset, Mask, MemVT: ty(Op: Value), |
3115 | MMO: MemOp, AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false); |
3116 | } |
3117 | |
3118 | SDValue |
3119 | HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const { |
3120 | const SDLoc &dl(Op); |
3121 | SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1); |
3122 | MVT ElemTy = ty(Op: Op0).getVectorElementType(); |
3123 | unsigned HwLen = Subtarget.getVectorLength(); |
3124 | |
3125 | unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits(); |
3126 | assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen); |
3127 | MVT WideOpTy = MVT::getVectorVT(VT: ElemTy, NumElements: WideOpLen); |
3128 | if (!Subtarget.isHVXVectorType(VecTy: WideOpTy, IncludeBool: true)) |
3129 | return SDValue(); |
3130 | |
3131 | SDValue WideOp0 = appendUndef(Val: Op0, ResTy: WideOpTy, DAG); |
3132 | SDValue WideOp1 = appendUndef(Val: Op1, ResTy: WideOpTy, DAG); |
3133 | EVT ResTy = |
3134 | getSetCCResultType(DAG.getDataLayout(), C&: *DAG.getContext(), VT: WideOpTy); |
3135 | SDValue SetCC = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: ResTy, |
3136 | Ops: {WideOp0, WideOp1, Op.getOperand(i: 2)}); |
3137 | |
3138 | EVT RetTy = typeLegalize(Ty: ty(Op), DAG); |
3139 | return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: RetTy, |
3140 | Ops: {SetCC, getZero(dl, Ty: MVT::i32, DAG)}); |
3141 | } |
3142 | |
3143 | SDValue |
3144 | HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const { |
3145 | unsigned Opc = Op.getOpcode(); |
3146 | bool IsPairOp = isHvxPairTy(Ty: ty(Op)) || |
3147 | llvm::any_of(Range: Op.getNode()->ops(), P: [this] (SDValue V) { |
3148 | return isHvxPairTy(Ty: ty(Op: V)); |
3149 | }); |
3150 | |
3151 | if (IsPairOp) { |
3152 | switch (Opc) { |
3153 | default: |
3154 | break; |
3155 | case ISD::LOAD: |
3156 | case ISD::STORE: |
3157 | case ISD::MLOAD: |
3158 | case ISD::MSTORE: |
3159 | return SplitHvxMemOp(Op, DAG); |
3160 | case ISD::SINT_TO_FP: |
3161 | case ISD::UINT_TO_FP: |
3162 | case ISD::FP_TO_SINT: |
3163 | case ISD::FP_TO_UINT: |
3164 | if (ty(Op).getSizeInBits() == ty(Op: Op.getOperand(i: 0)).getSizeInBits()) |
3165 | return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG); |
3166 | break; |
3167 | case ISD::ABS: |
3168 | case ISD::CTPOP: |
3169 | case ISD::CTLZ: |
3170 | case ISD::CTTZ: |
3171 | case ISD::MUL: |
3172 | case ISD::FADD: |
3173 | case ISD::FSUB: |
3174 | case ISD::FMUL: |
3175 | case ISD::FMINIMUMNUM: |
3176 | case ISD::FMAXIMUMNUM: |
3177 | case ISD::MULHS: |
3178 | case ISD::MULHU: |
3179 | case ISD::AND: |
3180 | case ISD::OR: |
3181 | case ISD::XOR: |
3182 | case ISD::SRA: |
3183 | case ISD::SHL: |
3184 | case ISD::SRL: |
3185 | case ISD::FSHL: |
3186 | case ISD::FSHR: |
3187 | case ISD::SMIN: |
3188 | case ISD::SMAX: |
3189 | case ISD::UMIN: |
3190 | case ISD::UMAX: |
3191 | case ISD::SETCC: |
3192 | case ISD::VSELECT: |
3193 | case ISD::SIGN_EXTEND_INREG: |
3194 | case ISD::SPLAT_VECTOR: |
3195 | return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG); |
3196 | case ISD::SIGN_EXTEND: |
3197 | case ISD::ZERO_EXTEND: |
3198 | // In general, sign- and zero-extends can't be split and still |
3199 | // be legal. The only exception is extending bool vectors. |
3200 | if (ty(Op: Op.getOperand(i: 0)).getVectorElementType() == MVT::i1) |
3201 | return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG); |
3202 | break; |
3203 | } |
3204 | } |
3205 | |
3206 | switch (Opc) { |
3207 | default: |
3208 | break; |
3209 | case ISD::BUILD_VECTOR: return LowerHvxBuildVector(Op, DAG); |
3210 | case ISD::SPLAT_VECTOR: return LowerHvxSplatVector(Op, DAG); |
3211 | case ISD::CONCAT_VECTORS: return LowerHvxConcatVectors(Op, DAG); |
3212 | case ISD::INSERT_SUBVECTOR: return LowerHvxInsertSubvector(Op, DAG); |
3213 | case ISD::INSERT_VECTOR_ELT: return LowerHvxInsertElement(Op, DAG); |
3214 | case ISD::EXTRACT_SUBVECTOR: return LowerHvxExtractSubvector(Op, DAG); |
3215 | case ISD::EXTRACT_VECTOR_ELT: return LowerHvxExtractElement(Op, DAG); |
3216 | case ISD::BITCAST: return LowerHvxBitcast(Op, DAG); |
3217 | case ISD::ANY_EXTEND: return LowerHvxAnyExt(Op, DAG); |
3218 | case ISD::SIGN_EXTEND: return LowerHvxSignExt(Op, DAG); |
3219 | case ISD::ZERO_EXTEND: return LowerHvxZeroExt(Op, DAG); |
3220 | case ISD::CTTZ: return LowerHvxCttz(Op, DAG); |
3221 | case ISD::SELECT: return LowerHvxSelect(Op, DAG); |
3222 | case ISD::SRA: |
3223 | case ISD::SHL: |
3224 | case ISD::SRL: return LowerHvxShift(Op, DAG); |
3225 | case ISD::FSHL: |
3226 | case ISD::FSHR: return LowerHvxFunnelShift(Op, DAG); |
3227 | case ISD::MULHS: |
3228 | case ISD::MULHU: return LowerHvxMulh(Op, DAG); |
3229 | case ISD::SMUL_LOHI: |
3230 | case ISD::UMUL_LOHI: return LowerHvxMulLoHi(Op, DAG); |
3231 | case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG); |
3232 | case ISD::SETCC: |
3233 | case ISD::INTRINSIC_VOID: return Op; |
3234 | case ISD::INTRINSIC_WO_CHAIN: return LowerHvxIntrinsic(Op, DAG); |
3235 | case ISD::MLOAD: |
3236 | case ISD::MSTORE: return LowerHvxMaskedOp(Op, DAG); |
3237 | // Unaligned loads will be handled by the default lowering. |
3238 | case ISD::LOAD: return SDValue(); |
3239 | case ISD::FP_EXTEND: return LowerHvxFpExtend(Op, DAG); |
3240 | case ISD::FP_TO_SINT: |
3241 | case ISD::FP_TO_UINT: return LowerHvxFpToInt(Op, DAG); |
3242 | case ISD::SINT_TO_FP: |
3243 | case ISD::UINT_TO_FP: return LowerHvxIntToFp(Op, DAG); |
3244 | |
3245 | // Special nodes: |
3246 | case HexagonISD::SMUL_LOHI: |
3247 | case HexagonISD::UMUL_LOHI: |
3248 | case HexagonISD::USMUL_LOHI: return LowerHvxMulLoHi(Op, DAG); |
3249 | } |
3250 | #ifndef NDEBUG |
3251 | Op.dumpr(&DAG); |
3252 | #endif |
3253 | llvm_unreachable("Unhandled HVX operation" ); |
3254 | } |
3255 | |
3256 | SDValue |
3257 | HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG) |
3258 | const { |
3259 | // Rewrite the extension/truncation/saturation op into steps where each |
3260 | // step changes the type widths by a factor of 2. |
3261 | // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32. |
3262 | // |
3263 | // Some of the vector types in Op may not be legal. |
3264 | |
3265 | unsigned Opc = Op.getOpcode(); |
3266 | switch (Opc) { |
3267 | case HexagonISD::SSAT: |
3268 | case HexagonISD::USAT: |
3269 | case HexagonISD::TL_EXTEND: |
3270 | case HexagonISD::TL_TRUNCATE: |
3271 | break; |
3272 | case ISD::ANY_EXTEND: |
3273 | case ISD::ZERO_EXTEND: |
3274 | case ISD::SIGN_EXTEND: |
3275 | case ISD::TRUNCATE: |
3276 | llvm_unreachable("ISD:: ops will be auto-folded" ); |
    break;
  default:
#ifndef NDEBUG
3279 | Op.dump(&DAG); |
3280 | #endif |
3281 | llvm_unreachable("Unexpected operation" ); |
3282 | } |
3283 | |
3284 | SDValue Inp = Op.getOperand(i: 0); |
3285 | MVT InpTy = ty(Op: Inp); |
3286 | MVT ResTy = ty(Op); |
3287 | |
3288 | unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits(); |
3289 | unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits(); |
3290 | assert(InpWidth != ResWidth); |
3291 | |
3292 | if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth) |
3293 | return Op; |
3294 | |
3295 | const SDLoc &dl(Op); |
3296 | unsigned NumElems = InpTy.getVectorNumElements(); |
3297 | assert(NumElems == ResTy.getVectorNumElements()); |
3298 | |
3299 | auto repeatOp = [&](unsigned NewWidth, SDValue Arg) { |
3300 | MVT Ty = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NewWidth), NumElements: NumElems); |
3301 | switch (Opc) { |
3302 | case HexagonISD::SSAT: |
3303 | case HexagonISD::USAT: |
3304 | return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, DAG.getValueType(Ty)}); |
3305 | case HexagonISD::TL_EXTEND: |
3306 | case HexagonISD::TL_TRUNCATE: |
3307 | return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, Op.getOperand(i: 1), Op.getOperand(i: 2)}); |
3308 | default: |
3309 | llvm_unreachable("Unexpected opcode" ); |
3310 | } |
3311 | }; |
3312 | |
3313 | SDValue S = Inp; |
3314 | if (InpWidth < ResWidth) { |
3315 | assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth)); |
3316 | while (InpWidth * 2 <= ResWidth) |
3317 | S = repeatOp(InpWidth *= 2, S); |
3318 | } else { |
3319 | // InpWidth > ResWidth |
3320 | assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth)); |
3321 | while (InpWidth / 2 >= ResWidth) |
3322 | S = repeatOp(InpWidth /= 2, S); |
3323 | } |
3324 | return S; |
3325 | } |
3326 | |
3327 | SDValue |
3328 | HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const { |
3329 | SDValue Inp0 = Op.getOperand(i: 0); |
3330 | MVT InpTy = ty(Op: Inp0); |
3331 | MVT ResTy = ty(Op); |
3332 | unsigned InpWidth = InpTy.getSizeInBits(); |
3333 | unsigned ResWidth = ResTy.getSizeInBits(); |
3334 | unsigned Opc = Op.getOpcode(); |
3335 | |
3336 | if (shouldWidenToHvx(Ty: InpTy, DAG) || shouldWidenToHvx(Ty: ResTy, DAG)) { |
3337 | // First, make sure that the narrower type is widened to HVX. |
3338 | // This may cause the result to be wider than what the legalizer |
3339 | // expects, so insert EXTRACT_SUBVECTOR to bring it back to the |
3340 | // desired type. |
3341 | auto [WInpTy, WResTy] = |
3342 | InpWidth < ResWidth ? typeWidenToWider(Ty0: typeWidenToHvx(Ty: InpTy), Ty1: ResTy) |
3343 | : typeWidenToWider(Ty0: InpTy, Ty1: typeWidenToHvx(Ty: ResTy)); |
3344 | SDValue W = appendUndef(Val: Inp0, ResTy: WInpTy, DAG); |
3345 | SDValue S; |
3346 | if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) { |
3347 | S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: Op.getOperand(i: 1), |
3348 | N3: Op.getOperand(i: 2)); |
3349 | } else { |
3350 | S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: DAG.getValueType(WResTy)); |
3351 | } |
3352 | SDValue T = ExpandHvxResizeIntoSteps(Op: S, DAG); |
3353 | return extractSubvector(Vec: T, SubTy: typeLegalize(Ty: ResTy, DAG), SubIdx: 0, DAG); |
3354 | } else if (shouldSplitToHvx(Ty: InpWidth < ResWidth ? ResTy : InpTy, DAG)) { |
3355 | return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG); |
3356 | } else { |
3357 | assert(isTypeLegal(InpTy) && isTypeLegal(ResTy)); |
3358 | return RemoveTLWrapper(Op, DAG); |
3359 | } |
3360 | llvm_unreachable("Unexpected situation" ); |
3361 | } |
3362 | |
3363 | void |
3364 | HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N, |
3365 | SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { |
3366 | unsigned Opc = N->getOpcode(); |
3367 | SDValue Op(N, 0); |
3368 | SDValue Inp0; // Optional first argument. |
3369 | if (N->getNumOperands() > 0) |
3370 | Inp0 = Op.getOperand(i: 0); |
3371 | |
3372 | switch (Opc) { |
3373 | case ISD::ANY_EXTEND: |
3374 | case ISD::SIGN_EXTEND: |
3375 | case ISD::ZERO_EXTEND: |
3376 | case ISD::TRUNCATE: |
3377 | if (Subtarget.isHVXElementType(Ty: ty(Op)) && |
3378 | Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) { |
3379 | Results.push_back(Elt: CreateTLWrapper(Op, DAG)); |
3380 | } |
3381 | break; |
3382 | case ISD::SETCC: |
3383 | if (shouldWidenToHvx(Ty: ty(Op: Inp0), DAG)) { |
3384 | if (SDValue T = WidenHvxSetCC(Op, DAG)) |
3385 | Results.push_back(Elt: T); |
3386 | } |
3387 | break; |
3388 | case ISD::STORE: { |
3389 | if (shouldWidenToHvx(Ty: ty(Op: cast<StoreSDNode>(Val: N)->getValue()), DAG)) { |
3390 | SDValue Store = WidenHvxStore(Op, DAG); |
3391 | Results.push_back(Elt: Store); |
3392 | } |
3393 | break; |
3394 | } |
3395 | case ISD::MLOAD: |
3396 | if (isHvxPairTy(Ty: ty(Op))) { |
3397 | SDValue S = SplitHvxMemOp(Op, DAG); |
3398 | assert(S->getOpcode() == ISD::MERGE_VALUES); |
3399 | Results.push_back(Elt: S.getOperand(i: 0)); |
3400 | Results.push_back(Elt: S.getOperand(i: 1)); |
3401 | } |
3402 | break; |
3403 | case ISD::MSTORE: |
3404 | if (isHvxPairTy(Ty: ty(Op: Op->getOperand(Num: 1)))) { // Stored value |
3405 | SDValue S = SplitHvxMemOp(Op, DAG); |
3406 | Results.push_back(Elt: S); |
3407 | } |
3408 | break; |
3409 | case ISD::SINT_TO_FP: |
3410 | case ISD::UINT_TO_FP: |
3411 | case ISD::FP_TO_SINT: |
3412 | case ISD::FP_TO_UINT: |
3413 | if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) { |
3414 | SDValue T = EqualizeFpIntConversion(Op, DAG); |
3415 | Results.push_back(Elt: T); |
3416 | } |
3417 | break; |
3418 | case HexagonISD::SSAT: |
3419 | case HexagonISD::USAT: |
3420 | case HexagonISD::TL_EXTEND: |
3421 | case HexagonISD::TL_TRUNCATE: |
3422 | Results.push_back(Elt: LegalizeHvxResize(Op, DAG)); |
3423 | break; |
3424 | default: |
3425 | break; |
3426 | } |
3427 | } |
3428 | |
3429 | void |
3430 | HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N, |
3431 | SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const { |
3432 | unsigned Opc = N->getOpcode(); |
3433 | SDValue Op(N, 0); |
3434 | SDValue Inp0; // Optional first argument. |
3435 | if (N->getNumOperands() > 0) |
3436 | Inp0 = Op.getOperand(i: 0); |
3437 | |
3438 | switch (Opc) { |
3439 | case ISD::ANY_EXTEND: |
3440 | case ISD::SIGN_EXTEND: |
3441 | case ISD::ZERO_EXTEND: |
3442 | case ISD::TRUNCATE: |
3443 | if (Subtarget.isHVXElementType(Ty: ty(Op)) && |
3444 | Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) { |
3445 | Results.push_back(Elt: CreateTLWrapper(Op, DAG)); |
3446 | } |
3447 | break; |
3448 | case ISD::SETCC: |
3449 | if (shouldWidenToHvx(Ty: ty(Op), DAG)) { |
3450 | if (SDValue T = WidenHvxSetCC(Op, DAG)) |
3451 | Results.push_back(Elt: T); |
3452 | } |
3453 | break; |
3454 | case ISD::LOAD: { |
3455 | if (shouldWidenToHvx(Ty: ty(Op), DAG)) { |
3456 | SDValue Load = WidenHvxLoad(Op, DAG); |
3457 | assert(Load->getOpcode() == ISD::MERGE_VALUES); |
3458 | Results.push_back(Elt: Load.getOperand(i: 0)); |
3459 | Results.push_back(Elt: Load.getOperand(i: 1)); |
3460 | } |
3461 | break; |
3462 | } |
3463 | case ISD::BITCAST: |
3464 | if (isHvxBoolTy(Ty: ty(Op: Inp0))) { |
3465 | SDValue C = LowerHvxBitcast(Op, DAG); |
3466 | Results.push_back(Elt: C); |
3467 | } |
3468 | break; |
3469 | case ISD::FP_TO_SINT: |
3470 | case ISD::FP_TO_UINT: |
3471 | if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) { |
3472 | SDValue T = EqualizeFpIntConversion(Op, DAG); |
3473 | Results.push_back(Elt: T); |
3474 | } |
3475 | break; |
3476 | case HexagonISD::SSAT: |
3477 | case HexagonISD::USAT: |
3478 | case HexagonISD::TL_EXTEND: |
3479 | case HexagonISD::TL_TRUNCATE: |
3480 | Results.push_back(Elt: LegalizeHvxResize(Op, DAG)); |
3481 | break; |
3482 | default: |
3483 | break; |
3484 | } |
3485 | } |
3486 | |
3487 | SDValue |
3488 | HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op, |
3489 | DAGCombinerInfo &DCI) const { |
3490 | // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB |
3491 | // to extract-subvector (shuffle V, pick even, pick odd) |
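  // The mask built below "deals" the 2N source elements into the even-indexed
  // elements followed by the odd-indexed ones; the first half of that shuffle
  // holds the low halves of the i2B elements (Hexagon is little-endian), which
  // is exactly the truncated vNiB value.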
3492 | |
3493 | assert(Op.getOpcode() == ISD::TRUNCATE); |
3494 | SelectionDAG &DAG = DCI.DAG; |
3495 | const SDLoc &dl(Op); |
3496 | |
  if (Op.getOperand(i: 0).getOpcode() != ISD::BITCAST)
3498 | return SDValue(); |
3499 | SDValue Cast = Op.getOperand(i: 0); |
3500 | SDValue Src = Cast.getOperand(i: 0); |
3501 | |
3502 | EVT TruncTy = Op.getValueType(); |
3503 | EVT CastTy = Cast.getValueType(); |
3504 | EVT SrcTy = Src.getValueType(); |
3505 | if (SrcTy.isSimple()) |
3506 | return SDValue(); |
3507 | if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType()) |
3508 | return SDValue(); |
3509 | unsigned SrcLen = SrcTy.getVectorNumElements(); |
3510 | unsigned CastLen = CastTy.getVectorNumElements(); |
3511 | if (2 * CastLen != SrcLen) |
3512 | return SDValue(); |
3513 | |
3514 | SmallVector<int, 128> Mask(SrcLen); |
3515 | for (int i = 0; i != static_cast<int>(CastLen); ++i) { |
3516 | Mask[i] = 2 * i; |
3517 | Mask[i + CastLen] = 2 * i + 1; |
3518 | } |
3519 | SDValue Deal = |
3520 | DAG.getVectorShuffle(VT: SrcTy, dl, N1: Src, N2: DAG.getUNDEF(VT: SrcTy), Mask); |
3521 | return opSplit(Vec: Deal, dl, DAG).first; |
3522 | } |
3523 | |
3524 | SDValue |
3525 | HexagonTargetLowering::combineConcatVectorsBeforeLegal( |
3526 | SDValue Op, DAGCombinerInfo &DCI) const { |
3527 | // Fold |
3528 | // concat (shuffle x, y, m1), (shuffle x, y, m2) |
3529 | // into |
3530 | // shuffle (concat x, y), undef, m3 |
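  // m3 is m1 followed by m2, with each index rebased: lanes taken from the
  // first distinct input keep their index, lanes taken from the second
  // distinct input are offset by the input vector length (see AppendToMask
  // below).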
3531 | if (Op.getNumOperands() != 2) |
3532 | return SDValue(); |
3533 | |
3534 | SelectionDAG &DAG = DCI.DAG; |
3535 | const SDLoc &dl(Op); |
3536 | SDValue V0 = Op.getOperand(i: 0); |
3537 | SDValue V1 = Op.getOperand(i: 1); |
3538 | |
3539 | if (V0.getOpcode() != ISD::VECTOR_SHUFFLE) |
3540 | return SDValue(); |
3541 | if (V1.getOpcode() != ISD::VECTOR_SHUFFLE) |
3542 | return SDValue(); |
3543 | |
3544 | SetVector<SDValue> Order; |
3545 | Order.insert(X: V0.getOperand(i: 0)); |
3546 | Order.insert(X: V0.getOperand(i: 1)); |
3547 | Order.insert(X: V1.getOperand(i: 0)); |
3548 | Order.insert(X: V1.getOperand(i: 1)); |
3549 | |
3550 | if (Order.size() > 2) |
3551 | return SDValue(); |
3552 | |
3553 | // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the |
3554 | // result must be the same. |
3555 | EVT InpTy = V0.getValueType(); |
3556 | assert(InpTy.isVector()); |
3557 | unsigned InpLen = InpTy.getVectorNumElements(); |
3558 | |
3559 | SmallVector<int, 128> LongMask; |
3560 | auto AppendToMask = [&](SDValue Shuffle) { |
3561 | auto *SV = cast<ShuffleVectorSDNode>(Val: Shuffle.getNode()); |
3562 | ArrayRef<int> Mask = SV->getMask(); |
3563 | SDValue X = Shuffle.getOperand(i: 0); |
3564 | SDValue Y = Shuffle.getOperand(i: 1); |
3565 | for (int M : Mask) { |
3566 | if (M == -1) { |
3567 | LongMask.push_back(Elt: M); |
3568 | continue; |
3569 | } |
3570 | SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y; |
3571 | if (static_cast<unsigned>(M) >= InpLen) |
3572 | M -= InpLen; |
3573 | |
3574 | int OutOffset = Order[0] == Src ? 0 : InpLen; |
3575 | LongMask.push_back(Elt: M + OutOffset); |
3576 | } |
3577 | }; |
3578 | |
3579 | AppendToMask(V0); |
3580 | AppendToMask(V1); |
3581 | |
3582 | SDValue C0 = Order.front(); |
3583 | SDValue C1 = Order.back(); // Can be same as front |
3584 | EVT LongTy = InpTy.getDoubleNumVectorElementsVT(Context&: *DAG.getContext()); |
3585 | |
3586 | SDValue Cat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: LongTy, Ops: {C0, C1}); |
3587 | return DAG.getVectorShuffle(VT: LongTy, dl, N1: Cat, N2: DAG.getUNDEF(VT: LongTy), Mask: LongMask); |
3588 | } |
3589 | |
3590 | SDValue |
3591 | HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI) |
3592 | const { |
3593 | const SDLoc &dl(N); |
3594 | SelectionDAG &DAG = DCI.DAG; |
3595 | SDValue Op(N, 0); |
3596 | unsigned Opc = Op.getOpcode(); |
3597 | |
3598 | SmallVector<SDValue, 4> Ops(N->ops()); |
3599 | |
3600 | if (Opc == ISD::TRUNCATE) |
3601 | return combineTruncateBeforeLegal(Op, DCI); |
3602 | if (Opc == ISD::CONCAT_VECTORS) |
3603 | return combineConcatVectorsBeforeLegal(Op, DCI); |
3604 | |
3605 | if (DCI.isBeforeLegalizeOps()) |
3606 | return SDValue(); |
3607 | |
3608 | switch (Opc) { |
3609 | case ISD::VSELECT: { |
3610 | // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0) |
3611 | SDValue Cond = Ops[0]; |
3612 | if (Cond->getOpcode() == ISD::XOR) { |
3613 | SDValue C0 = Cond.getOperand(i: 0), C1 = Cond.getOperand(i: 1); |
3614 | if (C1->getOpcode() == HexagonISD::QTRUE) |
3615 | return DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ty(Op), N1: C0, N2: Ops[2], N3: Ops[1]); |
3616 | } |
3617 | break; |
3618 | } |
3619 | case HexagonISD::V2Q: |
3620 | if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) { |
3621 | if (const auto *C = dyn_cast<ConstantSDNode>(Val: Ops[0].getOperand(i: 0))) |
3622 | return C->isZero() ? DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: ty(Op)) |
3623 | : DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: ty(Op)); |
3624 | } |
3625 | break; |
3626 | case HexagonISD::Q2V: |
3627 | if (Ops[0].getOpcode() == HexagonISD::QTRUE) |
3628 | return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ty(Op), |
3629 | Operand: DAG.getAllOnesConstant(DL: dl, VT: MVT::i32)); |
3630 | if (Ops[0].getOpcode() == HexagonISD::QFALSE) |
3631 | return getZero(dl, Ty: ty(Op), DAG); |
3632 | break; |
3633 | case HexagonISD::VINSERTW0: |
3634 | if (isUndef(Op: Ops[1])) |
3635 | return Ops[0]; |
3636 | break; |
3637 | case HexagonISD::VROR: { |
3638 | if (Ops[0].getOpcode() == HexagonISD::VROR) { |
3639 | SDValue Vec = Ops[0].getOperand(i: 0); |
3640 | SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(i: 1); |
3641 | SDValue Rot = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ty(Op: Rot0), Ops: {Rot0, Rot1}); |
3642 | return DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ty(Op), Ops: {Vec, Rot}); |
3643 | } |
3644 | break; |
3645 | } |
3646 | } |
3647 | |
3648 | return SDValue(); |
3649 | } |
3650 | |
3651 | bool |
3652 | HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const { |
3653 | if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true)) |
3654 | return false; |
3655 | auto Action = getPreferredHvxVectorAction(VecTy: Ty); |
3656 | if (Action == TargetLoweringBase::TypeSplitVector) |
3657 | return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true); |
3658 | return false; |
3659 | } |
3660 | |
3661 | bool |
3662 | HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const { |
3663 | if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true)) |
3664 | return false; |
3665 | auto Action = getPreferredHvxVectorAction(VecTy: Ty); |
3666 | if (Action == TargetLoweringBase::TypeWidenVector) |
3667 | return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true); |
3668 | return false; |
3669 | } |
3670 | |
3671 | bool |
3672 | HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { |
3673 | if (!Subtarget.useHVXOps()) |
3674 | return false; |
3675 | // If the type of any result, or any operand type are HVX vector types, |
3676 | // this is an HVX operation. |
3677 | auto IsHvxTy = [this](EVT Ty) { |
3678 | return Ty.isSimple() && Subtarget.isHVXVectorType(VecTy: Ty.getSimpleVT(), IncludeBool: true); |
3679 | }; |
3680 | auto IsHvxOp = [this](SDValue Op) { |
3681 | return Op.getValueType().isSimple() && |
3682 | Subtarget.isHVXVectorType(VecTy: ty(Op), IncludeBool: true); |
3683 | }; |
3684 | if (llvm::any_of(Range: N->values(), P: IsHvxTy) || llvm::any_of(Range: N->ops(), P: IsHvxOp)) |
3685 | return true; |
3686 | |
3687 | // Check if this could be an HVX operation after type widening. |
3688 | auto IsWidenedToHvx = [this, &DAG](SDValue Op) { |
3689 | if (!Op.getValueType().isSimple()) |
3690 | return false; |
3691 | MVT ValTy = ty(Op); |
3692 | return ValTy.isVector() && shouldWidenToHvx(Ty: ValTy, DAG); |
3693 | }; |
3694 | |
3695 | for (int i = 0, e = N->getNumValues(); i != e; ++i) { |
3696 | if (IsWidenedToHvx(SDValue(N, i))) |
3697 | return true; |
3698 | } |
3699 | return llvm::any_of(Range: N->ops(), P: IsWidenedToHvx); |
3700 | } |
3701 | |