1 | //===-- HexagonISelLoweringHVX.cpp --- Lowering HVX operations ------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "HexagonISelLowering.h" |
10 | #include "HexagonRegisterInfo.h" |
11 | #include "HexagonSubtarget.h" |
12 | #include "llvm/ADT/SetVector.h" |
13 | #include "llvm/ADT/SmallVector.h" |
14 | #include "llvm/Analysis/MemoryLocation.h" |
15 | #include "llvm/CodeGen/MachineBasicBlock.h" |
16 | #include "llvm/CodeGen/MachineFunction.h" |
17 | #include "llvm/CodeGen/MachineInstr.h" |
18 | #include "llvm/CodeGen/MachineOperand.h" |
19 | #include "llvm/CodeGen/MachineRegisterInfo.h" |
20 | #include "llvm/CodeGen/TargetInstrInfo.h" |
21 | #include "llvm/IR/IntrinsicsHexagon.h" |
22 | #include "llvm/Support/CommandLine.h" |
23 | |
24 | #include <algorithm> |
25 | #include <string> |
26 | #include <utility> |
27 | |
28 | using namespace llvm; |
29 | |
// Command-line override for the lowest size (in bytes) at which a short
// vector is widened to a full HVX vector; consulted (when set) in
// getPreferredHvxVectorAction below.
static cl::opt<unsigned> HvxWidenThreshold("hexagon-hvx-widen" ,
  cl::Hidden, cl::init(Val: 16),
  cl::desc("Lower threshold (in bytes) for widening to HVX vectors" ));

// Legal single-register (V) and register-pair (W) HVX integer vector
// types, for the 64-byte and 128-byte vector lengths respectively.
static const MVT LegalV64[] = { MVT::v64i8, MVT::v32i16, MVT::v16i32 };
static const MVT LegalW64[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalV128[] = { MVT::v128i8, MVT::v64i16, MVT::v32i32 };
static const MVT LegalW128[] = { MVT::v256i8, MVT::v128i16, MVT::v64i32 };
38 | |
39 | static std::tuple<unsigned, unsigned, unsigned> getIEEEProperties(MVT Ty) { |
40 | // For a float scalar type, return (exp-bits, exp-bias, fraction-bits) |
41 | MVT ElemTy = Ty.getScalarType(); |
42 | switch (ElemTy.SimpleTy) { |
43 | case MVT::f16: |
44 | return std::make_tuple(args: 5, args: 15, args: 10); |
45 | case MVT::f32: |
46 | return std::make_tuple(args: 8, args: 127, args: 23); |
47 | case MVT::f64: |
48 | return std::make_tuple(args: 11, args: 1023, args: 52); |
49 | default: |
50 | break; |
51 | } |
52 | llvm_unreachable(("Unexpected type: " + EVT(ElemTy).getEVTString()).c_str()); |
53 | } |
54 | |
55 | void |
56 | HexagonTargetLowering::initializeHVXLowering() { |
57 | if (Subtarget.useHVX64BOps()) { |
58 | addRegisterClass(VT: MVT::v64i8, RC: &Hexagon::HvxVRRegClass); |
59 | addRegisterClass(VT: MVT::v32i16, RC: &Hexagon::HvxVRRegClass); |
60 | addRegisterClass(VT: MVT::v16i32, RC: &Hexagon::HvxVRRegClass); |
61 | addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxWRRegClass); |
62 | addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxWRRegClass); |
63 | addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxWRRegClass); |
64 | // These "short" boolean vector types should be legal because |
65 | // they will appear as results of vector compares. If they were |
66 | // not legal, type legalization would try to make them legal |
67 | // and that would require using operations that do not use or |
68 | // produce such types. That, in turn, would imply using custom |
69 | // nodes, which would be unoptimizable by the DAG combiner. |
70 | // The idea is to rely on target-independent operations as much |
71 | // as possible. |
72 | addRegisterClass(VT: MVT::v16i1, RC: &Hexagon::HvxQRRegClass); |
73 | addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass); |
74 | addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass); |
75 | } else if (Subtarget.useHVX128BOps()) { |
76 | addRegisterClass(VT: MVT::v128i8, RC: &Hexagon::HvxVRRegClass); |
77 | addRegisterClass(VT: MVT::v64i16, RC: &Hexagon::HvxVRRegClass); |
78 | addRegisterClass(VT: MVT::v32i32, RC: &Hexagon::HvxVRRegClass); |
79 | addRegisterClass(VT: MVT::v256i8, RC: &Hexagon::HvxWRRegClass); |
80 | addRegisterClass(VT: MVT::v128i16, RC: &Hexagon::HvxWRRegClass); |
81 | addRegisterClass(VT: MVT::v64i32, RC: &Hexagon::HvxWRRegClass); |
82 | addRegisterClass(VT: MVT::v32i1, RC: &Hexagon::HvxQRRegClass); |
83 | addRegisterClass(VT: MVT::v64i1, RC: &Hexagon::HvxQRRegClass); |
84 | addRegisterClass(VT: MVT::v128i1, RC: &Hexagon::HvxQRRegClass); |
85 | if (Subtarget.useHVXV68Ops() && Subtarget.useHVXFloatingPoint()) { |
86 | addRegisterClass(VT: MVT::v32f32, RC: &Hexagon::HvxVRRegClass); |
87 | addRegisterClass(VT: MVT::v64f16, RC: &Hexagon::HvxVRRegClass); |
88 | addRegisterClass(VT: MVT::v64f32, RC: &Hexagon::HvxWRRegClass); |
89 | addRegisterClass(VT: MVT::v128f16, RC: &Hexagon::HvxWRRegClass); |
90 | } |
91 | } |
92 | |
93 | // Set up operation actions. |
94 | |
95 | bool Use64b = Subtarget.useHVX64BOps(); |
96 | ArrayRef<MVT> LegalV = Use64b ? LegalV64 : LegalV128; |
97 | ArrayRef<MVT> LegalW = Use64b ? LegalW64 : LegalW128; |
98 | MVT ByteV = Use64b ? MVT::v64i8 : MVT::v128i8; |
99 | MVT WordV = Use64b ? MVT::v16i32 : MVT::v32i32; |
100 | MVT ByteW = Use64b ? MVT::v128i8 : MVT::v256i8; |
101 | |
102 | auto setPromoteTo = [this] (unsigned Opc, MVT FromTy, MVT ToTy) { |
103 | setOperationAction(Op: Opc, VT: FromTy, Action: Promote); |
104 | AddPromotedToType(Opc, OrigVT: FromTy, DestVT: ToTy); |
105 | }; |
106 | |
107 | // Handle bitcasts of vector predicates to scalars (e.g. v32i1 to i32). |
108 | // Note: v16i1 -> i16 is handled in type legalization instead of op |
109 | // legalization. |
110 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i16, Action: Custom); |
111 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i32, Action: Custom); |
112 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i64, Action: Custom); |
113 | setOperationAction(Op: ISD::BITCAST, VT: MVT::v16i1, Action: Custom); |
114 | setOperationAction(Op: ISD::BITCAST, VT: MVT::v128i1, Action: Custom); |
115 | setOperationAction(Op: ISD::BITCAST, VT: MVT::i128, Action: Custom); |
116 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteV, Action: Legal); |
117 | setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: ByteW, Action: Legal); |
118 | setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom); |
119 | |
120 | if (Subtarget.useHVX128BOps() && Subtarget.useHVXV68Ops() && |
121 | Subtarget.useHVXFloatingPoint()) { |
122 | |
123 | static const MVT FloatV[] = { MVT::v64f16, MVT::v32f32 }; |
124 | static const MVT FloatW[] = { MVT::v128f16, MVT::v64f32 }; |
125 | |
126 | for (MVT T : FloatV) { |
127 | setOperationAction(Op: ISD::FADD, VT: T, Action: Legal); |
128 | setOperationAction(Op: ISD::FSUB, VT: T, Action: Legal); |
129 | setOperationAction(Op: ISD::FMUL, VT: T, Action: Legal); |
130 | setOperationAction(Op: ISD::FMINNUM, VT: T, Action: Legal); |
131 | setOperationAction(Op: ISD::FMAXNUM, VT: T, Action: Legal); |
132 | |
133 | setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom); |
134 | setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom); |
135 | |
136 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal); |
137 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal); |
138 | |
139 | setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom); |
140 | setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom); |
141 | // Custom-lower BUILD_VECTOR. The standard (target-independent) |
142 | // handling of it would convert it to a load, which is not always |
143 | // the optimal choice. |
144 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom); |
145 | } |
146 | |
147 | |
148 | // BUILD_VECTOR with f16 operands cannot be promoted without |
149 | // promoting the result, so lower the node to vsplat or constant pool |
150 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom); |
151 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: MVT::f16, Action: Custom); |
152 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: MVT::f16, Action: Custom); |
153 | |
154 | // Vector shuffle is always promoted to ByteV and a bitcast to f16 is |
155 | // generated. |
156 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v128f16, ByteW); |
157 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f16, ByteV); |
158 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v64f32, ByteW); |
159 | setPromoteTo(ISD::VECTOR_SHUFFLE, MVT::v32f32, ByteV); |
160 | |
161 | for (MVT P : FloatW) { |
162 | setOperationAction(Op: ISD::LOAD, VT: P, Action: Custom); |
163 | setOperationAction(Op: ISD::STORE, VT: P, Action: Custom); |
164 | setOperationAction(Op: ISD::FADD, VT: P, Action: Custom); |
165 | setOperationAction(Op: ISD::FSUB, VT: P, Action: Custom); |
166 | setOperationAction(Op: ISD::FMUL, VT: P, Action: Custom); |
167 | setOperationAction(Op: ISD::FMINNUM, VT: P, Action: Custom); |
168 | setOperationAction(Op: ISD::FMAXNUM, VT: P, Action: Custom); |
169 | setOperationAction(Op: ISD::SETCC, VT: P, Action: Custom); |
170 | setOperationAction(Op: ISD::VSELECT, VT: P, Action: Custom); |
171 | |
172 | // Custom-lower BUILD_VECTOR. The standard (target-independent) |
173 | // handling of it would convert it to a load, which is not always |
174 | // the optimal choice. |
175 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: P, Action: Custom); |
176 | // Make concat-vectors custom to handle concats of more than 2 vectors. |
177 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: P, Action: Custom); |
178 | |
179 | setOperationAction(Op: ISD::MLOAD, VT: P, Action: Custom); |
180 | setOperationAction(Op: ISD::MSTORE, VT: P, Action: Custom); |
181 | } |
182 | |
183 | if (Subtarget.useHVXQFloatOps()) { |
184 | setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Custom); |
185 | setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal); |
186 | } else if (Subtarget.useHVXIEEEFPOps()) { |
187 | setOperationAction(Op: ISD::FP_EXTEND, VT: MVT::v64f32, Action: Legal); |
188 | setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v64f16, Action: Legal); |
189 | } |
190 | } |
191 | |
192 | for (MVT T : LegalV) { |
193 | setIndexedLoadAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal); |
194 | setIndexedStoreAction(IdxModes: ISD::POST_INC, VT: T, Action: Legal); |
195 | |
196 | setOperationAction(Op: ISD::ABS, VT: T, Action: Legal); |
197 | setOperationAction(Op: ISD::AND, VT: T, Action: Legal); |
198 | setOperationAction(Op: ISD::OR, VT: T, Action: Legal); |
199 | setOperationAction(Op: ISD::XOR, VT: T, Action: Legal); |
200 | setOperationAction(Op: ISD::ADD, VT: T, Action: Legal); |
201 | setOperationAction(Op: ISD::SUB, VT: T, Action: Legal); |
202 | setOperationAction(Op: ISD::MUL, VT: T, Action: Legal); |
203 | setOperationAction(Op: ISD::CTPOP, VT: T, Action: Legal); |
204 | setOperationAction(Op: ISD::CTLZ, VT: T, Action: Legal); |
205 | setOperationAction(Op: ISD::SELECT, VT: T, Action: Legal); |
206 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal); |
207 | if (T != ByteV) { |
208 | setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
209 | setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
210 | setOperationAction(Op: ISD::BSWAP, VT: T, Action: Legal); |
211 | } |
212 | |
213 | setOperationAction(Op: ISD::SMIN, VT: T, Action: Legal); |
214 | setOperationAction(Op: ISD::SMAX, VT: T, Action: Legal); |
215 | if (T.getScalarType() != MVT::i32) { |
216 | setOperationAction(Op: ISD::UMIN, VT: T, Action: Legal); |
217 | setOperationAction(Op: ISD::UMAX, VT: T, Action: Legal); |
218 | } |
219 | |
220 | setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom); |
221 | setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom); |
222 | setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom); |
223 | setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom); |
224 | if (T.getScalarType() != MVT::i32) { |
225 | setOperationAction(Op: ISD::MULHS, VT: T, Action: Legal); |
226 | setOperationAction(Op: ISD::MULHU, VT: T, Action: Legal); |
227 | } |
228 | |
229 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom); |
230 | // Make concat-vectors custom to handle concats of more than 2 vectors. |
231 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom); |
232 | setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: T, Action: Custom); |
233 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: T, Action: Custom); |
234 | setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: T, Action: Custom); |
235 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: T, Action: Custom); |
236 | setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom); |
237 | setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom); |
238 | setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom); |
239 | setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom); |
240 | setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom); |
241 | if (T != ByteV) { |
242 | setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom); |
243 | // HVX only has shifts of words and halfwords. |
244 | setOperationAction(Op: ISD::SRA, VT: T, Action: Custom); |
245 | setOperationAction(Op: ISD::SHL, VT: T, Action: Custom); |
246 | setOperationAction(Op: ISD::SRL, VT: T, Action: Custom); |
247 | |
248 | // Promote all shuffles to operate on vectors of bytes. |
249 | setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteV); |
250 | } |
251 | |
252 | if (Subtarget.useHVXFloatingPoint()) { |
253 | // Same action for both QFloat and IEEE. |
254 | setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom); |
255 | setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom); |
256 | setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom); |
257 | setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom); |
258 | } |
259 | |
260 | setCondCodeAction(CCs: ISD::SETNE, VT: T, Action: Expand); |
261 | setCondCodeAction(CCs: ISD::SETLE, VT: T, Action: Expand); |
262 | setCondCodeAction(CCs: ISD::SETGE, VT: T, Action: Expand); |
263 | setCondCodeAction(CCs: ISD::SETLT, VT: T, Action: Expand); |
264 | setCondCodeAction(CCs: ISD::SETULE, VT: T, Action: Expand); |
265 | setCondCodeAction(CCs: ISD::SETUGE, VT: T, Action: Expand); |
266 | setCondCodeAction(CCs: ISD::SETULT, VT: T, Action: Expand); |
267 | } |
268 | |
269 | for (MVT T : LegalW) { |
270 | // Custom-lower BUILD_VECTOR for vector pairs. The standard (target- |
271 | // independent) handling of it would convert it to a load, which is |
272 | // not always the optimal choice. |
273 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom); |
274 | // Make concat-vectors custom to handle concats of more than 2 vectors. |
275 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: T, Action: Custom); |
276 | |
277 | // Custom-lower these operations for pairs. Expand them into a concat |
278 | // of the corresponding operations on individual vectors. |
279 | setOperationAction(Op: ISD::ANY_EXTEND, VT: T, Action: Custom); |
280 | setOperationAction(Op: ISD::SIGN_EXTEND, VT: T, Action: Custom); |
281 | setOperationAction(Op: ISD::ZERO_EXTEND, VT: T, Action: Custom); |
282 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Custom); |
283 | setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom); |
284 | setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
285 | setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Legal); |
286 | setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Custom); |
287 | |
288 | setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom); |
289 | setOperationAction(Op: ISD::STORE, VT: T, Action: Custom); |
290 | setOperationAction(Op: ISD::MLOAD, VT: T, Action: Custom); |
291 | setOperationAction(Op: ISD::MSTORE, VT: T, Action: Custom); |
292 | setOperationAction(Op: ISD::ABS, VT: T, Action: Custom); |
293 | setOperationAction(Op: ISD::CTLZ, VT: T, Action: Custom); |
294 | setOperationAction(Op: ISD::CTTZ, VT: T, Action: Custom); |
295 | setOperationAction(Op: ISD::CTPOP, VT: T, Action: Custom); |
296 | |
297 | setOperationAction(Op: ISD::ADD, VT: T, Action: Legal); |
298 | setOperationAction(Op: ISD::SUB, VT: T, Action: Legal); |
299 | setOperationAction(Op: ISD::MUL, VT: T, Action: Custom); |
300 | setOperationAction(Op: ISD::MULHS, VT: T, Action: Custom); |
301 | setOperationAction(Op: ISD::MULHU, VT: T, Action: Custom); |
302 | setOperationAction(Op: ISD::AND, VT: T, Action: Custom); |
303 | setOperationAction(Op: ISD::OR, VT: T, Action: Custom); |
304 | setOperationAction(Op: ISD::XOR, VT: T, Action: Custom); |
305 | setOperationAction(Op: ISD::SETCC, VT: T, Action: Custom); |
306 | setOperationAction(Op: ISD::VSELECT, VT: T, Action: Custom); |
307 | if (T != ByteW) { |
308 | setOperationAction(Op: ISD::SRA, VT: T, Action: Custom); |
309 | setOperationAction(Op: ISD::SHL, VT: T, Action: Custom); |
310 | setOperationAction(Op: ISD::SRL, VT: T, Action: Custom); |
311 | |
312 | // Promote all shuffles to operate on vectors of bytes. |
313 | setPromoteTo(ISD::VECTOR_SHUFFLE, T, ByteW); |
314 | } |
315 | setOperationAction(Op: ISD::FSHL, VT: T, Action: Custom); |
316 | setOperationAction(Op: ISD::FSHR, VT: T, Action: Custom); |
317 | |
318 | setOperationAction(Op: ISD::SMIN, VT: T, Action: Custom); |
319 | setOperationAction(Op: ISD::SMAX, VT: T, Action: Custom); |
320 | if (T.getScalarType() != MVT::i32) { |
321 | setOperationAction(Op: ISD::UMIN, VT: T, Action: Custom); |
322 | setOperationAction(Op: ISD::UMAX, VT: T, Action: Custom); |
323 | } |
324 | |
325 | if (Subtarget.useHVXFloatingPoint()) { |
326 | // Same action for both QFloat and IEEE. |
327 | setOperationAction(Op: ISD::SINT_TO_FP, VT: T, Action: Custom); |
328 | setOperationAction(Op: ISD::UINT_TO_FP, VT: T, Action: Custom); |
329 | setOperationAction(Op: ISD::FP_TO_SINT, VT: T, Action: Custom); |
330 | setOperationAction(Op: ISD::FP_TO_UINT, VT: T, Action: Custom); |
331 | } |
332 | } |
333 | |
334 | // Legalize all of these to HexagonISD::[SU]MUL_LOHI. |
335 | setOperationAction(Op: ISD::MULHS, VT: WordV, Action: Custom); // -> _LOHI |
336 | setOperationAction(Op: ISD::MULHU, VT: WordV, Action: Custom); // -> _LOHI |
337 | setOperationAction(Op: ISD::SMUL_LOHI, VT: WordV, Action: Custom); |
338 | setOperationAction(Op: ISD::UMUL_LOHI, VT: WordV, Action: Custom); |
339 | |
340 | setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v64f16, Action: Expand); |
341 | setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v64f16, Action: Expand); |
342 | setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v64f16, Action: Expand); |
343 | setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v64f16, Action: Expand); |
344 | setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v64f16, Action: Expand); |
345 | setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v64f16, Action: Expand); |
346 | setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v64f16, Action: Expand); |
347 | setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v64f16, Action: Expand); |
348 | setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v64f16, Action: Expand); |
349 | setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v64f16, Action: Expand); |
350 | setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v64f16, Action: Expand); |
351 | setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v64f16, Action: Expand); |
352 | |
353 | setCondCodeAction(CCs: ISD::SETNE, VT: MVT::v32f32, Action: Expand); |
354 | setCondCodeAction(CCs: ISD::SETLE, VT: MVT::v32f32, Action: Expand); |
355 | setCondCodeAction(CCs: ISD::SETGE, VT: MVT::v32f32, Action: Expand); |
356 | setCondCodeAction(CCs: ISD::SETLT, VT: MVT::v32f32, Action: Expand); |
357 | setCondCodeAction(CCs: ISD::SETONE, VT: MVT::v32f32, Action: Expand); |
358 | setCondCodeAction(CCs: ISD::SETOLE, VT: MVT::v32f32, Action: Expand); |
359 | setCondCodeAction(CCs: ISD::SETOGE, VT: MVT::v32f32, Action: Expand); |
360 | setCondCodeAction(CCs: ISD::SETOLT, VT: MVT::v32f32, Action: Expand); |
361 | setCondCodeAction(CCs: ISD::SETUNE, VT: MVT::v32f32, Action: Expand); |
362 | setCondCodeAction(CCs: ISD::SETULE, VT: MVT::v32f32, Action: Expand); |
363 | setCondCodeAction(CCs: ISD::SETUGE, VT: MVT::v32f32, Action: Expand); |
364 | setCondCodeAction(CCs: ISD::SETULT, VT: MVT::v32f32, Action: Expand); |
365 | |
366 | // Boolean vectors. |
367 | |
368 | for (MVT T : LegalW) { |
369 | // Boolean types for vector pairs will overlap with the boolean |
370 | // types for single vectors, e.g. |
371 | // v64i8 -> v64i1 (single) |
372 | // v64i16 -> v64i1 (pair) |
373 | // Set these actions first, and allow the single actions to overwrite |
374 | // any duplicates. |
375 | MVT BoolW = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements()); |
376 | setOperationAction(Op: ISD::SETCC, VT: BoolW, Action: Custom); |
377 | setOperationAction(Op: ISD::AND, VT: BoolW, Action: Custom); |
378 | setOperationAction(Op: ISD::OR, VT: BoolW, Action: Custom); |
379 | setOperationAction(Op: ISD::XOR, VT: BoolW, Action: Custom); |
380 | // Masked load/store takes a mask that may need splitting. |
381 | setOperationAction(Op: ISD::MLOAD, VT: BoolW, Action: Custom); |
382 | setOperationAction(Op: ISD::MSTORE, VT: BoolW, Action: Custom); |
383 | } |
384 | |
385 | for (MVT T : LegalV) { |
386 | MVT BoolV = MVT::getVectorVT(VT: MVT::i1, NumElements: T.getVectorNumElements()); |
387 | setOperationAction(Op: ISD::BUILD_VECTOR, VT: BoolV, Action: Custom); |
388 | setOperationAction(Op: ISD::CONCAT_VECTORS, VT: BoolV, Action: Custom); |
389 | setOperationAction(Op: ISD::INSERT_SUBVECTOR, VT: BoolV, Action: Custom); |
390 | setOperationAction(Op: ISD::INSERT_VECTOR_ELT, VT: BoolV, Action: Custom); |
391 | setOperationAction(Op: ISD::EXTRACT_SUBVECTOR, VT: BoolV, Action: Custom); |
392 | setOperationAction(Op: ISD::EXTRACT_VECTOR_ELT, VT: BoolV, Action: Custom); |
393 | setOperationAction(Op: ISD::SELECT, VT: BoolV, Action: Custom); |
394 | setOperationAction(Op: ISD::AND, VT: BoolV, Action: Legal); |
395 | setOperationAction(Op: ISD::OR, VT: BoolV, Action: Legal); |
396 | setOperationAction(Op: ISD::XOR, VT: BoolV, Action: Legal); |
397 | } |
398 | |
399 | if (Use64b) { |
400 | for (MVT T: {MVT::v32i8, MVT::v32i16, MVT::v16i8, MVT::v16i16, MVT::v16i32}) |
401 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal); |
402 | } else { |
403 | for (MVT T: {MVT::v64i8, MVT::v64i16, MVT::v32i8, MVT::v32i16, MVT::v32i32}) |
404 | setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Legal); |
405 | } |
406 | |
407 | // Handle store widening for short vectors. |
408 | unsigned HwLen = Subtarget.getVectorLength(); |
409 | for (MVT ElemTy : Subtarget.getHVXElementTypes()) { |
410 | if (ElemTy == MVT::i1) |
411 | continue; |
412 | int ElemWidth = ElemTy.getFixedSizeInBits(); |
413 | int MaxElems = (8*HwLen) / ElemWidth; |
414 | for (int N = 2; N < MaxElems; N *= 2) { |
415 | MVT VecTy = MVT::getVectorVT(VT: ElemTy, NumElements: N); |
416 | auto Action = getPreferredVectorAction(VT: VecTy); |
417 | if (Action == TargetLoweringBase::TypeWidenVector) { |
418 | setOperationAction(Op: ISD::LOAD, VT: VecTy, Action: Custom); |
419 | setOperationAction(Op: ISD::STORE, VT: VecTy, Action: Custom); |
420 | setOperationAction(Op: ISD::SETCC, VT: VecTy, Action: Custom); |
421 | setOperationAction(Op: ISD::TRUNCATE, VT: VecTy, Action: Custom); |
422 | setOperationAction(Op: ISD::ANY_EXTEND, VT: VecTy, Action: Custom); |
423 | setOperationAction(Op: ISD::SIGN_EXTEND, VT: VecTy, Action: Custom); |
424 | setOperationAction(Op: ISD::ZERO_EXTEND, VT: VecTy, Action: Custom); |
425 | if (Subtarget.useHVXFloatingPoint()) { |
426 | setOperationAction(Op: ISD::FP_TO_SINT, VT: VecTy, Action: Custom); |
427 | setOperationAction(Op: ISD::FP_TO_UINT, VT: VecTy, Action: Custom); |
428 | setOperationAction(Op: ISD::SINT_TO_FP, VT: VecTy, Action: Custom); |
429 | setOperationAction(Op: ISD::UINT_TO_FP, VT: VecTy, Action: Custom); |
430 | } |
431 | |
432 | MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: N); |
433 | if (!isTypeLegal(VT: BoolTy)) |
434 | setOperationAction(Op: ISD::SETCC, VT: BoolTy, Action: Custom); |
435 | } |
436 | } |
437 | } |
438 | |
439 | setTargetDAGCombine({ISD::CONCAT_VECTORS, ISD::TRUNCATE, ISD::VSELECT}); |
440 | } |
441 | |
442 | unsigned |
443 | HexagonTargetLowering::getPreferredHvxVectorAction(MVT VecTy) const { |
444 | MVT ElemTy = VecTy.getVectorElementType(); |
445 | unsigned VecLen = VecTy.getVectorNumElements(); |
446 | unsigned HwLen = Subtarget.getVectorLength(); |
447 | |
448 | // Split vectors of i1 that exceed byte vector length. |
449 | if (ElemTy == MVT::i1 && VecLen > HwLen) |
450 | return TargetLoweringBase::TypeSplitVector; |
451 | |
452 | ArrayRef<MVT> Tys = Subtarget.getHVXElementTypes(); |
453 | // For shorter vectors of i1, widen them if any of the corresponding |
454 | // vectors of integers needs to be widened. |
455 | if (ElemTy == MVT::i1) { |
456 | for (MVT T : Tys) { |
457 | assert(T != MVT::i1); |
458 | auto A = getPreferredHvxVectorAction(VecTy: MVT::getVectorVT(VT: T, NumElements: VecLen)); |
459 | if (A != ~0u) |
460 | return A; |
461 | } |
462 | return ~0u; |
463 | } |
464 | |
465 | // If the size of VecTy is at least half of the vector length, |
466 | // widen the vector. Note: the threshold was not selected in |
467 | // any scientific way. |
468 | if (llvm::is_contained(Range&: Tys, Element: ElemTy)) { |
469 | unsigned VecWidth = VecTy.getSizeInBits(); |
470 | unsigned HwWidth = 8*HwLen; |
471 | if (VecWidth > 2*HwWidth) |
472 | return TargetLoweringBase::TypeSplitVector; |
473 | |
474 | bool HaveThreshold = HvxWidenThreshold.getNumOccurrences() > 0; |
475 | if (HaveThreshold && 8*HvxWidenThreshold <= VecWidth) |
476 | return TargetLoweringBase::TypeWidenVector; |
477 | if (VecWidth >= HwWidth/2 && VecWidth < HwWidth) |
478 | return TargetLoweringBase::TypeWidenVector; |
479 | } |
480 | |
481 | // Defer to default. |
482 | return ~0u; |
483 | } |
484 | |
485 | unsigned |
486 | HexagonTargetLowering::getCustomHvxOperationAction(SDNode &Op) const { |
487 | unsigned Opc = Op.getOpcode(); |
488 | switch (Opc) { |
489 | case HexagonISD::SMUL_LOHI: |
490 | case HexagonISD::UMUL_LOHI: |
491 | case HexagonISD::USMUL_LOHI: |
492 | return TargetLoweringBase::Custom; |
493 | } |
494 | return TargetLoweringBase::Legal; |
495 | } |
496 | |
497 | SDValue |
498 | HexagonTargetLowering::getInt(unsigned IntId, MVT ResTy, ArrayRef<SDValue> Ops, |
499 | const SDLoc &dl, SelectionDAG &DAG) const { |
500 | SmallVector<SDValue,4> IntOps; |
501 | IntOps.push_back(Elt: DAG.getConstant(Val: IntId, DL: dl, VT: MVT::i32)); |
502 | append_range(C&: IntOps, R&: Ops); |
503 | return DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL: dl, VT: ResTy, Ops: IntOps); |
504 | } |
505 | |
506 | MVT |
507 | HexagonTargetLowering::typeJoin(const TypePair &Tys) const { |
508 | assert(Tys.first.getVectorElementType() == Tys.second.getVectorElementType()); |
509 | |
510 | MVT ElemTy = Tys.first.getVectorElementType(); |
511 | return MVT::getVectorVT(VT: ElemTy, NumElements: Tys.first.getVectorNumElements() + |
512 | Tys.second.getVectorNumElements()); |
513 | } |
514 | |
515 | HexagonTargetLowering::TypePair |
516 | HexagonTargetLowering::typeSplit(MVT VecTy) const { |
517 | assert(VecTy.isVector()); |
518 | unsigned NumElem = VecTy.getVectorNumElements(); |
519 | assert((NumElem % 2) == 0 && "Expecting even-sized vector type" ); |
520 | MVT HalfTy = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: NumElem/2); |
521 | return { HalfTy, HalfTy }; |
522 | } |
523 | |
524 | MVT |
525 | HexagonTargetLowering::typeExtElem(MVT VecTy, unsigned Factor) const { |
526 | MVT ElemTy = VecTy.getVectorElementType(); |
527 | MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() * Factor); |
528 | return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements()); |
529 | } |
530 | |
531 | MVT |
532 | HexagonTargetLowering::typeTruncElem(MVT VecTy, unsigned Factor) const { |
533 | MVT ElemTy = VecTy.getVectorElementType(); |
534 | MVT NewElemTy = MVT::getIntegerVT(BitWidth: ElemTy.getSizeInBits() / Factor); |
535 | return MVT::getVectorVT(VT: NewElemTy, NumElements: VecTy.getVectorNumElements()); |
536 | } |
537 | |
538 | SDValue |
539 | HexagonTargetLowering::opCastElem(SDValue Vec, MVT ElemTy, |
540 | SelectionDAG &DAG) const { |
541 | if (ty(Op: Vec).getVectorElementType() == ElemTy) |
542 | return Vec; |
543 | MVT CastTy = tyVector(Ty: Vec.getValueType().getSimpleVT(), ElemTy); |
544 | return DAG.getBitcast(VT: CastTy, V: Vec); |
545 | } |
546 | |
547 | SDValue |
548 | HexagonTargetLowering::opJoin(const VectorPair &Ops, const SDLoc &dl, |
549 | SelectionDAG &DAG) const { |
550 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: typeJoin(Tys: ty(Ops)), |
551 | N1: Ops.first, N2: Ops.second); |
552 | } |
553 | |
554 | HexagonTargetLowering::VectorPair |
555 | HexagonTargetLowering::opSplit(SDValue Vec, const SDLoc &dl, |
556 | SelectionDAG &DAG) const { |
557 | TypePair Tys = typeSplit(VecTy: ty(Op: Vec)); |
558 | if (Vec.getOpcode() == HexagonISD::QCAT) |
559 | return VectorPair(Vec.getOperand(i: 0), Vec.getOperand(i: 1)); |
560 | return DAG.SplitVector(N: Vec, DL: dl, LoVT: Tys.first, HiVT: Tys.second); |
561 | } |
562 | |
563 | bool |
564 | HexagonTargetLowering::isHvxSingleTy(MVT Ty) const { |
565 | return Subtarget.isHVXVectorType(VecTy: Ty) && |
566 | Ty.getSizeInBits() == 8 * Subtarget.getVectorLength(); |
567 | } |
568 | |
569 | bool |
570 | HexagonTargetLowering::isHvxPairTy(MVT Ty) const { |
571 | return Subtarget.isHVXVectorType(VecTy: Ty) && |
572 | Ty.getSizeInBits() == 16 * Subtarget.getVectorLength(); |
573 | } |
574 | |
575 | bool |
576 | HexagonTargetLowering::isHvxBoolTy(MVT Ty) const { |
577 | return Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true) && |
578 | Ty.getVectorElementType() == MVT::i1; |
579 | } |
580 | |
581 | bool HexagonTargetLowering::allowsHvxMemoryAccess( |
582 | MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const { |
583 | // Bool vectors are excluded by default, but make it explicit to |
584 | // emphasize that bool vectors cannot be loaded or stored. |
585 | // Also, disallow double vector stores (to prevent unnecessary |
586 | // store widening in DAG combiner). |
587 | if (VecTy.getSizeInBits() > 8*Subtarget.getVectorLength()) |
588 | return false; |
589 | if (!Subtarget.isHVXVectorType(VecTy, /*IncludeBool=*/false)) |
590 | return false; |
591 | if (Fast) |
592 | *Fast = 1; |
593 | return true; |
594 | } |
595 | |
596 | bool HexagonTargetLowering::allowsHvxMisalignedMemoryAccesses( |
597 | MVT VecTy, MachineMemOperand::Flags Flags, unsigned *Fast) const { |
598 | if (!Subtarget.isHVXVectorType(VecTy)) |
599 | return false; |
600 | // XXX Should this be false? vmemu are a bit slower than vmem. |
601 | if (Fast) |
602 | *Fast = 1; |
603 | return true; |
604 | } |
605 | |
// Post-ISel expansion of the PS_vsplat* pseudo instructions into real HVX
// splats. On HVX v62+ there are native byte/halfword splat instructions
// (V6_lvsplatb/V6_lvsplath); on older subtargets the scalar value is first
// replicated to fill a full 32-bit register and then splat with
// V6_lvsplatw. Pseudos not handled by the switch are left untouched.
void HexagonTargetLowering::AdjustHvxInstrPostInstrSelection(
    MachineInstr &MI, SDNode *Node) const {
  unsigned Opc = MI.getOpcode();
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  MachineBasicBlock &MB = *MI.getParent();
  MachineFunction &MF = *MB.getParent();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  DebugLoc DL = MI.getDebugLoc();
  // New instructions are inserted before MI; MI itself is erased afterwards
  // (except for the vsplat[ir]w cases, which mutate MI in place).
  auto At = MI.getIterator();

  switch (Opc) {
    case Hexagon::PS_vsplatib:
      if (Subtarget.useHVXV62Ops()) {
        // SplatV = A2_tfrsi #imm
        // OutV = V6_lvsplatb SplatV
        Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .add(MO: MI.getOperand(i: 1));
        Register OutV = MI.getOperand(i: 0).getReg();
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV)
          .addReg(RegNo: SplatV);
      } else {
        // SplatV = A2_tfrsi #imm:#imm:#imm:#imm
        // OutV = V6_lvsplatw SplatV
        Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
        const MachineOperand &InpOp = MI.getOperand(i: 1);
        assert(InpOp.isImm());
        // Replicate the low byte into all four byte lanes of the scalar.
        uint32_t V = InpOp.getImm() & 0xFF;
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .addImm(Val: V << 24 | V << 16 | V << 8 | V);
        Register OutV = MI.getOperand(i: 0).getReg();
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
      }
      MB.erase(I: At);
      break;
    case Hexagon::PS_vsplatrb:
      if (Subtarget.useHVXV62Ops()) {
        // OutV = V6_lvsplatb Inp
        Register OutV = MI.getOperand(i: 0).getReg();
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatb), DestReg: OutV)
          .add(MO: MI.getOperand(i: 1));
      } else {
        // S2_vsplatrb broadcasts the low byte of the register into all
        // four byte lanes; then splat that word across the vector.
        Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
        const MachineOperand &InpOp = MI.getOperand(i: 1);
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::S2_vsplatrb), DestReg: SplatV)
          .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg());
        Register OutV = MI.getOperand(i: 0).getReg();
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV)
          .addReg(RegNo: SplatV);
      }
      MB.erase(I: At);
      break;
    case Hexagon::PS_vsplatih:
      if (Subtarget.useHVXV62Ops()) {
        // SplatV = A2_tfrsi #imm
        // OutV = V6_lvsplath SplatV
        Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .add(MO: MI.getOperand(i: 1));
        Register OutV = MI.getOperand(i: 0).getReg();
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV)
          .addReg(RegNo: SplatV);
      } else {
        // SplatV = A2_tfrsi #imm:#imm
        // OutV = V6_lvsplatw SplatV
        Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
        const MachineOperand &InpOp = MI.getOperand(i: 1);
        assert(InpOp.isImm());
        // Replicate the low halfword into both halfword lanes.
        uint32_t V = InpOp.getImm() & 0xFFFF;
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .addImm(Val: V << 16 | V);
        Register OutV = MI.getOperand(i: 0).getReg();
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
      }
      MB.erase(I: At);
      break;
    case Hexagon::PS_vsplatrh:
      if (Subtarget.useHVXV62Ops()) {
        // OutV = V6_lvsplath Inp
        Register OutV = MI.getOperand(i: 0).getReg();
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplath), DestReg: OutV)
          .add(MO: MI.getOperand(i: 1));
      } else {
        // SplatV = A2_combine_ll Inp, Inp
        // OutV = V6_lvsplatw SplatV
        Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
        const MachineOperand &InpOp = MI.getOperand(i: 1);
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_combine_ll), DestReg: SplatV)
          .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg())
          .addReg(RegNo: InpOp.getReg(), flags: 0, SubReg: InpOp.getSubReg());
        Register OutV = MI.getOperand(i: 0).getReg();
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::V6_lvsplatw), DestReg: OutV).addReg(RegNo: SplatV);
      }
      MB.erase(I: At);
      break;
    case Hexagon::PS_vsplatiw:
    case Hexagon::PS_vsplatrw:
      if (Opc == Hexagon::PS_vsplatiw) {
        // SplatV = A2_tfrsi #imm
        Register SplatV = MRI.createVirtualRegister(RegClass: &Hexagon::IntRegsRegClass);
        BuildMI(BB&: MB, I: At, MIMD: DL, MCID: TII.get(Opcode: Hexagon::A2_tfrsi), DestReg: SplatV)
          .add(MO: MI.getOperand(i: 1));
        MI.getOperand(i: 1).ChangeToRegister(Reg: SplatV, isDef: false);
      }
      // OutV = V6_lvsplatw SplatV/Inp
      // MI is re-purposed in place rather than erased.
      MI.setDesc(TII.get(Opcode: Hexagon::V6_lvsplatw));
      break;
  }
}
715 | |
716 | SDValue |
717 | HexagonTargetLowering::convertToByteIndex(SDValue ElemIdx, MVT ElemTy, |
718 | SelectionDAG &DAG) const { |
719 | if (ElemIdx.getValueType().getSimpleVT() != MVT::i32) |
720 | ElemIdx = DAG.getBitcast(VT: MVT::i32, V: ElemIdx); |
721 | |
722 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
723 | if (ElemWidth == 8) |
724 | return ElemIdx; |
725 | |
726 | unsigned L = Log2_32(Value: ElemWidth/8); |
727 | const SDLoc &dl(ElemIdx); |
728 | return DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: MVT::i32, |
729 | Ops: {ElemIdx, DAG.getConstant(Val: L, DL: dl, VT: MVT::i32)}); |
730 | } |
731 | |
732 | SDValue |
733 | HexagonTargetLowering::getIndexInWord32(SDValue Idx, MVT ElemTy, |
734 | SelectionDAG &DAG) const { |
735 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
736 | assert(ElemWidth >= 8 && ElemWidth <= 32); |
737 | if (ElemWidth == 32) |
738 | return Idx; |
739 | |
740 | if (ty(Op: Idx) != MVT::i32) |
741 | Idx = DAG.getBitcast(VT: MVT::i32, V: Idx); |
742 | const SDLoc &dl(Idx); |
743 | SDValue Mask = DAG.getConstant(Val: 32/ElemWidth - 1, DL: dl, VT: MVT::i32); |
744 | SDValue SubIdx = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32, Ops: {Idx, Mask}); |
745 | return SubIdx; |
746 | } |
747 | |
748 | SDValue |
749 | HexagonTargetLowering::getByteShuffle(const SDLoc &dl, SDValue Op0, |
750 | SDValue Op1, ArrayRef<int> Mask, |
751 | SelectionDAG &DAG) const { |
752 | MVT OpTy = ty(Op: Op0); |
753 | assert(OpTy == ty(Op1)); |
754 | |
755 | MVT ElemTy = OpTy.getVectorElementType(); |
756 | if (ElemTy == MVT::i8) |
757 | return DAG.getVectorShuffle(VT: OpTy, dl, N1: Op0, N2: Op1, Mask); |
758 | assert(ElemTy.getSizeInBits() >= 8); |
759 | |
760 | MVT ResTy = tyVector(Ty: OpTy, ElemTy: MVT::i8); |
761 | unsigned ElemSize = ElemTy.getSizeInBits() / 8; |
762 | |
763 | SmallVector<int,128> ByteMask; |
764 | for (int M : Mask) { |
765 | if (M < 0) { |
766 | for (unsigned I = 0; I != ElemSize; ++I) |
767 | ByteMask.push_back(Elt: -1); |
768 | } else { |
769 | int NewM = M*ElemSize; |
770 | for (unsigned I = 0; I != ElemSize; ++I) |
771 | ByteMask.push_back(Elt: NewM+I); |
772 | } |
773 | } |
774 | assert(ResTy.getVectorNumElements() == ByteMask.size()); |
775 | return DAG.getVectorShuffle(VT: ResTy, dl, N1: opCastElem(Vec: Op0, ElemTy: MVT::i8, DAG), |
776 | N2: opCastElem(Vec: Op1, ElemTy: MVT::i8, DAG), Mask: ByteMask); |
777 | } |
778 | |
779 | SDValue |
780 | HexagonTargetLowering::buildHvxVectorReg(ArrayRef<SDValue> Values, |
781 | const SDLoc &dl, MVT VecTy, |
782 | SelectionDAG &DAG) const { |
783 | unsigned VecLen = Values.size(); |
784 | MachineFunction &MF = DAG.getMachineFunction(); |
785 | MVT ElemTy = VecTy.getVectorElementType(); |
786 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
787 | unsigned HwLen = Subtarget.getVectorLength(); |
788 | |
789 | unsigned ElemSize = ElemWidth / 8; |
790 | assert(ElemSize*VecLen == HwLen); |
791 | SmallVector<SDValue,32> Words; |
792 | |
793 | if (VecTy.getVectorElementType() != MVT::i32 && |
794 | !(Subtarget.useHVXFloatingPoint() && |
795 | VecTy.getVectorElementType() == MVT::f32)) { |
796 | assert((ElemSize == 1 || ElemSize == 2) && "Invalid element size" ); |
797 | unsigned OpsPerWord = (ElemSize == 1) ? 4 : 2; |
798 | MVT PartVT = MVT::getVectorVT(VT: VecTy.getVectorElementType(), NumElements: OpsPerWord); |
799 | for (unsigned i = 0; i != VecLen; i += OpsPerWord) { |
800 | SDValue W = buildVector32(Elem: Values.slice(N: i, M: OpsPerWord), dl, VecTy: PartVT, DAG); |
801 | Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V: W)); |
802 | } |
803 | } else { |
804 | for (SDValue V : Values) |
805 | Words.push_back(Elt: DAG.getBitcast(VT: MVT::i32, V)); |
806 | } |
807 | auto isSplat = [] (ArrayRef<SDValue> Values, SDValue &SplatV) { |
808 | unsigned NumValues = Values.size(); |
809 | assert(NumValues > 0); |
810 | bool IsUndef = true; |
811 | for (unsigned i = 0; i != NumValues; ++i) { |
812 | if (Values[i].isUndef()) |
813 | continue; |
814 | IsUndef = false; |
815 | if (!SplatV.getNode()) |
816 | SplatV = Values[i]; |
817 | else if (SplatV != Values[i]) |
818 | return false; |
819 | } |
820 | if (IsUndef) |
821 | SplatV = Values[0]; |
822 | return true; |
823 | }; |
824 | |
825 | unsigned NumWords = Words.size(); |
826 | SDValue SplatV; |
827 | bool IsSplat = isSplat(Words, SplatV); |
828 | if (IsSplat && isUndef(Op: SplatV)) |
829 | return DAG.getUNDEF(VT: VecTy); |
830 | if (IsSplat) { |
831 | assert(SplatV.getNode()); |
832 | if (isNullConstant(V: SplatV)) |
833 | return getZero(dl, Ty: VecTy, DAG); |
834 | MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4); |
835 | SDValue S = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: WordTy, Operand: SplatV); |
836 | return DAG.getBitcast(VT: VecTy, V: S); |
837 | } |
838 | |
839 | // Delay recognizing constant vectors until here, so that we can generate |
840 | // a vsplat. |
841 | SmallVector<ConstantInt*, 128> Consts(VecLen); |
842 | bool AllConst = getBuildVectorConstInts(Values, VecTy, DAG, Consts); |
843 | if (AllConst) { |
844 | ArrayRef<Constant*> Tmp((Constant**)Consts.begin(), |
845 | (Constant**)Consts.end()); |
846 | Constant *CV = ConstantVector::get(V: Tmp); |
847 | Align Alignment(HwLen); |
848 | SDValue CP = |
849 | LowerConstantPool(Op: DAG.getConstantPool(C: CV, VT: VecTy, Align: Alignment), DAG); |
850 | return DAG.getLoad(VT: VecTy, dl, Chain: DAG.getEntryNode(), Ptr: CP, |
851 | PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment); |
852 | } |
853 | |
854 | // A special case is a situation where the vector is built entirely from |
855 | // elements extracted from another vector. This could be done via a shuffle |
856 | // more efficiently, but typically, the size of the source vector will not |
857 | // match the size of the vector being built (which precludes the use of a |
858 | // shuffle directly). |
859 | // This only handles a single source vector, and the vector being built |
860 | // should be of a sub-vector type of the source vector type. |
861 | auto = [this,&Values] (SDValue &SrcVec, |
862 | SmallVectorImpl<int> &SrcIdx) { |
863 | SDValue Vec; |
864 | for (SDValue V : Values) { |
865 | if (isUndef(Op: V)) { |
866 | SrcIdx.push_back(Elt: -1); |
867 | continue; |
868 | } |
869 | if (V.getOpcode() != ISD::EXTRACT_VECTOR_ELT) |
870 | return false; |
871 | // All extracts should come from the same vector. |
872 | SDValue T = V.getOperand(i: 0); |
873 | if (Vec.getNode() != nullptr && T.getNode() != Vec.getNode()) |
874 | return false; |
875 | Vec = T; |
876 | ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val: V.getOperand(i: 1)); |
877 | if (C == nullptr) |
878 | return false; |
879 | int I = C->getSExtValue(); |
880 | assert(I >= 0 && "Negative element index" ); |
881 | SrcIdx.push_back(Elt: I); |
882 | } |
883 | SrcVec = Vec; |
884 | return true; |
885 | }; |
886 | |
887 | SmallVector<int,128> ExtIdx; |
888 | SDValue ExtVec; |
889 | if (IsBuildFromExtracts(ExtVec, ExtIdx)) { |
890 | MVT ExtTy = ty(Op: ExtVec); |
891 | unsigned ExtLen = ExtTy.getVectorNumElements(); |
892 | if (ExtLen == VecLen || ExtLen == 2*VecLen) { |
893 | // Construct a new shuffle mask that will produce a vector with the same |
894 | // number of elements as the input vector, and such that the vector we |
895 | // want will be the initial subvector of it. |
896 | SmallVector<int,128> Mask; |
897 | BitVector Used(ExtLen); |
898 | |
899 | for (int M : ExtIdx) { |
900 | Mask.push_back(Elt: M); |
901 | if (M >= 0) |
902 | Used.set(M); |
903 | } |
904 | // Fill the rest of the mask with the unused elements of ExtVec in hopes |
905 | // that it will result in a permutation of ExtVec's elements. It's still |
906 | // fine if it doesn't (e.g. if undefs are present, or elements are |
907 | // repeated), but permutations can always be done efficiently via vdelta |
908 | // and vrdelta. |
909 | for (unsigned I = 0; I != ExtLen; ++I) { |
910 | if (Mask.size() == ExtLen) |
911 | break; |
912 | if (!Used.test(Idx: I)) |
913 | Mask.push_back(Elt: I); |
914 | } |
915 | |
916 | SDValue S = DAG.getVectorShuffle(VT: ExtTy, dl, N1: ExtVec, |
917 | N2: DAG.getUNDEF(VT: ExtTy), Mask); |
918 | return ExtLen == VecLen ? S : LoHalf(V: S, DAG); |
919 | } |
920 | } |
921 | |
922 | // Find most common element to initialize vector with. This is to avoid |
923 | // unnecessary vinsert/valign for cases where the same value is present |
924 | // many times. Creates a histogram of the vector's elements to find the |
925 | // most common element n. |
926 | assert(4*Words.size() == Subtarget.getVectorLength()); |
927 | int VecHist[32]; |
928 | int n = 0; |
929 | for (unsigned i = 0; i != NumWords; ++i) { |
930 | VecHist[i] = 0; |
931 | if (Words[i].isUndef()) |
932 | continue; |
933 | for (unsigned j = i; j != NumWords; ++j) |
934 | if (Words[i] == Words[j]) |
935 | VecHist[i]++; |
936 | |
937 | if (VecHist[i] > VecHist[n]) |
938 | n = i; |
939 | } |
940 | |
941 | SDValue HalfV = getZero(dl, Ty: VecTy, DAG); |
942 | if (VecHist[n] > 1) { |
943 | SDValue SplatV = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: VecTy, Operand: Words[n]); |
944 | HalfV = DAG.getNode(Opcode: HexagonISD::VALIGN, DL: dl, VT: VecTy, |
945 | Ops: {HalfV, SplatV, DAG.getConstant(Val: HwLen/2, DL: dl, VT: MVT::i32)}); |
946 | } |
947 | SDValue HalfV0 = HalfV; |
948 | SDValue HalfV1 = HalfV; |
949 | |
950 | // Construct two halves in parallel, then or them together. Rn and Rm count |
951 | // number of rotations needed before the next element. One last rotation is |
952 | // performed post-loop to position the last element. |
953 | int Rn = 0, Rm = 0; |
954 | SDValue Sn, Sm; |
955 | SDValue N = HalfV0; |
956 | SDValue M = HalfV1; |
957 | for (unsigned i = 0; i != NumWords/2; ++i) { |
958 | // Rotate by element count since last insertion. |
959 | if (Words[i] != Words[n] || VecHist[n] <= 1) { |
960 | Sn = DAG.getConstant(Val: Rn, DL: dl, VT: MVT::i32); |
961 | HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn}); |
962 | N = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, |
963 | Ops: {HalfV0, Words[i]}); |
964 | Rn = 0; |
965 | } |
966 | if (Words[i+NumWords/2] != Words[n] || VecHist[n] <= 1) { |
967 | Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32); |
968 | HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm}); |
969 | M = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, |
970 | Ops: {HalfV1, Words[i+NumWords/2]}); |
971 | Rm = 0; |
972 | } |
973 | Rn += 4; |
974 | Rm += 4; |
975 | } |
976 | // Perform last rotation. |
977 | Sn = DAG.getConstant(Val: Rn+HwLen/2, DL: dl, VT: MVT::i32); |
978 | Sm = DAG.getConstant(Val: Rm, DL: dl, VT: MVT::i32); |
979 | HalfV0 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {N, Sn}); |
980 | HalfV1 = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {M, Sm}); |
981 | |
982 | SDValue T0 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV0); |
983 | SDValue T1 = DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i32), V: HalfV1); |
984 | |
985 | SDValue DstV = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ty(Op: T0), Ops: {T0, T1}); |
986 | |
987 | SDValue OutV = |
988 | DAG.getBitcast(VT: tyVector(Ty: ty(Op: DstV), ElemTy: VecTy.getVectorElementType()), V: DstV); |
989 | return OutV; |
990 | } |
991 | |
// Create a byte-vector image of the predicate PredV, scaled so that each
// source predicate bit occupies BitBytes bytes, placed as a prefix of a
// full HVX byte vector. If ZeroFill is set, the bytes beyond the prefix
// are zeroed; otherwise they are unspecified.
SDValue
HexagonTargetLowering::createHvxPrefixPred(SDValue PredV, const SDLoc &dl,
      unsigned BitBytes, bool ZeroFill, SelectionDAG &DAG) const {
  MVT PredTy = ty(Op: PredV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);

  if (Subtarget.isHVXVectorType(VecTy: PredTy, IncludeBool: true)) {
    // Move the vector predicate SubV to a vector register, and scale it
    // down to match the representation (bytes per type element) that VecV
    // uses. The scaling down will pick every 2nd or 4th (every Scale-th
    // in general) element and put them at the front of the resulting
    // vector. This subvector will then be inserted into the Q2V of VecV.
    // To avoid having an operation that generates an illegal type (short
    // vector), generate a full size vector.
    //
    SDValue T = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: PredV);
    SmallVector<int,128> Mask(HwLen);
    // Scale = BitBytes(PredV) / Given BitBytes.
    unsigned Scale = HwLen / (PredTy.getVectorNumElements() * BitBytes);
    unsigned BlockLen = PredTy.getVectorNumElements() * BitBytes;

    // Gather every Scale-th byte into the first BlockLen positions; the
    // remaining mask entries stay 0 (value-initialized above), so the tail
    // bytes are don't-care picks from element 0.
    for (unsigned i = 0; i != HwLen; ++i) {
      unsigned Num = i % Scale;
      unsigned Off = i / Scale;
      Mask[BlockLen*Num + Off] = i;
    }
    SDValue S = DAG.getVectorShuffle(VT: ByteTy, dl, N1: T, N2: DAG.getUNDEF(VT: ByteTy), Mask);
    if (!ZeroFill)
      return S;
    // Fill the bytes beyond BlockLen with 0s.
    // V6_pred_scalar2 cannot fill the entire predicate, so it only works
    // when BlockLen < HwLen.
    assert(BlockLen < HwLen && "vsetq(v1) prerequisite");
    MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
    // Q has the first BlockLen lanes set; its byte image masks the prefix.
    SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                         Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG);
    SDValue M = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Q);
    return DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ByteTy, N1: S, N2: M);
  }

  // Make sure that this is a valid scalar predicate.
  assert(PredTy == MVT::v2i1 || PredTy == MVT::v4i1 || PredTy == MVT::v8i1);

  // Bytes per predicate bit in the 64-bit scalar image produced by P2D.
  unsigned Bytes = 8 / PredTy.getVectorNumElements();
  // Double-buffered word lists: each pass reads Words[IdxW^1] and writes
  // the doubled representation into Words[IdxW].
  SmallVector<SDValue,4> Words[2];
  unsigned IdxW = 0;

  SDValue W0 = isUndef(Op: PredV)
                  ? DAG.getUNDEF(VT: MVT::i64)
                  : DAG.getNode(Opcode: HexagonISD::P2D, DL: dl, VT: MVT::i64, Operand: PredV);
  Words[IdxW].push_back(Elt: HiHalf(V: W0, DAG));
  Words[IdxW].push_back(Elt: LoHalf(V: W0, DAG));

  // Keep doubling the per-bit byte count until it reaches BitBytes.
  while (Bytes < BitBytes) {
    IdxW ^= 1;
    Words[IdxW].clear();

    if (Bytes < 4) {
      // Widen each byte lane to two bytes via expandPredicate.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        SDValue T = expandPredicate(Vec32: W, dl, DAG);
        Words[IdxW].push_back(Elt: HiHalf(V: T, DAG));
        Words[IdxW].push_back(Elt: LoHalf(V: T, DAG));
      }
    } else {
      // Lanes are already word-sized: just duplicate each word.
      for (const SDValue &W : Words[IdxW ^ 1]) {
        Words[IdxW].push_back(Elt: W);
        Words[IdxW].push_back(Elt: W);
      }
    }
    Bytes *= 2;
  }

  assert(Bytes == BitBytes);

  // Insert the words into the vector front-to-back with ror+vinsertw0.
  SDValue Vec = ZeroFill ? getZero(dl, Ty: ByteTy, DAG) : DAG.getUNDEF(VT: ByteTy);
  SDValue S4 = DAG.getConstant(Val: HwLen-4, DL: dl, VT: MVT::i32);
  for (const SDValue &W : Words[IdxW]) {
    Vec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Vec, N2: S4);
    Vec = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: ByteTy, N1: Vec, N2: W);
  }

  return Vec;
}
1076 | |
// Build an HVX vector predicate (of type VecTy) from the given i1 Values.
// All-true and all-false inputs map directly to QTRUE/QFALSE; otherwise a
// byte vector is built and converted with V2Q.
SDValue
HexagonTargetLowering::buildHvxVectorPred(ArrayRef<SDValue> Values,
                                          const SDLoc &dl, MVT VecTy,
                                          SelectionDAG &DAG) const {
  // Construct a vector V of bytes, such that a comparison V >u 0 would
  // produce the required vector predicate.
  unsigned VecLen = Values.size();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(VecLen <= HwLen || VecLen == 8*HwLen);
  SmallVector<SDValue,128> Bytes;
  // Track whether every (defined) value is true / false; non-constant
  // values count as neither.
  bool AllT = true, AllF = true;

  auto IsTrue = [] (SDValue V) {
    if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
      return !N->isZero();
    return false;
  };
  auto IsFalse = [] (SDValue V) {
    if (const auto *N = dyn_cast<ConstantSDNode>(Val: V.getNode()))
      return N->isZero();
    return false;
  };

  if (VecLen <= HwLen) {
    // In the hardware, each bit of a vector predicate corresponds to a byte
    // of a vector register. Calculate how many bytes does a bit of VecTy
    // correspond to.
    assert(HwLen % VecLen == 0);
    unsigned BitBytes = HwLen / VecLen;
    for (SDValue V : Values) {
      AllT &= IsTrue(V);
      AllF &= IsFalse(V);

      // Replicate each i1 into BitBytes consecutive bytes.
      SDValue Ext = !V.isUndef() ? DAG.getZExtOrTrunc(Op: V, DL: dl, VT: MVT::i8)
                                 : DAG.getUNDEF(VT: MVT::i8);
      for (unsigned B = 0; B != BitBytes; ++B)
        Bytes.push_back(Elt: Ext);
    }
  } else {
    // There are as many i1 values, as there are bits in a vector register.
    // Divide the values into groups of 8 and check that each group consists
    // of the same value (ignoring undefs).
    for (unsigned I = 0; I != VecLen; I += 8) {
      unsigned B = 0;
      // Find the first non-undef value in this group.
      for (; B != 8; ++B) {
        if (!Values[I+B].isUndef())
          break;
      }
      SDValue F = Values[I+B];
      AllT &= IsTrue(F);
      AllF &= IsFalse(F);

      // B == 8 means the whole group was undef.
      SDValue Ext = (B < 8) ? DAG.getZExtOrTrunc(Op: F, DL: dl, VT: MVT::i8)
                            : DAG.getUNDEF(VT: MVT::i8);
      Bytes.push_back(Elt: Ext);
      // Verify that the rest of values in the group are the same as the
      // first.
      for (; B != 8; ++B)
        assert(Values[I+B].isUndef() || Values[I+B] == F);
    }
  }

  if (AllT)
    return DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: VecTy);
  if (AllF)
    return DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: VecTy);

  // General case: materialize the bytes and convert to a predicate.
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue ByteVec = buildHvxVectorReg(Values: Bytes, dl, VecTy: ByteTy, DAG);
  return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
}
1149 | |
1150 | SDValue |
1151 | HexagonTargetLowering::(SDValue VecV, SDValue IdxV, |
1152 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1153 | MVT ElemTy = ty(Op: VecV).getVectorElementType(); |
1154 | |
1155 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1156 | assert(ElemWidth >= 8 && ElemWidth <= 32); |
1157 | (void)ElemWidth; |
1158 | |
1159 | SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG); |
1160 | SDValue ExWord = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, |
1161 | Ops: {VecV, ByteIdx}); |
1162 | if (ElemTy == MVT::i32) |
1163 | return ExWord; |
1164 | |
1165 | // Have an extracted word, need to extract the smaller element out of it. |
1166 | // 1. Extract the bits of (the original) IdxV that correspond to the index |
1167 | // of the desired element in the 32-bit word. |
1168 | SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG); |
1169 | // 2. Extract the element from the word. |
1170 | SDValue ExVec = DAG.getBitcast(VT: tyVector(Ty: ty(Op: ExWord), ElemTy), V: ExWord); |
1171 | return extractVector(VecV: ExVec, IdxV: SubIdx, dl, ValTy: ElemTy, ResTy: MVT::i32, DAG); |
1172 | } |
1173 | |
1174 | SDValue |
1175 | HexagonTargetLowering::(SDValue VecV, SDValue IdxV, |
1176 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1177 | // Implement other return types if necessary. |
1178 | assert(ResTy == MVT::i1); |
1179 | |
1180 | unsigned HwLen = Subtarget.getVectorLength(); |
1181 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1182 | SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV); |
1183 | |
1184 | unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements(); |
1185 | SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32); |
1186 | IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV); |
1187 | |
1188 | SDValue ExtB = extractHvxElementReg(VecV: ByteVec, IdxV, dl, ResTy: MVT::i32, DAG); |
1189 | SDValue Zero = DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32); |
1190 | return getInstr(MachineOpc: Hexagon::C2_cmpgtui, dl, Ty: MVT::i1, Ops: {ExtB, Zero}, DAG); |
1191 | } |
1192 | |
// Insert the scalar ValV into the HVX vector VecV at (dynamic) index IdxV.
// Word-sized elements are inserted directly; smaller elements are inserted
// by read-modify-write of their containing 32-bit word.
SDValue
HexagonTargetLowering::insertHvxElementReg(SDValue VecV, SDValue IdxV,
      SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const {
  MVT ElemTy = ty(Op: VecV).getVectorElementType();

  unsigned ElemWidth = ElemTy.getSizeInBits();
  assert(ElemWidth >= 8 && ElemWidth <= 32);
  (void)ElemWidth;

  // Insert a 32-bit word at the given byte offset: rotate the target word
  // down to position 0, insert with VINSERTW0, then rotate back.
  auto InsertWord = [&DAG,&dl,this] (SDValue VecV, SDValue ValV,
                                     SDValue ByteIdxV) {
    MVT VecTy = ty(Op: VecV);
    unsigned HwLen = Subtarget.getVectorLength();
    // Round the byte index down to a word boundary (AND with -4).
    SDValue MaskV = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                                Ops: {ByteIdxV, DAG.getConstant(Val: -4, DL: dl, VT: MVT::i32)});
    SDValue RotV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {VecV, MaskV});
    SDValue InsV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: VecTy, Ops: {RotV, ValV});
    // Rotate by HwLen - offset to undo the first rotation.
    SDValue SubV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
                               Ops: {DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32), MaskV});
    SDValue TorV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: VecTy, Ops: {InsV, SubV});
    return TorV;
  };

  SDValue ByteIdx = convertToByteIndex(ElemIdx: IdxV, ElemTy, DAG);
  if (ElemTy == MVT::i32)
    return InsertWord(VecV, ValV, ByteIdx);

  // If this is not inserting a 32-bit word, convert it into such a thing.
  // 1. Extract the existing word from the target vector.
  SDValue WordIdx = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: MVT::i32,
                                Ops: {ByteIdx, DAG.getConstant(Val: 2, DL: dl, VT: MVT::i32)});
  SDValue Ext = extractHvxElementReg(VecV: opCastElem(Vec: VecV, ElemTy: MVT::i32, DAG), IdxV: WordIdx,
                                     dl, ResTy: MVT::i32, DAG);

  // 2. Treating the extracted word as a 32-bit vector, insert the given
  //    value into it.
  SDValue SubIdx = getIndexInWord32(Idx: IdxV, ElemTy, DAG);
  MVT SubVecTy = tyVector(Ty: ty(Op: Ext), ElemTy);
  SDValue Ins = insertVector(VecV: DAG.getBitcast(VT: SubVecTy, V: Ext),
                             ValV, IdxV: SubIdx, dl, ValTy: ElemTy, DAG);

  // 3. Insert the 32-bit word back into the original vector.
  return InsertWord(VecV, Ins, ByteIdx);
}
1237 | |
1238 | SDValue |
1239 | HexagonTargetLowering::insertHvxElementPred(SDValue VecV, SDValue IdxV, |
1240 | SDValue ValV, const SDLoc &dl, SelectionDAG &DAG) const { |
1241 | unsigned HwLen = Subtarget.getVectorLength(); |
1242 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1243 | SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV); |
1244 | |
1245 | unsigned Scale = HwLen / ty(Op: VecV).getVectorNumElements(); |
1246 | SDValue ScV = DAG.getConstant(Val: Scale, DL: dl, VT: MVT::i32); |
1247 | IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV, N2: ScV); |
1248 | ValV = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: dl, VT: MVT::i32, Operand: ValV); |
1249 | |
1250 | SDValue InsV = insertHvxElementReg(VecV: ByteVec, IdxV, ValV, dl, DAG); |
1251 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ty(Op: VecV), Operand: InsV); |
1252 | } |
1253 | |
1254 | SDValue |
1255 | HexagonTargetLowering::(SDValue OrigOp, SDValue VecV, |
1256 | SDValue IdxV, const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1257 | MVT VecTy = ty(Op: VecV); |
1258 | unsigned HwLen = Subtarget.getVectorLength(); |
1259 | unsigned Idx = IdxV.getNode()->getAsZExtVal(); |
1260 | MVT ElemTy = VecTy.getVectorElementType(); |
1261 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1262 | |
1263 | // If the source vector is a vector pair, get the single vector containing |
1264 | // the subvector of interest. The subvector will never overlap two single |
1265 | // vectors. |
1266 | if (isHvxPairTy(Ty: VecTy)) { |
1267 | if (Idx * ElemWidth >= 8*HwLen) |
1268 | Idx -= VecTy.getVectorNumElements() / 2; |
1269 | |
1270 | VecV = OrigOp; |
1271 | if (typeSplit(VecTy).first == ResTy) |
1272 | return VecV; |
1273 | } |
1274 | |
1275 | // The only meaningful subvectors of a single HVX vector are those that |
1276 | // fit in a scalar register. |
1277 | assert(ResTy.getSizeInBits() == 32 || ResTy.getSizeInBits() == 64); |
1278 | |
1279 | MVT WordTy = tyVector(Ty: VecTy, ElemTy: MVT::i32); |
1280 | SDValue WordVec = DAG.getBitcast(VT: WordTy, V: VecV); |
1281 | unsigned WordIdx = (Idx*ElemWidth) / 32; |
1282 | |
1283 | SDValue W0Idx = DAG.getConstant(Val: WordIdx, DL: dl, VT: MVT::i32); |
1284 | SDValue W0 = extractHvxElementReg(VecV: WordVec, IdxV: W0Idx, dl, ResTy: MVT::i32, DAG); |
1285 | if (ResTy.getSizeInBits() == 32) |
1286 | return DAG.getBitcast(VT: ResTy, V: W0); |
1287 | |
1288 | SDValue W1Idx = DAG.getConstant(Val: WordIdx+1, DL: dl, VT: MVT::i32); |
1289 | SDValue W1 = extractHvxElementReg(VecV: WordVec, IdxV: W1Idx, dl, ResTy: MVT::i32, DAG); |
1290 | SDValue WW = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::i64, DAG); |
1291 | return DAG.getBitcast(VT: ResTy, V: WW); |
1292 | } |
1293 | |
1294 | SDValue |
1295 | HexagonTargetLowering::(SDValue VecV, SDValue IdxV, |
1296 | const SDLoc &dl, MVT ResTy, SelectionDAG &DAG) const { |
1297 | MVT VecTy = ty(Op: VecV); |
1298 | unsigned HwLen = Subtarget.getVectorLength(); |
1299 | MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen); |
1300 | SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV); |
1301 | // IdxV is required to be a constant. |
1302 | unsigned Idx = IdxV.getNode()->getAsZExtVal(); |
1303 | |
1304 | unsigned ResLen = ResTy.getVectorNumElements(); |
1305 | unsigned BitBytes = HwLen / VecTy.getVectorNumElements(); |
1306 | unsigned Offset = Idx * BitBytes; |
1307 | SDValue Undef = DAG.getUNDEF(VT: ByteTy); |
1308 | SmallVector<int,128> Mask; |
1309 | |
1310 | if (Subtarget.isHVXVectorType(VecTy: ResTy, IncludeBool: true)) { |
1311 | // Converting between two vector predicates. Since the result is shorter |
1312 | // than the source, it will correspond to a vector predicate with the |
1313 | // relevant bits replicated. The replication count is the ratio of the |
1314 | // source and target vector lengths. |
1315 | unsigned Rep = VecTy.getVectorNumElements() / ResLen; |
1316 | assert(isPowerOf2_32(Rep) && HwLen % Rep == 0); |
1317 | for (unsigned i = 0; i != HwLen/Rep; ++i) { |
1318 | for (unsigned j = 0; j != Rep; ++j) |
1319 | Mask.push_back(Elt: i + Offset); |
1320 | } |
1321 | SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask); |
1322 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: ShuffV); |
1323 | } |
1324 | |
1325 | // Converting between a vector predicate and a scalar predicate. In the |
1326 | // vector predicate, a group of BitBytes bits will correspond to a single |
1327 | // i1 element of the source vector type. Those bits will all have the same |
1328 | // value. The same will be true for ByteVec, where each byte corresponds |
1329 | // to a bit in the vector predicate. |
1330 | // The algorithm is to traverse the ByteVec, going over the i1 values from |
1331 | // the source vector, and generate the corresponding representation in an |
1332 | // 8-byte vector. To avoid repeated extracts from ByteVec, shuffle the |
1333 | // elements so that the interesting 8 bytes will be in the low end of the |
1334 | // vector. |
1335 | unsigned Rep = 8 / ResLen; |
1336 | // Make sure the output fill the entire vector register, so repeat the |
1337 | // 8-byte groups as many times as necessary. |
1338 | for (unsigned r = 0; r != HwLen/ResLen; ++r) { |
1339 | // This will generate the indexes of the 8 interesting bytes. |
1340 | for (unsigned i = 0; i != ResLen; ++i) { |
1341 | for (unsigned j = 0; j != Rep; ++j) |
1342 | Mask.push_back(Elt: Offset + i*BitBytes); |
1343 | } |
1344 | } |
1345 | |
1346 | SDValue Zero = getZero(dl, Ty: MVT::i32, DAG); |
1347 | SDValue ShuffV = DAG.getVectorShuffle(VT: ByteTy, dl, N1: ByteVec, N2: Undef, Mask); |
1348 | // Combine the two low words from ShuffV into a v8i8, and byte-compare |
1349 | // them against 0. |
1350 | SDValue W0 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, Ops: {ShuffV, Zero}); |
1351 | SDValue W1 = DAG.getNode(Opcode: HexagonISD::VEXTRACTW, DL: dl, VT: MVT::i32, |
1352 | Ops: {ShuffV, DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32)}); |
1353 | SDValue Vec64 = getCombine(Hi: W1, Lo: W0, dl, ResTy: MVT::v8i8, DAG); |
1354 | return getInstr(MachineOpc: Hexagon::A4_vcmpbgtui, dl, Ty: ResTy, |
1355 | Ops: {Vec64, DAG.getTargetConstant(Val: 0, DL: dl, VT: MVT::i32)}, DAG); |
1356 | } |
1357 | |
SDValue
HexagonTargetLowering::insertHvxSubvectorReg(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert SubV into VecV at element index IdxV. VecV is an HVX single
  // vector or a vector pair; SubV is either an HVX single vector, or a
  // subvector small enough to fit in a 32- or 64-bit scalar register.
  // IdxV does not need to be a constant.
  MVT VecTy = ty(Op: VecV);
  MVT SubTy = ty(Op: SubV);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ElemTy = VecTy.getVectorElementType();
  unsigned ElemWidth = ElemTy.getSizeInBits();

  bool IsPair = isHvxPairTy(Ty: VecTy);
  MVT SingleTy = MVT::getVectorVT(VT: ElemTy, NumElements: (8*HwLen)/ElemWidth);
  // The two single vectors that VecV consists of, if it's a pair.
  SDValue V0, V1;
  SDValue SingleV = VecV;
  SDValue PickHi;

  if (IsPair) {
    V0 = LoHalf(V: VecV, DAG);
    V1 = HiHalf(V: VecV, DAG);

    // PickHi is true when the insertion point falls into the high half
    // of the pair.
    SDValue HalfV = DAG.getConstant(Val: SingleTy.getVectorNumElements(),
                                    DL: dl, VT: MVT::i32);
    PickHi = DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: IdxV, RHS: HalfV, Cond: ISD::SETUGT);
    if (isHvxSingleTy(Ty: SubTy)) {
      // Inserting a whole single vector into a pair: this is a subregister
      // insertion into either the low or the high half.
      if (const auto *CN = dyn_cast<const ConstantSDNode>(Val: IdxV.getNode())) {
        unsigned Idx = CN->getZExtValue();
        assert(Idx == 0 || Idx == VecTy.getVectorNumElements()/2);
        unsigned SubIdx = (Idx == 0) ? Hexagon::vsub_lo : Hexagon::vsub_hi;
        return DAG.getTargetInsertSubreg(SRIdx: SubIdx, DL: dl, VT: VecTy, Operand: VecV, Subreg: SubV);
      }
      // If IdxV is not a constant, generate the two variants: with the
      // SubV as the high and as the low subregister, and select the right
      // pair based on the IdxV.
      SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SubV, V1});
      SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SubV});
      return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
    }
    // The subvector being inserted must be entirely contained in one of
    // the vectors V0 or V1. Set SingleV to the correct one, and update
    // IdxV to be the index relative to the beginning of that vector.
    SDValue S = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: IdxV, N2: HalfV);
    IdxV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: MVT::i32, N1: PickHi, N2: S, N3: IdxV);
    SingleV = DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: SingleTy, N1: PickHi, N2: V1, N3: V0);
  }

  // The only meaningful subvectors of a single HVX vector are those that
  // fit in a scalar register.
  assert(SubTy.getSizeInBits() == 32 || SubTy.getSizeInBits() == 64);
  // Convert IdxV to be index in bytes.
  auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate the vector so the insertion point moves to lane 0, where
    // VINSERTW0 writes.
    IdxV = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
                       N2: DAG.getConstant(Val: ElemWidth/8, DL: dl, VT: MVT::i32));
    SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: IdxV);
  }
  // When inserting a single word, the rotation back to the original position
  // would be by HwLen-Idx, but if two words are inserted, it will need to be
  // by (HwLen-4)-Idx.
  unsigned RolBase = HwLen;
  if (SubTy.getSizeInBits() == 32) {
    SDValue V = DAG.getBitcast(VT: MVT::i32, V: SubV);
    SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: V);
  } else {
    // 64-bit subvector: insert the low word, rotate by 4 bytes so the
    // next word position reaches lane 0, then insert the high word.
    SDValue V = DAG.getBitcast(VT: MVT::i64, V: SubV);
    SDValue R0 = LoHalf(V, DAG);
    SDValue R1 = HiHalf(V, DAG);
    SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R0);
    SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV,
                          N2: DAG.getConstant(Val: 4, DL: dl, VT: MVT::i32));
    SingleV = DAG.getNode(Opcode: HexagonISD::VINSERTW0, DL: dl, VT: SingleTy, N1: SingleV, N2: R1);
    RolBase = HwLen-4;
  }
  // If the vector wasn't ror'ed, don't ror it back.
  if (RolBase != 4 || !IdxN || !IdxN->isZero()) {
    SDValue RolV = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
                               N1: DAG.getConstant(Val: RolBase, DL: dl, VT: MVT::i32), N2: IdxV);
    SingleV = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: SingleTy, N1: SingleV, N2: RolV);
  }

  if (IsPair) {
    // Reassemble the pair, placing the updated half on the side the
    // insertion targeted.
    SDValue InLo = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {SingleV, V1});
    SDValue InHi = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, Ops: {V0, SingleV});
    return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: PickHi, N2: InHi, N3: InLo);
  }
  return SingleV;
}
1444 | |
SDValue
HexagonTargetLowering::insertHvxSubvectorPred(SDValue VecV, SDValue SubV,
      SDValue IdxV, const SDLoc &dl, SelectionDAG &DAG) const {
  // Insert the predicate subvector SubV into the HVX vector predicate
  // VecV at element index IdxV.
  MVT VecTy = ty(Op: VecV);
  MVT SubTy = ty(Op: SubV);
  assert(Subtarget.isHVXVectorType(VecTy, true));
  // VecV is an HVX vector predicate. SubV may be either an HVX vector
  // predicate as well, or it can be a scalar predicate.

  unsigned VecLen = VecTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(HwLen % VecLen == 0 && "Unexpected vector type" );

  // Scale: ratio of the target and source element counts.
  // BitBytes: number of bytes (in the byte-vector image) per target bit.
  // BlockLen: size in bytes of SubV's image in the byte vector.
  unsigned Scale = VecLen / SubTy.getVectorNumElements();
  unsigned BitBytes = HwLen / VecLen;
  unsigned BlockLen = HwLen / Scale;

  // Operate on byte vectors: Q2V expands each predicate bit into bytes.
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue ByteVec = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: VecV);
  SDValue ByteSub = createHvxPrefixPred(PredV: SubV, dl, BitBytes, ZeroFill: false, DAG);
  SDValue ByteIdx;

  auto *IdxN = dyn_cast<ConstantSDNode>(Val: IdxV.getNode());
  if (!IdxN || !IdxN->isZero()) {
    // Rotate ByteVec so the insertion point moves to byte 0.
    ByteIdx = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: MVT::i32, N1: IdxV,
                          N2: DAG.getConstant(Val: BitBytes, DL: dl, VT: MVT::i32));
    ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteIdx);
  }

  // ByteVec is the target vector VecV rotated in such a way that the
  // subvector should be inserted at index 0. Generate a predicate mask
  // and use vmux to do the insertion.
  assert(BlockLen < HwLen && "vsetq(v1) prerequisite" );
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  // Q selects the first BlockLen byte lanes (where ByteSub goes).
  SDValue Q = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                       Ops: {DAG.getConstant(Val: BlockLen, DL: dl, VT: MVT::i32)}, DAG);
  ByteVec = getInstr(MachineOpc: Hexagon::V6_vmux, dl, Ty: ByteTy, Ops: {Q, ByteSub, ByteVec}, DAG);
  // Rotate ByteVec back, and convert to a vector predicate.
  if (!IdxN || !IdxN->isZero()) {
    SDValue HwLenV = DAG.getConstant(Val: HwLen, DL: dl, VT: MVT::i32);
    SDValue ByteXdi = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32, N1: HwLenV, N2: ByteIdx);
    ByteVec = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: ByteVec, N2: ByteXdi);
  }
  return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: ByteVec);
}
1490 | |
1491 | SDValue |
1492 | HexagonTargetLowering::extendHvxVectorPred(SDValue VecV, const SDLoc &dl, |
1493 | MVT ResTy, bool ZeroExt, SelectionDAG &DAG) const { |
1494 | // Sign- and any-extending of a vector predicate to a vector register is |
1495 | // equivalent to Q2V. For zero-extensions, generate a vmux between 0 and |
1496 | // a vector of 1s (where the 1s are of type matching the vector type). |
1497 | assert(Subtarget.isHVXVectorType(ResTy)); |
1498 | if (!ZeroExt) |
1499 | return DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ResTy, Operand: VecV); |
1500 | |
1501 | assert(ty(VecV).getVectorNumElements() == ResTy.getVectorNumElements()); |
1502 | SDValue True = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1503 | Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32)); |
1504 | SDValue False = getZero(dl, Ty: ResTy, DAG); |
1505 | return DAG.getSelect(DL: dl, VT: ResTy, Cond: VecV, LHS: True, RHS: False); |
1506 | } |
1507 | |
SDValue
HexagonTargetLowering::compressHvxPred(SDValue VecQ, const SDLoc &dl,
      MVT ResTy, SelectionDAG &DAG) const {
  // Given a predicate register VecQ, transfer bits VecQ[0..HwLen-1]
  // (i.e. the entire predicate register) to bits [0..HwLen-1] of a
  // vector register. The remaining bits of the vector register are
  // unspecified.

  MachineFunction &MF = DAG.getMachineFunction();
  unsigned HwLen = Subtarget.getVectorLength();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  MVT PredTy = ty(Op: VecQ);
  unsigned PredLen = PredTy.getVectorNumElements();
  assert(HwLen % PredLen == 0);
  MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: 8*HwLen/PredLen), NumElements: PredLen);

  Type *Int8Ty = Type::getInt8Ty(C&: *DAG.getContext());
  SmallVector<Constant*, 128> Tmp;
  // Create an array of bytes (hex): 01,02,04,08,10,20,40,80, 01,02,04,08,...
  // These are bytes with the LSB rotated left with respect to their index.
  for (unsigned i = 0; i != HwLen/8; ++i) {
    for (unsigned j = 0; j != 8; ++j)
      Tmp.push_back(Elt: ConstantInt::get(Ty: Int8Ty, V: 1ull << j));
  }
  // Materialize the mask table through the constant pool and load it.
  Constant *CV = ConstantVector::get(V: Tmp);
  Align Alignment(HwLen);
  SDValue CP =
      LowerConstantPool(Op: DAG.getConstantPool(C: CV, VT: ByteTy, Align: Alignment), DAG);
  SDValue Bytes =
      DAG.getLoad(VT: ByteTy, dl, Chain: DAG.getEntryNode(), Ptr: CP,
                  PtrInfo: MachinePointerInfo::getConstantPool(MF), Alignment);

  // Select the bytes that correspond to true bits in the vector predicate.
  SDValue Sel = DAG.getSelect(DL: dl, VT: VecTy, Cond: VecQ, LHS: DAG.getBitcast(VT: VecTy, V: Bytes),
                              RHS: getZero(dl, Ty: VecTy, DAG));
  // Calculate the OR of all bytes in each group of 8. That will compress
  // all the individual bits into a single byte.
  // First, OR groups of 4, via vrmpy with 0x01010101.
  SDValue All1 =
      DAG.getSplatBuildVector(VT: MVT::v4i8, DL: dl, Op: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32));
  SDValue Vrmpy = getInstr(MachineOpc: Hexagon::V6_vrmpyub, dl, Ty: ByteTy, Ops: {Sel, All1}, DAG);
  // Then rotate the accumulated vector by 4 bytes, and do the final OR.
  SDValue Rot = getInstr(MachineOpc: Hexagon::V6_valignbi, dl, Ty: ByteTy,
                         Ops: {Vrmpy, Vrmpy, DAG.getTargetConstant(Val: 4, DL: dl, VT: MVT::i32)}, DAG);
  SDValue Vor = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, Ops: {Vrmpy, Rot});

  // Pick every 8th byte and coalesce them at the beginning of the output.
  // For symmetry, coalesce every 1+8th byte after that, then every 2+8th
  // byte and so on.
  SmallVector<int,128> Mask;
  for (unsigned i = 0; i != HwLen; ++i)
    Mask.push_back(Elt: (8*i) % HwLen + i/(HwLen/8));
  SDValue Collect =
      DAG.getVectorShuffle(VT: ByteTy, dl, N1: Vor, N2: DAG.getUNDEF(VT: ByteTy), Mask);
  return DAG.getBitcast(VT: ResTy, V: Collect);
}
1564 | |
1565 | SDValue |
1566 | HexagonTargetLowering::resizeToWidth(SDValue VecV, MVT ResTy, bool Signed, |
1567 | const SDLoc &dl, SelectionDAG &DAG) const { |
1568 | // Take a vector and resize the element type to match the given type. |
1569 | MVT InpTy = ty(Op: VecV); |
1570 | if (InpTy == ResTy) |
1571 | return VecV; |
1572 | |
1573 | unsigned InpWidth = InpTy.getSizeInBits(); |
1574 | unsigned ResWidth = ResTy.getSizeInBits(); |
1575 | |
1576 | if (InpTy.isFloatingPoint()) { |
1577 | return InpWidth < ResWidth ? DAG.getNode(Opcode: ISD::FP_EXTEND, DL: dl, VT: ResTy, Operand: VecV) |
1578 | : DAG.getNode(Opcode: ISD::FP_ROUND, DL: dl, VT: ResTy, N1: VecV, |
1579 | N2: getZero(dl, Ty: MVT::i32, DAG)); |
1580 | } |
1581 | |
1582 | assert(InpTy.isInteger()); |
1583 | |
1584 | if (InpWidth < ResWidth) { |
1585 | unsigned ExtOpc = Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; |
1586 | return DAG.getNode(Opcode: ExtOpc, DL: dl, VT: ResTy, Operand: VecV); |
1587 | } else { |
1588 | unsigned NarOpc = Signed ? HexagonISD::SSAT : HexagonISD::USAT; |
1589 | return DAG.getNode(Opcode: NarOpc, DL: dl, VT: ResTy, N1: VecV, N2: DAG.getValueType(ResTy)); |
1590 | } |
1591 | } |
1592 | |
1593 | SDValue |
1594 | HexagonTargetLowering::(SDValue Vec, MVT SubTy, unsigned SubIdx, |
1595 | SelectionDAG &DAG) const { |
1596 | assert(ty(Vec).getSizeInBits() % SubTy.getSizeInBits() == 0); |
1597 | |
1598 | const SDLoc &dl(Vec); |
1599 | unsigned ElemIdx = SubIdx * SubTy.getVectorNumElements(); |
1600 | return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: SubTy, |
1601 | Ops: {Vec, DAG.getConstant(Val: ElemIdx, DL: dl, VT: MVT::i32)}); |
1602 | } |
1603 | |
1604 | SDValue |
1605 | HexagonTargetLowering::LowerHvxBuildVector(SDValue Op, SelectionDAG &DAG) |
1606 | const { |
1607 | const SDLoc &dl(Op); |
1608 | MVT VecTy = ty(Op); |
1609 | |
1610 | unsigned Size = Op.getNumOperands(); |
1611 | SmallVector<SDValue,128> Ops; |
1612 | for (unsigned i = 0; i != Size; ++i) |
1613 | Ops.push_back(Elt: Op.getOperand(i)); |
1614 | |
1615 | // First, split the BUILD_VECTOR for vector pairs. We could generate |
1616 | // some pairs directly (via splat), but splats should be generated |
1617 | // by the combiner prior to getting here. |
1618 | if (VecTy.getSizeInBits() == 16*Subtarget.getVectorLength()) { |
1619 | ArrayRef<SDValue> A(Ops); |
1620 | MVT SingleTy = typeSplit(VecTy).first; |
1621 | SDValue V0 = buildHvxVectorReg(Values: A.take_front(N: Size/2), dl, VecTy: SingleTy, DAG); |
1622 | SDValue V1 = buildHvxVectorReg(Values: A.drop_front(N: Size/2), dl, VecTy: SingleTy, DAG); |
1623 | return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: VecTy, N1: V0, N2: V1); |
1624 | } |
1625 | |
1626 | if (VecTy.getVectorElementType() == MVT::i1) |
1627 | return buildHvxVectorPred(Values: Ops, dl, VecTy, DAG); |
1628 | |
1629 | // In case of MVT::f16 BUILD_VECTOR, since MVT::f16 is |
1630 | // not a legal type, just bitcast the node to use i16 |
1631 | // types and bitcast the result back to f16 |
1632 | if (VecTy.getVectorElementType() == MVT::f16) { |
1633 | SmallVector<SDValue,64> NewOps; |
1634 | for (unsigned i = 0; i != Size; i++) |
1635 | NewOps.push_back(Elt: DAG.getBitcast(VT: MVT::i16, V: Ops[i])); |
1636 | |
1637 | SDValue T0 = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: dl, |
1638 | VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), Ops: NewOps); |
1639 | return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::f16), V: T0); |
1640 | } |
1641 | |
1642 | return buildHvxVectorReg(Values: Ops, dl, VecTy, DAG); |
1643 | } |
1644 | |
1645 | SDValue |
1646 | HexagonTargetLowering::LowerHvxSplatVector(SDValue Op, SelectionDAG &DAG) |
1647 | const { |
1648 | const SDLoc &dl(Op); |
1649 | MVT VecTy = ty(Op); |
1650 | MVT ArgTy = ty(Op: Op.getOperand(i: 0)); |
1651 | |
1652 | if (ArgTy == MVT::f16) { |
1653 | MVT SplatTy = MVT::getVectorVT(VT: MVT::i16, NumElements: VecTy.getVectorNumElements()); |
1654 | SDValue ToInt16 = DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: 0)); |
1655 | SDValue ToInt32 = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: dl, VT: MVT::i32, Operand: ToInt16); |
1656 | SDValue Splat = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: SplatTy, Operand: ToInt32); |
1657 | return DAG.getBitcast(VT: VecTy, V: Splat); |
1658 | } |
1659 | |
1660 | return SDValue(); |
1661 | } |
1662 | |
SDValue
HexagonTargetLowering::LowerHvxConcatVectors(SDValue Op, SelectionDAG &DAG)
      const {
  // Vector concatenation of two integer (non-bool) vectors does not need
  // special lowering. Custom-lower concats of bool vectors and expand
  // concats of more than 2 vectors.
  MVT VecTy = ty(Op);
  const SDLoc &dl(Op);
  unsigned NumOp = Op.getNumOperands();
  if (VecTy.getVectorElementType() != MVT::i1) {
    if (NumOp == 2)
      return Op;
    // Expand the other cases into a build-vector.
    SmallVector<SDValue,8> Elems;
    for (SDValue V : Op.getNode()->ops())
      DAG.ExtractVectorElements(Op: V, Args&: Elems);
    // A vector of i16 will be broken up into a build_vector of i16's.
    // This is a problem, since at the time of operation legalization,
    // all operations are expected to be type-legalized, and i16 is not
    // a legal type. If any of the extracted elements is not of a valid
    // type, sign-extend it to a valid one.
    for (unsigned i = 0, e = Elems.size(); i != e; ++i) {
      SDValue V = Elems[i];
      MVT Ty = ty(Op: V);
      if (!isTypeLegal(VT: Ty)) {
        MVT NTy = typeLegalize(Ty, DAG);
        if (V.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
          // Redo the extract at the legal type, then sign-extend in-reg
          // from the original (illegal) width.
          Elems[i] = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL: dl, VT: NTy,
                                 N1: DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: NTy,
                                                N1: V.getOperand(i: 0), N2: V.getOperand(i: 1)),
                                 N2: DAG.getValueType(Ty));
          continue;
        }
        // A few less complicated cases.
        switch (V.getOpcode()) {
        case ISD::Constant:
          Elems[i] = DAG.getSExtOrTrunc(Op: V, DL: dl, VT: NTy);
          break;
        case ISD::UNDEF:
          Elems[i] = DAG.getUNDEF(VT: NTy);
          break;
        case ISD::TRUNCATE:
          Elems[i] = V.getOperand(i: 0);
          break;
        default:
          llvm_unreachable("Unexpected vector element" );
        }
      }
    }
    return DAG.getBuildVector(VT: VecTy, DL: dl, Ops: Elems);
  }

  assert(VecTy.getVectorElementType() == MVT::i1);
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(NumOp) && HwLen % NumOp == 0);

  SDValue Op0 = Op.getOperand(i: 0);

  // If the operands are HVX types (i.e. not scalar predicates), then
  // defer the concatenation, and create QCAT instead.
  if (Subtarget.isHVXVectorType(VecTy: ty(Op: Op0), IncludeBool: true)) {
    if (NumOp == 2)
      return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: Op0, N2: Op.getOperand(i: 1));

    // More than two operands: concatenate each half recursively and
    // QCAT the two results.
    ArrayRef<SDUse> U(Op.getNode()->ops());
    SmallVector<SDValue,4> SV(U.begin(), U.end());
    ArrayRef<SDValue> Ops(SV);

    MVT HalfTy = typeSplit(VecTy).first;
    SDValue V0 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
                             Ops: Ops.take_front(N: NumOp/2));
    SDValue V1 = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: HalfTy,
                             Ops: Ops.take_back(N: NumOp/2));
    return DAG.getNode(Opcode: HexagonISD::QCAT, DL: dl, VT: VecTy, N1: V0, N2: V1);
  }

  // Count how many bytes (in a vector register) each bit in VecTy
  // corresponds to.
  unsigned BitBytes = HwLen / VecTy.getVectorNumElements();

  // Expand each scalar-predicate operand into its byte-vector prefix.
  SmallVector<SDValue,8> Prefixes;
  for (SDValue V : Op.getNode()->op_values()) {
    SDValue P = createHvxPrefixPred(PredV: V, dl, BitBytes, ZeroFill: true, DAG);
    Prefixes.push_back(Elt: P);
  }

  // Combine the prefixes back-to-front: rotate the accumulated result by
  // one operand's byte width, then OR in the next prefix.
  unsigned InpLen = ty(Op: Op.getOperand(i: 0)).getVectorNumElements();
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  SDValue S = DAG.getConstant(Val: InpLen*BitBytes, DL: dl, VT: MVT::i32);
  SDValue Res = getZero(dl, Ty: ByteTy, DAG);
  for (unsigned i = 0, e = Prefixes.size(); i != e; ++i) {
    Res = DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ByteTy, N1: Res, N2: S);
    Res = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ByteTy, N1: Res, N2: Prefixes[e-i-1]);
  }
  return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: VecTy, Operand: Res);
}
1759 | |
1760 | SDValue |
1761 | HexagonTargetLowering::(SDValue Op, SelectionDAG &DAG) |
1762 | const { |
1763 | // Change the type of the extracted element to i32. |
1764 | SDValue VecV = Op.getOperand(i: 0); |
1765 | MVT ElemTy = ty(Op: VecV).getVectorElementType(); |
1766 | const SDLoc &dl(Op); |
1767 | SDValue IdxV = Op.getOperand(i: 1); |
1768 | if (ElemTy == MVT::i1) |
1769 | return extractHvxElementPred(VecV, IdxV, dl, ResTy: ty(Op), DAG); |
1770 | |
1771 | return extractHvxElementReg(VecV, IdxV, dl, ResTy: ty(Op), DAG); |
1772 | } |
1773 | |
1774 | SDValue |
1775 | HexagonTargetLowering::LowerHvxInsertElement(SDValue Op, SelectionDAG &DAG) |
1776 | const { |
1777 | const SDLoc &dl(Op); |
1778 | MVT VecTy = ty(Op); |
1779 | SDValue VecV = Op.getOperand(i: 0); |
1780 | SDValue ValV = Op.getOperand(i: 1); |
1781 | SDValue IdxV = Op.getOperand(i: 2); |
1782 | MVT ElemTy = ty(Op: VecV).getVectorElementType(); |
1783 | if (ElemTy == MVT::i1) |
1784 | return insertHvxElementPred(VecV, IdxV, ValV, dl, DAG); |
1785 | |
1786 | if (ElemTy == MVT::f16) { |
1787 | SDValue T0 = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL: dl, |
1788 | VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), |
1789 | N1: DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::i16), V: VecV), |
1790 | N2: DAG.getBitcast(VT: MVT::i16, V: ValV), N3: IdxV); |
1791 | return DAG.getBitcast(VT: tyVector(Ty: VecTy, ElemTy: MVT::f16), V: T0); |
1792 | } |
1793 | |
1794 | return insertHvxElementReg(VecV, IdxV, ValV, dl, DAG); |
1795 | } |
1796 | |
1797 | SDValue |
1798 | HexagonTargetLowering::(SDValue Op, SelectionDAG &DAG) |
1799 | const { |
1800 | SDValue SrcV = Op.getOperand(i: 0); |
1801 | MVT SrcTy = ty(Op: SrcV); |
1802 | MVT DstTy = ty(Op); |
1803 | SDValue IdxV = Op.getOperand(i: 1); |
1804 | unsigned Idx = IdxV.getNode()->getAsZExtVal(); |
1805 | assert(Idx % DstTy.getVectorNumElements() == 0); |
1806 | (void)Idx; |
1807 | const SDLoc &dl(Op); |
1808 | |
1809 | MVT ElemTy = SrcTy.getVectorElementType(); |
1810 | if (ElemTy == MVT::i1) |
1811 | return extractHvxSubvectorPred(VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG); |
1812 | |
1813 | return extractHvxSubvectorReg(OrigOp: Op, VecV: SrcV, IdxV, dl, ResTy: DstTy, DAG); |
1814 | } |
1815 | |
1816 | SDValue |
1817 | HexagonTargetLowering::LowerHvxInsertSubvector(SDValue Op, SelectionDAG &DAG) |
1818 | const { |
1819 | // Idx does not need to be a constant. |
1820 | SDValue VecV = Op.getOperand(i: 0); |
1821 | SDValue ValV = Op.getOperand(i: 1); |
1822 | SDValue IdxV = Op.getOperand(i: 2); |
1823 | |
1824 | const SDLoc &dl(Op); |
1825 | MVT VecTy = ty(Op: VecV); |
1826 | MVT ElemTy = VecTy.getVectorElementType(); |
1827 | if (ElemTy == MVT::i1) |
1828 | return insertHvxSubvectorPred(VecV, SubV: ValV, IdxV, dl, DAG); |
1829 | |
1830 | return insertHvxSubvectorReg(VecV, SubV: ValV, IdxV, dl, DAG); |
1831 | } |
1832 | |
1833 | SDValue |
1834 | HexagonTargetLowering::LowerHvxAnyExt(SDValue Op, SelectionDAG &DAG) const { |
1835 | // Lower any-extends of boolean vectors to sign-extends, since they |
1836 | // translate directly to Q2V. Zero-extending could also be done equally |
1837 | // fast, but Q2V is used/recognized in more places. |
1838 | // For all other vectors, use zero-extend. |
1839 | MVT ResTy = ty(Op); |
1840 | SDValue InpV = Op.getOperand(i: 0); |
1841 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1842 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy)) |
1843 | return LowerHvxSignExt(Op, DAG); |
1844 | return DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: SDLoc(Op), VT: ResTy, Operand: InpV); |
1845 | } |
1846 | |
1847 | SDValue |
1848 | HexagonTargetLowering::LowerHvxSignExt(SDValue Op, SelectionDAG &DAG) const { |
1849 | MVT ResTy = ty(Op); |
1850 | SDValue InpV = Op.getOperand(i: 0); |
1851 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1852 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy)) |
1853 | return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: false, DAG); |
1854 | return Op; |
1855 | } |
1856 | |
1857 | SDValue |
1858 | HexagonTargetLowering::LowerHvxZeroExt(SDValue Op, SelectionDAG &DAG) const { |
1859 | MVT ResTy = ty(Op); |
1860 | SDValue InpV = Op.getOperand(i: 0); |
1861 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1862 | if (ElemTy == MVT::i1 && Subtarget.isHVXVectorType(VecTy: ResTy)) |
1863 | return extendHvxVectorPred(VecV: InpV, dl: SDLoc(Op), ResTy: ty(Op), ZeroExt: true, DAG); |
1864 | return Op; |
1865 | } |
1866 | |
1867 | SDValue |
1868 | HexagonTargetLowering::LowerHvxCttz(SDValue Op, SelectionDAG &DAG) const { |
1869 | // Lower vector CTTZ into a computation using CTLZ (Hacker's Delight): |
1870 | // cttz(x) = bitwidth(x) - ctlz(~x & (x-1)) |
1871 | const SDLoc &dl(Op); |
1872 | MVT ResTy = ty(Op); |
1873 | SDValue InpV = Op.getOperand(i: 0); |
1874 | assert(ResTy == ty(InpV)); |
1875 | |
1876 | // Calculate the vectors of 1 and bitwidth(x). |
1877 | MVT ElemTy = ty(Op: InpV).getVectorElementType(); |
1878 | unsigned ElemWidth = ElemTy.getSizeInBits(); |
1879 | |
1880 | SDValue Vec1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1881 | Operand: DAG.getConstant(Val: 1, DL: dl, VT: MVT::i32)); |
1882 | SDValue VecW = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1883 | Operand: DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32)); |
1884 | SDValue VecN1 = DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ResTy, |
1885 | Operand: DAG.getConstant(Val: -1, DL: dl, VT: MVT::i32)); |
1886 | |
1887 | // Do not use DAG.getNOT, because that would create BUILD_VECTOR with |
1888 | // a BITCAST. Here we can skip the BITCAST (so we don't have to handle |
1889 | // it separately in custom combine or selection). |
1890 | SDValue A = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy, |
1891 | Ops: {DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {InpV, VecN1}), |
1892 | DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {InpV, Vec1})}); |
1893 | return DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, |
1894 | Ops: {VecW, DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: ResTy, Operand: A)}); |
1895 | } |
1896 | |
1897 | SDValue |
1898 | HexagonTargetLowering::LowerHvxMulh(SDValue Op, SelectionDAG &DAG) const { |
1899 | const SDLoc &dl(Op); |
1900 | MVT ResTy = ty(Op); |
1901 | assert(ResTy.getVectorElementType() == MVT::i32); |
1902 | |
1903 | SDValue Vs = Op.getOperand(i: 0); |
1904 | SDValue Vt = Op.getOperand(i: 1); |
1905 | |
1906 | SDVTList ResTys = DAG.getVTList(VT1: ResTy, VT2: ResTy); |
1907 | unsigned Opc = Op.getOpcode(); |
1908 | |
1909 | // On HVX v62+ producing the full product is cheap, so legalize MULH to LOHI. |
1910 | if (Opc == ISD::MULHU) |
1911 | return DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1); |
1912 | if (Opc == ISD::MULHS) |
1913 | return DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: ResTys, Ops: {Vs, Vt}).getValue(R: 1); |
1914 | |
1915 | #ifndef NDEBUG |
1916 | Op.dump(&DAG); |
1917 | #endif |
1918 | llvm_unreachable("Unexpected mulh operation" ); |
1919 | } |
1920 | |
SDValue
HexagonTargetLowering::LowerHvxMulLoHi(SDValue Op, SelectionDAG &DAG) const {
  // Lower the two-result [SU]MUL_LOHI nodes (low and high halves of the
  // full product), choosing the cheapest expansion based on which
  // results are actually used and on the HVX architecture version.
  const SDLoc &dl(Op);
  unsigned Opc = Op.getOpcode();
  SDValue Vu = Op.getOperand(i: 0);
  SDValue Vv = Op.getOperand(i: 1);

  // If the HI part is not used, convert it to a regular MUL.
  if (auto HiVal = Op.getValue(R: 1); HiVal.use_empty()) {
    // Need to preserve the types and the number of values.
    SDValue Hi = DAG.getUNDEF(VT: ty(Op: HiVal));
    SDValue Lo = DAG.getNode(Opcode: ISD::MUL, DL: dl, VT: ty(Op), Ops: {Vu, Vv});
    return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
  }

  // Operand signedness: SMUL_LOHI is signed*signed, USMUL_LOHI is
  // unsigned(Vu)*signed(Vv), UMUL_LOHI is unsigned*unsigned.
  bool SignedVu = Opc == HexagonISD::SMUL_LOHI;
  bool SignedVv = Opc == HexagonISD::SMUL_LOHI || Opc == HexagonISD::USMUL_LOHI;

  // Legal on HVX v62+, but lower it here because patterns can't handle multi-
  // valued nodes.
  if (Subtarget.useHVXV62Ops())
    return emitHvxMulLoHiV62(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);

  if (Opc == HexagonISD::SMUL_LOHI) {
    // Direct MULHS expansion is cheaper than doing the whole SMUL_LOHI,
    // for other signedness LOHI is cheaper.
    if (auto LoVal = Op.getValue(R: 0); LoVal.use_empty()) {
      SDValue Hi = emitHvxMulHsV60(A: Vu, B: Vv, dl, DAG);
      SDValue Lo = DAG.getUNDEF(VT: ty(Op: LoVal));
      return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
    }
  }

  return emitHvxMulLoHiV60(A: Vu, SignedA: SignedVu, B: Vv, SignedB: SignedVv, dl, DAG);
}
1956 | |
SDValue
HexagonTargetLowering::LowerHvxBitcast(SDValue Op, SelectionDAG &DAG) const {
  // Lower bitcasts between HVX predicate vectors (vNi1) and scalar
  // integers, in either direction. Any other bitcast is returned
  // unchanged for default handling.
  SDValue Val = Op.getOperand(i: 0);
  MVT ResTy = ty(Op);
  MVT ValTy = ty(Op: Val);
  const SDLoc &dl(Op);

  if (isHvxBoolTy(Ty: ValTy) && ResTy.isScalarInteger()) {
    // Predicate -> scalar integer: compress the predicate into one bit
    // per lane, packed into 32-bit words, then extract those words.
    unsigned HwLen = Subtarget.getVectorLength();
    MVT WordTy = MVT::getVectorVT(VT: MVT::i32, NumElements: HwLen/4);
    SDValue VQ = compressHvxPred(VecQ: Val, dl, ResTy: WordTy, DAG);
    unsigned BitWidth = ResTy.getSizeInBits();

    if (BitWidth < 64) {
      // One 32-bit word suffices; truncate for results narrower than 32.
      SDValue W0 = extractHvxElementReg(VecV: VQ, IdxV: DAG.getConstant(Val: 0, DL: dl, VT: MVT::i32),
                                        dl, ResTy: MVT::i32, DAG);
      if (BitWidth == 32)
        return W0;
      assert(BitWidth < 32u);
      return DAG.getZExtOrTrunc(Op: W0, DL: dl, VT: ResTy);
    }

    // The result is >= 64 bits. The only options are 64 or 128.
    assert(BitWidth == 64 || BitWidth == 128);
    SmallVector<SDValue,4> Words;
    for (unsigned i = 0; i != BitWidth/32; ++i) {
      SDValue W = extractHvxElementReg(
          VecV: VQ, IdxV: DAG.getConstant(Val: i, DL: dl, VT: MVT::i32), dl, ResTy: MVT::i32, DAG);
      Words.push_back(Elt: W);
    }
    // Pair adjacent 32-bit words into 64-bit values; the even-indexed
    // word is the low half of each pair.
    SmallVector<SDValue,2> Combines;
    assert(Words.size() % 2 == 0);
    for (unsigned i = 0, e = Words.size(); i < e; i += 2) {
      SDValue C = getCombine(Hi: Words[i+1], Lo: Words[i], dl, ResTy: MVT::i64, DAG);
      Combines.push_back(Elt: C);
    }

    if (BitWidth == 64)
      return Combines[0];

    // 128-bit result: build it from the two 64-bit halves.
    return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL: dl, VT: ResTy, Ops: Combines);
  }
  if (isHvxBoolTy(Ty: ResTy) && ValTy.isScalarInteger()) {
    // Handle bitcast from i128 -> v128i1 and i64 -> v64i1.
    unsigned BitWidth = ValTy.getSizeInBits();
    unsigned HwLen = Subtarget.getVectorLength();
    assert(BitWidth == HwLen);

    // Reinterpret the scalar as a vector of bytes for per-byte access.
    MVT ValAsVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: BitWidth / 8);
    SDValue ValAsVec = DAG.getBitcast(VT: ValAsVecTy, V: Val);
    // Splat each byte of Val 8 times.
    // Bytes = [(b0)x8, (b1)x8, ...., (b15)x8]
    // where b0, b1,..., b15 are least to most significant bytes of I.
    SmallVector<SDValue, 128> Bytes;
    // Tmp: 0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80, 0x01,0x02,0x04,0x08,...
    // These are bytes with the LSB rotated left with respect to their index.
    SmallVector<SDValue, 128> Tmp;
    for (unsigned I = 0; I != HwLen / 8; ++I) {
      SDValue Idx = DAG.getConstant(Val: I, DL: dl, VT: MVT::i32);
      SDValue Byte =
          DAG.getNode(Opcode: ISD::EXTRACT_VECTOR_ELT, DL: dl, VT: MVT::i8, N1: ValAsVec, N2: Idx);
      for (unsigned J = 0; J != 8; ++J) {
        Bytes.push_back(Elt: Byte);
        Tmp.push_back(Elt: DAG.getConstant(Val: 1ull << J, DL: dl, VT: MVT::i8));
      }
    }

    MVT ConstantVecTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
    SDValue ConstantVec = DAG.getBuildVector(VT: ConstantVecTy, DL: dl, Ops: Tmp);
    SDValue I2V = buildHvxVectorReg(Values: Bytes, dl, VecTy: ConstantVecTy, DAG);

    // Each Byte in the I2V will be set iff corresponding bit is set in Val.
    I2V = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ConstantVecTy, Ops: {I2V, ConstantVec});
    // V2Q turns the byte vector back into a predicate register.
    return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: I2V);
  }

  return Op;
}
2035 | |
2036 | SDValue |
2037 | HexagonTargetLowering::LowerHvxExtend(SDValue Op, SelectionDAG &DAG) const { |
2038 | // Sign- and zero-extends are legal. |
2039 | assert(Op.getOpcode() == ISD::ANY_EXTEND_VECTOR_INREG); |
2040 | return DAG.getNode(Opcode: ISD::ZERO_EXTEND_VECTOR_INREG, DL: SDLoc(Op), VT: ty(Op), |
2041 | Operand: Op.getOperand(i: 0)); |
2042 | } |
2043 | |
2044 | SDValue |
2045 | HexagonTargetLowering::LowerHvxSelect(SDValue Op, SelectionDAG &DAG) const { |
2046 | MVT ResTy = ty(Op); |
2047 | if (ResTy.getVectorElementType() != MVT::i1) |
2048 | return Op; |
2049 | |
2050 | const SDLoc &dl(Op); |
2051 | unsigned HwLen = Subtarget.getVectorLength(); |
2052 | unsigned VecLen = ResTy.getVectorNumElements(); |
2053 | assert(HwLen % VecLen == 0); |
2054 | unsigned ElemSize = HwLen / VecLen; |
2055 | |
2056 | MVT VecTy = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: ElemSize * 8), NumElements: VecLen); |
2057 | SDValue S = |
2058 | DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: VecTy, N1: Op.getOperand(i: 0), |
2059 | N2: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 1)), |
2060 | N3: DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: VecTy, Operand: Op.getOperand(i: 2))); |
2061 | return DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: ResTy, Operand: S); |
2062 | } |
2063 | |
2064 | SDValue |
2065 | HexagonTargetLowering::LowerHvxShift(SDValue Op, SelectionDAG &DAG) const { |
2066 | if (SDValue S = getVectorShiftByInt(Op, DAG)) |
2067 | return S; |
2068 | return Op; |
2069 | } |
2070 | |
SDValue
HexagonTargetLowering::LowerHvxFunnelShift(SDValue Op,
                                           SelectionDAG &DAG) const {
  // Lower FSHL/FSHR. For a splatted (scalar) shift amount the funnel
  // shift is expanded into a pair of opposite-direction shifts plus an
  // OR; otherwise a masked MFSHL/MFSHR node is emitted.
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::FSHL || Opc == ISD::FSHR);

  // Make sure the shift amount is within the range of the bitwidth
  // of the element type.
  SDValue A = Op.getOperand(i: 0);
  SDValue B = Op.getOperand(i: 1);
  SDValue S = Op.getOperand(i: 2);

  MVT InpTy = ty(Op: A);
  MVT ElemTy = InpTy.getVectorElementType();

  const SDLoc &dl(Op);
  unsigned ElemWidth = ElemTy.getSizeInBits();
  bool IsLeft = Opc == ISD::FSHL;

  // The expansion into regular shifts produces worse code for i8 and for
  // right shift of i32 on v65+.
  bool UseShifts = ElemTy != MVT::i8;
  if (Subtarget.useHVXV65Ops() && ElemTy == MVT::i32)
    UseShifts = false;

  if (SDValue SplatV = getSplatValue(Op: S, DAG); SplatV && UseShifts) {
    // If this is a funnel shift by a scalar, lower it into regular shifts.
    // ModS = S mod ElemWidth, NegS = ElemWidth - ModS (the amount for the
    // opposite-direction shift).
    SDValue Mask = DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: MVT::i32);
    SDValue ModS =
        DAG.getNode(Opcode: ISD::AND, DL: dl, VT: MVT::i32,
                    Ops: {DAG.getZExtOrTrunc(Op: SplatV, DL: dl, VT: MVT::i32), Mask});
    SDValue NegS =
        DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: MVT::i32,
                    Ops: {DAG.getConstant(Val: ElemWidth, DL: dl, VT: MVT::i32), ModS});
    SDValue IsZero =
        DAG.getSetCC(DL: dl, VT: MVT::i1, LHS: ModS, RHS: getZero(dl, Ty: MVT::i32, DAG), Cond: ISD::SETEQ);
    // FSHL A, B => A <<  | B >>n
    // FSHR A, B => A <<n | B >>
    SDValue Part1 =
        DAG.getNode(Opcode: HexagonISD::VASL, DL: dl, VT: InpTy, Ops: {A, IsLeft ? ModS : NegS});
    SDValue Part2 =
        DAG.getNode(Opcode: HexagonISD::VLSR, DL: dl, VT: InpTy, Ops: {B, IsLeft ? NegS : ModS});
    SDValue Or = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Part1, Part2});
    // If the shift amount was 0, pick A or B, depending on the direction.
    // The opposite shift will also be by 0, so the "Or" will be incorrect.
    return DAG.getNode(Opcode: ISD::SELECT, DL: dl, VT: InpTy, Ops: {IsZero, (IsLeft ? A : B), Or});
  }

  // Non-splat amount: mask each lane's amount to [0, ElemWidth-1] and use
  // the dedicated masked funnel-shift nodes.
  SDValue Mask = DAG.getSplatBuildVector(
      VT: InpTy, DL: dl, Op: DAG.getConstant(Val: ElemWidth - 1, DL: dl, VT: ElemTy));

  unsigned MOpc = Opc == ISD::FSHL ? HexagonISD::MFSHL : HexagonISD::MFSHR;
  return DAG.getNode(Opcode: MOpc, DL: dl, VT: ty(Op),
                     Ops: {A, B, DAG.getNode(Opcode: ISD::AND, DL: dl, VT: InpTy, Ops: {S, Mask})});
}
2126 | |
SDValue
HexagonTargetLowering::LowerHvxIntrinsic(SDValue Op, SelectionDAG &DAG) const {
  // Lower selected HVX intrinsics to HexagonISD nodes; anything not
  // listed here is returned unchanged.
  const SDLoc &dl(Op);
  unsigned IntNo = Op.getConstantOperandVal(i: 0);
  SmallVector<SDValue> Ops(Op->ops().begin(), Op->ops().end());

  // The *_parts intrinsics return (hi, lo) while the *MUL_LOHI nodes
  // produce (lo, hi), so the two result values must be swapped.
  auto Swap = [&](SDValue P) {
    return DAG.getMergeValues(Ops: {P.getValue(R: 1), P.getValue(R: 0)}, dl);
  };

  switch (IntNo) {
  case Intrinsic::hexagon_V6_pred_typecast:
  case Intrinsic::hexagon_V6_pred_typecast_128B: {
    // Reinterpret one predicate type as another; a no-op when the types
    // already match. Only valid between HVX bool types.
    MVT ResTy = ty(Op), InpTy = ty(Op: Ops[1]);
    if (isHvxBoolTy(Ty: ResTy) && isHvxBoolTy(Ty: InpTy)) {
      if (ResTy == InpTy)
        return Ops[1];
      return DAG.getNode(Opcode: HexagonISD::TYPECAST, DL: dl, VT: ResTy, Operand: Ops[1]);
    }
    break;
  }
  case Intrinsic::hexagon_V6_vmpyss_parts:
  case Intrinsic::hexagon_V6_vmpyss_parts_128B:
    return Swap(DAG.getNode(Opcode: HexagonISD::SMUL_LOHI, DL: dl, VTList: Op->getVTList(),
                            Ops: {Ops[1], Ops[2]}));
  case Intrinsic::hexagon_V6_vmpyuu_parts:
  case Intrinsic::hexagon_V6_vmpyuu_parts_128B:
    return Swap(DAG.getNode(Opcode: HexagonISD::UMUL_LOHI, DL: dl, VTList: Op->getVTList(),
                            Ops: {Ops[1], Ops[2]}));
  case Intrinsic::hexagon_V6_vmpyus_parts:
  case Intrinsic::hexagon_V6_vmpyus_parts_128B: {
    return Swap(DAG.getNode(Opcode: HexagonISD::USMUL_LOHI, DL: dl, VTList: Op->getVTList(),
                            Ops: {Ops[1], Ops[2]}));
  }
  } // switch

  return Op;
}
2165 | |
SDValue
HexagonTargetLowering::LowerHvxMaskedOp(SDValue Op, SelectionDAG &DAG) const {
  // Lower masked loads/stores (MLOAD/MSTORE) of full HVX vectors.
  // Loads become a plain load plus a vselect against the pass-through;
  // stores use the predicated store instruction, split into two aligned
  // stores when the address is not vector-aligned.
  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MaskN = cast<MaskedLoadStoreSDNode>(Val: Op.getNode());
  SDValue Mask = MaskN->getMask();
  SDValue Chain = MaskN->getChain();
  SDValue Base = MaskN->getBasePtr();
  auto *MemOp = MF.getMachineMemOperand(MMO: MaskN->getMemOperand(), Offset: 0, Size: HwLen);

  unsigned Opc = Op->getOpcode();
  assert(Opc == ISD::MLOAD || Opc == ISD::MSTORE);

  if (Opc == ISD::MLOAD) {
    // Load the whole vector, then blend with the pass-through value under
    // the mask. If the pass-through is undef, the plain load suffices.
    MVT ValTy = ty(Op);
    SDValue Load = DAG.getLoad(VT: ValTy, dl, Chain, Ptr: Base, MMO: MemOp);
    SDValue Thru = cast<MaskedLoadSDNode>(Val: MaskN)->getPassThru();
    if (isUndef(Op: Thru))
      return Load;
    SDValue VSel = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ValTy, N1: Mask, N2: Load, N3: Thru);
    return DAG.getMergeValues(Ops: {VSel, Load.getValue(R: 1)}, dl);
  }

  // MSTORE
  // HVX only has aligned masked stores.

  // TODO: Fold negations of the mask into the store.
  unsigned StoreOpc = Hexagon::V6_vS32b_qpred_ai;
  SDValue Value = cast<MaskedStoreSDNode>(Val: MaskN)->getValue();
  SDValue Offset0 = DAG.getTargetConstant(Val: 0, DL: dl, VT: ty(Op: Base));

  if (MaskN->getAlign().value() % HwLen == 0) {
    // Aligned case: a single predicated store.
    SDValue Store = getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
                             Ops: {Mask, Base, Offset0, Value, Chain}, DAG);
    DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: Store.getNode()), NewMemRefs: {MemOp});
    return Store;
  }

  // Unaligned case.
  // Rotate the value (and the mask) so that the bytes land in the right
  // positions of the two aligned vectors that straddle the address.
  auto StoreAlign = [&](SDValue V, SDValue A) {
    SDValue Z = getZero(dl, Ty: ty(Op: V), DAG);
    // TODO: use funnel shifts?
    // vlalign(Vu,Vv,Rt) rotates the pair Vu:Vv left by Rt and takes the
    // upper half.
    SDValue LoV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {V, Z, A}, DAG);
    SDValue HiV = getInstr(MachineOpc: Hexagon::V6_vlalignb, dl, Ty: ty(Op: V), Ops: {Z, V, A}, DAG);
    return std::make_pair(x&: LoV, y&: HiV);
  };

  // The mask must be rotated the same way as the value; go through the
  // byte-vector form (Q2V/V2Q) since vlalign operates on vectors.
  MVT ByteTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  SDValue MaskV = DAG.getNode(Opcode: HexagonISD::Q2V, DL: dl, VT: ByteTy, Operand: Mask);
  VectorPair Tmp = StoreAlign(MaskV, Base);
  VectorPair MaskU = {DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.first),
                      DAG.getNode(Opcode: HexagonISD::V2Q, DL: dl, VT: BoolTy, Operand: Tmp.second)};
  VectorPair ValueU = StoreAlign(Value, Base);

  // Emit two predicated stores, one HwLen apart, and join their chains.
  SDValue Offset1 = DAG.getTargetConstant(Val: HwLen, DL: dl, VT: MVT::i32);
  SDValue StoreLo =
      getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
               Ops: {MaskU.first, Base, Offset0, ValueU.first, Chain}, DAG);
  SDValue StoreHi =
      getInstr(MachineOpc: StoreOpc, dl, Ty: MVT::Other,
               Ops: {MaskU.second, Base, Offset1, ValueU.second, Chain}, DAG);
  DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreLo.getNode()), NewMemRefs: {MemOp});
  DAG.setNodeMemRefs(N: cast<MachineSDNode>(Val: StoreHi.getNode()), NewMemRefs: {MemOp});
  return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, Ops: {StoreLo, StoreHi});
}
2235 | |
SDValue HexagonTargetLowering::LowerHvxFpExtend(SDValue Op,
                                                SelectionDAG &DAG) const {
  // This conversion only applies to QFloat. IEEE extension from f16 to f32
  // is legal (done via a pattern).
  assert(Subtarget.useHVXQFloatOps());

  assert(Op->getOpcode() == ISD::FP_EXTEND);

  MVT VecTy = ty(Op);
  MVT ArgTy = ty(Op: Op.getOperand(i: 0));
  const SDLoc &dl(Op);
  assert(VecTy == MVT::v64f32 && ArgTy == MVT::v64f16);

  SDValue F16Vec = Op.getOperand(i: 0);

  // Build an f16 splat of 1.0: multiplying by it widens each f16 element
  // to a qf32 without changing its value.
  APFloat FloatVal = APFloat(1.0f);
  bool Ignored;
  FloatVal.convert(ToSemantics: APFloat::IEEEhalf(), RM: APFloat::rmNearestTiesToEven, losesInfo: &Ignored);
  SDValue Fp16Ones = DAG.getConstantFP(Val: FloatVal, DL: dl, VT: ArgTy);
  SDValue VmpyVec =
      getInstr(MachineOpc: Hexagon::V6_vmpy_qf32_hf, dl, Ty: VecTy, Ops: {F16Vec, Fp16Ones}, DAG);

  // Convert each half of the qf32 pair to IEEE f32.
  MVT HalfTy = typeSplit(VecTy).first;
  VectorPair Pair = opSplit(Vec: VmpyVec, dl, DAG);
  SDValue LoVec =
      getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.first}, DAG);
  SDValue HiVec =
      getInstr(MachineOpc: Hexagon::V6_vconv_sf_qf32, dl, Ty: HalfTy, Ops: {Pair.second}, DAG);

  // Re-interleave the two halves back into element order (the multiply
  // produced an even/odd deinterleaved pair).
  SDValue ShuffVec =
      getInstr(MachineOpc: Hexagon::V6_vshuffvdd, dl, Ty: VecTy,
               Ops: {HiVec, LoVec, DAG.getConstant(Val: -4, DL: dl, VT: MVT::i32)}, DAG);

  return ShuffVec;
}
2271 | |
2272 | SDValue |
2273 | HexagonTargetLowering::LowerHvxFpToInt(SDValue Op, SelectionDAG &DAG) const { |
2274 | // Catch invalid conversion ops (just in case). |
2275 | assert(Op.getOpcode() == ISD::FP_TO_SINT || |
2276 | Op.getOpcode() == ISD::FP_TO_UINT); |
2277 | |
2278 | MVT ResTy = ty(Op); |
2279 | MVT FpTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType(); |
2280 | MVT IntTy = ResTy.getVectorElementType(); |
2281 | |
2282 | if (Subtarget.useHVXIEEEFPOps()) { |
2283 | // There are only conversions from f16. |
2284 | if (FpTy == MVT::f16) { |
2285 | // Other int types aren't legal in HVX, so we shouldn't see them here. |
2286 | assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32); |
2287 | // Conversions to i8 and i16 are legal. |
2288 | if (IntTy == MVT::i8 || IntTy == MVT::i16) |
2289 | return Op; |
2290 | } |
2291 | } |
2292 | |
2293 | if (IntTy.getSizeInBits() != FpTy.getSizeInBits()) |
2294 | return EqualizeFpIntConversion(Op, DAG); |
2295 | |
2296 | return ExpandHvxFpToInt(Op, DAG); |
2297 | } |
2298 | |
2299 | SDValue |
2300 | HexagonTargetLowering::LowerHvxIntToFp(SDValue Op, SelectionDAG &DAG) const { |
2301 | // Catch invalid conversion ops (just in case). |
2302 | assert(Op.getOpcode() == ISD::SINT_TO_FP || |
2303 | Op.getOpcode() == ISD::UINT_TO_FP); |
2304 | |
2305 | MVT ResTy = ty(Op); |
2306 | MVT IntTy = ty(Op: Op.getOperand(i: 0)).getVectorElementType(); |
2307 | MVT FpTy = ResTy.getVectorElementType(); |
2308 | |
2309 | if (Subtarget.useHVXIEEEFPOps()) { |
2310 | // There are only conversions to f16. |
2311 | if (FpTy == MVT::f16) { |
2312 | // Other int types aren't legal in HVX, so we shouldn't see them here. |
2313 | assert(IntTy == MVT::i8 || IntTy == MVT::i16 || IntTy == MVT::i32); |
2314 | // i8, i16 -> f16 is legal. |
2315 | if (IntTy == MVT::i8 || IntTy == MVT::i16) |
2316 | return Op; |
2317 | } |
2318 | } |
2319 | |
2320 | if (IntTy.getSizeInBits() != FpTy.getSizeInBits()) |
2321 | return EqualizeFpIntConversion(Op, DAG); |
2322 | |
2323 | return ExpandHvxIntToFp(Op, DAG); |
2324 | } |
2325 | |
2326 | HexagonTargetLowering::TypePair |
2327 | HexagonTargetLowering::typeExtendToWider(MVT Ty0, MVT Ty1) const { |
2328 | // Compare the widths of elements of the two types, and extend the narrower |
2329 | // type to match the with of the wider type. For vector types, apply this |
2330 | // to the element type. |
2331 | assert(Ty0.isVector() == Ty1.isVector()); |
2332 | |
2333 | MVT ElemTy0 = Ty0.getScalarType(); |
2334 | MVT ElemTy1 = Ty1.getScalarType(); |
2335 | |
2336 | unsigned Width0 = ElemTy0.getSizeInBits(); |
2337 | unsigned Width1 = ElemTy1.getSizeInBits(); |
2338 | unsigned MaxWidth = std::max(a: Width0, b: Width1); |
2339 | |
2340 | auto getScalarWithWidth = [](MVT ScalarTy, unsigned Width) { |
2341 | if (ScalarTy.isInteger()) |
2342 | return MVT::getIntegerVT(BitWidth: Width); |
2343 | assert(ScalarTy.isFloatingPoint()); |
2344 | return MVT::getFloatingPointVT(BitWidth: Width); |
2345 | }; |
2346 | |
2347 | MVT WideETy0 = getScalarWithWidth(ElemTy0, MaxWidth); |
2348 | MVT WideETy1 = getScalarWithWidth(ElemTy1, MaxWidth); |
2349 | |
2350 | if (!Ty0.isVector()) { |
2351 | // Both types are scalars. |
2352 | return {WideETy0, WideETy1}; |
2353 | } |
2354 | |
2355 | // Vector types. |
2356 | unsigned NumElem = Ty0.getVectorNumElements(); |
2357 | assert(NumElem == Ty1.getVectorNumElements()); |
2358 | |
2359 | return {MVT::getVectorVT(VT: WideETy0, NumElements: NumElem), |
2360 | MVT::getVectorVT(VT: WideETy1, NumElements: NumElem)}; |
2361 | } |
2362 | |
2363 | HexagonTargetLowering::TypePair |
2364 | HexagonTargetLowering::typeWidenToWider(MVT Ty0, MVT Ty1) const { |
2365 | // Compare the numbers of elements of two vector types, and widen the |
2366 | // narrower one to match the number of elements in the wider one. |
2367 | assert(Ty0.isVector() && Ty1.isVector()); |
2368 | |
2369 | unsigned Len0 = Ty0.getVectorNumElements(); |
2370 | unsigned Len1 = Ty1.getVectorNumElements(); |
2371 | if (Len0 == Len1) |
2372 | return {Ty0, Ty1}; |
2373 | |
2374 | unsigned MaxLen = std::max(a: Len0, b: Len1); |
2375 | return {MVT::getVectorVT(VT: Ty0.getVectorElementType(), NumElements: MaxLen), |
2376 | MVT::getVectorVT(VT: Ty1.getVectorElementType(), NumElements: MaxLen)}; |
2377 | } |
2378 | |
2379 | MVT |
2380 | HexagonTargetLowering::typeLegalize(MVT Ty, SelectionDAG &DAG) const { |
2381 | EVT LegalTy = getTypeToTransformTo(Context&: *DAG.getContext(), VT: Ty); |
2382 | assert(LegalTy.isSimple()); |
2383 | return LegalTy.getSimpleVT(); |
2384 | } |
2385 | |
2386 | MVT |
2387 | HexagonTargetLowering::typeWidenToHvx(MVT Ty) const { |
2388 | unsigned HwWidth = 8 * Subtarget.getVectorLength(); |
2389 | assert(Ty.getSizeInBits() <= HwWidth); |
2390 | if (Ty.getSizeInBits() == HwWidth) |
2391 | return Ty; |
2392 | |
2393 | MVT ElemTy = Ty.getScalarType(); |
2394 | return MVT::getVectorVT(VT: ElemTy, NumElements: HwWidth / ElemTy.getSizeInBits()); |
2395 | } |
2396 | |
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxAddWithOverflow(SDValue A, SDValue B,
      const SDLoc &dl, bool Signed, SelectionDAG &DAG) const {
  // Compute A+B, return {A+B, O}, where O = vector predicate indicating
  // whether an overflow has occured.
  MVT ResTy = ty(Op: A);
  assert(ResTy == ty(B));
  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: ResTy.getVectorNumElements());

  if (!Signed) {
    // V62+ has V6_vaddcarry, but it requires input predicate, so it doesn't
    // save any instructions.
    // Unsigned overflow iff the sum wrapped around, i.e. A+B < A.
    SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
    SDValue Ovf = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Add, RHS: A, Cond: ISD::SETULT);
    return {Add, Ovf};
  }

  // Signed overflow has happened, if:
  // (A, B have the same sign) and (A+B has a different sign from either)
  // i.e. (~A xor B) & ((A+B) xor B), then check the sign bit
  SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ResTy, Ops: {A, B});
  SDValue NotA =
      DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {A, DAG.getConstant(Val: -1, DL: dl, VT: ResTy)});
  SDValue Xor0 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {NotA, B});
  SDValue Xor1 = DAG.getNode(Opcode: ISD::XOR, DL: dl, VT: ResTy, Ops: {Add, B});
  SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: ResTy, Ops: {Xor0, Xor1});
  // The sign bit of And is set exactly when signed overflow occurred;
  // test it by comparing against zero (signed less-than).
  SDValue MSB =
      DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: getZero(dl, Ty: ResTy, DAG), Cond: ISD::SETLT);
  return {Add, MSB};
}
2427 | |
HexagonTargetLowering::VectorPair
HexagonTargetLowering::emitHvxShiftRightRnd(SDValue Val, unsigned Amt,
      bool Signed, SelectionDAG &DAG) const {
  // Shift Val right by Amt bits, round the result to the nearest integer,
  // tie-break by rounding halves to even integer.
  // Returns {rounded result, overflow predicate from the rounding add}.

  const SDLoc &dl(Val);
  MVT ValTy = ty(Op: Val);

  // This should also work for signed integers.
  //
  // uint tmp0 = inp + ((1 << (Amt-1)) - 1);
  // bool ovf = (inp > tmp0);
  // bool rup = (inp & (1 << Amt)) != 0;  // the would-be LSB of the result
  //
  // uint tmp1 = inp >> (Amt-1);    // tmp1 == tmp2 iff
  // uint tmp2 = tmp0 >> (Amt-1);   // the Amt-1 lower bits were all 0
  // uint tmp3 = tmp2 + rup;
  // uint frac = (tmp1 != tmp2) ? tmp2 >> 1 : tmp3 >> 1;
  unsigned ElemWidth = ValTy.getVectorElementType().getSizeInBits();
  MVT ElemTy = MVT::getIntegerVT(BitWidth: ElemWidth);
  MVT IntTy = tyVector(Ty: ValTy, ElemTy);
  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: IntTy.getVectorNumElements());
  unsigned ShRight = Signed ? ISD::SRA : ISD::SRL;

  SDValue Inp = DAG.getBitcast(VT: IntTy, V: Val);
  SDValue LowBits = DAG.getConstant(Val: (1ull << (Amt - 1)) - 1, DL: dl, VT: IntTy);

  // Rup = 1 if bit Amt of the input is set (result LSB, used to round
  // ties to even), otherwise 0.
  SDValue AmtP1 = DAG.getConstant(Val: 1ull << Amt, DL: dl, VT: IntTy);
  SDValue And = DAG.getNode(Opcode: ISD::AND, DL: dl, VT: IntTy, Ops: {Inp, AmtP1});
  SDValue Zero = getZero(dl, Ty: IntTy, DAG);
  SDValue Bit = DAG.getSetCC(DL: dl, VT: PredTy, LHS: And, RHS: Zero, Cond: ISD::SETNE);
  SDValue Rup = DAG.getZExtOrTrunc(Op: Bit, DL: dl, VT: IntTy);
  // Tmp0 = Inp + ((1 << (Amt-1)) - 1), recording overflow of the add.
  auto [Tmp0, Ovf] = emitHvxAddWithOverflow(A: Inp, B: LowBits, dl, Signed, DAG);

  // Shift by Amt-1 first, so the (in)equality of Tmp1/Tmp2 reveals whether
  // the rounding add carried into bit Amt-1.
  SDValue AmtM1 = DAG.getConstant(Val: Amt - 1, DL: dl, VT: IntTy);
  SDValue Tmp1 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Inp, N2: AmtM1);
  SDValue Tmp2 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, N1: Tmp0, N2: AmtM1);
  SDValue Tmp3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: IntTy, N1: Tmp2, N2: Rup);

  // Select the tie-to-even variant only when the low Amt-1 bits were zero
  // (Tmp1 == Tmp2), then do the final shift by 1.
  SDValue Eq = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Tmp1, RHS: Tmp2, Cond: ISD::SETEQ);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: IntTy);
  SDValue Tmp4 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp2, One});
  SDValue Tmp5 = DAG.getNode(Opcode: ShRight, DL: dl, VT: IntTy, Ops: {Tmp3, One});
  SDValue Mux = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: IntTy, Ops: {Eq, Tmp5, Tmp4});
  return {Mux, Ovf};
}
2475 | |
SDValue
HexagonTargetLowering::emitHvxMulHsV60(SDValue A, SDValue B, const SDLoc &dl,
                                       SelectionDAG &DAG) const {
  // Emit the high half of a signed 32x32 multiply (mulhs) for HVX v60,
  // which has no 32-bit multiply, using 16-bit partial products.
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32);

  // mulhs(A,B) =
  //   = [(Hi(A)*2^16 + Lo(A)) *s (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A)*2^16 *s Hi(B)*2^16 + Hi(A) *su Lo(B)*2^16
  //                               + Lo(A) *us (Hi(B)*2^16 + Lo(B))] >> 32
  //   = [Hi(A) *s Hi(B)*2^32 + Hi(A) *su Lo(B)*2^16 + Lo(A) *us B] >> 32
  // The low half of Lo(A)*Lo(B) will be discarded (it's not added to
  // anything, so it cannot produce any carry over to higher bits),
  // so everything in [] can be shifted by 16 without loss of precision.
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + Lo(A)*B >> 16] >> 16
  //   = [Hi(A) *s Hi(B)*2^16 + Hi(A)*su Lo(B) + V6_vmpyewuh(A,B)] >> 16
  // The final additions need to make sure to properly maintain any carry-
  // out bits.
  //
  //        Hi(B) Lo(B)
  //        Hi(A) Lo(A)
  //       --------------
  //        Lo(B)*Lo(A)  | T0 = V6_vmpyewuh(B,A) does this,
  //        Hi(B)*Lo(A)  | + dropping the low 16 bits
  //        Hi(A)*Lo(B)  | T2
  //  Hi(B)*Hi(A)

  SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh, dl, Ty: VecTy, Ops: {B, A}, DAG);
  // T1 = get Hi(A) into low halves.
  SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {A, S16}, DAG);
  // P0 = interleaved T1.h*B.uh (full precision product)
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyhus, dl, Ty: PairTy, Ops: {T1, B}, DAG);
  // T2 = T1.even(h) * B.even(uh), i.e. Hi(A)*Lo(B)
  SDValue T2 = LoHalf(V: P0, DAG);
  // We need to add T0+T2, recording the carry-out, which will be 1<<16
  // added to the final sum.
  // P1 = interleaved even/odd 32-bit (unsigned) sums of 16-bit halves
  SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG);
  // P2 = interleaved even/odd 32-bit (signed) sums of 16-bit halves
  SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vaddhw, dl, Ty: PairTy, Ops: {T0, T2}, DAG);
  // T3 = full-precision(T0+T2) >> 16
  // The low halves are added-unsigned, the high ones are added-signed.
  SDValue T3 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy,
                        Ops: {HiHalf(V: P2, DAG), LoHalf(V: P1, DAG), S16}, DAG);
  SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw, dl, Ty: VecTy, Ops: {B, S16}, DAG);
  // P3 = interleaved Hi(B)*Hi(A) (full precision),
  // which is now Lo(T1)*Lo(T4), so we want to keep the even product.
  SDValue P3 = getInstr(MachineOpc: Hexagon::V6_vmpyhv, dl, Ty: PairTy, Ops: {T1, T4}, DAG);
  SDValue T5 = LoHalf(V: P3, DAG);
  // Add: final result is (Hi(A)*Hi(B)) + (partial sums >> 16).
  SDValue T6 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {T3, T5});
  return T6;
}
2532 | |
SDValue
HexagonTargetLowering::emitHvxMulLoHiV60(SDValue A, bool SignedA, SDValue B,
                                         bool SignedB, const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Emit a full 32x32->64 multiply for HVX v60, returning the merged
  // {Lo, Hi} result. Signedness of either operand is handled by
  // correcting an unsigned*unsigned product afterwards.
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  SDValue S16 = DAG.getConstant(Val: 16, DL: dl, VT: MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(a&: A, b&: B);
    std::swap(a&: SignedA, b&: SignedB);
  }

  // Do halfword-wise multiplications for unsigned*unsigned product, then
  // add corrections for signed and unsigned*signed.

  SDValue Lo, Hi;

  // P0:lo = (uu) products of low halves of A and B,
  // P0:hi = (uu) products of high halves.
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, B}, DAG);

  // Swap low/high halves in B
  // (vdelta with control 0x02020202 exchanges the 16-bit halves of each
  // 32-bit element).
  SDValue T0 = getInstr(MachineOpc: Hexagon::V6_lvsplatw, dl, Ty: VecTy,
                        Ops: {DAG.getConstant(Val: 0x02020202, DL: dl, VT: MVT::i32)}, DAG);
  SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vdelta, dl, Ty: VecTy, Ops: {B, T0}, DAG);
  // P1 = products of even/odd halfwords.
  // P1:lo = (uu) products of even(A.uh) * odd(B.uh)
  // P1:hi = (uu) products of odd(A.uh) * even(B.uh)
  SDValue P1 = getInstr(MachineOpc: Hexagon::V6_vmpyuhv, dl, Ty: PairTy, Ops: {A, T1}, DAG);

  // P2:lo = low halves of P1:lo + P1:hi,
  // P2:hi = high halves of P1:lo + P1:hi.
  SDValue P2 = getInstr(MachineOpc: Hexagon::V6_vadduhw, dl, Ty: PairTy,
                        Ops: {HiHalf(V: P1, DAG), LoHalf(V: P1, DAG)}, DAG);
  // Still need to add the high halves of P0:lo to P2:lo
  SDValue T2 =
      getInstr(MachineOpc: Hexagon::V6_vlsrw, dl, Ty: VecTy, Ops: {LoHalf(V: P0, DAG), S16}, DAG);
  SDValue T3 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {LoHalf(V: P2, DAG), T2});

  // The high halves of T3 will contribute to the HI part of LOHI.
  SDValue T4 = getInstr(MachineOpc: Hexagon::V6_vasrw_acc, dl, Ty: VecTy,
                        Ops: {HiHalf(V: P2, DAG), T3, S16}, DAG);

  // The low halves of P2 need to be added to high halves of the LO part.
  Lo = getInstr(MachineOpc: Hexagon::V6_vaslw_acc, dl, Ty: VecTy,
                Ops: {LoHalf(V: P0, DAG), LoHalf(V: P2, DAG), S16}, DAG);
  Hi = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: VecTy, Ops: {HiHalf(V: P0, DAG), T4});

  if (SignedA) {
    assert(SignedB && "Signed A and unsigned B should have been inverted" );

    // Signed*signed: subtract (B if A<0) + (A if B<0) from the high part.
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    SDValue X0 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: VecTy, Ops: {Q0, B, Zero});
    SDValue X1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, X0, A}, DAG);
    Hi = getInstr(MachineOpc: Hexagon::V6_vsubw, dl, Ty: VecTy, Ops: {Hi, X1}, DAG);
  } else if (SignedB) {
    // Same correction as for mulhus:
    // mulhus(A.uw,B.w) = mulhu(A.uw,B.uw) - (A.w if B < 0)
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    Hi = getInstr(MachineOpc: Hexagon::V6_vsubwq, dl, Ty: VecTy, Ops: {Q1, Hi, A}, DAG);
  } else {
    assert(!SignedA && !SignedB);
  }

  return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
}
2608 | |
SDValue
HexagonTargetLowering::emitHvxMulLoHiV62(SDValue A, bool SignedA,
                                         SDValue B, bool SignedB,
                                         const SDLoc &dl,
                                         SelectionDAG &DAG) const {
  // Emit a full 32x32->64 multiply for HVX v62+, returning the merged
  // {Lo, Hi} result. The signed*signed product is computed directly;
  // unsigned operands are handled by correcting the high part.
  MVT VecTy = ty(Op: A);
  MVT PairTy = typeJoin(Tys: {VecTy, VecTy});
  assert(VecTy.getVectorElementType() == MVT::i32);

  if (SignedA && !SignedB) {
    // Make A:unsigned, B:signed.
    std::swap(a&: A, b&: B);
    std::swap(a&: SignedA, b&: SignedB);
  }

  // Do S*S first, then make corrections for U*S or U*U if needed.
  SDValue P0 = getInstr(MachineOpc: Hexagon::V6_vmpyewuh_64, dl, Ty: PairTy, Ops: {A, B}, DAG);
  SDValue P1 =
      getInstr(MachineOpc: Hexagon::V6_vmpyowh_64_acc, dl, Ty: PairTy, Ops: {P0, A, B}, DAG);
  SDValue Lo = LoHalf(V: P1, DAG);
  SDValue Hi = HiHalf(V: P1, DAG);

  if (!SignedB) {
    assert(!SignedA && "Signed A and unsigned B should have been inverted" );
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());

    // Mulhu(X, Y) = Mulhs(X, Y) + (X, if Y < 0) + (Y, if X < 0).
    // def: Pat<(VecI32 (mulhu HVI32:$A, HVI32:$B)),
    //          (V6_vaddw (HiHalf (Muls64O $A, $B)),
    //                    (V6_vaddwq (V6_vgtw (V6_vd0), $B),
    //                               (V6_vandvqv (V6_vgtw (V6_vd0), $A), $B),
    //                               $A))>;
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    SDValue Q1 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: B, RHS: Zero, Cond: ISD::SETLT);
    SDValue T0 = getInstr(MachineOpc: Hexagon::V6_vandvqv, dl, Ty: VecTy, Ops: {Q0, B}, DAG);
    SDValue T1 = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q1, T0, A}, DAG);
    Hi = getInstr(MachineOpc: Hexagon::V6_vaddw, dl, Ty: VecTy, Ops: {Hi, T1}, DAG);
  } else if (!SignedA) {
    SDValue Zero = getZero(dl, Ty: VecTy, DAG);
    MVT PredTy = MVT::getVectorVT(VT: MVT::i1, NumElements: VecTy.getVectorNumElements());

    // Mulhus(unsigned X, signed Y) = Mulhs(X, Y) + (Y, if X < 0).
    // def: Pat<(VecI32 (HexagonMULHUS HVI32:$A, HVI32:$B)),
    //          (V6_vaddwq (V6_vgtw (V6_vd0), $A),
    //                     (HiHalf (Muls64O $A, $B)),
    //                     $B)>;
    SDValue Q0 = DAG.getSetCC(DL: dl, VT: PredTy, LHS: A, RHS: Zero, Cond: ISD::SETLT);
    Hi = getInstr(MachineOpc: Hexagon::V6_vaddwq, dl, Ty: VecTy, Ops: {Q0, Hi, B}, DAG);
  }

  return DAG.getMergeValues(Ops: {Lo, Hi}, dl);
}
2662 | |
2663 | SDValue |
2664 | HexagonTargetLowering::EqualizeFpIntConversion(SDValue Op, SelectionDAG &DAG) |
2665 | const { |
2666 | // Rewrite conversion between integer and floating-point in such a way that |
2667 | // the integer type is extended/narrowed to match the bitwidth of the |
2668 | // floating-point type, combined with additional integer-integer extensions |
2669 | // or narrowings to match the original input/result types. |
2670 | // E.g. f32 -> i8 ==> f32 -> i32 -> i8 |
2671 | // |
2672 | // The input/result types are not required to be legal, but if they are |
2673 | // legal, this function should not introduce illegal types. |
2674 | |
2675 | unsigned Opc = Op.getOpcode(); |
2676 | assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT || |
2677 | Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP); |
2678 | |
2679 | SDValue Inp = Op.getOperand(i: 0); |
2680 | MVT InpTy = ty(Op: Inp); |
2681 | MVT ResTy = ty(Op); |
2682 | |
2683 | if (InpTy == ResTy) |
2684 | return Op; |
2685 | |
2686 | const SDLoc &dl(Op); |
2687 | bool Signed = Opc == ISD::FP_TO_SINT || Opc == ISD::SINT_TO_FP; |
2688 | |
2689 | auto [WInpTy, WResTy] = typeExtendToWider(Ty0: InpTy, Ty1: ResTy); |
2690 | SDValue WInp = resizeToWidth(VecV: Inp, ResTy: WInpTy, Signed, dl, DAG); |
2691 | SDValue Conv = DAG.getNode(Opcode: Opc, DL: dl, VT: WResTy, Operand: WInp); |
2692 | SDValue Res = resizeToWidth(VecV: Conv, ResTy, Signed, dl, DAG); |
2693 | return Res; |
2694 | } |
2695 | |
SDValue
HexagonTargetLowering::ExpandHvxFpToInt(SDValue Op, SelectionDAG &DAG) const {
  // Expand a vector FP_TO_SINT/FP_TO_UINT (with equal element bitwidths on
  // both sides) into integer manipulation of the IEEE bit pattern: extract
  // the exponent and fraction, shift the fraction into integer position,
  // and saturate/zero out-of-range inputs depending on signedness.
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::FP_TO_SINT || Opc == ISD::FP_TO_UINT);

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(i: 0);
  MVT InpTy = ty(Op: Op0);
  MVT ResTy = ty(Op);
  assert(InpTy.changeTypeToInteger() == ResTy);

  // Reference scalar implementation of the expansion below (for f32/i32):
  //
  // int32_t conv_f32_to_i32(uint32_t inp) {
  //   // s | exp8 | frac23
  //
  //   int neg = (int32_t)inp < 0;
  //
  //   // "expm1" is the actual exponent minus 1: instead of "bias", subtract
  //   // "bias+1". When the encoded exp is "all-1" (i.e. inf/nan), this will
  //   // produce a large positive "expm1", which will result in max u/int.
  //   // In all IEEE formats, bias is the largest positive number that can be
  //   // represented in bias-width bits (i.e. 011..1).
  //   int32_t expm1 = (inp << 1) - 0x80000000;
  //   expm1 >>= 24;
  //
  //   // Always insert the "implicit 1". Subnormal numbers will become 0
  //   // regardless.
  //   uint32_t frac = (inp << 8) | 0x80000000;
  //
  //   // "frac" is the fraction part represented as Q1.31. If it was
  //   // interpreted as uint32_t, it would be the fraction part multiplied
  //   // by 2^31.
  //
  //   // Calculate the amount of right shift, since shifting further to the
  //   // left would lose significant bits. Limit it to 32, because we want
  //   // shifts by 32+ to produce 0, whereas V6_vlsrwv treats the shift
  //   // amount as a 6-bit signed value (so 33 is same as -31, i.e. shift
  //   // left by 31). "rsh" can be negative.
  //   int32_t rsh = min(31 - (expm1 + 1), 32);
  //
  //   frac >>= rsh;   // rsh == 32 will produce 0
  //
  //   // Everything up to this point is the same for conversion to signed
  //   // unsigned integer.
  //
  //   if (neg)                 // Only for signed int
  //     frac = -frac;          //
  //   if (rsh <= 0 && neg)     //   bound = neg ? 0x80000000 : 0x7fffffff
  //     frac = 0x80000000;     //   frac = rsh <= 0 ? bound : frac
  //   if (rsh <= 0 && !neg)    //
  //     frac = 0x7fffffff;     //
  //
  //   if (neg)                 // Only for unsigned int
  //     frac = 0;              //
  //   if (rsh < 0 && !neg)     //   frac = rsh < 0 ? 0x7fffffff : frac;
  //     frac = 0x7fffffff;     //   frac = neg ? 0 : frac;
  //
  //   return frac;
  // }

  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: ResTy.getVectorElementCount());

  // Planned instruction sequence (for i32):
  //
  // Zero  = V6_vd0();
  // Neg   = V6_vgtw(Zero, Inp);
  // One   = V6_lvsplatw(1);
  // M80   = V6_lvsplatw(0x80000000);
  // Exp00 = V6_vaslwv(Inp, One);
  // Exp01 = V6_vsubw(Exp00, M80);
  // ExpM1 = V6_vasrw(Exp01, 24);
  // Frc00 = V6_vaslw(Inp, 8);
  // Frc01 = V6_vor(Frc00, M80);
  // Rsh00 = V6_vsubw(V6_lvsplatw(30), ExpM1);
  // Rsh01 = V6_vminw(Rsh00, V6_lvsplatw(32));
  // Frc02 = V6_vlsrwv(Frc01, Rsh01);

  // if signed int:
  // Bnd   = V6_vmux(Neg, M80, V6_lvsplatw(0x7fffffff))
  // Pos   = V6_vgtw(Rsh01, Zero);
  // Frc13 = V6_vsubw(Zero, Frc02);
  // Frc14 = V6_vmux(Neg, Frc13, Frc02);
  // Int   = V6_vmux(Pos, Frc14, Bnd);
  //
  // if unsigned int:
  // Rsn   = V6_vgtw(Zero, Rsh01)
  // Frc23 = V6_vmux(Rsn, V6_lvsplatw(0x7fffffff), Frc02)
  // Int   = V6_vmux(Neg, Zero, Frc23)

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: InpTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;
  assert((1ull << (ExpWidth - 1)) == (1 + ExpBias));

  // Reinterpret the FP input as an integer vector of the same width.
  SDValue Inp = DAG.getBitcast(VT: ResTy, V: Op0);
  SDValue Zero = getZero(dl, Ty: ResTy, DAG);
  SDValue Neg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Inp, RHS: Zero, Cond: ISD::SETLT);
  // M80/M7F are the 0x80..0 and 0x7f..f patterns for this element width.
  SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: ResTy);
  SDValue M7F = DAG.getConstant(Val: (1ull << (ElemWidth - 1)) - 1, DL: dl, VT: ResTy);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: ResTy);
  SDValue Exp00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, One});
  SDValue Exp01 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Exp00, M80});
  SDValue MNE = DAG.getConstant(Val: ElemWidth - ExpWidth, DL: dl, VT: ResTy);
  SDValue ExpM1 = DAG.getNode(Opcode: ISD::SRA, DL: dl, VT: ResTy, Ops: {Exp01, MNE});

  SDValue ExpW = DAG.getConstant(Val: ExpWidth, DL: dl, VT: ResTy);
  SDValue Frc00 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: ResTy, Ops: {Inp, ExpW});
  SDValue Frc01 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: ResTy, Ops: {Frc00, M80});

  SDValue MN2 = DAG.getConstant(Val: ElemWidth - 2, DL: dl, VT: ResTy);
  SDValue Rsh00 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {MN2, ExpM1});
  SDValue MW = DAG.getConstant(Val: ElemWidth, DL: dl, VT: ResTy);
  SDValue Rsh01 = DAG.getNode(Opcode: ISD::SMIN, DL: dl, VT: ResTy, Ops: {Rsh00, MW});
  SDValue Frc02 = DAG.getNode(Opcode: ISD::SRL, DL: dl, VT: ResTy, Ops: {Frc01, Rsh01});

  SDValue Int;

  if (Opc == ISD::FP_TO_SINT) {
    SDValue Bnd = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, M80, M7F});
    SDValue Pos = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETGT);
    SDValue Frc13 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: ResTy, Ops: {Zero, Frc02});
    SDValue Frc14 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Neg, Frc13, Frc02});
    Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, Ops: {Pos, Frc14, Bnd});
  } else {
    assert(Opc == ISD::FP_TO_UINT);
    SDValue Rsn = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Rsh01, RHS: Zero, Cond: ISD::SETLT);
    SDValue Frc23 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Rsn, N2: M7F, N3: Frc02);
    Int = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ResTy, N1: Neg, N2: Zero, N3: Frc23);
  }

  return Int;
}
2824 | |
SDValue
HexagonTargetLowering::ExpandHvxIntToFp(SDValue Op, SelectionDAG &DAG) const {
  // Expand a vector SINT_TO_FP/UINT_TO_FP (with equal element bitwidths on
  // both sides) by constructing the IEEE bit pattern directly: normalize the
  // magnitude with count-leading-zeros, round the fraction, compute the
  // biased exponent, and OR in the sign bit (signed case only).
  unsigned Opc = Op.getOpcode();
  assert(Opc == ISD::SINT_TO_FP || Opc == ISD::UINT_TO_FP);

  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(i: 0);
  MVT InpTy = ty(Op: Op0);
  MVT ResTy = ty(Op);
  assert(ResTy.changeTypeToInteger() == InpTy);

  // Reference scalar implementation of the expansion below (for i32/f32):
  //
  // uint32_t vnoc1_rnd(int32_t w) {
  //   int32_t iszero = w == 0;
  //   int32_t isneg = w < 0;
  //   uint32_t u = __builtin_HEXAGON_A2_abs(w);
  //
  //   uint32_t norm_left = __builtin_HEXAGON_S2_cl0(u) + 1;
  //   uint32_t frac0 = (uint64_t)u << norm_left;
  //
  //   // Rounding:
  //   uint32_t frac1 = frac0 + ((1 << 8) - 1);
  //   uint32_t renorm = (frac0 > frac1);
  //   uint32_t rup = (int)(frac0 << 22) < 0;
  //
  //   uint32_t frac2 = frac0 >> 8;
  //   uint32_t frac3 = frac1 >> 8;
  //   uint32_t frac = (frac2 != frac3) ? frac3 >> 1 : (frac3 + rup) >> 1;
  //
  //   int32_t exp = 32 - norm_left + renorm + 127;
  //   exp <<= 23;
  //
  //   uint32_t sign = 0x80000000 * isneg;
  //   uint32_t f = sign | exp | frac;
  //   return iszero ? 0 : f;
  // }

  MVT PredTy = MVT::getVectorVT(VT: MVT::i1, EC: InpTy.getVectorElementCount());
  bool Signed = Opc == ISD::SINT_TO_FP;

  auto [ExpWidth, ExpBias, FracWidth] = getIEEEProperties(Ty: ResTy);
  unsigned ElemWidth = 1 + ExpWidth + FracWidth;

  // Normalize: shift the magnitude left so the leading 1 falls off the top,
  // leaving the fraction bits at the top of the word.
  SDValue Zero = getZero(dl, Ty: InpTy, DAG);
  SDValue One = DAG.getConstant(Val: 1, DL: dl, VT: InpTy);
  SDValue IsZero = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETEQ);
  SDValue Abs = Signed ? DAG.getNode(Opcode: ISD::ABS, DL: dl, VT: InpTy, Operand: Op0) : Op0;
  SDValue Clz = DAG.getNode(Opcode: ISD::CTLZ, DL: dl, VT: InpTy, Operand: Abs);
  SDValue NLeft = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Clz, One});
  SDValue Frac0 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy, Ops: {Abs, NLeft});

  // Round the fraction down to FracWidth bits; Ovf indicates the rounding
  // overflowed (renormalization), which bumps the exponent by one.
  auto [Frac, Ovf] = emitHvxShiftRightRnd(Val: Frac0, Amt: ExpWidth + 1, Signed: false, DAG);
  if (Signed) {
    // OR in the sign bit for negative inputs.
    SDValue IsNeg = DAG.getSetCC(DL: dl, VT: PredTy, LHS: Op0, RHS: Zero, Cond: ISD::SETLT);
    SDValue M80 = DAG.getConstant(Val: 1ull << (ElemWidth - 1), DL: dl, VT: InpTy);
    SDValue Sign = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsNeg, M80, Zero});
    Frac = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Sign, Frac});
  }

  // Biased exponent: ElemWidth - NLeft + renorm + ExpBias, shifted into the
  // exponent field.
  SDValue Rnrm = DAG.getZExtOrTrunc(Op: Ovf, DL: dl, VT: InpTy);
  SDValue Exp0 = DAG.getConstant(Val: ElemWidth + ExpBias, DL: dl, VT: InpTy);
  SDValue Exp1 = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: InpTy, Ops: {Rnrm, Exp0});
  SDValue Exp2 = DAG.getNode(Opcode: ISD::SUB, DL: dl, VT: InpTy, Ops: {Exp1, NLeft});
  SDValue Exp3 = DAG.getNode(Opcode: ISD::SHL, DL: dl, VT: InpTy,
                             Ops: {Exp2, DAG.getConstant(Val: FracWidth, DL: dl, VT: InpTy)});
  // Assemble the float; force zero inputs to +0.0.
  SDValue Flt0 = DAG.getNode(Opcode: ISD::OR, DL: dl, VT: InpTy, Ops: {Frac, Exp3});
  SDValue Flt1 = DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: InpTy, Ops: {IsZero, Zero, Flt0});
  SDValue Flt = DAG.getBitcast(VT: ResTy, V: Flt1);

  return Flt;
}
2895 | |
2896 | SDValue |
2897 | HexagonTargetLowering::CreateTLWrapper(SDValue Op, SelectionDAG &DAG) const { |
2898 | unsigned Opc = Op.getOpcode(); |
2899 | unsigned TLOpc; |
2900 | switch (Opc) { |
2901 | case ISD::ANY_EXTEND: |
2902 | case ISD::SIGN_EXTEND: |
2903 | case ISD::ZERO_EXTEND: |
2904 | TLOpc = HexagonISD::TL_EXTEND; |
2905 | break; |
2906 | case ISD::TRUNCATE: |
2907 | TLOpc = HexagonISD::TL_TRUNCATE; |
2908 | break; |
2909 | #ifndef NDEBUG |
2910 | Op.dump(&DAG); |
2911 | #endif |
2912 | llvm_unreachable("Unepected operator" ); |
2913 | } |
2914 | |
2915 | const SDLoc &dl(Op); |
2916 | return DAG.getNode(Opcode: TLOpc, DL: dl, VT: ty(Op), N1: Op.getOperand(i: 0), |
2917 | N2: DAG.getUNDEF(VT: MVT::i128), // illegal type |
2918 | N3: DAG.getConstant(Val: Opc, DL: dl, VT: MVT::i32)); |
2919 | } |
2920 | |
2921 | SDValue |
2922 | HexagonTargetLowering::RemoveTLWrapper(SDValue Op, SelectionDAG &DAG) const { |
2923 | assert(Op.getOpcode() == HexagonISD::TL_EXTEND || |
2924 | Op.getOpcode() == HexagonISD::TL_TRUNCATE); |
2925 | unsigned Opc = Op.getConstantOperandVal(i: 2); |
2926 | return DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: ty(Op), Operand: Op.getOperand(i: 0)); |
2927 | } |
2928 | |
HexagonTargetLowering::VectorPair
HexagonTargetLowering::SplitVectorOp(SDValue Op, SelectionDAG &DAG) const {
  // Split an operation on an HVX vector pair into the same operation
  // performed separately on the low and high single-vector halves, and
  // return the pair (low result, high result).
  assert(!Op.isMachineOpcode());
  SmallVector<SDValue, 2> OpsL, OpsH;
  const SDLoc &dl(Op);

  // A VT operand (e.g. the type in SIGN_EXTEND_INREG) is "split" by halving
  // the type it designates; both halves then use the same halved type.
  auto SplitVTNode = [&DAG, this](const VTSDNode *N) {
    MVT Ty = typeSplit(VecTy: N->getVT().getSimpleVT()).first;
    SDValue TV = DAG.getValueType(Ty);
    return std::make_pair(x&: TV, y&: TV);
  };

  for (SDValue A : Op.getNode()->ops()) {
    // Non-vector operands are used unchanged by both halves.
    auto [Lo, Hi] =
        ty(Op: A).isVector() ? opSplit(Vec: A, dl, DAG) : std::make_pair(x&: A, y&: A);
    // Special case for type operand.
    switch (Op.getOpcode()) {
      case ISD::SIGN_EXTEND_INREG:
      case HexagonISD::SSAT:
      case HexagonISD::USAT:
        if (const auto *N = dyn_cast<const VTSDNode>(Val: A.getNode()))
          std::tie(args&: Lo, args&: Hi) = SplitVTNode(N);
        break;
    }
    OpsL.push_back(Elt: Lo);
    OpsH.push_back(Elt: Hi);
  }

  MVT ResTy = ty(Op);
  MVT HalfTy = typeSplit(VecTy: ResTy).first;
  SDValue L = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsL);
  SDValue H = DAG.getNode(Opcode: Op.getOpcode(), DL: dl, VT: HalfTy, Ops: OpsH);
  return {L, H};
}
2963 | |
SDValue
HexagonTargetLowering::SplitHvxMemOp(SDValue Op, SelectionDAG &DAG) const {
  // Split a (masked) load/store of an HVX vector pair into two accesses of
  // single vectors, HwLen bytes apart, then recombine the value halves with
  // CONCAT_VECTORS and the chains with TokenFactor.
  auto *MemN = cast<MemSDNode>(Val: Op.getNode());

  MVT MemTy = MemN->getMemoryVT().getSimpleVT();
  // Only pair-sized accesses need splitting.
  if (!isHvxPairTy(Ty: MemTy))
    return Op;

  const SDLoc &dl(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  MVT SingleTy = typeSplit(VecTy: MemTy).first;
  SDValue Chain = MemN->getChain();
  SDValue Base0 = MemN->getBasePtr();
  // Address of the upper half: base + one vector length.
  SDValue Base1 =
      DAG.getMemBasePlusOffset(Base: Base0, Offset: TypeSize::getFixed(ExactSize: HwLen), DL: dl);
  unsigned MemOpc = MemN->getOpcode();

  MachineMemOperand *MOp0 = nullptr, *MOp1 = nullptr;
  if (MachineMemOperand *MMO = MemN->getMemOperand()) {
    MachineFunction &MF = DAG.getMachineFunction();
    // The number of bytes a masked access actually touches is not known at
    // compile time.
    uint64_t MemSize = (MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE)
                           ? (uint64_t)MemoryLocation::UnknownSize
                           : HwLen;
    MOp0 = MF.getMachineMemOperand(MMO, Offset: 0, Size: MemSize);
    MOp1 = MF.getMachineMemOperand(MMO, Offset: HwLen, Size: MemSize);
  }

  if (MemOpc == ISD::LOAD) {
    assert(cast<LoadSDNode>(Op)->isUnindexed());
    SDValue Load0 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base0, MMO: MOp0);
    SDValue Load1 = DAG.getLoad(VT: SingleTy, dl, Chain, Ptr: Base1, MMO: MOp1);
    return DAG.getMergeValues(
        Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: Load0, N2: Load1),
          DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
                      N1: Load0.getValue(R: 1), N2: Load1.getValue(R: 1)) }, dl);
  }
  if (MemOpc == ISD::STORE) {
    assert(cast<StoreSDNode>(Op)->isUnindexed());
    VectorPair Vals = opSplit(Vec: cast<StoreSDNode>(Val&: Op)->getValue(), dl, DAG);
    SDValue Store0 = DAG.getStore(Chain, dl, Val: Vals.first, Ptr: Base0, MMO: MOp0);
    SDValue Store1 = DAG.getStore(Chain, dl, Val: Vals.second, Ptr: Base1, MMO: MOp1);
    return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: Store0, N2: Store1);
  }

  assert(MemOpc == ISD::MLOAD || MemOpc == ISD::MSTORE);

  // Masked accesses: the mask (and pass-through/value) must be split along
  // with the data.
  auto MaskN = cast<MaskedLoadStoreSDNode>(Val&: Op);
  assert(MaskN->isUnindexed());
  VectorPair Masks = opSplit(Vec: MaskN->getMask(), dl, DAG);
  SDValue Offset = DAG.getUNDEF(VT: MVT::i32);

  if (MemOpc == ISD::MLOAD) {
    VectorPair Thru =
        opSplit(Vec: cast<MaskedLoadSDNode>(Val&: Op)->getPassThru(), dl, DAG);
    SDValue MLoad0 =
        DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base0, Offset, Mask: Masks.first,
                          Src0: Thru.first, MemVT: SingleTy, MMO: MOp0, AM: ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, IsExpanding: false);
    SDValue MLoad1 =
        DAG.getMaskedLoad(VT: SingleTy, dl, Chain, Base: Base1, Offset, Mask: Masks.second,
                          Src0: Thru.second, MemVT: SingleTy, MMO: MOp1, AM: ISD::UNINDEXED,
                          ISD::NON_EXTLOAD, IsExpanding: false);
    return DAG.getMergeValues(
        Ops: { DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: MemTy, N1: MLoad0, N2: MLoad1),
          DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other,
                      N1: MLoad0.getValue(R: 1), N2: MLoad1.getValue(R: 1)) }, dl);
  }
  if (MemOpc == ISD::MSTORE) {
    VectorPair Vals = opSplit(Vec: cast<MaskedStoreSDNode>(Val&: Op)->getValue(), dl, DAG);
    SDValue MStore0 = DAG.getMaskedStore(Chain, dl, Val: Vals.first, Base: Base0, Offset,
                                         Mask: Masks.first, MemVT: SingleTy, MMO: MOp0,
                                         AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
    SDValue MStore1 = DAG.getMaskedStore(Chain, dl, Val: Vals.second, Base: Base1, Offset,
                                         Mask: Masks.second, MemVT: SingleTy, MMO: MOp1,
                                         AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
    return DAG.getNode(Opcode: ISD::TokenFactor, DL: dl, VT: MVT::Other, N1: MStore0, N2: MStore1);
  }

  std::string Name = "Unexpected operation: " + Op->getOperationName(G: &DAG);
  llvm_unreachable(Name.c_str());
}
3045 | |
SDValue
HexagonTargetLowering::WidenHvxLoad(SDValue Op, SelectionDAG &DAG) const {
  // Widen a load of a short vector into a full-HVX masked byte load: the
  // mask (built with V6_pred_scalar2) enables only the first ResLen bytes,
  // and the loaded bytes are cast back to the requested element type.
  const SDLoc &dl(Op);
  auto *LoadN = cast<LoadSDNode>(Val: Op.getNode());
  assert(LoadN->isUnindexed() && "Not widening indexed loads yet");
  assert(LoadN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening loads of i1 yet");

  SDValue Chain = LoadN->getChain();
  SDValue Base = LoadN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(VT: MVT::i32);

  MVT ResTy = ty(Op);
  unsigned HwLen = Subtarget.getVectorLength();
  unsigned ResLen = ResTy.getStoreSize();
  assert(ResLen < HwLen && "vsetq(v1) prerequisite");

  // Predicate with the first ResLen byte lanes set.
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                          Ops: {DAG.getConstant(Val: ResLen, DL: dl, VT: MVT::i32)}, DAG);

  MVT LoadTy = MVT::getVectorVT(VT: MVT::i8, NumElements: HwLen);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(MMO: LoadN->getMemOperand(), Offset: 0, Size: HwLen);

  SDValue Load = DAG.getMaskedLoad(VT: LoadTy, dl, Chain, Base, Offset, Mask,
                                   Src0: DAG.getUNDEF(VT: LoadTy), MemVT: LoadTy, MMO: MemOp,
                                   AM: ISD::UNINDEXED, ISD::NON_EXTLOAD, IsExpanding: false);
  // Return both the value (with the original element type) and the chain.
  SDValue Value = opCastElem(Vec: Load, ElemTy: ResTy.getVectorElementType(), DAG);
  return DAG.getMergeValues(Ops: {Value, Load.getValue(R: 1)}, dl);
}
3077 | |
SDValue
HexagonTargetLowering::WidenHvxStore(SDValue Op, SelectionDAG &DAG) const {
  // Widen a store of a short vector into a full-HVX masked byte store: pad
  // the value with undef halves up to HwLen bytes, then store only the
  // first ValueLen bytes via a V6_pred_scalar2 mask.
  const SDLoc &dl(Op);
  auto *StoreN = cast<StoreSDNode>(Val: Op.getNode());
  assert(StoreN->isUnindexed() && "Not widening indexed stores yet");
  assert(StoreN->getMemoryVT().getVectorElementType() != MVT::i1 &&
         "Not widening stores of i1 yet");

  SDValue Chain = StoreN->getChain();
  SDValue Base = StoreN->getBasePtr();
  SDValue Offset = DAG.getUNDEF(VT: MVT::i32);

  // Work on the value as bytes.
  SDValue Value = opCastElem(Vec: StoreN->getValue(), ElemTy: MVT::i8, DAG);
  MVT ValueTy = ty(Op: Value);
  unsigned ValueLen = ValueTy.getVectorNumElements();
  unsigned HwLen = Subtarget.getVectorLength();
  assert(isPowerOf2_32(ValueLen));

  // Double the vector with undef until it fills a whole HVX vector.
  for (unsigned Len = ValueLen; Len < HwLen; ) {
    Value = opJoin(Ops: {Value, DAG.getUNDEF(VT: ty(Op: Value))}, dl, DAG);
    Len = ty(Op: Value).getVectorNumElements(); // This is Len *= 2
  }
  assert(ty(Value).getVectorNumElements() == HwLen); // Paranoia

  assert(ValueLen < HwLen && "vsetq(v1) prerequisite");
  // Predicate with the first ValueLen byte lanes set.
  MVT BoolTy = MVT::getVectorVT(VT: MVT::i1, NumElements: HwLen);
  SDValue Mask = getInstr(MachineOpc: Hexagon::V6_pred_scalar2, dl, Ty: BoolTy,
                          Ops: {DAG.getConstant(Val: ValueLen, DL: dl, VT: MVT::i32)}, DAG);
  MachineFunction &MF = DAG.getMachineFunction();
  auto *MemOp = MF.getMachineMemOperand(MMO: StoreN->getMemOperand(), Offset: 0, Size: HwLen);
  return DAG.getMaskedStore(Chain, dl, Val: Value, Base, Offset, Mask, MemVT: ty(Op: Value),
                            MMO: MemOp, AM: ISD::UNINDEXED, IsTruncating: false, IsCompressing: false);
}
3111 | |
SDValue
HexagonTargetLowering::WidenHvxSetCC(SDValue Op, SelectionDAG &DAG) const {
  // Widen a setcc on short vectors: pad both operands with undef up to a
  // full HVX vector (if such a type exists), compare there, and extract the
  // subvector matching the original (legalized) result type.
  const SDLoc &dl(Op);
  SDValue Op0 = Op.getOperand(i: 0), Op1 = Op.getOperand(i: 1);
  MVT ElemTy = ty(Op: Op0).getVectorElementType();
  unsigned HwLen = Subtarget.getVectorLength();

  // Number of elements of this type that fill a whole HVX vector.
  unsigned WideOpLen = (8 * HwLen) / ElemTy.getSizeInBits();
  assert(WideOpLen * ElemTy.getSizeInBits() == 8 * HwLen);
  MVT WideOpTy = MVT::getVectorVT(VT: ElemTy, NumElements: WideOpLen);
  if (!Subtarget.isHVXVectorType(VecTy: WideOpTy, IncludeBool: true))
    return SDValue();

  SDValue WideOp0 = appendUndef(Val: Op0, ResTy: WideOpTy, DAG);
  SDValue WideOp1 = appendUndef(Val: Op1, ResTy: WideOpTy, DAG);
  EVT ResTy =
      getSetCCResultType(DAG.getDataLayout(), C&: *DAG.getContext(), VT: WideOpTy);
  SDValue SetCC = DAG.getNode(Opcode: ISD::SETCC, DL: dl, VT: ResTy,
                              Ops: {WideOp0, WideOp1, Op.getOperand(i: 2)});

  EVT RetTy = typeLegalize(Ty: ty(Op), DAG);
  return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL: dl, VT: RetTy,
                     Ops: {SetCC, getZero(dl, Ty: MVT::i32, DAG)});
}
3136 | |
SDValue
HexagonTargetLowering::LowerHvxOperation(SDValue Op, SelectionDAG &DAG) const {
  // Main HVX lowering hook: operations involving vector pairs are split
  // into single-vector halves first (where that preserves legality); the
  // remaining operations are dispatched to the specific LowerHvx* routines.
  unsigned Opc = Op.getOpcode();
  // Pair-typed result or any pair-typed operand triggers splitting.
  bool IsPairOp = isHvxPairTy(Ty: ty(Op)) ||
                  llvm::any_of(Range: Op.getNode()->ops(), P: [this] (SDValue V) {
                    return isHvxPairTy(Ty: ty(Op: V));
                  });

  if (IsPairOp) {
    switch (Opc) {
      default:
        break;
      case ISD::LOAD:
      case ISD::STORE:
      case ISD::MLOAD:
      case ISD::MSTORE:
        return SplitHvxMemOp(Op, DAG);
      case ISD::SINT_TO_FP:
      case ISD::UINT_TO_FP:
      case ISD::FP_TO_SINT:
      case ISD::FP_TO_UINT:
        // Only split same-width conversions; width-changing ones are
        // handled elsewhere.
        if (ty(Op).getSizeInBits() == ty(Op: Op.getOperand(i: 0)).getSizeInBits())
          return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
        break;
      case ISD::ABS:
      case ISD::CTPOP:
      case ISD::CTLZ:
      case ISD::CTTZ:
      case ISD::MUL:
      case ISD::FADD:
      case ISD::FSUB:
      case ISD::FMUL:
      case ISD::FMINNUM:
      case ISD::FMAXNUM:
      case ISD::MULHS:
      case ISD::MULHU:
      case ISD::AND:
      case ISD::OR:
      case ISD::XOR:
      case ISD::SRA:
      case ISD::SHL:
      case ISD::SRL:
      case ISD::FSHL:
      case ISD::FSHR:
      case ISD::SMIN:
      case ISD::SMAX:
      case ISD::UMIN:
      case ISD::UMAX:
      case ISD::SETCC:
      case ISD::VSELECT:
      case ISD::SIGN_EXTEND_INREG:
      case ISD::SPLAT_VECTOR:
        // Element-wise operations split cleanly.
        return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
      case ISD::SIGN_EXTEND:
      case ISD::ZERO_EXTEND:
        // In general, sign- and zero-extends can't be split and still
        // be legal. The only exception is extending bool vectors.
        if (ty(Op: Op.getOperand(i: 0)).getVectorElementType() == MVT::i1)
          return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
        break;
    }
  }

  switch (Opc) {
    default:
      break;
    case ISD::BUILD_VECTOR:            return LowerHvxBuildVector(Op, DAG);
    case ISD::SPLAT_VECTOR:            return LowerHvxSplatVector(Op, DAG);
    case ISD::CONCAT_VECTORS:          return LowerHvxConcatVectors(Op, DAG);
    case ISD::INSERT_SUBVECTOR:        return LowerHvxInsertSubvector(Op, DAG);
    case ISD::INSERT_VECTOR_ELT:       return LowerHvxInsertElement(Op, DAG);
    case ISD::EXTRACT_SUBVECTOR:       return LowerHvxExtractSubvector(Op, DAG);
    case ISD::EXTRACT_VECTOR_ELT:      return LowerHvxExtractElement(Op, DAG);
    case ISD::BITCAST:                 return LowerHvxBitcast(Op, DAG);
    case ISD::ANY_EXTEND:              return LowerHvxAnyExt(Op, DAG);
    case ISD::SIGN_EXTEND:             return LowerHvxSignExt(Op, DAG);
    case ISD::ZERO_EXTEND:             return LowerHvxZeroExt(Op, DAG);
    case ISD::CTTZ:                    return LowerHvxCttz(Op, DAG);
    case ISD::SELECT:                  return LowerHvxSelect(Op, DAG);
    case ISD::SRA:
    case ISD::SHL:
    case ISD::SRL:                     return LowerHvxShift(Op, DAG);
    case ISD::FSHL:
    case ISD::FSHR:                    return LowerHvxFunnelShift(Op, DAG);
    case ISD::MULHS:
    case ISD::MULHU:                   return LowerHvxMulh(Op, DAG);
    case ISD::SMUL_LOHI:
    case ISD::UMUL_LOHI:               return LowerHvxMulLoHi(Op, DAG);
    case ISD::ANY_EXTEND_VECTOR_INREG: return LowerHvxExtend(Op, DAG);
    case ISD::SETCC:
    case ISD::INTRINSIC_VOID:          return Op;
    case ISD::INTRINSIC_WO_CHAIN:      return LowerHvxIntrinsic(Op, DAG);
    case ISD::MLOAD:
    case ISD::MSTORE:                  return LowerHvxMaskedOp(Op, DAG);
    // Unaligned loads will be handled by the default lowering.
    case ISD::LOAD:                    return SDValue();
    case ISD::FP_EXTEND:               return LowerHvxFpExtend(Op, DAG);
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:              return LowerHvxFpToInt(Op, DAG);
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:              return LowerHvxIntToFp(Op, DAG);

    // Special nodes:
    case HexagonISD::SMUL_LOHI:
    case HexagonISD::UMUL_LOHI:
    case HexagonISD::USMUL_LOHI:       return LowerHvxMulLoHi(Op, DAG);
  }
#ifndef NDEBUG
  Op.dumpr(&DAG);
#endif
  llvm_unreachable("Unhandled HVX operation");
}
3249 | |
3250 | SDValue |
3251 | HexagonTargetLowering::ExpandHvxResizeIntoSteps(SDValue Op, SelectionDAG &DAG) |
3252 | const { |
3253 | // Rewrite the extension/truncation/saturation op into steps where each |
3254 | // step changes the type widths by a factor of 2. |
3255 | // E.g. i8 -> i16 remains unchanged, but i8 -> i32 ==> i8 -> i16 -> i32. |
3256 | // |
3257 | // Some of the vector types in Op may not be legal. |
3258 | |
3259 | unsigned Opc = Op.getOpcode(); |
3260 | switch (Opc) { |
3261 | case HexagonISD::SSAT: |
3262 | case HexagonISD::USAT: |
3263 | case HexagonISD::TL_EXTEND: |
3264 | case HexagonISD::TL_TRUNCATE: |
3265 | break; |
3266 | case ISD::ANY_EXTEND: |
3267 | case ISD::ZERO_EXTEND: |
3268 | case ISD::SIGN_EXTEND: |
3269 | case ISD::TRUNCATE: |
3270 | llvm_unreachable("ISD:: ops will be auto-folded" ); |
3271 | break; |
3272 | #ifndef NDEBUG |
3273 | Op.dump(&DAG); |
3274 | #endif |
3275 | llvm_unreachable("Unexpected operation" ); |
3276 | } |
3277 | |
3278 | SDValue Inp = Op.getOperand(i: 0); |
3279 | MVT InpTy = ty(Op: Inp); |
3280 | MVT ResTy = ty(Op); |
3281 | |
3282 | unsigned InpWidth = InpTy.getVectorElementType().getSizeInBits(); |
3283 | unsigned ResWidth = ResTy.getVectorElementType().getSizeInBits(); |
3284 | assert(InpWidth != ResWidth); |
3285 | |
3286 | if (InpWidth == 2 * ResWidth || ResWidth == 2 * InpWidth) |
3287 | return Op; |
3288 | |
3289 | const SDLoc &dl(Op); |
3290 | unsigned NumElems = InpTy.getVectorNumElements(); |
3291 | assert(NumElems == ResTy.getVectorNumElements()); |
3292 | |
3293 | auto repeatOp = [&](unsigned NewWidth, SDValue Arg) { |
3294 | MVT Ty = MVT::getVectorVT(VT: MVT::getIntegerVT(BitWidth: NewWidth), NumElements: NumElems); |
3295 | switch (Opc) { |
3296 | case HexagonISD::SSAT: |
3297 | case HexagonISD::USAT: |
3298 | return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, DAG.getValueType(Ty)}); |
3299 | case HexagonISD::TL_EXTEND: |
3300 | case HexagonISD::TL_TRUNCATE: |
3301 | return DAG.getNode(Opcode: Opc, DL: dl, VT: Ty, Ops: {Arg, Op.getOperand(i: 1), Op.getOperand(i: 2)}); |
3302 | default: |
3303 | llvm_unreachable("Unexpected opcode" ); |
3304 | } |
3305 | }; |
3306 | |
3307 | SDValue S = Inp; |
3308 | if (InpWidth < ResWidth) { |
3309 | assert(ResWidth % InpWidth == 0 && isPowerOf2_32(ResWidth / InpWidth)); |
3310 | while (InpWidth * 2 <= ResWidth) |
3311 | S = repeatOp(InpWidth *= 2, S); |
3312 | } else { |
3313 | // InpWidth > ResWidth |
3314 | assert(InpWidth % ResWidth == 0 && isPowerOf2_32(InpWidth / ResWidth)); |
3315 | while (InpWidth / 2 >= ResWidth) |
3316 | S = repeatOp(InpWidth /= 2, S); |
3317 | } |
3318 | return S; |
3319 | } |
3320 | |
SDValue
HexagonTargetLowering::LegalizeHvxResize(SDValue Op, SelectionDAG &DAG) const {
  // Legalize SSAT/USAT/TL_EXTEND/TL_TRUNCATE whose input or result type is
  // not legal: widen to an HVX vector, split pair-sized operations, or (if
  // both types are already legal) unwrap TL_* back to the standard node.
  SDValue Inp0 = Op.getOperand(i: 0);
  MVT InpTy = ty(Op: Inp0);
  MVT ResTy = ty(Op);
  unsigned InpWidth = InpTy.getSizeInBits();
  unsigned ResWidth = ResTy.getSizeInBits();
  unsigned Opc = Op.getOpcode();

  if (shouldWidenToHvx(Ty: InpTy, DAG) || shouldWidenToHvx(Ty: ResTy, DAG)) {
    // First, make sure that the narrower type is widened to HVX.
    // This may cause the result to be wider than what the legalizer
    // expects, so insert EXTRACT_SUBVECTOR to bring it back to the
    // desired type.
    auto [WInpTy, WResTy] =
        InpWidth < ResWidth ? typeWidenToWider(Ty0: typeWidenToHvx(Ty: InpTy), Ty1: ResTy)
                            : typeWidenToWider(Ty0: InpTy, Ty1: typeWidenToHvx(Ty: ResTy));
    SDValue W = appendUndef(Val: Inp0, ResTy: WInpTy, DAG);
    SDValue S;
    // TL_* nodes carry two extra operands (undef marker and the original
    // opcode); SSAT/USAT carry their target type as a VT operand.
    if (Opc == HexagonISD::TL_EXTEND || Opc == HexagonISD::TL_TRUNCATE) {
      S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: Op.getOperand(i: 1),
                      N3: Op.getOperand(i: 2));
    } else {
      S = DAG.getNode(Opcode: Opc, DL: SDLoc(Op), VT: WResTy, N1: W, N2: DAG.getValueType(WResTy));
    }
    SDValue T = ExpandHvxResizeIntoSteps(Op: S, DAG);
    return extractSubvector(Vec: T, SubTy: typeLegalize(Ty: ResTy, DAG), SubIdx: 0, DAG);
  } else if (shouldSplitToHvx(Ty: InpWidth < ResWidth ? ResTy : InpTy, DAG)) {
    return opJoin(Ops: SplitVectorOp(Op, DAG), dl: SDLoc(Op), DAG);
  } else {
    assert(isTypeLegal(InpTy) && isTypeLegal(ResTy));
    return RemoveTLWrapper(Op, DAG);
  }
  llvm_unreachable("Unexpected situation");
}
3356 | |
// Custom operand-legalization hook for HVX. Inspects node N and, when an
// HVX-specific replacement applies, appends the replacement value(s) to
// Results; leaving Results empty signals that no HVX lowering applies here.
void
HexagonTargetLowering::LowerHvxOperationWrapper(SDNode *N,
    SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  unsigned Opc = N->getOpcode();
  SDValue Op(N, 0);
  SDValue Inp0; // Optional first argument.
  if (N->getNumOperands() > 0)
    Inp0 = Op.getOperand(i: 0);

  switch (Opc) {
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::TRUNCATE:
    // Wrap extends/truncates between HVX-compatible element types in a
    // TL_* node so they survive until HVX-specific legalization.
    if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
        Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
      Results.push_back(Elt: CreateTLWrapper(Op, DAG));
    }
    break;
  case ISD::SETCC:
    // Widen the compare when its operand type widens to an HVX vector.
    if (shouldWidenToHvx(Ty: ty(Op: Inp0), DAG)) {
      if (SDValue T = WidenHvxSetCC(Op, DAG))
        Results.push_back(Elt: T);
    }
    break;
  case ISD::STORE: {
    // Widen stores whose stored value widens to an HVX vector.
    if (shouldWidenToHvx(Ty: ty(Op: cast<StoreSDNode>(Val: N)->getValue()), DAG)) {
      SDValue Store = WidenHvxStore(Op, DAG);
      Results.push_back(Elt: Store);
    }
    break;
  }
  case ISD::MLOAD:
    // Split a masked load of an HVX-pair type; the split op returns a
    // MERGE_VALUES whose two operands (value and chain) replace the node's
    // two results.
    if (isHvxPairTy(Ty: ty(Op))) {
      SDValue S = SplitHvxMemOp(Op, DAG);
      assert(S->getOpcode() == ISD::MERGE_VALUES);
      Results.push_back(Elt: S.getOperand(i: 0));
      Results.push_back(Elt: S.getOperand(i: 1));
    }
    break;
  case ISD::MSTORE:
    // Split a masked store whose stored value (operand 1) is an HVX pair.
    if (isHvxPairTy(Ty: ty(Op: Op->getOperand(Num: 1)))) { // Stored value
      SDValue S = SplitHvxMemOp(Op, DAG);
      Results.push_back(Elt: S);
    }
    break;
  case ISD::SINT_TO_FP:
  case ISD::UINT_TO_FP:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // Int<->FP conversions with different source/result bit widths are
    // first rewritten so both sides have equal widths.
    if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
      SDValue T = EqualizeFpIntConversion(Op, DAG);
      Results.push_back(Elt: T);
    }
    break;
  case HexagonISD::SSAT:
  case HexagonISD::USAT:
  case HexagonISD::TL_EXTEND:
  case HexagonISD::TL_TRUNCATE:
    // HVX resize wrappers get their own legalization path.
    Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
    break;
  default:
    break;
  }
}
3422 | |
// Custom result-legalization hook for HVX: replaces results of nodes whose
// value types are illegal. Replacement value(s) are appended to Results;
// leaving Results empty signals that no HVX-specific replacement applies.
void
HexagonTargetLowering::ReplaceHvxNodeResults(SDNode *N,
    SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  unsigned Opc = N->getOpcode();
  SDValue Op(N, 0);
  SDValue Inp0; // Optional first argument.
  if (N->getNumOperands() > 0)
    Inp0 = Op.getOperand(i: 0);

  switch (Opc) {
  case ISD::ANY_EXTEND:
  case ISD::SIGN_EXTEND:
  case ISD::ZERO_EXTEND:
  case ISD::TRUNCATE:
    // Wrap extends/truncates between HVX-compatible element types in a
    // TL_* node so they survive until HVX-specific legalization.
    if (Subtarget.isHVXElementType(Ty: ty(Op)) &&
        Subtarget.isHVXElementType(Ty: ty(Op: Inp0))) {
      Results.push_back(Elt: CreateTLWrapper(Op, DAG));
    }
    break;
  case ISD::SETCC:
    // Here the *result* type (not the operand, as in the operand-
    // legalization hook) determines whether widening applies.
    if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
      if (SDValue T = WidenHvxSetCC(Op, DAG))
        Results.push_back(Elt: T);
    }
    break;
  case ISD::LOAD: {
    // Widen loads whose result widens to an HVX vector. The widened load
    // is a MERGE_VALUES whose operands (value and chain) replace the
    // node's two results.
    if (shouldWidenToHvx(Ty: ty(Op), DAG)) {
      SDValue Load = WidenHvxLoad(Op, DAG);
      assert(Load->getOpcode() == ISD::MERGE_VALUES);
      Results.push_back(Elt: Load.getOperand(i: 0));
      Results.push_back(Elt: Load.getOperand(i: 1));
    }
    break;
  }
  case ISD::BITCAST:
    // Bitcasts from HVX boolean (predicate) vectors need custom lowering.
    if (isHvxBoolTy(Ty: ty(Op: Inp0))) {
      SDValue C = LowerHvxBitcast(Op, DAG);
      Results.push_back(Elt: C);
    }
    break;
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT:
    // FP->int conversions with different source/result bit widths are
    // first rewritten so both sides have equal widths.
    if (ty(Op).getSizeInBits() != ty(Op: Inp0).getSizeInBits()) {
      SDValue T = EqualizeFpIntConversion(Op, DAG);
      Results.push_back(Elt: T);
    }
    break;
  case HexagonISD::SSAT:
  case HexagonISD::USAT:
  case HexagonISD::TL_EXTEND:
  case HexagonISD::TL_TRUNCATE:
    // HVX resize wrappers get their own legalization path.
    Results.push_back(Elt: LegalizeHvxResize(Op, DAG));
    break;
  default:
    break;
  }
}
3480 | |
3481 | SDValue |
3482 | HexagonTargetLowering::combineTruncateBeforeLegal(SDValue Op, |
3483 | DAGCombinerInfo &DCI) const { |
3484 | // Simplify V:v2NiB --(bitcast)--> vNi2B --(truncate)--> vNiB |
3485 | // to extract-subvector (shuffle V, pick even, pick odd) |
3486 | |
3487 | assert(Op.getOpcode() == ISD::TRUNCATE); |
3488 | SelectionDAG &DAG = DCI.DAG; |
3489 | const SDLoc &dl(Op); |
3490 | |
3491 | if (Op.getOperand(i: 0).getOpcode() == ISD::BITCAST) |
3492 | return SDValue(); |
3493 | SDValue Cast = Op.getOperand(i: 0); |
3494 | SDValue Src = Cast.getOperand(i: 0); |
3495 | |
3496 | EVT TruncTy = Op.getValueType(); |
3497 | EVT CastTy = Cast.getValueType(); |
3498 | EVT SrcTy = Src.getValueType(); |
3499 | if (SrcTy.isSimple()) |
3500 | return SDValue(); |
3501 | if (SrcTy.getVectorElementType() != TruncTy.getVectorElementType()) |
3502 | return SDValue(); |
3503 | unsigned SrcLen = SrcTy.getVectorNumElements(); |
3504 | unsigned CastLen = CastTy.getVectorNumElements(); |
3505 | if (2 * CastLen != SrcLen) |
3506 | return SDValue(); |
3507 | |
3508 | SmallVector<int, 128> Mask(SrcLen); |
3509 | for (int i = 0; i != static_cast<int>(CastLen); ++i) { |
3510 | Mask[i] = 2 * i; |
3511 | Mask[i + CastLen] = 2 * i + 1; |
3512 | } |
3513 | SDValue Deal = |
3514 | DAG.getVectorShuffle(VT: SrcTy, dl, N1: Src, N2: DAG.getUNDEF(VT: SrcTy), Mask); |
3515 | return opSplit(Vec: Deal, dl, DAG).first; |
3516 | } |
3517 | |
3518 | SDValue |
3519 | HexagonTargetLowering::combineConcatVectorsBeforeLegal( |
3520 | SDValue Op, DAGCombinerInfo &DCI) const { |
3521 | // Fold |
3522 | // concat (shuffle x, y, m1), (shuffle x, y, m2) |
3523 | // into |
3524 | // shuffle (concat x, y), undef, m3 |
3525 | if (Op.getNumOperands() != 2) |
3526 | return SDValue(); |
3527 | |
3528 | SelectionDAG &DAG = DCI.DAG; |
3529 | const SDLoc &dl(Op); |
3530 | SDValue V0 = Op.getOperand(i: 0); |
3531 | SDValue V1 = Op.getOperand(i: 1); |
3532 | |
3533 | if (V0.getOpcode() != ISD::VECTOR_SHUFFLE) |
3534 | return SDValue(); |
3535 | if (V1.getOpcode() != ISD::VECTOR_SHUFFLE) |
3536 | return SDValue(); |
3537 | |
3538 | SetVector<SDValue> Order; |
3539 | Order.insert(X: V0.getOperand(i: 0)); |
3540 | Order.insert(X: V0.getOperand(i: 1)); |
3541 | Order.insert(X: V1.getOperand(i: 0)); |
3542 | Order.insert(X: V1.getOperand(i: 1)); |
3543 | |
3544 | if (Order.size() > 2) |
3545 | return SDValue(); |
3546 | |
3547 | // In ISD::VECTOR_SHUFFLE, the types of each input and the type of the |
3548 | // result must be the same. |
3549 | EVT InpTy = V0.getValueType(); |
3550 | assert(InpTy.isVector()); |
3551 | unsigned InpLen = InpTy.getVectorNumElements(); |
3552 | |
3553 | SmallVector<int, 128> LongMask; |
3554 | auto AppendToMask = [&](SDValue Shuffle) { |
3555 | auto *SV = cast<ShuffleVectorSDNode>(Val: Shuffle.getNode()); |
3556 | ArrayRef<int> Mask = SV->getMask(); |
3557 | SDValue X = Shuffle.getOperand(i: 0); |
3558 | SDValue Y = Shuffle.getOperand(i: 1); |
3559 | for (int M : Mask) { |
3560 | if (M == -1) { |
3561 | LongMask.push_back(Elt: M); |
3562 | continue; |
3563 | } |
3564 | SDValue Src = static_cast<unsigned>(M) < InpLen ? X : Y; |
3565 | if (static_cast<unsigned>(M) >= InpLen) |
3566 | M -= InpLen; |
3567 | |
3568 | int OutOffset = Order[0] == Src ? 0 : InpLen; |
3569 | LongMask.push_back(Elt: M + OutOffset); |
3570 | } |
3571 | }; |
3572 | |
3573 | AppendToMask(V0); |
3574 | AppendToMask(V1); |
3575 | |
3576 | SDValue C0 = Order.front(); |
3577 | SDValue C1 = Order.back(); // Can be same as front |
3578 | EVT LongTy = InpTy.getDoubleNumVectorElementsVT(Context&: *DAG.getContext()); |
3579 | |
3580 | SDValue Cat = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL: dl, VT: LongTy, Ops: {C0, C1}); |
3581 | return DAG.getVectorShuffle(VT: LongTy, dl, N1: Cat, N2: DAG.getUNDEF(VT: LongTy), Mask: LongMask); |
3582 | } |
3583 | |
// HVX-specific DAG combines. Returns the replacement value for N, or an
// empty SDValue when no combine applies.
SDValue
HexagonTargetLowering::PerformHvxDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
    const {
  const SDLoc &dl(N);
  SelectionDAG &DAG = DCI.DAG;
  SDValue Op(N, 0);
  unsigned Opc = Op.getOpcode();

  SmallVector<SDValue, 4> Ops(N->ops().begin(), N->ops().end());

  // These two combines are only valid before operation legalization.
  if (Opc == ISD::TRUNCATE)
    return combineTruncateBeforeLegal(Op, DCI);
  if (Opc == ISD::CONCAT_VECTORS)
    return combineConcatVectorsBeforeLegal(Op, DCI);

  // Everything below targets post-legalization HVX nodes.
  if (DCI.isBeforeLegalizeOps())
    return SDValue();

  switch (Opc) {
  case ISD::VSELECT: {
    // (vselect (xor x, qtrue), v0, v1) -> (vselect x, v1, v0)
    // XOR with all-true negates the predicate, so swap the select arms.
    SDValue Cond = Ops[0];
    if (Cond->getOpcode() == ISD::XOR) {
      SDValue C0 = Cond.getOperand(i: 0), C1 = Cond.getOperand(i: 1);
      if (C1->getOpcode() == HexagonISD::QTRUE)
        return DAG.getNode(Opcode: ISD::VSELECT, DL: dl, VT: ty(Op), N1: C0, N2: Ops[2], N3: Ops[1]);
    }
    break;
  }
  case HexagonISD::V2Q:
    // V2Q of a constant splat folds directly to QFALSE/QTRUE.
    if (Ops[0].getOpcode() == ISD::SPLAT_VECTOR) {
      if (const auto *C = dyn_cast<ConstantSDNode>(Val: Ops[0].getOperand(i: 0)))
        return C->isZero() ? DAG.getNode(Opcode: HexagonISD::QFALSE, DL: dl, VT: ty(Op))
                           : DAG.getNode(Opcode: HexagonISD::QTRUE, DL: dl, VT: ty(Op));
    }
    break;
  case HexagonISD::Q2V:
    // Q2V of a known predicate folds to an all-ones or all-zeros vector.
    if (Ops[0].getOpcode() == HexagonISD::QTRUE)
      return DAG.getNode(Opcode: ISD::SPLAT_VECTOR, DL: dl, VT: ty(Op),
                         Operand: DAG.getConstant(Val: -1, DL: dl, VT: MVT::i32));
    if (Ops[0].getOpcode() == HexagonISD::QFALSE)
      return getZero(dl, Ty: ty(Op), DAG);
    break;
  case HexagonISD::VINSERTW0:
    // Inserting an undef word leaves the vector unchanged.
    if (isUndef(Op: Ops[1]))
      return Ops[0];
    break;
  case HexagonISD::VROR: {
    // Merge nested rotates: (vror (vror v, r1), r0) -> (vror v, r0+r1).
    if (Ops[0].getOpcode() == HexagonISD::VROR) {
      SDValue Vec = Ops[0].getOperand(i: 0);
      SDValue Rot0 = Ops[1], Rot1 = Ops[0].getOperand(i: 1);
      SDValue Rot = DAG.getNode(Opcode: ISD::ADD, DL: dl, VT: ty(Op: Rot0), Ops: {Rot0, Rot1});
      return DAG.getNode(Opcode: HexagonISD::VROR, DL: dl, VT: ty(Op), Ops: {Vec, Rot});
    }
    break;
  }
  }

  return SDValue();
}
3644 | |
3645 | bool |
3646 | HexagonTargetLowering::shouldSplitToHvx(MVT Ty, SelectionDAG &DAG) const { |
3647 | if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true)) |
3648 | return false; |
3649 | auto Action = getPreferredHvxVectorAction(VecTy: Ty); |
3650 | if (Action == TargetLoweringBase::TypeSplitVector) |
3651 | return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true); |
3652 | return false; |
3653 | } |
3654 | |
3655 | bool |
3656 | HexagonTargetLowering::shouldWidenToHvx(MVT Ty, SelectionDAG &DAG) const { |
3657 | if (Subtarget.isHVXVectorType(VecTy: Ty, IncludeBool: true)) |
3658 | return false; |
3659 | auto Action = getPreferredHvxVectorAction(VecTy: Ty); |
3660 | if (Action == TargetLoweringBase::TypeWidenVector) |
3661 | return Subtarget.isHVXVectorType(VecTy: typeLegalize(Ty, DAG), IncludeBool: true); |
3662 | return false; |
3663 | } |
3664 | |
3665 | bool |
3666 | HexagonTargetLowering::isHvxOperation(SDNode *N, SelectionDAG &DAG) const { |
3667 | if (!Subtarget.useHVXOps()) |
3668 | return false; |
3669 | // If the type of any result, or any operand type are HVX vector types, |
3670 | // this is an HVX operation. |
3671 | auto IsHvxTy = [this](EVT Ty) { |
3672 | return Ty.isSimple() && Subtarget.isHVXVectorType(VecTy: Ty.getSimpleVT(), IncludeBool: true); |
3673 | }; |
3674 | auto IsHvxOp = [this](SDValue Op) { |
3675 | return Op.getValueType().isSimple() && |
3676 | Subtarget.isHVXVectorType(VecTy: ty(Op), IncludeBool: true); |
3677 | }; |
3678 | if (llvm::any_of(Range: N->values(), P: IsHvxTy) || llvm::any_of(Range: N->ops(), P: IsHvxOp)) |
3679 | return true; |
3680 | |
3681 | // Check if this could be an HVX operation after type widening. |
3682 | auto IsWidenedToHvx = [this, &DAG](SDValue Op) { |
3683 | if (!Op.getValueType().isSimple()) |
3684 | return false; |
3685 | MVT ValTy = ty(Op); |
3686 | return ValTy.isVector() && shouldWidenToHvx(Ty: ValTy, DAG); |
3687 | }; |
3688 | |
3689 | for (int i = 0, e = N->getNumValues(); i != e; ++i) { |
3690 | if (IsWidenedToHvx(SDValue(N, i))) |
3691 | return true; |
3692 | } |
3693 | return llvm::any_of(Range: N->ops(), P: IsWidenedToHvx); |
3694 | } |
3695 | |