1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
14#include "WebAssemblyISelLowering.h"
15#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16#include "Utils/WebAssemblyTypeUtilities.h"
17#include "WebAssemblyMachineFunctionInfo.h"
18#include "WebAssemblySubtarget.h"
19#include "WebAssemblyTargetMachine.h"
20#include "WebAssemblyUtilities.h"
21#include "llvm/CodeGen/CallingConvLower.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineJumpTableInfo.h"
25#include "llvm/CodeGen/MachineModuleInfo.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SDPatternMatch.h"
28#include "llvm/CodeGen/SelectionDAG.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/IR/DiagnosticInfo.h"
31#include "llvm/IR/DiagnosticPrinter.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/IntrinsicInst.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
36#include "llvm/Support/ErrorHandling.h"
37#include "llvm/Support/KnownBits.h"
38#include "llvm/Support/MathExtras.h"
39#include "llvm/Target/TargetOptions.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
// Constructs the WebAssembly lowering configuration for the given subtarget.
//
// The body registers a register class for every value type that is usable
// with the enabled features, and then records how each operation, condition
// code, extending load and truncating store is to be legalized (Legal,
// Custom, Expand, Promote or LibCall). It also registers the node kinds for
// which target DAG combines are requested. Statement order matters: several
// later calls intentionally override actions set by earlier loops.
//
// TM  - the target machine, forwarded to the TargetLowering base class.
// STI - the subtarget whose feature queries (hasSIMD128(), hasFP16(), ...)
//       gate most of the configuration below; its address is stored in
//       Subtarget.
44WebAssemblyTargetLowering::WebAssemblyTargetLowering(
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM, STI), Subtarget(&STI) {
 // Pointer-sized integer type: i64 when the subtarget reports hasAddr64(),
 // i32 otherwise.
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Set the load counts used by the memcmp expansion optimization.
50 MaxLoadsPerMemcmp = 8;
51 MaxLoadsPerMemcmpOptSize = 4;
52
53 // Booleans always contain 0 or 1.
54 setBooleanContents(ZeroOrOneBooleanContent);
55 // Except in SIMD vectors, where they are 0 or -1.
56 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
57 // We don't know the microarchitecture here, so just reduce register pressure.
58 setSchedulingPreference(Sched::RegPressure);
59 // Tell ISel that we have a stack pointer.
60 setStackPointerRegisterToSaveRestore(
61 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62 // Set up the register classes.
63 addRegisterClass(VT: MVT::i32, RC: &WebAssembly::I32RegClass);
64 addRegisterClass(VT: MVT::i64, RC: &WebAssembly::I64RegClass);
65 addRegisterClass(VT: MVT::f32, RC: &WebAssembly::F32RegClass);
66 addRegisterClass(VT: MVT::f64, RC: &WebAssembly::F64RegClass);
 // All 128-bit vector types share the single V128 register class.
67 if (Subtarget->hasSIMD128()) {
68 addRegisterClass(VT: MVT::v16i8, RC: &WebAssembly::V128RegClass);
69 addRegisterClass(VT: MVT::v8i16, RC: &WebAssembly::V128RegClass);
70 addRegisterClass(VT: MVT::v4i32, RC: &WebAssembly::V128RegClass);
71 addRegisterClass(VT: MVT::v4f32, RC: &WebAssembly::V128RegClass);
72 addRegisterClass(VT: MVT::v2i64, RC: &WebAssembly::V128RegClass);
73 addRegisterClass(VT: MVT::v2f64, RC: &WebAssembly::V128RegClass);
74 }
75 if (Subtarget->hasFP16()) {
76 addRegisterClass(VT: MVT::v8f16, RC: &WebAssembly::V128RegClass);
77 }
 // Reference types; exnref is only registered when exception handling is
 // enabled as well.
78 if (Subtarget->hasReferenceTypes()) {
79 addRegisterClass(VT: MVT::externref, RC: &WebAssembly::EXTERNREFRegClass);
80 addRegisterClass(VT: MVT::funcref, RC: &WebAssembly::FUNCREFRegClass);
81 if (Subtarget->hasExceptionHandling()) {
82 addRegisterClass(VT: MVT::exnref, RC: &WebAssembly::EXNREFRegClass);
83 }
84 }
85 // Compute derived properties from the register classes.
86 computeRegisterProperties(TRI: Subtarget->getRegisterInfo());
87
88 // Transform loads and stores to pointers in address space 1 to loads and
89 // stores to WebAssembly global variables, outside linear memory.
90 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
91 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
92 setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
93 }
94 if (Subtarget->hasSIMD128()) {
95 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96 MVT::v2f64}) {
97 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
98 setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
99 }
100 }
101 if (Subtarget->hasFP16()) {
102 setOperationAction(Op: ISD::LOAD, VT: MVT::v8f16, Action: Custom);
103 setOperationAction(Op: ISD::STORE, VT: MVT::v8f16, Action: Custom);
104 }
105 if (Subtarget->hasReferenceTypes()) {
106 // We need custom load and store lowering for externref, funcref and
107 // Other. The MVT::Other here represents tables of reference types.
108 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
109 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
110 setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
111 }
112 }
113
 // Address-producing nodes are custom-lowered for the pointer type.
114 setOperationAction(Op: ISD::GlobalAddress, VT: MVTPtr, Action: Custom);
115 setOperationAction(Op: ISD::GlobalTLSAddress, VT: MVTPtr, Action: Custom);
116 setOperationAction(Op: ISD::ExternalSymbol, VT: MVTPtr, Action: Custom);
117 setOperationAction(Op: ISD::JumpTable, VT: MVTPtr, Action: Custom);
118 setOperationAction(Op: ISD::BlockAddress, VT: MVTPtr, Action: Custom);
119 setOperationAction(Op: ISD::BRIND, VT: MVT::Other, Action: Custom);
120 setOperationAction(Op: ISD::CLEAR_CACHE, VT: MVT::Other, Action: Custom);
121
122 // Take the default expansion for va_arg, va_copy, and va_end. There is no
123 // default action for va_start, so we do that custom.
124 setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
125 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Expand);
126 setOperationAction(Op: ISD::VACOPY, VT: MVT::Other, Action: Expand);
127 setOperationAction(Op: ISD::VAEND, VT: MVT::Other, Action: Expand);
128
 // Floating-point configuration, applied to the scalar and vector FP types.
129 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
 // v8f16 is only configured when the FP16 feature is enabled.
130 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131 continue;
132 }
133 // Don't expand the floating-point types to constant pools.
134 setOperationAction(Op: ISD::ConstantFP, VT: T, Action: Legal);
135 // Expand floating-point comparisons.
136 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
137 ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
138 setCondCodeAction(CCs: CC, VT: T, Action: Expand);
139 // Expand floating-point library function operators.
140 for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FMA})
141 setOperationAction(Op, VT: T, Action: Expand);
142 // Expand vector FREM, but use a libcall rather than an expansion for scalar
143 if (MVT(T).isVector())
144 setOperationAction(Op: ISD::FREM, VT: T, Action: Expand);
145 else
146 setOperationAction(Op: ISD::FREM, VT: T, Action: LibCall);
147 // Note supported floating-point library function operators that otherwise
148 // default to expand.
149 for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
150 ISD::FRINT, ISD::FROUNDEVEN})
151 setOperationAction(Op, VT: T, Action: Legal);
152 // Support minimum and maximum, which otherwise default to expand.
153 setOperationAction(Op: ISD::FMINIMUM, VT: T, Action: Legal);
154 setOperationAction(Op: ISD::FMAXIMUM, VT: T, Action: Legal);
155 // When experimental v8f16 support is enabled these instructions don't need
156 // to be expanded.
157 if (T != MVT::v8f16) {
158 setOperationAction(Op: ISD::FP16_TO_FP, VT: T, Action: Expand);
159 setOperationAction(Op: ISD::FP_TO_FP16, VT: T, Action: Expand);
160 }
 // f16 extending loads and truncating stores are Legal only for f32 with
 // the FP16 feature; every other combination is expanded.
161 if (Subtarget->hasFP16() && T == MVT::f32) {
162 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: T, MemVT: MVT::f16, Action: Legal);
163 setTruncStoreAction(ValVT: T, MemVT: MVT::f16, Action: Legal);
164 } else {
165 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: T, MemVT: MVT::f16, Action: Expand);
166 setTruncStoreAction(ValVT: T, MemVT: MVT::f16, Action: Expand);
167 }
168 }
169
170 // Expand unavailable integer operations.
171 for (auto Op :
172 {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
173 ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
174 ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
175 for (auto T : {MVT::i32, MVT::i64})
176 setOperationAction(Op, VT: T, Action: Expand);
177 if (Subtarget->hasSIMD128())
178 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
179 setOperationAction(Op, VT: T, Action: Expand);
180 }
181
 // With wide arithmetic, custom-lower i128 add/sub and the i64
 // multiply-high/low and add-with-overflow nodes. This overrides the Expand
 // action set for the i64 *MUL_LOHI nodes in the loop above.
182 if (Subtarget->hasWideArithmetic()) {
183 setOperationAction(Op: ISD::ADD, VT: MVT::i128, Action: Custom);
184 setOperationAction(Op: ISD::SUB, VT: MVT::i128, Action: Custom);
185 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Custom);
186 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Custom);
187 setOperationAction(Op: ISD::UADDO, VT: MVT::i64, Action: Custom);
188 }
189
 // With non-trapping fp-to-int, custom-lower the scalar saturating
 // conversions.
190 if (Subtarget->hasNontrappingFPToInt())
191 for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
192 for (auto T : {MVT::i32, MVT::i64})
193 setOperationAction(Op, VT: T, Action: Custom);
194
 // With relaxed SIMD, custom-lower the vector min/max-num family.
195 if (Subtarget->hasRelaxedSIMD()) {
196 setOperationAction(
197 Ops: {ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM},
198 VTs: {MVT::v4f32, MVT::v2f64}, Action: Custom);
199 }
200 // SIMD-specific configuration
201 if (Subtarget->hasSIMD128()) {
202
 // Request target DAG combines for intrinsics without a chain.
203 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
204
205 // Combine wide-vector muls, with extend inputs, to extmul_half.
206 setTargetDAGCombine(ISD::MUL);
207 setTargetDAGCombine(ISD::SHL);
208
209 // Combine vector mask reductions into alltrue/anytrue
210 setTargetDAGCombine(ISD::SETCC);
211
212 // Convert vector to integer bitcasts to bitmask
213 setTargetDAGCombine(ISD::BITCAST);
214
215 // Hoist bitcasts out of shuffles
216 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
217
218 // Combine extends of extract_subvectors into widening ops
219 setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND});
220
221 // Combine int_to_fp or fp_extend of extract_vectors and vice versa into
222 // conversions ops
223 setTargetDAGCombine({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_EXTEND,
224 ISD::EXTRACT_SUBVECTOR});
225
226 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
227 // into conversion ops
228 setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
229 ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_ROUND,
230 ISD::CONCAT_VECTORS});
231
 // Request target DAG combines for truncates as well.
232 setTargetDAGCombine(ISD::TRUNCATE);
233
234 // Support saturating add/sub for i8x16 and i16x8
235 for (auto Op : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
236 for (auto T : {MVT::v16i8, MVT::v8i16})
237 setOperationAction(Op, VT: T, Action: Legal);
238
239 // Support integer abs
240 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
241 setOperationAction(Op: ISD::ABS, VT: T, Action: Legal);
242
243 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
244 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
245 MVT::v2f64})
246 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
247
 // NOTE(review): BUILD_VECTOR is keyed on the scalar MVT::f16 here, whereas
 // the neighbouring entries use vector types (e.g. v8f16 for VECTOR_SHUFFLE
 // below) -- confirm this is intended.
248 if (Subtarget->hasFP16()) {
249 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom);
250 setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v4f16, Action: Custom);
251 }
252
253 // We have custom shuffle lowering to expose the shuffle mask
254 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
255 MVT::v2f64})
256 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: T, Action: Custom);
257
258 if (Subtarget->hasFP16())
259 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v8f16, Action: Custom);
260
261 // Support splatting
262 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
263 MVT::v2f64})
264 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
265
 // AVGCEILU is legal for the 8-bit and 16-bit lane types.
266 setOperationAction(Ops: ISD::AVGCEILU, VTs: {MVT::v8i16, MVT::v16i8}, Action: Legal);
267
268 // Custom lowering since wasm shifts must have a scalar shift amount
269 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
270 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
271 setOperationAction(Op, VT: T, Action: Custom);
272
273 // Custom lower lane accesses to expand out variable indices
274 for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
275 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
276 MVT::v2f64})
277 setOperationAction(Op, VT: T, Action: Custom);
278
279 // There is no i8x16.mul instruction
280 setOperationAction(Op: ISD::MUL, VT: MVT::v16i8, Action: Expand);
281
282 // Expand integer operations supported for scalars but not SIMD
283 for (auto Op :
284 {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
285 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
286 setOperationAction(Op, VT: T, Action: Expand);
287
288 // But we do have integer min and max operations
289 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
290 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
291 setOperationAction(Op, VT: T, Action: Legal);
292
293 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
294 setOperationAction(Op: ISD::CTPOP, VT: MVT::v16i8, Action: Legal);
295 setOperationAction(Op: ISD::CTLZ, VT: MVT::v16i8, Action: Expand);
296 setOperationAction(Op: ISD::CTTZ, VT: MVT::v16i8, Action: Expand);
297
298 // Custom lower bit counting operations for other types to scalarize them.
299 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
300 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
301 setOperationAction(Op, VT: T, Action: Custom);
302
303 // Expand float operations supported for scalars but not SIMD
304 for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
305 ISD::FEXP, ISD::FEXP2, ISD::FEXP10})
306 for (auto T : {MVT::v4f32, MVT::v2f64})
307 setOperationAction(Op, VT: T, Action: Expand);
308
309 // Unsigned comparison operations are unavailable for i64x2 vectors.
310 for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE})
311 setCondCodeAction(CCs: CC, VT: MVT::v2i64, Action: Custom);
312
313 // 64x2 conversions are not in the spec
314 for (auto Op :
315 {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
316 for (auto T : {MVT::v2i64, MVT::v2f64})
317 setOperationAction(Op, VT: T, Action: Expand);
318
319 // But saturating fp_to_int conversions are
320 for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) {
321 setOperationAction(Op, VT: MVT::v4i32, Action: Custom);
322 if (Subtarget->hasFP16()) {
323 setOperationAction(Op, VT: MVT::v8i16, Action: Custom);
324 }
325 }
326
327 // Support vector extending
328 for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
329 setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
330 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
331 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
332 }
333
 // FMA on v8f16 is Legal with FP16; this overrides the Expand action set in
 // the floating-point loop above.
334 if (Subtarget->hasFP16()) {
335 setOperationAction(Op: ISD::FMA, VT: MVT::v8f16, Action: Legal);
336 }
337
 // FMULADD is Legal for the f32x4 and f64x2 types with relaxed SIMD.
338 if (Subtarget->hasRelaxedSIMD()) {
339 setOperationAction(Op: ISD::FMULADD, VT: MVT::v4f32, Action: Legal);
340 setOperationAction(Op: ISD::FMULADD, VT: MVT::v2f64, Action: Legal);
341 }
342
343 // Partial MLA reductions.
344 for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
345 setPartialReduceMLAAction(Opc: Op, AccVT: MVT::v4i32, InputVT: MVT::v16i8, Action: Legal);
346 setPartialReduceMLAAction(Opc: Op, AccVT: MVT::v4i32, InputVT: MVT::v8i16, Action: Legal);
347 }
348 }
349
350 // As a special case, these operators use the type to mean the type to
351 // sign-extend from.
352 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
353 if (!Subtarget->hasSignExt()) {
354 // Sign extends are legal only when extending a vector extract
355 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
356 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
357 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action);
358 }
359 for (auto T : MVT::integer_fixedlen_vector_valuetypes())
360 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Expand);
361
362 // Dynamic stack allocation: use the default expansion.
363 setOperationAction(Op: ISD::STACKSAVE, VT: MVT::Other, Action: Expand);
364 setOperationAction(Op: ISD::STACKRESTORE, VT: MVT::Other, Action: Expand);
365 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVTPtr, Action: Expand);
366
 // Frame indices (for both pointer widths) and CopyToReg are custom-lowered.
367 setOperationAction(Op: ISD::FrameIndex, VT: MVT::i32, Action: Custom);
368 setOperationAction(Op: ISD::FrameIndex, VT: MVT::i64, Action: Custom);
369 setOperationAction(Op: ISD::CopyToReg, VT: MVT::Other, Action: Custom);
370
371 // Expand these forms; we pattern-match the forms that we can handle in isel.
372 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
373 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
374 setOperationAction(Op, VT: T, Action: Expand);
375
376 if (Subtarget->hasReferenceTypes())
377 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
378 for (auto T : {MVT::externref, MVT::funcref})
379 setOperationAction(Op, VT: T, Action: Expand);
380
381 // There is no vector conditional select instruction
382 for (auto T :
383 {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, MVT::v2f64})
384 setOperationAction(Op: ISD::SELECT_CC, VT: T, Action: Expand);
385
386 // We have custom switch handling.
387 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Custom);
388
389 // WebAssembly doesn't have:
390 // - Floating-point extending loads.
391 // - Floating-point truncating stores.
392 // - i1 extending loads.
393 // - truncating SIMD stores and most extending loads
394 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
395 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
396 for (auto T : MVT::integer_valuetypes())
397 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
398 setLoadExtAction(ExtType: Ext, ValVT: T, MemVT: MVT::i1, Action: Promote);
399 if (Subtarget->hasSIMD128()) {
 // First mark every mismatched (value type, memory type) vector pair as
 // Expand; the legal extending loads are re-enabled right after.
400 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
401 MVT::v2f64}) {
402 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
403 if (MVT(T) != MemT) {
404 setTruncStoreAction(ValVT: T, MemVT: MemT, Action: Expand);
405 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
406 setLoadExtAction(ExtType: Ext, ValVT: T, MemVT: MemT, Action: Expand);
407 }
408 }
409 }
410 // But some vector extending loads are legal
411 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
412 setLoadExtAction(ExtType: Ext, ValVT: MVT::v8i16, MemVT: MVT::v8i8, Action: Legal);
413 setLoadExtAction(ExtType: Ext, ValVT: MVT::v4i32, MemVT: MVT::v4i16, Action: Legal);
414 setLoadExtAction(ExtType: Ext, ValVT: MVT::v2i64, MemVT: MVT::v2i32, Action: Legal);
415 }
416 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::v2f64, MemVT: MVT::v2f32, Action: Legal);
417 }
418
419 // Don't do anything clever with build_pairs
420 setOperationAction(Op: ISD::BUILD_PAIR, VT: MVT::i64, Action: Expand);
421
422 // Trap lowers to wasm unreachable
423 setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
424 setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal);
425
426 // Exception handling intrinsics
427 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
428 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
429 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
430
 // Declare 64 bits as the largest supported atomic size.
431 setMaxAtomicSizeInBitsSupported(64);
432
433 // Always convert switches to br_tables unless there is only one case, which
434 // is equivalent to a simple branch. This reduces code size for wasm, and we
435 // defer possible jump table optimizations to the VM.
436 setMinimumJumpTableEntries(2);
437}
438
439TargetLowering::AtomicExpansionKind
440WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
441 const AtomicRMWInst *AI) const {
442 // We have wasm instructions for these
443 switch (AI->getOperation()) {
444 case AtomicRMWInst::Add:
445 case AtomicRMWInst::Sub:
446 case AtomicRMWInst::And:
447 case AtomicRMWInst::Or:
448 case AtomicRMWInst::Xor:
449 case AtomicRMWInst::Xchg:
450 return AtomicExpansionKind::None;
451 default:
452 break;
453 }
454 return AtomicExpansionKind::CmpXChg;
455}
456
457bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
458 // Implementation copied from X86TargetLowering.
459 unsigned Opc = VecOp.getOpcode();
460
461 // Assume target opcodes can't be scalarized.
462 // TODO - do we have any exceptions?
463 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opcode: Opc))
464 return false;
465
466 // If the vector op is not supported, try to convert to scalar.
467 EVT VecVT = VecOp.getValueType();
468 if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT))
469 return true;
470
471 // If the vector op is supported, but the scalar op is not, the transform may
472 // not be worthwhile.
473 EVT ScalarVT = VecVT.getScalarType();
474 return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT);
475}
476
477FastISel *WebAssemblyTargetLowering::createFastISel(
478 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
479 const LibcallLoweringInfo *LibcallLowering) const {
480 return WebAssembly::createFastISel(funcInfo&: FuncInfo, libInfo: LibInfo, libcallLowering: LibcallLowering);
481}
482
483MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
484 EVT VT) const {
485 unsigned BitWidth = NextPowerOf2(A: VT.getSizeInBits() - 1);
486 if (BitWidth > 1 && BitWidth < 8)
487 BitWidth = 8;
488
489 if (BitWidth > 64) {
490 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
491 // the count to be an i32.
492 BitWidth = 32;
493 assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
494 "32-bit shift counts ought to be enough for anyone");
495 }
496
497 MVT Result = MVT::getIntegerVT(BitWidth);
498 assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
499 "Unable to represent scalar shift amount type");
500 return Result;
501}
502
503// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
504// undefined result on invalid/overflow, to the WebAssembly opcode, which
505// traps on invalid/overflow.
//
// The conversion is guarded by a range check: BB is split into a CFG diamond
// in which one arm performs the real conversion and the other materializes a
// substitute constant, and a PHI in the join block selects the result.
//
// MI            - the pseudo instruction; operand 0 is the result register and
//                 operand 1 the floating-point input. It is erased.
// DL            - debug location attached to every emitted instruction.
// BB            - the block containing MI; it keeps the code before MI.
// TII           - used to look up instruction descriptions.
// IsUnsigned    - selects the unsigned range check and a substitute of 0.
// Int64         - result is 64-bit (selects CONST_I64 and the INT64_MIN limit).
// Float64       - input is f64 (selects the F64 abs/const/compare opcodes).
// LoweredOpcode - the conversion opcode emitted on the in-range path.
//
// Returns the join block (DoneMBB), which receives the instructions that
// followed MI together with BB's original successors.
506static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
507 MachineBasicBlock *BB,
508 const TargetInstrInfo &TII,
509 bool IsUnsigned, bool Int64,
510 bool Float64, unsigned LoweredOpcode) {
511 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
512
513 Register OutReg = MI.getOperand(i: 0).getReg();
514 Register InReg = MI.getOperand(i: 1).getReg();
515
 // Pick the opcodes matching the input float width and result int width.
516 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
517 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
518 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
519 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
520 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
521 unsigned Eqz = WebAssembly::EQZ_I32;
522 unsigned And = WebAssembly::AND_I32;
 // Limit is the most negative signed value of the result width. Substitute
 // is the value produced for out-of-range inputs: 0 when unsigned, Limit
 // when signed. CmpVal is the exclusive upper bound of the range check:
 // -Limit for signed, twice that for unsigned.
523 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
524 int64_t Substitute = IsUnsigned ? 0 : Limit;
525 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
526 auto &Context = BB->getParent()->getFunction().getContext();
527 Type *Ty = Float64 ? Type::getDoubleTy(C&: Context) : Type::getFloatTy(C&: Context);
528
529 const BasicBlock *LLVMBB = BB->getBasicBlock();
530 MachineFunction *F = BB->getParent();
531 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
532 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
533 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
534
 // Insert the new blocks right after BB, in the order FalseMBB, TrueMBB,
 // DoneMBB.
535 MachineFunction::iterator It = ++BB->getIterator();
536 F->insert(MBBI: It, MBB: FalseMBB);
537 F->insert(MBBI: It, MBB: TrueMBB);
538 F->insert(MBBI: It, MBB: DoneMBB);
539
540 // Transfer the remainder of BB and its successor edges to DoneMBB.
541 DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
542 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
543
 // Wire up the diamond: BB -> {TrueMBB, FalseMBB} -> DoneMBB.
544 BB->addSuccessor(Succ: TrueMBB);
545 BB->addSuccessor(Succ: FalseMBB);
546 TrueMBB->addSuccessor(Succ: DoneMBB);
547 FalseMBB->addSuccessor(Succ: DoneMBB);
548
 // Temporaries: Tmp0/Tmp1 use the input's register class, the comparison
 // results are i32, and FalseReg/TrueReg use the output's register class.
549 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
550 Tmp0 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
551 Tmp1 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
552 CmpReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
553 EqzReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
554 FalseReg = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: OutReg));
555 TrueReg = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: OutReg));
556
 // MI is removed here; the instructions below are appended to the end of BB.
557 MI.eraseFromParent();
558 // For signed numbers, we can do a single comparison to determine whether
559 // fabs(x) is within range.
560 if (IsUnsigned) {
 // No abs is needed; compare the input directly. The virtual register
 // created for Tmp0 above is left unused on this path.
561 Tmp0 = InReg;
562 } else {
563 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Abs), DestReg: Tmp0).addReg(RegNo: InReg);
564 }
 // CmpReg = Tmp0 < CmpVal.
565 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: FConst), DestReg: Tmp1)
566 .addFPImm(Val: cast<ConstantFP>(Val: ConstantFP::get(Ty, V: CmpVal)));
567 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: LT), DestReg: CmpReg).addReg(RegNo: Tmp0).addReg(RegNo: Tmp1);
568
569 // For unsigned numbers, we have to do a separate comparison with zero.
570 if (IsUnsigned) {
 // CmpReg = (Tmp0 < CmpVal) & (Tmp0 >= 0.0), using a fresh constant register.
571 Tmp1 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
572 Register SecondCmpReg =
573 MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
574 Register AndReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
575 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: FConst), DestReg: Tmp1)
576 .addFPImm(Val: cast<ConstantFP>(Val: ConstantFP::get(Ty, V: 0.0)));
577 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: GE), DestReg: SecondCmpReg).addReg(RegNo: Tmp0).addReg(RegNo: Tmp1);
578 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: And), DestReg: AndReg).addReg(RegNo: CmpReg).addReg(RegNo: SecondCmpReg);
579 CmpReg = AndReg;
580 }
581
 // EqzReg is the negated range check, i.e. set when the input is out of
 // range.
582 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Eqz), DestReg: EqzReg).addReg(RegNo: CmpReg);
583
584 // Create the CFG diamond to select between doing the conversion or using
585 // the substitute value.
 // BB branches to TrueMBB on EqzReg. FalseMBB performs the conversion and
 // branches to DoneMBB; TrueMBB materializes the substitute constant and has
 // no explicit branch. The PHI at the start of DoneMBB merges both values
 // into OutReg.
586 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR_IF)).addMBB(MBB: TrueMBB).addReg(RegNo: EqzReg);
587 BuildMI(BB: FalseMBB, MIMD: DL, MCID: TII.get(Opcode: LoweredOpcode), DestReg: FalseReg).addReg(RegNo: InReg);
588 BuildMI(BB: FalseMBB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR)).addMBB(MBB: DoneMBB);
589 BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: IConst), DestReg: TrueReg).addImm(Val: Substitute);
590 BuildMI(BB&: *DoneMBB, I: DoneMBB->begin(), MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::PHI), DestReg: OutReg)
591 .addReg(RegNo: FalseReg)
592 .addMBB(MBB: FalseMBB)
593 .addReg(RegNo: TrueReg)
594 .addMBB(MBB: TrueMBB);
595
596 return DoneMBB;
597}
598
599// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
600// instruction to handle the zero-length case.
//
// MI    - the `MEMCPY` pseudo instruction; operands 0-4 are read as DstMem,
//         SrcMem, Dst, Src and Len. It is erased.
// DL    - debug location attached to every emitted instruction.
// BB    - the block containing MI.
// TII   - used to look up instruction descriptions.
// Int64 - selects the 64-bit (`EQZ_I64`, `MEMORY_COPY_A64`) rather than the
//         32-bit opcode variants.
//
// Returns BB unchanged in structure when the length is defined by a constant
// (no branch is needed); otherwise returns the new block that holds the
// instructions that followed MI.
601static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
602 MachineBasicBlock *BB,
603 const TargetInstrInfo &TII, bool Int64) {
604 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
605
 // Copy the operands out of MI; MI is erased before they are re-added to the
 // replacement instruction.
606 MachineOperand DstMem = MI.getOperand(i: 0);
607 MachineOperand SrcMem = MI.getOperand(i: 1);
608 MachineOperand Dst = MI.getOperand(i: 2);
609 MachineOperand Src = MI.getOperand(i: 3);
610 MachineOperand Len = MI.getOperand(i: 4);
611
612 // If the length is a constant, we don't actually need the check.
613 if (MachineInstr *Def = MRI.getVRegDef(Reg: Len.getReg())) {
614 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
615 Def->getOpcode() == WebAssembly::CONST_I64) {
616 if (Def->getOperand(i: 1).getImm() == 0) {
617 // A zero-length memcpy is a no-op.
618 MI.eraseFromParent();
619 return BB;
620 }
621 // A non-zero-length memcpy doesn't need a zero check.
622 unsigned MemoryCopy =
623 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
 // Emit the `memory.copy` in place, immediately before MI.
624 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: MemoryCopy))
625 .add(MO: DstMem)
626 .add(MO: SrcMem)
627 .add(MO: Dst)
628 .add(MO: Src)
629 .add(MO: Len);
630 MI.eraseFromParent();
631 return BB;
632 }
633 }
634
635 // We're going to add an extra use to `Len` to test if it's zero; that
636 // use shouldn't be a kill, even if the original use is.
637 MachineOperand NoKillLen = Len;
638 NoKillLen.setIsKill(false);
639
640 // Decide on which `MachineInstr` opcode we're going to use.
641 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
642 unsigned MemoryCopy =
643 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
644
645 // Create two new basic blocks; one for the new `memory.copy` that we can
646 // branch over, and one for the rest of the instructions after the original
647 // `memory.copy`.
648 const BasicBlock *LLVMBB = BB->getBasicBlock();
649 MachineFunction *F = BB->getParent();
650 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
651 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
652
 // Insert the new blocks right after BB, in the order TrueMBB, DoneMBB.
653 MachineFunction::iterator It = ++BB->getIterator();
654 F->insert(MBBI: It, MBB: TrueMBB);
655 F->insert(MBBI: It, MBB: DoneMBB);
656
657 // Transfer the remainder of BB and its successor edges to DoneMBB.
658 DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
659 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
660
661 // Connect the CFG edges.
662 BB->addSuccessor(Succ: TrueMBB);
663 BB->addSuccessor(Succ: DoneMBB);
664 TrueMBB->addSuccessor(Succ: DoneMBB);
665
666 // Create a virtual register for the `Eqz` result.
667 unsigned EqzReg;
668 EqzReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
669
670 // Erase the original `memory.copy`.
671 MI.eraseFromParent();
672
673 // Test if `Len` is zero.
674 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Eqz), DestReg: EqzReg).add(MO: NoKillLen);
675
676 // Insert a new `memory.copy`.
677 BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: MemoryCopy))
678 .add(MO: DstMem)
679 .add(MO: SrcMem)
680 .add(MO: Dst)
681 .add(MO: Src)
682 .add(MO: Len);
683
684 // Create the CFG triangle.
 // BB branches over TrueMBB to DoneMBB on the `Eqz` result; TrueMBB, which
 // holds the copy, then branches to DoneMBB.
685 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR_IF)).addMBB(MBB: DoneMBB).addReg(RegNo: EqzReg);
686 BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR)).addMBB(MBB: DoneMBB);
687
688 return DoneMBB;
689}
690
691// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
692// instruction to handle the zero-length case.
693static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
694 MachineBasicBlock *BB,
695 const TargetInstrInfo &TII, bool Int64) {
696 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
697
698 MachineOperand Mem = MI.getOperand(i: 0);
699 MachineOperand Dst = MI.getOperand(i: 1);
700 MachineOperand Val = MI.getOperand(i: 2);
701 MachineOperand Len = MI.getOperand(i: 3);
702
703 // If the length is a constant, we don't actually need the check.
704 if (MachineInstr *Def = MRI.getVRegDef(Reg: Len.getReg())) {
705 if (Def->getOpcode() == WebAssembly::CONST_I32 ||
706 Def->getOpcode() == WebAssembly::CONST_I64) {
707 if (Def->getOperand(i: 1).getImm() == 0) {
708 // A zero-length memset is a no-op.
709 MI.eraseFromParent();
710 return BB;
711 }
712 // A non-zero-length memset doesn't need a zero check.
713 unsigned MemoryFill =
714 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
715 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: MemoryFill))
716 .add(MO: Mem)
717 .add(MO: Dst)
718 .add(MO: Val)
719 .add(MO: Len);
720 MI.eraseFromParent();
721 return BB;
722 }
723 }
724
725 // We're going to add an extra use to `Len` to test if it's zero; that
726 // use shouldn't be a kill, even if the original use is.
727 MachineOperand NoKillLen = Len;
728 NoKillLen.setIsKill(false);
729
730 // Decide on which `MachineInstr` opcode we're going to use.
731 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
732 unsigned MemoryFill =
733 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
734
735 // Create two new basic blocks; one for the new `memory.fill` that we can
736 // branch over, and one for the rest of the instructions after the original
737 // `memory.fill`.
738 const BasicBlock *LLVMBB = BB->getBasicBlock();
739 MachineFunction *F = BB->getParent();
740 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
741 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
742
743 MachineFunction::iterator It = ++BB->getIterator();
744 F->insert(MBBI: It, MBB: TrueMBB);
745 F->insert(MBBI: It, MBB: DoneMBB);
746
747 // Transfer the remainder of BB and its successor edges to DoneMBB.
748 DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
749 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
750
751 // Connect the CFG edges.
752 BB->addSuccessor(Succ: TrueMBB);
753 BB->addSuccessor(Succ: DoneMBB);
754 TrueMBB->addSuccessor(Succ: DoneMBB);
755
756 // Create a virtual register for the `Eqz` result.
757 unsigned EqzReg;
758 EqzReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
759
760 // Erase the original `memory.fill`.
761 MI.eraseFromParent();
762
763 // Test if `Len` is zero.
764 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Eqz), DestReg: EqzReg).add(MO: NoKillLen);
765
766 // Insert a new `memory.copy`.
767 BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: MemoryFill)).add(MO: Mem).add(MO: Dst).add(MO: Val).add(MO: Len);
768
769 // Create the CFG triangle.
770 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR_IF)).addMBB(MBB: DoneMBB).addReg(RegNo: EqzReg);
771 BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR)).addMBB(MBB: DoneMBB);
772
773 return DoneMBB;
774}
775
776static MachineBasicBlock *
777LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
778 const WebAssemblySubtarget *Subtarget,
779 const TargetInstrInfo &TII) {
780 MachineInstr &CallParams = *CallResults.getPrevNode();
781 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
782 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
783 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
784
785 bool IsIndirect =
786 CallParams.getOperand(i: 0).isReg() || CallParams.getOperand(i: 0).isFI();
787 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
788
789 bool IsFuncrefCall = false;
790 if (IsIndirect && CallParams.getOperand(i: 0).isReg()) {
791 Register Reg = CallParams.getOperand(i: 0).getReg();
792 const MachineFunction *MF = BB->getParent();
793 const MachineRegisterInfo &MRI = MF->getRegInfo();
794 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
795 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
796 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
797 }
798
799 unsigned CallOp;
800 if (IsIndirect && IsRetCall) {
801 CallOp = WebAssembly::RET_CALL_INDIRECT;
802 } else if (IsIndirect) {
803 CallOp = WebAssembly::CALL_INDIRECT;
804 } else if (IsRetCall) {
805 CallOp = WebAssembly::RET_CALL;
806 } else {
807 CallOp = WebAssembly::CALL;
808 }
809
810 MachineFunction &MF = *BB->getParent();
811 const MCInstrDesc &MCID = TII.get(Opcode: CallOp);
812 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
813
814 // Move the function pointer to the end of the arguments for indirect calls
815 if (IsIndirect) {
816 auto FnPtr = CallParams.getOperand(i: 0);
817 CallParams.removeOperand(OpNo: 0);
818
819 // For funcrefs, call_indirect is done through __funcref_call_table and the
820 // funcref is always installed in slot 0 of the table, therefore instead of
821 // having the function pointer added at the end of the params list, a zero
822 // (the index in
823 // __funcref_call_table is added).
824 if (IsFuncrefCall) {
825 Register RegZero =
826 MF.getRegInfo().createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
827 MachineInstrBuilder MIBC0 =
828 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::CONST_I32), DestReg: RegZero).addImm(Val: 0);
829
830 BB->insert(I: CallResults.getIterator(), M: MIBC0);
831 MachineInstrBuilder(MF, CallParams).addReg(RegNo: RegZero);
832 } else
833 CallParams.addOperand(Op: FnPtr);
834 }
835
836 for (auto Def : CallResults.defs())
837 MIB.add(MO: Def);
838
839 if (IsIndirect) {
840 // Placeholder for the type index.
841 // This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
842 MIB.addImm(Val: 0);
843 // The table into which this call_indirect indexes.
844 MCSymbolWasm *Table = IsFuncrefCall
845 ? WebAssembly::getOrCreateFuncrefCallTableSymbol(
846 Ctx&: MF.getContext(), Subtarget)
847 : WebAssembly::getOrCreateFunctionTableSymbol(
848 Ctx&: MF.getContext(), Subtarget);
849 if (Subtarget->hasCallIndirectOverlong()) {
850 MIB.addSym(Sym: Table);
851 } else {
852 // For the MVP there is at most one table whose number is 0, but we can't
853 // write a table symbol or issue relocations. Instead we just ensure the
854 // table is live and write a zero.
855 Table->setNoStrip();
856 MIB.addImm(Val: 0);
857 }
858 }
859
860 for (auto Use : CallParams.uses())
861 MIB.add(MO: Use);
862
863 BB->insert(I: CallResults.getIterator(), M: MIB);
864 CallParams.eraseFromParent();
865 CallResults.eraseFromParent();
866
867 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
868 // table slot with ref.null upon call_indirect return.
869 //
870 // This generates the following code, which comes right after a call_indirect
871 // of a funcref:
872 //
873 // i32.const 0
874 // ref.null func
875 // table.set __funcref_call_table
876 if (IsIndirect && IsFuncrefCall) {
877 MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
878 Ctx&: MF.getContext(), Subtarget);
879 Register RegZero =
880 MF.getRegInfo().createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
881 MachineInstr *Const0 =
882 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::CONST_I32), DestReg: RegZero).addImm(Val: 0);
883 BB->insertAfter(I: MIB.getInstr()->getIterator(), MI: Const0);
884
885 Register RegFuncref =
886 MF.getRegInfo().createVirtualRegister(RegClass: &WebAssembly::FUNCREFRegClass);
887 MachineInstr *RefNull =
888 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::REF_NULL_FUNCREF), DestReg: RegFuncref);
889 BB->insertAfter(I: Const0->getIterator(), MI: RefNull);
890
891 MachineInstr *TableSet =
892 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::TABLE_SET_FUNCREF))
893 .addSym(Sym: Table)
894 .addReg(RegNo: RegZero)
895 .addReg(RegNo: RegFuncref);
896 BB->insertAfter(I: RefNull->getIterator(), MI: TableSet);
897 }
898
899 return BB;
900}
901
902MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
903 MachineInstr &MI, MachineBasicBlock *BB) const {
904 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
905 DebugLoc DL = MI.getDebugLoc();
906
907 switch (MI.getOpcode()) {
908 default:
909 llvm_unreachable("Unexpected instr type to insert");
910 case WebAssembly::FP_TO_SINT_I32_F32:
911 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: false, Float64: false,
912 LoweredOpcode: WebAssembly::I32_TRUNC_S_F32);
913 case WebAssembly::FP_TO_UINT_I32_F32:
914 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: false, Float64: false,
915 LoweredOpcode: WebAssembly::I32_TRUNC_U_F32);
916 case WebAssembly::FP_TO_SINT_I64_F32:
917 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: true, Float64: false,
918 LoweredOpcode: WebAssembly::I64_TRUNC_S_F32);
919 case WebAssembly::FP_TO_UINT_I64_F32:
920 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: true, Float64: false,
921 LoweredOpcode: WebAssembly::I64_TRUNC_U_F32);
922 case WebAssembly::FP_TO_SINT_I32_F64:
923 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: false, Float64: true,
924 LoweredOpcode: WebAssembly::I32_TRUNC_S_F64);
925 case WebAssembly::FP_TO_UINT_I32_F64:
926 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: false, Float64: true,
927 LoweredOpcode: WebAssembly::I32_TRUNC_U_F64);
928 case WebAssembly::FP_TO_SINT_I64_F64:
929 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: true, Float64: true,
930 LoweredOpcode: WebAssembly::I64_TRUNC_S_F64);
931 case WebAssembly::FP_TO_UINT_I64_F64:
932 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: true, Float64: true,
933 LoweredOpcode: WebAssembly::I64_TRUNC_U_F64);
934 case WebAssembly::MEMCPY_A32:
935 return LowerMemcpy(MI, DL, BB, TII, Int64: false);
936 case WebAssembly::MEMCPY_A64:
937 return LowerMemcpy(MI, DL, BB, TII, Int64: true);
938 case WebAssembly::MEMSET_A32:
939 return LowerMemset(MI, DL, BB, TII, Int64: false);
940 case WebAssembly::MEMSET_A64:
941 return LowerMemset(MI, DL, BB, TII, Int64: true);
942 case WebAssembly::CALL_RESULTS:
943 case WebAssembly::RET_CALL_RESULTS:
944 return LowerCallResults(CallResults&: MI, DL, BB, Subtarget, TII);
945 }
946}
947
948std::pair<unsigned, const TargetRegisterClass *>
949WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
950 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
951 // First, see if this is a constraint that directly corresponds to a
952 // WebAssembly register class.
953 if (Constraint.size() == 1) {
954 switch (Constraint[0]) {
955 case 'r':
956 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
957 if (Subtarget->hasSIMD128() && VT.isVector()) {
958 if (VT.getSizeInBits() == 128)
959 return std::make_pair(x: 0U, y: &WebAssembly::V128RegClass);
960 }
961 if (VT.isInteger() && !VT.isVector()) {
962 if (VT.getSizeInBits() <= 32)
963 return std::make_pair(x: 0U, y: &WebAssembly::I32RegClass);
964 if (VT.getSizeInBits() <= 64)
965 return std::make_pair(x: 0U, y: &WebAssembly::I64RegClass);
966 }
967 if (VT.isFloatingPoint() && !VT.isVector()) {
968 switch (VT.getSizeInBits()) {
969 case 32:
970 return std::make_pair(x: 0U, y: &WebAssembly::F32RegClass);
971 case 64:
972 return std::make_pair(x: 0U, y: &WebAssembly::F64RegClass);
973 default:
974 break;
975 }
976 }
977 break;
978 default:
979 break;
980 }
981 }
982
983 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
984}
985
986bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
987 // Assume ctz is a relatively cheap operation.
988 return true;
989}
990
991bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
992 // Assume clz is a relatively cheap operation.
993 return true;
994}
995
996bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
997 const AddrMode &AM,
998 Type *Ty, unsigned AS,
999 Instruction *I) const {
1000 // WebAssembly offsets are added as unsigned without wrapping. The
1001 // isLegalAddressingMode gives us no way to determine if wrapping could be
1002 // happening, so we approximate this by accepting only non-negative offsets.
1003 if (AM.BaseOffs < 0)
1004 return false;
1005
1006 // WebAssembly has no scale register operands.
1007 if (AM.Scale != 0)
1008 return false;
1009
1010 // Everything else is legal.
1011 return true;
1012}
1013
1014bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1015 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
1016 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
1017 // WebAssembly supports unaligned accesses, though it should be declared
1018 // with the p2align attribute on loads and stores which do so, and there
1019 // may be a performance impact. We tell LLVM they're "fast" because
1020 // for the kinds of things that LLVM uses this for (merging adjacent stores
1021 // of constants, etc.), WebAssembly implementations will either want the
1022 // unaligned access or they'll split anyway.
1023 if (Fast)
1024 *Fast = 1;
1025 return true;
1026}
1027
1028bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1029 AttributeList Attr) const {
1030 // The current thinking is that wasm engines will perform this optimization,
1031 // so we can save on code size.
1032 return true;
1033}
1034
1035bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1036 EVT ExtT = ExtVal.getValueType();
1037 SDValue N0 = peekThroughFreeze(V: ExtVal->getOperand(Num: 0));
1038 auto *Load = dyn_cast<LoadSDNode>(Val&: N0);
1039 if (!Load)
1040 return false;
1041 EVT MemT = Load->getValueType(ResNo: 0);
1042 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1043 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1044 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1045}
1046
1047bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1048 const GlobalAddressSDNode *GA) const {
1049 // Wasm doesn't support function addresses with offsets
1050 const GlobalValue *GV = GA->getGlobal();
1051 return isa<Function>(Val: GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
1052}
1053
1054EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1055 LLVMContext &C,
1056 EVT VT) const {
1057 if (VT.isVector()) {
1058 if (VT.getVectorElementType() == MVT::f16 && !Subtarget->hasFP16())
1059 return VT.changeElementType(Context&: C, EltVT: MVT::i1);
1060
1061 return VT.changeVectorElementTypeToInteger();
1062 }
1063
1064 // So far, all branch instructions in Wasm take an I32 condition.
1065 // The default TargetLowering::getSetCCResultType returns the pointer size,
1066 // which would be useful to reduce instruction counts when testing
1067 // against 64-bit pointers/values if at some point Wasm supports that.
1068 return EVT::getIntegerVT(Context&: C, BitWidth: 32);
1069}
1070
1071void WebAssemblyTargetLowering::getTgtMemIntrinsic(
1072 SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I,
1073 MachineFunction &MF, unsigned Intrinsic) const {
1074 IntrinsicInfo Info;
1075 switch (Intrinsic) {
1076 case Intrinsic::wasm_memory_atomic_notify:
1077 Info.opc = ISD::INTRINSIC_W_CHAIN;
1078 Info.memVT = MVT::i32;
1079 Info.ptrVal = I.getArgOperand(i: 0);
1080 Info.offset = 0;
1081 Info.align = Align(4);
1082 // atomic.notify instruction does not really load the memory specified with
1083 // this argument, but MachineMemOperand should either be load or store, so
1084 // we set this to a load.
1085 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1086 // instructions are treated as volatiles in the backend, so we should be
1087 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1088 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
1089 Infos.push_back(Elt: Info);
1090 return;
1091 case Intrinsic::wasm_memory_atomic_wait32:
1092 Info.opc = ISD::INTRINSIC_W_CHAIN;
1093 Info.memVT = MVT::i32;
1094 Info.ptrVal = I.getArgOperand(i: 0);
1095 Info.offset = 0;
1096 Info.align = Align(4);
1097 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
1098 Infos.push_back(Elt: Info);
1099 return;
1100 case Intrinsic::wasm_memory_atomic_wait64:
1101 Info.opc = ISD::INTRINSIC_W_CHAIN;
1102 Info.memVT = MVT::i64;
1103 Info.ptrVal = I.getArgOperand(i: 0);
1104 Info.offset = 0;
1105 Info.align = Align(8);
1106 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
1107 Infos.push_back(Elt: Info);
1108 return;
1109 case Intrinsic::wasm_loadf16_f32:
1110 Info.opc = ISD::INTRINSIC_W_CHAIN;
1111 Info.memVT = MVT::f16;
1112 Info.ptrVal = I.getArgOperand(i: 0);
1113 Info.offset = 0;
1114 Info.align = Align(2);
1115 Info.flags = MachineMemOperand::MOLoad;
1116 Infos.push_back(Elt: Info);
1117 return;
1118 case Intrinsic::wasm_storef16_f32:
1119 Info.opc = ISD::INTRINSIC_VOID;
1120 Info.memVT = MVT::f16;
1121 Info.ptrVal = I.getArgOperand(i: 1);
1122 Info.offset = 0;
1123 Info.align = Align(2);
1124 Info.flags = MachineMemOperand::MOStore;
1125 Infos.push_back(Elt: Info);
1126 return;
1127 default:
1128 return;
1129 }
1130}
1131
1132void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1133 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1134 const SelectionDAG &DAG, unsigned Depth) const {
1135 switch (Op.getOpcode()) {
1136 default:
1137 break;
1138 case ISD::INTRINSIC_WO_CHAIN: {
1139 unsigned IntNo = Op.getConstantOperandVal(i: 0);
1140 switch (IntNo) {
1141 default:
1142 break;
1143 case Intrinsic::wasm_bitmask: {
1144 unsigned BitWidth = Known.getBitWidth();
1145 EVT VT = Op.getOperand(i: 1).getSimpleValueType();
1146 unsigned PossibleBits = VT.getVectorNumElements();
1147 APInt ZeroMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - PossibleBits);
1148 Known.Zero |= ZeroMask;
1149 break;
1150 }
1151 }
1152 break;
1153 }
1154 case WebAssemblyISD::EXTEND_LOW_U:
1155 case WebAssemblyISD::EXTEND_HIGH_U: {
1156 // We know the high half, of each destination vector element, will be zero.
1157 SDValue SrcOp = Op.getOperand(i: 0);
1158 EVT VT = SrcOp.getSimpleValueType();
1159 unsigned BitWidth = Known.getBitWidth();
1160 if (VT == MVT::v8i8 || VT == MVT::v16i8) {
1161 assert(BitWidth >= 8 && "Unexpected width!");
1162 APInt Mask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - 8);
1163 Known.Zero |= Mask;
1164 } else if (VT == MVT::v4i16 || VT == MVT::v8i16) {
1165 assert(BitWidth >= 16 && "Unexpected width!");
1166 APInt Mask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - 16);
1167 Known.Zero |= Mask;
1168 } else if (VT == MVT::v2i32 || VT == MVT::v4i32) {
1169 assert(BitWidth >= 32 && "Unexpected width!");
1170 APInt Mask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - 32);
1171 Known.Zero |= Mask;
1172 }
1173 break;
1174 }
1175 // For 128-bit addition if the upper bits are all zero then it's known that
1176 // the upper bits of the result will have all bits guaranteed zero except the
1177 // first.
1178 case WebAssemblyISD::I64_ADD128:
1179 if (Op.getResNo() == 1) {
1180 SDValue LHS_HI = Op.getOperand(i: 1);
1181 SDValue RHS_HI = Op.getOperand(i: 3);
1182 if (isNullConstant(V: LHS_HI) && isNullConstant(V: RHS_HI))
1183 Known.Zero.setBitsFrom(1);
1184 }
1185 break;
1186 }
1187}
1188
1189TargetLoweringBase::LegalizeTypeAction
1190WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1191 if (VT.isFixedLengthVector()) {
1192 MVT EltVT = VT.getVectorElementType();
1193 // We have legal vector types with these lane types, so widening the
1194 // vector would let us use some of the lanes directly without having to
1195 // extend or truncate values.
1196 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1197 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1198 return TypeWidenVector;
1199 }
1200
1201 return TargetLoweringBase::getPreferredVectorAction(VT);
1202}
1203
1204bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1205 const MachineFunction &MF, EVT VT) const {
1206 if (!Subtarget->hasFP16() || !VT.isVector())
1207 return false;
1208
1209 EVT ScalarVT = VT.getScalarType();
1210 if (!ScalarVT.isSimple())
1211 return false;
1212
1213 return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1214}
1215
1216bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1217 SDValue Op, const TargetLoweringOpt &TLO) const {
1218 // ISel process runs DAGCombiner after legalization; this step is called
1219 // SelectionDAG optimization phase. This post-legalization combining process
1220 // runs DAGCombiner on each node, and if there was a change to be made,
1221 // re-runs legalization again on it and its user nodes to make sure
1222 // everythiing is in a legalized state.
1223 //
1224 // The legalization calls lowering routines, and we do our custom lowering for
1225 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1226 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1227 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1228 // turns unused vector elements into undefs. But this routine does not work
1229 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1230 // combination can result in a infinite loop, in which undefs are converted to
1231 // zeros in legalization and back to undefs in combining.
1232 //
1233 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1234 // running for build_vectors.
1235 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1236 return false;
1237 return true;
1238}
1239
1240//===----------------------------------------------------------------------===//
1241// WebAssembly Lowering private implementation.
1242//===----------------------------------------------------------------------===//
1243
1244//===----------------------------------------------------------------------===//
1245// Lowering Code
1246//===----------------------------------------------------------------------===//
1247
1248static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1249 MachineFunction &MF = DAG.getMachineFunction();
1250 DAG.getContext()->diagnose(
1251 DI: DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1252}
1253
1254// Test whether the given calling convention is supported.
1255static bool callingConvSupported(CallingConv::ID CallConv) {
1256 // We currently support the language-independent target-independent
1257 // conventions. We don't yet have a way to annotate calls with properties like
1258 // "cold", and we don't have any call-clobbered registers, so these are mostly
1259 // all handled the same.
1260 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1261 CallConv == CallingConv::Cold ||
1262 CallConv == CallingConv::PreserveMost ||
1263 CallConv == CallingConv::PreserveAll ||
1264 CallConv == CallingConv::CXX_FAST_TLS ||
1265 CallConv == CallingConv::WASM_EmscriptenInvoke ||
1266 CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail;
1267}
1268
1269SDValue
1270WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1271 SmallVectorImpl<SDValue> &InVals) const {
1272 SelectionDAG &DAG = CLI.DAG;
1273 SDLoc DL = CLI.DL;
1274 SDValue Chain = CLI.Chain;
1275 SDValue Callee = CLI.Callee;
1276 MachineFunction &MF = DAG.getMachineFunction();
1277 auto Layout = MF.getDataLayout();
1278
1279 // A call through a funcref is expressed in IR as a call through the pointer
1280 // produced by the llvm.wasm.funcref.to_ptr intrinsic. Detect this here and
1281 // recover the underlying funcref value so the call can be lowered to a
1282 // table.set + call_indirect through the dedicated __funcref_call_table.
1283 bool IsFuncrefCall = false;
1284 if (Callee.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1285 Callee.getConstantOperandVal(i: 0) == Intrinsic::wasm_funcref_to_ptr) {
1286 Callee = Callee.getOperand(i: 1);
1287 IsFuncrefCall = true;
1288 }
1289
1290 CallingConv::ID CallConv = CLI.CallConv;
1291 if (!callingConvSupported(CallConv))
1292 fail(DL, DAG,
1293 Msg: "WebAssembly doesn't support language-specific or target-specific "
1294 "calling conventions yet");
1295 if (CLI.IsPatchPoint)
1296 fail(DL, DAG, Msg: "WebAssembly doesn't support patch point yet");
1297
1298 if (CLI.IsTailCall) {
1299 auto NoTail = [&](const char *Msg) {
1300 if (CLI.CB && CLI.CB->isMustTailCall())
1301 fail(DL, DAG, Msg);
1302 CLI.IsTailCall = false;
1303 };
1304
1305 if (!Subtarget->hasTailCall())
1306 NoTail("WebAssembly 'tail-call' feature not enabled");
1307
1308 // Varargs calls cannot be tail calls because the buffer is on the stack
1309 if (CLI.IsVarArg)
1310 NoTail("WebAssembly does not support varargs tail calls");
1311
1312 // Do not tail call unless caller and callee return types match
1313 const Function &F = MF.getFunction();
1314 const TargetMachine &TM = getTargetMachine();
1315 Type *RetTy = F.getReturnType();
1316 SmallVector<MVT, 4> CallerRetTys;
1317 SmallVector<MVT, 4> CalleeRetTys;
1318 computeLegalValueVTs(F, TM, Ty: RetTy, ValueVTs&: CallerRetTys);
1319 computeLegalValueVTs(F, TM, Ty: CLI.RetTy, ValueVTs&: CalleeRetTys);
1320 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1321 std::equal(first1: CallerRetTys.begin(), last1: CallerRetTys.end(),
1322 first2: CalleeRetTys.begin());
1323 if (!TypesMatch)
1324 NoTail("WebAssembly tail call requires caller and callee return types to "
1325 "match");
1326
1327 // If pointers to local stack values are passed, we cannot tail call
1328 if (CLI.CB) {
1329 for (auto &Arg : CLI.CB->args()) {
1330 Value *Val = Arg.get();
1331 // Trace the value back through pointer operations
1332 while (true) {
1333 Value *Src = Val->stripPointerCastsAndAliases();
1334 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: Src))
1335 Src = GEP->getPointerOperand();
1336 if (Val == Src)
1337 break;
1338 Val = Src;
1339 }
1340 if (isa<AllocaInst>(Val)) {
1341 NoTail(
1342 "WebAssembly does not support tail calling with stack arguments");
1343 break;
1344 }
1345 }
1346 }
1347 }
1348
1349 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1350 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1351 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1352
1353 // The generic code may have added an sret argument. If we're lowering an
1354 // invoke function, the ABI requires that the function pointer be the first
1355 // argument, so we may have to swap the arguments.
1356 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1357 Outs[0].Flags.isSRet()) {
1358 std::swap(a&: Outs[0], b&: Outs[1]);
1359 std::swap(a&: OutVals[0], b&: OutVals[1]);
1360 }
1361
1362 bool HasSwiftSelfArg = false;
1363 bool HasSwiftErrorArg = false;
1364 bool HasSwiftAsyncArg = false;
1365 unsigned NumFixedArgs = 0;
1366 for (unsigned I = 0; I < Outs.size(); ++I) {
1367 const ISD::OutputArg &Out = Outs[I];
1368 SDValue &OutVal = OutVals[I];
1369 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1370 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1371 HasSwiftAsyncArg |= Out.Flags.isSwiftAsync();
1372 if (Out.Flags.isNest())
1373 fail(DL, DAG, Msg: "WebAssembly hasn't implemented nest arguments");
1374 if (Out.Flags.isInAlloca())
1375 fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca arguments");
1376 if (Out.Flags.isInConsecutiveRegs())
1377 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs arguments");
1378 if (Out.Flags.isInConsecutiveRegsLast())
1379 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last arguments");
1380 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1381 auto &MFI = MF.getFrameInfo();
1382 int FI = MFI.CreateStackObject(Size: Out.Flags.getByValSize(),
1383 Alignment: Out.Flags.getNonZeroByValAlign(),
1384 /*isSS=*/isSpillSlot: false);
1385 SDValue SizeNode =
1386 DAG.getConstant(Val: Out.Flags.getByValSize(), DL, VT: MVT::i32);
1387 SDValue FINode = DAG.getFrameIndex(FI, VT: getPointerTy(DL: Layout));
1388 Align Alignment = Out.Flags.getNonZeroByValAlign();
1389 Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FINode, Src: OutVal, Size: SizeNode, DstAlign: Alignment,
1390 SrcAlign: Alignment,
1391 /*isVolatile*/ isVol: false, /*AlwaysInline=*/false,
1392 /*CI=*/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(),
1393 SrcPtrInfo: MachinePointerInfo());
1394 OutVal = FINode;
1395 }
1396 // Count the number of fixed args *after* legalization.
1397 NumFixedArgs += !Out.Flags.isVarArg();
1398 }
1399
1400 bool IsVarArg = CLI.IsVarArg;
1401 auto PtrVT = getPointerTy(DL: Layout);
1402
1403 // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1404 // (for swifttailcc) swiftasync arguments if there aren't. These additional
1405 // arguments are also added for callee signature. They are necessary to match
1406 // callee and caller signature for indirect call.
1407 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
1408 Type *PtrTy = PointerType::getUnqual(C&: *DAG.getContext());
1409 if (!HasSwiftSelfArg) {
1410 NumFixedArgs++;
1411 ISD::ArgFlagsTy Flags;
1412 Flags.setSwiftSelf();
1413 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1414 CLI.Outs.push_back(Elt: Arg);
1415 SDValue ArgVal = DAG.getUNDEF(VT: PtrVT);
1416 CLI.OutVals.push_back(Elt: ArgVal);
1417 }
1418 if (!HasSwiftErrorArg) {
1419 NumFixedArgs++;
1420 ISD::ArgFlagsTy Flags;
1421 Flags.setSwiftError();
1422 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1423 CLI.Outs.push_back(Elt: Arg);
1424 SDValue ArgVal = DAG.getUNDEF(VT: PtrVT);
1425 CLI.OutVals.push_back(Elt: ArgVal);
1426 }
1427 if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1428 NumFixedArgs++;
1429 ISD::ArgFlagsTy Flags;
1430 Flags.setSwiftAsync();
1431 ISD::OutputArg Arg(Flags, PtrVT, EVT(PtrVT), PtrTy, 0, 0);
1432 CLI.Outs.push_back(Elt: Arg);
1433 SDValue ArgVal = DAG.getUNDEF(VT: PtrVT);
1434 CLI.OutVals.push_back(Elt: ArgVal);
1435 }
1436 }
1437
1438 // Analyze operands of the call, assigning locations to each operand.
1439 SmallVector<CCValAssign, 16> ArgLocs;
1440 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1441
1442 if (IsVarArg) {
1443 // Outgoing non-fixed arguments are placed in a buffer. First
1444 // compute their offsets and the total amount of buffer space needed.
1445 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1446 const ISD::OutputArg &Out = Outs[I];
1447 SDValue &Arg = OutVals[I];
1448 EVT VT = Arg.getValueType();
1449 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1450 Type *Ty = VT.getTypeForEVT(Context&: *DAG.getContext());
1451 Align Alignment =
1452 std::max(a: Out.Flags.getNonZeroOrigAlign(), b: Layout.getABITypeAlign(Ty));
1453 unsigned Offset =
1454 CCInfo.AllocateStack(Size: Layout.getTypeAllocSize(Ty), Alignment);
1455 CCInfo.addLoc(V: CCValAssign::getMem(ValNo: ArgLocs.size(), ValVT: VT.getSimpleVT(),
1456 Offset, LocVT: VT.getSimpleVT(),
1457 HTP: CCValAssign::Full));
1458 }
1459 }
1460
1461 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1462
1463 SDValue FINode;
1464 if (IsVarArg && NumBytes) {
1465 // For non-fixed arguments, next emit stores to store the argument values
1466 // to the stack buffer at the offsets computed above.
1467 MaybeAlign StackAlign = Layout.getStackAlignment();
1468 assert(StackAlign && "data layout string is missing stack alignment");
1469 int FI = MF.getFrameInfo().CreateStackObject(Size: NumBytes, Alignment: *StackAlign,
1470 /*isSS=*/isSpillSlot: false);
1471 unsigned ValNo = 0;
1472 SmallVector<SDValue, 8> Chains;
1473 for (SDValue Arg : drop_begin(RangeOrContainer&: OutVals, N: NumFixedArgs)) {
1474 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1475 "ArgLocs should remain in order and only hold varargs args");
1476 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1477 FINode = DAG.getFrameIndex(FI, VT: getPointerTy(DL: Layout));
1478 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FINode,
1479 N2: DAG.getConstant(Val: Offset, DL, VT: PtrVT));
1480 Chains.push_back(
1481 Elt: DAG.getStore(Chain, dl: DL, Val: Arg, Ptr: Add,
1482 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset)));
1483 }
1484 if (!Chains.empty())
1485 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains);
1486 } else if (IsVarArg) {
1487 FINode = DAG.getIntPtrConstant(Val: 0, DL);
1488 }
1489
1490 if (Callee->getOpcode() == ISD::GlobalAddress) {
    // If the callee is a GlobalAddress node (quite common, every direct call
    // is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
    // doesn't add MO_GOT, which is not needed for direct calls.
1494 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Callee);
1495 Callee = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL,
1496 VT: getPointerTy(DL: DAG.getDataLayout()),
1497 offset: GA->getOffset());
1498 Callee = DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL,
1499 VT: getPointerTy(DL: DAG.getDataLayout()), Operand: Callee);
1500 }
1501
1502 // Compute the operands for the CALLn node.
1503 SmallVector<SDValue, 16> Ops;
1504 Ops.push_back(Elt: Chain);
1505 Ops.push_back(Elt: Callee);
1506
1507 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1508 // isn't reliable.
1509 Ops.append(in_start: OutVals.begin(),
1510 in_end: IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1511 // Add a pointer to the vararg buffer.
1512 if (IsVarArg)
1513 Ops.push_back(Elt: FINode);
1514
1515 SmallVector<EVT, 8> InTys;
1516 for (const auto &In : Ins) {
1517 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1518 assert(!In.Flags.isNest() && "nest is not valid for return values");
1519 if (In.Flags.isInAlloca())
1520 fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca return values");
1521 if (In.Flags.isInConsecutiveRegs())
1522 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs return values");
1523 if (In.Flags.isInConsecutiveRegsLast())
1524 fail(DL, DAG,
1525 Msg: "WebAssembly hasn't implemented cons regs last return values");
1526 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1527 // registers.
1528 InTys.push_back(Elt: In.VT);
1529 }
1530
1531 // Lastly, if this is a call to a funcref we need to add an instruction
1532 // table.set to the chain and transform the call.
1533 if (IsFuncrefCall) {
1534 // In the absence of function references proposal where a funcref call is
1535 // lowered to call_ref, using reference types we generate a table.set to set
1536 // the funcref to a special table used solely for this purpose, followed by
1537 // a call_indirect. Here we just generate the table set, and return the
1538 // SDValue of the table.set so that LowerCall can finalize the lowering by
1539 // generating the call_indirect.
1540 SDValue Chain = Ops[0];
1541
1542 MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
1543 Ctx&: MF.getContext(), Subtarget);
1544 SDValue Sym = DAG.getMCSymbol(Sym: Table, VT: PtrVT);
1545 SDValue TableSlot = DAG.getConstant(Val: 0, DL, VT: MVT::i32);
1546 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1547 SDValue TableSet = DAG.getMemIntrinsicNode(
1548 Opcode: WebAssemblyISD::TABLE_SET, dl: DL, VTList: DAG.getVTList(VT: MVT::Other), Ops: TableSetOps,
1549 MemVT: MVT::funcref, PtrInfo: MachinePointerInfo(), Alignment: Align(1),
1550 Flags: MachineMemOperand::MOStore);
1551
1552 Ops[0] = TableSet; // The new chain is the TableSet itself
1553 }
1554
1555 if (CLI.IsTailCall) {
1556 // ret_calls do not return values to the current frame
1557 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
1558 return DAG.getNode(Opcode: WebAssemblyISD::RET_CALL, DL, VTList: NodeTys, Ops);
1559 }
1560
1561 InTys.push_back(Elt: MVT::Other);
1562 SDVTList InTyList = DAG.getVTList(VTs: InTys);
1563 SDValue Res = DAG.getNode(Opcode: WebAssemblyISD::CALL, DL, VTList: InTyList, Ops);
1564
1565 for (size_t I = 0; I < Ins.size(); ++I)
1566 InVals.push_back(Elt: Res.getValue(R: I));
1567
1568 // Return the chain
1569 return Res.getValue(R: Ins.size());
1570}
1571
1572bool WebAssemblyTargetLowering::CanLowerReturn(
1573 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1574 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1575 const Type *RetTy) const {
1576 // WebAssembly can only handle returning tuples with multivalue enabled
1577 return WebAssembly::canLowerReturn(ResultSize: Outs.size(), Subtarget);
1578}
1579
1580SDValue WebAssemblyTargetLowering::LowerReturn(
1581 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1582 const SmallVectorImpl<ISD::OutputArg> &Outs,
1583 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1584 SelectionDAG &DAG) const {
1585 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1586 "MVP WebAssembly can only return up to one value");
1587 if (!callingConvSupported(CallConv))
1588 fail(DL, DAG, Msg: "WebAssembly doesn't support non-C calling conventions");
1589
1590 SmallVector<SDValue, 4> RetOps(1, Chain);
1591 RetOps.append(in_start: OutVals.begin(), in_end: OutVals.end());
1592 Chain = DAG.getNode(Opcode: WebAssemblyISD::RETURN, DL, VT: MVT::Other, Ops: RetOps);
1593
1594 // Record the number and types of the return values.
1595 for (const ISD::OutputArg &Out : Outs) {
1596 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1597 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1598 assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1599 if (Out.Flags.isInAlloca())
1600 fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca results");
1601 if (Out.Flags.isInConsecutiveRegs())
1602 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs results");
1603 if (Out.Flags.isInConsecutiveRegsLast())
1604 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last results");
1605 }
1606
1607 return Chain;
1608}
1609
1610SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1611 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1612 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1613 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1614 if (!callingConvSupported(CallConv))
1615 fail(DL, DAG, Msg: "WebAssembly doesn't support non-C calling conventions");
1616
1617 MachineFunction &MF = DAG.getMachineFunction();
1618 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1619
1620 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1621 // of the incoming values before they're represented by virtual registers.
1622 MF.getRegInfo().addLiveIn(Reg: WebAssembly::ARGUMENTS);
1623
1624 bool HasSwiftErrorArg = false;
1625 bool HasSwiftSelfArg = false;
1626 bool HasSwiftAsyncArg = false;
1627 for (const ISD::InputArg &In : Ins) {
1628 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1629 HasSwiftErrorArg |= In.Flags.isSwiftError();
1630 HasSwiftAsyncArg |= In.Flags.isSwiftAsync();
1631 if (In.Flags.isInAlloca())
1632 fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca arguments");
1633 if (In.Flags.isNest())
1634 fail(DL, DAG, Msg: "WebAssembly hasn't implemented nest arguments");
1635 if (In.Flags.isInConsecutiveRegs())
1636 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs arguments");
1637 if (In.Flags.isInConsecutiveRegsLast())
1638 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last arguments");
1639 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1640 // registers.
1641 InVals.push_back(Elt: In.Used ? DAG.getNode(Opcode: WebAssemblyISD::ARGUMENT, DL, VT: In.VT,
1642 Operand: DAG.getTargetConstant(Val: InVals.size(),
1643 DL, VT: MVT::i32))
1644 : DAG.getUNDEF(VT: In.VT));
1645
1646 // Record the number and types of arguments.
1647 MFI->addParam(VT: In.VT);
1648 }
1649
1650 // For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1651 // (for swifttailcc) swiftasync arguments if there aren't. These additional
1652 // arguments are also added for callee signature. They are necessary to match
1653 // callee and caller signature for indirect call.
1654 auto PtrVT = getPointerTy(DL: MF.getDataLayout());
1655 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail) {
1656 if (!HasSwiftSelfArg) {
1657 MFI->addParam(VT: PtrVT);
1658 }
1659 if (!HasSwiftErrorArg) {
1660 MFI->addParam(VT: PtrVT);
1661 }
1662 if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1663 MFI->addParam(VT: PtrVT);
1664 }
1665 }
1666 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1667 // the buffer is passed as an argument.
1668 if (IsVarArg) {
1669 MVT PtrVT = getPointerTy(DL: MF.getDataLayout());
1670 Register VarargVreg =
1671 MF.getRegInfo().createVirtualRegister(RegClass: getRegClassFor(VT: PtrVT));
1672 MFI->setVarargBufferVreg(VarargVreg);
1673 Chain = DAG.getCopyToReg(
1674 Chain, dl: DL, Reg: VarargVreg,
1675 N: DAG.getNode(Opcode: WebAssemblyISD::ARGUMENT, DL, VT: PtrVT,
1676 Operand: DAG.getTargetConstant(Val: Ins.size(), DL, VT: MVT::i32)));
1677 MFI->addParam(VT: PtrVT);
1678 }
1679
1680 // Record the number and types of arguments and results.
1681 SmallVector<MVT, 4> Params;
1682 SmallVector<MVT, 4> Results;
1683 computeSignatureVTs(Ty: MF.getFunction().getFunctionType(), TargetFunc: &MF.getFunction(),
1684 ContextFunc: MF.getFunction(), TM: DAG.getTarget(), Params, Results);
1685 for (MVT VT : Results)
1686 MFI->addResult(VT);
1687 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1688 // the param logic here with ComputeSignatureVTs
1689 assert(MFI->getParams().size() == Params.size() &&
1690 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1691 Params.begin()));
1692
1693 return Chain;
1694}
1695
1696void WebAssemblyTargetLowering::ReplaceNodeResults(
1697 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1698 switch (N->getOpcode()) {
1699 case ISD::SIGN_EXTEND_INREG:
1700 // Do not add any results, signifying that N should not be custom lowered
1701 // after all. This happens because simd128 turns on custom lowering for
1702 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1703 // illegal type.
1704 break;
1705 case ISD::ANY_EXTEND_VECTOR_INREG:
1706 case ISD::SIGN_EXTEND_VECTOR_INREG:
1707 case ISD::ZERO_EXTEND_VECTOR_INREG:
1708 // Do not add any results, signifying that N should not be custom lowered.
1709 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1710 break;
1711 case ISD::FP_ROUND: {
1712 EVT VT = N->getValueType(ResNo: 0);
1713 SDValue Src = N->getOperand(Num: 0);
1714 if (VT == MVT::v4f16 && Src.getValueType() == MVT::v4f32) {
1715 Results.push_back(
1716 Elt: DAG.getNode(Opcode: WebAssemblyISD::DEMOTE_ZERO, DL: SDLoc(N), VT: MVT::v8f16, Operand: Src));
1717 }
1718 break;
1719 }
1720 case ISD::ADD:
1721 case ISD::SUB:
1722 Results.push_back(Elt: Replace128Op(N, DAG));
1723 break;
1724 default:
1725 llvm_unreachable(
1726 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1727 }
1728}
1729
1730//===----------------------------------------------------------------------===//
1731// Custom lowering hooks.
1732//===----------------------------------------------------------------------===//
1733
1734SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1735 SelectionDAG &DAG) const {
1736 SDLoc DL(Op);
1737 switch (Op.getOpcode()) {
1738 default:
1739 llvm_unreachable("unimplemented operation lowering");
1740 return SDValue();
1741 case ISD::FrameIndex:
1742 return LowerFrameIndex(Op, DAG);
1743 case ISD::GlobalAddress:
1744 return LowerGlobalAddress(Op, DAG);
1745 case ISD::GlobalTLSAddress:
1746 return LowerGlobalTLSAddress(Op, DAG);
1747 case ISD::ExternalSymbol:
1748 return LowerExternalSymbol(Op, DAG);
1749 case ISD::JumpTable:
1750 return LowerJumpTable(Op, DAG);
1751 case ISD::BR_JT:
1752 return LowerBR_JT(Op, DAG);
1753 case ISD::VASTART:
1754 return LowerVASTART(Op, DAG);
1755 case ISD::BlockAddress:
1756 case ISD::BRIND:
1757 fail(DL, DAG, Msg: "WebAssembly hasn't implemented computed gotos");
1758 return SDValue();
1759 case ISD::RETURNADDR:
1760 return LowerRETURNADDR(Op, DAG);
1761 case ISD::FRAMEADDR:
1762 return LowerFRAMEADDR(Op, DAG);
1763 case ISD::CopyToReg:
1764 return LowerCopyToReg(Op, DAG);
1765 case ISD::EXTRACT_VECTOR_ELT:
1766 case ISD::INSERT_VECTOR_ELT:
1767 return LowerAccessVectorElement(Op, DAG);
1768 case ISD::INTRINSIC_VOID:
1769 case ISD::INTRINSIC_WO_CHAIN:
1770 case ISD::INTRINSIC_W_CHAIN:
1771 return LowerIntrinsic(Op, DAG);
1772 case ISD::SIGN_EXTEND_INREG:
1773 return LowerSIGN_EXTEND_INREG(Op, DAG);
1774 case ISD::ZERO_EXTEND_VECTOR_INREG:
1775 case ISD::SIGN_EXTEND_VECTOR_INREG:
1776 case ISD::ANY_EXTEND_VECTOR_INREG:
1777 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1778 case ISD::BUILD_VECTOR:
1779 return LowerBUILD_VECTOR(Op, DAG);
1780 case ISD::VECTOR_SHUFFLE:
1781 return LowerVECTOR_SHUFFLE(Op, DAG);
1782 case ISD::SETCC:
1783 return LowerSETCC(Op, DAG);
1784 case ISD::SHL:
1785 case ISD::SRA:
1786 case ISD::SRL:
1787 return LowerShift(Op, DAG);
1788 case ISD::FP_TO_SINT_SAT:
1789 case ISD::FP_TO_UINT_SAT:
1790 return LowerFP_TO_INT_SAT(Op, DAG);
1791 case ISD::FMINNUM:
1792 case ISD::FMINIMUMNUM:
1793 return LowerFMIN(Op, DAG);
1794 case ISD::FMAXNUM:
1795 case ISD::FMAXIMUMNUM:
1796 return LowerFMAX(Op, DAG);
1797 case ISD::LOAD:
1798 return LowerLoad(Op, DAG);
1799 case ISD::STORE:
1800 return LowerStore(Op, DAG);
1801 case ISD::CTPOP:
1802 case ISD::CTLZ:
1803 case ISD::CTTZ:
1804 return DAG.UnrollVectorOp(N: Op.getNode());
1805 case ISD::CLEAR_CACHE:
1806 report_fatal_error(reason: "llvm.clear_cache is not supported on wasm");
1807 case ISD::SMUL_LOHI:
1808 case ISD::UMUL_LOHI:
1809 return LowerMUL_LOHI(Op, DAG);
1810 case ISD::UADDO:
1811 return LowerUADDO(Op, DAG);
1812 }
1813}
1814
1815static bool IsWebAssemblyGlobal(SDValue Op) {
1816 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op))
1817 return WebAssembly::isWasmVarAddressSpace(AS: GA->getAddressSpace());
1818
1819 return false;
1820}
1821
1822static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1823 SelectionDAG &DAG) {
1824 const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op);
1825 if (!FI)
1826 return std::nullopt;
1827
1828 auto &MF = DAG.getMachineFunction();
1829 return WebAssemblyFrameLowering::getLocalForStackObject(MF, FrameIndex: FI->getIndex());
1830}
1831
1832SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1833 SelectionDAG &DAG) const {
1834 SDLoc DL(Op);
1835 StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
1836 const SDValue &Value = SN->getValue();
1837 const SDValue &Base = SN->getBasePtr();
1838 const SDValue &Offset = SN->getOffset();
1839
1840 if (IsWebAssemblyGlobal(Op: Base)) {
1841 if (!Offset->isUndef())
1842 report_fatal_error(reason: "unexpected offset when storing to webassembly global",
1843 gen_crash_diag: false);
1844
1845 SDVTList Tys = DAG.getVTList(VT: MVT::Other);
1846 SDValue Ops[] = {SN->getChain(), Value, Base};
1847 return DAG.getMemIntrinsicNode(Opcode: WebAssemblyISD::GLOBAL_SET, dl: DL, VTList: Tys, Ops,
1848 MemVT: SN->getMemoryVT(), MMO: SN->getMemOperand());
1849 }
1850
1851 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Op: Base, DAG)) {
1852 if (!Offset->isUndef())
1853 report_fatal_error(reason: "unexpected offset when storing to webassembly local",
1854 gen_crash_diag: false);
1855
1856 SDValue Idx = DAG.getTargetConstant(Val: *Local, DL: Base, VT: MVT::i32);
1857 SDVTList Tys = DAG.getVTList(VT: MVT::Other); // The chain.
1858 SDValue Ops[] = {SN->getChain(), Idx, Value};
1859 return DAG.getNode(Opcode: WebAssemblyISD::LOCAL_SET, DL, VTList: Tys, Ops);
1860 }
1861
1862 if (WebAssembly::isWasmVarAddressSpace(AS: SN->getAddressSpace()))
1863 report_fatal_error(
1864 reason: "Encountered an unlowerable store to the wasm_var address space",
1865 gen_crash_diag: false);
1866
1867 return Op;
1868}
1869
1870SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1871 SelectionDAG &DAG) const {
1872 SDLoc DL(Op);
1873 LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
1874 const SDValue &Base = LN->getBasePtr();
1875 const SDValue &Offset = LN->getOffset();
1876
1877 if (IsWebAssemblyGlobal(Op: Base)) {
1878 if (!Offset->isUndef())
1879 report_fatal_error(
1880 reason: "unexpected offset when loading from webassembly global", gen_crash_diag: false);
1881
1882 SDVTList Tys = DAG.getVTList(VT1: LN->getValueType(ResNo: 0), VT2: MVT::Other);
1883 SDValue Ops[] = {LN->getChain(), Base};
1884 return DAG.getMemIntrinsicNode(Opcode: WebAssemblyISD::GLOBAL_GET, dl: DL, VTList: Tys, Ops,
1885 MemVT: LN->getMemoryVT(), MMO: LN->getMemOperand());
1886 }
1887
1888 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Op: Base, DAG)) {
1889 if (!Offset->isUndef())
1890 report_fatal_error(
1891 reason: "unexpected offset when loading from webassembly local", gen_crash_diag: false);
1892
1893 SDValue Idx = DAG.getTargetConstant(Val: *Local, DL: Base, VT: MVT::i32);
1894 EVT LocalVT = LN->getValueType(ResNo: 0);
1895 return DAG.getNode(Opcode: WebAssemblyISD::LOCAL_GET, DL, ResultTys: {LocalVT, MVT::Other},
1896 Ops: {LN->getChain(), Idx});
1897 }
1898
1899 if (WebAssembly::isWasmVarAddressSpace(AS: LN->getAddressSpace()))
1900 report_fatal_error(
1901 reason: "Encountered an unlowerable load from the wasm_var address space",
1902 gen_crash_diag: false);
1903
1904 return Op;
1905}
1906
1907SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1908 SelectionDAG &DAG) const {
1909 assert(Subtarget->hasWideArithmetic());
1910 assert(Op.getValueType() == MVT::i64);
1911 SDLoc DL(Op);
1912 unsigned Opcode;
1913 switch (Op.getOpcode()) {
1914 case ISD::UMUL_LOHI:
1915 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1916 break;
1917 case ISD::SMUL_LOHI:
1918 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1919 break;
1920 default:
1921 llvm_unreachable("unexpected opcode");
1922 }
1923 SDValue LHS = Op.getOperand(i: 0);
1924 SDValue RHS = Op.getOperand(i: 1);
1925 SDValue Lo =
1926 DAG.getNode(Opcode, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64), N1: LHS, N2: RHS);
1927 SDValue Hi(Lo.getNode(), 1);
1928 SDValue Ops[] = {Lo, Hi};
1929 return DAG.getMergeValues(Ops, dl: DL);
1930}
1931
1932// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1933//
1934// This enables generating a single wasm instruction for this operation where
1935// the upper half of both operands are constant zeros. The upper half of the
1936// result is then whether the overflow happened.
1937SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1938 SelectionDAG &DAG) const {
1939 assert(Subtarget->hasWideArithmetic());
1940 assert(Op.getValueType() == MVT::i64);
1941 assert(Op.getOpcode() == ISD::UADDO);
1942 SDLoc DL(Op);
1943 SDValue LHS = Op.getOperand(i: 0);
1944 SDValue RHS = Op.getOperand(i: 1);
1945 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: MVT::i64);
1946 SDValue Result =
1947 DAG.getNode(Opcode: WebAssemblyISD::I64_ADD128, DL,
1948 VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64), N1: LHS, N2: Zero, N3: RHS, N4: Zero);
1949 SDValue CarryI64(Result.getNode(), 1);
1950 SDValue CarryI32 = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: CarryI64);
1951 SDValue Ops[] = {Result, CarryI32};
1952 return DAG.getMergeValues(Ops, dl: DL);
1953}
1954
1955SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1956 SelectionDAG &DAG) const {
1957 assert(Subtarget->hasWideArithmetic());
1958 assert(N->getValueType(0) == MVT::i128);
1959 SDLoc DL(N);
1960 unsigned Opcode;
1961 switch (N->getOpcode()) {
1962 case ISD::ADD:
1963 Opcode = WebAssemblyISD::I64_ADD128;
1964 break;
1965 case ISD::SUB:
1966 Opcode = WebAssemblyISD::I64_SUB128;
1967 break;
1968 default:
1969 llvm_unreachable("unexpected opcode");
1970 }
1971 SDValue LHS = N->getOperand(Num: 0);
1972 SDValue RHS = N->getOperand(Num: 1);
1973
1974 SDValue C0 = DAG.getConstant(Val: 0, DL, VT: MVT::i64);
1975 SDValue C1 = DAG.getConstant(Val: 1, DL, VT: MVT::i64);
1976 SDValue LHS_0 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: LHS, N2: C0);
1977 SDValue LHS_1 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: LHS, N2: C1);
1978 SDValue RHS_0 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: RHS, N2: C0);
1979 SDValue RHS_1 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: RHS, N2: C1);
1980 SDValue Result_LO = DAG.getNode(Opcode, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64),
1981 N1: LHS_0, N2: LHS_1, N3: RHS_0, N4: RHS_1);
1982 SDValue Result_HI(Result_LO.getNode(), 1);
1983 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VTList: N->getVTList(), N1: Result_LO, N2: Result_HI);
1984}
1985
1986SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1987 SelectionDAG &DAG) const {
1988 SDValue Src = Op.getOperand(i: 2);
1989 if (isa<FrameIndexSDNode>(Val: Src.getNode())) {
1990 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1991 // the FI to some LEA-like instruction, but since we don't have that, we
1992 // need to insert some kind of instruction that can take an FI operand and
1993 // produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1994 // local.copy between Op and its FI operand.
1995 SDValue Chain = Op.getOperand(i: 0);
1996 SDLoc DL(Op);
1997 Register Reg = cast<RegisterSDNode>(Val: Op.getOperand(i: 1))->getReg();
1998 EVT VT = Src.getValueType();
1999 SDValue Copy(DAG.getMachineNode(Opcode: VT == MVT::i32 ? WebAssembly::COPY_I32
2000 : WebAssembly::COPY_I64,
2001 dl: DL, VT, Op1: Src),
2002 0);
2003 return Op.getNode()->getNumValues() == 1
2004 ? DAG.getCopyToReg(Chain, dl: DL, Reg, N: Copy)
2005 : DAG.getCopyToReg(Chain, dl: DL, Reg, N: Copy,
2006 Glue: Op.getNumOperands() == 4 ? Op.getOperand(i: 3)
2007 : SDValue());
2008 }
2009 return SDValue();
2010}
2011
2012SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
2013 SelectionDAG &DAG) const {
2014 int FI = cast<FrameIndexSDNode>(Val&: Op)->getIndex();
2015 return DAG.getTargetFrameIndex(FI, VT: Op.getValueType());
2016}
2017
2018SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
2019 SelectionDAG &DAG) const {
2020 SDLoc DL(Op);
2021
2022 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
2023 fail(DL, DAG,
2024 Msg: "Non-Emscripten WebAssembly hasn't implemented "
2025 "__builtin_return_address");
2026 return SDValue();
2027 }
2028
2029 unsigned Depth = Op.getConstantOperandVal(i: 0);
2030 MakeLibCallOptions CallOptions;
2031 return makeLibCall(DAG, LC: RTLIB::RETURN_ADDRESS, RetVT: Op.getValueType(),
2032 Ops: {DAG.getConstant(Val: Depth, DL, VT: MVT::i32)}, CallOptions, dl: DL)
2033 .first;
2034}
2035
2036SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
2037 SelectionDAG &DAG) const {
2038 // Non-zero depths are not supported by WebAssembly currently. Use the
2039 // legalizer's default expansion, which is to return 0 (what this function is
2040 // documented to do).
2041 if (Op.getConstantOperandVal(i: 0) > 0)
2042 return SDValue();
2043
2044 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
2045 EVT VT = Op.getValueType();
2046 Register FP =
2047 Subtarget->getRegisterInfo()->getFrameRegister(MF: DAG.getMachineFunction());
2048 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg: FP, VT);
2049}
2050
2051SDValue
2052WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2053 SelectionDAG &DAG) const {
2054 SDLoc DL(Op);
2055 const auto *GA = cast<GlobalAddressSDNode>(Val&: Op);
2056
2057 MachineFunction &MF = DAG.getMachineFunction();
2058 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2059 report_fatal_error(reason: "cannot use thread-local storage without bulk memory",
2060 gen_crash_diag: false);
2061
2062 const GlobalValue *GV = GA->getGlobal();
2063
2064 // Currently only Emscripten supports dynamic linking with threads. Therefore,
2065 // on other targets, if we have thread-local storage, only the local-exec
2066 // model is possible.
2067 auto model = Subtarget->getTargetTriple().isOSEmscripten()
2068 ? GV->getThreadLocalMode()
2069 : GlobalValue::LocalExecTLSModel;
2070
2071 // Unsupported TLS modes
2072 assert(model != GlobalValue::NotThreadLocal);
2073 assert(model != GlobalValue::InitialExecTLSModel);
2074
2075 if (model == GlobalValue::LocalExecTLSModel ||
2076 model == GlobalValue::LocalDynamicTLSModel ||
2077 (model == GlobalValue::GeneralDynamicTLSModel &&
2078 getTargetMachine().shouldAssumeDSOLocal(GV))) {
2079 // For DSO-local TLS variables we use offset from __tls_base, or
2080 // __wasm_get_tls_base() if using libcall thread context.
2081
2082 MVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
2083 SDValue BaseAddr(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0);
2084
2085 SDValue TLSOffset = DAG.getTargetGlobalAddress(
2086 GV, DL, VT: PtrVT, offset: GA->getOffset(), TargetFlags: WebAssemblyII::MO_TLS_BASE_REL);
2087 SDValue SymOffset =
2088 DAG.getNode(Opcode: WebAssemblyISD::WrapperREL, DL, VT: PtrVT, Operand: TLSOffset);
2089
2090 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: BaseAddr, N2: SymOffset);
2091 }
2092
2093 assert(model == GlobalValue::GeneralDynamicTLSModel);
2094
2095 EVT VT = Op.getValueType();
2096 return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
2097 Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT,
2098 offset: GA->getOffset(),
2099 TargetFlags: WebAssemblyII::MO_GOT_TLS));
2100}
2101
2102SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2103 SelectionDAG &DAG) const {
2104 SDLoc DL(Op);
2105 const auto *GA = cast<GlobalAddressSDNode>(Val&: Op);
2106 EVT VT = Op.getValueType();
2107 assert(GA->getTargetFlags() == 0 &&
2108 "Unexpected target flags on generic GlobalAddressSDNode");
2109 if (!WebAssembly::isValidAddressSpace(AS: GA->getAddressSpace()))
2110 fail(DL, DAG, Msg: "Invalid address space for WebAssembly target");
2111
2112 unsigned OperandFlags = 0;
2113 const GlobalValue *GV = GA->getGlobal();
2114 // Since WebAssembly tables cannot yet be shared accross modules, we don't
2115 // need special treatment for tables in PIC mode.
2116 if (isPositionIndependent() &&
2117 !WebAssembly::isWebAssemblyTableType(Ty: GV->getValueType())) {
2118 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2119 MachineFunction &MF = DAG.getMachineFunction();
2120 MVT PtrVT = getPointerTy(DL: MF.getDataLayout());
2121 const char *BaseName;
2122 if (GV->getValueType()->isFunctionTy()) {
2123 BaseName = MF.createExternalSymbolName(Name: "__table_base");
2124 OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
2125 } else {
2126 BaseName = MF.createExternalSymbolName(Name: "__memory_base");
2127 OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
2128 }
2129 SDValue BaseAddr =
2130 DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT,
2131 Operand: DAG.getTargetExternalSymbol(Sym: BaseName, VT: PtrVT));
2132
2133 SDValue SymAddr = DAG.getNode(
2134 Opcode: WebAssemblyISD::WrapperREL, DL, VT,
2135 Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT, offset: GA->getOffset(),
2136 TargetFlags: OperandFlags));
2137
2138 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: BaseAddr, N2: SymAddr);
2139 }
2140 OperandFlags = WebAssemblyII::MO_GOT;
2141 }
2142
2143 return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
2144 Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT,
2145 offset: GA->getOffset(), TargetFlags: OperandFlags));
2146}
2147
2148SDValue
2149WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2150 SelectionDAG &DAG) const {
2151 SDLoc DL(Op);
2152 const auto *ES = cast<ExternalSymbolSDNode>(Val&: Op);
2153 EVT VT = Op.getValueType();
2154 assert(ES->getTargetFlags() == 0 &&
2155 "Unexpected target flags on generic ExternalSymbolSDNode");
2156 return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
2157 Operand: DAG.getTargetExternalSymbol(Sym: ES->getSymbol(), VT));
2158}
2159
2160SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2161 SelectionDAG &DAG) const {
2162 // There's no need for a Wrapper node because we always incorporate a jump
2163 // table operand into a BR_TABLE instruction, rather than ever
2164 // materializing it in a register.
2165 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Val&: Op);
2166 return DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: Op.getValueType(),
2167 TargetFlags: JT->getTargetFlags());
2168}
2169
2170SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2171 SelectionDAG &DAG) const {
2172 SDLoc DL(Op);
2173 SDValue Chain = Op.getOperand(i: 0);
2174 const auto *JT = cast<JumpTableSDNode>(Val: Op.getOperand(i: 1));
2175 SDValue Index = Op.getOperand(i: 2);
2176 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2177
2178 SmallVector<SDValue, 8> Ops;
2179 Ops.push_back(Elt: Chain);
2180 Ops.push_back(Elt: Index);
2181
2182 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2183 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2184
2185 // Add an operand for each case.
2186 for (auto *MBB : MBBs)
2187 Ops.push_back(Elt: DAG.getBasicBlock(MBB));
2188
2189 // Add the first MBB as a dummy default target for now. This will be replaced
2190 // with the proper default target (and the preceding range check eliminated)
2191 // if possible by WebAssemblyFixBrTableDefaults.
2192 Ops.push_back(Elt: DAG.getBasicBlock(MBB: *MBBs.begin()));
2193 return DAG.getNode(Opcode: WebAssemblyISD::BR_TABLE, DL, VT: MVT::Other, Ops);
2194}
2195
2196SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2197 SelectionDAG &DAG) const {
2198 SDLoc DL(Op);
2199 EVT PtrVT = getPointerTy(DL: DAG.getMachineFunction().getDataLayout());
2200
2201 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2202 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
2203
2204 SDValue ArgN = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL,
2205 Reg: MFI->getVarargBufferVreg(), VT: PtrVT);
2206 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: ArgN, Ptr: Op.getOperand(i: 1),
2207 PtrInfo: MachinePointerInfo(SV));
2208}
2209
2210SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2211 SelectionDAG &DAG) const {
2212 MachineFunction &MF = DAG.getMachineFunction();
2213 unsigned IntNo;
2214 switch (Op.getOpcode()) {
2215 case ISD::INTRINSIC_VOID:
2216 case ISD::INTRINSIC_W_CHAIN:
2217 IntNo = Op.getConstantOperandVal(i: 1);
2218 break;
2219 case ISD::INTRINSIC_WO_CHAIN:
2220 IntNo = Op.getConstantOperandVal(i: 0);
2221 break;
2222 default:
2223 llvm_unreachable("Invalid intrinsic");
2224 }
2225 SDLoc DL(Op);
2226
2227 switch (IntNo) {
2228 default:
2229 return SDValue(); // Don't custom lower most intrinsics.
2230
2231 case Intrinsic::wasm_lsda: {
2232 auto PtrVT = getPointerTy(DL: MF.getDataLayout());
2233 const char *SymName = MF.createExternalSymbolName(
2234 Name: "GCC_except_table" + std::to_string(val: MF.getFunctionNumber()));
2235 if (isPositionIndependent()) {
2236 SDValue Node = DAG.getTargetExternalSymbol(
2237 Sym: SymName, VT: PtrVT, TargetFlags: WebAssemblyII::MO_MEMORY_BASE_REL);
2238 const char *BaseName = MF.createExternalSymbolName(Name: "__memory_base");
2239 SDValue BaseAddr =
2240 DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT,
2241 Operand: DAG.getTargetExternalSymbol(Sym: BaseName, VT: PtrVT));
2242 SDValue SymAddr =
2243 DAG.getNode(Opcode: WebAssemblyISD::WrapperREL, DL, VT: PtrVT, Operand: Node);
2244 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: BaseAddr, N2: SymAddr);
2245 }
2246 SDValue Node = DAG.getTargetExternalSymbol(Sym: SymName, VT: PtrVT);
2247 return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT, Operand: Node);
2248 }
2249
2250 case Intrinsic::wasm_shuffle: {
2251 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2252 SDValue Ops[18];
2253 size_t OpIdx = 0;
2254 Ops[OpIdx++] = Op.getOperand(i: 1);
2255 Ops[OpIdx++] = Op.getOperand(i: 2);
2256 while (OpIdx < 18) {
2257 const SDValue &MaskIdx = Op.getOperand(i: OpIdx + 1);
2258 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2259 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2260 Ops[OpIdx++] = DAG.getConstant(Val: 0, DL, VT: MVT::i32, isTarget);
2261 } else {
2262 Ops[OpIdx++] = MaskIdx;
2263 }
2264 }
2265 return DAG.getNode(Opcode: WebAssemblyISD::SHUFFLE, DL, VT: Op.getValueType(), Ops);
2266 }
2267
2268 case Intrinsic::wasm_funcref_to_ptr: {
2269 // llvm.wasm.funcref.to_ptr only has a defined lowering when its result
2270 // feeds directly into an indirect call. Reaching here means the pointer
2271 // escapes a direct call. We haven't implemented conversion of a funcref
2272 // into a real function pointer so we crash if we get here.
2273 fail(DL, DAG,
2274 Msg: "a funcref can only be converted to a pointer to be directly called; "
2275 "the resulting pointer cannot otherwise be used");
2276 return DAG.getPOISON(VT: Op.getValueType());
2277 }
2278
2279 case Intrinsic::thread_pointer: {
2280 return SDValue(WebAssembly::getTLSBase(DAG, DL, Subtarget), 0);
2281 }
2282 }
2283}
2284
2285SDValue
2286WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2287 SelectionDAG &DAG) const {
2288 SDLoc DL(Op);
2289 // If sign extension operations are disabled, allow sext_inreg only if operand
2290 // is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2291 // extension operations, but allowing sext_inreg in this context lets us have
2292 // simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2293 // everywhere would be simpler in this file, but would necessitate large and
2294 // brittle patterns to undo the expansion and select extract_lane_s
2295 // instructions.
2296 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2297 if (Op.getOperand(i: 0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2298 return SDValue();
2299
2300 const SDValue &Extract = Op.getOperand(i: 0);
2301 MVT VecT = Extract.getOperand(i: 0).getSimpleValueType();
2302 if (VecT.getVectorElementType().getSizeInBits() > 32)
2303 return SDValue();
2304 MVT ExtractedLaneT =
2305 cast<VTSDNode>(Val: Op.getOperand(i: 1).getNode())->getVT().getSimpleVT();
2306 MVT ExtractedVecT =
2307 MVT::getVectorVT(VT: ExtractedLaneT, NumElements: 128 / ExtractedLaneT.getSizeInBits());
2308 if (ExtractedVecT == VecT)
2309 return Op;
2310
2311 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2312 const SDNode *Index = Extract.getOperand(i: 1).getNode();
2313 if (!isa<ConstantSDNode>(Val: Index))
2314 return SDValue();
2315 unsigned IndexVal = Index->getAsZExtVal();
2316 unsigned Scale =
2317 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2318 assert(Scale > 1);
2319 SDValue NewIndex =
2320 DAG.getConstant(Val: IndexVal * Scale, DL, VT: Index->getValueType(ResNo: 0));
2321 SDValue NewExtract = DAG.getNode(
2322 Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: Extract.getValueType(),
2323 N1: DAG.getBitcast(VT: ExtractedVecT, V: Extract.getOperand(i: 0)), N2: NewIndex);
2324 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: Op.getValueType(), N1: NewExtract,
2325 N2: Op.getOperand(i: 1));
2326}
2327
2328static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2329 SelectionDAG &DAG) {
2330 SDValue Source = peekThroughBitcasts(V: Op);
2331 if (Source.getOpcode() != ISD::VECTOR_SHUFFLE)
2332 return SDValue();
2333
2334 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2335 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2336 "expected extend_low");
2337 auto *Shuffle = cast<ShuffleVectorSDNode>(Val: Source.getNode());
2338
2339 ArrayRef<int> Mask = Shuffle->getMask();
2340 // Look for a shuffle which moves from the high half to the low half.
2341 size_t FirstIdx = Mask.size() / 2;
2342 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2343 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2344 return SDValue();
2345 }
2346 }
2347
2348 SDLoc DL(Op);
2349 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2350 ? WebAssemblyISD::EXTEND_HIGH_S
2351 : WebAssemblyISD::EXTEND_HIGH_U;
2352 SDValue ShuffleSrc = Shuffle->getOperand(Num: 0);
2353 if (Op.getOpcode() == ISD::BITCAST)
2354 ShuffleSrc = DAG.getBitcast(VT: Op.getValueType(), V: ShuffleSrc);
2355
2356 return DAG.getNode(Opcode: Opc, DL, VT, Operand: ShuffleSrc);
2357}
2358
2359SDValue
2360WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2361 SelectionDAG &DAG) const {
2362 SDLoc DL(Op);
2363 EVT VT = Op.getValueType();
2364 SDValue Src = Op.getOperand(i: 0);
2365 EVT SrcVT = Src.getValueType();
2366
2367 if (SrcVT.getVectorElementType() == MVT::i1 ||
2368 SrcVT.getVectorElementType() == MVT::i64)
2369 return SDValue();
2370
2371 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2372 "Unexpected extension factor.");
2373 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2374
2375 if (Scale != 2 && Scale != 4 && Scale != 8)
2376 return SDValue();
2377
2378 unsigned Ext;
2379 switch (Op.getOpcode()) {
2380 default:
2381 llvm_unreachable("unexpected opcode");
2382 case ISD::ANY_EXTEND_VECTOR_INREG:
2383 case ISD::ZERO_EXTEND_VECTOR_INREG:
2384 Ext = WebAssemblyISD::EXTEND_LOW_U;
2385 break;
2386 case ISD::SIGN_EXTEND_VECTOR_INREG:
2387 Ext = WebAssemblyISD::EXTEND_LOW_S;
2388 break;
2389 }
2390
2391 if (Scale == 2) {
2392 // See if we can use EXTEND_HIGH.
2393 if (auto ExtendHigh = GetExtendHigh(Op: Op.getOperand(i: 0), UserOpc: Ext, VT, DAG))
2394 return ExtendHigh;
2395 }
2396
2397 SDValue Ret = Src;
2398 while (Scale != 1) {
2399 Ret = DAG.getNode(Opcode: Ext, DL,
2400 VT: Ret.getValueType()
2401 .widenIntegerVectorElementType(Context&: *DAG.getContext())
2402 .getHalfNumVectorElementsVT(Context&: *DAG.getContext()),
2403 Operand: Ret);
2404 Scale /= 2;
2405 }
2406 assert(Ret.getValueType() == VT);
2407 return Ret;
2408}
2409
2410static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2411 SDLoc DL(Op);
2412 if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
2413 return SDValue();
2414
2415 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2416 unsigned &Index) -> bool {
2417 switch (Op.getOpcode()) {
2418 case ISD::SINT_TO_FP:
2419 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2420 break;
2421 case ISD::UINT_TO_FP:
2422 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2423 break;
2424 case ISD::FP_EXTEND:
2425 case ISD::FP16_TO_FP:
2426 Opcode = WebAssemblyISD::PROMOTE_LOW;
2427 break;
2428 default:
2429 return false;
2430 }
2431
2432 auto ExtractVector = Op.getOperand(i: 0);
2433 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2434 return false;
2435
2436 if (!isa<ConstantSDNode>(Val: ExtractVector.getOperand(i: 1).getNode()))
2437 return false;
2438
2439 SrcVec = ExtractVector.getOperand(i: 0);
2440 Index = ExtractVector.getConstantOperandVal(i: 1);
2441 return true;
2442 };
2443
2444 unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? 2 : 4;
2445 unsigned FirstOpcode = 0, SecondOpcode = 0, ThirdOpcode = 0, FourthOpcode = 0;
2446 unsigned FirstIndex = 0, SecondIndex = 0, ThirdIndex = 0, FourthIndex = 0;
2447 SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
2448
2449 if (!GetConvertedLane(Op.getOperand(i: 0), FirstOpcode, FirstSrcVec,
2450 FirstIndex) ||
2451 !GetConvertedLane(Op.getOperand(i: 1), SecondOpcode, SecondSrcVec,
2452 SecondIndex))
2453 return SDValue();
2454
2455 // If we're converting to v4f32, check the third and fourth lanes, too.
2456 if (NumLanes == 4 && (!GetConvertedLane(Op.getOperand(i: 2), ThirdOpcode,
2457 ThirdSrcVec, ThirdIndex) ||
2458 !GetConvertedLane(Op.getOperand(i: 3), FourthOpcode,
2459 FourthSrcVec, FourthIndex)))
2460 return SDValue();
2461
2462 if (FirstOpcode != SecondOpcode)
2463 return SDValue();
2464
2465 // TODO Add an optimization similar to the v2f64 below for shuffling the
2466 // vectors when the lanes are in the wrong order or come from different src
2467 // vectors.
2468 if (NumLanes == 4 &&
2469 (FirstOpcode != ThirdOpcode || FirstOpcode != FourthOpcode ||
2470 FirstSrcVec != SecondSrcVec || FirstSrcVec != ThirdSrcVec ||
2471 FirstSrcVec != FourthSrcVec || FirstIndex != 0 || SecondIndex != 1 ||
2472 ThirdIndex != 2 || FourthIndex != 3))
2473 return SDValue();
2474
2475 MVT ExpectedSrcVT;
2476 switch (FirstOpcode) {
2477 case WebAssemblyISD::CONVERT_LOW_S:
2478 case WebAssemblyISD::CONVERT_LOW_U:
2479 ExpectedSrcVT = MVT::v4i32;
2480 break;
2481 case WebAssemblyISD::PROMOTE_LOW:
2482 ExpectedSrcVT = NumLanes == 2 ? MVT::v4f32 : MVT::v8i16;
2483 break;
2484 }
2485 if (FirstSrcVec.getValueType() != ExpectedSrcVT)
2486 return SDValue();
2487
2488 auto Src = FirstSrcVec;
2489 if (NumLanes == 2 &&
2490 (FirstIndex != 0 || SecondIndex != 1 || FirstSrcVec != SecondSrcVec)) {
2491 // Shuffle the source vector so that the converted lanes are the low lanes.
2492 Src = DAG.getVectorShuffle(VT: ExpectedSrcVT, dl: DL, N1: FirstSrcVec, N2: SecondSrcVec,
2493 Mask: {static_cast<int>(FirstIndex),
2494 static_cast<int>(SecondIndex) + 4, -1, -1});
2495 }
2496 return DAG.getNode(Opcode: FirstOpcode, DL, VT: NumLanes == 2 ? MVT::v2f64 : MVT::v4f32,
2497 Operand: Src);
2498}
2499
2500SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2501 SelectionDAG &DAG) const {
2502 MVT VT = Op.getSimpleValueType();
2503 if (VT == MVT::v8f16) {
2504 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scaler
2505 // FP16 type, so cast them to I16s.
2506 MVT IVT = VT.changeVectorElementType(EltVT: MVT::i16);
2507 SmallVector<SDValue, 8> NewOps;
2508 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2509 NewOps.push_back(Elt: DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: I)));
2510 SDValue Res = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: SDLoc(), VT: IVT, Ops: NewOps);
2511 return DAG.getBitcast(VT, V: Res);
2512 }
2513
2514 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2515 return ConvertLow;
2516
2517 SDLoc DL(Op);
2518 const EVT VecT = Op.getValueType();
2519 const EVT LaneT = Op.getOperand(i: 0).getValueType();
2520 const size_t Lanes = Op.getNumOperands();
2521 bool CanSwizzle = VecT == MVT::v16i8;
2522
2523 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2524 // possible number of lanes at once followed by a sequence of replace_lane
2525 // instructions to individually initialize any remaining lanes.
2526
2527 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2528 // swizzled lanes should be given greater weight.
2529
2530 // TODO: Investigate looping rather than always extracting/replacing specific
2531 // lanes to fill gaps.
2532
2533 auto IsConstant = [](const SDValue &V) {
2534 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2535 };
2536
2537 // Returns the source vector and index vector pair if they exist. Checks for:
2538 // (extract_vector_elt
2539 // $src,
2540 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2541 // )
2542 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2543 auto Bail = std::make_pair(x: SDValue(), y: SDValue());
2544 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2545 return Bail;
2546 const SDValue &SwizzleSrc = Lane->getOperand(Num: 0);
2547 const SDValue &IndexExt = Lane->getOperand(Num: 1);
2548 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2549 return Bail;
2550 const SDValue &Index = IndexExt->getOperand(Num: 0);
2551 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2552 return Bail;
2553 const SDValue &SwizzleIndices = Index->getOperand(Num: 0);
2554 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2555 SwizzleIndices.getValueType() != MVT::v16i8 ||
2556 Index->getOperand(Num: 1)->getOpcode() != ISD::Constant ||
2557 Index->getConstantOperandVal(Num: 1) != I)
2558 return Bail;
2559 return std::make_pair(x: SwizzleSrc, y: SwizzleIndices);
2560 };
2561
2562 // If the lane is extracted from another vector at a constant index, return
2563 // that vector. The source vector must not have more lanes than the dest
2564 // because the shufflevector indices are in terms of the destination lanes and
2565 // would not be able to address the smaller individual source lanes.
2566 auto GetShuffleSrc = [&](const SDValue &Lane) {
2567 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2568 return SDValue();
2569 if (!isa<ConstantSDNode>(Val: Lane->getOperand(Num: 1).getNode()))
2570 return SDValue();
2571 if (Lane->getOperand(Num: 0).getValueType().getVectorNumElements() >
2572 VecT.getVectorNumElements())
2573 return SDValue();
2574 return Lane->getOperand(Num: 0);
2575 };
2576
2577 using ValueEntry = std::pair<SDValue, size_t>;
2578 SmallVector<ValueEntry, 16> SplatValueCounts;
2579
2580 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2581 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2582
2583 using ShuffleEntry = std::pair<SDValue, size_t>;
2584 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2585
2586 auto AddCount = [](auto &Counts, const auto &Val) {
2587 auto CountIt =
2588 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2589 if (CountIt == Counts.end()) {
2590 Counts.emplace_back(Val, 1);
2591 } else {
2592 CountIt->second++;
2593 }
2594 };
2595
2596 auto GetMostCommon = [](auto &Counts) {
2597 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2598 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2599 return *CommonIt;
2600 };
2601
2602 size_t NumConstantLanes = 0;
2603
2604 // Count eligible lanes for each type of vector creation op
2605 for (size_t I = 0; I < Lanes; ++I) {
2606 const SDValue &Lane = Op->getOperand(Num: I);
2607 if (Lane.isUndef())
2608 continue;
2609
2610 AddCount(SplatValueCounts, Lane);
2611
2612 if (IsConstant(Lane))
2613 NumConstantLanes++;
2614 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2615 AddCount(ShuffleCounts, ShuffleSrc);
2616 if (CanSwizzle) {
2617 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2618 if (SwizzleSrcs.first)
2619 AddCount(SwizzleCounts, SwizzleSrcs);
2620 }
2621 }
2622
2623 SDValue SplatValue;
2624 size_t NumSplatLanes;
2625 std::tie(args&: SplatValue, args&: NumSplatLanes) = GetMostCommon(SplatValueCounts);
2626
2627 SDValue SwizzleSrc;
2628 SDValue SwizzleIndices;
2629 size_t NumSwizzleLanes = 0;
2630 if (SwizzleCounts.size())
2631 std::forward_as_tuple(args: std::tie(args&: SwizzleSrc, args&: SwizzleIndices),
2632 args&: NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2633
2634 // Shuffles can draw from up to two vectors, so find the two most common
2635 // sources.
2636 SDValue ShuffleSrc1, ShuffleSrc2;
2637 size_t NumShuffleLanes = 0;
2638 if (ShuffleCounts.size()) {
2639 std::tie(args&: ShuffleSrc1, args&: NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2640 llvm::erase_if(C&: ShuffleCounts,
2641 P: [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2642 }
2643 if (ShuffleCounts.size()) {
2644 size_t AdditionalShuffleLanes;
2645 std::tie(args&: ShuffleSrc2, args&: AdditionalShuffleLanes) =
2646 GetMostCommon(ShuffleCounts);
2647 NumShuffleLanes += AdditionalShuffleLanes;
2648 }
2649
2650 // Predicate returning true if the lane is properly initialized by the
2651 // original instruction
2652 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2653 SDValue Result;
2654 // Prefer swizzles over shuffles over vector consts over splats
2655 if (NumSwizzleLanes >= NumShuffleLanes &&
2656 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2657 Result = DAG.getNode(Opcode: WebAssemblyISD::SWIZZLE, DL, VT: VecT, N1: SwizzleSrc,
2658 N2: SwizzleIndices);
2659 auto Swizzled = std::make_pair(x&: SwizzleSrc, y&: SwizzleIndices);
2660 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2661 return Swizzled == GetSwizzleSrcs(I, Lane);
2662 };
2663 } else if (NumShuffleLanes >= NumConstantLanes &&
2664 NumShuffleLanes >= NumSplatLanes) {
2665 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2666 size_t DestLaneCount = VecT.getVectorNumElements();
2667 size_t Scale1 = 1;
2668 size_t Scale2 = 1;
2669 SDValue Src1 = ShuffleSrc1;
2670 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VT: VecT);
2671 if (Src1.getValueType() != VecT) {
2672 size_t LaneSize =
2673 Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2674 assert(LaneSize > DestLaneSize);
2675 Scale1 = LaneSize / DestLaneSize;
2676 Src1 = DAG.getBitcast(VT: VecT, V: Src1);
2677 }
2678 if (Src2.getValueType() != VecT) {
2679 size_t LaneSize =
2680 Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2681 assert(LaneSize > DestLaneSize);
2682 Scale2 = LaneSize / DestLaneSize;
2683 Src2 = DAG.getBitcast(VT: VecT, V: Src2);
2684 }
2685
2686 int Mask[16];
2687 assert(DestLaneCount <= 16);
2688 for (size_t I = 0; I < DestLaneCount; ++I) {
2689 const SDValue &Lane = Op->getOperand(Num: I);
2690 SDValue Src = GetShuffleSrc(Lane);
2691 if (Src == ShuffleSrc1) {
2692 Mask[I] = Lane->getConstantOperandVal(Num: 1) * Scale1;
2693 } else if (Src && Src == ShuffleSrc2) {
2694 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(Num: 1) * Scale2;
2695 } else {
2696 Mask[I] = -1;
2697 }
2698 }
2699 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2700 Result = DAG.getVectorShuffle(VT: VecT, dl: DL, N1: Src1, N2: Src2, Mask: MaskRef);
2701 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2702 auto Src = GetShuffleSrc(Lane);
2703 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2704 };
2705 } else if (NumConstantLanes >= NumSplatLanes) {
2706 SmallVector<SDValue, 16> ConstLanes;
2707 for (const SDValue &Lane : Op->op_values()) {
2708 if (IsConstant(Lane)) {
2709 // Values may need to be fixed so that they will sign extend to be
2710 // within the expected range during ISel. Check whether the value is in
2711 // bounds based on the lane bit width and if it is out of bounds, lop
2712 // off the extra bits.
2713 uint64_t LaneBits = 128 / Lanes;
2714 if (auto *Const = dyn_cast<ConstantSDNode>(Val: Lane.getNode())) {
2715 ConstLanes.push_back(Elt: DAG.getConstant(
2716 Val: Const->getAPIntValue().trunc(width: LaneBits).getZExtValue(),
2717 DL: SDLoc(Lane), VT: LaneT));
2718 } else {
2719 ConstLanes.push_back(Elt: Lane);
2720 }
2721 } else if (LaneT.isFloatingPoint()) {
2722 ConstLanes.push_back(Elt: DAG.getConstantFP(Val: 0, DL, VT: LaneT));
2723 } else {
2724 ConstLanes.push_back(Elt: DAG.getConstant(Val: 0, DL, VT: LaneT));
2725 }
2726 }
2727 Result = DAG.getBuildVector(VT: VecT, DL, Ops: ConstLanes);
2728 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2729 return IsConstant(Lane);
2730 };
2731 } else {
2732 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2733 if (NumSplatLanes == 1 && Op->getOperand(Num: 0) == SplatValue &&
2734 (DestLaneSize == 32 || DestLaneSize == 64)) {
2735 // Could be selected to load_zero.
2736 Result = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: VecT, Operand: SplatValue);
2737 } else {
2738 // Use a splat (which might be selected as a load splat)
2739 Result = DAG.getSplatBuildVector(VT: VecT, DL, Op: SplatValue);
2740 }
2741 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2742 return Lane == SplatValue;
2743 };
2744 }
2745
2746 assert(Result);
2747 assert(IsLaneConstructed);
2748
2749 // Add replace_lane instructions for any unhandled values
2750 for (size_t I = 0; I < Lanes; ++I) {
2751 const SDValue &Lane = Op->getOperand(Num: I);
2752 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2753 Result = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: VecT, N1: Result, N2: Lane,
2754 N3: DAG.getConstant(Val: I, DL, VT: MVT::i32));
2755 }
2756
2757 return Result;
2758}
2759
2760SDValue
2761WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2762 SelectionDAG &DAG) const {
2763 SDLoc DL(Op);
2764 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Val: Op.getNode())->getMask();
2765 MVT VecType = Op.getOperand(i: 0).getSimpleValueType();
2766 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2767 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2768
2769 // Space for two vector args and sixteen mask indices
2770 SDValue Ops[18];
2771 size_t OpIdx = 0;
2772 Ops[OpIdx++] = Op.getOperand(i: 0);
2773 Ops[OpIdx++] = Op.getOperand(i: 1);
2774
2775 // Expand mask indices to byte indices and materialize them as operands
2776 for (int M : Mask) {
2777 for (size_t J = 0; J < LaneBytes; ++J) {
2778 // Lower undefs (represented by -1 in mask) to {0..J}, which use a
2779 // whole lane of vector input, to allow further reduction at VM. E.g.
2780 // match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2781 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2782 Ops[OpIdx++] = DAG.getConstant(Val: ByteIndex, DL, VT: MVT::i32);
2783 }
2784 }
2785
2786 return DAG.getNode(Opcode: WebAssemblyISD::SHUFFLE, DL, VT: Op.getValueType(), Ops);
2787}
2788
2789SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2790 SelectionDAG &DAG) const {
2791 SDLoc DL(Op);
2792 // The legalizer does not know how to expand the unsupported comparison modes
2793 // of i64x2 vectors, so we manually unroll them here.
2794 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2795 SmallVector<SDValue, 2> LHS, RHS;
2796 DAG.ExtractVectorElements(Op: Op->getOperand(Num: 0), Args&: LHS);
2797 DAG.ExtractVectorElements(Op: Op->getOperand(Num: 1), Args&: RHS);
2798 const SDValue &CC = Op->getOperand(Num: 2);
2799 auto MakeLane = [&](unsigned I) {
2800 return DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT: MVT::i64, N1: LHS[I], N2: RHS[I],
2801 N3: DAG.getConstant(Val: uint64_t(-1), DL, VT: MVT::i64),
2802 N4: DAG.getConstant(Val: uint64_t(0), DL, VT: MVT::i64), N5: CC);
2803 };
2804 return DAG.getBuildVector(VT: Op->getValueType(ResNo: 0), DL,
2805 Ops: {MakeLane(0), MakeLane(1)});
2806}
2807
2808SDValue
2809WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2810 SelectionDAG &DAG) const {
2811 // Allow constant lane indices, expand variable lane indices
2812 SDNode *IdxNode = Op.getOperand(i: Op.getNumOperands() - 1).getNode();
2813 if (isa<ConstantSDNode>(Val: IdxNode)) {
2814 // Ensure the index type is i32 to match the tablegen patterns
2815 uint64_t Idx = IdxNode->getAsZExtVal();
2816 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2817 Ops[Op.getNumOperands() - 1] =
2818 DAG.getConstant(Val: Idx, DL: SDLoc(IdxNode), VT: MVT::i32);
2819 return DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT: Op.getValueType(), Ops);
2820 }
2821 // Perform default expansion
2822 return SDValue();
2823}
2824
2825static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2826 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2827 // 32-bit and 64-bit unrolled shifts will have proper semantics
2828 if (LaneT.bitsGE(VT: MVT::i32))
2829 return DAG.UnrollVectorOp(N: Op.getNode());
2830 // Otherwise mask the shift value to get proper semantics from 32-bit shift
2831 SDLoc DL(Op);
2832 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2833 SDValue Mask = DAG.getConstant(Val: LaneT.getSizeInBits() - 1, DL, VT: MVT::i32);
2834 unsigned ShiftOpcode = Op.getOpcode();
2835 SmallVector<SDValue, 16> ShiftedElements;
2836 DAG.ExtractVectorElements(Op: Op.getOperand(i: 0), Args&: ShiftedElements, Start: 0, Count: 0, EltVT: MVT::i32);
2837 SmallVector<SDValue, 16> ShiftElements;
2838 DAG.ExtractVectorElements(Op: Op.getOperand(i: 1), Args&: ShiftElements, Start: 0, Count: 0, EltVT: MVT::i32);
2839 SmallVector<SDValue, 16> UnrolledOps;
2840 for (size_t i = 0; i < NumLanes; ++i) {
2841 SDValue MaskedShiftValue =
2842 DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32, N1: ShiftElements[i], N2: Mask);
2843 SDValue ShiftedValue = ShiftedElements[i];
2844 if (ShiftOpcode == ISD::SRA)
2845 ShiftedValue = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i32,
2846 N1: ShiftedValue, N2: DAG.getValueType(LaneT));
2847 UnrolledOps.push_back(
2848 Elt: DAG.getNode(Opcode: ShiftOpcode, DL, VT: MVT::i32, N1: ShiftedValue, N2: MaskedShiftValue));
2849 }
2850 return DAG.getBuildVector(VT: Op.getValueType(), DL, Ops: UnrolledOps);
2851}
2852
2853SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2854 SelectionDAG &DAG) const {
2855 SDLoc DL(Op);
2856 // Only manually lower vector shifts
2857 assert(Op.getSimpleValueType().isVector());
2858
2859 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2860 auto ShiftVal = Op.getOperand(i: 1);
2861
2862 // Try to skip bitmask operation since it is implied inside shift instruction
2863 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2864 if (MaskOp.getOpcode() != ISD::AND)
2865 return MaskOp;
2866 SDValue LHS = MaskOp.getOperand(i: 0);
2867 SDValue RHS = MaskOp.getOperand(i: 1);
2868 if (MaskOp.getValueType().isVector()) {
2869 APInt MaskVal;
2870 if (!ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: MaskVal))
2871 std::swap(a&: LHS, b&: RHS);
2872
2873 if (ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: MaskVal) &&
2874 MaskVal == MaskBits)
2875 MaskOp = LHS;
2876 } else {
2877 if (!isa<ConstantSDNode>(Val: RHS.getNode()))
2878 std::swap(a&: LHS, b&: RHS);
2879
2880 auto ConstantRHS = dyn_cast<ConstantSDNode>(Val: RHS.getNode());
2881 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2882 MaskOp = LHS;
2883 }
2884
2885 return MaskOp;
2886 };
2887
2888 // Skip vector and operation
2889 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2890 ShiftVal = DAG.getSplatValue(V: ShiftVal);
2891 if (!ShiftVal)
2892 return unrollVectorShift(Op, DAG);
2893
2894 // Skip scalar and operation
2895 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2896 // Use anyext because none of the high bits can affect the shift
2897 ShiftVal = DAG.getAnyExtOrTrunc(Op: ShiftVal, DL, VT: MVT::i32);
2898
2899 unsigned Opcode;
2900 switch (Op.getOpcode()) {
2901 case ISD::SHL:
2902 Opcode = WebAssemblyISD::VEC_SHL;
2903 break;
2904 case ISD::SRA:
2905 Opcode = WebAssemblyISD::VEC_SHR_S;
2906 break;
2907 case ISD::SRL:
2908 Opcode = WebAssemblyISD::VEC_SHR_U;
2909 break;
2910 default:
2911 llvm_unreachable("unexpected opcode");
2912 }
2913
2914 return DAG.getNode(Opcode, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0), N2: ShiftVal);
2915}
2916
2917SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2918 SelectionDAG &DAG) const {
2919 EVT ResT = Op.getValueType();
2920 EVT SatVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2921
2922 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2923 (SatVT == MVT::i32 || SatVT == MVT::i64))
2924 return Op;
2925
2926 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2927 return Op;
2928
2929 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2930 return Op;
2931
2932 return SDValue();
2933}
2934
2935static bool HasNoSignedZerosOrNaNs(SDValue Op, SelectionDAG &DAG) {
2936 return (Op->getFlags().hasNoNaNs() ||
2937 (DAG.isKnownNeverNaN(Op: Op->getOperand(Num: 0)) &&
2938 DAG.isKnownNeverNaN(Op: Op->getOperand(Num: 1)))) &&
2939 (Op->getFlags().hasNoSignedZeros() ||
2940 DAG.isKnownNeverLogicalZero(Op: Op->getOperand(Num: 0)) ||
2941 DAG.isKnownNeverLogicalZero(Op: Op->getOperand(Num: 1)));
2942}
2943
2944SDValue WebAssemblyTargetLowering::LowerFMIN(SDValue Op,
2945 SelectionDAG &DAG) const {
2946 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2947 return DAG.getNode(Opcode: WebAssemblyISD::RELAXED_FMIN, DL: SDLoc(Op),
2948 VT: Op.getValueType(), N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
2949 }
2950 return SDValue();
2951}
2952
2953SDValue WebAssemblyTargetLowering::LowerFMAX(SDValue Op,
2954 SelectionDAG &DAG) const {
2955 if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2956 return DAG.getNode(Opcode: WebAssemblyISD::RELAXED_FMAX, DL: SDLoc(Op),
2957 VT: Op.getValueType(), N1: Op.getOperand(i: 0), N2: Op.getOperand(i: 1));
2958 }
2959 return SDValue();
2960}
2961
2962//===----------------------------------------------------------------------===//
2963// Custom DAG combine hooks
2964//===----------------------------------------------------------------------===//
2965static SDValue
2966performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2967 auto &DAG = DCI.DAG;
2968 auto Shuffle = cast<ShuffleVectorSDNode>(Val: N);
2969
2970 // Hoist vector bitcasts that don't change the number of lanes out of unary
2971 // shuffles, where they are less likely to get in the way of other combines.
2972 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2973 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2974 SDValue Bitcast = N->getOperand(Num: 0);
2975 if (Bitcast.getOpcode() != ISD::BITCAST)
2976 return SDValue();
2977 if (!N->getOperand(Num: 1).isUndef())
2978 return SDValue();
2979 SDValue CastOp = Bitcast.getOperand(i: 0);
2980 EVT SrcType = CastOp.getValueType();
2981 EVT DstType = Bitcast.getValueType();
2982 if (!SrcType.is128BitVector() ||
2983 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2984 return SDValue();
2985 SDValue NewShuffle = DAG.getVectorShuffle(
2986 VT: SrcType, dl: SDLoc(N), N1: CastOp, N2: DAG.getUNDEF(VT: SrcType), Mask: Shuffle->getMask());
2987 return DAG.getBitcast(VT: DstType, V: NewShuffle);
2988}
2989
2990/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2991/// split up into scalar instructions during legalization, and the vector
2992/// extending instructions are selected in performVectorExtendCombine below.
2993static SDValue
2994performVectorExtendToFPCombine(SDNode *N,
2995 TargetLowering::DAGCombinerInfo &DCI) {
2996 auto &DAG = DCI.DAG;
2997 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2998 N->getOpcode() == ISD::SINT_TO_FP);
2999
3000 EVT InVT = N->getOperand(Num: 0)->getValueType(ResNo: 0);
3001 EVT ResVT = N->getValueType(ResNo: 0);
3002 MVT ExtVT;
3003 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
3004 ExtVT = MVT::v4i32;
3005 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
3006 ExtVT = MVT::v2i32;
3007 else
3008 return SDValue();
3009
3010 unsigned Op =
3011 N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
3012 SDValue Conv = DAG.getNode(Opcode: Op, DL: SDLoc(N), VT: ExtVT, Operand: N->getOperand(Num: 0));
3013 return DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT: ResVT, Operand: Conv);
3014}
3015
3016static SDValue
3017performVectorNonNegToFPCombine(SDNode *N,
3018 TargetLowering::DAGCombinerInfo &DCI) {
3019 auto &DAG = DCI.DAG;
3020
3021 SDNodeFlags Flags = N->getFlags();
3022 SDValue Op0 = N->getOperand(Num: 0);
3023 EVT VT = N->getValueType(ResNo: 0);
3024
3025 // Optimize uitofp to sitofp when the sign bit is known to be zero.
3026 // Depending on the target (runtime) backend, this might be performance
3027 // neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
3028 if (VT.isVector() && (Flags.hasNonNeg() || DAG.SignBitIsZero(Op: Op0))) {
3029 return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: SDLoc(N), VT, Operand: Op0);
3030 }
3031
3032 return SDValue();
3033}
3034
3035static SDValue
3036performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
3037 auto &DAG = DCI.DAG;
3038 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
3039 N->getOpcode() == ISD::ZERO_EXTEND);
3040
3041 EVT ResVT = N->getValueType(ResNo: 0);
3042 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3043 SDLoc DL(N);
3044
3045 if (ResVT == MVT::v16i32 && N->getOperand(Num: 0)->getValueType(ResNo: 0) == MVT::v16i8) {
3046 // Use a tree of extend low/high to split and extend the input in two
3047 // layers to avoid doing several shuffles and even more extends.
3048 unsigned LowOp =
3049 IsSext ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3050 unsigned HighOp =
3051 IsSext ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3052 SDValue Input = N->getOperand(Num: 0);
3053 SDValue LowHalf = DAG.getNode(Opcode: LowOp, DL, VT: MVT::v8i16, Operand: Input);
3054 SDValue HighHalf = DAG.getNode(Opcode: HighOp, DL, VT: MVT::v8i16, Operand: Input);
3055 SDValue Subvectors[] = {
3056 DAG.getNode(Opcode: LowOp, DL, VT: MVT::v4i32, Operand: LowHalf),
3057 DAG.getNode(Opcode: HighOp, DL, VT: MVT::v4i32, Operand: LowHalf),
3058 DAG.getNode(Opcode: LowOp, DL, VT: MVT::v4i32, Operand: HighHalf),
3059 DAG.getNode(Opcode: HighOp, DL, VT: MVT::v4i32, Operand: HighHalf),
3060 };
3061 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ResVT, Ops: Subvectors);
3062 }
3063
3064 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
3065 // possible before the extract_subvector can be expanded.
3066 auto Extract = N->getOperand(Num: 0);
3067 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3068 return SDValue();
3069 auto Source = Extract.getOperand(i: 0);
3070 auto *IndexNode = dyn_cast<ConstantSDNode>(Val: Extract.getOperand(i: 1));
3071 if (IndexNode == nullptr)
3072 return SDValue();
3073 auto Index = IndexNode->getZExtValue();
3074
3075 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
3076 // extracted subvector is the low or high half of its source.
3077 if (ResVT == MVT::v8i16) {
3078 if (Extract.getValueType() != MVT::v8i8 ||
3079 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
3080 return SDValue();
3081 } else if (ResVT == MVT::v4i32) {
3082 if (Extract.getValueType() != MVT::v4i16 ||
3083 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
3084 return SDValue();
3085 } else if (ResVT == MVT::v2i64) {
3086 if (Extract.getValueType() != MVT::v2i32 ||
3087 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
3088 return SDValue();
3089 } else {
3090 return SDValue();
3091 }
3092
3093 bool IsLow = Index == 0;
3094
3095 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3096 : WebAssemblyISD::EXTEND_HIGH_S)
3097 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3098 : WebAssemblyISD::EXTEND_HIGH_U);
3099
3100 return DAG.getNode(Opcode: Op, DL, VT: ResVT, Operand: Source);
3101}
3102
3103static SDValue
3104performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
3105 auto &DAG = DCI.DAG;
3106
3107 auto GetWasmConversionOp = [](unsigned Op) {
3108 switch (Op) {
3109 case ISD::FP_TO_SINT_SAT:
3110 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3111 case ISD::FP_TO_UINT_SAT:
3112 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3113 case ISD::FP_ROUND:
3114 return WebAssemblyISD::DEMOTE_ZERO;
3115 }
3116 llvm_unreachable("unexpected op");
3117 };
3118
3119 auto IsZeroSplat = [](SDValue SplatVal) {
3120 auto *Splat = dyn_cast<BuildVectorSDNode>(Val: SplatVal.getNode());
3121 APInt SplatValue, SplatUndef;
3122 unsigned SplatBitSize;
3123 bool HasAnyUndefs;
3124 // Endianness doesn't matter in this context because we are looking for
3125 // an all-zero value.
3126 return Splat &&
3127 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3128 HasAnyUndefs) &&
3129 SplatValue == 0;
3130 };
3131
3132 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3133 // Combine this:
3134 //
3135 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3136 //
3137 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3138 //
3139 // Or this:
3140 //
3141 // (concat_vectors ({v2f32, v4f16} (fp_round ({v2f64, v4f32} $x))),
3142 // ({v2f32, v4f16} (splat 0)))
3143 //
3144 // into ({f32x4, f16x8}.demote_zero_{f64x2, f32x4} $x).
3145 EVT ResVT;
3146 EVT ExpectedConversionType;
3147 auto Conversion = N->getOperand(Num: 0);
3148 auto ConversionOp = Conversion.getOpcode();
3149 switch (ConversionOp) {
3150 case ISD::FP_TO_SINT_SAT:
3151 case ISD::FP_TO_UINT_SAT:
3152 ResVT = MVT::v4i32;
3153 ExpectedConversionType = MVT::v2i32;
3154 break;
3155 case ISD::FP_ROUND:
3156 if (Conversion.getValueType() == MVT::v2f32) {
3157 ResVT = MVT::v4f32;
3158 ExpectedConversionType = MVT::v2f32;
3159 } else if (Conversion.getValueType() == MVT::v4f16) {
3160 ResVT = MVT::v8f16;
3161 ExpectedConversionType = MVT::v4f16;
3162 } else {
3163 return SDValue();
3164 }
3165 break;
3166 default:
3167 return SDValue();
3168 }
3169
3170 if (N->getValueType(ResNo: 0) != ResVT)
3171 return SDValue();
3172
3173 if (Conversion.getValueType() != ExpectedConversionType)
3174 return SDValue();
3175
3176 auto Source = Conversion.getOperand(i: 0);
3177 if (!((Source.getValueType() == MVT::v2f64 && ResVT == MVT::v4f32) ||
3178 (Source.getValueType() == MVT::v2f64 && ResVT == MVT::v4i32) ||
3179 (Source.getValueType() == MVT::v4f32 && ResVT == MVT::v8f16)))
3180 return SDValue();
3181
3182 if (!IsZeroSplat(N->getOperand(Num: 1)) ||
3183 N->getOperand(Num: 1).getValueType() != ExpectedConversionType)
3184 return SDValue();
3185
3186 unsigned Op = GetWasmConversionOp(ConversionOp);
3187 return DAG.getNode(Opcode: Op, DL: SDLoc(N), VT: ResVT, Operand: Source);
3188 }
3189
3190 // Combine this:
3191 //
3192 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3193 //
3194 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3195 //
3196 // Or this:
3197 //
3198 // ({v4f32, v8f16} (fp_round (concat_vectors $x,
3199 // ({v2f64, v4f32} (splat 0)))))
3200 //
3201 // into ({f32x4, f16x8}.demote_zero_{f64x2, f32x4} $x).
3202 EVT ResVT;
3203 auto ConversionOp = N->getOpcode();
3204 switch (ConversionOp) {
3205 case ISD::FP_TO_SINT_SAT:
3206 case ISD::FP_TO_UINT_SAT:
3207 ResVT = MVT::v4i32;
3208 break;
3209 case ISD::FP_ROUND:
3210 ResVT = N->getValueType(ResNo: 0);
3211 break;
3212 default:
3213 llvm_unreachable("unexpected op");
3214 }
3215
3216 if (N->getValueType(ResNo: 0) != ResVT)
3217 return SDValue();
3218
3219 auto Concat = N->getOperand(Num: 0);
3220 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3221 return SDValue();
3222 EVT ConcatVT = Concat.getValueType();
3223 EVT SourceVT = Concat.getOperand(i: 0).getValueType();
3224
3225 if (!IsZeroSplat(Concat.getOperand(i: 1)))
3226 return SDValue();
3227
3228 if (ConversionOp == ISD::FP_ROUND) {
3229 bool IsF64ToF32 =
3230 ConcatVT == MVT::v4f64 && SourceVT == MVT::v2f64 && ResVT == MVT::v4f32;
3231 bool IsF32ToF16 =
3232 ConcatVT == MVT::v8f32 && SourceVT == MVT::v4f32 && ResVT == MVT::v8f16;
3233 if (!(IsF64ToF32 || IsF32ToF16))
3234 return SDValue();
3235 } else {
3236 if (ConcatVT != MVT::v4f64 || SourceVT != MVT::v2f64 || ResVT != MVT::v4i32)
3237 return SDValue();
3238 }
3239
3240 unsigned Op = GetWasmConversionOp(ConversionOp);
3241 return DAG.getNode(Opcode: Op, DL: SDLoc(N), VT: ResVT, Operand: Concat.getOperand(i: 0));
3242}
3243
3244// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3245static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3246 const SDLoc &DL, unsigned VectorWidth) {
3247 EVT VT = Vec.getValueType();
3248 EVT ElVT = VT.getVectorElementType();
3249 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3250 EVT ResultVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: ElVT,
3251 NumElements: VT.getVectorNumElements() / Factor);
3252
3253 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3254 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3255 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3256
3257 // This is the index of the first element of the VectorWidth-bit chunk
3258 // we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3259 IdxVal &= ~(ElemsPerChunk - 1);
3260
3261 // If the input is a buildvector just emit a smaller one.
3262 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3263 return DAG.getBuildVector(VT: ResultVT, DL,
3264 Ops: Vec->ops().slice(N: IdxVal, M: ElemsPerChunk));
3265
3266 SDValue VecIdx = DAG.getIntPtrConstant(Val: IdxVal, DL);
3267 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ResultVT, N1: Vec, N2: VecIdx);
3268}
3269
3270// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3271// is the expected destination value type after recursion. In is the initial
3272// input. Note that the input should have enough leading zero bits to prevent
3273// NARROW_U from saturating results.
3274static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
3275 SelectionDAG &DAG) {
3276 EVT SrcVT = In.getValueType();
3277
3278 // No truncation required, we might get here due to recursive calls.
3279 if (SrcVT == DstVT)
3280 return In;
3281
3282 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3283 unsigned NumElems = SrcVT.getVectorNumElements();
3284 if (!isPowerOf2_32(Value: NumElems))
3285 return SDValue();
3286 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3287 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3288
3289 LLVMContext &Ctx = *DAG.getContext();
3290 EVT PackedSVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: SrcVT.getScalarSizeInBits() / 2);
3291
3292 // Narrow to the largest type possible:
3293 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3294 EVT InVT = MVT::i16, OutVT = MVT::i8;
3295 if (SrcVT.getScalarSizeInBits() > 16) {
3296 InVT = MVT::i32;
3297 OutVT = MVT::i16;
3298 }
3299 unsigned SubSizeInBits = SrcSizeInBits / 2;
3300 InVT = EVT::getVectorVT(Context&: Ctx, VT: InVT, NumElements: SubSizeInBits / InVT.getSizeInBits());
3301 OutVT = EVT::getVectorVT(Context&: Ctx, VT: OutVT, NumElements: SubSizeInBits / OutVT.getSizeInBits());
3302
3303 // Split lower/upper subvectors.
3304 SDValue Lo = extractSubVector(Vec: In, IdxVal: 0, DAG, DL, VectorWidth: SubSizeInBits);
3305 SDValue Hi = extractSubVector(Vec: In, IdxVal: NumElems / 2, DAG, DL, VectorWidth: SubSizeInBits);
3306
3307 // 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3308 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3309 Lo = DAG.getBitcast(VT: InVT, V: Lo);
3310 Hi = DAG.getBitcast(VT: InVT, V: Hi);
3311 SDValue Res = DAG.getNode(Opcode: WebAssemblyISD::NARROW_U, DL, VT: OutVT, N1: Lo, N2: Hi);
3312 return DAG.getBitcast(VT: DstVT, V: Res);
3313 }
3314
3315 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3316 EVT PackedVT = EVT::getVectorVT(Context&: Ctx, VT: PackedSVT, NumElements: NumElems / 2);
3317 Lo = truncateVectorWithNARROW(DstVT: PackedVT, In: Lo, DL, DAG);
3318 Hi = truncateVectorWithNARROW(DstVT: PackedVT, In: Hi, DL, DAG);
3319
3320 PackedVT = EVT::getVectorVT(Context&: Ctx, VT: PackedSVT, NumElements: NumElems);
3321 SDValue Res = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: PackedVT, N1: Lo, N2: Hi);
3322 return truncateVectorWithNARROW(DstVT, In: Res, DL, DAG);
3323}
3324
3325static SDValue performTruncateCombine(SDNode *N,
3326 TargetLowering::DAGCombinerInfo &DCI) {
3327 auto &DAG = DCI.DAG;
3328
3329 SDValue In = N->getOperand(Num: 0);
3330 EVT InVT = In.getValueType();
3331 if (!InVT.isSimple())
3332 return SDValue();
3333
3334 EVT OutVT = N->getValueType(ResNo: 0);
3335 if (!OutVT.isVector())
3336 return SDValue();
3337
3338 EVT OutSVT = OutVT.getVectorElementType();
3339 EVT InSVT = InVT.getVectorElementType();
3340 // Currently only cover truncate to v16i8 or v8i16.
3341 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3342 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3343 return SDValue();
3344
3345 SDLoc DL(N);
3346 APInt Mask = APInt::getLowBitsSet(numBits: InVT.getScalarSizeInBits(),
3347 loBitsSet: OutVT.getScalarSizeInBits());
3348 In = DAG.getNode(Opcode: ISD::AND, DL, VT: InVT, N1: In, N2: DAG.getConstant(Val: Mask, DL, VT: InVT));
3349 return truncateVectorWithNARROW(DstVT: OutVT, In, DL, DAG);
3350}
3351
3352static SDValue performBitcastCombine(SDNode *N,
3353 TargetLowering::DAGCombinerInfo &DCI) {
3354 using namespace llvm::SDPatternMatch;
3355 auto &DAG = DCI.DAG;
3356 SDLoc DL(N);
3357 SDValue Src = N->getOperand(Num: 0);
3358 EVT VT = N->getValueType(ResNo: 0);
3359 EVT SrcVT = Src.getValueType();
3360
3361 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3362 SrcVT.isFixedLengthVectorOf(EltVT: MVT::i1)))
3363 return SDValue();
3364
3365 unsigned NumElts = SrcVT.getVectorNumElements();
3366 EVT Width = MVT::getIntegerVT(BitWidth: 128 / NumElts);
3367
3368 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3369 // ==> bitmask
3370 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3371 return DAG.getZExtOrTrunc(
3372 Op: DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3373 Ops: {DAG.getConstant(Val: Intrinsic::wasm_bitmask, DL, VT: MVT::i32),
3374 DAG.getSExtOrTrunc(Op: N->getOperand(Num: 0), DL,
3375 VT: SrcVT.changeVectorElementType(
3376 Context&: *DAG.getContext(), EltVT: Width))}),
3377 DL, VT);
3378 }
3379
3380 // bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3381 if (NumElts == 32 || NumElts == 64) {
3382 SDValue Concat, SetCCVector;
3383 ISD::CondCode SetCond;
3384
3385 if (!sd_match(N, P: m_BitCast(Op: m_c_SetCC(LHS: m_Value(N&: Concat), RHS: m_Value(N&: SetCCVector),
3386 CC: m_CondCode(CC&: SetCond)))))
3387 return SDValue();
3388 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3389 return SDValue();
3390
3391 // Reconstruct the wide bitmask from each CONCAT_VECTORS operand.
3392 // Derive the per-chunk mask/integer types from the actual operand type
3393 // instead of hardcoding v16i1 / i16 for every chunk.
3394 EVT ConcatOperandVT = Concat.getOperand(i: 0).getValueType();
3395 unsigned ConcatOperandNumElts = ConcatOperandVT.getVectorNumElements();
3396
3397 EVT ConcatOperandMaskVT =
3398 EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
3399 EC: ElementCount::getFixed(MinVal: ConcatOperandNumElts));
3400 EVT ConcatOperandBitmaskVT =
3401 EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ConcatOperandNumElts);
3402 EVT ReturnVT = N->getValueType(ResNo: 0);
3403 SDValue ReconstructedBitmask = DAG.getConstant(Val: 0, DL, VT: ReturnVT);
3404 // Example:
3405 // v32i16 = concat(v8i16, v8i16, v8i16, v8i16)
3406 // -> v8i1 + v8i1 + v8i1 + v8i1
3407 // -> i8 + i8 + i8 + i8
3408 // -> reconstructed i32 bitmask
3409 for (size_t I = 0; I < Concat->ops().size(); ++I) {
3410 SDValue ConcatOperand = Concat.getOperand(i: I);
3411 assert(ConcatOperand.getValueType() == ConcatOperandVT &&
3412 "concat_vectors operands must have the same type");
3413
3414 SDValue SetCCVectorOperand =
3415 extractSubVector(Vec: SetCCVector, IdxVal: I * ConcatOperandNumElts, DAG, DL, VectorWidth: 128);
3416 if (!SetCCVectorOperand ||
3417 SetCCVectorOperand.getValueType() != ConcatOperandVT)
3418 return SDValue();
3419
3420 // Build the per-chunk mask using the correct chunk type:
3421 // v16i8 -> v16i1 -> i16
3422 // v8i16 -> v8i1 -> i8
3423 // v4i32 -> v4i1 -> i4
3424 // v2i64 -> v2i1 -> i2
3425 SDValue ConcatOperandMask = DAG.getSetCC(
3426 DL, VT: ConcatOperandMaskVT, LHS: ConcatOperand, RHS: SetCCVectorOperand, Cond: SetCond);
3427 SDValue ConcatOperandBitmask =
3428 DAG.getBitcast(VT: ConcatOperandBitmaskVT, V: ConcatOperandMask);
3429 SDValue ExtendedConcatOperandBitmask =
3430 DAG.getZExtOrTrunc(Op: ConcatOperandBitmask, DL, VT: ReturnVT);
3431
3432 // Shift the previously reconstructed bits to make room for this chunk.
3433 if (I != 0) {
3434 ReconstructedBitmask = DAG.getNode(
3435 Opcode: ISD::SHL, DL, VT: ReturnVT, N1: ReconstructedBitmask,
3436 N2: DAG.getShiftAmountConstant(Val: ConcatOperandNumElts, VT: ReturnVT, DL));
3437 }
3438
3439 // Merge disjoint partial bitmasks with OR.
3440 ReconstructedBitmask =
3441 DAG.getNode(Opcode: ISD::OR, DL, VT: ReturnVT, N1: ReconstructedBitmask,
3442 N2: ExtendedConcatOperandBitmask);
3443 }
3444
3445 return ReconstructedBitmask;
3446 }
3447
3448 return SDValue();
3449}
3450
3451static SDValue performBitmaskCombine(SDNode *N, SelectionDAG &DAG) {
3452 // bitmask (setcc <X>, 0, setlt) => bitmask X
3453 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3454 using namespace llvm::SDPatternMatch;
3455
3456 if (N->getConstantOperandVal(Num: 0) != Intrinsic::wasm_bitmask)
3457 return SDValue();
3458
3459 SDValue LHS;
3460 if (!sd_match(N: N->getOperand(Num: 1), P: m_c_SetCC(LHS: m_Value(N&: LHS), RHS: m_Zero(),
3461 CC: m_SpecificCondCode(CC: ISD::SETLT))))
3462 return SDValue();
3463
3464 SDLoc DL(N);
3465 return DAG.getNode(
3466 Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: N->getValueType(ResNo: 0),
3467 Ops: {DAG.getConstant(Val: Intrinsic::wasm_bitmask, DL, VT: MVT::i32), LHS});
3468}
3469
3470static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3471 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3472 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3473 // any_true (setcc <X>, 0, ne) => (any_true X)
3474 // all_true (setcc <X>, 0, ne) => (all_true X)
3475 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3476 using namespace llvm::SDPatternMatch;
3477
3478 SDValue LHS;
3479 if (N->getNumOperands() < 2 ||
3480 !sd_match(N: N->getOperand(Num: 1),
3481 P: m_c_SetCC(LHS: m_Value(N&: LHS), RHS: m_Zero(), CC: m_CondCode())))
3482 return SDValue();
3483 EVT LT = LHS.getValueType();
3484 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3485 return SDValue();
3486
3487 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3488 ISD::CondCode SetType,
3489 Intrinsic::WASMIntrinsics InPost) {
3490 if (N->getConstantOperandVal(Num: 0) != InPre)
3491 return SDValue();
3492
3493 SDValue LHS;
3494 if (!sd_match(N: N->getOperand(Num: 1), P: m_c_SetCC(LHS: m_Value(N&: LHS), RHS: m_Zero(),
3495 CC: m_SpecificCondCode(CC: SetType))))
3496 return SDValue();
3497
3498 SDLoc DL(N);
3499 SDValue Ret = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3500 Ops: {DAG.getConstant(Val: InPost, DL, VT: MVT::i32), LHS});
3501 if (SetType == ISD::SETEQ)
3502 Ret = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i32, N1: Ret,
3503 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
3504 return DAG.getZExtOrTrunc(Op: Ret, DL, VT: N->getValueType(ResNo: 0));
3505 };
3506
3507 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3508 Intrinsic::wasm_alltrue))
3509 return AnyTrueEQ;
3510 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3511 Intrinsic::wasm_anytrue))
3512 return AllTrueEQ;
3513 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3514 Intrinsic::wasm_anytrue))
3515 return AnyTrueNE;
3516 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3517 Intrinsic::wasm_alltrue))
3518 return AllTrueNE;
3519
3520 return SDValue();
3521}
3522
3523struct MaskReduceInfo {
3524 Intrinsic::ID IID;
3525 unsigned WideCombineOpcode;
3526 bool Invert;
3527};
3528
3529static SDValue combineSmallMaskReduction(SDNode *N, EVT FromVT,
3530 unsigned NumElts,
3531 const MaskReduceInfo &Info,
3532 SelectionDAG &DAG) {
3533 EVT VecVT = FromVT.changeVectorElementType(Context&: *DAG.getContext(),
3534 EltVT: MVT::getIntegerVT(BitWidth: 128 / NumElts));
3535 assert(VecVT.getSizeInBits() == 128 &&
3536 "mask reduction should be widened to a 128-bit vector");
3537
3538 SDLoc DL(N);
3539 SDValue Mask = N->getOperand(Num: 0)->getOperand(Num: 0);
3540 SDValue Ret = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3541 Ops: {DAG.getConstant(Val: Info.IID, DL, VT: MVT::i32),
3542 DAG.getSExtOrTrunc(Op: Mask, DL, VT: VecVT)});
3543 if (Info.Invert)
3544 Ret = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i32, N1: Ret,
3545 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
3546 return DAG.getZExtOrTrunc(Op: Ret, DL, VT: N->getValueType(ResNo: 0));
3547}
3548
3549static SDValue combineWideMaskReduction(SDNode *N, SDValue Mask, EVT MaskVT,
3550 unsigned NumElts,
3551 const MaskReduceInfo &Info,
3552 SelectionDAG &DAG) {
3553 assert((NumElts == 32 || NumElts == 64) &&
3554 "combineWideMaskReduction is only for wide masks");
3555 assert(MaskVT.isFixedLengthVector() &&
3556 MaskVT.getVectorElementType() == MVT::i1);
3557 SDLoc DL(N);
3558 unsigned ChunkElts = 16;
3559 EVT ChunkMaskVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
3560 EC: ElementCount::getFixed(MinVal: ChunkElts));
3561 EVT LegalVecVT = ChunkMaskVT.changeVectorElementType(
3562 Context&: *DAG.getContext(), EltVT: MVT::getIntegerVT(BitWidth: 128 / ChunkElts));
3563
3564 SmallVector<SDValue, 4> ChunkResults;
3565 // Split the wide mask into v16i1 chunks and reduce each chunk separately.
3566 // For example:
3567 // v32i1: [0..15] [16..31]
3568 // | |
3569 // v v
3570 // chunk0 chunk1
3571 //
3572 // v64i1: [0..15] [16..31] [32..47] [48..63]
3573 // | | | |
3574 // v v v v
3575 // chunk0 chunk1 chunk2 chunk3
3576 //
3577 // each chunk:
3578 // v16i1 -> v16i8 -> wasm_anytrue/alltrue -> i32 0/1
3579 for (unsigned I = 0; I < NumElts; I += ChunkElts) {
3580 SDValue ChunkMask = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ChunkMaskVT,
3581 N1: Mask, N2: DAG.getVectorIdxConstant(Val: I, DL));
3582 SDValue LegalMask = DAG.getSExtOrTrunc(Op: ChunkMask, DL, VT: LegalVecVT);
3583 SDValue Reduced =
3584 DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3585 N1: DAG.getConstant(Val: Info.IID, DL, VT: MVT::i32), N2: LegalMask);
3586 ChunkResults.push_back(Elt: Reduced);
3587 }
3588
3589 SDValue Acc = ChunkResults[0];
3590 for (unsigned I = 1; I < ChunkResults.size(); ++I)
3591 Acc =
3592 DAG.getNode(Opcode: Info.WideCombineOpcode, DL, VT: MVT::i32, N1: Acc, N2: ChunkResults[I]);
3593
3594 if (Info.Invert)
3595 Acc = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i32, N1: Acc,
3596 N2: DAG.getConstant(Val: 1, DL, VT: MVT::i32));
3597
3598 return DAG.getZExtOrTrunc(Op: Acc, DL, VT: N->getValueType(ResNo: 0));
3599}
3600
3601static std::optional<MaskReduceInfo> classifyMaskReduction(SDNode *N) {
3602 auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: 1));
3603 if (!C)
3604 return std::nullopt;
3605
3606 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
3607
3608 // setcc (bitcast mask), 0, ne -> any_true(mask)
3609 if (C->isZero() && CC == ISD::SETNE)
3610 return MaskReduceInfo{.IID: Intrinsic::wasm_anytrue, .WideCombineOpcode: ISD::OR, .Invert: false};
3611
3612 // setcc (bitcast mask), 0, eq -> !any_true(mask)
3613 if (C->isZero() && CC == ISD::SETEQ)
3614 return MaskReduceInfo{.IID: Intrinsic::wasm_anytrue, .WideCombineOpcode: ISD::OR, .Invert: true};
3615
3616 // setcc (bitcast mask), -1, eq -> all_true(mask)
3617 if (C->isAllOnes() && CC == ISD::SETEQ)
3618 return MaskReduceInfo{.IID: Intrinsic::wasm_alltrue, .WideCombineOpcode: ISD::AND, .Invert: false};
3619
3620 // setcc (bitcast mask), -1, ne -> !all_true(mask)
3621 if (C->isAllOnes() && CC == ISD::SETNE)
3622 return MaskReduceInfo{.IID: Intrinsic::wasm_alltrue, .WideCombineOpcode: ISD::AND, .Invert: true};
3623
3624 return std::nullopt;
3625}
3626
3627/// Try to convert a i128 comparison to a v16i8 comparison before type
3628/// legalization splits it up into chunks
3629static SDValue
3630combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3631 const WebAssemblySubtarget *Subtarget) {
3632
3633 SDLoc DL(N);
3634 SDValue X = N->getOperand(Num: 0);
3635 SDValue Y = N->getOperand(Num: 1);
3636 EVT VT = N->getValueType(ResNo: 0);
3637 EVT OpVT = X.getValueType();
3638
3639 SelectionDAG &DAG = DCI.DAG;
3640 if (DCI.DAG.getMachineFunction().getFunction().hasFnAttribute(
3641 Kind: Attribute::NoImplicitFloat))
3642 return SDValue();
3643
3644 ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: 2))->get();
3645 // We're looking for an oversized integer equality comparison with SIMD
3646 if (!OpVT.isScalarInteger() || !OpVT.isByteSized() || OpVT != MVT::i128 ||
3647 !Subtarget->hasSIMD128() || !isIntEqualitySetCC(Code: CC))
3648 return SDValue();
3649
3650 // Don't perform this combine if constructing the vector will be expensive.
3651 auto IsVectorBitCastCheap = [](SDValue X) {
3652 X = peekThroughBitcasts(V: X);
3653 return isa<ConstantSDNode>(Val: X) || X.getOpcode() == ISD::LOAD;
3654 };
3655
3656 if (!IsVectorBitCastCheap(X) || !IsVectorBitCastCheap(Y))
3657 return SDValue();
3658
3659 SDValue VecX = DAG.getBitcast(VT: MVT::v16i8, V: X);
3660 SDValue VecY = DAG.getBitcast(VT: MVT::v16i8, V: Y);
3661 SDValue Cmp = DAG.getSetCC(DL, VT: MVT::v16i8, LHS: VecX, RHS: VecY, Cond: CC);
3662
3663 SDValue Intr =
3664 DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3665 Ops: {DAG.getConstant(Val: CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3666 : Intrinsic::wasm_anytrue,
3667 DL, VT: MVT::i32),
3668 Cmp});
3669
3670 return DAG.getSetCC(DL, VT, LHS: Intr, RHS: DAG.getConstant(Val: 0, DL, VT: MVT::i32),
3671 Cond: ISD::SETNE);
3672}
3673
3674static SDValue performSETCCCombine(SDNode *N,
3675 TargetLowering::DAGCombinerInfo &DCI,
3676 const WebAssemblySubtarget *Subtarget) {
3677 if (!DCI.isBeforeLegalize())
3678 return SDValue();
3679
3680 EVT VT = N->getValueType(ResNo: 0);
3681 if (!VT.isScalarInteger())
3682 return SDValue();
3683
3684 if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3685 return V;
3686
3687 SDValue LHS = N->getOperand(Num: 0);
3688 if (LHS->getOpcode() != ISD::BITCAST)
3689 return SDValue();
3690
3691 EVT FromVT = LHS->getOperand(Num: 0).getValueType();
3692 if (!FromVT.isFixedLengthVectorOf(EltVT: MVT::i1))
3693 return SDValue();
3694
3695 unsigned NumElts = FromVT.getVectorNumElements();
3696 auto Info = classifyMaskReduction(N);
3697 if (!Info)
3698 return SDValue();
3699
3700 auto &DAG = DCI.DAG;
3701 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16)
3702 return combineSmallMaskReduction(N, FromVT, NumElts, Info: *Info, DAG);
3703
3704 if (NumElts == 32 || NumElts == 64)
3705 return combineWideMaskReduction(N, Mask: LHS.getOperand(i: 0), MaskVT: FromVT, NumElts,
3706 Info: *Info, DAG);
3707
3708 return SDValue();
3709}
3710
3711static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG) {
3712 EVT VT = N->getValueType(ResNo: 0);
3713 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3714 return SDValue();
3715
3716 // Mul with extending inputs.
3717 SDValue LHS = N->getOperand(Num: 0);
3718 SDValue RHS = N->getOperand(Num: 1);
3719 if (LHS.getOpcode() != RHS.getOpcode())
3720 return SDValue();
3721
3722 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3723 LHS.getOpcode() != ISD::ZERO_EXTEND)
3724 return SDValue();
3725
3726 if (LHS->getOperand(Num: 0).getValueType() != RHS->getOperand(Num: 0).getValueType())
3727 return SDValue();
3728
3729 EVT FromVT = LHS->getOperand(Num: 0).getValueType();
3730 EVT EltTy = FromVT.getVectorElementType();
3731 if (EltTy != MVT::i8)
3732 return SDValue();
3733
3734 // For an input DAG that looks like this
3735 // %a = input_type
3736 // %b = input_type
3737 // %lhs = extend %a to output_type
3738 // %rhs = extend %b to output_type
3739 // %mul = mul %lhs, %rhs
3740
3741 // input_type | output_type | instructions
3742 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3743 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3744 // | | %low_low = i32x4.ext_low_i16x8_ %low
3745 // | | %low_high = i32x4.ext_high_i16x8_ %low
3746 // | | %high_low = i32x4.ext_low_i16x8_ %high
3747 // | | %high_high = i32x4.ext_high_i16x8_ %high
3748 // | | %res = concat_vector(...)
3749 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3750 // | | %low_low = i32x4.ext_low_i16x8_ %low
3751 // | | %low_high = i32x4.ext_high_i16x8_ %low
3752 // | | %res = concat_vector(%low_low, %low_high)
3753
3754 SDLoc DL(N);
3755 unsigned NumElts = VT.getVectorNumElements();
3756 SDValue ExtendInLHS = LHS->getOperand(Num: 0);
3757 SDValue ExtendInRHS = RHS->getOperand(Num: 0);
3758 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3759 unsigned ExtendLowOpc =
3760 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3761 unsigned ExtendHighOpc =
3762 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3763
3764 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3765 return DAG.getNode(Opcode: ExtendLowOpc, DL, VT, Operand: Op);
3766 };
3767 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3768 return DAG.getNode(Opcode: ExtendHighOpc, DL, VT, Operand: Op);
3769 };
3770
3771 if (NumElts == 16) {
3772 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3773 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3774 SDValue MulLow = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v8i16, N1: LowLHS, N2: LowRHS);
3775 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3776 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3777 SDValue MulHigh = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v8i16, N1: HighLHS, N2: HighRHS);
3778 SDValue SubVectors[] = {
3779 GetExtendLow(MVT::v4i32, MulLow),
3780 GetExtendHigh(MVT::v4i32, MulLow),
3781 GetExtendLow(MVT::v4i32, MulHigh),
3782 GetExtendHigh(MVT::v4i32, MulHigh),
3783 };
3784 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: SubVectors);
3785 } else {
3786 assert(NumElts == 8);
3787 SDValue LowLHS = DAG.getNode(Opcode: LHS->getOpcode(), DL, VT: MVT::v8i16, Operand: ExtendInLHS);
3788 SDValue LowRHS = DAG.getNode(Opcode: RHS->getOpcode(), DL, VT: MVT::v8i16, Operand: ExtendInRHS);
3789 SDValue MulLow = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v8i16, N1: LowLHS, N2: LowRHS);
3790 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3791 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3792 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: Lo, N2: Hi);
3793 }
3794 return SDValue();
3795}
3796
3797static SDValue performMulCombine(SDNode *N,
3798 TargetLowering::DAGCombinerInfo &DCI) {
3799 assert(N->getOpcode() == ISD::MUL);
3800 EVT VT = N->getValueType(ResNo: 0);
3801 if (!VT.isVector())
3802 return SDValue();
3803
3804 if (auto Res = TryWideExtMulCombine(N, DAG&: DCI.DAG))
3805 return Res;
3806
3807 // We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3808 // extend them to v8i16.
3809 if (VT != MVT::v8i8 && VT != MVT::v16i8)
3810 return SDValue();
3811
3812 SDLoc DL(N);
3813 SelectionDAG &DAG = DCI.DAG;
3814 SDValue LHS = N->getOperand(Num: 0);
3815 SDValue RHS = N->getOperand(Num: 1);
3816 EVT MulVT = MVT::v8i16;
3817
3818 if (VT == MVT::v8i8) {
3819 SDValue PromotedLHS = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: MVT::v16i8, N1: LHS,
3820 N2: DAG.getUNDEF(VT: MVT::v8i8));
3821 SDValue PromotedRHS = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: MVT::v16i8, N1: RHS,
3822 N2: DAG.getUNDEF(VT: MVT::v8i8));
3823 SDValue LowLHS =
3824 DAG.getNode(Opcode: WebAssemblyISD::EXTEND_LOW_U, DL, VT: MulVT, Operand: PromotedLHS);
3825 SDValue LowRHS =
3826 DAG.getNode(Opcode: WebAssemblyISD::EXTEND_LOW_U, DL, VT: MulVT, Operand: PromotedRHS);
3827 SDValue MulLow = DAG.getBitcast(
3828 VT: MVT::v16i8, V: DAG.getNode(Opcode: ISD::MUL, DL, VT: MulVT, N1: LowLHS, N2: LowRHS));
3829 // Take the low byte of each lane.
3830 SDValue Shuffle = DAG.getVectorShuffle(
3831 VT: MVT::v16i8, dl: DL, N1: MulLow, N2: DAG.getUNDEF(VT: MVT::v16i8),
3832 Mask: {0, 2, 4, 6, 8, 10, 12, 14, -1, -1, -1, -1, -1, -1, -1, -1});
3833 return extractSubVector(Vec: Shuffle, IdxVal: 0, DAG, DL, VectorWidth: 64);
3834 } else {
3835 assert(VT == MVT::v16i8 && "Expected v16i8");
3836 SDValue LowLHS = DAG.getNode(Opcode: WebAssemblyISD::EXTEND_LOW_U, DL, VT: MulVT, Operand: LHS);
3837 SDValue LowRHS = DAG.getNode(Opcode: WebAssemblyISD::EXTEND_LOW_U, DL, VT: MulVT, Operand: RHS);
3838 SDValue HighLHS =
3839 DAG.getNode(Opcode: WebAssemblyISD::EXTEND_HIGH_U, DL, VT: MulVT, Operand: LHS);
3840 SDValue HighRHS =
3841 DAG.getNode(Opcode: WebAssemblyISD::EXTEND_HIGH_U, DL, VT: MulVT, Operand: RHS);
3842
3843 SDValue MulLow =
3844 DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::MUL, DL, VT: MulVT, N1: LowLHS, N2: LowRHS));
3845 SDValue MulHigh =
3846 DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::MUL, DL, VT: MulVT, N1: HighLHS, N2: HighRHS));
3847
3848 // Take the low byte of each lane.
3849 return DAG.getVectorShuffle(
3850 VT, dl: DL, N1: MulLow, N2: MulHigh,
3851 Mask: {0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30});
3852 }
3853}
3854
3855SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3856 SelectionDAG &DAG) {
3857 SDLoc DL(In);
3858 LLVMContext &Ctx = *DAG.getContext();
3859 EVT InVT = In.getValueType();
3860 unsigned NumElems = InVT.getVectorNumElements() * 2;
3861 EVT OutVT = EVT::getVectorVT(Context&: Ctx, VT: InVT.getVectorElementType(), NumElements: NumElems);
3862 SDValue Concat =
3863 DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: OutVT, N1: In, N2: DAG.getPOISON(VT: InVT));
3864 if (NumElems < RequiredNumElems) {
3865 return DoubleVectorWidth(In: Concat, RequiredNumElems, DAG);
3866 }
3867 return Concat;
3868}
3869
3870SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
3871 EVT OutVT = N->getValueType(ResNo: 0);
3872 if (!OutVT.isVector())
3873 return SDValue();
3874
3875 EVT OutElTy = OutVT.getVectorElementType();
3876 if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3877 return SDValue();
3878
3879 unsigned NumElems = OutVT.getVectorNumElements();
3880 if (!isPowerOf2_32(Value: NumElems))
3881 return SDValue();
3882
3883 EVT FPVT = N->getOperand(Num: 0)->getValueType(ResNo: 0);
3884 if (FPVT.getVectorElementType() != MVT::f32)
3885 return SDValue();
3886
3887 SDLoc DL(N);
3888
3889 // First, convert to i32.
3890 LLVMContext &Ctx = *DAG.getContext();
3891 EVT IntVT = EVT::getVectorVT(Context&: Ctx, VT: MVT::i32, NumElements: NumElems);
3892 SDValue ToInt = DAG.getNode(Opcode: N->getOpcode(), DL, VT: IntVT, Operand: N->getOperand(Num: 0));
3893 APInt Mask = APInt::getLowBitsSet(numBits: IntVT.getScalarSizeInBits(),
3894 loBitsSet: OutVT.getScalarSizeInBits());
3895 // Mask out the top MSBs.
3896 SDValue Masked =
3897 DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: ToInt, N2: DAG.getConstant(Val: Mask, DL, VT: IntVT));
3898
3899 if (OutVT.getSizeInBits() < 128) {
3900 // Create a wide enough vector that we can use narrow.
3901 EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3902 unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3903 SDValue WideVector = DoubleVectorWidth(In: Masked, RequiredNumElems: NumRequiredElems, DAG);
3904 SDValue Trunc = truncateVectorWithNARROW(DstVT: NarrowedVT, In: WideVector, DL, DAG);
3905 return DAG.getBitcast(
3906 VT: OutVT, V: extractSubVector(Vec: Trunc, IdxVal: 0, DAG, DL, VectorWidth: OutVT.getSizeInBits()));
3907 } else {
3908 return truncateVectorWithNARROW(DstVT: OutVT, In: Masked, DL, DAG);
3909 }
3910 return SDValue();
3911}
3912
3913// Wide vector shift operations such as v8i32 with sign-extended
3914// operands cause Type Legalizer crashes because the target-specific
3915// extension nodes cannot be directly mapped to the 256-bit size.
3916//
3917// To resolve the crash and optimize performance, we intercept the
3918// illegal v8i32 shift in DAGCombine. We convert the shift amounts
3919// into multipliers and manually split the vector into two v4i32 halves.
3920//
3921// Before: t1: v8i32 = shl (sign_extend v8i16), const_vec
3922// After : t2: v4i32 = mul (ext_low_s v8i16), (ext_low_s narrow_vec)
3923// t3: v4i32 = mul (ext_high_s v8i16), (ext_high_s narrow_vec)
3924// t4: v8i32 = concat_vectors t2, t3
3925static SDValue performShiftCombine(SDNode *N,
3926 TargetLowering::DAGCombinerInfo &DCI) {
3927 SelectionDAG &DAG = DCI.DAG;
3928 assert(N->getOpcode() == ISD::SHL);
3929 EVT VT = N->getValueType(ResNo: 0);
3930 if (VT != MVT::v8i32)
3931 return SDValue();
3932
3933 SDValue LHS = N->getOperand(Num: 0);
3934 SDValue RHS = N->getOperand(Num: 1);
3935 unsigned ExtOpc = LHS.getOpcode();
3936 if (ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND)
3937 return SDValue();
3938
3939 if (RHS.getOpcode() != ISD::BUILD_VECTOR)
3940 return SDValue();
3941
3942 SDLoc DL(N);
3943 SDValue ExtendIn = LHS.getOperand(i: 0);
3944 EVT FromVT = ExtendIn.getValueType();
3945 if (FromVT != MVT::v8i16)
3946 return SDValue();
3947
3948 unsigned NumElts = VT.getVectorNumElements();
3949 unsigned BitWidth = FromVT.getScalarSizeInBits();
3950 bool IsSigned = (ExtOpc == ISD::SIGN_EXTEND);
3951 unsigned MaxValidShift = IsSigned ? (BitWidth - 1) : BitWidth;
3952 SmallVector<SDValue, 16> MulConsts;
3953 for (unsigned I = 0; I < NumElts; ++I) {
3954 auto *C = dyn_cast<ConstantSDNode>(Val: RHS.getOperand(i: I));
3955 if (!C)
3956 return SDValue();
3957
3958 const APInt &ShiftAmt = C->getAPIntValue();
3959 if (ShiftAmt.uge(RHS: MaxValidShift))
3960 return SDValue();
3961
3962 APInt MulAmt = APInt::getOneBitSet(numBits: BitWidth, BitNo: ShiftAmt.getZExtValue());
3963 MulConsts.push_back(Elt: DAG.getConstant(Val: MulAmt, DL, VT: FromVT.getScalarType(),
3964 /*isTarget=*/false, /*isOpaque=*/true));
3965 }
3966
3967 SDValue NarrowConst = DAG.getBuildVector(VT: FromVT, DL, Ops: MulConsts);
3968 unsigned ExtLowOpc =
3969 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3970 unsigned ExtHighOpc =
3971 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3972
3973 EVT HalfVT = MVT::v4i32;
3974 SDValue LHSLo = DAG.getNode(Opcode: ExtLowOpc, DL, VT: HalfVT, Operand: ExtendIn);
3975 SDValue LHSHi = DAG.getNode(Opcode: ExtHighOpc, DL, VT: HalfVT, Operand: ExtendIn);
3976 SDValue RHSLo = DAG.getNode(Opcode: ExtLowOpc, DL, VT: HalfVT, Operand: NarrowConst);
3977 SDValue RHSHi = DAG.getNode(Opcode: ExtHighOpc, DL, VT: HalfVT, Operand: NarrowConst);
3978 SDValue MulLo = DAG.getNode(Opcode: ISD::MUL, DL, VT: HalfVT, N1: LHSLo, N2: RHSLo);
3979 SDValue MulHi = DAG.getNode(Opcode: ISD::MUL, DL, VT: HalfVT, N1: LHSHi, N2: RHSHi);
3980 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: MulLo, N2: MulHi);
3981}
3982
3983SDValue
3984WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3985 DAGCombinerInfo &DCI) const {
3986 switch (N->getOpcode()) {
3987 default:
3988 return SDValue();
3989 case ISD::BITCAST:
3990 return performBitcastCombine(N, DCI);
3991 case ISD::SETCC:
3992 return performSETCCCombine(N, DCI, Subtarget);
3993 case ISD::VECTOR_SHUFFLE:
3994 return performVECTOR_SHUFFLECombine(N, DCI);
3995 case ISD::SIGN_EXTEND:
3996 case ISD::ZERO_EXTEND:
3997 return performVectorExtendCombine(N, DCI);
3998 case ISD::UINT_TO_FP:
3999 if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
4000 return ExtCombine;
4001 return performVectorNonNegToFPCombine(N, DCI);
4002 case ISD::SINT_TO_FP:
4003 return performVectorExtendToFPCombine(N, DCI);
4004 case ISD::FP_TO_SINT_SAT:
4005 case ISD::FP_TO_UINT_SAT:
4006 case ISD::FP_ROUND:
4007 case ISD::CONCAT_VECTORS:
4008 return performVectorTruncZeroCombine(N, DCI);
4009 case ISD::FP_TO_SINT:
4010 case ISD::FP_TO_UINT:
4011 return performConvertFPCombine(N, DAG&: DCI.DAG);
4012 case ISD::TRUNCATE:
4013 return performTruncateCombine(N, DCI);
4014 case ISD::INTRINSIC_WO_CHAIN: {
4015 if (SDValue V = performBitmaskCombine(N, DAG&: DCI.DAG))
4016 return V;
4017 return performAnyAllCombine(N, DAG&: DCI.DAG);
4018 }
4019 case ISD::MUL:
4020 return performMulCombine(N, DCI);
4021 case ISD::SHL:
4022 return performShiftCombine(N, DCI);
4023 }
4024}
4025