1//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8///
9/// \file
10/// This file implements the WebAssemblyTargetLowering class.
11///
12//===----------------------------------------------------------------------===//
13
14#include "WebAssemblyISelLowering.h"
15#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16#include "Utils/WebAssemblyTypeUtilities.h"
17#include "WebAssemblyMachineFunctionInfo.h"
18#include "WebAssemblySubtarget.h"
19#include "WebAssemblyTargetMachine.h"
20#include "WebAssemblyUtilities.h"
21#include "llvm/CodeGen/CallingConvLower.h"
22#include "llvm/CodeGen/MachineFrameInfo.h"
23#include "llvm/CodeGen/MachineInstrBuilder.h"
24#include "llvm/CodeGen/MachineJumpTableInfo.h"
25#include "llvm/CodeGen/MachineModuleInfo.h"
26#include "llvm/CodeGen/MachineRegisterInfo.h"
27#include "llvm/CodeGen/SDPatternMatch.h"
28#include "llvm/CodeGen/SelectionDAG.h"
29#include "llvm/CodeGen/SelectionDAGNodes.h"
30#include "llvm/IR/DiagnosticInfo.h"
31#include "llvm/IR/DiagnosticPrinter.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/IntrinsicInst.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsWebAssembly.h"
36#include "llvm/Support/ErrorHandling.h"
37#include "llvm/Support/KnownBits.h"
38#include "llvm/Support/MathExtras.h"
39#include "llvm/Target/TargetOptions.h"
40using namespace llvm;
41
42#define DEBUG_TYPE "wasm-lower"
43
44WebAssemblyTargetLowering::WebAssemblyTargetLowering(
45 const TargetMachine &TM, const WebAssemblySubtarget &STI)
46 : TargetLowering(TM), Subtarget(&STI) {
47 auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49 // Booleans always contain 0 or 1.
50 setBooleanContents(ZeroOrOneBooleanContent);
51 // Except in SIMD vectors
52 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
53 // We don't know the microarchitecture here, so just reduce register pressure.
54 setSchedulingPreference(Sched::RegPressure);
55 // Tell ISel that we have a stack pointer.
56 setStackPointerRegisterToSaveRestore(
57 Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
58 // Set up the register classes.
59 addRegisterClass(VT: MVT::i32, RC: &WebAssembly::I32RegClass);
60 addRegisterClass(VT: MVT::i64, RC: &WebAssembly::I64RegClass);
61 addRegisterClass(VT: MVT::f32, RC: &WebAssembly::F32RegClass);
62 addRegisterClass(VT: MVT::f64, RC: &WebAssembly::F64RegClass);
63 if (Subtarget->hasSIMD128()) {
64 addRegisterClass(VT: MVT::v16i8, RC: &WebAssembly::V128RegClass);
65 addRegisterClass(VT: MVT::v8i16, RC: &WebAssembly::V128RegClass);
66 addRegisterClass(VT: MVT::v4i32, RC: &WebAssembly::V128RegClass);
67 addRegisterClass(VT: MVT::v4f32, RC: &WebAssembly::V128RegClass);
68 addRegisterClass(VT: MVT::v2i64, RC: &WebAssembly::V128RegClass);
69 addRegisterClass(VT: MVT::v2f64, RC: &WebAssembly::V128RegClass);
70 }
71 if (Subtarget->hasFP16()) {
72 addRegisterClass(VT: MVT::v8f16, RC: &WebAssembly::V128RegClass);
73 }
74 if (Subtarget->hasReferenceTypes()) {
75 addRegisterClass(VT: MVT::externref, RC: &WebAssembly::EXTERNREFRegClass);
76 addRegisterClass(VT: MVT::funcref, RC: &WebAssembly::FUNCREFRegClass);
77 if (Subtarget->hasExceptionHandling()) {
78 addRegisterClass(VT: MVT::exnref, RC: &WebAssembly::EXNREFRegClass);
79 }
80 }
81 // Compute derived properties from the register classes.
82 computeRegisterProperties(TRI: Subtarget->getRegisterInfo());
83
84 // Transform loads and stores to pointers in address space 1 to loads and
85 // stores to WebAssembly global variables, outside linear memory.
86 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
87 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
88 setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
89 }
90 if (Subtarget->hasSIMD128()) {
91 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
92 MVT::v2f64}) {
93 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
94 setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
95 }
96 }
97 if (Subtarget->hasFP16()) {
98 setOperationAction(Op: ISD::LOAD, VT: MVT::v8f16, Action: Custom);
99 setOperationAction(Op: ISD::STORE, VT: MVT::v8f16, Action: Custom);
100 }
101 if (Subtarget->hasReferenceTypes()) {
    // We need custom load and store lowering for externref, funcref, and
    // Other. MVT::Other here represents tables of reference types.
104 for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
105 setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
106 setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
107 }
108 }
109
110 setOperationAction(Op: ISD::GlobalAddress, VT: MVTPtr, Action: Custom);
111 setOperationAction(Op: ISD::GlobalTLSAddress, VT: MVTPtr, Action: Custom);
112 setOperationAction(Op: ISD::ExternalSymbol, VT: MVTPtr, Action: Custom);
113 setOperationAction(Op: ISD::JumpTable, VT: MVTPtr, Action: Custom);
114 setOperationAction(Op: ISD::BlockAddress, VT: MVTPtr, Action: Custom);
115 setOperationAction(Op: ISD::BRIND, VT: MVT::Other, Action: Custom);
116 setOperationAction(Op: ISD::CLEAR_CACHE, VT: MVT::Other, Action: Custom);
117
  // Take the default expansion for va_arg, va_copy, and va_end. There is no
  // default action for va_start, so we lower it with custom code.
120 setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
121 setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Expand);
122 setOperationAction(Op: ISD::VACOPY, VT: MVT::Other, Action: Expand);
123 setOperationAction(Op: ISD::VAEND, VT: MVT::Other, Action: Expand);
124
125 for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
126 if (!Subtarget->hasFP16() && T == MVT::v8f16) {
127 continue;
128 }
129 // Don't expand the floating-point types to constant pools.
130 setOperationAction(Op: ISD::ConstantFP, VT: T, Action: Legal);
131 // Expand floating-point comparisons.
132 for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
133 ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
134 setCondCodeAction(CCs: CC, VT: T, Action: Expand);
135 // Expand floating-point library function operators.
136 for (auto Op :
137 {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FMA})
138 setOperationAction(Op, VT: T, Action: Expand);
139 // Note supported floating-point library function operators that otherwise
140 // default to expand.
141 for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
142 ISD::FRINT, ISD::FROUNDEVEN})
143 setOperationAction(Op, VT: T, Action: Legal);
144 // Support minimum and maximum, which otherwise default to expand.
145 setOperationAction(Op: ISD::FMINIMUM, VT: T, Action: Legal);
146 setOperationAction(Op: ISD::FMAXIMUM, VT: T, Action: Legal);
147 // When experimental v8f16 support is enabled these instructions don't need
148 // to be expanded.
149 if (T != MVT::v8f16) {
150 setOperationAction(Op: ISD::FP16_TO_FP, VT: T, Action: Expand);
151 setOperationAction(Op: ISD::FP_TO_FP16, VT: T, Action: Expand);
152 }
153 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: T, MemVT: MVT::f16, Action: Expand);
154 setTruncStoreAction(ValVT: T, MemVT: MVT::f16, Action: Expand);
155 }
156
157 // Expand unavailable integer operations.
158 for (auto Op :
159 {ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
160 ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
161 ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
162 for (auto T : {MVT::i32, MVT::i64})
163 setOperationAction(Op, VT: T, Action: Expand);
164 if (Subtarget->hasSIMD128())
165 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
166 setOperationAction(Op, VT: T, Action: Expand);
167 }
168
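  // The wide-arithmetic proposal adds 128-bit instructions (i64.add128,
  // i64.sub128, and i64.mul_wide_{s,u}); custom-lower the corresponding
  // operations to target them when the feature is enabled.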
169 if (Subtarget->hasWideArithmetic()) {
170 setOperationAction(Op: ISD::ADD, VT: MVT::i128, Action: Custom);
171 setOperationAction(Op: ISD::SUB, VT: MVT::i128, Action: Custom);
172 setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Custom);
173 setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Custom);
174 setOperationAction(Op: ISD::UADDO, VT: MVT::i64, Action: Custom);
175 }
176
177 if (Subtarget->hasNontrappingFPToInt())
178 for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
179 for (auto T : {MVT::i32, MVT::i64})
180 setOperationAction(Op, VT: T, Action: Custom);
181
182 // SIMD-specific configuration
183 if (Subtarget->hasSIMD128()) {
184
185 // Combine partial.reduce.add before legalization gets confused.
186 setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
187
    // Combine wide-vector muls with extended inputs into extmul_half.
189 setTargetDAGCombine(ISD::MUL);
190
191 // Combine vector mask reductions into alltrue/anytrue
192 setTargetDAGCombine(ISD::SETCC);
193
194 // Convert vector to integer bitcasts to bitmask
195 setTargetDAGCombine(ISD::BITCAST);
196
197 // Hoist bitcasts out of shuffles
198 setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
199
200 // Combine extends of extract_subvectors into widening ops
201 setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND});
202
    // Combine int_to_fp or fp_extend of extract_vectors, and vice versa, into
    // conversion ops
205 setTargetDAGCombine({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_EXTEND,
206 ISD::EXTRACT_SUBVECTOR});
207
208 // Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
209 // into conversion ops
210 setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
211 ISD::FP_ROUND, ISD::CONCAT_VECTORS});
212
213 setTargetDAGCombine(ISD::TRUNCATE);
214
215 // Support saturating add/sub for i8x16 and i16x8
216 for (auto Op : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
217 for (auto T : {MVT::v16i8, MVT::v8i16})
218 setOperationAction(Op, VT: T, Action: Legal);
219
220 // Support integer abs
221 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
222 setOperationAction(Op: ISD::ABS, VT: T, Action: Legal);
223
224 // Custom lower BUILD_VECTORs to minimize number of replace_lanes
225 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
226 MVT::v2f64})
227 setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
228
229 if (Subtarget->hasFP16())
230 setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom);
231
232 // We have custom shuffle lowering to expose the shuffle mask
233 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
234 MVT::v2f64})
235 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: T, Action: Custom);
236
237 if (Subtarget->hasFP16())
238 setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v8f16, Action: Custom);
239
240 // Support splatting
241 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
242 MVT::v2f64})
243 setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
244
245 // Custom lowering since wasm shifts must have a scalar shift amount
246 for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
247 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
248 setOperationAction(Op, VT: T, Action: Custom);
249
250 // Custom lower lane accesses to expand out variable indices
251 for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
252 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
253 MVT::v2f64})
254 setOperationAction(Op, VT: T, Action: Custom);
255
256 // There is no i8x16.mul instruction
257 setOperationAction(Op: ISD::MUL, VT: MVT::v16i8, Action: Expand);
258
259 // There is no vector conditional select instruction
260 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
261 MVT::v2f64})
262 setOperationAction(Op: ISD::SELECT_CC, VT: T, Action: Expand);
263
264 // Expand integer operations supported for scalars but not SIMD
265 for (auto Op :
266 {ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
267 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
268 setOperationAction(Op, VT: T, Action: Expand);
269
270 // But we do have integer min and max operations
271 for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
272 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
273 setOperationAction(Op, VT: T, Action: Legal);
274
275 // And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
276 setOperationAction(Op: ISD::CTPOP, VT: MVT::v16i8, Action: Legal);
277 setOperationAction(Op: ISD::CTLZ, VT: MVT::v16i8, Action: Expand);
278 setOperationAction(Op: ISD::CTTZ, VT: MVT::v16i8, Action: Expand);
279
280 // Custom lower bit counting operations for other types to scalarize them.
281 for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
282 for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
283 setOperationAction(Op, VT: T, Action: Custom);
284
285 // Expand float operations supported for scalars but not SIMD
286 for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
287 ISD::FEXP, ISD::FEXP2})
288 for (auto T : {MVT::v4f32, MVT::v2f64})
289 setOperationAction(Op, VT: T, Action: Expand);
290
291 // Unsigned comparison operations are unavailable for i64x2 vectors.
292 for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE})
293 setCondCodeAction(CCs: CC, VT: MVT::v2i64, Action: Custom);
294
295 // 64x2 conversions are not in the spec
296 for (auto Op :
297 {ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
298 for (auto T : {MVT::v2i64, MVT::v2f64})
299 setOperationAction(Op, VT: T, Action: Expand);
300
    // But saturating fp_to_int conversions are
302 for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) {
303 setOperationAction(Op, VT: MVT::v4i32, Action: Custom);
304 if (Subtarget->hasFP16()) {
305 setOperationAction(Op, VT: MVT::v8i16, Action: Custom);
306 }
307 }
308
309 // Support vector extending
310 for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
311 setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
312 setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
313 }
314 }
315
316 // As a special case, these operators use the type to mean the type to
317 // sign-extend from.
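  // For example, (sign_extend_inreg i32:x, i8) sign-extends the low 8 bits of
  // x in place; with the sign-ext feature this maps onto i32.extend8_s.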
318 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
319 if (!Subtarget->hasSignExt()) {
320 // Sign extends are legal only when extending a vector extract
321 auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
322 for (auto T : {MVT::i8, MVT::i16, MVT::i32})
323 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action);
324 }
325 for (auto T : MVT::integer_fixedlen_vector_valuetypes())
326 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Expand);
327
328 // Dynamic stack allocation: use the default expansion.
329 setOperationAction(Op: ISD::STACKSAVE, VT: MVT::Other, Action: Expand);
330 setOperationAction(Op: ISD::STACKRESTORE, VT: MVT::Other, Action: Expand);
331 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVTPtr, Action: Expand);
332
333 setOperationAction(Op: ISD::FrameIndex, VT: MVT::i32, Action: Custom);
334 setOperationAction(Op: ISD::FrameIndex, VT: MVT::i64, Action: Custom);
335 setOperationAction(Op: ISD::CopyToReg, VT: MVT::Other, Action: Custom);
336
337 // Expand these forms; we pattern-match the forms that we can handle in isel.
338 for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
339 for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
340 setOperationAction(Op, VT: T, Action: Expand);
341
342 // We have custom switch handling.
343 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Custom);
344
345 // WebAssembly doesn't have:
346 // - Floating-point extending loads.
347 // - Floating-point truncating stores.
348 // - i1 extending loads.
  // - Truncating SIMD stores and most extending loads.
350 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
351 setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
352 for (auto T : MVT::integer_valuetypes())
353 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
354 setLoadExtAction(ExtType: Ext, ValVT: T, MemVT: MVT::i1, Action: Promote);
355 if (Subtarget->hasSIMD128()) {
356 for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
357 MVT::v2f64}) {
358 for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
359 if (MVT(T) != MemT) {
360 setTruncStoreAction(ValVT: T, MemVT: MemT, Action: Expand);
361 for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
362 setLoadExtAction(ExtType: Ext, ValVT: T, MemVT: MemT, Action: Expand);
363 }
364 }
365 }
366 // But some vector extending loads are legal
367 for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
368 setLoadExtAction(ExtType: Ext, ValVT: MVT::v8i16, MemVT: MVT::v8i8, Action: Legal);
369 setLoadExtAction(ExtType: Ext, ValVT: MVT::v4i32, MemVT: MVT::v4i16, Action: Legal);
370 setLoadExtAction(ExtType: Ext, ValVT: MVT::v2i64, MemVT: MVT::v2i32, Action: Legal);
371 }
372 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::v2f64, MemVT: MVT::v2f32, Action: Legal);
373 }
374
375 // Don't do anything clever with build_pairs
376 setOperationAction(Op: ISD::BUILD_PAIR, VT: MVT::i64, Action: Expand);
377
378 // Trap lowers to wasm unreachable
379 setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
380 setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal);
381
382 // Exception handling intrinsics
383 setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
384 setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
385 setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
386
387 setMaxAtomicSizeInBitsSupported(64);
388
389 // Always convert switches to br_tables unless there is only one case, which
390 // is equivalent to a simple branch. This reduces code size for wasm, and we
391 // defer possible jump table optimizations to the VM.
392 setMinimumJumpTableEntries(2);
393}
394
395MVT WebAssemblyTargetLowering::getPointerTy(const DataLayout &DL,
396 uint32_t AS) const {
397 if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
398 return MVT::externref;
399 if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
400 return MVT::funcref;
401 return TargetLowering::getPointerTy(DL, AS);
402}
403
404MVT WebAssemblyTargetLowering::getPointerMemTy(const DataLayout &DL,
405 uint32_t AS) const {
406 if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_EXTERNREF)
407 return MVT::externref;
408 if (AS == WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF)
409 return MVT::funcref;
410 return TargetLowering::getPointerMemTy(DL, AS);
411}
412
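// Accept (i.e. return false) only partial-reduction patterns the backend can
// lower efficiently, e.g. via dot or extmul/extadd_pairwise sequences. An
// illustrative accepted pattern (IR sketch, intrinsic type mangling elided):
//   %a.ext = sext <16 x i8> %a to <16 x i32>
//   %b.ext = sext <16 x i8> %b to <16 x i32>
//   %m = mul <16 x i32> %a.ext, %b.ext
//   %r = call <4 x i32> @llvm.experimental.vector.partial.reduce.add(
//            <4 x i32> %acc, <16 x i32> %m)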
413bool WebAssemblyTargetLowering::shouldExpandPartialReductionIntrinsic(
414 const IntrinsicInst *I) const {
415 if (I->getIntrinsicID() != Intrinsic::experimental_vector_partial_reduce_add)
416 return true;
417
418 EVT VT = EVT::getEVT(Ty: I->getType());
419 auto Op1 = I->getOperand(i_nocapture: 1);
420
421 if (auto *InputInst = dyn_cast<Instruction>(Val: Op1)) {
422 if (InstructionOpcodeToISD(Opcode: InputInst->getOpcode()) != ISD::MUL)
423 return true;
424
425 if (isa<Instruction>(Val: InputInst->getOperand(i: 0)) &&
426 isa<Instruction>(Val: InputInst->getOperand(i: 1))) {
      // dot only supports signed inputs, but we also support lowering the
      // unsigned case; both inputs must use the same extension, so mixed
      // sign/zero-extends are rejected.
428 if (cast<Instruction>(Val: InputInst->getOperand(i: 0))->getOpcode() !=
429 cast<Instruction>(Val: InputInst->getOperand(i: 1))->getOpcode())
430 return true;
431
432 EVT Op1VT = EVT::getEVT(Ty: Op1->getType());
433 if (Op1VT.getVectorElementType() == VT.getVectorElementType() &&
434 ((VT.getVectorElementCount() * 2 == Op1VT.getVectorElementCount()) ||
435 (VT.getVectorElementCount() * 4 == Op1VT.getVectorElementCount())))
436 return false;
437 }
438 }
439 return true;
440}
441
442TargetLowering::AtomicExpansionKind
443WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
444 // We have wasm instructions for these
445 switch (AI->getOperation()) {
446 case AtomicRMWInst::Add:
447 case AtomicRMWInst::Sub:
448 case AtomicRMWInst::And:
449 case AtomicRMWInst::Or:
450 case AtomicRMWInst::Xor:
451 case AtomicRMWInst::Xchg:
452 return AtomicExpansionKind::None;
453 default:
454 break;
455 }
456 return AtomicExpansionKind::CmpXChg;
457}
458
459bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
460 // Implementation copied from X86TargetLowering.
461 unsigned Opc = VecOp.getOpcode();
462
463 // Assume target opcodes can't be scalarized.
464 // TODO - do we have any exceptions?
465 if (Opc >= ISD::BUILTIN_OP_END || !isBinOp(Opcode: Opc))
466 return false;
467
468 // If the vector op is not supported, try to convert to scalar.
469 EVT VecVT = VecOp.getValueType();
470 if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT))
471 return true;
472
473 // If the vector op is supported, but the scalar op is not, the transform may
474 // not be worthwhile.
475 EVT ScalarVT = VecVT.getScalarType();
476 return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT);
477}
478
479FastISel *WebAssemblyTargetLowering::createFastISel(
480 FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo) const {
481 return WebAssembly::createFastISel(funcInfo&: FuncInfo, libInfo: LibInfo);
482}
483
484MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/,
485 EVT VT) const {
486 unsigned BitWidth = NextPowerOf2(A: VT.getSizeInBits() - 1);
487 if (BitWidth > 1 && BitWidth < 8)
488 BitWidth = 8;
489
490 if (BitWidth > 64) {
491 // The shift will be lowered to a libcall, and compiler-rt libcalls expect
492 // the count to be an i32.
493 BitWidth = 32;
494 assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
495 "32-bit shift counts ought to be enough for anyone");
496 }
497
498 MVT Result = MVT::getIntegerVT(BitWidth);
499 assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
500 "Unable to represent scalar shift amount type");
501 return Result;
502}
503
504// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
505// undefined result on invalid/overflow, to the WebAssembly opcode, which
506// traps on invalid/overflow.
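// The emitted control flow is roughly (pseudo-wasm sketch):
//
//   BB:       %cmp = (%in is within the convertible range)
//             br_if (eqz %cmp), TrueMBB
//   FalseMBB: %f = trunc(%in)        ; cannot trap, since %in is in range
//             br DoneMBB
//   TrueMBB:  %t = Substitute        ; INT_MIN for signed, 0 for unsigned
//   DoneMBB:  %out = phi([%f, FalseMBB], [%t, TrueMBB])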
507static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
508 MachineBasicBlock *BB,
509 const TargetInstrInfo &TII,
510 bool IsUnsigned, bool Int64,
511 bool Float64, unsigned LoweredOpcode) {
512 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
513
514 Register OutReg = MI.getOperand(i: 0).getReg();
515 Register InReg = MI.getOperand(i: 1).getReg();
516
517 unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
518 unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
519 unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
520 unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
521 unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
522 unsigned Eqz = WebAssembly::EQZ_I32;
523 unsigned And = WebAssembly::AND_I32;
524 int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
525 int64_t Substitute = IsUnsigned ? 0 : Limit;
526 double CmpVal = IsUnsigned ? -(double)Limit * 2.0 : -(double)Limit;
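  // CmpVal is 2^31 (i32) or 2^63 (i64) for signed conversions and 2^32 or
  // 2^64 for unsigned ones: inputs whose magnitude stays below this bound
  // (and which are non-negative, in the unsigned case) convert without
  // trapping.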
527 auto &Context = BB->getParent()->getFunction().getContext();
528 Type *Ty = Float64 ? Type::getDoubleTy(C&: Context) : Type::getFloatTy(C&: Context);
529
530 const BasicBlock *LLVMBB = BB->getBasicBlock();
531 MachineFunction *F = BB->getParent();
532 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
533 MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
534 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
535
536 MachineFunction::iterator It = ++BB->getIterator();
537 F->insert(MBBI: It, MBB: FalseMBB);
538 F->insert(MBBI: It, MBB: TrueMBB);
539 F->insert(MBBI: It, MBB: DoneMBB);
540
541 // Transfer the remainder of BB and its successor edges to DoneMBB.
542 DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
543 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
544
545 BB->addSuccessor(Succ: TrueMBB);
546 BB->addSuccessor(Succ: FalseMBB);
547 TrueMBB->addSuccessor(Succ: DoneMBB);
548 FalseMBB->addSuccessor(Succ: DoneMBB);
549
550 unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
551 Tmp0 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
552 Tmp1 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
553 CmpReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
554 EqzReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
555 FalseReg = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: OutReg));
556 TrueReg = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: OutReg));
557
558 MI.eraseFromParent();
559 // For signed numbers, we can do a single comparison to determine whether
560 // fabs(x) is within range.
561 if (IsUnsigned) {
562 Tmp0 = InReg;
563 } else {
564 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Abs), DestReg: Tmp0).addReg(RegNo: InReg);
565 }
566 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: FConst), DestReg: Tmp1)
567 .addFPImm(Val: cast<ConstantFP>(Val: ConstantFP::get(Ty, V: CmpVal)));
568 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: LT), DestReg: CmpReg).addReg(RegNo: Tmp0).addReg(RegNo: Tmp1);
569
570 // For unsigned numbers, we have to do a separate comparison with zero.
571 if (IsUnsigned) {
572 Tmp1 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
573 Register SecondCmpReg =
574 MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
575 Register AndReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
576 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: FConst), DestReg: Tmp1)
577 .addFPImm(Val: cast<ConstantFP>(Val: ConstantFP::get(Ty, V: 0.0)));
578 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: GE), DestReg: SecondCmpReg).addReg(RegNo: Tmp0).addReg(RegNo: Tmp1);
579 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: And), DestReg: AndReg).addReg(RegNo: CmpReg).addReg(RegNo: SecondCmpReg);
580 CmpReg = AndReg;
581 }
582
583 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Eqz), DestReg: EqzReg).addReg(RegNo: CmpReg);
584
585 // Create the CFG diamond to select between doing the conversion or using
586 // the substitute value.
587 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR_IF)).addMBB(MBB: TrueMBB).addReg(RegNo: EqzReg);
588 BuildMI(BB: FalseMBB, MIMD: DL, MCID: TII.get(Opcode: LoweredOpcode), DestReg: FalseReg).addReg(RegNo: InReg);
589 BuildMI(BB: FalseMBB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR)).addMBB(MBB: DoneMBB);
590 BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: IConst), DestReg: TrueReg).addImm(Val: Substitute);
591 BuildMI(BB&: *DoneMBB, I: DoneMBB->begin(), MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::PHI), DestReg: OutReg)
592 .addReg(RegNo: FalseReg)
593 .addMBB(MBB: FalseMBB)
594 .addReg(RegNo: TrueReg)
595 .addMBB(MBB: TrueMBB);
596
597 return DoneMBB;
598}
599
// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
// instruction to handle the zero-length case.
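// The emitted control flow is roughly:
//
//   BB:       %eqz = (%len == 0)
//             br_if %eqz, DoneMBB
//   TrueMBB:  memory.copy %dst, %src, %len
//             br DoneMBB
//   DoneMBB:  ...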
602static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
603 MachineBasicBlock *BB,
604 const TargetInstrInfo &TII, bool Int64) {
605 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
606
607 MachineOperand DstMem = MI.getOperand(i: 0);
608 MachineOperand SrcMem = MI.getOperand(i: 1);
609 MachineOperand Dst = MI.getOperand(i: 2);
610 MachineOperand Src = MI.getOperand(i: 3);
611 MachineOperand Len = MI.getOperand(i: 4);
612
613 // We're going to add an extra use to `Len` to test if it's zero; that
614 // use shouldn't be a kill, even if the original use is.
615 MachineOperand NoKillLen = Len;
616 NoKillLen.setIsKill(false);
617
618 // Decide on which `MachineInstr` opcode we're going to use.
619 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
620 unsigned MemoryCopy =
621 Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
622
  // Create two new basic blocks; one for the new `memory.copy` that we can
  // branch over, and one for the rest of the instructions after the original
  // `memory.copy`.
626 const BasicBlock *LLVMBB = BB->getBasicBlock();
627 MachineFunction *F = BB->getParent();
628 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
629 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
630
631 MachineFunction::iterator It = ++BB->getIterator();
632 F->insert(MBBI: It, MBB: TrueMBB);
633 F->insert(MBBI: It, MBB: DoneMBB);
634
635 // Transfer the remainder of BB and its successor edges to DoneMBB.
636 DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
637 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
638
639 // Connect the CFG edges.
640 BB->addSuccessor(Succ: TrueMBB);
641 BB->addSuccessor(Succ: DoneMBB);
642 TrueMBB->addSuccessor(Succ: DoneMBB);
643
644 // Create a virtual register for the `Eqz` result.
645 unsigned EqzReg;
646 EqzReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
647
648 // Erase the original `memory.copy`.
649 MI.eraseFromParent();
650
651 // Test if `Len` is zero.
652 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Eqz), DestReg: EqzReg).add(MO: NoKillLen);
653
654 // Insert a new `memory.copy`.
655 BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: MemoryCopy))
656 .add(MO: DstMem)
657 .add(MO: SrcMem)
658 .add(MO: Dst)
659 .add(MO: Src)
660 .add(MO: Len);
661
662 // Create the CFG triangle.
663 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR_IF)).addMBB(MBB: DoneMBB).addReg(RegNo: EqzReg);
664 BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR)).addMBB(MBB: DoneMBB);
665
666 return DoneMBB;
667}
668
// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
// instruction to handle the zero-length case.
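// The control flow mirrors LowerMemcpy above, with `memory.fill` in place of
// `memory.copy`.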
671static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
672 MachineBasicBlock *BB,
673 const TargetInstrInfo &TII, bool Int64) {
674 MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
675
676 MachineOperand Mem = MI.getOperand(i: 0);
677 MachineOperand Dst = MI.getOperand(i: 1);
678 MachineOperand Val = MI.getOperand(i: 2);
679 MachineOperand Len = MI.getOperand(i: 3);
680
681 // We're going to add an extra use to `Len` to test if it's zero; that
682 // use shouldn't be a kill, even if the original use is.
683 MachineOperand NoKillLen = Len;
684 NoKillLen.setIsKill(false);
685
686 // Decide on which `MachineInstr` opcode we're going to use.
687 unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
688 unsigned MemoryFill =
689 Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
690
691 // Create two new basic blocks; one for the new `memory.fill` that we can
692 // branch over, and one for the rest of the instructions after the original
693 // `memory.fill`.
694 const BasicBlock *LLVMBB = BB->getBasicBlock();
695 MachineFunction *F = BB->getParent();
696 MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
697 MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
698
699 MachineFunction::iterator It = ++BB->getIterator();
700 F->insert(MBBI: It, MBB: TrueMBB);
701 F->insert(MBBI: It, MBB: DoneMBB);
702
703 // Transfer the remainder of BB and its successor edges to DoneMBB.
704 DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
705 DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
706
707 // Connect the CFG edges.
708 BB->addSuccessor(Succ: TrueMBB);
709 BB->addSuccessor(Succ: DoneMBB);
710 TrueMBB->addSuccessor(Succ: DoneMBB);
711
712 // Create a virtual register for the `Eqz` result.
713 unsigned EqzReg;
714 EqzReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
715
716 // Erase the original `memory.fill`.
717 MI.eraseFromParent();
718
719 // Test if `Len` is zero.
720 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Eqz), DestReg: EqzReg).add(MO: NoKillLen);
721
  // Insert a new `memory.fill`.
723 BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: MemoryFill)).add(MO: Mem).add(MO: Dst).add(MO: Val).add(MO: Len);
724
725 // Create the CFG triangle.
726 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR_IF)).addMBB(MBB: DoneMBB).addReg(RegNo: EqzReg);
727 BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR)).addMBB(MBB: DoneMBB);
728
729 return DoneMBB;
730}
731
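// Rewrite a CALL_PARAMS / CALL_RESULTS (or RET_CALL_RESULTS) pair into a
// single CALL, CALL_INDIRECT, RET_CALL, or RET_CALL_INDIRECT instruction. For
// indirect calls this also adds the type-index and table operands and moves
// the callee to the end of the argument list (or, for funcref calls, passes
// index 0 into __funcref_call_table instead).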
732static MachineBasicBlock *
733LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
734 const WebAssemblySubtarget *Subtarget,
735 const TargetInstrInfo &TII) {
736 MachineInstr &CallParams = *CallResults.getPrevNode();
737 assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
738 assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS ||
739 CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
740
741 bool IsIndirect =
742 CallParams.getOperand(i: 0).isReg() || CallParams.getOperand(i: 0).isFI();
743 bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
744
745 bool IsFuncrefCall = false;
746 if (IsIndirect && CallParams.getOperand(i: 0).isReg()) {
747 Register Reg = CallParams.getOperand(i: 0).getReg();
748 const MachineFunction *MF = BB->getParent();
749 const MachineRegisterInfo &MRI = MF->getRegInfo();
750 const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
751 IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
752 assert(!IsFuncrefCall || Subtarget->hasReferenceTypes());
753 }
754
755 unsigned CallOp;
756 if (IsIndirect && IsRetCall) {
757 CallOp = WebAssembly::RET_CALL_INDIRECT;
758 } else if (IsIndirect) {
759 CallOp = WebAssembly::CALL_INDIRECT;
760 } else if (IsRetCall) {
761 CallOp = WebAssembly::RET_CALL;
762 } else {
763 CallOp = WebAssembly::CALL;
764 }
765
766 MachineFunction &MF = *BB->getParent();
767 const MCInstrDesc &MCID = TII.get(Opcode: CallOp);
768 MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
769
770 // Move the function pointer to the end of the arguments for indirect calls
771 if (IsIndirect) {
772 auto FnPtr = CallParams.getOperand(i: 0);
773 CallParams.removeOperand(OpNo: 0);
774
    // For funcrefs, call_indirect is done through __funcref_call_table and the
    // funcref is always installed in slot 0 of the table. Therefore, instead
    // of appending the function pointer to the end of the params list, we
    // append a zero (the index into __funcref_call_table).
780 if (IsFuncrefCall) {
781 Register RegZero =
782 MF.getRegInfo().createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
783 MachineInstrBuilder MIBC0 =
784 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::CONST_I32), DestReg: RegZero).addImm(Val: 0);
785
786 BB->insert(I: CallResults.getIterator(), M: MIBC0);
787 MachineInstrBuilder(MF, CallParams).addReg(RegNo: RegZero);
788 } else
789 CallParams.addOperand(Op: FnPtr);
790 }
791
792 for (auto Def : CallResults.defs())
793 MIB.add(MO: Def);
794
795 if (IsIndirect) {
796 // Placeholder for the type index.
797 MIB.addImm(Val: 0);
798 // The table into which this call_indirect indexes.
799 MCSymbolWasm *Table = IsFuncrefCall
800 ? WebAssembly::getOrCreateFuncrefCallTableSymbol(
801 Ctx&: MF.getContext(), Subtarget)
802 : WebAssembly::getOrCreateFunctionTableSymbol(
803 Ctx&: MF.getContext(), Subtarget);
804 if (Subtarget->hasCallIndirectOverlong()) {
805 MIB.addSym(Sym: Table);
806 } else {
807 // For the MVP there is at most one table whose number is 0, but we can't
808 // write a table symbol or issue relocations. Instead we just ensure the
809 // table is live and write a zero.
810 Table->setNoStrip();
811 MIB.addImm(Val: 0);
812 }
813 }
814
815 for (auto Use : CallParams.uses())
816 MIB.add(MO: Use);
817
818 BB->insert(I: CallResults.getIterator(), M: MIB);
819 CallParams.eraseFromParent();
820 CallResults.eraseFromParent();
821
822 // If this is a funcref call, to avoid hidden GC roots, we need to clear the
823 // table slot with ref.null upon call_indirect return.
824 //
825 // This generates the following code, which comes right after a call_indirect
826 // of a funcref:
827 //
828 // i32.const 0
829 // ref.null func
830 // table.set __funcref_call_table
831 if (IsIndirect && IsFuncrefCall) {
832 MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
833 Ctx&: MF.getContext(), Subtarget);
834 Register RegZero =
835 MF.getRegInfo().createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
836 MachineInstr *Const0 =
837 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::CONST_I32), DestReg: RegZero).addImm(Val: 0);
838 BB->insertAfter(I: MIB.getInstr()->getIterator(), MI: Const0);
839
840 Register RegFuncref =
841 MF.getRegInfo().createVirtualRegister(RegClass: &WebAssembly::FUNCREFRegClass);
842 MachineInstr *RefNull =
843 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::REF_NULL_FUNCREF), DestReg: RegFuncref);
844 BB->insertAfter(I: Const0->getIterator(), MI: RefNull);
845
846 MachineInstr *TableSet =
847 BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::TABLE_SET_FUNCREF))
848 .addSym(Sym: Table)
849 .addReg(RegNo: RegZero)
850 .addReg(RegNo: RegFuncref);
851 BB->insertAfter(I: RefNull->getIterator(), MI: TableSet);
852 }
853
854 return BB;
855}
856
857MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
858 MachineInstr &MI, MachineBasicBlock *BB) const {
859 const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
860 DebugLoc DL = MI.getDebugLoc();
861
862 switch (MI.getOpcode()) {
863 default:
864 llvm_unreachable("Unexpected instr type to insert");
865 case WebAssembly::FP_TO_SINT_I32_F32:
866 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: false, Float64: false,
867 LoweredOpcode: WebAssembly::I32_TRUNC_S_F32);
868 case WebAssembly::FP_TO_UINT_I32_F32:
869 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: false, Float64: false,
870 LoweredOpcode: WebAssembly::I32_TRUNC_U_F32);
871 case WebAssembly::FP_TO_SINT_I64_F32:
872 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: true, Float64: false,
873 LoweredOpcode: WebAssembly::I64_TRUNC_S_F32);
874 case WebAssembly::FP_TO_UINT_I64_F32:
875 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: true, Float64: false,
876 LoweredOpcode: WebAssembly::I64_TRUNC_U_F32);
877 case WebAssembly::FP_TO_SINT_I32_F64:
878 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: false, Float64: true,
879 LoweredOpcode: WebAssembly::I32_TRUNC_S_F64);
880 case WebAssembly::FP_TO_UINT_I32_F64:
881 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: false, Float64: true,
882 LoweredOpcode: WebAssembly::I32_TRUNC_U_F64);
883 case WebAssembly::FP_TO_SINT_I64_F64:
884 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: true, Float64: true,
885 LoweredOpcode: WebAssembly::I64_TRUNC_S_F64);
886 case WebAssembly::FP_TO_UINT_I64_F64:
887 return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: true, Float64: true,
888 LoweredOpcode: WebAssembly::I64_TRUNC_U_F64);
889 case WebAssembly::MEMCPY_A32:
890 return LowerMemcpy(MI, DL, BB, TII, Int64: false);
891 case WebAssembly::MEMCPY_A64:
892 return LowerMemcpy(MI, DL, BB, TII, Int64: true);
893 case WebAssembly::MEMSET_A32:
894 return LowerMemset(MI, DL, BB, TII, Int64: false);
895 case WebAssembly::MEMSET_A64:
896 return LowerMemset(MI, DL, BB, TII, Int64: true);
897 case WebAssembly::CALL_RESULTS:
898 case WebAssembly::RET_CALL_RESULTS:
899 return LowerCallResults(CallResults&: MI, DL, BB, Subtarget, TII);
900 }
901}
902
903const char *
904WebAssemblyTargetLowering::getTargetNodeName(unsigned Opcode) const {
905 switch (static_cast<WebAssemblyISD::NodeType>(Opcode)) {
906 case WebAssemblyISD::FIRST_NUMBER:
907 break;
908#define HANDLE_NODETYPE(NODE) \
909 case WebAssemblyISD::NODE: \
910 return "WebAssemblyISD::" #NODE;
911#include "WebAssemblyISD.def"
912#undef HANDLE_NODETYPE
913 }
914 return nullptr;
915}
916
917std::pair<unsigned, const TargetRegisterClass *>
918WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
919 const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
920 // First, see if this is a constraint that directly corresponds to a
921 // WebAssembly register class.
922 if (Constraint.size() == 1) {
923 switch (Constraint[0]) {
924 case 'r':
925 assert(VT != MVT::iPTR && "Pointer MVT not expected here");
926 if (Subtarget->hasSIMD128() && VT.isVector()) {
927 if (VT.getSizeInBits() == 128)
928 return std::make_pair(x: 0U, y: &WebAssembly::V128RegClass);
929 }
930 if (VT.isInteger() && !VT.isVector()) {
931 if (VT.getSizeInBits() <= 32)
932 return std::make_pair(x: 0U, y: &WebAssembly::I32RegClass);
933 if (VT.getSizeInBits() <= 64)
934 return std::make_pair(x: 0U, y: &WebAssembly::I64RegClass);
935 }
936 if (VT.isFloatingPoint() && !VT.isVector()) {
937 switch (VT.getSizeInBits()) {
938 case 32:
939 return std::make_pair(x: 0U, y: &WebAssembly::F32RegClass);
940 case 64:
941 return std::make_pair(x: 0U, y: &WebAssembly::F64RegClass);
942 default:
943 break;
944 }
945 }
946 break;
947 default:
948 break;
949 }
950 }
951
952 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
953}
954
955bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
956 // Assume ctz is a relatively cheap operation.
957 return true;
958}
959
960bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
961 // Assume clz is a relatively cheap operation.
962 return true;
963}
964
965bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
966 const AddrMode &AM,
967 Type *Ty, unsigned AS,
968 Instruction *I) const {
  // WebAssembly offsets are added as unsigned without wrapping. The
  // isLegalAddressingMode interface gives us no way to determine whether
  // wrapping could occur, so we approximate this by accepting only
  // non-negative offsets.
972 if (AM.BaseOffs < 0)
973 return false;
974
975 // WebAssembly has no scale register operands.
976 if (AM.Scale != 0)
977 return false;
978
979 // Everything else is legal.
980 return true;
981}
982
983bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
984 EVT /*VT*/, unsigned /*AddrSpace*/, Align /*Align*/,
985 MachineMemOperand::Flags /*Flags*/, unsigned *Fast) const {
986 // WebAssembly supports unaligned accesses, though it should be declared
987 // with the p2align attribute on loads and stores which do so, and there
988 // may be a performance impact. We tell LLVM they're "fast" because
989 // for the kinds of things that LLVM uses this for (merging adjacent stores
990 // of constants, etc.), WebAssembly implementations will either want the
991 // unaligned access or they'll split anyway.
992 if (Fast)
993 *Fast = 1;
994 return true;
995}
996
997bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
998 AttributeList Attr) const {
999 // The current thinking is that wasm engines will perform this optimization,
1000 // so we can save on code size.
1001 return true;
1002}
1003
1004bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1005 EVT ExtT = ExtVal.getValueType();
1006 EVT MemT = cast<LoadSDNode>(Val: ExtVal->getOperand(Num: 0))->getValueType(ResNo: 0);
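  // These correspond to the SIMD extending loads (v128.load8x8, v128.load16x4,
  // and v128.load32x2, signed or unsigned).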
1007 return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) ||
1008 (ExtT == MVT::v4i32 && MemT == MVT::v4i16) ||
1009 (ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1010}
1011
1012bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1013 const GlobalAddressSDNode *GA) const {
1014 // Wasm doesn't support function addresses with offsets
1015 const GlobalValue *GV = GA->getGlobal();
1016 return isa<Function>(Val: GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
1017}
1018
1019EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1020 LLVMContext &C,
1021 EVT VT) const {
1022 if (VT.isVector())
1023 return VT.changeVectorElementTypeToInteger();
1024
1025 // So far, all branch instructions in Wasm take an I32 condition.
1026 // The default TargetLowering::getSetCCResultType returns the pointer size,
1027 // which would be useful to reduce instruction counts when testing
1028 // against 64-bit pointers/values if at some point Wasm supports that.
1029 return EVT::getIntegerVT(Context&: C, BitWidth: 32);
1030}
1031
1032bool WebAssemblyTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
1033 const CallInst &I,
1034 MachineFunction &MF,
1035 unsigned Intrinsic) const {
1036 switch (Intrinsic) {
1037 case Intrinsic::wasm_memory_atomic_notify:
1038 Info.opc = ISD::INTRINSIC_W_CHAIN;
1039 Info.memVT = MVT::i32;
1040 Info.ptrVal = I.getArgOperand(i: 0);
1041 Info.offset = 0;
1042 Info.align = Align(4);
    // The atomic.notify instruction does not really load the memory specified
    // by this argument, but a MachineMemOperand must be either a load or a
    // store, so we set this to a load.
1046 // FIXME Volatile isn't really correct, but currently all LLVM atomic
1047 // instructions are treated as volatiles in the backend, so we should be
1048 // consistent. The same applies for wasm_atomic_wait intrinsics too.
1049 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
1050 return true;
1051 case Intrinsic::wasm_memory_atomic_wait32:
1052 Info.opc = ISD::INTRINSIC_W_CHAIN;
1053 Info.memVT = MVT::i32;
1054 Info.ptrVal = I.getArgOperand(i: 0);
1055 Info.offset = 0;
1056 Info.align = Align(4);
1057 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
1058 return true;
1059 case Intrinsic::wasm_memory_atomic_wait64:
1060 Info.opc = ISD::INTRINSIC_W_CHAIN;
1061 Info.memVT = MVT::i64;
1062 Info.ptrVal = I.getArgOperand(i: 0);
1063 Info.offset = 0;
1064 Info.align = Align(8);
1065 Info.flags = MachineMemOperand::MOVolatile | MachineMemOperand::MOLoad;
1066 return true;
1067 case Intrinsic::wasm_loadf16_f32:
1068 Info.opc = ISD::INTRINSIC_W_CHAIN;
1069 Info.memVT = MVT::f16;
1070 Info.ptrVal = I.getArgOperand(i: 0);
1071 Info.offset = 0;
1072 Info.align = Align(2);
1073 Info.flags = MachineMemOperand::MOLoad;
1074 return true;
1075 case Intrinsic::wasm_storef16_f32:
1076 Info.opc = ISD::INTRINSIC_VOID;
1077 Info.memVT = MVT::f16;
1078 Info.ptrVal = I.getArgOperand(i: 1);
1079 Info.offset = 0;
1080 Info.align = Align(2);
1081 Info.flags = MachineMemOperand::MOStore;
1082 return true;
1083 default:
1084 return false;
1085 }
1086}
1087
1088void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1089 const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1090 const SelectionDAG &DAG, unsigned Depth) const {
1091 switch (Op.getOpcode()) {
1092 default:
1093 break;
1094 case ISD::INTRINSIC_WO_CHAIN: {
1095 unsigned IntNo = Op.getConstantOperandVal(i: 0);
1096 switch (IntNo) {
1097 default:
1098 break;
1099 case Intrinsic::wasm_bitmask: {
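      // bitmask produces one bit per input lane, so all result bits at and
      // above the lane count are known to be zero.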
1100 unsigned BitWidth = Known.getBitWidth();
1101 EVT VT = Op.getOperand(i: 1).getSimpleValueType();
1102 unsigned PossibleBits = VT.getVectorNumElements();
1103 APInt ZeroMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - PossibleBits);
1104 Known.Zero |= ZeroMask;
1105 break;
1106 }
1107 }
1108 break;
1109 }
1110
  // For 128-bit addition, if the upper 64 bits of both operands are known to
  // be zero, then all bits of the upper half of the result are guaranteed to
  // be zero except the lowest one (the carry out of the low half).
1114 case WebAssemblyISD::I64_ADD128:
1115 if (Op.getResNo() == 1) {
1116 SDValue LHS_HI = Op.getOperand(i: 1);
1117 SDValue RHS_HI = Op.getOperand(i: 3);
1118 if (isNullConstant(V: LHS_HI) && isNullConstant(V: RHS_HI))
1119 Known.Zero.setBitsFrom(1);
1120 }
1121 break;
1122 }
1123}
1124
1125TargetLoweringBase::LegalizeTypeAction
1126WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1127 if (VT.isFixedLengthVector()) {
1128 MVT EltVT = VT.getVectorElementType();
1129 // We have legal vector types with these lane types, so widening the
1130 // vector would let us use some of the lanes directly without having to
1131 // extend or truncate values.
1132 if (EltVT == MVT::i8 || EltVT == MVT::i16 || EltVT == MVT::i32 ||
1133 EltVT == MVT::i64 || EltVT == MVT::f32 || EltVT == MVT::f64)
1134 return TypeWidenVector;
1135 }
1136
1137 return TargetLoweringBase::getPreferredVectorAction(VT);
1138}
1139
1140bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1141 SDValue Op, const TargetLoweringOpt &TLO) const {
  // The ISel process runs DAGCombiner after legalization; this step is called
  // the SelectionDAG optimization phase. This post-legalization combining
  // process runs DAGCombiner on each node, and if there was a change to be
  // made, re-runs legalization on it and its user nodes to make sure
  // everything is in a legalized state.
1147 //
1148 // The legalization calls lowering routines, and we do our custom lowering for
1149 // build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1150 // into zeros. But there is a set of routines in DAGCombiner that turns unused
1151 // (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1152 // turns unused vector elements into undefs. But this routine does not work
1153 // with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
  // combination can result in an infinite loop, in which undefs are converted
  // to zeros in legalization and back to undefs in combining.
1156 //
1157 // So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1158 // running for build_vectors.
1159 if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1160 return false;
1161 return true;
1162}
1163
1164//===----------------------------------------------------------------------===//
1165// WebAssembly Lowering private implementation.
1166//===----------------------------------------------------------------------===//
1167
1168//===----------------------------------------------------------------------===//
1169// Lowering Code
1170//===----------------------------------------------------------------------===//
1171
1172static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1173 MachineFunction &MF = DAG.getMachineFunction();
1174 DAG.getContext()->diagnose(
1175 DI: DiagnosticInfoUnsupported(MF.getFunction(), Msg, DL.getDebugLoc()));
1176}
1177
1178// Test whether the given calling convention is supported.
1179static bool callingConvSupported(CallingConv::ID CallConv) {
1180 // We currently support the language-independent target-independent
1181 // conventions. We don't yet have a way to annotate calls with properties like
1182 // "cold", and we don't have any call-clobbered registers, so these are mostly
1183 // all handled the same.
1184 return CallConv == CallingConv::C || CallConv == CallingConv::Fast ||
1185 CallConv == CallingConv::Cold ||
1186 CallConv == CallingConv::PreserveMost ||
1187 CallConv == CallingConv::PreserveAll ||
1188 CallConv == CallingConv::CXX_FAST_TLS ||
1189 CallConv == CallingConv::WASM_EmscriptenInvoke ||
1190 CallConv == CallingConv::Swift;
1191}
1192
1193SDValue
1194WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1195 SmallVectorImpl<SDValue> &InVals) const {
1196 SelectionDAG &DAG = CLI.DAG;
1197 SDLoc DL = CLI.DL;
1198 SDValue Chain = CLI.Chain;
1199 SDValue Callee = CLI.Callee;
1200 MachineFunction &MF = DAG.getMachineFunction();
1201 auto Layout = MF.getDataLayout();
1202
1203 CallingConv::ID CallConv = CLI.CallConv;
1204 if (!callingConvSupported(CallConv))
1205 fail(DL, DAG,
1206 Msg: "WebAssembly doesn't support language-specific or target-specific "
1207 "calling conventions yet");
1208 if (CLI.IsPatchPoint)
1209 fail(DL, DAG, Msg: "WebAssembly doesn't support patch point yet");
1210
1211 if (CLI.IsTailCall) {
1212 auto NoTail = [&](const char *Msg) {
1213 if (CLI.CB && CLI.CB->isMustTailCall())
1214 fail(DL, DAG, Msg);
1215 CLI.IsTailCall = false;
1216 };
1217
1218 if (!Subtarget->hasTailCall())
1219 NoTail("WebAssembly 'tail-call' feature not enabled");
1220
1221 // Varargs calls cannot be tail calls because the buffer is on the stack
1222 if (CLI.IsVarArg)
1223 NoTail("WebAssembly does not support varargs tail calls");
1224
1225 // Do not tail call unless caller and callee return types match
1226 const Function &F = MF.getFunction();
1227 const TargetMachine &TM = getTargetMachine();
1228 Type *RetTy = F.getReturnType();
1229 SmallVector<MVT, 4> CallerRetTys;
1230 SmallVector<MVT, 4> CalleeRetTys;
1231 computeLegalValueVTs(F, TM, Ty: RetTy, ValueVTs&: CallerRetTys);
1232 computeLegalValueVTs(F, TM, Ty: CLI.RetTy, ValueVTs&: CalleeRetTys);
1233 bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1234 std::equal(first1: CallerRetTys.begin(), last1: CallerRetTys.end(),
1235 first2: CalleeRetTys.begin());
1236 if (!TypesMatch)
1237 NoTail("WebAssembly tail call requires caller and callee return types to "
1238 "match");
1239
1240 // If pointers to local stack values are passed, we cannot tail call
1241 if (CLI.CB) {
1242 for (auto &Arg : CLI.CB->args()) {
1243 Value *Val = Arg.get();
1244 // Trace the value back through pointer operations
1245 while (true) {
1246 Value *Src = Val->stripPointerCastsAndAliases();
1247 if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: Src))
1248 Src = GEP->getPointerOperand();
1249 if (Val == Src)
1250 break;
1251 Val = Src;
1252 }
1253 if (isa<AllocaInst>(Val)) {
1254 NoTail(
1255 "WebAssembly does not support tail calling with stack arguments");
1256 break;
1257 }
1258 }
1259 }
1260 }
1261
1262 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1263 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1264 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1265
1266 // The generic code may have added an sret argument. If we're lowering an
1267 // invoke function, the ABI requires that the function pointer be the first
1268 // argument, so we may have to swap the arguments.
1269 if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= 2 &&
1270 Outs[0].Flags.isSRet()) {
1271 std::swap(a&: Outs[0], b&: Outs[1]);
1272 std::swap(a&: OutVals[0], b&: OutVals[1]);
1273 }
1274
1275 bool HasSwiftSelfArg = false;
1276 bool HasSwiftErrorArg = false;
1277 unsigned NumFixedArgs = 0;
1278 for (unsigned I = 0; I < Outs.size(); ++I) {
1279 const ISD::OutputArg &Out = Outs[I];
1280 SDValue &OutVal = OutVals[I];
1281 HasSwiftSelfArg |= Out.Flags.isSwiftSelf();
1282 HasSwiftErrorArg |= Out.Flags.isSwiftError();
1283 if (Out.Flags.isNest())
1284 fail(DL, DAG, Msg: "WebAssembly hasn't implemented nest arguments");
1285 if (Out.Flags.isInAlloca())
1286 fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca arguments");
1287 if (Out.Flags.isInConsecutiveRegs())
1288 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs arguments");
1289 if (Out.Flags.isInConsecutiveRegsLast())
1290 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last arguments");
1291 if (Out.Flags.isByVal() && Out.Flags.getByValSize() != 0) {
1292 auto &MFI = MF.getFrameInfo();
1293 int FI = MFI.CreateStackObject(Size: Out.Flags.getByValSize(),
1294 Alignment: Out.Flags.getNonZeroByValAlign(),
1295 /*isSS=*/isSpillSlot: false);
1296 SDValue SizeNode =
1297 DAG.getConstant(Val: Out.Flags.getByValSize(), DL, VT: MVT::i32);
1298 SDValue FINode = DAG.getFrameIndex(FI, VT: getPointerTy(DL: Layout));
1299 Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FINode, Src: OutVal, Size: SizeNode,
1300 Alignment: Out.Flags.getNonZeroByValAlign(),
1301 /*isVolatile*/ isVol: false, /*AlwaysInline=*/false,
1302 /*CI=*/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo(),
1303 SrcPtrInfo: MachinePointerInfo());
1304 OutVal = FINode;
1305 }
1306 // Count the number of fixed args *after* legalization.
1307 NumFixedArgs += Out.IsFixed;
1308 }
1309
1310 bool IsVarArg = CLI.IsVarArg;
1311 auto PtrVT = getPointerTy(DL: Layout);
1312
  // For swiftcc, emit additional swiftself and swifterror arguments if they
  // are not already present. These additional arguments are also added to the
  // callee signature; they are necessary to match caller and callee signatures
  // for indirect calls.
1317 if (CallConv == CallingConv::Swift) {
1318 if (!HasSwiftSelfArg) {
1319 NumFixedArgs++;
1320 ISD::OutputArg Arg;
1321 Arg.Flags.setSwiftSelf();
1322 CLI.Outs.push_back(Elt: Arg);
1323 SDValue ArgVal = DAG.getUNDEF(VT: PtrVT);
1324 CLI.OutVals.push_back(Elt: ArgVal);
1325 }
1326 if (!HasSwiftErrorArg) {
1327 NumFixedArgs++;
1328 ISD::OutputArg Arg;
1329 Arg.Flags.setSwiftError();
1330 CLI.Outs.push_back(Elt: Arg);
1331 SDValue ArgVal = DAG.getUNDEF(VT: PtrVT);
1332 CLI.OutVals.push_back(Elt: ArgVal);
1333 }
1334 }
1335
1336 // Analyze operands of the call, assigning locations to each operand.
1337 SmallVector<CCValAssign, 16> ArgLocs;
1338 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1339
1340 if (IsVarArg) {
1341 // Outgoing non-fixed arguments are placed in a buffer. First
1342 // compute their offsets and the total amount of buffer space needed.
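// For example (assuming the default data layout; illustrative only): for a call
// like printf("%d %f", i, d), the two variadic operands might be laid out as an
// i32 at offset 0 and an f64 at offset 8 (aligned to 8), giving NumBytes = 16.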
1343 for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1344 const ISD::OutputArg &Out = Outs[I];
1345 SDValue &Arg = OutVals[I];
1346 EVT VT = Arg.getValueType();
1347 assert(VT != MVT::iPTR && "Legalized args should be concrete");
1348 Type *Ty = VT.getTypeForEVT(Context&: *DAG.getContext());
1349 Align Alignment =
1350 std::max(a: Out.Flags.getNonZeroOrigAlign(), b: Layout.getABITypeAlign(Ty));
1351 unsigned Offset =
1352 CCInfo.AllocateStack(Size: Layout.getTypeAllocSize(Ty), Alignment);
1353 CCInfo.addLoc(V: CCValAssign::getMem(ValNo: ArgLocs.size(), ValVT: VT.getSimpleVT(),
1354 Offset, LocVT: VT.getSimpleVT(),
1355 HTP: CCValAssign::Full));
1356 }
1357 }
1358
1359 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1360
1361 SDValue FINode;
1362 if (IsVarArg && NumBytes) {
1363 // For non-fixed arguments, next emit stores of the argument values into the
1364 // stack buffer at the offsets computed above.
1365 MaybeAlign StackAlign = Layout.getStackAlignment();
1366 assert(StackAlign && "data layout string is missing stack alignment");
1367 int FI = MF.getFrameInfo().CreateStackObject(Size: NumBytes, Alignment: *StackAlign,
1368 /*isSS=*/isSpillSlot: false);
1369 unsigned ValNo = 0;
1370 SmallVector<SDValue, 8> Chains;
1371 for (SDValue Arg : drop_begin(RangeOrContainer&: OutVals, N: NumFixedArgs)) {
1372 assert(ArgLocs[ValNo].getValNo() == ValNo &&
1373 "ArgLocs should remain in order and only hold varargs args");
1374 unsigned Offset = ArgLocs[ValNo++].getLocMemOffset();
1375 FINode = DAG.getFrameIndex(FI, VT: getPointerTy(DL: Layout));
1376 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FINode,
1377 N2: DAG.getConstant(Val: Offset, DL, VT: PtrVT));
1378 Chains.push_back(
1379 Elt: DAG.getStore(Chain, dl: DL, Val: Arg, Ptr: Add,
1380 PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset)));
1381 }
1382 if (!Chains.empty())
1383 Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains);
1384 } else if (IsVarArg) {
1385 FINode = DAG.getIntPtrConstant(Val: 0, DL);
1386 }
1387
1388 if (Callee->getOpcode() == ISD::GlobalAddress) {
1389 // If the callee is a GlobalAddress node (quite common, since every direct
1390 // call is), turn it into a TargetGlobalAddress node so that
1391 // LowerGlobalAddress doesn't add MO_GOT, which is not needed for direct calls.
1392 GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Callee);
1393 Callee = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL,
1394 VT: getPointerTy(DL: DAG.getDataLayout()),
1395 offset: GA->getOffset());
1396 Callee = DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL,
1397 VT: getPointerTy(DL: DAG.getDataLayout()), Operand: Callee);
1398 }
1399
1400 // Compute the operands for the CALLn node.
1401 SmallVector<SDValue, 16> Ops;
1402 Ops.push_back(Elt: Chain);
1403 Ops.push_back(Elt: Callee);
1404
1405 // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1406 // isn't reliable.
1407 Ops.append(in_start: OutVals.begin(),
1408 in_end: IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1409 // Add a pointer to the vararg buffer.
1410 if (IsVarArg)
1411 Ops.push_back(Elt: FINode);
1412
1413 SmallVector<EVT, 8> InTys;
1414 for (const auto &In : Ins) {
1415 assert(!In.Flags.isByVal() && "byval is not valid for return values");
1416 assert(!In.Flags.isNest() && "nest is not valid for return values");
1417 if (In.Flags.isInAlloca())
1418 fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca return values");
1419 if (In.Flags.isInConsecutiveRegs())
1420 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs return values");
1421 if (In.Flags.isInConsecutiveRegsLast())
1422 fail(DL, DAG,
1423 Msg: "WebAssembly hasn't implemented cons regs last return values");
1424 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1425 // registers.
1426 InTys.push_back(Elt: In.VT);
1427 }
1428
1429 // Lastly, if this is a call to a funcref, we need to add a table.set
1430 // instruction to the chain and transform the call.
1431 if (CLI.CB && WebAssembly::isWebAssemblyFuncrefType(
1432 Ty: CLI.CB->getCalledOperand()->getType())) {
1433 // In the absence of the function-references proposal, where a funcref call
1434 // would be lowered to call_ref, we instead use reference types: we generate
1435 // a table.set that stores the funcref into a special table used solely for
1436 // this purpose, followed by a call_indirect. Here we just generate the
1437 // table.set and make it the new chain, so that the rest of LowerCall can
1438 // finalize the lowering by generating the call_indirect.
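// Roughly, for an illustrative call through a funcref %f, the emitted sequence
// looks like:
//   table.set     <funcref call table>, 0, %f
//   call_indirect <funcref call table>, 0, <args...>
// where the table symbol comes from getOrCreateFuncrefCallTableSymbol below.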
1439 SDValue Chain = Ops[0];
1440
1441 MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
1442 Ctx&: MF.getContext(), Subtarget);
1443 SDValue Sym = DAG.getMCSymbol(Sym: Table, VT: PtrVT);
1444 SDValue TableSlot = DAG.getConstant(Val: 0, DL, VT: MVT::i32);
1445 SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1446 SDValue TableSet = DAG.getMemIntrinsicNode(
1447 Opcode: WebAssemblyISD::TABLE_SET, dl: DL, VTList: DAG.getVTList(VT: MVT::Other), Ops: TableSetOps,
1448 MemVT: MVT::funcref,
1449 // Machine Mem Operand args
1450 PtrInfo: MachinePointerInfo(
1451 WebAssembly::WasmAddressSpace::WASM_ADDRESS_SPACE_FUNCREF),
1452 Alignment: CLI.CB->getCalledOperand()->getPointerAlignment(DL: DAG.getDataLayout()),
1453 Flags: MachineMemOperand::MOStore);
1454
1455 Ops[0] = TableSet; // The new chain is the TableSet itself
1456 }
1457
1458 if (CLI.IsTailCall) {
1459 // ret_calls do not return values to the current frame
1460 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
1461 return DAG.getNode(Opcode: WebAssemblyISD::RET_CALL, DL, VTList: NodeTys, Ops);
1462 }
1463
1464 InTys.push_back(Elt: MVT::Other);
1465 SDVTList InTyList = DAG.getVTList(VTs: InTys);
1466 SDValue Res = DAG.getNode(Opcode: WebAssemblyISD::CALL, DL, VTList: InTyList, Ops);
1467
1468 for (size_t I = 0; I < Ins.size(); ++I)
1469 InVals.push_back(Elt: Res.getValue(R: I));
1470
1471 // Return the chain
1472 return Res.getValue(R: Ins.size());
1473}
1474
1475bool WebAssemblyTargetLowering::CanLowerReturn(
1476 CallingConv::ID /*CallConv*/, MachineFunction & /*MF*/, bool /*IsVarArg*/,
1477 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /*Context*/,
1478 const Type *RetTy) const {
1479 // WebAssembly can only handle returning tuples with multivalue enabled
1480 return WebAssembly::canLowerReturn(ResultSize: Outs.size(), Subtarget);
1481}
1482
1483SDValue WebAssemblyTargetLowering::LowerReturn(
1484 SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/,
1485 const SmallVectorImpl<ISD::OutputArg> &Outs,
1486 const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1487 SelectionDAG &DAG) const {
1488 assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1489 "MVP WebAssembly can only return up to one value");
1490 if (!callingConvSupported(CallConv))
1491 fail(DL, DAG, Msg: "WebAssembly doesn't support non-C calling conventions");
1492
1493 SmallVector<SDValue, 4> RetOps(1, Chain);
1494 RetOps.append(in_start: OutVals.begin(), in_end: OutVals.end());
1495 Chain = DAG.getNode(Opcode: WebAssemblyISD::RETURN, DL, VT: MVT::Other, Ops: RetOps);
1496
1497 // Record the number and types of the return values.
1498 for (const ISD::OutputArg &Out : Outs) {
1499 assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1500 assert(!Out.Flags.isNest() && "nest is not valid for return values");
1501 assert(Out.IsFixed && "non-fixed return value is not valid");
1502 if (Out.Flags.isInAlloca())
1503 fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca results");
1504 if (Out.Flags.isInConsecutiveRegs())
1505 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs results");
1506 if (Out.Flags.isInConsecutiveRegsLast())
1507 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last results");
1508 }
1509
1510 return Chain;
1511}
1512
1513SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1514 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1515 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1516 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1517 if (!callingConvSupported(CallConv))
1518 fail(DL, DAG, Msg: "WebAssembly doesn't support non-C calling conventions");
1519
1520 MachineFunction &MF = DAG.getMachineFunction();
1521 auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1522
1523 // Set up the incoming ARGUMENTS value, which serves to represent the liveness
1524 // of the incoming values before they're represented by virtual registers.
1525 MF.getRegInfo().addLiveIn(Reg: WebAssembly::ARGUMENTS);
1526
1527 bool HasSwiftErrorArg = false;
1528 bool HasSwiftSelfArg = false;
1529 for (const ISD::InputArg &In : Ins) {
1530 HasSwiftSelfArg |= In.Flags.isSwiftSelf();
1531 HasSwiftErrorArg |= In.Flags.isSwiftError();
1532 if (In.Flags.isInAlloca())
1533 fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca arguments");
1534 if (In.Flags.isNest())
1535 fail(DL, DAG, Msg: "WebAssembly hasn't implemented nest arguments");
1536 if (In.Flags.isInConsecutiveRegs())
1537 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs arguments");
1538 if (In.Flags.isInConsecutiveRegsLast())
1539 fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last arguments");
1540 // Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1541 // registers.
1542 InVals.push_back(Elt: In.Used ? DAG.getNode(Opcode: WebAssemblyISD::ARGUMENT, DL, VT: In.VT,
1543 Operand: DAG.getTargetConstant(Val: InVals.size(),
1544 DL, VT: MVT::i32))
1545 : DAG.getUNDEF(VT: In.VT));
1546
1547 // Record the number and types of arguments.
1548 MFI->addParam(VT: In.VT);
1549 }
1550
1551 // For swiftcc, record additional swiftself and swifterror parameters if they
1552 // aren't already present. These extra parameters are also added to the callee
1553 // signature; they are necessary to keep caller and callee signatures in sync
1554 // for indirect calls.
1555 auto PtrVT = getPointerTy(DL: MF.getDataLayout());
1556 if (CallConv == CallingConv::Swift) {
1557 if (!HasSwiftSelfArg) {
1558 MFI->addParam(VT: PtrVT);
1559 }
1560 if (!HasSwiftErrorArg) {
1561 MFI->addParam(VT: PtrVT);
1562 }
1563 }
1564 // Varargs are copied into a buffer allocated by the caller, and a pointer to
1565 // the buffer is passed as an argument.
1566 if (IsVarArg) {
1567 MVT PtrVT = getPointerTy(DL: MF.getDataLayout());
1568 Register VarargVreg =
1569 MF.getRegInfo().createVirtualRegister(RegClass: getRegClassFor(VT: PtrVT));
1570 MFI->setVarargBufferVreg(VarargVreg);
1571 Chain = DAG.getCopyToReg(
1572 Chain, dl: DL, Reg: VarargVreg,
1573 N: DAG.getNode(Opcode: WebAssemblyISD::ARGUMENT, DL, VT: PtrVT,
1574 Operand: DAG.getTargetConstant(Val: Ins.size(), DL, VT: MVT::i32)));
1575 MFI->addParam(VT: PtrVT);
1576 }
1577
1578 // Record the number and types of arguments and results.
1579 SmallVector<MVT, 4> Params;
1580 SmallVector<MVT, 4> Results;
1581 computeSignatureVTs(Ty: MF.getFunction().getFunctionType(), TargetFunc: &MF.getFunction(),
1582 ContextFunc: MF.getFunction(), TM: DAG.getTarget(), Params, Results);
1583 for (MVT VT : Results)
1584 MFI->addResult(VT);
1585 // TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1586 // the param logic here with computeSignatureVTs.
1587 assert(MFI->getParams().size() == Params.size() &&
1588 std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1589 Params.begin()));
1590
1591 return Chain;
1592}
1593
1594void WebAssemblyTargetLowering::ReplaceNodeResults(
1595 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
1596 switch (N->getOpcode()) {
1597 case ISD::SIGN_EXTEND_INREG:
1598 // Do not add any results, signifying that N should not be custom lowered
1599 // after all. This happens because simd128 turns on custom lowering for
1600 // SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1601 // illegal type.
1602 break;
1603 case ISD::SIGN_EXTEND_VECTOR_INREG:
1604 case ISD::ZERO_EXTEND_VECTOR_INREG:
1605 // Do not add any results, signifying that N should not be custom lowered.
1606 // EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1607 break;
1608 case ISD::ADD:
1609 case ISD::SUB:
1610 Results.push_back(Elt: Replace128Op(N, DAG));
1611 break;
1612 default:
1613 llvm_unreachable(
1614 "ReplaceNodeResults not implemented for this op for WebAssembly!");
1615 }
1616}
1617
1618//===----------------------------------------------------------------------===//
1619// Custom lowering hooks.
1620//===----------------------------------------------------------------------===//
1621
1622SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1623 SelectionDAG &DAG) const {
1624 SDLoc DL(Op);
1625 switch (Op.getOpcode()) {
1626 default:
1627 llvm_unreachable("unimplemented operation lowering");
1628 return SDValue();
1629 case ISD::FrameIndex:
1630 return LowerFrameIndex(Op, DAG);
1631 case ISD::GlobalAddress:
1632 return LowerGlobalAddress(Op, DAG);
1633 case ISD::GlobalTLSAddress:
1634 return LowerGlobalTLSAddress(Op, DAG);
1635 case ISD::ExternalSymbol:
1636 return LowerExternalSymbol(Op, DAG);
1637 case ISD::JumpTable:
1638 return LowerJumpTable(Op, DAG);
1639 case ISD::BR_JT:
1640 return LowerBR_JT(Op, DAG);
1641 case ISD::VASTART:
1642 return LowerVASTART(Op, DAG);
1643 case ISD::BlockAddress:
1644 case ISD::BRIND:
1645 fail(DL, DAG, Msg: "WebAssembly hasn't implemented computed gotos");
1646 return SDValue();
1647 case ISD::RETURNADDR:
1648 return LowerRETURNADDR(Op, DAG);
1649 case ISD::FRAMEADDR:
1650 return LowerFRAMEADDR(Op, DAG);
1651 case ISD::CopyToReg:
1652 return LowerCopyToReg(Op, DAG);
1653 case ISD::EXTRACT_VECTOR_ELT:
1654 case ISD::INSERT_VECTOR_ELT:
1655 return LowerAccessVectorElement(Op, DAG);
1656 case ISD::INTRINSIC_VOID:
1657 case ISD::INTRINSIC_WO_CHAIN:
1658 case ISD::INTRINSIC_W_CHAIN:
1659 return LowerIntrinsic(Op, DAG);
1660 case ISD::SIGN_EXTEND_INREG:
1661 return LowerSIGN_EXTEND_INREG(Op, DAG);
1662 case ISD::ZERO_EXTEND_VECTOR_INREG:
1663 case ISD::SIGN_EXTEND_VECTOR_INREG:
1664 return LowerEXTEND_VECTOR_INREG(Op, DAG);
1665 case ISD::BUILD_VECTOR:
1666 return LowerBUILD_VECTOR(Op, DAG);
1667 case ISD::VECTOR_SHUFFLE:
1668 return LowerVECTOR_SHUFFLE(Op, DAG);
1669 case ISD::SETCC:
1670 return LowerSETCC(Op, DAG);
1671 case ISD::SHL:
1672 case ISD::SRA:
1673 case ISD::SRL:
1674 return LowerShift(Op, DAG);
1675 case ISD::FP_TO_SINT_SAT:
1676 case ISD::FP_TO_UINT_SAT:
1677 return LowerFP_TO_INT_SAT(Op, DAG);
1678 case ISD::LOAD:
1679 return LowerLoad(Op, DAG);
1680 case ISD::STORE:
1681 return LowerStore(Op, DAG);
1682 case ISD::CTPOP:
1683 case ISD::CTLZ:
1684 case ISD::CTTZ:
1685 return DAG.UnrollVectorOp(N: Op.getNode());
1686 case ISD::CLEAR_CACHE:
1687 report_fatal_error(reason: "llvm.clear_cache is not supported on wasm");
1688 case ISD::SMUL_LOHI:
1689 case ISD::UMUL_LOHI:
1690 return LowerMUL_LOHI(Op, DAG);
1691 case ISD::UADDO:
1692 return LowerUADDO(Op, DAG);
1693 }
1694}
1695
1696static bool IsWebAssemblyGlobal(SDValue Op) {
1697 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op))
1698 return WebAssembly::isWasmVarAddressSpace(AS: GA->getAddressSpace());
1699
1700 return false;
1701}
1702
1703static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1704 SelectionDAG &DAG) {
1705 const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op);
1706 if (!FI)
1707 return std::nullopt;
1708
1709 auto &MF = DAG.getMachineFunction();
1710 return WebAssemblyFrameLowering::getLocalForStackObject(MF, FrameIndex: FI->getIndex());
1711}
1712
1713SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1714 SelectionDAG &DAG) const {
1715 SDLoc DL(Op);
1716 StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
1717 const SDValue &Value = SN->getValue();
1718 const SDValue &Base = SN->getBasePtr();
1719 const SDValue &Offset = SN->getOffset();
1720
1721 if (IsWebAssemblyGlobal(Op: Base)) {
1722 if (!Offset->isUndef())
1723 report_fatal_error(reason: "unexpected offset when storing to webassembly global",
1724 gen_crash_diag: false);
1725
1726 SDVTList Tys = DAG.getVTList(VT: MVT::Other);
1727 SDValue Ops[] = {SN->getChain(), Value, Base};
1728 return DAG.getMemIntrinsicNode(Opcode: WebAssemblyISD::GLOBAL_SET, dl: DL, VTList: Tys, Ops,
1729 MemVT: SN->getMemoryVT(), MMO: SN->getMemOperand());
1730 }
1731
1732 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Op: Base, DAG)) {
1733 if (!Offset->isUndef())
1734 report_fatal_error(reason: "unexpected offset when storing to webassembly local",
1735 gen_crash_diag: false);
1736
1737 SDValue Idx = DAG.getTargetConstant(Val: *Local, DL: Base, VT: MVT::i32);
1738 SDVTList Tys = DAG.getVTList(VT: MVT::Other); // The chain.
1739 SDValue Ops[] = {SN->getChain(), Idx, Value};
1740 return DAG.getNode(Opcode: WebAssemblyISD::LOCAL_SET, DL, VTList: Tys, Ops);
1741 }
1742
1743 if (WebAssembly::isWasmVarAddressSpace(AS: SN->getAddressSpace()))
1744 report_fatal_error(
1745 reason: "Encountered an unlowerable store to the wasm_var address space",
1746 gen_crash_diag: false);
1747
1748 return Op;
1749}
1750
1751SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1752 SelectionDAG &DAG) const {
1753 SDLoc DL(Op);
1754 LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
1755 const SDValue &Base = LN->getBasePtr();
1756 const SDValue &Offset = LN->getOffset();
1757
1758 if (IsWebAssemblyGlobal(Op: Base)) {
1759 if (!Offset->isUndef())
1760 report_fatal_error(
1761 reason: "unexpected offset when loading from webassembly global", gen_crash_diag: false);
1762
1763 SDVTList Tys = DAG.getVTList(VT1: LN->getValueType(ResNo: 0), VT2: MVT::Other);
1764 SDValue Ops[] = {LN->getChain(), Base};
1765 return DAG.getMemIntrinsicNode(Opcode: WebAssemblyISD::GLOBAL_GET, dl: DL, VTList: Tys, Ops,
1766 MemVT: LN->getMemoryVT(), MMO: LN->getMemOperand());
1767 }
1768
1769 if (std::optional<unsigned> Local = IsWebAssemblyLocal(Op: Base, DAG)) {
1770 if (!Offset->isUndef())
1771 report_fatal_error(
1772 reason: "unexpected offset when loading from webassembly local", gen_crash_diag: false);
1773
1774 SDValue Idx = DAG.getTargetConstant(Val: *Local, DL: Base, VT: MVT::i32);
1775 EVT LocalVT = LN->getValueType(ResNo: 0);
1776 SDValue LocalGet = DAG.getNode(Opcode: WebAssemblyISD::LOCAL_GET, DL, VT: LocalVT,
1777 Ops: {LN->getChain(), Idx});
1778 SDValue Result = DAG.getMergeValues(Ops: {LocalGet, LN->getChain()}, dl: DL);
1779 assert(Result->getNumValues() == 2 && "Loads must carry a chain!");
1780 return Result;
1781 }
1782
1783 if (WebAssembly::isWasmVarAddressSpace(AS: LN->getAddressSpace()))
1784 report_fatal_error(
1785 reason: "Encountered an unlowerable load from the wasm_var address space",
1786 gen_crash_diag: false);
1787
1788 return Op;
1789}
1790
1791SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1792 SelectionDAG &DAG) const {
1793 assert(Subtarget->hasWideArithmetic());
1794 assert(Op.getValueType() == MVT::i64);
1795 SDLoc DL(Op);
1796 unsigned Opcode;
1797 switch (Op.getOpcode()) {
1798 case ISD::UMUL_LOHI:
1799 Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1800 break;
1801 case ISD::SMUL_LOHI:
1802 Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1803 break;
1804 default:
1805 llvm_unreachable("unexpected opcode");
1806 }
1807 SDValue LHS = Op.getOperand(i: 0);
1808 SDValue RHS = Op.getOperand(i: 1);
1809 SDValue Lo =
1810 DAG.getNode(Opcode, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64), N1: LHS, N2: RHS);
1811 SDValue Hi(Lo.getNode(), 1);
1812 SDValue Ops[] = {Lo, Hi};
1813 return DAG.getMergeValues(Ops, dl: DL);
1814}
1815
1816// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1817//
1818// This enables generating a single wasm instruction for this operation where
1819// the upper half of both operands are constant zeros. The upper half of the
1820// result is then whether the overflow happened.
1821SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1822 SelectionDAG &DAG) const {
1823 assert(Subtarget->hasWideArithmetic());
1824 assert(Op.getValueType() == MVT::i64);
1825 assert(Op.getOpcode() == ISD::UADDO);
1826 SDLoc DL(Op);
1827 SDValue LHS = Op.getOperand(i: 0);
1828 SDValue RHS = Op.getOperand(i: 1);
1829 SDValue Zero = DAG.getConstant(Val: 0, DL, VT: MVT::i64);
1830 SDValue Result =
1831 DAG.getNode(Opcode: WebAssemblyISD::I64_ADD128, DL,
1832 VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64), N1: LHS, N2: Zero, N3: RHS, N4: Zero);
1833 SDValue CarryI64(Result.getNode(), 1);
1834 SDValue CarryI32 = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: CarryI64);
1835 SDValue Ops[] = {Result, CarryI32};
1836 return DAG.getMergeValues(Ops, dl: DL);
1837}
1838
1839SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1840 SelectionDAG &DAG) const {
1841 assert(Subtarget->hasWideArithmetic());
1842 assert(N->getValueType(0) == MVT::i128);
1843 SDLoc DL(N);
1844 unsigned Opcode;
1845 switch (N->getOpcode()) {
1846 case ISD::ADD:
1847 Opcode = WebAssemblyISD::I64_ADD128;
1848 break;
1849 case ISD::SUB:
1850 Opcode = WebAssemblyISD::I64_SUB128;
1851 break;
1852 default:
1853 llvm_unreachable("unexpected opcode");
1854 }
1855 SDValue LHS = N->getOperand(Num: 0);
1856 SDValue RHS = N->getOperand(Num: 1);
1857
1858 SDValue C0 = DAG.getConstant(Val: 0, DL, VT: MVT::i64);
1859 SDValue C1 = DAG.getConstant(Val: 1, DL, VT: MVT::i64);
1860 SDValue LHS_0 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: LHS, N2: C0);
1861 SDValue LHS_1 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: LHS, N2: C1);
1862 SDValue RHS_0 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: RHS, N2: C0);
1863 SDValue RHS_1 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: RHS, N2: C1);
1864 SDValue Result_LO = DAG.getNode(Opcode, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64),
1865 N1: LHS_0, N2: LHS_1, N3: RHS_0, N4: RHS_1);
1866 SDValue Result_HI(Result_LO.getNode(), 1);
1867 return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VTList: N->getVTList(), N1: Result_LO, N2: Result_HI);
1868}
1869
1870SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1871 SelectionDAG &DAG) const {
1872 SDValue Src = Op.getOperand(i: 2);
1873 if (isa<FrameIndexSDNode>(Val: Src.getNode())) {
1874 // CopyToReg nodes don't support FrameIndex operands. Other targets select
1875 // the FI to some LEA-like instruction, but since we don't have that, we
1876 // need to insert some kind of instruction that can take an FI operand and
1877 // produce a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
1878 // local.copy between Op and its FI operand.
1879 SDValue Chain = Op.getOperand(i: 0);
1880 SDLoc DL(Op);
1881 Register Reg = cast<RegisterSDNode>(Val: Op.getOperand(i: 1))->getReg();
1882 EVT VT = Src.getValueType();
1883 SDValue Copy(DAG.getMachineNode(Opcode: VT == MVT::i32 ? WebAssembly::COPY_I32
1884 : WebAssembly::COPY_I64,
1885 dl: DL, VT, Op1: Src),
1886 0);
1887 return Op.getNode()->getNumValues() == 1
1888 ? DAG.getCopyToReg(Chain, dl: DL, Reg, N: Copy)
1889 : DAG.getCopyToReg(Chain, dl: DL, Reg, N: Copy,
1890 Glue: Op.getNumOperands() == 4 ? Op.getOperand(i: 3)
1891 : SDValue());
1892 }
1893 return SDValue();
1894}
1895
1896SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
1897 SelectionDAG &DAG) const {
1898 int FI = cast<FrameIndexSDNode>(Val&: Op)->getIndex();
1899 return DAG.getTargetFrameIndex(FI, VT: Op.getValueType());
1900}
1901
1902SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
1903 SelectionDAG &DAG) const {
1904 SDLoc DL(Op);
1905
1906 if (!Subtarget->getTargetTriple().isOSEmscripten()) {
1907 fail(DL, DAG,
1908 Msg: "Non-Emscripten WebAssembly hasn't implemented "
1909 "__builtin_return_address");
1910 return SDValue();
1911 }
1912
1913 if (verifyReturnAddressArgumentIsConstant(Op, DAG))
1914 return SDValue();
1915
1916 unsigned Depth = Op.getConstantOperandVal(i: 0);
1917 MakeLibCallOptions CallOptions;
1918 return makeLibCall(DAG, LC: RTLIB::RETURN_ADDRESS, RetVT: Op.getValueType(),
1919 Ops: {DAG.getConstant(Val: Depth, DL, VT: MVT::i32)}, CallOptions, dl: DL)
1920 .first;
1921}
1922
1923SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
1924 SelectionDAG &DAG) const {
1925 // Non-zero depths are not supported by WebAssembly currently. Use the
1926 // legalizer's default expansion, which is to return 0 (what this function is
1927 // documented to do).
1928 if (Op.getConstantOperandVal(i: 0) > 0)
1929 return SDValue();
1930
1931 DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
1932 EVT VT = Op.getValueType();
1933 Register FP =
1934 Subtarget->getRegisterInfo()->getFrameRegister(MF: DAG.getMachineFunction());
1935 return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc(Op), Reg: FP, VT);
1936}
1937
1938SDValue
1939WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
1940 SelectionDAG &DAG) const {
1941 SDLoc DL(Op);
1942 const auto *GA = cast<GlobalAddressSDNode>(Val&: Op);
1943
1944 MachineFunction &MF = DAG.getMachineFunction();
1945 if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
1946 report_fatal_error(reason: "cannot use thread-local storage without bulk memory",
1947 gen_crash_diag: false);
1948
1949 const GlobalValue *GV = GA->getGlobal();
1950
1951 // Currently only Emscripten supports dynamic linking with threads. Therefore,
1952 // on other targets, if we have thread-local storage, only the local-exec
1953 // model is possible.
1954 auto model = Subtarget->getTargetTriple().isOSEmscripten()
1955 ? GV->getThreadLocalMode()
1956 : GlobalValue::LocalExecTLSModel;
1957
1958 // Unsupported TLS modes
1959 assert(model != GlobalValue::NotThreadLocal);
1960 assert(model != GlobalValue::InitialExecTLSModel);
1961
1962 if (model == GlobalValue::LocalExecTLSModel ||
1963 model == GlobalValue::LocalDynamicTLSModel ||
1964 (model == GlobalValue::GeneralDynamicTLSModel &&
1965 getTargetMachine().shouldAssumeDSOLocal(GV))) {
1966 // For DSO-local TLS variables we use an offset from __tls_base.
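// A rough sketch of the resulting instruction sequence (wasm32 shown,
// illustrative only):
//   global.get __tls_base
//   i32.const  sym@TLSREL
//   i32.add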
1967
1968 MVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
1969 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
1970 : WebAssembly::GLOBAL_GET_I32;
1971 const char *BaseName = MF.createExternalSymbolName(Name: "__tls_base");
1972
1973 SDValue BaseAddr(
1974 DAG.getMachineNode(Opcode: GlobalGet, dl: DL, VT: PtrVT,
1975 Op1: DAG.getTargetExternalSymbol(Sym: BaseName, VT: PtrVT)),
1976 0);
1977
1978 SDValue TLSOffset = DAG.getTargetGlobalAddress(
1979 GV, DL, VT: PtrVT, offset: GA->getOffset(), TargetFlags: WebAssemblyII::MO_TLS_BASE_REL);
1980 SDValue SymOffset =
1981 DAG.getNode(Opcode: WebAssemblyISD::WrapperREL, DL, VT: PtrVT, Operand: TLSOffset);
1982
1983 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: BaseAddr, N2: SymOffset);
1984 }
1985
1986 assert(model == GlobalValue::GeneralDynamicTLSModel);
1987
1988 EVT VT = Op.getValueType();
1989 return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
1990 Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT,
1991 offset: GA->getOffset(),
1992 TargetFlags: WebAssemblyII::MO_GOT_TLS));
1993}
1994
1995SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
1996 SelectionDAG &DAG) const {
1997 SDLoc DL(Op);
1998 const auto *GA = cast<GlobalAddressSDNode>(Val&: Op);
1999 EVT VT = Op.getValueType();
2000 assert(GA->getTargetFlags() == 0 &&
2001 "Unexpected target flags on generic GlobalAddressSDNode");
2002 if (!WebAssembly::isValidAddressSpace(AS: GA->getAddressSpace()))
2003 fail(DL, DAG, Msg: "Invalid address space for WebAssembly target");
2004
2005 unsigned OperandFlags = 0;
2006 const GlobalValue *GV = GA->getGlobal();
2007 // Since WebAssembly tables cannot yet be shared across modules, we don't
2008 // need special treatment for tables in PIC mode.
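// A rough sketch of the two PIC cases (illustrative only): a DSO-local symbol x
// is addressed as
//   global.get __memory_base ; i32.const x@MBREL ; i32.add
// (or __table_base / @TBREL for functions), while a preemptible symbol is
// instead addressed through its GOT entry via MO_GOT (e.g. global.get GOT.mem.x).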
2009 if (isPositionIndependent() &&
2010 !WebAssembly::isWebAssemblyTableType(Ty: GV->getValueType())) {
2011 if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2012 MachineFunction &MF = DAG.getMachineFunction();
2013 MVT PtrVT = getPointerTy(DL: MF.getDataLayout());
2014 const char *BaseName;
2015 if (GV->getValueType()->isFunctionTy()) {
2016 BaseName = MF.createExternalSymbolName(Name: "__table_base");
2017 OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
2018 } else {
2019 BaseName = MF.createExternalSymbolName(Name: "__memory_base");
2020 OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
2021 }
2022 SDValue BaseAddr =
2023 DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT,
2024 Operand: DAG.getTargetExternalSymbol(Sym: BaseName, VT: PtrVT));
2025
2026 SDValue SymAddr = DAG.getNode(
2027 Opcode: WebAssemblyISD::WrapperREL, DL, VT,
2028 Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT, offset: GA->getOffset(),
2029 TargetFlags: OperandFlags));
2030
2031 return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: BaseAddr, N2: SymAddr);
2032 }
2033 OperandFlags = WebAssemblyII::MO_GOT;
2034 }
2035
2036 return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
2037 Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT,
2038 offset: GA->getOffset(), TargetFlags: OperandFlags));
2039}
2040
2041SDValue
2042WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2043 SelectionDAG &DAG) const {
2044 SDLoc DL(Op);
2045 const auto *ES = cast<ExternalSymbolSDNode>(Val&: Op);
2046 EVT VT = Op.getValueType();
2047 assert(ES->getTargetFlags() == 0 &&
2048 "Unexpected target flags on generic ExternalSymbolSDNode");
2049 return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
2050 Operand: DAG.getTargetExternalSymbol(Sym: ES->getSymbol(), VT));
2051}
2052
2053SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2054 SelectionDAG &DAG) const {
2055 // There's no need for a Wrapper node because we always incorporate a jump
2056 // table operand into a BR_TABLE instruction, rather than ever
2057 // materializing it in a register.
2058 const JumpTableSDNode *JT = cast<JumpTableSDNode>(Val&: Op);
2059 return DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: Op.getValueType(),
2060 TargetFlags: JT->getTargetFlags());
2061}
2062
2063SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2064 SelectionDAG &DAG) const {
2065 SDLoc DL(Op);
2066 SDValue Chain = Op.getOperand(i: 0);
2067 const auto *JT = cast<JumpTableSDNode>(Val: Op.getOperand(i: 1));
2068 SDValue Index = Op.getOperand(i: 2);
2069 assert(JT->getTargetFlags() == 0 && "WebAssembly doesn't set target flags");
2070
2071 SmallVector<SDValue, 8> Ops;
2072 Ops.push_back(Elt: Chain);
2073 Ops.push_back(Elt: Index);
2074
2075 MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2076 const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2077
2078 // Add an operand for each case.
2079 for (auto *MBB : MBBs)
2080 Ops.push_back(Elt: DAG.getBasicBlock(MBB));
2081
2082 // Add the first MBB as a dummy default target for now. This will be replaced
2083 // with the proper default target (and the preceding range check eliminated)
2084 // if possible by WebAssemblyFixBrTableDefaults.
2085 Ops.push_back(Elt: DAG.getBasicBlock(MBB: *MBBs.begin()));
2086 return DAG.getNode(Opcode: WebAssemblyISD::BR_TABLE, DL, VT: MVT::Other, Ops);
2087}
2088
2089SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2090 SelectionDAG &DAG) const {
2091 SDLoc DL(Op);
2092 EVT PtrVT = getPointerTy(DL: DAG.getMachineFunction().getDataLayout());
2093
2094 auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2095 const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: 2))->getValue();
2096
2097 SDValue ArgN = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL,
2098 Reg: MFI->getVarargBufferVreg(), VT: PtrVT);
2099 return DAG.getStore(Chain: Op.getOperand(i: 0), dl: DL, Val: ArgN, Ptr: Op.getOperand(i: 1),
2100 PtrInfo: MachinePointerInfo(SV));
2101}
2102
2103// Try to lower partial.reduce.add to a dot or fallback to a sequence with
2104// extmul and adds.
2105SDValue performLowerPartialReduction(SDNode *N, SelectionDAG &DAG) {
2106 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
2107 if (N->getConstantOperandVal(Num: 0) !=
2108 Intrinsic::experimental_vector_partial_reduce_add)
2109 return SDValue();
2110
2111 assert(N->getValueType(0) == MVT::v4i32 && "can only support v4i32");
2112 SDLoc DL(N);
2113 SDValue Mul = N->getOperand(Num: 2);
2114 assert(Mul->getOpcode() == ISD::MUL && "expected mul input");
2115
2116 SDValue ExtendLHS = Mul->getOperand(Num: 0);
2117 SDValue ExtendRHS = Mul->getOperand(Num: 1);
2118 assert((ISD::isExtOpcode(ExtendLHS.getOpcode()) &&
2119 ISD::isExtOpcode(ExtendRHS.getOpcode())) &&
2120 "expected widening mul");
2121 assert(ExtendLHS.getOpcode() == ExtendRHS.getOpcode() &&
2122 "expected mul to use the same extend for both operands");
2123
2124 SDValue ExtendInLHS = ExtendLHS->getOperand(Num: 0);
2125 SDValue ExtendInRHS = ExtendRHS->getOperand(Num: 0);
2126 bool IsSigned = ExtendLHS->getOpcode() == ISD::SIGN_EXTEND;
2127
2128 if (ExtendInLHS->getValueType(ResNo: 0) == MVT::v8i16) {
2129 if (IsSigned) {
2130 // i32x4.dot_i16x8_s
2131 SDValue Dot = DAG.getNode(Opcode: WebAssemblyISD::DOT, DL, VT: MVT::v4i32,
2132 N1: ExtendInLHS, N2: ExtendInRHS);
2133 return DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i32, N1: N->getOperand(Num: 1), N2: Dot);
2134 }
2135
2136 unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U;
2137 unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U;
2138
2139 // (add (add (extmul_low_sx lhs, rhs), (extmul_high_sx lhs, rhs)))
2140 SDValue LowLHS = DAG.getNode(Opcode: LowOpc, DL, VT: MVT::v4i32, Operand: ExtendInLHS);
2141 SDValue LowRHS = DAG.getNode(Opcode: LowOpc, DL, VT: MVT::v4i32, Operand: ExtendInRHS);
2142 SDValue HighLHS = DAG.getNode(Opcode: HighOpc, DL, VT: MVT::v4i32, Operand: ExtendInLHS);
2143 SDValue HighRHS = DAG.getNode(Opcode: HighOpc, DL, VT: MVT::v4i32, Operand: ExtendInRHS);
2144
2145 SDValue MulLow = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v4i32, N1: LowLHS, N2: LowRHS);
2146 SDValue MulHigh = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v4i32, N1: HighLHS, N2: HighRHS);
2147 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i32, N1: MulLow, N2: MulHigh);
2148 return DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i32, N1: N->getOperand(Num: 1), N2: Add);
2149 } else {
2150 assert(ExtendInLHS->getValueType(0) == MVT::v16i8 &&
2151 "expected v16i8 input types");
2152 // Lower to a wider tree, using twice the operations compared to above.
2153 if (IsSigned) {
2154 // Use two dots
2155 unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_S;
2156 unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_S;
2157 SDValue LowLHS = DAG.getNode(Opcode: LowOpc, DL, VT: MVT::v8i16, Operand: ExtendInLHS);
2158 SDValue LowRHS = DAG.getNode(Opcode: LowOpc, DL, VT: MVT::v8i16, Operand: ExtendInRHS);
2159 SDValue HighLHS = DAG.getNode(Opcode: HighOpc, DL, VT: MVT::v8i16, Operand: ExtendInLHS);
2160 SDValue HighRHS = DAG.getNode(Opcode: HighOpc, DL, VT: MVT::v8i16, Operand: ExtendInRHS);
2161 SDValue DotLHS =
2162 DAG.getNode(Opcode: WebAssemblyISD::DOT, DL, VT: MVT::v4i32, N1: LowLHS, N2: LowRHS);
2163 SDValue DotRHS =
2164 DAG.getNode(Opcode: WebAssemblyISD::DOT, DL, VT: MVT::v4i32, N1: HighLHS, N2: HighRHS);
2165 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i32, N1: DotLHS, N2: DotRHS);
2166 return DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i32, N1: N->getOperand(Num: 1), N2: Add);
2167 }
2168
2169 unsigned LowOpc = WebAssemblyISD::EXTEND_LOW_U;
2170 unsigned HighOpc = WebAssemblyISD::EXTEND_HIGH_U;
2171 SDValue LowLHS = DAG.getNode(Opcode: LowOpc, DL, VT: MVT::v8i16, Operand: ExtendInLHS);
2172 SDValue LowRHS = DAG.getNode(Opcode: LowOpc, DL, VT: MVT::v8i16, Operand: ExtendInRHS);
2173 SDValue HighLHS = DAG.getNode(Opcode: HighOpc, DL, VT: MVT::v8i16, Operand: ExtendInLHS);
2174 SDValue HighRHS = DAG.getNode(Opcode: HighOpc, DL, VT: MVT::v8i16, Operand: ExtendInRHS);
2175
2176 SDValue MulLow = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v8i16, N1: LowLHS, N2: LowRHS);
2177 SDValue MulHigh = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v8i16, N1: HighLHS, N2: HighRHS);
2178
2179 SDValue LowLow = DAG.getNode(Opcode: LowOpc, DL, VT: MVT::v4i32, Operand: MulLow);
2180 SDValue LowHigh = DAG.getNode(Opcode: LowOpc, DL, VT: MVT::v4i32, Operand: MulHigh);
2181 SDValue HighLow = DAG.getNode(Opcode: HighOpc, DL, VT: MVT::v4i32, Operand: MulLow);
2182 SDValue HighHigh = DAG.getNode(Opcode: HighOpc, DL, VT: MVT::v4i32, Operand: MulHigh);
2183
2184 SDValue AddLow = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i32, N1: LowLow, N2: HighLow);
2185 SDValue AddHigh = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i32, N1: LowHigh, N2: HighHigh);
2186 SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i32, N1: AddLow, N2: AddHigh);
2187 return DAG.getNode(Opcode: ISD::ADD, DL, VT: MVT::v4i32, N1: N->getOperand(Num: 1), N2: Add);
2188 }
2189}
2190
2191SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2192 SelectionDAG &DAG) const {
2193 MachineFunction &MF = DAG.getMachineFunction();
2194 unsigned IntNo;
2195 switch (Op.getOpcode()) {
2196 case ISD::INTRINSIC_VOID:
2197 case ISD::INTRINSIC_W_CHAIN:
2198 IntNo = Op.getConstantOperandVal(i: 1);
2199 break;
2200 case ISD::INTRINSIC_WO_CHAIN:
2201 IntNo = Op.getConstantOperandVal(i: 0);
2202 break;
2203 default:
2204 llvm_unreachable("Invalid intrinsic");
2205 }
2206 SDLoc DL(Op);
2207
2208 switch (IntNo) {
2209 default:
2210 return SDValue(); // Don't custom lower most intrinsics.
2211
2212 case Intrinsic::wasm_lsda: {
2213 auto PtrVT = getPointerTy(DL: MF.getDataLayout());
2214 const char *SymName = MF.createExternalSymbolName(
2215 Name: "GCC_except_table" + std::to_string(val: MF.getFunctionNumber()));
2216 if (isPositionIndependent()) {
2217 SDValue Node = DAG.getTargetExternalSymbol(
2218 Sym: SymName, VT: PtrVT, TargetFlags: WebAssemblyII::MO_MEMORY_BASE_REL);
2219 const char *BaseName = MF.createExternalSymbolName(Name: "__memory_base");
2220 SDValue BaseAddr =
2221 DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT,
2222 Operand: DAG.getTargetExternalSymbol(Sym: BaseName, VT: PtrVT));
2223 SDValue SymAddr =
2224 DAG.getNode(Opcode: WebAssemblyISD::WrapperREL, DL, VT: PtrVT, Operand: Node);
2225 return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: BaseAddr, N2: SymAddr);
2226 }
2227 SDValue Node = DAG.getTargetExternalSymbol(Sym: SymName, VT: PtrVT);
2228 return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT, Operand: Node);
2229 }
2230
2231 case Intrinsic::wasm_shuffle: {
2232 // Drop in-chain and replace undefs, but otherwise pass through unchanged
2233 SDValue Ops[18];
2234 size_t OpIdx = 0;
2235 Ops[OpIdx++] = Op.getOperand(i: 1);
2236 Ops[OpIdx++] = Op.getOperand(i: 2);
2237 while (OpIdx < 18) {
2238 const SDValue &MaskIdx = Op.getOperand(i: OpIdx + 1);
2239 if (MaskIdx.isUndef() || MaskIdx.getNode()->getAsZExtVal() >= 32) {
2240 bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2241 Ops[OpIdx++] = DAG.getConstant(Val: 0, DL, VT: MVT::i32, isTarget);
2242 } else {
2243 Ops[OpIdx++] = MaskIdx;
2244 }
2245 }
2246 return DAG.getNode(Opcode: WebAssemblyISD::SHUFFLE, DL, VT: Op.getValueType(), Ops);
2247 }
2248
2249 case Intrinsic::thread_pointer: {
2250 MVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
2251 auto GlobalGet = PtrVT == MVT::i64 ? WebAssembly::GLOBAL_GET_I64
2252 : WebAssembly::GLOBAL_GET_I32;
2253 const char *TlsBase = MF.createExternalSymbolName(Name: "__tls_base");
2254 return SDValue(
2255 DAG.getMachineNode(Opcode: GlobalGet, dl: DL, VT: PtrVT,
2256 Op1: DAG.getTargetExternalSymbol(Sym: TlsBase, VT: PtrVT)),
2257 0);
2258 }
2259 }
2260}
2261
2262SDValue
2263WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2264 SelectionDAG &DAG) const {
2265 SDLoc DL(Op);
2266 // If sign extension operations are disabled, allow sext_inreg only if the
2267 // operand is a vector extract of an i8 or i16 lane. SIMD does not depend on
2268 // sign extension operations, but allowing sext_inreg in this context lets us
2269 // have simple patterns to select extract_lane_s instructions. Expanding
2270 // sext_inreg everywhere would be simpler in this file, but would necessitate
2271 // large and brittle patterns to undo the expansion and select extract_lane_s
2272 // instructions.
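// For example (illustrative), with a v16i8 source vector %v:
//   (sext_inreg (extract_vector_elt %v, i), i8)
// can be selected directly as an i8x16.extract_lane_s instruction.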
2273 assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2274 if (Op.getOperand(i: 0).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2275 return SDValue();
2276
2277 const SDValue &Extract = Op.getOperand(i: 0);
2278 MVT VecT = Extract.getOperand(i: 0).getSimpleValueType();
2279 if (VecT.getVectorElementType().getSizeInBits() > 32)
2280 return SDValue();
2281 MVT ExtractedLaneT =
2282 cast<VTSDNode>(Val: Op.getOperand(i: 1).getNode())->getVT().getSimpleVT();
2283 MVT ExtractedVecT =
2284 MVT::getVectorVT(VT: ExtractedLaneT, NumElements: 128 / ExtractedLaneT.getSizeInBits());
2285 if (ExtractedVecT == VecT)
2286 return Op;
2287
2288 // Bitcast vector to appropriate type to ensure ISel pattern coverage
2289 const SDNode *Index = Extract.getOperand(i: 1).getNode();
2290 if (!isa<ConstantSDNode>(Val: Index))
2291 return SDValue();
2292 unsigned IndexVal = Index->getAsZExtVal();
2293 unsigned Scale =
2294 ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2295 assert(Scale > 1);
2296 SDValue NewIndex =
2297 DAG.getConstant(Val: IndexVal * Scale, DL, VT: Index->getValueType(ResNo: 0));
2298 SDValue NewExtract = DAG.getNode(
2299 Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: Extract.getValueType(),
2300 N1: DAG.getBitcast(VT: ExtractedVecT, V: Extract.getOperand(i: 0)), N2: NewIndex);
2301 return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: Op.getValueType(), N1: NewExtract,
2302 N2: Op.getOperand(i: 1));
2303}
2304
2305static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2306 SelectionDAG &DAG) {
2307 if (Op.getOpcode() != ISD::VECTOR_SHUFFLE)
2308 return SDValue();
2309
2310 assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U ||
2311 UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2312 "expected extend_low");
2313 auto *Shuffle = cast<ShuffleVectorSDNode>(Val: Op.getNode());
2314
2315 ArrayRef<int> Mask = Shuffle->getMask();
2316 // Look for a shuffle which moves from the high half to the low half.
2317 size_t FirstIdx = Mask.size() / 2;
2318 for (size_t i = 0; i < Mask.size() / 2; ++i) {
2319 if (Mask[i] != static_cast<int>(FirstIdx + i)) {
2320 return SDValue();
2321 }
2322 }
2323
2324 SDLoc DL(Op);
2325 unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2326 ? WebAssemblyISD::EXTEND_HIGH_S
2327 : WebAssemblyISD::EXTEND_HIGH_U;
2328 return DAG.getNode(Opcode: Opc, DL, VT, Operand: Shuffle->getOperand(Num: 0));
2329}
2330
2331SDValue
2332WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2333 SelectionDAG &DAG) const {
2334 SDLoc DL(Op);
2335 EVT VT = Op.getValueType();
2336 SDValue Src = Op.getOperand(i: 0);
2337 EVT SrcVT = Src.getValueType();
2338
2339 if (SrcVT.getVectorElementType() == MVT::i1 ||
2340 SrcVT.getVectorElementType() == MVT::i64)
2341 return SDValue();
2342
2343 assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == 0 &&
2344 "Unexpected extension factor.");
2345 unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2346
2347 if (Scale != 2 && Scale != 4 && Scale != 8)
2348 return SDValue();
2349
2350 unsigned Ext;
2351 switch (Op.getOpcode()) {
2352 case ISD::ZERO_EXTEND_VECTOR_INREG:
2353 Ext = WebAssemblyISD::EXTEND_LOW_U;
2354 break;
2355 case ISD::SIGN_EXTEND_VECTOR_INREG:
2356 Ext = WebAssemblyISD::EXTEND_LOW_S;
2357 break;
2358 }
2359
2360 if (Scale == 2) {
2361 // See if we can use EXTEND_HIGH.
2362 if (auto ExtendHigh = GetExtendHigh(Op: Op.getOperand(i: 0), UserOpc: Ext, VT, DAG))
2363 return ExtendHigh;
2364 }
2365
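// Otherwise extend one step at a time; e.g. (illustrative) a zero extension
// from v16i8 lanes to v4i32 lanes becomes two extend_low_u steps:
//   v16i8 -> v8i16 -> v4i32.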
2366 SDValue Ret = Src;
2367 while (Scale != 1) {
2368 Ret = DAG.getNode(Opcode: Ext, DL,
2369 VT: Ret.getValueType()
2370 .widenIntegerVectorElementType(Context&: *DAG.getContext())
2371 .getHalfNumVectorElementsVT(Context&: *DAG.getContext()),
2372 Operand: Ret);
2373 Scale /= 2;
2374 }
2375 assert(Ret.getValueType() == VT);
2376 return Ret;
2377}
2378
2379static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2380 SDLoc DL(Op);
2381 if (Op.getValueType() != MVT::v2f64)
2382 return SDValue();
2383
2384 auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2385 unsigned &Index) -> bool {
2386 switch (Op.getOpcode()) {
2387 case ISD::SINT_TO_FP:
2388 Opcode = WebAssemblyISD::CONVERT_LOW_S;
2389 break;
2390 case ISD::UINT_TO_FP:
2391 Opcode = WebAssemblyISD::CONVERT_LOW_U;
2392 break;
2393 case ISD::FP_EXTEND:
2394 Opcode = WebAssemblyISD::PROMOTE_LOW;
2395 break;
2396 default:
2397 return false;
2398 }
2399
2400 auto ExtractVector = Op.getOperand(i: 0);
2401 if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2402 return false;
2403
2404 if (!isa<ConstantSDNode>(Val: ExtractVector.getOperand(i: 1).getNode()))
2405 return false;
2406
2407 SrcVec = ExtractVector.getOperand(i: 0);
2408 Index = ExtractVector.getConstantOperandVal(i: 1);
2409 return true;
2410 };
2411
2412 unsigned LHSOpcode, RHSOpcode, LHSIndex, RHSIndex;
2413 SDValue LHSSrcVec, RHSSrcVec;
2414 if (!GetConvertedLane(Op.getOperand(i: 0), LHSOpcode, LHSSrcVec, LHSIndex) ||
2415 !GetConvertedLane(Op.getOperand(i: 1), RHSOpcode, RHSSrcVec, RHSIndex))
2416 return SDValue();
2417
2418 if (LHSOpcode != RHSOpcode)
2419 return SDValue();
2420
2421 MVT ExpectedSrcVT;
2422 switch (LHSOpcode) {
2423 case WebAssemblyISD::CONVERT_LOW_S:
2424 case WebAssemblyISD::CONVERT_LOW_U:
2425 ExpectedSrcVT = MVT::v4i32;
2426 break;
2427 case WebAssemblyISD::PROMOTE_LOW:
2428 ExpectedSrcVT = MVT::v4f32;
2429 break;
2430 }
2431 if (LHSSrcVec.getValueType() != ExpectedSrcVT)
2432 return SDValue();
2433
2434 auto Src = LHSSrcVec;
2435 if (LHSIndex != 0 || RHSIndex != 1 || LHSSrcVec != RHSSrcVec) {
2436 // Shuffle the source vector so that the converted lanes are the low lanes.
2437 Src = DAG.getVectorShuffle(
2438 VT: ExpectedSrcVT, dl: DL, N1: LHSSrcVec, N2: RHSSrcVec,
2439 Mask: {static_cast<int>(LHSIndex), static_cast<int>(RHSIndex) + 4, -1, -1});
2440 }
2441 return DAG.getNode(Opcode: LHSOpcode, DL, VT: MVT::v2f64, Operand: Src);
2442}
2443
2444SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2445 SelectionDAG &DAG) const {
2446 MVT VT = Op.getSimpleValueType();
2447 if (VT == MVT::v8f16) {
2448 // BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scalar
2449 // FP16 type, so cast them to I16s.
2450 MVT IVT = VT.changeVectorElementType(EltVT: MVT::i16);
2451 SmallVector<SDValue, 8> NewOps;
2452 for (unsigned I = 0, E = Op.getNumOperands(); I < E; ++I)
2453 NewOps.push_back(Elt: DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: I)));
2454 SDValue Res = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: SDLoc(), VT: IVT, Ops: NewOps);
2455 return DAG.getBitcast(VT, V: Res);
2456 }
2457
2458 if (auto ConvertLow = LowerConvertLow(Op, DAG))
2459 return ConvertLow;
2460
2461 SDLoc DL(Op);
2462 const EVT VecT = Op.getValueType();
2463 const EVT LaneT = Op.getOperand(i: 0).getValueType();
2464 const size_t Lanes = Op.getNumOperands();
2465 bool CanSwizzle = VecT == MVT::v16i8;
2466
2467 // BUILD_VECTORs are lowered to the instruction that initializes the highest
2468 // possible number of lanes at once followed by a sequence of replace_lane
2469 // instructions to individually initialize any remaining lanes.
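// For example (an illustrative sketch): a v4f32 build_vector {x, x, x, y} could
// become an f32x4.splat of x followed by a single f32x4.replace_lane for lane 3.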
2470
2471 // TODO: Tune this. For example, lanewise swizzling is very expensive, so
2472 // swizzled lanes should be given greater weight.
2473
2474 // TODO: Investigate looping rather than always extracting/replacing specific
2475 // lanes to fill gaps.
2476
2477 auto IsConstant = [](const SDValue &V) {
2478 return V.getOpcode() == ISD::Constant || V.getOpcode() == ISD::ConstantFP;
2479 };
2480
2481 // Returns the source vector and index vector pair if they exist. Checks for:
2482 // (extract_vector_elt
2483 // $src,
2484 // (sign_extend_inreg (extract_vector_elt $indices, $i))
2485 // )
2486 auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2487 auto Bail = std::make_pair(x: SDValue(), y: SDValue());
2488 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2489 return Bail;
2490 const SDValue &SwizzleSrc = Lane->getOperand(Num: 0);
2491 const SDValue &IndexExt = Lane->getOperand(Num: 1);
2492 if (IndexExt->getOpcode() != ISD::SIGN_EXTEND_INREG)
2493 return Bail;
2494 const SDValue &Index = IndexExt->getOperand(Num: 0);
2495 if (Index->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2496 return Bail;
2497 const SDValue &SwizzleIndices = Index->getOperand(Num: 0);
2498 if (SwizzleSrc.getValueType() != MVT::v16i8 ||
2499 SwizzleIndices.getValueType() != MVT::v16i8 ||
2500 Index->getOperand(Num: 1)->getOpcode() != ISD::Constant ||
2501 Index->getConstantOperandVal(Num: 1) != I)
2502 return Bail;
2503 return std::make_pair(x: SwizzleSrc, y: SwizzleIndices);
2504 };
2505
2506 // If the lane is extracted from another vector at a constant index, return
2507 // that vector. The source vector must not have more lanes than the dest
2508 // because the shufflevector indices are in terms of the destination lanes and
2509 // would not be able to address the smaller individual source lanes.
2510 auto GetShuffleSrc = [&](const SDValue &Lane) {
2511 if (Lane->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2512 return SDValue();
2513 if (!isa<ConstantSDNode>(Val: Lane->getOperand(Num: 1).getNode()))
2514 return SDValue();
2515 if (Lane->getOperand(Num: 0).getValueType().getVectorNumElements() >
2516 VecT.getVectorNumElements())
2517 return SDValue();
2518 return Lane->getOperand(Num: 0);
2519 };
2520
2521 using ValueEntry = std::pair<SDValue, size_t>;
2522 SmallVector<ValueEntry, 16> SplatValueCounts;
2523
2524 using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2525 SmallVector<SwizzleEntry, 16> SwizzleCounts;
2526
2527 using ShuffleEntry = std::pair<SDValue, size_t>;
2528 SmallVector<ShuffleEntry, 16> ShuffleCounts;
2529
2530 auto AddCount = [](auto &Counts, const auto &Val) {
2531 auto CountIt =
2532 llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2533 if (CountIt == Counts.end()) {
2534 Counts.emplace_back(Val, 1);
2535 } else {
2536 CountIt->second++;
2537 }
2538 };
2539
2540 auto GetMostCommon = [](auto &Counts) {
2541 auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2542 assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2543 return *CommonIt;
2544 };
2545
2546 size_t NumConstantLanes = 0;
2547
2548 // Count eligible lanes for each type of vector creation op
2549 for (size_t I = 0; I < Lanes; ++I) {
2550 const SDValue &Lane = Op->getOperand(Num: I);
2551 if (Lane.isUndef())
2552 continue;
2553
2554 AddCount(SplatValueCounts, Lane);
2555
2556 if (IsConstant(Lane))
2557 NumConstantLanes++;
2558 if (auto ShuffleSrc = GetShuffleSrc(Lane))
2559 AddCount(ShuffleCounts, ShuffleSrc);
2560 if (CanSwizzle) {
2561 auto SwizzleSrcs = GetSwizzleSrcs(I, Lane);
2562 if (SwizzleSrcs.first)
2563 AddCount(SwizzleCounts, SwizzleSrcs);
2564 }
2565 }
2566
2567 SDValue SplatValue;
2568 size_t NumSplatLanes;
2569 std::tie(args&: SplatValue, args&: NumSplatLanes) = GetMostCommon(SplatValueCounts);
2570
2571 SDValue SwizzleSrc;
2572 SDValue SwizzleIndices;
2573 size_t NumSwizzleLanes = 0;
2574 if (SwizzleCounts.size())
2575 std::forward_as_tuple(args: std::tie(args&: SwizzleSrc, args&: SwizzleIndices),
2576 args&: NumSwizzleLanes) = GetMostCommon(SwizzleCounts);
2577
2578 // Shuffles can draw from up to two vectors, so find the two most common
2579 // sources.
2580 SDValue ShuffleSrc1, ShuffleSrc2;
2581 size_t NumShuffleLanes = 0;
2582 if (ShuffleCounts.size()) {
2583 std::tie(args&: ShuffleSrc1, args&: NumShuffleLanes) = GetMostCommon(ShuffleCounts);
2584 llvm::erase_if(C&: ShuffleCounts,
2585 P: [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2586 }
2587 if (ShuffleCounts.size()) {
2588 size_t AdditionalShuffleLanes;
2589 std::tie(args&: ShuffleSrc2, args&: AdditionalShuffleLanes) =
2590 GetMostCommon(ShuffleCounts);
2591 NumShuffleLanes += AdditionalShuffleLanes;
2592 }
2593
2594 // Predicate returning true if the lane is properly initialized by the
2595 // original instruction
2596 std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2597 SDValue Result;
2598 // Prefer swizzles over shuffles over vector consts over splats
2599 if (NumSwizzleLanes >= NumShuffleLanes &&
2600 NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2601 Result = DAG.getNode(Opcode: WebAssemblyISD::SWIZZLE, DL, VT: VecT, N1: SwizzleSrc,
2602 N2: SwizzleIndices);
2603 auto Swizzled = std::make_pair(x&: SwizzleSrc, y&: SwizzleIndices);
2604 IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2605 return Swizzled == GetSwizzleSrcs(I, Lane);
2606 };
2607 } else if (NumShuffleLanes >= NumConstantLanes &&
2608 NumShuffleLanes >= NumSplatLanes) {
2609 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / 8;
2610 size_t DestLaneCount = VecT.getVectorNumElements();
2611 size_t Scale1 = 1;
2612 size_t Scale2 = 1;
2613 SDValue Src1 = ShuffleSrc1;
2614 SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VT: VecT);
2615 if (Src1.getValueType() != VecT) {
2616 size_t LaneSize =
2617 Src1.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2618 assert(LaneSize > DestLaneSize);
2619 Scale1 = LaneSize / DestLaneSize;
2620 Src1 = DAG.getBitcast(VT: VecT, V: Src1);
2621 }
2622 if (Src2.getValueType() != VecT) {
2623 size_t LaneSize =
2624 Src2.getValueType().getVectorElementType().getFixedSizeInBits() / 8;
2625 assert(LaneSize > DestLaneSize);
2626 Scale2 = LaneSize / DestLaneSize;
2627 Src2 = DAG.getBitcast(VT: VecT, V: Src2);
2628 }
2629
2630 int Mask[16];
2631 assert(DestLaneCount <= 16);
2632 for (size_t I = 0; I < DestLaneCount; ++I) {
2633 const SDValue &Lane = Op->getOperand(Num: I);
2634 SDValue Src = GetShuffleSrc(Lane);
2635 if (Src == ShuffleSrc1) {
2636 Mask[I] = Lane->getConstantOperandVal(Num: 1) * Scale1;
2637 } else if (Src && Src == ShuffleSrc2) {
2638 Mask[I] = DestLaneCount + Lane->getConstantOperandVal(Num: 1) * Scale2;
2639 } else {
2640 Mask[I] = -1;
2641 }
2642 }
2643 ArrayRef<int> MaskRef(Mask, DestLaneCount);
2644 Result = DAG.getVectorShuffle(VT: VecT, dl: DL, N1: Src1, N2: Src2, Mask: MaskRef);
2645 IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2646 auto Src = GetShuffleSrc(Lane);
2647 return Src == ShuffleSrc1 || (Src && Src == ShuffleSrc2);
2648 };
2649 } else if (NumConstantLanes >= NumSplatLanes) {
2650 SmallVector<SDValue, 16> ConstLanes;
2651 for (const SDValue &Lane : Op->op_values()) {
2652 if (IsConstant(Lane)) {
2653 // Values may need to be fixed so that they will sign extend to be
2654 // within the expected range during ISel. Check whether the value is in
2655 // bounds based on the lane bit width and if it is out of bounds, lop
2656 // off the extra bits and subtract 2^n to reflect giving the high bit
2657 // value -2^(n-1) rather than +2^(n-1). Skip the i64 case because it
2658 // cannot possibly be out of range.
2659 auto *Const = dyn_cast<ConstantSDNode>(Val: Lane.getNode());
2660 int64_t Val = Const ? Const->getSExtValue() : 0;
2661 uint64_t LaneBits = 128 / Lanes;
2662 assert((LaneBits == 64 || Val >= -(1ll << (LaneBits - 1))) &&
2663 "Unexpected out of bounds negative value");
2664 if (Const && LaneBits != 64 && Val > (1ll << (LaneBits - 1)) - 1) {
2665 uint64_t Mask = (1ll << LaneBits) - 1;
2666 auto NewVal = (((uint64_t)Val & Mask) - (1ll << LaneBits)) & Mask;
2667 ConstLanes.push_back(Elt: DAG.getConstant(Val: NewVal, DL: SDLoc(Lane), VT: LaneT));
2668 } else {
2669 ConstLanes.push_back(Elt: Lane);
2670 }
2671 } else if (LaneT.isFloatingPoint()) {
2672 ConstLanes.push_back(Elt: DAG.getConstantFP(Val: 0, DL, VT: LaneT));
2673 } else {
2674 ConstLanes.push_back(Elt: DAG.getConstant(Val: 0, DL, VT: LaneT));
2675 }
2676 }
2677 Result = DAG.getBuildVector(VT: VecT, DL, Ops: ConstLanes);
2678 IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2679 return IsConstant(Lane);
2680 };
2681 } else {
2682 size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2683 if (NumSplatLanes == 1 && Op->getOperand(Num: 0) == SplatValue &&
2684 (DestLaneSize == 32 || DestLaneSize == 64)) {
2685 // Could be selected to load_zero.
2686 Result = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: VecT, Operand: SplatValue);
2687 } else {
2688 // Use a splat (which might be selected as a load splat)
2689 Result = DAG.getSplatBuildVector(VT: VecT, DL, Op: SplatValue);
2690 }
2691 IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2692 return Lane == SplatValue;
2693 };
2694 }
2695
2696 assert(Result);
2697 assert(IsLaneConstructed);
2698
2699 // Add replace_lane instructions for any unhandled values
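  // For example (illustrative): lowering (v4i32 build_vector x, x, y, x) as a
  // splat of x leaves only lane 2 unhandled, so a single insert_vector_elt
  // (replace_lane) supplies y.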
2700 for (size_t I = 0; I < Lanes; ++I) {
2701 const SDValue &Lane = Op->getOperand(Num: I);
2702 if (!Lane.isUndef() && !IsLaneConstructed(I, Lane))
2703 Result = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: VecT, N1: Result, N2: Lane,
2704 N3: DAG.getConstant(Val: I, DL, VT: MVT::i32));
2705 }
2706
2707 return Result;
2708}
2709
2710SDValue
2711WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2712 SelectionDAG &DAG) const {
2713 SDLoc DL(Op);
2714 ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Val: Op.getNode())->getMask();
2715 MVT VecType = Op.getOperand(i: 0).getSimpleValueType();
2716 assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2717 size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / 8;
2718
2719 // Space for two vector args and sixteen mask indices
2720 SDValue Ops[18];
2721 size_t OpIdx = 0;
2722 Ops[OpIdx++] = Op.getOperand(i: 0);
2723 Ops[OpIdx++] = Op.getOperand(i: 1);
2724
2725 // Expand mask indices to byte indices and materialize them as operands
2726 for (int M : Mask) {
2727 for (size_t J = 0; J < LaneBytes; ++J) {
2728      // Lower undefs (represented by -1 in the mask) to the byte indices of
2729      // lane 0, so an undef lane still covers a whole input lane. This allows
2730      // the VM to match, e.g., an i8x16 byte shuffle to a cheaper i32x4 shuffle.
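      // Illustrative example: with i32 lanes, mask element 2 expands to byte
      // indices {8, 9, 10, 11}, while an undef (-1) element expands to byte
      // indices {0, 1, 2, 3}.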
2731 uint64_t ByteIndex = M == -1 ? J : (uint64_t)M * LaneBytes + J;
2732 Ops[OpIdx++] = DAG.getConstant(Val: ByteIndex, DL, VT: MVT::i32);
2733 }
2734 }
2735
2736 return DAG.getNode(Opcode: WebAssemblyISD::SHUFFLE, DL, VT: Op.getValueType(), Ops);
2737}
2738
2739SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2740 SelectionDAG &DAG) const {
2741 SDLoc DL(Op);
2742 // The legalizer does not know how to expand the unsupported comparison modes
2743 // of i64x2 vectors, so we manually unroll them here.
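  // For example (illustrative): (setcc v2i64 %a, %b, setult) becomes two i64
  // select_cc nodes, each producing all-ones or zero, gathered back into a
  // v2i64 build_vector.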
2744 assert(Op->getOperand(0)->getSimpleValueType(0) == MVT::v2i64);
2745 SmallVector<SDValue, 2> LHS, RHS;
2746 DAG.ExtractVectorElements(Op: Op->getOperand(Num: 0), Args&: LHS);
2747 DAG.ExtractVectorElements(Op: Op->getOperand(Num: 1), Args&: RHS);
2748 const SDValue &CC = Op->getOperand(Num: 2);
2749 auto MakeLane = [&](unsigned I) {
2750 return DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT: MVT::i64, N1: LHS[I], N2: RHS[I],
2751 N3: DAG.getConstant(Val: uint64_t(-1), DL, VT: MVT::i64),
2752 N4: DAG.getConstant(Val: uint64_t(0), DL, VT: MVT::i64), N5: CC);
2753 };
2754 return DAG.getBuildVector(VT: Op->getValueType(ResNo: 0), DL,
2755 Ops: {MakeLane(0), MakeLane(1)});
2756}
2757
2758SDValue
2759WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2760 SelectionDAG &DAG) const {
2761 // Allow constant lane indices, expand variable lane indices
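  // For example (illustrative): (extract_vector_elt (v4i32 %v), 3) keeps its
  // constant index and can be selected to i32x4.extract_lane 3, whereas a
  // variable index falls back to the default expansion (typically through a
  // stack temporary).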
2762 SDNode *IdxNode = Op.getOperand(i: Op.getNumOperands() - 1).getNode();
2763 if (isa<ConstantSDNode>(Val: IdxNode)) {
2764 // Ensure the index type is i32 to match the tablegen patterns
2765 uint64_t Idx = IdxNode->getAsZExtVal();
2766 SmallVector<SDValue, 3> Ops(Op.getNode()->ops());
2767 Ops[Op.getNumOperands() - 1] =
2768 DAG.getConstant(Val: Idx, DL: SDLoc(IdxNode), VT: MVT::i32);
2769 return DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc(Op), VT: Op.getValueType(), Ops);
2770 }
2771 // Perform default expansion
2772 return SDValue();
2773}
2774
2775static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2776 EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2777 // 32-bit and 64-bit unrolled shifts will have proper semantics
2778 if (LaneT.bitsGE(VT: MVT::i32))
2779 return DAG.UnrollVectorOp(N: Op.getNode());
2780 // Otherwise mask the shift value to get proper semantics from 32-bit shift
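  // For example (illustrative): for i8 lanes the shift amount is ANDed with 7
  // so that the widened 32-bit shift behaves like an 8-bit shift, whose count
  // is taken modulo the lane width.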
2781 SDLoc DL(Op);
2782 size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2783 SDValue Mask = DAG.getConstant(Val: LaneT.getSizeInBits() - 1, DL, VT: MVT::i32);
2784 unsigned ShiftOpcode = Op.getOpcode();
2785 SmallVector<SDValue, 16> ShiftedElements;
2786 DAG.ExtractVectorElements(Op: Op.getOperand(i: 0), Args&: ShiftedElements, Start: 0, Count: 0, EltVT: MVT::i32);
2787 SmallVector<SDValue, 16> ShiftElements;
2788 DAG.ExtractVectorElements(Op: Op.getOperand(i: 1), Args&: ShiftElements, Start: 0, Count: 0, EltVT: MVT::i32);
2789 SmallVector<SDValue, 16> UnrolledOps;
2790 for (size_t i = 0; i < NumLanes; ++i) {
2791 SDValue MaskedShiftValue =
2792 DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32, N1: ShiftElements[i], N2: Mask);
2793 SDValue ShiftedValue = ShiftedElements[i];
2794 if (ShiftOpcode == ISD::SRA)
2795 ShiftedValue = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i32,
2796 N1: ShiftedValue, N2: DAG.getValueType(LaneT));
2797 UnrolledOps.push_back(
2798 Elt: DAG.getNode(Opcode: ShiftOpcode, DL, VT: MVT::i32, N1: ShiftedValue, N2: MaskedShiftValue));
2799 }
2800 return DAG.getBuildVector(VT: Op.getValueType(), DL, Ops: UnrolledOps);
2801}
2802
2803SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2804 SelectionDAG &DAG) const {
2805 SDLoc DL(Op);
2806
2807 // Only manually lower vector shifts
2808 assert(Op.getSimpleValueType().isVector());
2809
2810 uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2811 auto ShiftVal = Op.getOperand(i: 1);
2812
2813  // Try to skip the bitmask (AND) operation since it is implied by the shift instruction
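  // For example (illustrative): in (shl v4i32 %v, (splat (and %x, 31))) the
  // AND is redundant because wasm shift instructions already take the shift
  // count modulo the lane width.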
2814 auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2815 if (MaskOp.getOpcode() != ISD::AND)
2816 return MaskOp;
2817 SDValue LHS = MaskOp.getOperand(i: 0);
2818 SDValue RHS = MaskOp.getOperand(i: 1);
2819 if (MaskOp.getValueType().isVector()) {
2820 APInt MaskVal;
2821 if (!ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: MaskVal))
2822 std::swap(a&: LHS, b&: RHS);
2823
2824 if (ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: MaskVal) &&
2825 MaskVal == MaskBits)
2826 MaskOp = LHS;
2827 } else {
2828 if (!isa<ConstantSDNode>(Val: RHS.getNode()))
2829 std::swap(a&: LHS, b&: RHS);
2830
2831 auto ConstantRHS = dyn_cast<ConstantSDNode>(Val: RHS.getNode());
2832 if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2833 MaskOp = LHS;
2834 }
2835
2836 return MaskOp;
2837 };
2838
2839  // Skip a vector-level AND of the shift amount
2840 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2841 ShiftVal = DAG.getSplatValue(V: ShiftVal);
2842 if (!ShiftVal)
2843 return unrollVectorShift(Op, DAG);
2844
2845  // Skip a scalar-level AND of the splatted shift amount
2846 ShiftVal = SkipImpliedMask(ShiftVal, LaneBits - 1);
2847 // Use anyext because none of the high bits can affect the shift
2848 ShiftVal = DAG.getAnyExtOrTrunc(Op: ShiftVal, DL, VT: MVT::i32);
2849
2850 unsigned Opcode;
2851 switch (Op.getOpcode()) {
2852 case ISD::SHL:
2853 Opcode = WebAssemblyISD::VEC_SHL;
2854 break;
2855 case ISD::SRA:
2856 Opcode = WebAssemblyISD::VEC_SHR_S;
2857 break;
2858 case ISD::SRL:
2859 Opcode = WebAssemblyISD::VEC_SHR_U;
2860 break;
2861 default:
2862 llvm_unreachable("unexpected opcode");
2863 }
2864
2865 return DAG.getNode(Opcode, DL, VT: Op.getValueType(), N1: Op.getOperand(i: 0), N2: ShiftVal);
2866}
2867
2868SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2869 SelectionDAG &DAG) const {
2870 EVT ResT = Op.getValueType();
2871 EVT SatVT = cast<VTSDNode>(Val: Op.getOperand(i: 1))->getVT();
2872
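  // The combinations accepted below map onto wasm's saturating truncation
  // instructions; e.g. (illustrative) a v4f32 -> v4i32 fptosi.sat can be
  // selected to i32x4.trunc_sat_f32x4_s. Returning SDValue() requests the
  // default expansion for everything else.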
2873 if ((ResT == MVT::i32 || ResT == MVT::i64) &&
2874 (SatVT == MVT::i32 || SatVT == MVT::i64))
2875 return Op;
2876
2877 if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2878 return Op;
2879
2880 if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2881 return Op;
2882
2883 return SDValue();
2884}
2885
2886//===----------------------------------------------------------------------===//
2887// Custom DAG combine hooks
2888//===----------------------------------------------------------------------===//
2889static SDValue
2890performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2891 auto &DAG = DCI.DAG;
2892 auto Shuffle = cast<ShuffleVectorSDNode>(Val: N);
2893
2894 // Hoist vector bitcasts that don't change the number of lanes out of unary
2895 // shuffles, where they are less likely to get in the way of other combines.
2896 // (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2897 // (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2898 SDValue Bitcast = N->getOperand(Num: 0);
2899 if (Bitcast.getOpcode() != ISD::BITCAST)
2900 return SDValue();
2901 if (!N->getOperand(Num: 1).isUndef())
2902 return SDValue();
2903 SDValue CastOp = Bitcast.getOperand(i: 0);
2904 EVT SrcType = CastOp.getValueType();
2905 EVT DstType = Bitcast.getValueType();
2906 if (!SrcType.is128BitVector() ||
2907 SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2908 return SDValue();
2909 SDValue NewShuffle = DAG.getVectorShuffle(
2910 VT: SrcType, dl: SDLoc(N), N1: CastOp, N2: DAG.getUNDEF(VT: SrcType), Mask: Shuffle->getMask());
2911 return DAG.getBitcast(VT: DstType, V: NewShuffle);
2912}
2913
2914/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
2915/// split up into scalar instructions during legalization, and the vector
2916/// extending instructions are selected in performVectorExtendCombine below.
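/// For example (illustrative): (v4f32 (uint_to_fp (v4i8 %x))) becomes
/// (v4f32 (uint_to_fp (v4i32 (zero_extend (v4i8 %x))))).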
2917static SDValue
2918performVectorExtendToFPCombine(SDNode *N,
2919 TargetLowering::DAGCombinerInfo &DCI) {
2920 auto &DAG = DCI.DAG;
2921 assert(N->getOpcode() == ISD::UINT_TO_FP ||
2922 N->getOpcode() == ISD::SINT_TO_FP);
2923
2924 EVT InVT = N->getOperand(Num: 0)->getValueType(ResNo: 0);
2925 EVT ResVT = N->getValueType(ResNo: 0);
2926 MVT ExtVT;
2927 if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 || InVT == MVT::v4i8))
2928 ExtVT = MVT::v4i32;
2929 else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 || InVT == MVT::v2i8))
2930 ExtVT = MVT::v2i32;
2931 else
2932 return SDValue();
2933
2934 unsigned Op =
2935 N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
2936 SDValue Conv = DAG.getNode(Opcode: Op, DL: SDLoc(N), VT: ExtVT, Operand: N->getOperand(Num: 0));
2937 return DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc(N), VT: ResVT, Operand: Conv);
2938}
2939
2940static SDValue
2941performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2942 auto &DAG = DCI.DAG;
2943 assert(N->getOpcode() == ISD::SIGN_EXTEND ||
2944 N->getOpcode() == ISD::ZERO_EXTEND);
2945
2946 // Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
2947 // possible before the extract_subvector can be expanded.
2948 auto Extract = N->getOperand(Num: 0);
2949 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
2950 return SDValue();
2951 auto Source = Extract.getOperand(i: 0);
2952 auto *IndexNode = dyn_cast<ConstantSDNode>(Val: Extract.getOperand(i: 1));
2953 if (IndexNode == nullptr)
2954 return SDValue();
2955 auto Index = IndexNode->getZExtValue();
2956
2957 // Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
2958 // extracted subvector is the low or high half of its source.
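  // For example (illustrative): (v4i32 (sign_extend (extract_subvector
  // (v8i16 %x), 4))) can be selected to i32x4.extend_high_i16x8_s.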
2959 EVT ResVT = N->getValueType(ResNo: 0);
2960 if (ResVT == MVT::v8i16) {
2961 if (Extract.getValueType() != MVT::v8i8 ||
2962 Source.getValueType() != MVT::v16i8 || (Index != 0 && Index != 8))
2963 return SDValue();
2964 } else if (ResVT == MVT::v4i32) {
2965 if (Extract.getValueType() != MVT::v4i16 ||
2966 Source.getValueType() != MVT::v8i16 || (Index != 0 && Index != 4))
2967 return SDValue();
2968 } else if (ResVT == MVT::v2i64) {
2969 if (Extract.getValueType() != MVT::v2i32 ||
2970 Source.getValueType() != MVT::v4i32 || (Index != 0 && Index != 2))
2971 return SDValue();
2972 } else {
2973 return SDValue();
2974 }
2975
2976 bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
2977 bool IsLow = Index == 0;
2978
2979 unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
2980 : WebAssemblyISD::EXTEND_HIGH_S)
2981 : (IsLow ? WebAssemblyISD::EXTEND_LOW_U
2982 : WebAssemblyISD::EXTEND_HIGH_U);
2983
2984 return DAG.getNode(Opcode: Op, DL: SDLoc(N), VT: ResVT, Operand: Source);
2985}
2986
2987static SDValue
2988performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2989 auto &DAG = DCI.DAG;
2990
2991 auto GetWasmConversionOp = [](unsigned Op) {
2992 switch (Op) {
2993 case ISD::FP_TO_SINT_SAT:
2994 return WebAssemblyISD::TRUNC_SAT_ZERO_S;
2995 case ISD::FP_TO_UINT_SAT:
2996 return WebAssemblyISD::TRUNC_SAT_ZERO_U;
2997 case ISD::FP_ROUND:
2998 return WebAssemblyISD::DEMOTE_ZERO;
2999 }
3000 llvm_unreachable("unexpected op");
3001 };
3002
3003 auto IsZeroSplat = [](SDValue SplatVal) {
3004 auto *Splat = dyn_cast<BuildVectorSDNode>(Val: SplatVal.getNode());
3005 APInt SplatValue, SplatUndef;
3006 unsigned SplatBitSize;
3007 bool HasAnyUndefs;
3008 // Endianness doesn't matter in this context because we are looking for
3009 // an all-zero value.
3010 return Splat &&
3011 Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3012 HasAnyUndefs) &&
3013 SplatValue == 0;
3014 };
3015
3016 if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3017 // Combine this:
3018 //
3019 // (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3020 //
3021 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3022 //
3023 // Or this:
3024 //
3025 // (concat_vectors (v2f32 (fp_round (v2f64 $x))), (v2f32 (splat 0)))
3026 //
3027 // into (f32x4.demote_zero_f64x2 $x).
3028 EVT ResVT;
3029 EVT ExpectedConversionType;
3030 auto Conversion = N->getOperand(Num: 0);
3031 auto ConversionOp = Conversion.getOpcode();
3032 switch (ConversionOp) {
3033 case ISD::FP_TO_SINT_SAT:
3034 case ISD::FP_TO_UINT_SAT:
3035 ResVT = MVT::v4i32;
3036 ExpectedConversionType = MVT::v2i32;
3037 break;
3038 case ISD::FP_ROUND:
3039 ResVT = MVT::v4f32;
3040 ExpectedConversionType = MVT::v2f32;
3041 break;
3042 default:
3043 return SDValue();
3044 }
3045
3046 if (N->getValueType(ResNo: 0) != ResVT)
3047 return SDValue();
3048
3049 if (Conversion.getValueType() != ExpectedConversionType)
3050 return SDValue();
3051
3052 auto Source = Conversion.getOperand(i: 0);
3053 if (Source.getValueType() != MVT::v2f64)
3054 return SDValue();
3055
3056 if (!IsZeroSplat(N->getOperand(Num: 1)) ||
3057 N->getOperand(Num: 1).getValueType() != ExpectedConversionType)
3058 return SDValue();
3059
3060 unsigned Op = GetWasmConversionOp(ConversionOp);
3061 return DAG.getNode(Opcode: Op, DL: SDLoc(N), VT: ResVT, Operand: Source);
3062 }
3063
3064 // Combine this:
3065 //
3066 // (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3067 //
3068 // into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3069 //
3070 // Or this:
3071 //
3072 // (v4f32 (fp_round (concat_vectors $x, (v2f64 (splat 0)))))
3073 //
3074 // into (f32x4.demote_zero_f64x2 $x).
3075 EVT ResVT;
3076 auto ConversionOp = N->getOpcode();
3077 switch (ConversionOp) {
3078 case ISD::FP_TO_SINT_SAT:
3079 case ISD::FP_TO_UINT_SAT:
3080 ResVT = MVT::v4i32;
3081 break;
3082 case ISD::FP_ROUND:
3083 ResVT = MVT::v4f32;
3084 break;
3085 default:
3086 llvm_unreachable("unexpected op");
3087 }
3088
3089 if (N->getValueType(ResNo: 0) != ResVT)
3090 return SDValue();
3091
3092 auto Concat = N->getOperand(Num: 0);
3093 if (Concat.getValueType() != MVT::v4f64)
3094 return SDValue();
3095
3096 auto Source = Concat.getOperand(i: 0);
3097 if (Source.getValueType() != MVT::v2f64)
3098 return SDValue();
3099
3100 if (!IsZeroSplat(Concat.getOperand(i: 1)) ||
3101 Concat.getOperand(i: 1).getValueType() != MVT::v2f64)
3102 return SDValue();
3103
3104 unsigned Op = GetWasmConversionOp(ConversionOp);
3105 return DAG.getNode(Opcode: Op, DL: SDLoc(N), VT: ResVT, Operand: Source);
3106}
3107
3108// Helper to extract VectorWidth bits from Vec, starting at element index IdxVal.
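// For example (illustrative): extracting 64 bits from a v8i16 at IdxVal 4
// yields the v4i16 subvector holding elements 4..7.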
3109static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3110 const SDLoc &DL, unsigned VectorWidth) {
3111 EVT VT = Vec.getValueType();
3112 EVT ElVT = VT.getVectorElementType();
3113 unsigned Factor = VT.getSizeInBits() / VectorWidth;
3114 EVT ResultVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: ElVT,
3115 NumElements: VT.getVectorNumElements() / Factor);
3116
3117 // Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3118 unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3119 assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3120
3121 // This is the index of the first element of the VectorWidth-bit chunk
3122  // we want. Since ElemsPerChunk is a power of 2, just clear the low bits.
3123 IdxVal &= ~(ElemsPerChunk - 1);
3124
3125 // If the input is a buildvector just emit a smaller one.
3126 if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3127 return DAG.getBuildVector(VT: ResultVT, DL,
3128 Ops: Vec->ops().slice(N: IdxVal, M: ElemsPerChunk));
3129
3130 SDValue VecIdx = DAG.getIntPtrConstant(Val: IdxVal, DL);
3131 return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ResultVT, N1: Vec, N2: VecIdx);
3132}
3133
3134// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3135// is the expected destination value type after recursion. In is the initial
3136// input. Note that the input should have enough leading zero bits to prevent
3137// NARROW_U from saturating results.
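// For example (illustrative): truncating v16i16 to v16i8 splits the input into
// two v8i16 halves and emits a single i8x16.narrow_i16x8_u, while a v16i32
// source first narrows each half with i16x8.narrow_i32x4_u and recurses
// through v16i16.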
3138static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
3139 SelectionDAG &DAG) {
3140 EVT SrcVT = In.getValueType();
3141
3142  // No truncation required; we might get here due to recursive calls.
3143 if (SrcVT == DstVT)
3144 return In;
3145
3146 unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3147 unsigned NumElems = SrcVT.getVectorNumElements();
3148 if (!isPowerOf2_32(Value: NumElems))
3149 return SDValue();
3150 assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3151 assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3152
3153 LLVMContext &Ctx = *DAG.getContext();
3154 EVT PackedSVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: SrcVT.getScalarSizeInBits() / 2);
3155
3156 // Narrow to the largest type possible:
3157 // vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3158 EVT InVT = MVT::i16, OutVT = MVT::i8;
3159 if (SrcVT.getScalarSizeInBits() > 16) {
3160 InVT = MVT::i32;
3161 OutVT = MVT::i16;
3162 }
3163 unsigned SubSizeInBits = SrcSizeInBits / 2;
3164 InVT = EVT::getVectorVT(Context&: Ctx, VT: InVT, NumElements: SubSizeInBits / InVT.getSizeInBits());
3165 OutVT = EVT::getVectorVT(Context&: Ctx, VT: OutVT, NumElements: SubSizeInBits / OutVT.getSizeInBits());
3166
3167 // Split lower/upper subvectors.
3168 SDValue Lo = extractSubVector(Vec: In, IdxVal: 0, DAG, DL, VectorWidth: SubSizeInBits);
3169 SDValue Hi = extractSubVector(Vec: In, IdxVal: NumElems / 2, DAG, DL, VectorWidth: SubSizeInBits);
3170
3171  // 256-bit -> 128-bit truncate: narrow the lower/upper 128-bit subvectors.
3172 if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3173 Lo = DAG.getBitcast(VT: InVT, V: Lo);
3174 Hi = DAG.getBitcast(VT: InVT, V: Hi);
3175 SDValue Res = DAG.getNode(Opcode: WebAssemblyISD::NARROW_U, DL, VT: OutVT, N1: Lo, N2: Hi);
3176 return DAG.getBitcast(VT: DstVT, V: Res);
3177 }
3178
3179 // Recursively narrow lower/upper subvectors, concat result and narrow again.
3180 EVT PackedVT = EVT::getVectorVT(Context&: Ctx, VT: PackedSVT, NumElements: NumElems / 2);
3181 Lo = truncateVectorWithNARROW(DstVT: PackedVT, In: Lo, DL, DAG);
3182 Hi = truncateVectorWithNARROW(DstVT: PackedVT, In: Hi, DL, DAG);
3183
3184 PackedVT = EVT::getVectorVT(Context&: Ctx, VT: PackedSVT, NumElements: NumElems);
3185 SDValue Res = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: PackedVT, N1: Lo, N2: Hi);
3186 return truncateVectorWithNARROW(DstVT, In: Res, DL, DAG);
3187}
3188
3189static SDValue performTruncateCombine(SDNode *N,
3190 TargetLowering::DAGCombinerInfo &DCI) {
3191 auto &DAG = DCI.DAG;
3192
3193 SDValue In = N->getOperand(Num: 0);
3194 EVT InVT = In.getValueType();
3195 if (!InVT.isSimple())
3196 return SDValue();
3197
3198 EVT OutVT = N->getValueType(ResNo: 0);
3199 if (!OutVT.isVector())
3200 return SDValue();
3201
3202 EVT OutSVT = OutVT.getVectorElementType();
3203 EVT InSVT = InVT.getVectorElementType();
3204  // Currently we only cover truncation to v16i8 or v8i16.
3205 if (!((InSVT == MVT::i16 || InSVT == MVT::i32 || InSVT == MVT::i64) &&
3206 (OutSVT == MVT::i8 || OutSVT == MVT::i16) && OutVT.is128BitVector()))
3207 return SDValue();
3208
3209 SDLoc DL(N);
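  // Mask off the bits that will be discarded so that NARROW_U, which
  // saturates, behaves like a plain truncation (see truncateVectorWithNARROW).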
3210 APInt Mask = APInt::getLowBitsSet(numBits: InVT.getScalarSizeInBits(),
3211 loBitsSet: OutVT.getScalarSizeInBits());
3212 In = DAG.getNode(Opcode: ISD::AND, DL, VT: InVT, N1: In, N2: DAG.getConstant(Val: Mask, DL, VT: InVT));
3213 return truncateVectorWithNARROW(DstVT: OutVT, In, DL, DAG);
3214}
3215
3216static SDValue performBitcastCombine(SDNode *N,
3217 TargetLowering::DAGCombinerInfo &DCI) {
3218 using namespace llvm::SDPatternMatch;
3219 auto &DAG = DCI.DAG;
3220 SDLoc DL(N);
3221 SDValue Src = N->getOperand(Num: 0);
3222 EVT VT = N->getValueType(ResNo: 0);
3223 EVT SrcVT = Src.getValueType();
3224
3225 if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3226 SrcVT.isFixedLengthVector() && SrcVT.getScalarType() == MVT::i1))
3227 return SDValue();
3228
3229 unsigned NumElts = SrcVT.getVectorNumElements();
3230 EVT Width = MVT::getIntegerVT(BitWidth: 128 / NumElts);
3231
3232 // bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3233 // ==> bitmask
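  // For example (illustrative): a v4i1 value is sign-extended to v4i32 and
  // handed to i32x4.bitmask; the low 4 bits of that result form the i4 value.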
3234 if (NumElts == 2 || NumElts == 4 || NumElts == 8 || NumElts == 16) {
3235 return DAG.getZExtOrTrunc(
3236 Op: DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3237 Ops: {DAG.getConstant(Val: Intrinsic::wasm_bitmask, DL, VT: MVT::i32),
3238 DAG.getSExtOrTrunc(Op: N->getOperand(Num: 0), DL,
3239 VT: SrcVT.changeVectorElementType(EltVT: Width))}),
3240 DL, VT);
3241 }
3242
3243  // bitcast (<N x i1> (setcc (concat_vectors ...))) to iN, N = 32 or 64 (illegal)
3244 if (NumElts == 32 || NumElts == 64) {
3245    // Strategy: perform the setcc separately on each 128-bit half as
3246    // v16i8 -> v16i1, bitcast each result to an i16 bitmask, extend it to
3247    // i32 or i64, and accumulate the halves by shifting and adding.
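    // For example (illustrative): with N == 32 the two v16i1 setcc halves
    // become two 16-bit bitmasks that are assembled into a single i32 whose
    // bits are the per-lane comparison results.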
3248 SDValue Concat, SetCCVector;
3249 ISD::CondCode SetCond;
3250
3251 if (!sd_match(N, P: m_BitCast(Op: m_c_SetCC(LHS: m_Value(N&: Concat), RHS: m_Value(N&: SetCCVector),
3252 CC: m_CondCode(CC&: SetCond)))))
3253 return SDValue();
3254 if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3255 return SDValue();
3256
3257 uint64_t ElementWidth =
3258 SetCCVector.getValueType().getVectorElementType().getFixedSizeInBits();
3259
3260 SmallVector<SDValue> VectorsToShuffle;
3261 for (size_t I = 0; I < Concat->ops().size(); I++) {
3262 VectorsToShuffle.push_back(Elt: DAG.getBitcast(
3263 VT: MVT::i16,
3264 V: DAG.getSetCC(DL, VT: MVT::v16i1, LHS: Concat->ops()[I],
3265 RHS: extractSubVector(Vec: SetCCVector, IdxVal: I * (128 / ElementWidth),
3266 DAG, DL, VectorWidth: 128),
3267 Cond: SetCond)));
3268 }
3269
3270 MVT ReturnType = VectorsToShuffle.size() == 2 ? MVT::i32 : MVT::i64;
3271 SDValue ReturningInteger = DAG.getConstant(Val: 0, DL, VT: ReturnType);
3272
3273 for (SDValue V : VectorsToShuffle) {
3274 ReturningInteger = DAG.getNode(
3275 Opcode: ISD::SHL, DL, VT: ReturnType,
3276 Ops: {DAG.getShiftAmountConstant(Val: 16, VT: ReturnType, DL), ReturningInteger});
3277
3278 SDValue ExtendedV = DAG.getZExtOrTrunc(Op: V, DL, VT: ReturnType);
3279 ReturningInteger =
3280 DAG.getNode(Opcode: ISD::ADD, DL, VT: ReturnType, Ops: {ReturningInteger, ExtendedV});
3281 }
3282
3283 return ReturningInteger;
3284 }
3285
3286 return SDValue();
3287}
3288
3289static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3290 // any_true (setcc <X>, 0, eq) => (not (all_true X))
3291 // all_true (setcc <X>, 0, eq) => (not (any_true X))
3292 // any_true (setcc <X>, 0, ne) => (any_true X)
3293 // all_true (setcc <X>, 0, ne) => (all_true X)
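  // These identities hold because "some lane is zero" is the negation of
  // "all lanes are nonzero", and "all lanes are zero" is the negation of
  // "some lane is nonzero".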
3294 assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3295 using namespace llvm::SDPatternMatch;
3296
3297 SDValue LHS;
3298 if (!sd_match(N: N->getOperand(Num: 1),
3299 P: m_c_SetCC(LHS: m_Value(N&: LHS), RHS: m_Zero(), CC: m_CondCode())))
3300 return SDValue();
3301 EVT LT = LHS.getValueType();
3302 if (LT.getScalarSizeInBits() > 128 / LT.getVectorNumElements())
3303 return SDValue();
3304
3305 auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3306 ISD::CondCode SetType,
3307 Intrinsic::WASMIntrinsics InPost) {
3308 if (N->getConstantOperandVal(Num: 0) != InPre)
3309 return SDValue();
3310
3311 SDValue LHS;
3312 if (!sd_match(N: N->getOperand(Num: 1), P: m_c_SetCC(LHS: m_Value(N&: LHS), RHS: m_Zero(),
3313 CC: m_SpecificCondCode(CC: SetType))))
3314 return SDValue();
3315
3316 SDLoc DL(N);
3317 SDValue Ret = DAG.getZExtOrTrunc(
3318 Op: DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3319 Ops: {DAG.getConstant(Val: InPost, DL, VT: MVT::i32), LHS}),
3320 DL, VT: MVT::i1);
3321 if (SetType == ISD::SETEQ)
3322 Ret = DAG.getNOT(DL, Val: Ret, VT: MVT::i1);
3323 return DAG.getZExtOrTrunc(Op: Ret, DL, VT: N->getValueType(ResNo: 0));
3324 };
3325
3326 if (SDValue AnyTrueEQ = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETEQ,
3327 Intrinsic::wasm_alltrue))
3328 return AnyTrueEQ;
3329 if (SDValue AllTrueEQ = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETEQ,
3330 Intrinsic::wasm_anytrue))
3331 return AllTrueEQ;
3332 if (SDValue AnyTrueNE = CombineSetCC(Intrinsic::wasm_anytrue, ISD::SETNE,
3333 Intrinsic::wasm_anytrue))
3334 return AnyTrueNE;
3335 if (SDValue AllTrueNE = CombineSetCC(Intrinsic::wasm_alltrue, ISD::SETNE,
3336 Intrinsic::wasm_alltrue))
3337 return AllTrueNE;
3338
3339 return SDValue();
3340}
3341
3342template <int MatchRHS, ISD::CondCode MatchCond, bool RequiresNegate,
3343 Intrinsic::ID Intrin>
3344static SDValue TryMatchTrue(SDNode *N, EVT VecVT, SelectionDAG &DAG) {
3345 SDValue LHS = N->getOperand(Num: 0);
3346 SDValue RHS = N->getOperand(Num: 1);
3347 SDValue Cond = N->getOperand(Num: 2);
3348 if (MatchCond != cast<CondCodeSDNode>(Val&: Cond)->get())
3349 return SDValue();
3350
3351 if (MatchRHS != cast<ConstantSDNode>(Val&: RHS)->getSExtValue())
3352 return SDValue();
3353
3354 SDLoc DL(N);
3355 SDValue Ret = DAG.getZExtOrTrunc(
3356 Op: DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3357 Ops: {DAG.getConstant(Val: Intrin, DL, VT: MVT::i32),
3358 DAG.getSExtOrTrunc(Op: LHS->getOperand(Num: 0), DL, VT: VecVT)}),
3359 DL, VT: MVT::i1);
3360 if (RequiresNegate)
3361 Ret = DAG.getNOT(DL, Val: Ret, VT: MVT::i1);
3362 return DAG.getZExtOrTrunc(Op: Ret, DL, VT: N->getValueType(ResNo: 0));
3363}
3364
3365static SDValue performSETCCCombine(SDNode *N,
3366 TargetLowering::DAGCombinerInfo &DCI) {
3367 if (!DCI.isBeforeLegalize())
3368 return SDValue();
3369
3370 EVT VT = N->getValueType(ResNo: 0);
3371 if (!VT.isScalarInteger())
3372 return SDValue();
3373
3374 SDValue LHS = N->getOperand(Num: 0);
3375 if (LHS->getOpcode() != ISD::BITCAST)
3376 return SDValue();
3377
3378 EVT FromVT = LHS->getOperand(Num: 0).getValueType();
3379 if (!FromVT.isFixedLengthVector() || FromVT.getVectorElementType() != MVT::i1)
3380 return SDValue();
3381
3382 unsigned NumElts = FromVT.getVectorNumElements();
3383 if (NumElts != 2 && NumElts != 4 && NumElts != 8 && NumElts != 16)
3384 return SDValue();
3385
3386 if (!cast<ConstantSDNode>(Val: N->getOperand(Num: 1)))
3387 return SDValue();
3388
3389 EVT VecVT = FromVT.changeVectorElementType(EltVT: MVT::getIntegerVT(BitWidth: 128 / NumElts));
3390 auto &DAG = DCI.DAG;
3391 // setcc (iN (bitcast (vNi1 X))), 0, ne
3392 // ==> any_true (vNi1 X)
3393 if (auto Match = TryMatchTrue<0, ISD::SETNE, false, Intrinsic::wasm_anytrue>(
3394 N, VecVT, DAG)) {
3395 return Match;
3396 }
3397 // setcc (iN (bitcast (vNi1 X))), 0, eq
3398 // ==> xor (any_true (vNi1 X)), -1
3399 if (auto Match = TryMatchTrue<0, ISD::SETEQ, true, Intrinsic::wasm_anytrue>(
3400 N, VecVT, DAG)) {
3401 return Match;
3402 }
3403 // setcc (iN (bitcast (vNi1 X))), -1, eq
3404 // ==> all_true (vNi1 X)
3405 if (auto Match = TryMatchTrue<-1, ISD::SETEQ, false, Intrinsic::wasm_alltrue>(
3406 N, VecVT, DAG)) {
3407 return Match;
3408 }
3409 // setcc (iN (bitcast (vNi1 X))), -1, ne
3410 // ==> xor (all_true (vNi1 X)), -1
3411 if (auto Match = TryMatchTrue<-1, ISD::SETNE, true, Intrinsic::wasm_alltrue>(
3412 N, VecVT, DAG)) {
3413 return Match;
3414 }
3415 return SDValue();
3416}
3417
3418static SDValue performMulCombine(SDNode *N, SelectionDAG &DAG) {
3419 assert(N->getOpcode() == ISD::MUL);
3420 EVT VT = N->getValueType(ResNo: 0);
3421 if (VT != MVT::v8i32 && VT != MVT::v16i32)
3422 return SDValue();
3423
3424 // Mul with extending inputs.
3425 SDValue LHS = N->getOperand(Num: 0);
3426 SDValue RHS = N->getOperand(Num: 1);
3427 if (LHS.getOpcode() != RHS.getOpcode())
3428 return SDValue();
3429
3430 if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3431 LHS.getOpcode() != ISD::ZERO_EXTEND)
3432 return SDValue();
3433
3434 if (LHS->getOperand(Num: 0).getValueType() != RHS->getOperand(Num: 0).getValueType())
3435 return SDValue();
3436
3437 EVT FromVT = LHS->getOperand(Num: 0).getValueType();
3438 EVT EltTy = FromVT.getVectorElementType();
3439 if (EltTy != MVT::i8)
3440 return SDValue();
3441
3442 // For an input DAG that looks like this
3443 // %a = input_type
3444 // %b = input_type
3445 // %lhs = extend %a to output_type
3446 // %rhs = extend %b to output_type
3447 // %mul = mul %lhs, %rhs
3448
3449 // input_type | output_type | instructions
3450 // v16i8 | v16i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3451 // | | %high = i16x8.extmul_high_i8x16_, %a, %b
3452 // | | %low_low = i32x4.ext_low_i16x8_ %low
3453 // | | %low_high = i32x4.ext_high_i16x8_ %low
3454 // | | %high_low = i32x4.ext_low_i16x8_ %high
3455 // | | %high_high = i32x4.ext_high_i16x8_ %high
3456 // | | %res = concat_vector(...)
3457 // v8i8 | v8i32 | %low = i16x8.extmul_low_i8x16_ %a, %b
3458 // | | %low_low = i32x4.ext_low_i16x8_ %low
3459 // | | %low_high = i32x4.ext_high_i16x8_ %low
3460 // | | %res = concat_vector(%low_low, %low_high)
3461
3462 SDLoc DL(N);
3463 unsigned NumElts = VT.getVectorNumElements();
3464 SDValue ExtendInLHS = LHS->getOperand(Num: 0);
3465 SDValue ExtendInRHS = RHS->getOperand(Num: 0);
3466 bool IsSigned = LHS->getOpcode() == ISD::SIGN_EXTEND;
3467 unsigned ExtendLowOpc =
3468 IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3469 unsigned ExtendHighOpc =
3470 IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3471
3472 auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3473 return DAG.getNode(Opcode: ExtendLowOpc, DL, VT, Operand: Op);
3474 };
3475 auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3476 return DAG.getNode(Opcode: ExtendHighOpc, DL, VT, Operand: Op);
3477 };
3478
3479 if (NumElts == 16) {
3480 SDValue LowLHS = GetExtendLow(MVT::v8i16, ExtendInLHS);
3481 SDValue LowRHS = GetExtendLow(MVT::v8i16, ExtendInRHS);
3482 SDValue MulLow = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v8i16, N1: LowLHS, N2: LowRHS);
3483 SDValue HighLHS = GetExtendHigh(MVT::v8i16, ExtendInLHS);
3484 SDValue HighRHS = GetExtendHigh(MVT::v8i16, ExtendInRHS);
3485 SDValue MulHigh = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v8i16, N1: HighLHS, N2: HighRHS);
3486 SDValue SubVectors[] = {
3487 GetExtendLow(MVT::v4i32, MulLow),
3488 GetExtendHigh(MVT::v4i32, MulLow),
3489 GetExtendLow(MVT::v4i32, MulHigh),
3490 GetExtendHigh(MVT::v4i32, MulHigh),
3491 };
3492 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: SubVectors);
3493 } else {
3494 assert(NumElts == 8);
3495 SDValue LowLHS = DAG.getNode(Opcode: LHS->getOpcode(), DL, VT: MVT::v8i16, Operand: ExtendInLHS);
3496 SDValue LowRHS = DAG.getNode(Opcode: RHS->getOpcode(), DL, VT: MVT::v8i16, Operand: ExtendInRHS);
3497 SDValue MulLow = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v8i16, N1: LowLHS, N2: LowRHS);
3498 SDValue Lo = GetExtendLow(MVT::v4i32, MulLow);
3499 SDValue Hi = GetExtendHigh(MVT::v4i32, MulLow);
3500 return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: Lo, N2: Hi);
3501 }
3502 return SDValue();
3503}
3504
3505SDValue
3506WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
3507 DAGCombinerInfo &DCI) const {
3508 switch (N->getOpcode()) {
3509 default:
3510 return SDValue();
3511 case ISD::BITCAST:
3512 return performBitcastCombine(N, DCI);
3513 case ISD::SETCC:
3514 return performSETCCCombine(N, DCI);
3515 case ISD::VECTOR_SHUFFLE:
3516 return performVECTOR_SHUFFLECombine(N, DCI);
3517 case ISD::SIGN_EXTEND:
3518 case ISD::ZERO_EXTEND:
3519 return performVectorExtendCombine(N, DCI);
3520 case ISD::UINT_TO_FP:
3521 case ISD::SINT_TO_FP:
3522 return performVectorExtendToFPCombine(N, DCI);
3523 case ISD::FP_TO_SINT_SAT:
3524 case ISD::FP_TO_UINT_SAT:
3525 case ISD::FP_ROUND:
3526 case ISD::CONCAT_VECTORS:
3527 return performVectorTruncZeroCombine(N, DCI);
3528 case ISD::TRUNCATE:
3529 return performTruncateCombine(N, DCI);
3530 case ISD::INTRINSIC_WO_CHAIN: {
3531 if (auto AnyAllCombine = performAnyAllCombine(N, DAG&: DCI.DAG))
3532 return AnyAllCombine;
3533 return performLowerPartialReduction(N, DAG&: DCI.DAG);
3534 }
3535 case ISD::MUL:
3536 return performMulCombine(N, DAG&: DCI.DAG);
3537 }
3538}
3539