WebAssemblyISelLowering.cpp source code [llvm_projects/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp]

1	//=- WebAssemblyISelLowering.cpp - WebAssembly DAG Lowering Implementation -==//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	///
9	/// \file
10	/// This file implements the WebAssemblyTargetLowering class.
11	///
12	//===----------------------------------------------------------------------===//
13
14	#include "WebAssemblyISelLowering.h"
15	#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
16	#include "Utils/WebAssemblyTypeUtilities.h"
17	#include "WebAssemblyMachineFunctionInfo.h"
18	#include "WebAssemblySubtarget.h"
19	#include "WebAssemblyTargetMachine.h"
20	#include "WebAssemblyUtilities.h"
21	#include "llvm/CodeGen/CallingConvLower.h"
22	#include "llvm/CodeGen/MachineFrameInfo.h"
23	#include "llvm/CodeGen/MachineInstrBuilder.h"
24	#include "llvm/CodeGen/MachineJumpTableInfo.h"
25	#include "llvm/CodeGen/MachineModuleInfo.h"
26	#include "llvm/CodeGen/MachineRegisterInfo.h"
27	#include "llvm/CodeGen/SDPatternMatch.h"
28	#include "llvm/CodeGen/SelectionDAG.h"
29	#include "llvm/CodeGen/SelectionDAGNodes.h"
30	#include "llvm/IR/DiagnosticInfo.h"
31	#include "llvm/IR/DiagnosticPrinter.h"
32	#include "llvm/IR/Function.h"
33	#include "llvm/IR/IntrinsicInst.h"
34	#include "llvm/IR/Intrinsics.h"
35	#include "llvm/IR/IntrinsicsWebAssembly.h"
36	#include "llvm/Support/ErrorHandling.h"
37	#include "llvm/Support/KnownBits.h"
38	#include "llvm/Support/MathExtras.h"
39	#include "llvm/Target/TargetOptions.h"
40	using namespace llvm;
41
42	#define DEBUG_TYPE "wasm-lower"
43
44	WebAssemblyTargetLowering::WebAssemblyTargetLowering(
45	const TargetMachine &TM, const WebAssemblySubtarget &STI)
46	: TargetLowering (TM, STI), Subtarget(&STI) {
47	auto MVTPtr = Subtarget->hasAddr64() ? MVT::i64 : MVT::i32;
48
49	// Set the load count for memcmp expand optimization
50	MaxLoadsPerMemcmp = `8`;
51	MaxLoadsPerMemcmpOptSize = `4`;
52
53	// Booleans always contain 0 or 1.
54	setBooleanContents(ZeroOrOneBooleanContent);
55	// Except in SIMD vectors
56	setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
57	// We don't know the microarchitecture here, so just reduce register pressure.
58	setSchedulingPreference(Sched::RegPressure);
59	// Tell ISel that we have a stack pointer.
60	setStackPointerRegisterToSaveRestore(
61	Subtarget->hasAddr64() ? WebAssembly::SP64 : WebAssembly::SP32);
62	// Set up the register classes.
63	addRegisterClass(VT: MVT::i32, RC: &WebAssembly::I32RegClass);
64	addRegisterClass(VT: MVT::i64, RC: &WebAssembly::I64RegClass);
65	addRegisterClass(VT: MVT::f32, RC: &WebAssembly::F32RegClass);
66	addRegisterClass(VT: MVT::f64, RC: &WebAssembly::F64RegClass);
67	if (Subtarget->hasSIMD128()) {
68	addRegisterClass(VT: MVT::v16i8, RC: &WebAssembly::V128RegClass);
69	addRegisterClass(VT: MVT::v8i16, RC: &WebAssembly::V128RegClass);
70	addRegisterClass(VT: MVT::v4i32, RC: &WebAssembly::V128RegClass);
71	addRegisterClass(VT: MVT::v4f32, RC: &WebAssembly::V128RegClass);
72	addRegisterClass(VT: MVT::v2i64, RC: &WebAssembly::V128RegClass);
73	addRegisterClass(VT: MVT::v2f64, RC: &WebAssembly::V128RegClass);
74	}
75	if (Subtarget->hasFP16()) {
76	addRegisterClass(VT: MVT::v8f16, RC: &WebAssembly::V128RegClass);
77	}
78	if (Subtarget->hasReferenceTypes()) {
79	addRegisterClass(VT: MVT::externref, RC: &WebAssembly::EXTERNREFRegClass);
80	addRegisterClass(VT: MVT::funcref, RC: &WebAssembly::FUNCREFRegClass);
81	if (Subtarget->hasExceptionHandling()) {
82	addRegisterClass(VT: MVT::exnref, RC: &WebAssembly::EXNREFRegClass);
83	}
84	}
85	// Compute derived properties from the register classes.
86	computeRegisterProperties(TRI: Subtarget->getRegisterInfo());
87
88	// Transform loads and stores to pointers in address space 1 to loads and
89	// stores to WebAssembly global variables, outside linear memory.
90	for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64}) {
91	setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
92	setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
93	}
94	if (Subtarget->hasSIMD128()) {
95	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
96	MVT::v2f64}) {
97	setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
98	setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
99	}
100	}
101	if (Subtarget->hasFP16()) {
102	setOperationAction(Op: ISD::LOAD, VT: MVT::v8f16, Action: Custom);
103	setOperationAction(Op: ISD::STORE, VT: MVT::v8f16, Action: Custom);
104	}
105	if (Subtarget->hasReferenceTypes()) {
106	// We need custom load and store lowering for both externref, funcref and
107	// Other. The MVT::Other here represents tables of reference types.
108	for (auto T : {MVT::externref, MVT::funcref, MVT::Other}) {
109	setOperationAction(Op: ISD::LOAD, VT: T, Action: Custom);
110	setOperationAction(Op: ISD::STORE, VT: T, Action: Custom);
111	}
112	}
113
114	setOperationAction(Op: ISD::GlobalAddress, VT: MVTPtr, Action: Custom);
115	setOperationAction(Op: ISD::GlobalTLSAddress, VT: MVTPtr, Action: Custom);
116	setOperationAction(Op: ISD::ExternalSymbol, VT: MVTPtr, Action: Custom);
117	setOperationAction(Op: ISD::JumpTable, VT: MVTPtr, Action: Custom);
118	setOperationAction(Op: ISD::BlockAddress, VT: MVTPtr, Action: Custom);
119	setOperationAction(Op: ISD::BRIND, VT: MVT::Other, Action: Custom);
120	setOperationAction(Op: ISD::CLEAR_CACHE, VT: MVT::Other, Action: Custom);
121
122	// Take the default expansion for va_arg, va_copy, and va_end. There is no
123	// default action for va_start, so we do that custom.
124	setOperationAction(Op: ISD::VASTART, VT: MVT::Other, Action: Custom);
125	setOperationAction(Op: ISD::VAARG, VT: MVT::Other, Action: Expand);
126	setOperationAction(Op: ISD::VACOPY, VT: MVT::Other, Action: Expand);
127	setOperationAction(Op: ISD::VAEND, VT: MVT::Other, Action: Expand);
128
129	for (auto T : {MVT::f32, MVT::f64, MVT::v4f32, MVT::v2f64, MVT::v8f16}) {
130	if (!Subtarget->hasFP16() && T == MVT::v8f16) {
131	continue;
132	}
133	// Don't expand the floating-point types to constant pools.
134	setOperationAction(Op: ISD::ConstantFP, VT: T, Action: Legal);
135	// Expand floating-point comparisons.
136	for (auto CC : {ISD::SETO, ISD::SETUO, ISD::SETUEQ, ISD::SETONE,
137	ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE})
138	setCondCodeAction(CCs: CC, VT: T, Action: Expand);
139	// Expand floating-point library function operators.
140	for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FMA})
141	setOperationAction(Op, VT: T, Action: Expand);
142	// Expand vector FREM, but use a libcall rather than an expansion for scalar
143	if (MVT (T).isVector())
144	setOperationAction(Op: ISD::FREM, VT: T, Action: Expand);
145	else
146	setOperationAction(Op: ISD::FREM, VT: T, Action: LibCall);
147	// Note supported floating-point library function operators that otherwise
148	// default to expand.
149	for (auto Op : {ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FNEARBYINT,
150	ISD::FRINT, ISD::FROUNDEVEN})
151	setOperationAction(Op, VT: T, Action: Legal);
152	// Support minimum and maximum, which otherwise default to expand.
153	setOperationAction(Op: ISD::FMINIMUM, VT: T, Action: Legal);
154	setOperationAction(Op: ISD::FMAXIMUM, VT: T, Action: Legal);
155	if (Subtarget->hasSIMD128() && MVT (T).isVector()) {
156	setOperationAction(Op: ISD::PSEUDO_FMIN, VT: T, Action: Legal);
157	setOperationAction(Op: ISD::PSEUDO_FMAX, VT: T, Action: Legal);
158	}
159	// When experimental v8f16 support is enabled these instructions don't need
160	// to be expanded.
161	if (T != MVT::v8f16) {
162	setOperationAction(Op: ISD::FP16_TO_FP, VT: T, Action: Expand);
163	setOperationAction(Op: ISD::FP_TO_FP16, VT: T, Action: Expand);
164	}
165	if (Subtarget->hasFP16() && T == MVT::f32) {
166	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: T, MemVT: MVT::f16, Action: Legal);
167	setTruncStoreAction(ValVT: T, MemVT: MVT::f16, Action: Legal);
168	} else {
169	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: T, MemVT: MVT::f16, Action: Expand);
170	setTruncStoreAction(ValVT: T, MemVT: MVT::f16, Action: Expand);
171	}
172	}
173
174	// Expand unavailable integer operations.
175	for (auto Op :
176	{ISD::BSWAP, ISD::SMUL_LOHI, ISD::UMUL_LOHI, ISD::MULHS, ISD::MULHU,
177	ISD::SDIVREM, ISD::UDIVREM, ISD::SHL_PARTS, ISD::SRA_PARTS,
178	ISD::SRL_PARTS, ISD::ADDC, ISD::ADDE, ISD::SUBC, ISD::SUBE}) {
179	for (auto T : {MVT::i32, MVT::i64})
180	setOperationAction(Op, VT: T, Action: Expand);
181	if (Subtarget->hasSIMD128())
182	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
183	setOperationAction(Op, VT: T, Action: Expand);
184	}
185
186	if (Subtarget->hasWideArithmetic()) {
187	setOperationAction(Op: ISD::ADD, VT: MVT::i128, Action: Custom);
188	setOperationAction(Op: ISD::SUB, VT: MVT::i128, Action: Custom);
189	setOperationAction(Op: ISD::SMUL_LOHI, VT: MVT::i64, Action: Custom);
190	setOperationAction(Op: ISD::UMUL_LOHI, VT: MVT::i64, Action: Custom);
191	setOperationAction(Op: ISD::UADDO, VT: MVT::i64, Action: Custom);
192	}
193
194	if (Subtarget->hasNontrappingFPToInt())
195	for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT})
196	for (auto T : {MVT::i32, MVT::i64})
197	setOperationAction(Op, VT: T, Action: Custom);
198
199	if (Subtarget->hasRelaxedSIMD()) {
200	setOperationAction(
201	Ops: {ISD::FMINNUM, ISD::FMINIMUMNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM},
202	VTs: {MVT::v4f32, MVT::v2f64}, Action: Custom);
203	}
204
205	// Combine expands these operations, because wasi-libc and emscripten do not
206	// yet have the dedicated libcalls.
207	setTargetDAGCombine(
208	{ISD::FMINIMUM, ISD::FMAXIMUM, ISD::FMINIMUMNUM, ISD::FMAXIMUMNUM});
209
210	// SIMD-specific configuration
211	if (Subtarget->hasSIMD128()) {
212
213	setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN);
214
215	// Combine wide-vector muls, with extend inputs, to extmul_half.
216	setTargetDAGCombine(ISD::MUL);
217	setTargetDAGCombine(ISD::SHL);
218
219	// Combine vector mask reductions into alltrue/anytrue
220	setTargetDAGCombine(ISD::SETCC);
221
222	// Convert vector to integer bitcasts to bitmask
223	setTargetDAGCombine(ISD::BITCAST);
224
225	// Hoist bitcasts out of shuffles
226	setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
227
228	// Combine extends of extract_subvectors into widening ops
229	setTargetDAGCombine({ISD::SIGN_EXTEND, ISD::ZERO_EXTEND});
230
231	// Combine int_to_fp or fp_extend of extract_vectors and vice versa into
232	// conversions ops
233	setTargetDAGCombine({ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_EXTEND,
234	ISD::EXTRACT_SUBVECTOR});
235
236	// Combine fp_to_{s,u}int_sat or fp_round of concat_vectors or vice versa
237	// into conversion ops
238	setTargetDAGCombine({ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT,
239	ISD::FP_TO_SINT, ISD::FP_TO_UINT, ISD::FP_ROUND,
240	ISD::CONCAT_VECTORS});
241
242	setTargetDAGCombine(ISD::TRUNCATE);
243
244	// Support saturating add/sub for i8x16 and i16x8
245	for (auto Op : {ISD::SADDSAT, ISD::UADDSAT, ISD::SSUBSAT, ISD::USUBSAT})
246	for (auto T : {MVT::v16i8, MVT::v8i16})
247	setOperationAction(Op, VT: T, Action: Legal);
248
249	// Support integer abs
250	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
251	setOperationAction(Op: ISD::ABS, VT: T, Action: Legal);
252
253	// Custom lower BUILD_VECTORs to minimize number of replace_lanes
254	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
255	MVT::v2f64})
256	setOperationAction(Op: ISD::BUILD_VECTOR, VT: T, Action: Custom);
257
258	if (Subtarget->hasFP16()) {
259	setOperationAction(Op: ISD::BUILD_VECTOR, VT: MVT::f16, Action: Custom);
260	setOperationAction(Op: ISD::FP_ROUND, VT: MVT::v4f16, Action: Custom);
261	}
262
263	// We have custom shuffle lowering to expose the shuffle mask
264	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
265	MVT::v2f64})
266	setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: T, Action: Custom);
267
268	if (Subtarget->hasFP16())
269	setOperationAction(Op: ISD::VECTOR_SHUFFLE, VT: MVT::v8f16, Action: Custom);
270
271	// Support splatting
272	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
273	MVT::v2f64})
274	setOperationAction(Op: ISD::SPLAT_VECTOR, VT: T, Action: Legal);
275
276	setOperationAction(Ops: ISD::AVGCEILU, VTs: {MVT::v8i16, MVT::v16i8}, Action: Legal);
277
278	// Custom lowering since wasm shifts must have a scalar shift amount
279	for (auto Op : {ISD::SHL, ISD::SRA, ISD::SRL})
280	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
281	setOperationAction(Op, VT: T, Action: Custom);
282
283	// Custom lower lane accesses to expand out variable indices
284	for (auto Op : {ISD::EXTRACT_VECTOR_ELT, ISD::INSERT_VECTOR_ELT})
285	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64,
286	MVT::v2f64})
287	setOperationAction(Op, VT: T, Action: Custom);
288
289	// There is no i8x16.mul instruction
290	setOperationAction(Op: ISD::MUL, VT: MVT::v16i8, Action: Expand);
291
292	// Expand integer operations supported for scalars but not SIMD
293	for (auto Op :
294	{ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM, ISD::ROTL, ISD::ROTR})
295	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64})
296	setOperationAction(Op, VT: T, Action: Expand);
297
298	// But we do have integer min and max operations
299	for (auto Op : {ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX})
300	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
301	setOperationAction(Op, VT: T, Action: Legal);
302
303	// And we have popcnt for i8x16. It can be used to expand ctlz/cttz.
304	setOperationAction(Op: ISD::CTPOP, VT: MVT::v16i8, Action: Legal);
305	setOperationAction(Op: ISD::CTLZ, VT: MVT::v16i8, Action: Expand);
306	setOperationAction(Op: ISD::CTTZ, VT: MVT::v16i8, Action: Expand);
307
308	// Custom lower bit counting operations for other types to scalarize them.
309	for (auto Op : {ISD::CTLZ, ISD::CTTZ, ISD::CTPOP})
310	for (auto T : {MVT::v8i16, MVT::v4i32, MVT::v2i64})
311	setOperationAction(Op, VT: T, Action: Custom);
312
313	// Expand float operations supported for scalars but not SIMD
314	for (auto Op : {ISD::FCOPYSIGN, ISD::FLOG, ISD::FLOG2, ISD::FLOG10,
315	ISD::FEXP, ISD::FEXP2, ISD::FEXP10})
316	for (auto T : {MVT::v4f32, MVT::v2f64})
317	setOperationAction(Op, VT: T, Action: Expand);
318
319	// Unsigned comparison operations are unavailable for i64x2 vectors.
320	for (auto CC : {ISD::SETUGT, ISD::SETUGE, ISD::SETULT, ISD::SETULE})
321	setCondCodeAction(CCs: CC, VT: MVT::v2i64, Action: Custom);
322
323	// 64x2 conversions are not in the spec
324	for (auto Op :
325	{ISD::SINT_TO_FP, ISD::UINT_TO_FP, ISD::FP_TO_SINT, ISD::FP_TO_UINT})
326	for (auto T : {MVT::v2i64, MVT::v2f64})
327	setOperationAction(Op, VT: T, Action: Expand);
328
329	// But saturating fp_to_int converstions are
330	for (auto Op : {ISD::FP_TO_SINT_SAT, ISD::FP_TO_UINT_SAT}) {
331	setOperationAction(Op, VT: MVT::v4i32, Action: Custom);
332	if (Subtarget->hasFP16()) {
333	setOperationAction(Op, VT: MVT::v8i16, Action: Custom);
334	}
335	}
336
337	// Support vector extending
338	for (auto T : MVT::integer_fixedlen_vector_valuetypes()) {
339	setOperationAction(Op: ISD::ANY_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
340	setOperationAction(Op: ISD::SIGN_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
341	setOperationAction(Op: ISD::ZERO_EXTEND_VECTOR_INREG, VT: T, Action: Custom);
342	}
343
344	if (Subtarget->hasFP16()) {
345	setOperationAction(Op: ISD::FMA, VT: MVT::v8f16, Action: Legal);
346	}
347
348	if (Subtarget->hasRelaxedSIMD()) {
349	setOperationAction(Op: ISD::FMULADD, VT: MVT::v4f32, Action: Legal);
350	setOperationAction(Op: ISD::FMULADD, VT: MVT::v2f64, Action: Legal);
351	}
352
353	// Partial MLA reductions.
354	for (auto Op : {ISD::PARTIAL_REDUCE_SMLA, ISD::PARTIAL_REDUCE_UMLA}) {
355	setPartialReduceMLAAction(Opc: Op, AccVT: MVT::v4i32, InputVT: MVT::v16i8, Action: Legal);
356	setPartialReduceMLAAction(Opc: Op, AccVT: MVT::v4i32, InputVT: MVT::v8i16, Action: Legal);
357	}
358	}
359
360	// As a special case, these operators use the type to mean the type to
361	// sign-extend from.
362	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
363	if (!Subtarget->hasSignExt()) {
364	// Sign extends are legal only when extending a vector extract
365	auto Action = Subtarget->hasSIMD128() ? Custom : Expand;
366	for (auto T : {MVT::i8, MVT::i16, MVT::i32})
367	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action);
368	}
369	for (auto T : MVT::integer_fixedlen_vector_valuetypes())
370	setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: T, Action: Expand);
371
372	// Dynamic stack allocation: use the default expansion.
373	setOperationAction(Op: ISD::STACKSAVE, VT: MVT::Other, Action: Expand);
374	setOperationAction(Op: ISD::STACKRESTORE, VT: MVT::Other, Action: Expand);
375	setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVTPtr, Action: Expand);
376
377	setOperationAction(Op: ISD::FrameIndex, VT: MVT::i32, Action: Custom);
378	setOperationAction(Op: ISD::FrameIndex, VT: MVT::i64, Action: Custom);
379	setOperationAction(Op: ISD::CopyToReg, VT: MVT::Other, Action: Custom);
380
381	// Expand these forms; we pattern-match the forms that we can handle in isel.
382	for (auto T : {MVT::i32, MVT::i64, MVT::f32, MVT::f64})
383	for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
384	setOperationAction(Op, VT: T, Action: Expand);
385
386	if (Subtarget->hasReferenceTypes())
387	for (auto Op : {ISD::BR_CC, ISD::SELECT_CC})
388	for (auto T : {MVT::externref, MVT::funcref})
389	setOperationAction(Op, VT: T, Action: Expand);
390
391	// There is no vector conditional select instruction
392	for (auto T :
393	{MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v4f32, MVT::v2i64, MVT::v2f64})
394	setOperationAction(Op: ISD::SELECT_CC, VT: T, Action: Expand);
395
396	// We have custom switch handling.
397	setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Custom);
398
399	// WebAssembly doesn't have:
400	// - Floating-point extending loads.
401	// - Floating-point truncating stores.
402	// - i1 extending loads.
403	// - truncating SIMD stores and most extending loads
404	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
405	setTruncStoreAction(ValVT: MVT::f64, MemVT: MVT::f32, Action: Expand);
406	for (auto T : MVT::integer_valuetypes())
407	for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
408	setLoadExtAction(ExtType: Ext, ValVT: T, MemVT: MVT::i1, Action: Promote);
409	if (Subtarget->hasSIMD128()) {
410	for (auto T : {MVT::v16i8, MVT::v8i16, MVT::v4i32, MVT::v2i64, MVT::v4f32,
411	MVT::v2f64}) {
412	for (auto MemT : MVT::fixedlen_vector_valuetypes()) {
413	if (MVT (T) != MemT) {
414	setTruncStoreAction(ValVT: T, MemVT: MemT, Action: Expand);
415	for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD})
416	setLoadExtAction(ExtType: Ext, ValVT: T, MemVT: MemT, Action: Expand);
417	}
418	}
419	}
420	// But some vector extending loads are legal
421	for (auto Ext : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}) {
422	setLoadExtAction(ExtType: Ext, ValVT: MVT::v8i16, MemVT: MVT::v8i8, Action: Legal);
423	setLoadExtAction(ExtType: Ext, ValVT: MVT::v4i32, MemVT: MVT::v4i16, Action: Legal);
424	setLoadExtAction(ExtType: Ext, ValVT: MVT::v2i64, MemVT: MVT::v2i32, Action: Legal);
425	}
426	setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: MVT::v2f64, MemVT: MVT::v2f32, Action: Legal);
427	}
428
429	// Don't do anything clever with build_pairs
430	setOperationAction(Op: ISD::BUILD_PAIR, VT: MVT::i64, Action: Expand);
431
432	// Trap lowers to wasm unreachable
433	setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Legal);
434	setOperationAction(Op: ISD::DEBUGTRAP, VT: MVT::Other, Action: Legal);
435
436	// Exception handling intrinsics
437	setOperationAction(Op: ISD::INTRINSIC_WO_CHAIN, VT: MVT::Other, Action: Custom);
438	setOperationAction(Op: ISD::INTRINSIC_W_CHAIN, VT: MVT::Other, Action: Custom);
439	setOperationAction(Op: ISD::INTRINSIC_VOID, VT: MVT::Other, Action: Custom);
440
441	setMaxAtomicSizeInBitsSupported(`64`);
442
443	// Always convert switches to br_tables unless there is only one case, which
444	// is equivalent to a simple branch. This reduces code size for wasm, and we
445	// defer possible jump table optimizations to the VM.
446	setMinimumJumpTableEntries(`2`);
447	}
448
449	TargetLowering::AtomicExpansionKind
450	WebAssemblyTargetLowering::shouldExpandAtomicRMWInIR(
451	const AtomicRMWInst AI) const* {
452	// We have wasm instructions for these
453	switch (AI->getOperation()) {
454	case AtomicRMWInst::Add:
455	case AtomicRMWInst::Sub:
456	case AtomicRMWInst::And:
457	case AtomicRMWInst::Or:
458	case AtomicRMWInst::Xor:
459	case AtomicRMWInst::Xchg:
460	return AtomicExpansionKind::None;
461	default:
462	break;
463	}
464	return AtomicExpansionKind::CmpXChg;
465	}
466
467	bool WebAssemblyTargetLowering::shouldScalarizeBinop(SDValue VecOp) const {
468	// Implementation copied from X86TargetLowering.
469	unsigned Opc = VecOp.getOpcode();
470
471	// Assume target opcodes can't be scalarized.
472	// TODO - do we have any exceptions?
473	if (Opc >= ISD::BUILTIN_OP_END \|\| !isBinOp(Opcode: Opc))
474	return false;
475
476	// If the vector op is not supported, try to convert to scalar.
477	EVT VecVT = VecOp.getValueType();
478	if (!isOperationLegalOrCustomOrPromote(Op: Opc, VT: VecVT))
479	return true;
480
481	// If the vector op is supported, but the scalar op is not, the transform may
482	// not be worthwhile.
483	EVT ScalarVT = VecVT.getScalarType();
484	return isOperationLegalOrCustomOrPromote(Op: Opc, VT: ScalarVT);
485	}
486
487	FastISel *WebAssemblyTargetLowering::createFastISel(
488	FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo,
489	const LibcallLoweringInfo LibcallLowering) const* {
490	return WebAssembly::createFastISel(funcInfo&: FuncInfo, libInfo: LibInfo, libcallLowering: LibcallLowering);
491	}
492
493	MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /DL/,
494	EVT VT) const {
495	unsigned BitWidth = NextPowerOf2(A: VT.getSizeInBits() - `1`);
496	if (BitWidth > `1` && BitWidth < `8`)
497	BitWidth = `8`;
498
499	if (BitWidth > `64`) {
500	// The shift will be lowered to a libcall, and compiler-rt libcalls expect
501	// the count to be an i32.
502	BitWidth = `32`;
503	assert(BitWidth >= Log2_32_Ceil(VT.getSizeInBits()) &&
504	"32-bit shift counts ought to be enough for anyone");
505	}
506
507	MVT Result = MVT::getIntegerVT(BitWidth);
508	assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE &&
509	"Unable to represent scalar shift amount type");
510	return Result;
511	}
512
513	// Lower an fp-to-int conversion operator from the LLVM opcode, which has an
514	// undefined result on invalid/overflow, to the WebAssembly opcode, which
515	// traps on invalid/overflow.
516	static MachineBasicBlock *LowerFPToInt(MachineInstr &MI, DebugLoc DL,
517	MachineBasicBlock *BB,
518	const TargetInstrInfo &TII,
519	bool IsUnsigned, bool Int64,
520	bool Float64, unsigned LoweredOpcode) {
521	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
522
523	Register OutReg = MI.getOperand(i: `0`).getReg();
524	Register InReg = MI.getOperand(i: `1`).getReg();
525
526	unsigned Abs = Float64 ? WebAssembly::ABS_F64 : WebAssembly::ABS_F32;
527	unsigned FConst = Float64 ? WebAssembly::CONST_F64 : WebAssembly::CONST_F32;
528	unsigned LT = Float64 ? WebAssembly::LT_F64 : WebAssembly::LT_F32;
529	unsigned GE = Float64 ? WebAssembly::GE_F64 : WebAssembly::GE_F32;
530	unsigned IConst = Int64 ? WebAssembly::CONST_I64 : WebAssembly::CONST_I32;
531	unsigned Eqz = WebAssembly::EQZ_I32;
532	unsigned And = WebAssembly::AND_I32;
533	int64_t Limit = Int64 ? INT64_MIN : INT32_MIN;
534	int64_t Substitute = IsUnsigned ? `0` : Limit;
535	double CmpVal = IsUnsigned ? -(double)Limit * `2.0` : -(double)Limit;
536	auto &Context = BB->getParent()->getFunction().getContext();
537	Type *Ty = Float64 ? Type::getDoubleTy(C&: Context) : Type::getFloatTy(C&: Context);
538
539	const BasicBlock *LLVMBB = BB->getBasicBlock();
540	MachineFunction *F = BB->getParent();
541	MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
542	MachineBasicBlock *FalseMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
543	MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
544
545	MachineFunction::iterator It = ++BB->getIterator();
546	F->insert(MBBI: It, MBB: FalseMBB);
547	F->insert(MBBI: It, MBB: TrueMBB);
548	F->insert(MBBI: It, MBB: DoneMBB);
549
550	// Transfer the remainder of BB and its successor edges to DoneMBB.
551	DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
552	DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
553
554	BB->addSuccessor(Succ: TrueMBB);
555	BB->addSuccessor(Succ: FalseMBB);
556	TrueMBB->addSuccessor(Succ: DoneMBB);
557	FalseMBB->addSuccessor(Succ: DoneMBB);
558
559	unsigned Tmp0, Tmp1, CmpReg, EqzReg, FalseReg, TrueReg;
560	Tmp0 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
561	Tmp1 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
562	CmpReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
563	EqzReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
564	FalseReg = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: OutReg));
565	TrueReg = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: OutReg));
566
567	MI.eraseFromParent();
568	// For signed numbers, we can do a single comparison to determine whether
569	// fabs(x) is within range.
570	if (IsUnsigned) {
571	Tmp0 = InReg;
572	} else {
573	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Abs), DestReg: Tmp0).addReg(RegNo: InReg);
574	}
575	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: FConst), DestReg: Tmp1)
576	.addFPImm(Val: cast<ConstantFP>(Val: ConstantFP::get(Ty, V: CmpVal)));
577	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: LT), DestReg: CmpReg).addReg(RegNo: Tmp0).addReg(RegNo: Tmp1);
578
579	// For unsigned numbers, we have to do a separate comparison with zero.
580	if (IsUnsigned) {
581	Tmp1 = MRI.createVirtualRegister(RegClass: MRI.getRegClass(Reg: InReg));
582	Register SecondCmpReg =
583	MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
584	Register AndReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
585	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: FConst), DestReg: Tmp1)
586	.addFPImm(Val: cast<ConstantFP>(Val: ConstantFP::get(Ty, V: `0.0`)));
587	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: GE), DestReg: SecondCmpReg).addReg(RegNo: Tmp0).addReg(RegNo: Tmp1);
588	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: And), DestReg: AndReg).addReg(RegNo: CmpReg).addReg(RegNo: SecondCmpReg);
589	CmpReg = AndReg;
590	}
591
592	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Eqz), DestReg: EqzReg).addReg(RegNo: CmpReg);
593
594	// Create the CFG diamond to select between doing the conversion or using
595	// the substitute value.
596	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR_IF)).addMBB(MBB: TrueMBB).addReg(RegNo: EqzReg);
597	BuildMI(BB: FalseMBB, MIMD: DL, MCID: TII.get(Opcode: LoweredOpcode), DestReg: FalseReg).addReg(RegNo: InReg);
598	BuildMI(BB: FalseMBB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR)).addMBB(MBB: DoneMBB);
599	BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: IConst), DestReg: TrueReg).addImm(Val: Substitute);
600	BuildMI(BB&: *DoneMBB, I: DoneMBB->begin(), MIMD: DL, MCID: TII.get(Opcode: TargetOpcode::PHI), DestReg: OutReg)
601	.addReg(RegNo: FalseReg)
602	.addMBB(MBB: FalseMBB)
603	.addReg(RegNo: TrueReg)
604	.addMBB(MBB: TrueMBB);
605
606	return DoneMBB;
607	}
608
609	// Lower a `MEMCPY` instruction into a CFG triangle around a `MEMORY_COPY`
610	// instuction to handle the zero-length case.
611	static MachineBasicBlock *LowerMemcpy(MachineInstr &MI, DebugLoc DL,
612	MachineBasicBlock *BB,
613	const TargetInstrInfo &TII, bool Int64) {
614	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
615
616	MachineOperand DstMem = MI.getOperand(i: `0`);
617	MachineOperand SrcMem = MI.getOperand(i: `1`);
618	MachineOperand Dst = MI.getOperand(i: `2`);
619	MachineOperand Src = MI.getOperand(i: `3`);
620	MachineOperand Len = MI.getOperand(i: `4`);
621
622	// If the length is a constant, we don't actually need the check.
623	if (MachineInstr *Def = MRI.getVRegDef(Reg: Len.getReg())) {
624	if (Def->getOpcode() == WebAssembly::CONST_I32 \|\|
625	Def->getOpcode() == WebAssembly::CONST_I64) {
626	if (Def->getOperand(i: `1`).getImm() == `0`) {
627	// A zero-length memcpy is a no-op.
628	MI.eraseFromParent();
629	return BB;
630	}
631	// A non-zero-length memcpy doesn't need a zero check.
632	unsigned MemoryCopy =
633	Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
634	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: MemoryCopy))
635	.add(MO: DstMem)
636	.add(MO: SrcMem)
637	.add(MO: Dst)
638	.add(MO: Src)
639	.add(MO: Len);
640	MI.eraseFromParent();
641	return BB;
642	}
643	}
644
645	// We're going to add an extra use to `Len` to test if it's zero; that
646	// use shouldn't be a kill, even if the original use is.
647	MachineOperand NoKillLen = Len;
648	NoKillLen.setIsKill(false);
649
650	// Decide on which `MachineInstr` opcode we're going to use.
651	unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
652	unsigned MemoryCopy =
653	Int64 ? WebAssembly::MEMORY_COPY_A64 : WebAssembly::MEMORY_COPY_A32;
654
655	// Create two new basic blocks; one for the new `memory.fill` that we can
656	// branch over, and one for the rest of the instructions after the original
657	// `memory.fill`.
658	const BasicBlock *LLVMBB = BB->getBasicBlock();
659	MachineFunction *F = BB->getParent();
660	MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
661	MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
662
663	MachineFunction::iterator It = ++BB->getIterator();
664	F->insert(MBBI: It, MBB: TrueMBB);
665	F->insert(MBBI: It, MBB: DoneMBB);
666
667	// Transfer the remainder of BB and its successor edges to DoneMBB.
668	DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
669	DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
670
671	// Connect the CFG edges.
672	BB->addSuccessor(Succ: TrueMBB);
673	BB->addSuccessor(Succ: DoneMBB);
674	TrueMBB->addSuccessor(Succ: DoneMBB);
675
676	// Create a virtual register for the `Eqz` result.
677	unsigned EqzReg;
678	EqzReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
679
680	// Erase the original `memory.copy`.
681	MI.eraseFromParent();
682
683	// Test if `Len` is zero.
684	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Eqz), DestReg: EqzReg).add(MO: NoKillLen);
685
686	// Insert a new `memory.copy`.
687	BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: MemoryCopy))
688	.add(MO: DstMem)
689	.add(MO: SrcMem)
690	.add(MO: Dst)
691	.add(MO: Src)
692	.add(MO: Len);
693
694	// Create the CFG triangle.
695	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR_IF)).addMBB(MBB: DoneMBB).addReg(RegNo: EqzReg);
696	BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR)).addMBB(MBB: DoneMBB);
697
698	return DoneMBB;
699	}
700
701	// Lower a `MEMSET` instruction into a CFG triangle around a `MEMORY_FILL`
702	// instuction to handle the zero-length case.
703	static MachineBasicBlock *LowerMemset(MachineInstr &MI, DebugLoc DL,
704	MachineBasicBlock *BB,
705	const TargetInstrInfo &TII, bool Int64) {
706	MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
707
708	MachineOperand Mem = MI.getOperand(i: `0`);
709	MachineOperand Dst = MI.getOperand(i: `1`);
710	MachineOperand Val = MI.getOperand(i: `2`);
711	MachineOperand Len = MI.getOperand(i: `3`);
712
713	// If the length is a constant, we don't actually need the check.
714	if (MachineInstr *Def = MRI.getVRegDef(Reg: Len.getReg())) {
715	if (Def->getOpcode() == WebAssembly::CONST_I32 \|\|
716	Def->getOpcode() == WebAssembly::CONST_I64) {
717	if (Def->getOperand(i: `1`).getImm() == `0`) {
718	// A zero-length memset is a no-op.
719	MI.eraseFromParent();
720	return BB;
721	}
722	// A non-zero-length memset doesn't need a zero check.
723	unsigned MemoryFill =
724	Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
725	BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII.get(Opcode: MemoryFill))
726	.add(MO: Mem)
727	.add(MO: Dst)
728	.add(MO: Val)
729	.add(MO: Len);
730	MI.eraseFromParent();
731	return BB;
732	}
733	}
734
735	// We're going to add an extra use to `Len` to test if it's zero; that
736	// use shouldn't be a kill, even if the original use is.
737	MachineOperand NoKillLen = Len;
738	NoKillLen.setIsKill(false);
739
740	// Decide on which `MachineInstr` opcode we're going to use.
741	unsigned Eqz = Int64 ? WebAssembly::EQZ_I64 : WebAssembly::EQZ_I32;
742	unsigned MemoryFill =
743	Int64 ? WebAssembly::MEMORY_FILL_A64 : WebAssembly::MEMORY_FILL_A32;
744
745	// Create two new basic blocks; one for the new `memory.fill` that we can
746	// branch over, and one for the rest of the instructions after the original
747	// `memory.fill`.
748	const BasicBlock *LLVMBB = BB->getBasicBlock();
749	MachineFunction *F = BB->getParent();
750	MachineBasicBlock *TrueMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
751	MachineBasicBlock *DoneMBB = F->CreateMachineBasicBlock(BB: LLVMBB);
752
753	MachineFunction::iterator It = ++BB->getIterator();
754	F->insert(MBBI: It, MBB: TrueMBB);
755	F->insert(MBBI: It, MBB: DoneMBB);
756
757	// Transfer the remainder of BB and its successor edges to DoneMBB.
758	DoneMBB->splice(Where: DoneMBB->begin(), Other: BB, From: std::next(x: MI.getIterator()), To: BB->end());
759	DoneMBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
760
761	// Connect the CFG edges.
762	BB->addSuccessor(Succ: TrueMBB);
763	BB->addSuccessor(Succ: DoneMBB);
764	TrueMBB->addSuccessor(Succ: DoneMBB);
765
766	// Create a virtual register for the `Eqz` result.
767	unsigned EqzReg;
768	EqzReg = MRI.createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
769
770	// Erase the original `memory.fill`.
771	MI.eraseFromParent();
772
773	// Test if `Len` is zero.
774	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: Eqz), DestReg: EqzReg).add(MO: NoKillLen);
775
776	// Insert a new `memory.copy`.
777	BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: MemoryFill)).add(MO: Mem).add(MO: Dst).add(MO: Val).add(MO: Len);
778
779	// Create the CFG triangle.
780	BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR_IF)).addMBB(MBB: DoneMBB).addReg(RegNo: EqzReg);
781	BuildMI(BB: TrueMBB, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::BR)).addMBB(MBB: DoneMBB);
782
783	return DoneMBB;
784	}
785
786	static MachineBasicBlock *
787	LowerCallResults(MachineInstr &CallResults, DebugLoc DL, MachineBasicBlock *BB,
788	const WebAssemblySubtarget *Subtarget,
789	const TargetInstrInfo &TII) {
790	MachineInstr &CallParams = *CallResults.getPrevNode();
791	assert(CallParams.getOpcode() == WebAssembly::CALL_PARAMS);
792	assert(CallResults.getOpcode() == WebAssembly::CALL_RESULTS \|\|
793	CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS);
794
795	bool IsIndirect =
796	CallParams.getOperand(i: `0`).isReg() \|\| CallParams.getOperand(i: `0`).isFI();
797	bool IsRetCall = CallResults.getOpcode() == WebAssembly::RET_CALL_RESULTS;
798
799	bool IsFuncrefCall = false;
800	if (IsIndirect && CallParams.getOperand(i: `0`).isReg()) {
801	Register Reg = CallParams.getOperand(i: `0`).getReg();
802	const MachineFunction *MF = BB->getParent();
803	const MachineRegisterInfo &MRI = MF->getRegInfo();
804	const TargetRegisterClass *TRC = MRI.getRegClass(Reg);
805	IsFuncrefCall = (TRC == &WebAssembly::FUNCREFRegClass);
806	assert(!IsFuncrefCall \|\| Subtarget->hasReferenceTypes());
807	}
808
809	unsigned CallOp;
810	if (IsIndirect && IsRetCall) {
811	CallOp = WebAssembly::RET_CALL_INDIRECT;
812	} else if (IsIndirect) {
813	CallOp = WebAssembly::CALL_INDIRECT;
814	} else if (IsRetCall) {
815	CallOp = WebAssembly::RET_CALL;
816	} else {
817	CallOp = WebAssembly::CALL;
818	}
819
820	MachineFunction &MF = *BB->getParent();
821	const MCInstrDesc &MCID = TII.get(Opcode: CallOp);
822	MachineInstrBuilder MIB(MF, MF.CreateMachineInstr(MCID, DL));
823
824	// Move the function pointer to the end of the arguments for indirect calls
825	if (IsIndirect) {
826	auto FnPtr = CallParams.getOperand(i: `0`);
827	CallParams.removeOperand(OpNo: `0`);
828
829	// For funcrefs, call_indirect is done through __funcref_call_table and the
830	// funcref is always installed in slot 0 of the table, therefore instead of
831	// having the function pointer added at the end of the params list, a zero
832	// (the index in
833	// __funcref_call_table is added).
834	if (IsFuncrefCall) {
835	Register RegZero =
836	MF.getRegInfo().createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
837	MachineInstrBuilder MIBC0 =
838	BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::CONST_I32), DestReg: RegZero).addImm(Val: `0`);
839
840	BB->insert(I: CallResults.getIterator(), M: MIBC0);
841	MachineInstrBuilder (MF, CallParams).addReg(RegNo: RegZero);
842	} else
843	CallParams.addOperand(Op: FnPtr);
844	}
845
846	for (auto Def : CallResults.defs())
847	MIB.add(MO: Def);
848
849	if (IsIndirect) {
850	// Placeholder for the type index.
851	// This gets replaced with the correct value in WebAssemblyMCInstLower.cpp
852	MIB.addImm(Val: `0`);
853	// The table into which this call_indirect indexes.
854	MCSymbolWasm *Table = IsFuncrefCall
855	? WebAssembly::getOrCreateFuncrefCallTableSymbol(
856	Ctx&: MF.getContext(), Subtarget)
857	: WebAssembly::getOrCreateFunctionTableSymbol(
858	Ctx&: MF.getContext(), Subtarget);
859	if (Subtarget->hasCallIndirectOverlong()) {
860	MIB.addSym(Sym: Table);
861	} else {
862	// For the MVP there is at most one table whose number is 0, but we can't
863	// write a table symbol or issue relocations. Instead we just ensure the
864	// table is live and write a zero.
865	Table->setNoStrip();
866	MIB.addImm(Val: `0`);
867	}
868	}
869
870	for (auto Use : CallParams.uses())
871	MIB.add(MO: Use);
872
873	BB->insert(I: CallResults.getIterator(), M: MIB);
874	CallParams.eraseFromParent();
875	CallResults.eraseFromParent();
876
877	// If this is a funcref call, to avoid hidden GC roots, we need to clear the
878	// table slot with ref.null upon call_indirect return.
879	//
880	// This generates the following code, which comes right after a call_indirect
881	// of a funcref:
882	//
883	// i32.const 0
884	// ref.null func
885	// table.set __funcref_call_table
886	if (IsIndirect && IsFuncrefCall) {
887	MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
888	Ctx&: MF.getContext(), Subtarget);
889	Register RegZero =
890	MF.getRegInfo().createVirtualRegister(RegClass: &WebAssembly::I32RegClass);
891	MachineInstr *Const0 =
892	BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::CONST_I32), DestReg: RegZero).addImm(Val: `0`);
893	BB->insertAfter(I: MIB.getInstr()->getIterator(), MI: Const0);
894
895	Register RegFuncref =
896	MF.getRegInfo().createVirtualRegister(RegClass: &WebAssembly::FUNCREFRegClass);
897	MachineInstr *RefNull =
898	BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::REF_NULL_FUNCREF), DestReg: RegFuncref);
899	BB->insertAfter(I: Const0->getIterator(), MI: RefNull);
900
901	MachineInstr *TableSet =
902	BuildMI(MF, MIMD: DL, MCID: TII.get(Opcode: WebAssembly::TABLE_SET_FUNCREF))
903	.addSym(Sym: Table)
904	.addReg(RegNo: RegZero)
905	.addReg(RegNo: RegFuncref);
906	BB->insertAfter(I: RefNull->getIterator(), MI: TableSet);
907	}
908
909	return BB;
910	}
911
912	MachineBasicBlock *WebAssemblyTargetLowering::EmitInstrWithCustomInserter(
913	MachineInstr &MI, MachineBasicBlock BB) const* {
914	const TargetInstrInfo &TII = *Subtarget->getInstrInfo();
915	DebugLoc DL = MI.getDebugLoc();
916
917	switch (MI.getOpcode()) {
918	default:
919	llvm_unreachable("Unexpected instr type to insert");
920	case WebAssembly::FP_TO_SINT_I32_F32:
921	return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: false, Float64: false,
922	LoweredOpcode: WebAssembly::I32_TRUNC_S_F32);
923	case WebAssembly::FP_TO_UINT_I32_F32:
924	return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: false, Float64: false,
925	LoweredOpcode: WebAssembly::I32_TRUNC_U_F32);
926	case WebAssembly::FP_TO_SINT_I64_F32:
927	return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: true, Float64: false,
928	LoweredOpcode: WebAssembly::I64_TRUNC_S_F32);
929	case WebAssembly::FP_TO_UINT_I64_F32:
930	return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: true, Float64: false,
931	LoweredOpcode: WebAssembly::I64_TRUNC_U_F32);
932	case WebAssembly::FP_TO_SINT_I32_F64:
933	return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: false, Float64: true,
934	LoweredOpcode: WebAssembly::I32_TRUNC_S_F64);
935	case WebAssembly::FP_TO_UINT_I32_F64:
936	return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: false, Float64: true,
937	LoweredOpcode: WebAssembly::I32_TRUNC_U_F64);
938	case WebAssembly::FP_TO_SINT_I64_F64:
939	return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: false, Int64: true, Float64: true,
940	LoweredOpcode: WebAssembly::I64_TRUNC_S_F64);
941	case WebAssembly::FP_TO_UINT_I64_F64:
942	return LowerFPToInt(MI, DL, BB, TII, IsUnsigned: true, Int64: true, Float64: true,
943	LoweredOpcode: WebAssembly::I64_TRUNC_U_F64);
944	case WebAssembly::MEMCPY_A32:
945	return LowerMemcpy(MI, DL, BB, TII, Int64: false);
946	case WebAssembly::MEMCPY_A64:
947	return LowerMemcpy(MI, DL, BB, TII, Int64: true);
948	case WebAssembly::MEMSET_A32:
949	return LowerMemset(MI, DL, BB, TII, Int64: false);
950	case WebAssembly::MEMSET_A64:
951	return LowerMemset(MI, DL, BB, TII, Int64: true);
952	case WebAssembly::CALL_RESULTS:
953	case WebAssembly::RET_CALL_RESULTS:
954	return LowerCallResults(CallResults&: MI, DL, BB, Subtarget, TII);
955	}
956	}
957
958	std::pair<unsigned, const TargetRegisterClass *>
959	WebAssemblyTargetLowering::getRegForInlineAsmConstraint(
960	const TargetRegisterInfo TRI, StringRef Constraint, MVT VT) const* {
961	// First, see if this is a constraint that directly corresponds to a
962	// WebAssembly register class.
963	if (Constraint.size() == `1`) {
964	switch (Constraint [`0`]) {
965	case `'r'`:
966	assert(VT != MVT::iPTR && "Pointer MVT not expected here");
967	if (Subtarget->hasSIMD128() && VT.isVector()) {
968	if (VT.getSizeInBits() == `128`)
969	return std::make_pair(x: `0U`, y: &WebAssembly::V128RegClass);
970	}
971	if (VT.isInteger() && !VT.isVector()) {
972	if (VT.getSizeInBits() <= `32`)
973	return std::make_pair(x: `0U`, y: &WebAssembly::I32RegClass);
974	if (VT.getSizeInBits() <= `64`)
975	return std::make_pair(x: `0U`, y: &WebAssembly::I64RegClass);
976	}
977	if (VT.isFloatingPoint() && !VT.isVector()) {
978	switch (VT.getSizeInBits()) {
979	case `32`:
980	return std::make_pair(x: `0U`, y: &WebAssembly::F32RegClass);
981	case `64`:
982	return std::make_pair(x: `0U`, y: &WebAssembly::F64RegClass);
983	default:
984	break;
985	}
986	}
987	break;
988	default:
989	break;
990	}
991	}
992
993	return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
994	}
995
996	bool WebAssemblyTargetLowering::isCheapToSpeculateCttz(Type Ty) const* {
997	// Assume ctz is a relatively cheap operation.
998	return true;
999	}
1000
1001	bool WebAssemblyTargetLowering::isCheapToSpeculateCtlz(Type Ty) const* {
1002	// Assume clz is a relatively cheap operation.
1003	return true;
1004	}
1005
1006	bool WebAssemblyTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1007	const AddrMode &AM,
1008	Type Ty, unsigned* AS,
1009	Instruction I) const* {
1010	// WebAssembly offsets are added as unsigned without wrapping. The
1011	// isLegalAddressingMode gives us no way to determine if wrapping could be
1012	// happening, so we approximate this by accepting only non-negative offsets.
1013	if (AM.BaseOffs < `0`)
1014	return false;
1015
1016	// WebAssembly has no scale register operands.
1017	if (AM.Scale != `0`)
1018	return false;
1019
1020	// Everything else is legal.
1021	return true;
1022	}
1023
1024	bool WebAssemblyTargetLowering::allowsMisalignedMemoryAccesses(
1025	EVT /VT/, unsigned /AddrSpace/, Align /Align/,
1026	MachineMemOperand::Flags /Flags/, unsigned Fast) const* {
1027	// WebAssembly supports unaligned accesses, though it should be declared
1028	// with the p2align attribute on loads and stores which do so, and there
1029	// may be a performance impact. We tell LLVM they're "fast" because
1030	// for the kinds of things that LLVM uses this for (merging adjacent stores
1031	// of constants, etc.), WebAssembly implementations will either want the
1032	// unaligned access or they'll split anyway.
1033	if (Fast)
1034	*Fast = `1`;
1035	return true;
1036	}
1037
1038	bool WebAssemblyTargetLowering::isIntDivCheap(EVT VT,
1039	AttributeList Attr) const {
1040	// The current thinking is that wasm engines will perform this optimization,
1041	// so we can save on code size.
1042	return true;
1043	}
1044
1045	bool WebAssemblyTargetLowering::isVectorLoadExtDesirable(SDValue ExtVal) const {
1046	EVT ExtT = ExtVal.getValueType();
1047	SDValue N0 = peekThroughFreeze(V: ExtVal ->getOperand(Num: `0`));
1048	auto *Load = dyn_cast<LoadSDNode>(Val&: N0);
1049	if (!Load)
1050	return false;
1051	EVT MemT = Load->getValueType(ResNo: `0`);
1052	return (ExtT == MVT::v8i16 && MemT == MVT::v8i8) \|\|
1053	(ExtT == MVT::v4i32 && MemT == MVT::v4i16) \|\|
1054	(ExtT == MVT::v2i64 && MemT == MVT::v2i32);
1055	}
1056
1057	bool WebAssemblyTargetLowering::isOffsetFoldingLegal(
1058	const GlobalAddressSDNode GA) const* {
1059	// Wasm doesn't support function addresses with offsets
1060	const GlobalValue *GV = GA->getGlobal();
1061	return isa<Function>(Val: GV) ? false : TargetLowering::isOffsetFoldingLegal(GA);
1062	}
1063
1064	EVT WebAssemblyTargetLowering::getSetCCResultType(const DataLayout &DL,
1065	LLVMContext &C,
1066	EVT VT) const {
1067	if (VT.isVector()) {
1068	if (VT.getVectorElementType() == MVT::f16 && !Subtarget->hasFP16())
1069	return VT.changeElementType(Context&: C, EltVT: MVT::i1);
1070
1071	return VT.changeVectorElementTypeToInteger();
1072	}
1073
1074	// So far, all branch instructions in Wasm take an I32 condition.
1075	// The default TargetLowering::getSetCCResultType returns the pointer size,
1076	// which would be useful to reduce instruction counts when testing
1077	// against 64-bit pointers/values if at some point Wasm supports that.
1078	return EVT::getIntegerVT(Context&: C, BitWidth: `32`);
1079	}
1080
1081	void WebAssemblyTargetLowering::getTgtMemIntrinsic(
1082	SmallVectorImpl<IntrinsicInfo> &Infos, const CallBase &I,
1083	MachineFunction &MF, unsigned Intrinsic) const {
1084	IntrinsicInfo Info;
1085	switch (Intrinsic) {
1086	case Intrinsic::wasm_memory_atomic_notify:
1087	Info.opc = ISD::INTRINSIC_W_CHAIN;
1088	Info.memVT = MVT::i32;
1089	Info.ptrVal = I.getArgOperand(i: `0`);
1090	Info.offset = `0`;
1091	Info.align = Align (`4`);
1092	// atomic.notify instruction does not really load the memory specified with
1093	// this argument, but MachineMemOperand should either be load or store, so
1094	// we set this to a load.
1095	// FIXME Volatile isn't really correct, but currently all LLVM atomic
1096	// instructions are treated as volatiles in the backend, so we should be
1097	// consistent. The same applies for wasm_atomic_wait intrinsics too.
1098	Info.flags = MachineMemOperand::MOVolatile \| MachineMemOperand::MOLoad;
1099	Infos.push_back(Elt: Info);
1100	return;
1101	case Intrinsic::wasm_memory_atomic_wait32:
1102	Info.opc = ISD::INTRINSIC_W_CHAIN;
1103	Info.memVT = MVT::i32;
1104	Info.ptrVal = I.getArgOperand(i: `0`);
1105	Info.offset = `0`;
1106	Info.align = Align (`4`);
1107	Info.flags = MachineMemOperand::MOVolatile \| MachineMemOperand::MOLoad;
1108	Infos.push_back(Elt: Info);
1109	return;
1110	case Intrinsic::wasm_memory_atomic_wait64:
1111	Info.opc = ISD::INTRINSIC_W_CHAIN;
1112	Info.memVT = MVT::i64;
1113	Info.ptrVal = I.getArgOperand(i: `0`);
1114	Info.offset = `0`;
1115	Info.align = Align (`8`);
1116	Info.flags = MachineMemOperand::MOVolatile \| MachineMemOperand::MOLoad;
1117	Infos.push_back(Elt: Info);
1118	return;
1119	case Intrinsic::wasm_loadf16_f32:
1120	Info.opc = ISD::INTRINSIC_W_CHAIN;
1121	Info.memVT = MVT::f16;
1122	Info.ptrVal = I.getArgOperand(i: `0`);
1123	Info.offset = `0`;
1124	Info.align = Align (`2`);
1125	Info.flags = MachineMemOperand::MOLoad;
1126	Infos.push_back(Elt: Info);
1127	return;
1128	case Intrinsic::wasm_storef16_f32:
1129	Info.opc = ISD::INTRINSIC_VOID;
1130	Info.memVT = MVT::f16;
1131	Info.ptrVal = I.getArgOperand(i: `1`);
1132	Info.offset = `0`;
1133	Info.align = Align (`2`);
1134	Info.flags = MachineMemOperand::MOStore;
1135	Infos.push_back(Elt: Info);
1136	return;
1137	default:
1138	return;
1139	}
1140	}
1141
1142	void WebAssemblyTargetLowering::computeKnownBitsForTargetNode(
1143	const SDValue Op, KnownBits &Known, const APInt &DemandedElts,
1144	const SelectionDAG &DAG, unsigned Depth) const {
1145	switch (Op.getOpcode()) {
1146	default:
1147	break;
1148	case ISD::INTRINSIC_WO_CHAIN: {
1149	unsigned IntNo = Op.getConstantOperandVal(i: `0`);
1150	switch (IntNo) {
1151	default:
1152	break;
1153	case Intrinsic::wasm_bitmask: {
1154	unsigned BitWidth = Known.getBitWidth();
1155	EVT VT = Op.getOperand(i: `1`).getSimpleValueType();
1156	unsigned PossibleBits = VT.getVectorNumElements();
1157	APInt ZeroMask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - PossibleBits);
1158	Known.Zero \|= ZeroMask;
1159	break;
1160	}
1161	}
1162	break;
1163	}
1164	case WebAssemblyISD::EXTEND_LOW_U:
1165	case WebAssemblyISD::EXTEND_HIGH_U: {
1166	// We know the high half, of each destination vector element, will be zero.
1167	SDValue SrcOp = Op.getOperand(i: `0`);
1168	EVT VT = SrcOp.getSimpleValueType();
1169	unsigned BitWidth = Known.getBitWidth();
1170	if (VT == MVT::v8i8 \|\| VT == MVT::v16i8) {
1171	assert(BitWidth >= `8` && "Unexpected width!");
1172	APInt Mask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - `8`);
1173	Known.Zero \|= Mask;
1174	} else if (VT == MVT::v4i16 \|\| VT == MVT::v8i16) {
1175	assert(BitWidth >= `16` && "Unexpected width!");
1176	APInt Mask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - `16`);
1177	Known.Zero \|= Mask;
1178	} else if (VT == MVT::v2i32 \|\| VT == MVT::v4i32) {
1179	assert(BitWidth >= `32` && "Unexpected width!");
1180	APInt Mask = APInt::getHighBitsSet(numBits: BitWidth, hiBitsSet: BitWidth - `32`);
1181	Known.Zero \|= Mask;
1182	}
1183	break;
1184	}
1185	// For 128-bit addition if the upper bits are all zero then it's known that
1186	// the upper bits of the result will have all bits guaranteed zero except the
1187	// first.
1188	case WebAssemblyISD::I64_ADD128:
1189	if (Op.getResNo() == `1`) {
1190	SDValue LHS_HI = Op.getOperand(i: `1`);
1191	SDValue RHS_HI = Op.getOperand(i: `3`);
1192	if (isNullConstant(V: LHS_HI) && isNullConstant(V: RHS_HI))
1193	Known.Zero.setBitsFrom(`1`);
1194	}
1195	break;
1196	}
1197	}
1198
1199	TargetLoweringBase::LegalizeTypeAction
1200	WebAssemblyTargetLowering::getPreferredVectorAction(MVT VT) const {
1201	if (VT.isFixedLengthVector()) {
1202	MVT EltVT = VT.getVectorElementType();
1203	// We have legal vector types with these lane types, so widening the
1204	// vector would let us use some of the lanes directly without having to
1205	// extend or truncate values.
1206	if (EltVT == MVT::i8 \|\| EltVT == MVT::i16 \|\| EltVT == MVT::i32 \|\|
1207	EltVT == MVT::i64 \|\| EltVT == MVT::f32 \|\| EltVT == MVT::f64)
1208	return TypeWidenVector;
1209	}
1210
1211	return TargetLoweringBase::getPreferredVectorAction(VT);
1212	}
1213
1214	bool WebAssemblyTargetLowering::isFMAFasterThanFMulAndFAdd(
1215	const MachineFunction &MF, EVT VT) const {
1216	if (!Subtarget->hasFP16() \|\| !VT.isVector())
1217	return false;
1218
1219	EVT ScalarVT = VT.getScalarType();
1220	if (!ScalarVT.isSimple())
1221	return false;
1222
1223	return ScalarVT.getSimpleVT().SimpleTy == MVT::f16;
1224	}
1225
1226	bool WebAssemblyTargetLowering::shouldSimplifyDemandedVectorElts(
1227	SDValue Op, const TargetLoweringOpt &TLO) const {
1228	// ISel process runs DAGCombiner after legalization; this step is called
1229	// SelectionDAG optimization phase. This post-legalization combining process
1230	// runs DAGCombiner on each node, and if there was a change to be made,
1231	// re-runs legalization again on it and its user nodes to make sure
1232	// everythiing is in a legalized state.
1233	//
1234	// The legalization calls lowering routines, and we do our custom lowering for
1235	// build_vectors (LowerBUILD_VECTOR), which converts undef vector elements
1236	// into zeros. But there is a set of routines in DAGCombiner that turns unused
1237	// (= not demanded) nodes into undef, among which SimplifyDemandedVectorElts
1238	// turns unused vector elements into undefs. But this routine does not work
1239	// with our custom LowerBUILD_VECTOR, which turns undefs into zeros. This
1240	// combination can result in a infinite loop, in which undefs are converted to
1241	// zeros in legalization and back to undefs in combining.
1242	//
1243	// So after DAG is legalized, we prevent SimplifyDemandedVectorElts from
1244	// running for build_vectors.
1245	if (Op.getOpcode() == ISD::BUILD_VECTOR && TLO.LegalOps && TLO.LegalTys)
1246	return false;
1247	return true;
1248	}
1249
1250	//===----------------------------------------------------------------------===//
1251	// WebAssembly Lowering private implementation.
1252	//===----------------------------------------------------------------------===//
1253
1254	//===----------------------------------------------------------------------===//
1255	// Lowering Code
1256	//===----------------------------------------------------------------------===//
1257
1258	static void fail(const SDLoc &DL, SelectionDAG &DAG, const char *Msg) {
1259	MachineFunction &MF = DAG.getMachineFunction();
1260	DAG.getContext()->diagnose(
1261	DI: DiagnosticInfoUnsupported (MF.getFunction(), Msg, DL.getDebugLoc()));
1262	}
1263
1264	// Test whether the given calling convention is supported.
1265	static bool callingConvSupported(CallingConv::ID CallConv) {
1266	// We currently support the language-independent target-independent
1267	// conventions. We don't yet have a way to annotate calls with properties like
1268	// "cold", and we don't have any call-clobbered registers, so these are mostly
1269	// all handled the same.
1270	return CallConv == CallingConv::C \|\| CallConv == CallingConv::Fast \|\|
1271	CallConv == CallingConv::Cold \|\|
1272	CallConv == CallingConv::PreserveMost \|\|
1273	CallConv == CallingConv::PreserveAll \|\|
1274	CallConv == CallingConv::CXX_FAST_TLS \|\|
1275	CallConv == CallingConv::WASM_EmscriptenInvoke \|\|
1276	CallConv == CallingConv::Swift \|\| CallConv == CallingConv::SwiftTail;
1277	}
1278
1279	SDValue
1280	WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI,
1281	SmallVectorImpl<SDValue> &InVals) const {
1282	SelectionDAG &DAG = CLI.DAG;
1283	SDLoc DL = CLI.DL;
1284	SDValue Chain = CLI.Chain;
1285	SDValue Callee = CLI.Callee;
1286	MachineFunction &MF = DAG.getMachineFunction();
1287	auto Layout = MF.getDataLayout();
1288
1289	// A call through a funcref is expressed in IR as a call through the pointer
1290	// produced by the llvm.wasm.funcref.to_ptr intrinsic. Detect this here and
1291	// recover the underlying funcref value so the call can be lowered to a
1292	// table.set + call_indirect through the dedicated __funcref_call_table.
1293	bool IsFuncrefCall = false;
1294	if (Callee.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
1295	Callee.getConstantOperandVal(i: `0`) == Intrinsic::wasm_funcref_to_ptr) {
1296	Callee = Callee.getOperand(i: `1`);
1297	IsFuncrefCall = true;
1298	}
1299
1300	CallingConv::ID CallConv = CLI.CallConv;
1301	if (!callingConvSupported(CallConv))
1302	fail(DL, DAG,
1303	Msg: "WebAssembly doesn't support language-specific or target-specific "
1304	"calling conventions yet");
1305	if (CLI.IsPatchPoint)
1306	fail(DL, DAG, Msg: "WebAssembly doesn't support patch point yet");
1307
1308	if (CLI.IsTailCall) {
1309	auto NoTail = [&](const char *Msg) {
1310	if (CLI.CB && CLI.CB->isMustTailCall())
1311	fail(DL, DAG, Msg);
1312	CLI.IsTailCall = false;
1313	};
1314
1315	if (!Subtarget->hasTailCall())
1316	NoTail ("WebAssembly 'tail-call' feature not enabled");
1317
1318	// Varargs calls cannot be tail calls because the buffer is on the stack
1319	if (CLI.IsVarArg)
1320	NoTail ("WebAssembly does not support varargs tail calls");
1321
1322	// Do not tail call unless caller and callee return types match
1323	const Function &F = MF.getFunction();
1324	const TargetMachine &TM = getTargetMachine();
1325	Type *RetTy = F.getReturnType();
1326	SmallVector<MVT, `4`> CallerRetTys;
1327	SmallVector<MVT, `4`> CalleeRetTys;
1328	computeLegalValueVTs(F, TM, Ty: RetTy, ValueVTs&: CallerRetTys);
1329	computeLegalValueVTs(F, TM, Ty: CLI.RetTy, ValueVTs&: CalleeRetTys);
1330	bool TypesMatch = CallerRetTys.size() == CalleeRetTys.size() &&
1331	std::equal(first1: CallerRetTys.begin(), last1: CallerRetTys.end(),
1332	first2: CalleeRetTys.begin());
1333	if (!TypesMatch)
1334	NoTail ("WebAssembly tail call requires caller and callee return types to "
1335	"match");
1336
1337	// If pointers to local stack values are passed, we cannot tail call
1338	if (CLI.CB) {
1339	for (auto &Arg : CLI.CB->args()) {
1340	Value *Val = Arg.get();
1341	// Trace the value back through pointer operations
1342	while (true) {
1343	Value *Src = Val->stripPointerCastsAndAliases();
1344	if (auto *GEP = dyn_cast<GetElementPtrInst>(Val: Src))
1345	Src = GEP->getPointerOperand();
1346	if (Val == Src)
1347	break;
1348	Val = Src;
1349	}
1350	if (isa<AllocaInst>(Val)) {
1351	NoTail (
1352	"WebAssembly does not support tail calling with stack arguments");
1353	break;
1354	}
1355	}
1356	}
1357	}
1358
1359	SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
1360	SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
1361	SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1362
1363	// The generic code may have added an sret argument. If we're lowering an
1364	// invoke function, the ABI requires that the function pointer be the first
1365	// argument, so we may have to swap the arguments.
1366	if (CallConv == CallingConv::WASM_EmscriptenInvoke && Outs.size() >= `2` &&
1367	Outs [`0`].Flags.isSRet()) {
1368	std::swap(a&: Outs [`0`], b&: Outs [`1`]);
1369	std::swap(a&: OutVals [`0`], b&: OutVals [`1`]);
1370	}
1371
1372	bool HasSwiftSelfArg = false;
1373	bool HasSwiftErrorArg = false;
1374	bool HasSwiftAsyncArg = false;
1375	unsigned NumFixedArgs = `0`;
1376	for (unsigned I = `0`; I < Outs.size(); ++I) {
1377	const ISD::OutputArg &Out = Outs [I];
1378	SDValue &OutVal = OutVals [I];
1379	HasSwiftSelfArg \|= Out.Flags.isSwiftSelf();
1380	HasSwiftErrorArg \|= Out.Flags.isSwiftError();
1381	HasSwiftAsyncArg \|= Out.Flags.isSwiftAsync();
1382	if (Out.Flags.isNest())
1383	fail(DL, DAG, Msg: "WebAssembly hasn't implemented nest arguments");
1384	if (Out.Flags.isInAlloca())
1385	fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca arguments");
1386	if (Out.Flags.isInConsecutiveRegs())
1387	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs arguments");
1388	if (Out.Flags.isInConsecutiveRegsLast())
1389	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last arguments");
1390	if (Out.Flags.isByVal() && Out.Flags.getByValSize() != `0`) {
1391	auto &MFI = MF.getFrameInfo();
1392	int FI = MFI.CreateStackObject(Size: Out.Flags.getByValSize(),
1393	Alignment: Out.Flags.getNonZeroByValAlign(),
1394	/isSS=/isSpillSlot: false);
1395	SDValue SizeNode =
1396	DAG.getConstant(Val: Out.Flags.getByValSize(), DL, VT: MVT::i32);
1397	SDValue FINode = DAG.getFrameIndex(FI, VT: getPointerTy(DL: Layout));
1398	Align Alignment = Out.Flags.getNonZeroByValAlign();
1399	Chain = DAG.getMemcpy(Chain, dl: DL, Dst: FINode, Src: OutVal, Size: SizeNode, DstAlign: Alignment,
1400	SrcAlign: Alignment,
1401	/isVolatile/ isVol: false, /AlwaysInline=/false,
1402	/CI=/nullptr, OverrideTailCall: std::nullopt, DstPtrInfo: MachinePointerInfo (),
1403	SrcPtrInfo: MachinePointerInfo ());
1404	OutVal = FINode;
1405	}
1406	// Count the number of fixed args after* legalization.*
1407	NumFixedArgs += !Out.Flags.isVarArg();
1408	}
1409
1410	bool IsVarArg = CLI.IsVarArg;
1411	auto PtrVT = getPointerTy(DL: Layout);
1412
1413	// For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1414	// (for swifttailcc) swiftasync arguments if there aren't. These additional
1415	// arguments are also added for callee signature. They are necessary to match
1416	// callee and caller signature for indirect call.
1417	if (CallConv == CallingConv::Swift \|\| CallConv == CallingConv::SwiftTail) {
1418	Type PtrTy = PointerType::getUnqual(C&: DAG.getContext());
1419	if (!HasSwiftSelfArg) {
1420	NumFixedArgs++;
1421	ISD::ArgFlagsTy Flags;
1422	Flags.setSwiftSelf();
1423	ISD::OutputArg Arg(Flags, PtrVT, EVT (PtrVT), PtrTy, `0`, `0`);
1424	CLI.Outs.push_back(Elt: Arg);
1425	SDValue ArgVal = DAG.getUNDEF(VT: PtrVT);
1426	CLI.OutVals.push_back(Elt: ArgVal);
1427	}
1428	if (!HasSwiftErrorArg) {
1429	NumFixedArgs++;
1430	ISD::ArgFlagsTy Flags;
1431	Flags.setSwiftError();
1432	ISD::OutputArg Arg(Flags, PtrVT, EVT (PtrVT), PtrTy, `0`, `0`);
1433	CLI.Outs.push_back(Elt: Arg);
1434	SDValue ArgVal = DAG.getUNDEF(VT: PtrVT);
1435	CLI.OutVals.push_back(Elt: ArgVal);
1436	}
1437	if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1438	NumFixedArgs++;
1439	ISD::ArgFlagsTy Flags;
1440	Flags.setSwiftAsync();
1441	ISD::OutputArg Arg(Flags, PtrVT, EVT (PtrVT), PtrTy, `0`, `0`);
1442	CLI.Outs.push_back(Elt: Arg);
1443	SDValue ArgVal = DAG.getUNDEF(VT: PtrVT);
1444	CLI.OutVals.push_back(Elt: ArgVal);
1445	}
1446	}
1447
1448	// Analyze operands of the call, assigning locations to each operand.
1449	SmallVector<CCValAssign, `16`> ArgLocs;
1450	CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1451
1452	if (IsVarArg) {
1453	// Outgoing non-fixed arguments are placed in a buffer. First
1454	// compute their offsets and the total amount of buffer space needed.
1455	for (unsigned I = NumFixedArgs; I < Outs.size(); ++I) {
1456	const ISD::OutputArg &Out = Outs [I];
1457	SDValue &Arg = OutVals [I];
1458	EVT VT = Arg.getValueType();
1459	assert(VT != MVT::iPTR && "Legalized args should be concrete");
1460	Type Ty = VT.getTypeForEVT(Context&: DAG.getContext());
1461	Align Alignment =
1462	std::max(a: Out.Flags.getNonZeroOrigAlign(), b: Layout.getABITypeAlign(Ty));
1463	unsigned Offset =
1464	CCInfo.AllocateStack(Size: Layout.getTypeAllocSize(Ty), Alignment);
1465	CCInfo.addLoc(V: CCValAssign::getMem(ValNo: ArgLocs.size(), ValVT: VT.getSimpleVT(),
1466	Offset, LocVT: VT.getSimpleVT(),
1467	HTP: CCValAssign::Full));
1468	}
1469	}
1470
1471	unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
1472
1473	SDValue FINode;
1474	if (IsVarArg && NumBytes) {
1475	// For non-fixed arguments, next emit stores to store the argument values
1476	// to the stack buffer at the offsets computed above.
1477	MaybeAlign StackAlign = Layout.getStackAlignment();
1478	assert(StackAlign && "data layout string is missing stack alignment");
1479	int FI = MF.getFrameInfo().CreateStackObject(Size: NumBytes, Alignment: *StackAlign,
1480	/isSS=/isSpillSlot: false);
1481	unsigned ValNo = `0`;
1482	SmallVector<SDValue, `8`> Chains;
1483	for (SDValue Arg : drop_begin(RangeOrContainer&: OutVals, N: NumFixedArgs)) {
1484	assert(ArgLocs[ValNo].getValNo() == ValNo &&
1485	"ArgLocs should remain in order and only hold varargs args");
1486	unsigned Offset = ArgLocs [ValNo++].getLocMemOffset();
1487	FINode = DAG.getFrameIndex(FI, VT: getPointerTy(DL: Layout));
1488	SDValue Add = DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: FINode,
1489	N2: DAG.getConstant(Val: Offset, DL, VT: PtrVT));
1490	Chains.push_back(
1491	Elt: DAG.getStore(Chain, dl: DL, Val: Arg, Ptr: Add,
1492	PtrInfo: MachinePointerInfo::getFixedStack(MF, FI, Offset)));
1493	}
1494	if (!Chains.empty())
1495	Chain = DAG.getNode(Opcode: ISD::TokenFactor, DL, VT: MVT::Other, Ops: Chains);
1496	} else if (IsVarArg) {
1497	FINode = DAG.getIntPtrConstant(Val: `0`, DL);
1498	}
1499
1500	if (Callee ->getOpcode() == ISD::GlobalAddress) {
1501	// If the callee is a GlobalAddress node (quite common, every direct call
1502	// is) turn it into a TargetGlobalAddress node so that LowerGlobalAddress
1503	// doesn't add MO_GOT which is not needed for direct calls.
1504	GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Val&: Callee);
1505	Callee = DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL,
1506	VT: getPointerTy(DL: DAG.getDataLayout()),
1507	offset: GA->getOffset());
1508	Callee = DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL,
1509	VT: getPointerTy(DL: DAG.getDataLayout()), Operand: Callee);
1510	}
1511
1512	// Compute the operands for the CALLn node.
1513	SmallVector<SDValue, `16`> Ops;
1514	Ops.push_back(Elt: Chain);
1515	Ops.push_back(Elt: Callee);
1516
1517	// Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs
1518	// isn't reliable.
1519	Ops.append(in_start: OutVals.begin(),
1520	in_end: IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end());
1521	// Add a pointer to the vararg buffer.
1522	if (IsVarArg)
1523	Ops.push_back(Elt: FINode);
1524
1525	SmallVector<EVT, `8`> InTys;
1526	for (const auto &In : Ins) {
1527	assert(!In.Flags.isByVal() && "byval is not valid for return values");
1528	assert(!In.Flags.isNest() && "nest is not valid for return values");
1529	if (In.Flags.isInAlloca())
1530	fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca return values");
1531	if (In.Flags.isInConsecutiveRegs())
1532	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs return values");
1533	if (In.Flags.isInConsecutiveRegsLast())
1534	fail(DL, DAG,
1535	Msg: "WebAssembly hasn't implemented cons regs last return values");
1536	// Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1537	// registers.
1538	InTys.push_back(Elt: In.VT);
1539	}
1540
1541	// Lastly, if this is a call to a funcref we need to add an instruction
1542	// table.set to the chain and transform the call.
1543	if (IsFuncrefCall) {
1544	// In the absence of function references proposal where a funcref call is
1545	// lowered to call_ref, using reference types we generate a table.set to set
1546	// the funcref to a special table used solely for this purpose, followed by
1547	// a call_indirect. Here we just generate the table set, and return the
1548	// SDValue of the table.set so that LowerCall can finalize the lowering by
1549	// generating the call_indirect.
1550	SDValue Chain = Ops [`0`];
1551
1552	MCSymbolWasm *Table = WebAssembly::getOrCreateFuncrefCallTableSymbol(
1553	Ctx&: MF.getContext(), Subtarget);
1554	SDValue Sym = DAG.getMCSymbol(Sym: Table, VT: PtrVT);
1555	SDValue TableSlot = DAG.getConstant(Val: `0`, DL, VT: MVT::i32);
1556	SDValue TableSetOps[] = {Chain, Sym, TableSlot, Callee};
1557	SDValue TableSet = DAG.getMemIntrinsicNode(
1558	Opcode: WebAssemblyISD::TABLE_SET, dl: DL, VTList: DAG.getVTList(VT: MVT::Other), Ops: TableSetOps,
1559	MemVT: MVT::funcref, PtrInfo: MachinePointerInfo (), Alignment: Align (`1`),
1560	Flags: MachineMemOperand::MOStore);
1561
1562	Ops [`0`] = TableSet; // The new chain is the TableSet itself
1563	}
1564
1565	if (CLI.IsTailCall) {
1566	// ret_calls do not return values to the current frame
1567	SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
1568	return DAG.getNode(Opcode: WebAssemblyISD::RET_CALL, DL, VTList: NodeTys, Ops);
1569	}
1570
1571	InTys.push_back(Elt: MVT::Other);
1572	SDVTList InTyList = DAG.getVTList(VTs: InTys);
1573	SDValue Res = DAG.getNode(Opcode: WebAssemblyISD::CALL, DL, VTList: InTyList, Ops);
1574
1575	for (size_t I = `0`; I < Ins.size(); ++I)
1576	InVals.push_back(Elt: Res.getValue(R: I));
1577
1578	// Return the chain
1579	return Res.getValue(R: Ins.size());
1580	}
1581
1582	bool WebAssemblyTargetLowering::CanLowerReturn(
1583	CallingConv::ID /CallConv/, MachineFunction & /MF/, bool /IsVarArg/,
1584	const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext & /Context/,
1585	const Type RetTy) const* {
1586	// WebAssembly can only handle returning tuples with multivalue enabled
1587	return WebAssembly::canLowerReturn(ResultSize: Outs.size(), Subtarget);
1588	}
1589
1590	SDValue WebAssemblyTargetLowering::LowerReturn(
1591	SDValue Chain, CallingConv::ID CallConv, bool /IsVarArg/,
1592	const SmallVectorImpl<ISD::OutputArg> &Outs,
1593	const SmallVectorImpl<SDValue> &OutVals, const SDLoc &DL,
1594	SelectionDAG &DAG) const {
1595	assert(WebAssembly::canLowerReturn(Outs.size(), Subtarget) &&
1596	"MVP WebAssembly can only return up to one value");
1597	if (!callingConvSupported(CallConv))
1598	fail(DL, DAG, Msg: "WebAssembly doesn't support non-C calling conventions");
1599
1600	SmallVector<SDValue, `4`> RetOps(`1`, Chain);
1601	RetOps.append(in_start: OutVals.begin(), in_end: OutVals.end());
1602	Chain = DAG.getNode(Opcode: WebAssemblyISD::RETURN, DL, VT: MVT::Other, Ops: RetOps);
1603
1604	// Record the number and types of the return values.
1605	for (const ISD::OutputArg &Out : Outs) {
1606	assert(!Out.Flags.isByVal() && "byval is not valid for return values");
1607	assert(!Out.Flags.isNest() && "nest is not valid for return values");
1608	assert(!Out.Flags.isVarArg() && "non-fixed return value is not valid");
1609	if (Out.Flags.isInAlloca())
1610	fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca results");
1611	if (Out.Flags.isInConsecutiveRegs())
1612	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs results");
1613	if (Out.Flags.isInConsecutiveRegsLast())
1614	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last results");
1615	}
1616
1617	return Chain;
1618	}
1619
1620	SDValue WebAssemblyTargetLowering::LowerFormalArguments(
1621	SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1622	const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1623	SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1624	if (!callingConvSupported(CallConv))
1625	fail(DL, DAG, Msg: "WebAssembly doesn't support non-C calling conventions");
1626
1627	MachineFunction &MF = DAG.getMachineFunction();
1628	auto *MFI = MF.getInfo<WebAssemblyFunctionInfo>();
1629
1630	// Set up the incoming ARGUMENTS value, which serves to represent the liveness
1631	// of the incoming values before they're represented by virtual registers.
1632	MF.getRegInfo().addLiveIn(Reg: WebAssembly::ARGUMENTS);
1633
1634	bool HasSwiftErrorArg = false;
1635	bool HasSwiftSelfArg = false;
1636	bool HasSwiftAsyncArg = false;
1637	for (const ISD::InputArg &In : Ins) {
1638	HasSwiftSelfArg \|= In.Flags.isSwiftSelf();
1639	HasSwiftErrorArg \|= In.Flags.isSwiftError();
1640	HasSwiftAsyncArg \|= In.Flags.isSwiftAsync();
1641	if (In.Flags.isInAlloca())
1642	fail(DL, DAG, Msg: "WebAssembly hasn't implemented inalloca arguments");
1643	if (In.Flags.isNest())
1644	fail(DL, DAG, Msg: "WebAssembly hasn't implemented nest arguments");
1645	if (In.Flags.isInConsecutiveRegs())
1646	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs arguments");
1647	if (In.Flags.isInConsecutiveRegsLast())
1648	fail(DL, DAG, Msg: "WebAssembly hasn't implemented cons regs last arguments");
1649	// Ignore In.getNonZeroOrigAlign() because all our arguments are passed in
1650	// registers.
1651	InVals.push_back(Elt: In.Used ? DAG.getNode(Opcode: WebAssemblyISD::ARGUMENT, DL, VT: In.VT,
1652	Operand: DAG.getTargetConstant(Val: InVals.size(),
1653	DL, VT: MVT::i32))
1654	: DAG.getUNDEF(VT: In.VT));
1655
1656	// Record the number and types of arguments.
1657	MFI->addParam(VT: In.VT);
1658	}
1659
1660	// For swiftcc and swifttailcc, emit additional swiftself, swifterror, and
1661	// (for swifttailcc) swiftasync arguments if there aren't. These additional
1662	// arguments are also added for callee signature. They are necessary to match
1663	// callee and caller signature for indirect call.
1664	auto PtrVT = getPointerTy(DL: MF.getDataLayout());
1665	if (CallConv == CallingConv::Swift \|\| CallConv == CallingConv::SwiftTail) {
1666	if (!HasSwiftSelfArg) {
1667	MFI->addParam(VT: PtrVT);
1668	}
1669	if (!HasSwiftErrorArg) {
1670	MFI->addParam(VT: PtrVT);
1671	}
1672	if (CallConv == CallingConv::SwiftTail && !HasSwiftAsyncArg) {
1673	MFI->addParam(VT: PtrVT);
1674	}
1675	}
1676	// Varargs are copied into a buffer allocated by the caller, and a pointer to
1677	// the buffer is passed as an argument.
1678	if (IsVarArg) {
1679	MVT PtrVT = getPointerTy(DL: MF.getDataLayout());
1680	Register VarargVreg =
1681	MF.getRegInfo().createVirtualRegister(RegClass: getRegClassFor(VT: PtrVT));
1682	MFI->setVarargBufferVreg(VarargVreg);
1683	Chain = DAG.getCopyToReg(
1684	Chain, dl: DL, Reg: VarargVreg,
1685	N: DAG.getNode(Opcode: WebAssemblyISD::ARGUMENT, DL, VT: PtrVT,
1686	Operand: DAG.getTargetConstant(Val: Ins.size(), DL, VT: MVT::i32)));
1687	MFI->addParam(VT: PtrVT);
1688	}
1689
1690	// Record the number and types of arguments and results.
1691	SmallVector<MVT, `4`> Params;
1692	SmallVector<MVT, `4`> Results;
1693	computeSignatureVTs(Ty: MF.getFunction().getFunctionType(), TargetFunc: &MF.getFunction(),
1694	ContextFunc: MF.getFunction(), TM: DAG.getTarget(), Params, Results);
1695	for (MVT VT : Results)
1696	MFI->addResult(VT);
1697	// TODO: Use signatures in WebAssemblyMachineFunctionInfo too and unify
1698	// the param logic here with ComputeSignatureVTs
1699	assert(MFI->getParams().size() == Params.size() &&
1700	std::equal(MFI->getParams().begin(), MFI->getParams().end(),
1701	Params.begin()));
1702
1703	return Chain;
1704	}
1705
1706	void WebAssemblyTargetLowering::ReplaceNodeResults(
1707	SDNode N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const* {
1708	switch (N->getOpcode()) {
1709	case ISD::SIGN_EXTEND_INREG:
1710	// Do not add any results, signifying that N should not be custom lowered
1711	// after all. This happens because simd128 turns on custom lowering for
1712	// SIGN_EXTEND_INREG, but for non-vector sign extends the result might be an
1713	// illegal type.
1714	break;
1715	case ISD::ANY_EXTEND_VECTOR_INREG:
1716	case ISD::SIGN_EXTEND_VECTOR_INREG:
1717	case ISD::ZERO_EXTEND_VECTOR_INREG:
1718	// Do not add any results, signifying that N should not be custom lowered.
1719	// EXTEND_VECTOR_INREG is implemented for some vectors, but not all.
1720	break;
1721	case ISD::FP_ROUND: {
1722	EVT VT = N->getValueType(ResNo: `0`);
1723	SDValue Src = N->getOperand(Num: `0`);
1724	if (VT == MVT::v4f16 && Src.getValueType() == MVT::v4f32) {
1725	Results.push_back(
1726	Elt: DAG.getNode(Opcode: WebAssemblyISD::DEMOTE_ZERO, DL: SDLoc (N), VT: MVT::v8f16, Operand: Src));
1727	}
1728	break;
1729	}
1730	case ISD::ADD:
1731	case ISD::SUB:
1732	Results.push_back(Elt: Replace128Op(N, DAG));
1733	break;
1734	default:
1735	llvm_unreachable(
1736	"ReplaceNodeResults not implemented for this op for WebAssembly!");
1737	}
1738	}
1739
1740	//===----------------------------------------------------------------------===//
1741	// Custom lowering hooks.
1742	//===----------------------------------------------------------------------===//
1743
1744	SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op,
1745	SelectionDAG &DAG) const {
1746	SDLoc DL(Op);
1747	switch (Op.getOpcode()) {
1748	default:
1749	llvm_unreachable("unimplemented operation lowering");
1750	return SDValue ();
1751	case ISD::FrameIndex:
1752	return LowerFrameIndex(Op, DAG);
1753	case ISD::GlobalAddress:
1754	return LowerGlobalAddress(Op, DAG);
1755	case ISD::GlobalTLSAddress:
1756	return LowerGlobalTLSAddress(Op, DAG);
1757	case ISD::ExternalSymbol:
1758	return LowerExternalSymbol(Op, DAG);
1759	case ISD::JumpTable:
1760	return LowerJumpTable(Op, DAG);
1761	case ISD::BR_JT:
1762	return LowerBR_JT(Op, DAG);
1763	case ISD::VASTART:
1764	return LowerVASTART(Op, DAG);
1765	case ISD::BlockAddress:
1766	case ISD::BRIND:
1767	fail(DL, DAG, Msg: "WebAssembly hasn't implemented computed gotos");
1768	return SDValue ();
1769	case ISD::RETURNADDR:
1770	return LowerRETURNADDR(Op, DAG);
1771	case ISD::FRAMEADDR:
1772	return LowerFRAMEADDR(Op, DAG);
1773	case ISD::CopyToReg:
1774	return LowerCopyToReg(Op, DAG);
1775	case ISD::EXTRACT_VECTOR_ELT:
1776	case ISD::INSERT_VECTOR_ELT:
1777	return LowerAccessVectorElement(Op, DAG);
1778	case ISD::INTRINSIC_VOID:
1779	case ISD::INTRINSIC_WO_CHAIN:
1780	case ISD::INTRINSIC_W_CHAIN:
1781	return LowerIntrinsic(Op, DAG);
1782	case ISD::SIGN_EXTEND_INREG:
1783	return LowerSIGN_EXTEND_INREG(Op, DAG);
1784	case ISD::ZERO_EXTEND_VECTOR_INREG:
1785	case ISD::SIGN_EXTEND_VECTOR_INREG:
1786	case ISD::ANY_EXTEND_VECTOR_INREG:
1787	return LowerEXTEND_VECTOR_INREG(Op, DAG);
1788	case ISD::BUILD_VECTOR:
1789	return LowerBUILD_VECTOR(Op, DAG);
1790	case ISD::VECTOR_SHUFFLE:
1791	return LowerVECTOR_SHUFFLE(Op, DAG);
1792	case ISD::SETCC:
1793	return LowerSETCC(Op, DAG);
1794	case ISD::SHL:
1795	case ISD::SRA:
1796	case ISD::SRL:
1797	return LowerShift(Op, DAG);
1798	case ISD::FP_TO_SINT_SAT:
1799	case ISD::FP_TO_UINT_SAT:
1800	return LowerFP_TO_INT_SAT(Op, DAG);
1801	case ISD::FMINNUM:
1802	case ISD::FMINIMUMNUM:
1803	return LowerFMIN(Op, DAG);
1804	case ISD::FMAXNUM:
1805	case ISD::FMAXIMUMNUM:
1806	return LowerFMAX(Op, DAG);
1807	case ISD::LOAD:
1808	return LowerLoad(Op, DAG);
1809	case ISD::STORE:
1810	return LowerStore(Op, DAG);
1811	case ISD::CTPOP:
1812	case ISD::CTLZ:
1813	case ISD::CTTZ:
1814	return DAG.UnrollVectorOp(N: Op.getNode());
1815	case ISD::CLEAR_CACHE:
1816	report_fatal_error(reason: "llvm.clear_cache is not supported on wasm");
1817	case ISD::SMUL_LOHI:
1818	case ISD::UMUL_LOHI:
1819	return LowerMUL_LOHI(Op, DAG);
1820	case ISD::UADDO:
1821	return LowerUADDO(Op, DAG);
1822	}
1823	}
1824
1825	static bool IsWebAssemblyGlobal(SDValue Op) {
1826	if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Val&: Op))
1827	return WebAssembly::isWasmVarAddressSpace(AS: GA->getAddressSpace());
1828
1829	return false;
1830	}
1831
1832	static std::optional<unsigned> IsWebAssemblyLocal(SDValue Op,
1833	SelectionDAG &DAG) {
1834	const FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Val&: Op);
1835	if (!FI)
1836	return std::nullopt;
1837
1838	auto &MF = DAG.getMachineFunction();
1839	return WebAssemblyFrameLowering::getLocalForStackObject(MF, FrameIndex: FI->getIndex());
1840	}
1841
1842	SDValue WebAssemblyTargetLowering::LowerStore(SDValue Op,
1843	SelectionDAG &DAG) const {
1844	SDLoc DL(Op);
1845	StoreSDNode *SN = cast<StoreSDNode>(Val: Op.getNode());
1846	const SDValue &Value = SN->getValue();
1847	const SDValue &Base = SN->getBasePtr();
1848	const SDValue &Offset = SN->getOffset();
1849
1850	if (IsWebAssemblyGlobal(Op: Base)) {
1851	if (!Offset ->isUndef())
1852	report_fatal_error(reason: "unexpected offset when storing to webassembly global",
1853	gen_crash_diag: false);
1854
1855	SDVTList Tys = DAG.getVTList(VT: MVT::Other);
1856	SDValue Ops[] = {SN->getChain(), Value, Base};
1857	return DAG.getMemIntrinsicNode(Opcode: WebAssemblyISD::GLOBAL_SET, dl: DL, VTList: Tys, Ops,
1858	MemVT: SN->getMemoryVT(), MMO: SN->getMemOperand());
1859	}
1860
1861	if (std::optional<unsigned> Local = IsWebAssemblyLocal(Op: Base, DAG)) {
1862	if (!Offset ->isUndef())
1863	report_fatal_error(reason: "unexpected offset when storing to webassembly local",
1864	gen_crash_diag: false);
1865
1866	SDValue Idx = DAG.getTargetConstant(Val: *Local, DL: Base, VT: MVT::i32);
1867	SDVTList Tys = DAG.getVTList(VT: MVT::Other); // The chain.
1868	SDValue Ops[] = {SN->getChain(), Idx, Value};
1869	return DAG.getNode(Opcode: WebAssemblyISD::LOCAL_SET, DL, VTList: Tys, Ops);
1870	}
1871
1872	if (WebAssembly::isWasmVarAddressSpace(AS: SN->getAddressSpace()))
1873	report_fatal_error(
1874	reason: "Encountered an unlowerable store to the wasm_var address space",
1875	gen_crash_diag: false);
1876
1877	return Op;
1878	}
1879
1880	SDValue WebAssemblyTargetLowering::LowerLoad(SDValue Op,
1881	SelectionDAG &DAG) const {
1882	SDLoc DL(Op);
1883	LoadSDNode *LN = cast<LoadSDNode>(Val: Op.getNode());
1884	const SDValue &Base = LN->getBasePtr();
1885	const SDValue &Offset = LN->getOffset();
1886
1887	if (IsWebAssemblyGlobal(Op: Base)) {
1888	if (!Offset ->isUndef())
1889	report_fatal_error(
1890	reason: "unexpected offset when loading from webassembly global", gen_crash_diag: false);
1891
1892	SDVTList Tys = DAG.getVTList(VT1: LN->getValueType(ResNo: `0`), VT2: MVT::Other);
1893	SDValue Ops[] = {LN->getChain(), Base};
1894	return DAG.getMemIntrinsicNode(Opcode: WebAssemblyISD::GLOBAL_GET, dl: DL, VTList: Tys, Ops,
1895	MemVT: LN->getMemoryVT(), MMO: LN->getMemOperand());
1896	}
1897
1898	if (std::optional<unsigned> Local = IsWebAssemblyLocal(Op: Base, DAG)) {
1899	if (!Offset ->isUndef())
1900	report_fatal_error(
1901	reason: "unexpected offset when loading from webassembly local", gen_crash_diag: false);
1902
1903	SDValue Idx = DAG.getTargetConstant(Val: *Local, DL: Base, VT: MVT::i32);
1904	EVT LocalVT = LN->getValueType(ResNo: `0`);
1905	return DAG.getNode(Opcode: WebAssemblyISD::LOCAL_GET, DL, ResultTys: {LocalVT, MVT::Other},
1906	Ops: {LN->getChain(), Idx});
1907	}
1908
1909	if (WebAssembly::isWasmVarAddressSpace(AS: LN->getAddressSpace()))
1910	report_fatal_error(
1911	reason: "Encountered an unlowerable load from the wasm_var address space",
1912	gen_crash_diag: false);
1913
1914	return Op;
1915	}
1916
1917	SDValue WebAssemblyTargetLowering::LowerMUL_LOHI(SDValue Op,
1918	SelectionDAG &DAG) const {
1919	assert(Subtarget->hasWideArithmetic());
1920	assert(Op.getValueType() == MVT::i64);
1921	SDLoc DL(Op);
1922	unsigned Opcode;
1923	switch (Op.getOpcode()) {
1924	case ISD::UMUL_LOHI:
1925	Opcode = WebAssemblyISD::I64_MUL_WIDE_U;
1926	break;
1927	case ISD::SMUL_LOHI:
1928	Opcode = WebAssemblyISD::I64_MUL_WIDE_S;
1929	break;
1930	default:
1931	llvm_unreachable("unexpected opcode");
1932	}
1933	SDValue LHS = Op.getOperand(i: `0`);
1934	SDValue RHS = Op.getOperand(i: `1`);
1935	SDValue Lo =
1936	DAG.getNode(Opcode, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64), N1: LHS, N2: RHS);
1937	SDValue Hi(Lo.getNode(), `1`);
1938	SDValue Ops[] = {Lo, Hi};
1939	return DAG.getMergeValues(Ops, dl: DL);
1940	}
1941
1942	// Lowers `UADDO` intrinsics to an `i64.add128` instruction when it's enabled.
1943	//
1944	// This enables generating a single wasm instruction for this operation where
1945	// the upper half of both operands are constant zeros. The upper half of the
1946	// result is then whether the overflow happened.
1947	SDValue WebAssemblyTargetLowering::LowerUADDO(SDValue Op,
1948	SelectionDAG &DAG) const {
1949	assert(Subtarget->hasWideArithmetic());
1950	assert(Op.getValueType() == MVT::i64);
1951	assert(Op.getOpcode() == ISD::UADDO);
1952	SDLoc DL(Op);
1953	SDValue LHS = Op.getOperand(i: `0`);
1954	SDValue RHS = Op.getOperand(i: `1`);
1955	SDValue Zero = DAG.getConstant(Val: `0`, DL, VT: MVT::i64);
1956	SDValue Result =
1957	DAG.getNode(Opcode: WebAssemblyISD::I64_ADD128, DL,
1958	VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64), N1: LHS, N2: Zero, N3: RHS, N4: Zero);
1959	SDValue CarryI64(Result.getNode(), `1`);
1960	SDValue CarryI32 = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: MVT::i32, Operand: CarryI64);
1961	SDValue Ops[] = {Result, CarryI32};
1962	return DAG.getMergeValues(Ops, dl: DL);
1963	}
1964
1965	SDValue WebAssemblyTargetLowering::Replace128Op(SDNode *N,
1966	SelectionDAG &DAG) const {
1967	assert(Subtarget->hasWideArithmetic());
1968	assert(N->getValueType(`0`) == MVT::i128);
1969	SDLoc DL(N);
1970	unsigned Opcode;
1971	switch (N->getOpcode()) {
1972	case ISD::ADD:
1973	Opcode = WebAssemblyISD::I64_ADD128;
1974	break;
1975	case ISD::SUB:
1976	Opcode = WebAssemblyISD::I64_SUB128;
1977	break;
1978	default:
1979	llvm_unreachable("unexpected opcode");
1980	}
1981	SDValue LHS = N->getOperand(Num: `0`);
1982	SDValue RHS = N->getOperand(Num: `1`);
1983
1984	SDValue C0 = DAG.getConstant(Val: `0`, DL, VT: MVT::i64);
1985	SDValue C1 = DAG.getConstant(Val: `1`, DL, VT: MVT::i64);
1986	SDValue LHS_0 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: LHS, N2: C0);
1987	SDValue LHS_1 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: LHS, N2: C1);
1988	SDValue RHS_0 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: RHS, N2: C0);
1989	SDValue RHS_1 = DAG.getNode(Opcode: ISD::EXTRACT_ELEMENT, DL, VT: MVT::i64, N1: RHS, N2: C1);
1990	SDValue Result_LO = DAG.getNode(Opcode, DL, VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::i64),
1991	N1: LHS_0, N2: LHS_1, N3: RHS_0, N4: RHS_1);
1992	SDValue Result_HI(Result_LO.getNode(), `1`);
1993	return DAG.getNode(Opcode: ISD::BUILD_PAIR, DL, VTList: N->getVTList(), N1: Result_LO, N2: Result_HI);
1994	}
1995
1996	SDValue WebAssemblyTargetLowering::LowerCopyToReg(SDValue Op,
1997	SelectionDAG &DAG) const {
1998	SDValue Src = Op.getOperand(i: `2`);
1999	if (isa<FrameIndexSDNode>(Val: Src.getNode())) {
2000	// CopyToReg nodes don't support FrameIndex operands. Other targets select
2001	// the FI to some LEA-like instruction, but since we don't have that, we
2002	// need to insert some kind of instruction that can take an FI operand and
2003	// produces a value usable by CopyToReg (i.e. in a vreg). So insert a dummy
2004	// local.copy between Op and its FI operand.
2005	SDValue Chain = Op.getOperand(i: `0`);
2006	SDLoc DL(Op);
2007	Register Reg = cast<RegisterSDNode>(Val: Op.getOperand(i: `1`))->getReg();
2008	EVT VT = Src.getValueType();
2009	SDValue Copy(DAG.getMachineNode(Opcode: VT == MVT::i32 ? WebAssembly::COPY_I32
2010	: WebAssembly::COPY_I64,
2011	dl: DL, VT, Op1: Src),
2012	`0`);
2013	return Op.getNode()->getNumValues() == `1`
2014	? DAG.getCopyToReg(Chain, dl: DL, Reg, N: Copy)
2015	: DAG.getCopyToReg(Chain, dl: DL, Reg, N: Copy,
2016	Glue: Op.getNumOperands() == `4` ? Op.getOperand(i: `3`)
2017	: SDValue ());
2018	}
2019	return SDValue ();
2020	}
2021
2022	SDValue WebAssemblyTargetLowering::LowerFrameIndex(SDValue Op,
2023	SelectionDAG &DAG) const {
2024	int FI = cast<FrameIndexSDNode>(Val&: Op)->getIndex();
2025	return DAG.getTargetFrameIndex(FI, VT: Op.getValueType());
2026	}
2027
2028	SDValue WebAssemblyTargetLowering::LowerRETURNADDR(SDValue Op,
2029	SelectionDAG &DAG) const {
2030	SDLoc DL(Op);
2031
2032	if (!Subtarget->getTargetTriple().isOSEmscripten()) {
2033	fail(DL, DAG,
2034	Msg: "Non-Emscripten WebAssembly hasn't implemented "
2035	"__builtin_return_address");
2036	return SDValue ();
2037	}
2038
2039	unsigned Depth = Op.getConstantOperandVal(i: `0`);
2040	MakeLibCallOptions CallOptions;
2041	return makeLibCall(DAG, LC: RTLIB::RETURN_ADDRESS, RetVT: Op.getValueType(),
2042	Ops: {DAG.getConstant(Val: Depth, DL, VT: MVT::i32)}, CallOptions, dl: DL)
2043	.first;
2044	}
2045
2046	SDValue WebAssemblyTargetLowering::LowerFRAMEADDR(SDValue Op,
2047	SelectionDAG &DAG) const {
2048	// Non-zero depths are not supported by WebAssembly currently. Use the
2049	// legalizer's default expansion, which is to return 0 (what this function is
2050	// documented to do).
2051	if (Op.getConstantOperandVal(i: `0`) > `0`)
2052	return SDValue ();
2053
2054	DAG.getMachineFunction().getFrameInfo().setFrameAddressIsTaken(true);
2055	EVT VT = Op.getValueType();
2056	Register FP =
2057	Subtarget->getRegisterInfo()->getFrameRegister(MF: DAG.getMachineFunction());
2058	return DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: SDLoc (Op), Reg: FP, VT);
2059	}
2060
2061	SDValue
2062	WebAssemblyTargetLowering::LowerGlobalTLSAddress(SDValue Op,
2063	SelectionDAG &DAG) const {
2064	SDLoc DL(Op);
2065	const auto *GA = cast<GlobalAddressSDNode>(Val&: Op);
2066
2067	MachineFunction &MF = DAG.getMachineFunction();
2068	if (!MF.getSubtarget<WebAssemblySubtarget>().hasBulkMemory())
2069	report_fatal_error(reason: "cannot use thread-local storage without bulk memory",
2070	gen_crash_diag: false);
2071
2072	const GlobalValue *GV = GA->getGlobal();
2073
2074	// Currently only Emscripten supports dynamic linking with threads. Therefore,
2075	// on other targets, if we have thread-local storage, only the local-exec
2076	// model is possible.
2077	auto model = Subtarget->getTargetTriple().isOSEmscripten()
2078	? GV->getThreadLocalMode()
2079	: GlobalValue::LocalExecTLSModel;
2080
2081	// Unsupported TLS modes
2082	assert(model != GlobalValue::NotThreadLocal);
2083	assert(model != GlobalValue::InitialExecTLSModel);
2084
2085	if (model == GlobalValue::LocalExecTLSModel \|\|
2086	model == GlobalValue::LocalDynamicTLSModel \|\|
2087	(model == GlobalValue::GeneralDynamicTLSModel &&
2088	getTargetMachine().shouldAssumeDSOLocal(GV))) {
2089	// For DSO-local TLS variables we use offset from __tls_base, or
2090	// __wasm_get_tls_base() if using libcall thread context.
2091
2092	MVT PtrVT = getPointerTy(DL: DAG.getDataLayout());
2093	SDValue BaseAddr(WebAssembly::getTLSBase(DAG, DL, Subtarget), `0`);
2094
2095	SDValue TLSOffset = DAG.getTargetGlobalAddress(
2096	GV, DL, VT: PtrVT, offset: GA->getOffset(), TargetFlags: WebAssemblyII::MO_TLS_BASE_REL);
2097	SDValue SymOffset =
2098	DAG.getNode(Opcode: WebAssemblyISD::WrapperREL, DL, VT: PtrVT, Operand: TLSOffset);
2099
2100	return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: BaseAddr, N2: SymOffset);
2101	}
2102
2103	assert(model == GlobalValue::GeneralDynamicTLSModel);
2104
2105	EVT VT = Op.getValueType();
2106	return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
2107	Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT,
2108	offset: GA->getOffset(),
2109	TargetFlags: WebAssemblyII::MO_GOT_TLS));
2110	}
2111
2112	SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op,
2113	SelectionDAG &DAG) const {
2114	SDLoc DL(Op);
2115	const auto *GA = cast<GlobalAddressSDNode>(Val&: Op);
2116	EVT VT = Op.getValueType();
2117	assert(GA->getTargetFlags() == `0` &&
2118	"Unexpected target flags on generic GlobalAddressSDNode");
2119	if (!WebAssembly::isValidAddressSpace(AS: GA->getAddressSpace()))
2120	fail(DL, DAG, Msg: "Invalid address space for WebAssembly target");
2121
2122	unsigned OperandFlags = `0`;
2123	const GlobalValue *GV = GA->getGlobal();
2124	// Since WebAssembly tables cannot yet be shared accross modules, we don't
2125	// need special treatment for tables in PIC mode.
2126	if (isPositionIndependent() &&
2127	!WebAssembly::isWebAssemblyTableType(Ty: GV->getValueType())) {
2128	if (getTargetMachine().shouldAssumeDSOLocal(GV)) {
2129	MachineFunction &MF = DAG.getMachineFunction();
2130	MVT PtrVT = getPointerTy(DL: MF.getDataLayout());
2131	const char *BaseName;
2132	if (GV->getValueType()->isFunctionTy()) {
2133	BaseName = MF.createExternalSymbolName(Name: "__table_base");
2134	OperandFlags = WebAssemblyII::MO_TABLE_BASE_REL;
2135	} else {
2136	BaseName = MF.createExternalSymbolName(Name: "__memory_base");
2137	OperandFlags = WebAssemblyII::MO_MEMORY_BASE_REL;
2138	}
2139	SDValue BaseAddr =
2140	DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT,
2141	Operand: DAG.getTargetExternalSymbol(Sym: BaseName, VT: PtrVT));
2142
2143	SDValue SymAddr = DAG.getNode(
2144	Opcode: WebAssemblyISD::WrapperREL, DL, VT,
2145	Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT, offset: GA->getOffset(),
2146	TargetFlags: OperandFlags));
2147
2148	return DAG.getNode(Opcode: ISD::ADD, DL, VT, N1: BaseAddr, N2: SymAddr);
2149	}
2150	OperandFlags = WebAssemblyII::MO_GOT;
2151	}
2152
2153	return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
2154	Operand: DAG.getTargetGlobalAddress(GV: GA->getGlobal(), DL, VT,
2155	offset: GA->getOffset(), TargetFlags: OperandFlags));
2156	}
2157
2158	SDValue
2159	WebAssemblyTargetLowering::LowerExternalSymbol(SDValue Op,
2160	SelectionDAG &DAG) const {
2161	SDLoc DL(Op);
2162	const auto *ES = cast<ExternalSymbolSDNode>(Val&: Op);
2163	EVT VT = Op.getValueType();
2164	assert(ES->getTargetFlags() == `0` &&
2165	"Unexpected target flags on generic ExternalSymbolSDNode");
2166	return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT,
2167	Operand: DAG.getTargetExternalSymbol(Sym: ES->getSymbol(), VT));
2168	}
2169
2170	SDValue WebAssemblyTargetLowering::LowerJumpTable(SDValue Op,
2171	SelectionDAG &DAG) const {
2172	// There's no need for a Wrapper node because we always incorporate a jump
2173	// table operand into a BR_TABLE instruction, rather than ever
2174	// materializing it in a register.
2175	const JumpTableSDNode *JT = cast<JumpTableSDNode>(Val&: Op);
2176	return DAG.getTargetJumpTable(JTI: JT->getIndex(), VT: Op.getValueType(),
2177	TargetFlags: JT->getTargetFlags());
2178	}
2179
2180	SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op,
2181	SelectionDAG &DAG) const {
2182	SDLoc DL(Op);
2183	SDValue Chain = Op.getOperand(i: `0`);
2184	const auto *JT = cast<JumpTableSDNode>(Val: Op.getOperand(i: `1`));
2185	SDValue Index = Op.getOperand(i: `2`);
2186	assert(JT->getTargetFlags() == `0` && "WebAssembly doesn't set target flags");
2187
2188	SmallVector<SDValue, `8`> Ops;
2189	Ops.push_back(Elt: Chain);
2190	Ops.push_back(Elt: Index);
2191
2192	MachineJumpTableInfo *MJTI = DAG.getMachineFunction().getJumpTableInfo();
2193	const auto &MBBs = MJTI->getJumpTables()[JT->getIndex()].MBBs;
2194
2195	// Add an operand for each case.
2196	for (auto *MBB : MBBs)
2197	Ops.push_back(Elt: DAG.getBasicBlock(MBB));
2198
2199	// Add the first MBB as a dummy default target for now. This will be replaced
2200	// with the proper default target (and the preceding range check eliminated)
2201	// if possible by WebAssemblyFixBrTableDefaults.
2202	Ops.push_back(Elt: DAG.getBasicBlock(MBB: *MBBs.begin()));
2203	return DAG.getNode(Opcode: WebAssemblyISD::BR_TABLE, DL, VT: MVT::Other, Ops);
2204	}
2205
2206	SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op,
2207	SelectionDAG &DAG) const {
2208	SDLoc DL(Op);
2209	EVT PtrVT = getPointerTy(DL: DAG.getMachineFunction().getDataLayout());
2210
2211	auto *MFI = DAG.getMachineFunction().getInfo<WebAssemblyFunctionInfo>();
2212	const Value *SV = cast<SrcValueSDNode>(Val: Op.getOperand(i: `2`))->getValue();
2213
2214	SDValue ArgN = DAG.getCopyFromReg(Chain: DAG.getEntryNode(), dl: DL,
2215	Reg: MFI->getVarargBufferVreg(), VT: PtrVT);
2216	return DAG.getStore(Chain: Op.getOperand(i: `0`), dl: DL, Val: ArgN, Ptr: Op.getOperand(i: `1`),
2217	PtrInfo: MachinePointerInfo (SV));
2218	}
2219
2220	SDValue WebAssemblyTargetLowering::LowerIntrinsic(SDValue Op,
2221	SelectionDAG &DAG) const {
2222	MachineFunction &MF = DAG.getMachineFunction();
2223	unsigned IntNo;
2224	switch (Op.getOpcode()) {
2225	case ISD::INTRINSIC_VOID:
2226	case ISD::INTRINSIC_W_CHAIN:
2227	IntNo = Op.getConstantOperandVal(i: `1`);
2228	break;
2229	case ISD::INTRINSIC_WO_CHAIN:
2230	IntNo = Op.getConstantOperandVal(i: `0`);
2231	break;
2232	default:
2233	llvm_unreachable("Invalid intrinsic");
2234	}
2235	SDLoc DL(Op);
2236
2237	switch (IntNo) {
2238	default:
2239	return SDValue (); // Don't custom lower most intrinsics.
2240
2241	case Intrinsic::wasm_lsda: {
2242	auto PtrVT = getPointerTy(DL: MF.getDataLayout());
2243	const char *SymName = MF.createExternalSymbolName(
2244	Name: "GCC_except_table" + std::to_string(val: MF.getFunctionNumber()));
2245	if (isPositionIndependent()) {
2246	SDValue Node = DAG.getTargetExternalSymbol(
2247	Sym: SymName, VT: PtrVT, TargetFlags: WebAssemblyII::MO_MEMORY_BASE_REL);
2248	const char *BaseName = MF.createExternalSymbolName(Name: "__memory_base");
2249	SDValue BaseAddr =
2250	DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT,
2251	Operand: DAG.getTargetExternalSymbol(Sym: BaseName, VT: PtrVT));
2252	SDValue SymAddr =
2253	DAG.getNode(Opcode: WebAssemblyISD::WrapperREL, DL, VT: PtrVT, Operand: Node);
2254	return DAG.getNode(Opcode: ISD::ADD, DL, VT: PtrVT, N1: BaseAddr, N2: SymAddr);
2255	}
2256	SDValue Node = DAG.getTargetExternalSymbol(Sym: SymName, VT: PtrVT);
2257	return DAG.getNode(Opcode: WebAssemblyISD::Wrapper, DL, VT: PtrVT, Operand: Node);
2258	}
2259
2260	case Intrinsic::wasm_shuffle: {
2261	// Drop in-chain and replace undefs, but otherwise pass through unchanged
2262	SDValue Ops[`18`];
2263	size_t OpIdx = `0`;
2264	Ops[OpIdx++] = Op.getOperand(i: `1`);
2265	Ops[OpIdx++] = Op.getOperand(i: `2`);
2266	while (OpIdx < `18`) {
2267	const SDValue &MaskIdx = Op.getOperand(i: OpIdx + `1`);
2268	if (MaskIdx.isUndef() \|\| MaskIdx.getNode()->getAsZExtVal() >= `32`) {
2269	bool isTarget = MaskIdx.getNode()->getOpcode() == ISD::TargetConstant;
2270	Ops[OpIdx++] = DAG.getConstant(Val: `0`, DL, VT: MVT::i32, isTarget);
2271	} else {
2272	Ops[OpIdx++] = MaskIdx;
2273	}
2274	}
2275	return DAG.getNode(Opcode: WebAssemblyISD::SHUFFLE, DL, VT: Op.getValueType(), Ops);
2276	}
2277
2278	case Intrinsic::wasm_funcref_to_ptr: {
2279	// llvm.wasm.funcref.to_ptr only has a defined lowering when its result
2280	// feeds directly into an indirect call. Reaching here means the pointer
2281	// escapes a direct call. We haven't implemented conversion of a funcref
2282	// into a real function pointer so we crash if we get here.
2283	fail(DL, DAG,
2284	Msg: "a funcref can only be converted to a pointer to be directly called; "
2285	"the resulting pointer cannot otherwise be used");
2286	return DAG.getPOISON(VT: Op.getValueType());
2287	}
2288
2289	case Intrinsic::thread_pointer: {
2290	return SDValue (WebAssembly::getTLSBase(DAG, DL, Subtarget), `0`);
2291	}
2292	}
2293	}
2294
2295	SDValue
2296	WebAssemblyTargetLowering::LowerSIGN_EXTEND_INREG(SDValue Op,
2297	SelectionDAG &DAG) const {
2298	SDLoc DL(Op);
2299	// If sign extension operations are disabled, allow sext_inreg only if operand
2300	// is a vector extract of an i8 or i16 lane. SIMD does not depend on sign
2301	// extension operations, but allowing sext_inreg in this context lets us have
2302	// simple patterns to select extract_lane_s instructions. Expanding sext_inreg
2303	// everywhere would be simpler in this file, but would necessitate large and
2304	// brittle patterns to undo the expansion and select extract_lane_s
2305	// instructions.
2306	assert(!Subtarget->hasSignExt() && Subtarget->hasSIMD128());
2307	if (Op.getOperand(i: `0`).getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2308	return SDValue ();
2309
2310	const SDValue &Extract = Op.getOperand(i: `0`);
2311	MVT VecT = Extract.getOperand(i: `0`).getSimpleValueType();
2312	if (VecT.getVectorElementType().getSizeInBits() > `32`)
2313	return SDValue ();
2314	MVT ExtractedLaneT =
2315	cast<VTSDNode>(Val: Op.getOperand(i: `1`).getNode())->getVT().getSimpleVT();
2316	MVT ExtractedVecT =
2317	MVT::getVectorVT(VT: ExtractedLaneT, NumElements: `128` / ExtractedLaneT.getSizeInBits());
2318	if (ExtractedVecT == VecT)
2319	return Op;
2320
2321	// Bitcast vector to appropriate type to ensure ISel pattern coverage
2322	const SDNode *Index = Extract.getOperand(i: `1`).getNode();
2323	if (!isa<ConstantSDNode>(Val: Index))
2324	return SDValue ();
2325	unsigned IndexVal = Index->getAsZExtVal();
2326	unsigned Scale =
2327	ExtractedVecT.getVectorNumElements() / VecT.getVectorNumElements();
2328	assert(Scale > `1`);
2329	SDValue NewIndex =
2330	DAG.getConstant(Val: IndexVal * Scale, DL, VT: Index->getValueType(ResNo: `0`));
2331	SDValue NewExtract = DAG.getNode(
2332	Opcode: ISD::EXTRACT_VECTOR_ELT, DL, VT: Extract.getValueType(),
2333	N1: DAG.getBitcast(VT: ExtractedVecT, V: Extract.getOperand(i: `0`)), N2: NewIndex);
2334	return DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: Op.getValueType(), N1: NewExtract,
2335	N2: Op.getOperand(i: `1`));
2336	}
2337
2338	static SDValue GetExtendHigh(SDValue Op, unsigned UserOpc, EVT VT,
2339	SelectionDAG &DAG) {
2340	SDValue Source = peekThroughBitcasts(V: Op);
2341	if (Source.getOpcode() != ISD::VECTOR_SHUFFLE)
2342	return SDValue ();
2343
2344	assert((UserOpc == WebAssemblyISD::EXTEND_LOW_U \|\|
2345	UserOpc == WebAssemblyISD::EXTEND_LOW_S) &&
2346	"expected extend_low");
2347	auto *Shuffle = cast<ShuffleVectorSDNode>(Val: Source.getNode());
2348
2349	ArrayRef<int> Mask = Shuffle->getMask();
2350	// Look for a shuffle which moves from the high half to the low half.
2351	size_t FirstIdx = Mask.size() / `2`;
2352	for (size_t i = `0`; i < Mask.size() / `2`; ++i) {
2353	if (Mask [i] != static_cast<int>(FirstIdx + i)) {
2354	return SDValue ();
2355	}
2356	}
2357
2358	SDLoc DL(Op);
2359	unsigned Opc = UserOpc == WebAssemblyISD::EXTEND_LOW_S
2360	? WebAssemblyISD::EXTEND_HIGH_S
2361	: WebAssemblyISD::EXTEND_HIGH_U;
2362	SDValue ShuffleSrc = Shuffle->getOperand(Num: `0`);
2363	if (Op.getOpcode() == ISD::BITCAST)
2364	ShuffleSrc = DAG.getBitcast(VT: Op.getValueType(), V: ShuffleSrc);
2365
2366	return DAG.getNode(Opcode: Opc, DL, VT, Operand: ShuffleSrc);
2367	}
2368
2369	SDValue
2370	WebAssemblyTargetLowering::LowerEXTEND_VECTOR_INREG(SDValue Op,
2371	SelectionDAG &DAG) const {
2372	SDLoc DL(Op);
2373	EVT VT = Op.getValueType();
2374	SDValue Src = Op.getOperand(i: `0`);
2375	EVT SrcVT = Src.getValueType();
2376
2377	if (SrcVT.getVectorElementType() == MVT::i1 \|\|
2378	SrcVT.getVectorElementType() == MVT::i64)
2379	return SDValue ();
2380
2381	assert(VT.getScalarSizeInBits() % SrcVT.getScalarSizeInBits() == `0` &&
2382	"Unexpected extension factor.");
2383	unsigned Scale = VT.getScalarSizeInBits() / SrcVT.getScalarSizeInBits();
2384
2385	if (Scale != `2` && Scale != `4` && Scale != `8`)
2386	return SDValue ();
2387
2388	unsigned Ext;
2389	switch (Op.getOpcode()) {
2390	default:
2391	llvm_unreachable("unexpected opcode");
2392	case ISD::ANY_EXTEND_VECTOR_INREG:
2393	case ISD::ZERO_EXTEND_VECTOR_INREG:
2394	Ext = WebAssemblyISD::EXTEND_LOW_U;
2395	break;
2396	case ISD::SIGN_EXTEND_VECTOR_INREG:
2397	Ext = WebAssemblyISD::EXTEND_LOW_S;
2398	break;
2399	}
2400
2401	if (Scale == `2`) {
2402	// See if we can use EXTEND_HIGH.
2403	if (auto ExtendHigh = GetExtendHigh(Op: Op.getOperand(i: `0`), UserOpc: Ext, VT, DAG))
2404	return ExtendHigh;
2405	}
2406
2407	SDValue Ret = Src;
2408	while (Scale != `1`) {
2409	Ret = DAG.getNode(Opcode: Ext, DL,
2410	VT: Ret.getValueType()
2411	.widenIntegerVectorElementType(Context&: *DAG.getContext())
2412	.getHalfNumVectorElementsVT(Context&: *DAG.getContext()),
2413	Operand: Ret);
2414	Scale /= `2`;
2415	}
2416	assert(Ret.getValueType() == VT);
2417	return Ret;
2418	}
2419
2420	static SDValue LowerConvertLow(SDValue Op, SelectionDAG &DAG) {
2421	SDLoc DL(Op);
2422	if (Op.getValueType() != MVT::v2f64 && Op.getValueType() != MVT::v4f32)
2423	return SDValue ();
2424
2425	auto GetConvertedLane = [](SDValue Op, unsigned &Opcode, SDValue &SrcVec,
2426	unsigned &Index) -> bool {
2427	switch (Op.getOpcode()) {
2428	case ISD::SINT_TO_FP:
2429	Opcode = WebAssemblyISD::CONVERT_LOW_S;
2430	break;
2431	case ISD::UINT_TO_FP:
2432	Opcode = WebAssemblyISD::CONVERT_LOW_U;
2433	break;
2434	case ISD::FP_EXTEND:
2435	case ISD::FP16_TO_FP:
2436	Opcode = WebAssemblyISD::PROMOTE_LOW;
2437	break;
2438	default:
2439	return false;
2440	}
2441
2442	auto ExtractVector = Op.getOperand(i: `0`);
2443	if (ExtractVector.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2444	return false;
2445
2446	if (!isa<ConstantSDNode>(Val: ExtractVector.getOperand(i: `1`).getNode()))
2447	return false;
2448
2449	SrcVec = ExtractVector.getOperand(i: `0`);
2450	Index = ExtractVector.getConstantOperandVal(i: `1`);
2451	return true;
2452	};
2453
2454	unsigned NumLanes = Op.getValueType() == MVT::v2f64 ? `2` : `4`;
2455	unsigned FirstOpcode = `0`, SecondOpcode = `0`, ThirdOpcode = `0`, FourthOpcode = `0`;
2456	unsigned FirstIndex = `0`, SecondIndex = `0`, ThirdIndex = `0`, FourthIndex = `0`;
2457	SDValue FirstSrcVec, SecondSrcVec, ThirdSrcVec, FourthSrcVec;
2458
2459	if (!GetConvertedLane (Op.getOperand(i: `0`), FirstOpcode, FirstSrcVec,
2460	FirstIndex) \|\|
2461	!GetConvertedLane (Op.getOperand(i: `1`), SecondOpcode, SecondSrcVec,
2462	SecondIndex))
2463	return SDValue ();
2464
2465	// If we're converting to v4f32, check the third and fourth lanes, too.
2466	if (NumLanes == `4` && (!GetConvertedLane (Op.getOperand(i: `2`), ThirdOpcode,
2467	ThirdSrcVec, ThirdIndex) \|\|
2468	!GetConvertedLane (Op.getOperand(i: `3`), FourthOpcode,
2469	FourthSrcVec, FourthIndex)))
2470	return SDValue ();
2471
2472	if (FirstOpcode != SecondOpcode)
2473	return SDValue ();
2474
2475	// TODO Add an optimization similar to the v2f64 below for shuffling the
2476	// vectors when the lanes are in the wrong order or come from different src
2477	// vectors.
2478	if (NumLanes == `4` &&
2479	(FirstOpcode != ThirdOpcode \|\| FirstOpcode != FourthOpcode \|\|
2480	FirstSrcVec != SecondSrcVec \|\| FirstSrcVec != ThirdSrcVec \|\|
2481	FirstSrcVec != FourthSrcVec \|\| FirstIndex != `0` \|\| SecondIndex != `1` \|\|
2482	ThirdIndex != `2` \|\| FourthIndex != `3`))
2483	return SDValue ();
2484
2485	MVT ExpectedSrcVT;
2486	switch (FirstOpcode) {
2487	case WebAssemblyISD::CONVERT_LOW_S:
2488	case WebAssemblyISD::CONVERT_LOW_U:
2489	ExpectedSrcVT = MVT::v4i32;
2490	break;
2491	case WebAssemblyISD::PROMOTE_LOW:
2492	ExpectedSrcVT = NumLanes == `2` ? MVT::v4f32 : MVT::v8i16;
2493	break;
2494	}
2495	if (FirstSrcVec.getValueType() != ExpectedSrcVT)
2496	return SDValue ();
2497
2498	auto Src = FirstSrcVec;
2499	if (NumLanes == `2` &&
2500	(FirstIndex != `0` \|\| SecondIndex != `1` \|\| FirstSrcVec != SecondSrcVec)) {
2501	// Shuffle the source vector so that the converted lanes are the low lanes.
2502	Src = DAG.getVectorShuffle(VT: ExpectedSrcVT, dl: DL, N1: FirstSrcVec, N2: SecondSrcVec,
2503	Mask: {static_cast<int>(FirstIndex),
2504	static_cast<int>(SecondIndex) + `4`, -`1`, -`1`});
2505	}
2506	return DAG.getNode(Opcode: FirstOpcode, DL, VT: NumLanes == `2` ? MVT::v2f64 : MVT::v4f32,
2507	Operand: Src);
2508	}
2509
2510	SDValue WebAssemblyTargetLowering::LowerBUILD_VECTOR(SDValue Op,
2511	SelectionDAG &DAG) const {
2512	MVT VT = Op.getSimpleValueType();
2513	if (VT == MVT::v8f16) {
2514	// BUILD_VECTOR can't handle FP16 operands since Wasm doesn't have a scaler
2515	// FP16 type, so cast them to I16s.
2516	MVT IVT = VT.changeVectorElementType(EltVT: MVT::i16);
2517	SmallVector<SDValue, `8`> NewOps;
2518	for (unsigned I = `0`, E = Op.getNumOperands(); I < E; ++I)
2519	NewOps.push_back(Elt: DAG.getBitcast(VT: MVT::i16, V: Op.getOperand(i: I)));
2520	SDValue Res = DAG.getNode(Opcode: ISD::BUILD_VECTOR, DL: SDLoc (), VT: IVT, Ops: NewOps);
2521	return DAG.getBitcast(VT, V: Res);
2522	}
2523
2524	if (auto ConvertLow = LowerConvertLow(Op, DAG))
2525	return ConvertLow;
2526
2527	SDLoc DL(Op);
2528	const EVT VecT = Op.getValueType();
2529	const EVT LaneT = Op.getOperand(i: `0`).getValueType();
2530	const size_t Lanes = Op.getNumOperands();
2531	bool CanSwizzle = VecT == MVT::v16i8;
2532
2533	// BUILD_VECTORs are lowered to the instruction that initializes the highest
2534	// possible number of lanes at once followed by a sequence of replace_lane
2535	// instructions to individually initialize any remaining lanes.
2536
2537	// TODO: Tune this. For example, lanewise swizzling is very expensive, so
2538	// swizzled lanes should be given greater weight.
2539
2540	// TODO: Investigate looping rather than always extracting/replacing specific
2541	// lanes to fill gaps.
2542
2543	auto IsConstant = [](const SDValue &V) {
2544	return V.getOpcode() == ISD::Constant \|\| V.getOpcode() == ISD::ConstantFP;
2545	};
2546
2547	// Returns the source vector and index vector pair if they exist. Checks for:
2548	// (extract_vector_elt
2549	// $src,
2550	// (sign_extend_inreg (extract_vector_elt $indices, $i))
2551	// )
2552	auto GetSwizzleSrcs = [](size_t I, const SDValue &Lane) {
2553	auto Bail = std::make_pair(x: SDValue (), y: SDValue ());
2554	if (Lane ->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2555	return Bail;
2556	const SDValue &SwizzleSrc = Lane ->getOperand(Num: `0`);
2557	const SDValue &IndexExt = Lane ->getOperand(Num: `1`);
2558	if (IndexExt ->getOpcode() != ISD::SIGN_EXTEND_INREG)
2559	return Bail;
2560	const SDValue &Index = IndexExt ->getOperand(Num: `0`);
2561	if (Index ->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2562	return Bail;
2563	const SDValue &SwizzleIndices = Index ->getOperand(Num: `0`);
2564	if (SwizzleSrc.getValueType() != MVT::v16i8 \|\|
2565	SwizzleIndices.getValueType() != MVT::v16i8 \|\|
2566	Index ->getOperand(Num: `1`)->getOpcode() != ISD::Constant \|\|
2567	Index ->getConstantOperandVal(Num: `1`) != I)
2568	return Bail;
2569	return std::make_pair(x: SwizzleSrc, y: SwizzleIndices);
2570	};
2571
2572	// If the lane is extracted from another vector at a constant index, return
2573	// that vector. The source vector must not have more lanes than the dest
2574	// because the shufflevector indices are in terms of the destination lanes and
2575	// would not be able to address the smaller individual source lanes.
2576	auto GetShuffleSrc = [&](const SDValue &Lane) {
2577	if (Lane ->getOpcode() != ISD::EXTRACT_VECTOR_ELT)
2578	return SDValue ();
2579	if (!isa<ConstantSDNode>(Val: Lane ->getOperand(Num: `1`).getNode()))
2580	return SDValue ();
2581	if (Lane ->getOperand(Num: `0`).getValueType().getVectorNumElements() >
2582	VecT.getVectorNumElements())
2583	return SDValue ();
2584	return Lane ->getOperand(Num: `0`);
2585	};
2586
2587	using ValueEntry = std::pair<SDValue, size_t>;
2588	SmallVector<ValueEntry, `16`> SplatValueCounts;
2589
2590	using SwizzleEntry = std::pair<std::pair<SDValue, SDValue>, size_t>;
2591	SmallVector<SwizzleEntry, `16`> SwizzleCounts;
2592
2593	using ShuffleEntry = std::pair<SDValue, size_t>;
2594	SmallVector<ShuffleEntry, `16`> ShuffleCounts;
2595
2596	auto AddCount = [](auto &Counts, const auto &Val) {
2597	auto CountIt =
2598	llvm::find_if(Counts, [&Val](auto E) { return E.first == Val; });
2599	if (CountIt == Counts.end()) {
2600	Counts.emplace_back(Val, `1`);
2601	} else {
2602	CountIt->second++;
2603	}
2604	};
2605
2606	auto GetMostCommon = [](auto &Counts) {
2607	auto CommonIt = llvm::max_element(Counts, llvm::less_second());
2608	assert(CommonIt != Counts.end() && "Unexpected all-undef build_vector");
2609	return *CommonIt;
2610	};
2611
2612	size_t NumConstantLanes = `0`;
2613
2614	// Count eligible lanes for each type of vector creation op
2615	for (size_t I = `0`; I < Lanes; ++I) {
2616	const SDValue &Lane = Op ->getOperand(Num: I);
2617	if (Lane.isUndef())
2618	continue;
2619
2620	AddCount (SplatValueCounts, Lane);
2621
2622	if (IsConstant (Lane))
2623	NumConstantLanes++;
2624	if (auto ShuffleSrc = GetShuffleSrc (Lane))
2625	AddCount (ShuffleCounts, ShuffleSrc);
2626	if (CanSwizzle) {
2627	auto SwizzleSrcs = GetSwizzleSrcs (I, Lane);
2628	if (SwizzleSrcs.first)
2629	AddCount (SwizzleCounts, SwizzleSrcs);
2630	}
2631	}
2632
2633	SDValue SplatValue;
2634	size_t NumSplatLanes;
2635	std::tie(args&: SplatValue, args&: NumSplatLanes) = GetMostCommon (SplatValueCounts);
2636
2637	SDValue SwizzleSrc;
2638	SDValue SwizzleIndices;
2639	size_t NumSwizzleLanes = `0`;
2640	if (SwizzleCounts.size())
2641	std::forward_as_tuple(args: std::tie(args&: SwizzleSrc, args&: SwizzleIndices),
2642	args&: NumSwizzleLanes) = GetMostCommon (SwizzleCounts);
2643
2644	// Shuffles can draw from up to two vectors, so find the two most common
2645	// sources.
2646	SDValue ShuffleSrc1, ShuffleSrc2;
2647	size_t NumShuffleLanes = `0`;
2648	if (ShuffleCounts.size()) {
2649	std::tie(args&: ShuffleSrc1, args&: NumShuffleLanes) = GetMostCommon (ShuffleCounts);
2650	llvm::erase_if(C&: ShuffleCounts,
2651	P: [&](const auto &Pair) { return Pair.first == ShuffleSrc1; });
2652	}
2653	if (ShuffleCounts.size()) {
2654	size_t AdditionalShuffleLanes;
2655	std::tie(args&: ShuffleSrc2, args&: AdditionalShuffleLanes) =
2656	GetMostCommon (ShuffleCounts);
2657	NumShuffleLanes += AdditionalShuffleLanes;
2658	}
2659
2660	// Predicate returning true if the lane is properly initialized by the
2661	// original instruction
2662	std::function<bool(size_t, const SDValue &)> IsLaneConstructed;
2663	SDValue Result;
2664	// Prefer swizzles over shuffles over vector consts over splats
2665	if (NumSwizzleLanes >= NumShuffleLanes &&
2666	NumSwizzleLanes >= NumConstantLanes && NumSwizzleLanes >= NumSplatLanes) {
2667	Result = DAG.getNode(Opcode: WebAssemblyISD::SWIZZLE, DL, VT: VecT, N1: SwizzleSrc,
2668	N2: SwizzleIndices);
2669	auto Swizzled = std::make_pair(x&: SwizzleSrc, y&: SwizzleIndices);
2670	IsLaneConstructed = [&, Swizzled](size_t I, const SDValue &Lane) {
2671	return Swizzled == GetSwizzleSrcs (I, Lane);
2672	};
2673	} else if (NumShuffleLanes >= NumConstantLanes &&
2674	NumShuffleLanes >= NumSplatLanes) {
2675	size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits() / `8`;
2676	size_t DestLaneCount = VecT.getVectorNumElements();
2677	size_t Scale1 = `1`;
2678	size_t Scale2 = `1`;
2679	SDValue Src1 = ShuffleSrc1;
2680	SDValue Src2 = ShuffleSrc2 ? ShuffleSrc2 : DAG.getUNDEF(VT: VecT);
2681	if (Src1.getValueType() != VecT) {
2682	size_t LaneSize =
2683	Src1.getValueType().getVectorElementType().getFixedSizeInBits() / `8`;
2684	assert(LaneSize > DestLaneSize);
2685	Scale1 = LaneSize / DestLaneSize;
2686	Src1 = DAG.getBitcast(VT: VecT, V: Src1);
2687	}
2688	if (Src2.getValueType() != VecT) {
2689	size_t LaneSize =
2690	Src2.getValueType().getVectorElementType().getFixedSizeInBits() / `8`;
2691	assert(LaneSize > DestLaneSize);
2692	Scale2 = LaneSize / DestLaneSize;
2693	Src2 = DAG.getBitcast(VT: VecT, V: Src2);
2694	}
2695
2696	int Mask[`16`];
2697	assert(DestLaneCount <= `16`);
2698	for (size_t I = `0`; I < DestLaneCount; ++I) {
2699	const SDValue &Lane = Op ->getOperand(Num: I);
2700	SDValue Src = GetShuffleSrc (Lane);
2701	if (Src == ShuffleSrc1) {
2702	Mask[I] = Lane ->getConstantOperandVal(Num: `1`) * Scale1;
2703	} else if (Src && Src == ShuffleSrc2) {
2704	Mask[I] = DestLaneCount + Lane ->getConstantOperandVal(Num: `1`) * Scale2;
2705	} else {
2706	Mask[I] = -`1`;
2707	}
2708	}
2709	ArrayRef<int> MaskRef(Mask, DestLaneCount);
2710	Result = DAG.getVectorShuffle(VT: VecT, dl: DL, N1: Src1, N2: Src2, Mask: MaskRef);
2711	IsLaneConstructed = [&](size_t, const SDValue &Lane) {
2712	auto Src = GetShuffleSrc (Lane);
2713	return Src == ShuffleSrc1 \|\| (Src && Src == ShuffleSrc2);
2714	};
2715	} else if (NumConstantLanes >= NumSplatLanes) {
2716	SmallVector<SDValue, `16`> ConstLanes;
2717	for (const SDValue &Lane : Op ->op_values()) {
2718	if (IsConstant (Lane)) {
2719	// Values may need to be fixed so that they will sign extend to be
2720	// within the expected range during ISel. Check whether the value is in
2721	// bounds based on the lane bit width and if it is out of bounds, lop
2722	// off the extra bits.
2723	uint64_t LaneBits = `128` / Lanes;
2724	if (auto *Const = dyn_cast<ConstantSDNode>(Val: Lane.getNode())) {
2725	ConstLanes.push_back(Elt: DAG.getConstant(
2726	Val: Const->getAPIntValue().trunc(width: LaneBits).getZExtValue(),
2727	DL: SDLoc (Lane), VT: LaneT));
2728	} else {
2729	ConstLanes.push_back(Elt: Lane);
2730	}
2731	} else if (LaneT.isFloatingPoint()) {
2732	ConstLanes.push_back(Elt: DAG.getConstantFP(Val: `0`, DL, VT: LaneT));
2733	} else {
2734	ConstLanes.push_back(Elt: DAG.getConstant(Val: `0`, DL, VT: LaneT));
2735	}
2736	}
2737	Result = DAG.getBuildVector(VT: VecT, DL, Ops: ConstLanes);
2738	IsLaneConstructed = [&IsConstant](size_t _, const SDValue &Lane) {
2739	return IsConstant (Lane);
2740	};
2741	} else {
2742	size_t DestLaneSize = VecT.getVectorElementType().getFixedSizeInBits();
2743	if (NumSplatLanes == `1` && Op ->getOperand(Num: `0`) == SplatValue &&
2744	(DestLaneSize == `32` \|\| DestLaneSize == `64`)) {
2745	// Could be selected to load_zero.
2746	Result = DAG.getNode(Opcode: ISD::SCALAR_TO_VECTOR, DL, VT: VecT, Operand: SplatValue);
2747	} else {
2748	// Use a splat (which might be selected as a load splat)
2749	Result = DAG.getSplatBuildVector(VT: VecT, DL, Op: SplatValue);
2750	}
2751	IsLaneConstructed = [&SplatValue](size_t _, const SDValue &Lane) {
2752	return Lane == SplatValue;
2753	};
2754	}
2755
2756	assert(Result);
2757	assert(IsLaneConstructed);
2758
2759	// Add replace_lane instructions for any unhandled values
2760	for (size_t I = `0`; I < Lanes; ++I) {
2761	const SDValue &Lane = Op ->getOperand(Num: I);
2762	if (!Lane.isUndef() && !IsLaneConstructed (I, Lane))
2763	Result = DAG.getNode(Opcode: ISD::INSERT_VECTOR_ELT, DL, VT: VecT, N1: Result, N2: Lane,
2764	N3: DAG.getConstant(Val: I, DL, VT: MVT::i32));
2765	}
2766
2767	return Result;
2768	}
2769
2770	SDValue
2771	WebAssemblyTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op,
2772	SelectionDAG &DAG) const {
2773	SDLoc DL(Op);
2774	ArrayRef<int> Mask = cast<ShuffleVectorSDNode>(Val: Op.getNode())->getMask();
2775	MVT VecType = Op.getOperand(i: `0`).getSimpleValueType();
2776	assert(VecType.is128BitVector() && "Unexpected shuffle vector type");
2777	size_t LaneBytes = VecType.getVectorElementType().getSizeInBits() / `8`;
2778
2779	// Space for two vector args and sixteen mask indices
2780	SDValue Ops[`18`];
2781	size_t OpIdx = `0`;
2782	Ops[OpIdx++] = Op.getOperand(i: `0`);
2783	Ops[OpIdx++] = Op.getOperand(i: `1`);
2784
2785	// Expand mask indices to byte indices and materialize them as operands
2786	for (int M : Mask) {
2787	for (size_t J = `0`; J < LaneBytes; ++J) {
2788	// Lower undefs (represented by -1 in mask) to {0..J}, which use a
2789	// whole lane of vector input, to allow further reduction at VM. E.g.
2790	// match an 8x16 byte shuffle to an equivalent cheaper 32x4 shuffle.
2791	uint64_t ByteIndex = M == -`1` ? J : (uint64_t)M * LaneBytes + J;
2792	Ops[OpIdx++] = DAG.getConstant(Val: ByteIndex, DL, VT: MVT::i32);
2793	}
2794	}
2795
2796	return DAG.getNode(Opcode: WebAssemblyISD::SHUFFLE, DL, VT: Op.getValueType(), Ops);
2797	}
2798
2799	SDValue WebAssemblyTargetLowering::LowerSETCC(SDValue Op,
2800	SelectionDAG &DAG) const {
2801	SDLoc DL(Op);
2802	// The legalizer does not know how to expand the unsupported comparison modes
2803	// of i64x2 vectors, so we manually unroll them here.
2804	assert(Op->getOperand(`0`)->getSimpleValueType(`0`) == MVT::v2i64);
2805	SmallVector<SDValue, `2`> LHS, RHS;
2806	DAG.ExtractVectorElements(Op: Op ->getOperand(Num: `0`), Args&: LHS);
2807	DAG.ExtractVectorElements(Op: Op ->getOperand(Num: `1`), Args&: RHS);
2808	const SDValue &CC = Op ->getOperand(Num: `2`);
2809	auto MakeLane = [&](unsigned I) {
2810	return DAG.getNode(Opcode: ISD::SELECT_CC, DL, VT: MVT::i64, N1: LHS [I], N2: RHS [I],
2811	N3: DAG.getConstant(Val: uint64_t(-`1`), DL, VT: MVT::i64),
2812	N4: DAG.getConstant(Val: uint64_t(`0`), DL, VT: MVT::i64), N5: CC);
2813	};
2814	return DAG.getBuildVector(VT: Op ->getValueType(ResNo: `0`), DL,
2815	Ops: {MakeLane (`0`), MakeLane (`1`)});
2816	}
2817
2818	SDValue
2819	WebAssemblyTargetLowering::LowerAccessVectorElement(SDValue Op,
2820	SelectionDAG &DAG) const {
2821	// Allow constant lane indices, expand variable lane indices
2822	SDNode *IdxNode = Op.getOperand(i: Op.getNumOperands() - `1`).getNode();
2823	if (isa<ConstantSDNode>(Val: IdxNode)) {
2824	// Ensure the index type is i32 to match the tablegen patterns
2825	uint64_t Idx = IdxNode->getAsZExtVal();
2826	SmallVector<SDValue, `3`> Ops(Op.getNode()->ops());
2827	Ops [Op.getNumOperands() - `1`] =
2828	DAG.getConstant(Val: Idx, DL: SDLoc (IdxNode), VT: MVT::i32);
2829	return DAG.getNode(Opcode: Op.getOpcode(), DL: SDLoc (Op), VT: Op.getValueType(), Ops);
2830	}
2831	// Perform default expansion
2832	return SDValue ();
2833	}
2834
2835	static SDValue unrollVectorShift(SDValue Op, SelectionDAG &DAG) {
2836	EVT LaneT = Op.getSimpleValueType().getVectorElementType();
2837	// 32-bit and 64-bit unrolled shifts will have proper semantics
2838	if (LaneT.bitsGE(VT: MVT::i32))
2839	return DAG.UnrollVectorOp(N: Op.getNode());
2840	// Otherwise mask the shift value to get proper semantics from 32-bit shift
2841	SDLoc DL(Op);
2842	size_t NumLanes = Op.getSimpleValueType().getVectorNumElements();
2843	SDValue Mask = DAG.getConstant(Val: LaneT.getSizeInBits() - `1`, DL, VT: MVT::i32);
2844	unsigned ShiftOpcode = Op.getOpcode();
2845	SmallVector<SDValue, `16`> ShiftedElements;
2846	DAG.ExtractVectorElements(Op: Op.getOperand(i: `0`), Args&: ShiftedElements, Start: `0`, Count: `0`, EltVT: MVT::i32);
2847	SmallVector<SDValue, `16`> ShiftElements;
2848	DAG.ExtractVectorElements(Op: Op.getOperand(i: `1`), Args&: ShiftElements, Start: `0`, Count: `0`, EltVT: MVT::i32);
2849	SmallVector<SDValue, `16`> UnrolledOps;
2850	for (size_t i = `0`; i < NumLanes; ++i) {
2851	SDValue MaskedShiftValue =
2852	DAG.getNode(Opcode: ISD::AND, DL, VT: MVT::i32, N1: ShiftElements [i], N2: Mask);
2853	SDValue ShiftedValue = ShiftedElements [i];
2854	if (ShiftOpcode == ISD::SRA)
2855	ShiftedValue = DAG.getNode(Opcode: ISD::SIGN_EXTEND_INREG, DL, VT: MVT::i32,
2856	N1: ShiftedValue, N2: DAG.getValueType(LaneT));
2857	UnrolledOps.push_back(
2858	Elt: DAG.getNode(Opcode: ShiftOpcode, DL, VT: MVT::i32, N1: ShiftedValue, N2: MaskedShiftValue));
2859	}
2860	return DAG.getBuildVector(VT: Op.getValueType(), DL, Ops: UnrolledOps);
2861	}
2862
2863	SDValue WebAssemblyTargetLowering::LowerShift(SDValue Op,
2864	SelectionDAG &DAG) const {
2865	SDLoc DL(Op);
2866	// Only manually lower vector shifts
2867	assert(Op.getSimpleValueType().isVector());
2868
2869	uint64_t LaneBits = Op.getValueType().getScalarSizeInBits();
2870	auto ShiftVal = Op.getOperand(i: `1`);
2871
2872	// Try to skip bitmask operation since it is implied inside shift instruction
2873	auto SkipImpliedMask = [](SDValue MaskOp, uint64_t MaskBits) {
2874	if (MaskOp.getOpcode() != ISD::AND)
2875	return MaskOp;
2876	SDValue LHS = MaskOp.getOperand(i: `0`);
2877	SDValue RHS = MaskOp.getOperand(i: `1`);
2878	if (MaskOp.getValueType().isVector()) {
2879	APInt MaskVal;
2880	if (!ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: MaskVal))
2881	std::swap(a&: LHS, b&: RHS);
2882
2883	if (ISD::isConstantSplatVector(N: RHS.getNode(), SplatValue&: MaskVal) &&
2884	MaskVal == MaskBits)
2885	MaskOp = LHS;
2886	} else {
2887	if (!isa<ConstantSDNode>(Val: RHS.getNode()))
2888	std::swap(a&: LHS, b&: RHS);
2889
2890	auto ConstantRHS = dyn_cast<ConstantSDNode>(Val: RHS.getNode());
2891	if (ConstantRHS && ConstantRHS->getAPIntValue() == MaskBits)
2892	MaskOp = LHS;
2893	}
2894
2895	return MaskOp;
2896	};
2897
2898	// Skip vector and operation
2899	ShiftVal = SkipImpliedMask (ShiftVal, LaneBits - `1`);
2900	ShiftVal = DAG.getSplatValue(V: ShiftVal);
2901	if (!ShiftVal)
2902	return unrollVectorShift(Op, DAG);
2903
2904	// Skip scalar and operation
2905	ShiftVal = SkipImpliedMask (ShiftVal, LaneBits - `1`);
2906	// Use anyext because none of the high bits can affect the shift
2907	ShiftVal = DAG.getAnyExtOrTrunc(Op: ShiftVal, DL, VT: MVT::i32);
2908
2909	unsigned Opcode;
2910	switch (Op.getOpcode()) {
2911	case ISD::SHL:
2912	Opcode = WebAssemblyISD::VEC_SHL;
2913	break;
2914	case ISD::SRA:
2915	Opcode = WebAssemblyISD::VEC_SHR_S;
2916	break;
2917	case ISD::SRL:
2918	Opcode = WebAssemblyISD::VEC_SHR_U;
2919	break;
2920	default:
2921	llvm_unreachable("unexpected opcode");
2922	}
2923
2924	return DAG.getNode(Opcode, DL, VT: Op.getValueType(), N1: Op.getOperand(i: `0`), N2: ShiftVal);
2925	}
2926
2927	SDValue WebAssemblyTargetLowering::LowerFP_TO_INT_SAT(SDValue Op,
2928	SelectionDAG &DAG) const {
2929	EVT ResT = Op.getValueType();
2930	EVT SatVT = cast<VTSDNode>(Val: Op.getOperand(i: `1`))->getVT();
2931
2932	if ((ResT == MVT::i32 \|\| ResT == MVT::i64) &&
2933	(SatVT == MVT::i32 \|\| SatVT == MVT::i64))
2934	return Op;
2935
2936	if (ResT == MVT::v4i32 && SatVT == MVT::i32)
2937	return Op;
2938
2939	if (ResT == MVT::v8i16 && SatVT == MVT::i16)
2940	return Op;
2941
2942	return SDValue ();
2943	}
2944
2945	static bool HasNoSignedZerosOrNaNs(SDValue Op, SelectionDAG &DAG) {
2946	return (Op ->getFlags().hasNoNaNs() \|\|
2947	(DAG.isKnownNeverNaN(Op: Op ->getOperand(Num: `0`)) &&
2948	DAG.isKnownNeverNaN(Op: Op ->getOperand(Num: `1`)))) &&
2949	(Op ->getFlags().hasNoSignedZeros() \|\|
2950	DAG.isKnownNeverLogicalZero(Op: Op ->getOperand(Num: `0`)) \|\|
2951	DAG.isKnownNeverLogicalZero(Op: Op ->getOperand(Num: `1`)));
2952	}
2953
2954	SDValue WebAssemblyTargetLowering::LowerFMIN(SDValue Op,
2955	SelectionDAG &DAG) const {
2956	if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2957	return DAG.getNode(Opcode: WebAssemblyISD::RELAXED_FMIN, DL: SDLoc (Op),
2958	VT: Op.getValueType(), N1: Op.getOperand(i: `0`), N2: Op.getOperand(i: `1`));
2959	}
2960	return SDValue ();
2961	}
2962
2963	SDValue WebAssemblyTargetLowering::LowerFMAX(SDValue Op,
2964	SelectionDAG &DAG) const {
2965	if (Subtarget->hasRelaxedSIMD() && HasNoSignedZerosOrNaNs(Op, DAG)) {
2966	return DAG.getNode(Opcode: WebAssemblyISD::RELAXED_FMAX, DL: SDLoc (Op),
2967	VT: Op.getValueType(), N1: Op.getOperand(i: `0`), N2: Op.getOperand(i: `1`));
2968	}
2969	return SDValue ();
2970	}
2971
2972	//===----------------------------------------------------------------------===//
2973	// Custom DAG combine hooks
2974	//===----------------------------------------------------------------------===//
2975	static SDValue
2976	performVECTOR_SHUFFLECombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
2977	auto &DAG = DCI.DAG;
2978	auto Shuffle = cast<ShuffleVectorSDNode>(Val: N);
2979
2980	// Hoist vector bitcasts that don't change the number of lanes out of unary
2981	// shuffles, where they are less likely to get in the way of other combines.
2982	// (shuffle (vNxT1 (bitcast (vNxT0 x))), undef, mask) ->
2983	// (vNxT1 (bitcast (vNxT0 (shuffle x, undef, mask))))
2984	SDValue Bitcast = N->getOperand(Num: `0`);
2985	if (Bitcast.getOpcode() != ISD::BITCAST)
2986	return SDValue ();
2987	if (!N->getOperand(Num: `1`).isUndef())
2988	return SDValue ();
2989	SDValue CastOp = Bitcast.getOperand(i: `0`);
2990	EVT SrcType = CastOp.getValueType();
2991	EVT DstType = Bitcast.getValueType();
2992	if (!SrcType.is128BitVector() \|\|
2993	SrcType.getVectorNumElements() != DstType.getVectorNumElements())
2994	return SDValue ();
2995	SDValue NewShuffle = DAG.getVectorShuffle(
2996	VT: SrcType, dl: SDLoc (N), N1: CastOp, N2: DAG.getUNDEF(VT: SrcType), Mask: Shuffle->getMask());
2997	return DAG.getBitcast(VT: DstType, V: NewShuffle);
2998	}
2999
3000	/// Convert ({u,s}itofp vec) --> ({u,s}itofp ({s,z}ext vec)) so it doesn't get
3001	/// split up into scalar instructions during legalization, and the vector
3002	/// extending instructions are selected in performVectorExtendCombine below.
3003	static SDValue
3004	performVectorExtendToFPCombine(SDNode *N,
3005	TargetLowering::DAGCombinerInfo &DCI) {
3006	auto &DAG = DCI.DAG;
3007	assert(N->getOpcode() == ISD::UINT_TO_FP \|\|
3008	N->getOpcode() == ISD::SINT_TO_FP);
3009
3010	EVT InVT = N->getOperand(Num: `0`)->getValueType(ResNo: `0`);
3011	EVT ResVT = N->getValueType(ResNo: `0`);
3012	MVT ExtVT;
3013	if (ResVT == MVT::v4f32 && (InVT == MVT::v4i16 \|\| InVT == MVT::v4i8))
3014	ExtVT = MVT::v4i32;
3015	else if (ResVT == MVT::v2f64 && (InVT == MVT::v2i16 \|\| InVT == MVT::v2i8))
3016	ExtVT = MVT::v2i32;
3017	else
3018	return SDValue ();
3019
3020	unsigned Op =
3021	N->getOpcode() == ISD::UINT_TO_FP ? ISD::ZERO_EXTEND : ISD::SIGN_EXTEND;
3022	SDValue Conv = DAG.getNode(Opcode: Op, DL: SDLoc (N), VT: ExtVT, Operand: N->getOperand(Num: `0`));
3023	return DAG.getNode(Opcode: N->getOpcode(), DL: SDLoc (N), VT: ResVT, Operand: Conv);
3024	}
3025
3026	static SDValue
3027	performVectorNonNegToFPCombine(SDNode *N,
3028	TargetLowering::DAGCombinerInfo &DCI) {
3029	auto &DAG = DCI.DAG;
3030
3031	SDNodeFlags Flags = N->getFlags();
3032	SDValue Op0 = N->getOperand(Num: `0`);
3033	EVT VT = N->getValueType(ResNo: `0`);
3034
3035	// Optimize uitofp to sitofp when the sign bit is known to be zero.
3036	// Depending on the target (runtime) backend, this might be performance
3037	// neutral (e.g. AArch64) or a significant improvement (e.g. x86_64).
3038	if (VT.isVector() && (Flags.hasNonNeg() \|\| DAG.SignBitIsZero(Op: Op0))) {
3039	return DAG.getNode(Opcode: ISD::SINT_TO_FP, DL: SDLoc (N), VT, Operand: Op0);
3040	}
3041
3042	return SDValue ();
3043	}
3044
3045	static SDValue
3046	performVectorExtendCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
3047	auto &DAG = DCI.DAG;
3048	assert(N->getOpcode() == ISD::SIGN_EXTEND \|\|
3049	N->getOpcode() == ISD::ZERO_EXTEND);
3050
3051	EVT ResVT = N->getValueType(ResNo: `0`);
3052	bool IsSext = N->getOpcode() == ISD::SIGN_EXTEND;
3053	SDLoc DL(N);
3054
3055	if (ResVT == MVT::v16i32 && N->getOperand(Num: `0`)->getValueType(ResNo: `0`) == MVT::v16i8) {
3056	// Use a tree of extend low/high to split and extend the input in two
3057	// layers to avoid doing several shuffles and even more extends.
3058	unsigned LowOp =
3059	IsSext ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3060	unsigned HighOp =
3061	IsSext ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3062	SDValue Input = N->getOperand(Num: `0`);
3063	SDValue LowHalf = DAG.getNode(Opcode: LowOp, DL, VT: MVT::v8i16, Operand: Input);
3064	SDValue HighHalf = DAG.getNode(Opcode: HighOp, DL, VT: MVT::v8i16, Operand: Input);
3065	SDValue Subvectors[] = {
3066	DAG.getNode(Opcode: LowOp, DL, VT: MVT::v4i32, Operand: LowHalf),
3067	DAG.getNode(Opcode: HighOp, DL, VT: MVT::v4i32, Operand: LowHalf),
3068	DAG.getNode(Opcode: LowOp, DL, VT: MVT::v4i32, Operand: HighHalf),
3069	DAG.getNode(Opcode: HighOp, DL, VT: MVT::v4i32, Operand: HighHalf),
3070	};
3071	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: ResVT, Ops: Subvectors);
3072	}
3073
3074	// Combine ({s,z}ext (extract_subvector src, i)) into a widening operation if
3075	// possible before the extract_subvector can be expanded.
3076	auto Extract = N->getOperand(Num: `0`);
3077	if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
3078	return SDValue ();
3079	auto Source = Extract.getOperand(i: `0`);
3080	auto *IndexNode = dyn_cast<ConstantSDNode>(Val: Extract.getOperand(i: `1`));
3081	if (IndexNode == nullptr)
3082	return SDValue ();
3083	auto Index = IndexNode->getZExtValue();
3084
3085	// Only v8i8, v4i16, and v2i32 extracts can be widened, and only if the
3086	// extracted subvector is the low or high half of its source.
3087	if (ResVT == MVT::v8i16) {
3088	if (Extract.getValueType() != MVT::v8i8 \|\|
3089	Source.getValueType() != MVT::v16i8 \|\| (Index != `0` && Index != `8`))
3090	return SDValue ();
3091	} else if (ResVT == MVT::v4i32) {
3092	if (Extract.getValueType() != MVT::v4i16 \|\|
3093	Source.getValueType() != MVT::v8i16 \|\| (Index != `0` && Index != `4`))
3094	return SDValue ();
3095	} else if (ResVT == MVT::v2i64) {
3096	if (Extract.getValueType() != MVT::v2i32 \|\|
3097	Source.getValueType() != MVT::v4i32 \|\| (Index != `0` && Index != `2`))
3098	return SDValue ();
3099	} else {
3100	return SDValue ();
3101	}
3102
3103	bool IsLow = Index == `0`;
3104
3105	unsigned Op = IsSext ? (IsLow ? WebAssemblyISD::EXTEND_LOW_S
3106	: WebAssemblyISD::EXTEND_HIGH_S)
3107	: (IsLow ? WebAssemblyISD::EXTEND_LOW_U
3108	: WebAssemblyISD::EXTEND_HIGH_U);
3109
3110	return DAG.getNode(Opcode: Op, DL, VT: ResVT, Operand: Source);
3111	}
3112
3113	static SDValue
3114	performVectorTruncZeroCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
3115	auto &DAG = DCI.DAG;
3116
3117	auto GetWasmConversionOp = [](unsigned Op) {
3118	switch (Op) {
3119	case ISD::FP_TO_SINT_SAT:
3120	return WebAssemblyISD::TRUNC_SAT_ZERO_S;
3121	case ISD::FP_TO_UINT_SAT:
3122	return WebAssemblyISD::TRUNC_SAT_ZERO_U;
3123	case ISD::FP_ROUND:
3124	return WebAssemblyISD::DEMOTE_ZERO;
3125	}
3126	llvm_unreachable("unexpected op");
3127	};
3128
3129	auto IsZeroSplat = [](SDValue SplatVal) {
3130	auto *Splat = dyn_cast<BuildVectorSDNode>(Val: SplatVal.getNode());
3131	APInt SplatValue, SplatUndef;
3132	unsigned SplatBitSize;
3133	bool HasAnyUndefs;
3134	// Endianness doesn't matter in this context because we are looking for
3135	// an all-zero value.
3136	return Splat &&
3137	Splat->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
3138	HasAnyUndefs) &&
3139	SplatValue == `0`;
3140	};
3141
3142	if (N->getOpcode() == ISD::CONCAT_VECTORS) {
3143	// Combine this:
3144	//
3145	// (concat_vectors (v2i32 (fp_to_{s,u}int_sat $x, 32)), (v2i32 (splat 0)))
3146	//
3147	// into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3148	//
3149	// Or this:
3150	//
3151	// (concat_vectors ({v2f32, v4f16} (fp_round ({v2f64, v4f32} $x))),
3152	// ({v2f32, v4f16} (splat 0)))
3153	//
3154	// into ({f32x4, f16x8}.demote_zero_{f64x2, f32x4} $x).
3155	EVT ResVT;
3156	EVT ExpectedConversionType;
3157	auto Conversion = N->getOperand(Num: `0`);
3158	auto ConversionOp = Conversion.getOpcode();
3159	switch (ConversionOp) {
3160	case ISD::FP_TO_SINT_SAT:
3161	case ISD::FP_TO_UINT_SAT:
3162	ResVT = MVT::v4i32;
3163	ExpectedConversionType = MVT::v2i32;
3164	break;
3165	case ISD::FP_ROUND:
3166	if (Conversion.getValueType() == MVT::v2f32) {
3167	ResVT = MVT::v4f32;
3168	ExpectedConversionType = MVT::v2f32;
3169	} else if (Conversion.getValueType() == MVT::v4f16) {
3170	ResVT = MVT::v8f16;
3171	ExpectedConversionType = MVT::v4f16;
3172	} else {
3173	return SDValue ();
3174	}
3175	break;
3176	default:
3177	return SDValue ();
3178	}
3179
3180	if (N->getValueType(ResNo: `0`) != ResVT)
3181	return SDValue ();
3182
3183	if (Conversion.getValueType() != ExpectedConversionType)
3184	return SDValue ();
3185
3186	auto Source = Conversion.getOperand(i: `0`);
3187	if (!((Source.getValueType() == MVT::v2f64 && ResVT == MVT::v4f32) \|\|
3188	(Source.getValueType() == MVT::v2f64 && ResVT == MVT::v4i32) \|\|
3189	(Source.getValueType() == MVT::v4f32 && ResVT == MVT::v8f16)))
3190	return SDValue ();
3191
3192	if (!IsZeroSplat (N->getOperand(Num: `1`)) \|\|
3193	N->getOperand(Num: `1`).getValueType() != ExpectedConversionType)
3194	return SDValue ();
3195
3196	unsigned Op = GetWasmConversionOp (ConversionOp);
3197	return DAG.getNode(Opcode: Op, DL: SDLoc (N), VT: ResVT, Operand: Source);
3198	}
3199
3200	// Combine this:
3201	//
3202	// (fp_to_{s,u}int_sat (concat_vectors $x, (v2f64 (splat 0))), 32)
3203	//
3204	// into (i32x4.trunc_sat_f64x2_zero_{s,u} $x).
3205	//
3206	// Or this:
3207	//
3208	// ({v4f32, v8f16} (fp_round (concat_vectors $x,
3209	// ({v2f64, v4f32} (splat 0)))))
3210	//
3211	// into ({f32x4, f16x8}.demote_zero_{f64x2, f32x4} $x).
3212	EVT ResVT;
3213	auto ConversionOp = N->getOpcode();
3214	switch (ConversionOp) {
3215	case ISD::FP_TO_SINT_SAT:
3216	case ISD::FP_TO_UINT_SAT:
3217	ResVT = MVT::v4i32;
3218	break;
3219	case ISD::FP_ROUND:
3220	ResVT = N->getValueType(ResNo: `0`);
3221	break;
3222	default:
3223	llvm_unreachable("unexpected op");
3224	}
3225
3226	if (N->getValueType(ResNo: `0`) != ResVT)
3227	return SDValue ();
3228
3229	auto Concat = N->getOperand(Num: `0`);
3230	if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3231	return SDValue ();
3232	EVT ConcatVT = Concat.getValueType();
3233	EVT SourceVT = Concat.getOperand(i: `0`).getValueType();
3234
3235	if (!IsZeroSplat (Concat.getOperand(i: `1`)))
3236	return SDValue ();
3237
3238	if (ConversionOp == ISD::FP_ROUND) {
3239	bool IsF64ToF32 =
3240	ConcatVT == MVT::v4f64 && SourceVT == MVT::v2f64 && ResVT == MVT::v4f32;
3241	bool IsF32ToF16 =
3242	ConcatVT == MVT::v8f32 && SourceVT == MVT::v4f32 && ResVT == MVT::v8f16;
3243	if (!(IsF64ToF32 \|\| IsF32ToF16))
3244	return SDValue ();
3245	} else {
3246	if (ConcatVT != MVT::v4f64 \|\| SourceVT != MVT::v2f64 \|\| ResVT != MVT::v4i32)
3247	return SDValue ();
3248	}
3249
3250	unsigned Op = GetWasmConversionOp (ConversionOp);
3251	return DAG.getNode(Opcode: Op, DL: SDLoc (N), VT: ResVT, Operand: Concat.getOperand(i: `0`));
3252	}
3253
3254	// Helper to extract VectorWidth bits from Vec, starting from IdxVal.
3255	static SDValue extractSubVector(SDValue Vec, unsigned IdxVal, SelectionDAG &DAG,
3256	const SDLoc &DL, unsigned VectorWidth) {
3257	EVT VT = Vec.getValueType();
3258	EVT ElVT = VT.getVectorElementType();
3259	unsigned Factor = VT.getSizeInBits() / VectorWidth;
3260	EVT ResultVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: ElVT,
3261	NumElements: VT.getVectorNumElements() / Factor);
3262
3263	// Extract the relevant VectorWidth bits. Generate an EXTRACT_SUBVECTOR
3264	unsigned ElemsPerChunk = VectorWidth / ElVT.getSizeInBits();
3265	assert(isPowerOf2_32(ElemsPerChunk) && "Elements per chunk not power of 2");
3266
3267	// This is the index of the first element of the VectorWidth-bit chunk
3268	// we want. Since ElemsPerChunk is a power of 2 just need to clear bits.
3269	IdxVal &= ~(ElemsPerChunk - `1`);
3270
3271	// If the input is a buildvector just emit a smaller one.
3272	if (Vec.getOpcode() == ISD::BUILD_VECTOR)
3273	return DAG.getBuildVector(VT: ResultVT, DL,
3274	Ops: Vec ->ops().slice(N: IdxVal, M: ElemsPerChunk));
3275
3276	SDValue VecIdx = DAG.getIntPtrConstant(Val: IdxVal, DL);
3277	return DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ResultVT, N1: Vec, N2: VecIdx);
3278	}
3279
3280	// Helper to recursively truncate vector elements in half with NARROW_U. DstVT
3281	// is the expected destination value type after recursion. In is the initial
3282	// input. Note that the input should have enough leading zero bits to prevent
3283	// NARROW_U from saturating results.
3284	static SDValue truncateVectorWithNARROW(EVT DstVT, SDValue In, const SDLoc &DL,
3285	SelectionDAG &DAG) {
3286	EVT SrcVT = In.getValueType();
3287
3288	// No truncation required, we might get here due to recursive calls.
3289	if (SrcVT == DstVT)
3290	return In;
3291
3292	unsigned SrcSizeInBits = SrcVT.getSizeInBits();
3293	unsigned NumElems = SrcVT.getVectorNumElements();
3294	if (!isPowerOf2_32(Value: NumElems))
3295	return SDValue ();
3296	assert(DstVT.getVectorNumElements() == NumElems && "Illegal truncation");
3297	assert(SrcSizeInBits > DstVT.getSizeInBits() && "Illegal truncation");
3298
3299	LLVMContext &Ctx = *DAG.getContext();
3300	EVT PackedSVT = EVT::getIntegerVT(Context&: Ctx, BitWidth: SrcVT.getScalarSizeInBits() / `2`);
3301
3302	// Narrow to the largest type possible:
3303	// vXi64/vXi32 -> i16x8.narrow_i32x4_u and vXi16 -> i8x16.narrow_i16x8_u.
3304	EVT InVT = MVT::i16, OutVT = MVT::i8;
3305	if (SrcVT.getScalarSizeInBits() > `16`) {
3306	InVT = MVT::i32;
3307	OutVT = MVT::i16;
3308	}
3309	unsigned SubSizeInBits = SrcSizeInBits / `2`;
3310	InVT = EVT::getVectorVT(Context&: Ctx, VT: InVT, NumElements: SubSizeInBits / InVT.getSizeInBits());
3311	OutVT = EVT::getVectorVT(Context&: Ctx, VT: OutVT, NumElements: SubSizeInBits / OutVT.getSizeInBits());
3312
3313	// Split lower/upper subvectors.
3314	SDValue Lo = extractSubVector(Vec: In, IdxVal: `0`, DAG, DL, VectorWidth: SubSizeInBits);
3315	SDValue Hi = extractSubVector(Vec: In, IdxVal: NumElems / `2`, DAG, DL, VectorWidth: SubSizeInBits);
3316
3317	// 256bit -> 128bit truncate - Narrow lower/upper 128-bit subvectors.
3318	if (SrcVT.is256BitVector() && DstVT.is128BitVector()) {
3319	Lo = DAG.getBitcast(VT: InVT, V: Lo);
3320	Hi = DAG.getBitcast(VT: InVT, V: Hi);
3321	SDValue Res = DAG.getNode(Opcode: WebAssemblyISD::NARROW_U, DL, VT: OutVT, N1: Lo, N2: Hi);
3322	return DAG.getBitcast(VT: DstVT, V: Res);
3323	}
3324
3325	// Recursively narrow lower/upper subvectors, concat result and narrow again.
3326	EVT PackedVT = EVT::getVectorVT(Context&: Ctx, VT: PackedSVT, NumElements: NumElems / `2`);
3327	Lo = truncateVectorWithNARROW(DstVT: PackedVT, In: Lo, DL, DAG);
3328	Hi = truncateVectorWithNARROW(DstVT: PackedVT, In: Hi, DL, DAG);
3329
3330	PackedVT = EVT::getVectorVT(Context&: Ctx, VT: PackedSVT, NumElements: NumElems);
3331	SDValue Res = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: PackedVT, N1: Lo, N2: Hi);
3332	return truncateVectorWithNARROW(DstVT, In: Res, DL, DAG);
3333	}
3334
3335	static SDValue performTruncateCombine(SDNode *N,
3336	TargetLowering::DAGCombinerInfo &DCI) {
3337	auto &DAG = DCI.DAG;
3338
3339	SDValue In = N->getOperand(Num: `0`);
3340	EVT InVT = In.getValueType();
3341	if (!InVT.isSimple())
3342	return SDValue ();
3343
3344	EVT OutVT = N->getValueType(ResNo: `0`);
3345	if (!OutVT.isVector())
3346	return SDValue ();
3347
3348	EVT OutSVT = OutVT.getVectorElementType();
3349	EVT InSVT = InVT.getVectorElementType();
3350	// Currently only cover truncate to v16i8 or v8i16.
3351	if (!((InSVT == MVT::i16 \|\| InSVT == MVT::i32 \|\| InSVT == MVT::i64) &&
3352	(OutSVT == MVT::i8 \|\| OutSVT == MVT::i16) && OutVT.is128BitVector()))
3353	return SDValue ();
3354
3355	SDLoc DL(N);
3356	APInt Mask = APInt::getLowBitsSet(numBits: InVT.getScalarSizeInBits(),
3357	loBitsSet: OutVT.getScalarSizeInBits());
3358	In = DAG.getNode(Opcode: ISD::AND, DL, VT: InVT, N1: In, N2: DAG.getConstant(Val: Mask, DL, VT: InVT));
3359	return truncateVectorWithNARROW(DstVT: OutVT, In, DL, DAG);
3360	}
3361
3362	static SDValue performBitcastCombine(SDNode *N,
3363	TargetLowering::DAGCombinerInfo &DCI) {
3364	using namespace llvm::SDPatternMatch;
3365	auto &DAG = DCI.DAG;
3366	SDLoc DL(N);
3367	SDValue Src = N->getOperand(Num: `0`);
3368	EVT VT = N->getValueType(ResNo: `0`);
3369	EVT SrcVT = Src.getValueType();
3370
3371	if (!(DCI.isBeforeLegalize() && VT.isScalarInteger() &&
3372	SrcVT.isFixedLengthVectorOf(EltVT: MVT::i1)))
3373	return SDValue ();
3374
3375	unsigned NumElts = SrcVT.getVectorNumElements();
3376	EVT Width = MVT::getIntegerVT(BitWidth: `128` / NumElts);
3377
3378	// bitcast <N x i1> to iN, where N = 2, 4, 8, 16 (legal)
3379	// ==> bitmask
3380	if (NumElts == `2` \|\| NumElts == `4` \|\| NumElts == `8` \|\| NumElts == `16`) {
3381	return DAG.getZExtOrTrunc(
3382	Op: DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3383	Ops: {DAG.getConstant(Val: Intrinsic::wasm_bitmask, DL, VT: MVT::i32),
3384	DAG.getSExtOrTrunc(Op: N->getOperand(Num: `0`), DL,
3385	VT: SrcVT.changeVectorElementType(
3386	Context&: *DAG.getContext(), EltVT: Width))}),
3387	DL, VT);
3388	}
3389
3390	// bitcast <N x i1>(setcc ...) to concat iN, where N = 32 and 64 (illegal)
3391	if (NumElts == `32` \|\| NumElts == `64`) {
3392	SDValue Concat, SetCCVector;
3393	ISD::CondCode SetCond;
3394
3395	if (!sd_match(N, P: m_BitCast(Op: m_c_SetCC(LHS: m_Value(N&: Concat), RHS: m_Value(N&: SetCCVector),
3396	CC: m_CondCode(CC&: SetCond)))))
3397	return SDValue ();
3398	if (Concat.getOpcode() != ISD::CONCAT_VECTORS)
3399	return SDValue ();
3400
3401	// Reconstruct the wide bitmask from each CONCAT_VECTORS operand.
3402	// Derive the per-chunk mask/integer types from the actual operand type
3403	// instead of hardcoding v16i1 / i16 for every chunk.
3404	EVT ConcatOperandVT = Concat.getOperand(i: `0`).getValueType();
3405	unsigned ConcatOperandNumElts = ConcatOperandVT.getVectorNumElements();
3406
3407	EVT ConcatOperandMaskVT =
3408	EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
3409	EC: ElementCount::getFixed(MinVal: ConcatOperandNumElts));
3410	EVT ConcatOperandBitmaskVT =
3411	EVT::getIntegerVT(Context&: *DAG.getContext(), BitWidth: ConcatOperandNumElts);
3412	EVT ReturnVT = N->getValueType(ResNo: `0`);
3413	SDValue ReconstructedBitmask = DAG.getConstant(Val: `0`, DL, VT: ReturnVT);
3414	// Example:
3415	// v32i16 = concat(v8i16, v8i16, v8i16, v8i16)
3416	// -> v8i1 + v8i1 + v8i1 + v8i1
3417	// -> i8 + i8 + i8 + i8
3418	// -> reconstructed i32 bitmask
3419	for (size_t I = `0`; I < Concat ->ops().size(); ++I) {
3420	SDValue ConcatOperand = Concat.getOperand(i: I);
3421	assert(ConcatOperand.getValueType() == ConcatOperandVT &&
3422	"concat_vectors operands must have the same type");
3423
3424	SDValue SetCCVectorOperand =
3425	extractSubVector(Vec: SetCCVector, IdxVal: I * ConcatOperandNumElts, DAG, DL, VectorWidth: `128`);
3426	if (!SetCCVectorOperand \|\|
3427	SetCCVectorOperand.getValueType() != ConcatOperandVT)
3428	return SDValue ();
3429
3430	// Build the per-chunk mask using the correct chunk type:
3431	// v16i8 -> v16i1 -> i16
3432	// v8i16 -> v8i1 -> i8
3433	// v4i32 -> v4i1 -> i4
3434	// v2i64 -> v2i1 -> i2
3435	SDValue ConcatOperandMask = DAG.getSetCC(
3436	DL, VT: ConcatOperandMaskVT, LHS: ConcatOperand, RHS: SetCCVectorOperand, Cond: SetCond);
3437	SDValue ConcatOperandBitmask =
3438	DAG.getBitcast(VT: ConcatOperandBitmaskVT, V: ConcatOperandMask);
3439	SDValue ExtendedConcatOperandBitmask =
3440	DAG.getZExtOrTrunc(Op: ConcatOperandBitmask, DL, VT: ReturnVT);
3441
3442	// Shift the previously reconstructed bits to make room for this chunk.
3443	if (I != `0`) {
3444	ReconstructedBitmask = DAG.getNode(
3445	Opcode: ISD::SHL, DL, VT: ReturnVT, N1: ReconstructedBitmask,
3446	N2: DAG.getShiftAmountConstant(Val: ConcatOperandNumElts, VT: ReturnVT, DL));
3447	}
3448
3449	// Merge disjoint partial bitmasks with OR.
3450	ReconstructedBitmask =
3451	DAG.getNode(Opcode: ISD::OR, DL, VT: ReturnVT, N1: ReconstructedBitmask,
3452	N2: ExtendedConcatOperandBitmask);
3453	}
3454
3455	return ReconstructedBitmask;
3456	}
3457
3458	return SDValue ();
3459	}
3460
3461	static SDValue performBitmaskCombine(SDNode *N, SelectionDAG &DAG) {
3462	// bitmask (setcc <X>, 0, setlt) => bitmask X
3463	assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3464	using namespace llvm::SDPatternMatch;
3465
3466	if (N->getConstantOperandVal(Num: `0`) != Intrinsic::wasm_bitmask)
3467	return SDValue ();
3468
3469	SDValue LHS;
3470	if (!sd_match(N: N->getOperand(Num: `1`), P: m_c_SetCC(LHS: m_Value(N&: LHS), RHS: m_Zero(),
3471	CC: m_SpecificCondCode(CC: ISD::SETLT))))
3472	return SDValue ();
3473
3474	SDLoc DL(N);
3475	return DAG.getNode(
3476	Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: N->getValueType(ResNo: `0`),
3477	Ops: {DAG.getConstant(Val: Intrinsic::wasm_bitmask, DL, VT: MVT::i32), LHS});
3478	}
3479
3480	static SDValue performAnyAllCombine(SDNode *N, SelectionDAG &DAG) {
3481	// any_true (setcc <X>, 0, eq) => (not (all_true X))
3482	// all_true (setcc <X>, 0, eq) => (not (any_true X))
3483	// any_true (setcc <X>, 0, ne) => (any_true X)
3484	// all_true (setcc <X>, 0, ne) => (all_true X)
3485	assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN);
3486	using namespace llvm::SDPatternMatch;
3487
3488	SDValue LHS;
3489	if (N->getNumOperands() < `2` \|\|
3490	!sd_match(N: N->getOperand(Num: `1`),
3491	P: m_c_SetCC(LHS: m_Value(N&: LHS), RHS: m_Zero(), CC: m_CondCode())))
3492	return SDValue ();
3493	EVT LT = LHS.getValueType();
3494	if (LT.getScalarSizeInBits() > `128` / LT.getVectorNumElements())
3495	return SDValue ();
3496
3497	auto CombineSetCC = [&N, &DAG](Intrinsic::WASMIntrinsics InPre,
3498	ISD::CondCode SetType,
3499	Intrinsic::WASMIntrinsics InPost) {
3500	if (N->getConstantOperandVal(Num: `0`) != InPre)
3501	return SDValue ();
3502
3503	SDValue LHS;
3504	if (!sd_match(N: N->getOperand(Num: `1`), P: m_c_SetCC(LHS: m_Value(N&: LHS), RHS: m_Zero(),
3505	CC: m_SpecificCondCode(CC: SetType))))
3506	return SDValue ();
3507
3508	SDLoc DL(N);
3509	SDValue Ret = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3510	Ops: {DAG.getConstant(Val: InPost, DL, VT: MVT::i32), LHS});
3511	if (SetType == ISD::SETEQ)
3512	Ret = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i32, N1: Ret,
3513	N2: DAG.getConstant(Val: `1`, DL, VT: MVT::i32));
3514	return DAG.getZExtOrTrunc(Op: Ret, DL, VT: N->getValueType(ResNo: `0`));
3515	};
3516
3517	if (SDValue AnyTrueEQ = CombineSetCC (Intrinsic::wasm_anytrue, ISD::SETEQ,
3518	Intrinsic::wasm_alltrue))
3519	return AnyTrueEQ;
3520	if (SDValue AllTrueEQ = CombineSetCC (Intrinsic::wasm_alltrue, ISD::SETEQ,
3521	Intrinsic::wasm_anytrue))
3522	return AllTrueEQ;
3523	if (SDValue AnyTrueNE = CombineSetCC (Intrinsic::wasm_anytrue, ISD::SETNE,
3524	Intrinsic::wasm_anytrue))
3525	return AnyTrueNE;
3526	if (SDValue AllTrueNE = CombineSetCC (Intrinsic::wasm_alltrue, ISD::SETNE,
3527	Intrinsic::wasm_alltrue))
3528	return AllTrueNE;
3529
3530	return SDValue ();
3531	}
3532
// Describes how a scalar comparison of a bitcast i1 mask is turned into a
// mask reduction (see classifyMaskReduction, which produces these, and
// combineSmallMaskReduction / combineWideMaskReduction, which consume them).
struct MaskReduceInfo {
  // Intrinsic emitted for the reduction (wasm_anytrue or wasm_alltrue).
  Intrinsic::ID IID;
  // ISD opcode used to merge per-chunk results of a wide mask (ISD::OR for
  // any_true, ISD::AND for all_true).
  unsigned WideCombineOpcode;
  // When true, the i32 reduction result is XOR-ed with 1 before being returned.
  bool Invert;
};
3538
3539	static SDValue combineSmallMaskReduction(SDNode *N, EVT FromVT,
3540	unsigned NumElts,
3541	const MaskReduceInfo &Info,
3542	SelectionDAG &DAG) {
3543	EVT VecVT = FromVT.changeVectorElementType(Context&: *DAG.getContext(),
3544	EltVT: MVT::getIntegerVT(BitWidth: `128` / NumElts));
3545	assert(VecVT.getSizeInBits() == `128` &&
3546	"mask reduction should be widened to a 128-bit vector");
3547
3548	SDLoc DL(N);
3549	SDValue Mask = N->getOperand(Num: `0`)->getOperand(Num: `0`);
3550	SDValue Ret = DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3551	Ops: {DAG.getConstant(Val: Info.IID, DL, VT: MVT::i32),
3552	DAG.getSExtOrTrunc(Op: Mask, DL, VT: VecVT)});
3553	if (Info.Invert)
3554	Ret = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i32, N1: Ret,
3555	N2: DAG.getConstant(Val: `1`, DL, VT: MVT::i32));
3556	return DAG.getZExtOrTrunc(Op: Ret, DL, VT: N->getValueType(ResNo: `0`));
3557	}
3558
3559	static SDValue combineWideMaskReduction(SDNode *N, SDValue Mask, EVT MaskVT,
3560	unsigned NumElts,
3561	const MaskReduceInfo &Info,
3562	SelectionDAG &DAG) {
3563	assert((NumElts == `32` \|\| NumElts == `64`) &&
3564	"combineWideMaskReduction is only for wide masks");
3565	assert(MaskVT.isFixedLengthVector() &&
3566	MaskVT.getVectorElementType() == MVT::i1);
3567	SDLoc DL(N);
3568	unsigned ChunkElts = `16`;
3569	EVT ChunkMaskVT = EVT::getVectorVT(Context&: *DAG.getContext(), VT: MVT::i1,
3570	EC: ElementCount::getFixed(MinVal: ChunkElts));
3571	EVT LegalVecVT = ChunkMaskVT.changeVectorElementType(
3572	Context&: *DAG.getContext(), EltVT: MVT::getIntegerVT(BitWidth: `128` / ChunkElts));
3573
3574	SmallVector<SDValue, `4`> ChunkResults;
3575	// Split the wide mask into v16i1 chunks and reduce each chunk separately.
3576	// For example:
3577	// v32i1: [0..15] [16..31]
3578	// \| \|
3579	// v v
3580	// chunk0 chunk1
3581	//
3582	// v64i1: [0..15] [16..31] [32..47] [48..63]
3583	// \| \| \| \|
3584	// v v v v
3585	// chunk0 chunk1 chunk2 chunk3
3586	//
3587	// each chunk:
3588	// v16i1 -> v16i8 -> wasm_anytrue/alltrue -> i32 0/1
3589	for (unsigned I = `0`; I < NumElts; I += ChunkElts) {
3590	SDValue ChunkMask = DAG.getNode(Opcode: ISD::EXTRACT_SUBVECTOR, DL, VT: ChunkMaskVT,
3591	N1: Mask, N2: DAG.getVectorIdxConstant(Val: I, DL));
3592	SDValue LegalMask = DAG.getSExtOrTrunc(Op: ChunkMask, DL, VT: LegalVecVT);
3593	SDValue Reduced =
3594	DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3595	N1: DAG.getConstant(Val: Info.IID, DL, VT: MVT::i32), N2: LegalMask);
3596	ChunkResults.push_back(Elt: Reduced);
3597	}
3598
3599	SDValue Acc = ChunkResults [`0`];
3600	for (unsigned I = `1`; I < ChunkResults.size(); ++I)
3601	Acc =
3602	DAG.getNode(Opcode: Info.WideCombineOpcode, DL, VT: MVT::i32, N1: Acc, N2: ChunkResults [I]);
3603
3604	if (Info.Invert)
3605	Acc = DAG.getNode(Opcode: ISD::XOR, DL, VT: MVT::i32, N1: Acc,
3606	N2: DAG.getConstant(Val: `1`, DL, VT: MVT::i32));
3607
3608	return DAG.getZExtOrTrunc(Op: Acc, DL, VT: N->getValueType(ResNo: `0`));
3609	}
3610
3611	static std::optional<MaskReduceInfo> classifyMaskReduction(SDNode *N) {
3612	auto *C = dyn_cast<ConstantSDNode>(Val: N->getOperand(Num: `1`));
3613	if (!C)
3614	return std::nullopt;
3615
3616	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `2`))->get();
3617
3618	// setcc (bitcast mask), 0, ne -> any_true(mask)
3619	if (C->isZero() && CC == ISD::SETNE)
3620	return MaskReduceInfo{.IID: Intrinsic::wasm_anytrue, .WideCombineOpcode: ISD::OR, .Invert: false};
3621
3622	// setcc (bitcast mask), 0, eq -> !any_true(mask)
3623	if (C->isZero() && CC == ISD::SETEQ)
3624	return MaskReduceInfo{.IID: Intrinsic::wasm_anytrue, .WideCombineOpcode: ISD::OR, .Invert: true};
3625
3626	// setcc (bitcast mask), -1, eq -> all_true(mask)
3627	if (C->isAllOnes() && CC == ISD::SETEQ)
3628	return MaskReduceInfo{.IID: Intrinsic::wasm_alltrue, .WideCombineOpcode: ISD::AND, .Invert: false};
3629
3630	// setcc (bitcast mask), -1, ne -> !all_true(mask)
3631	if (C->isAllOnes() && CC == ISD::SETNE)
3632	return MaskReduceInfo{.IID: Intrinsic::wasm_alltrue, .WideCombineOpcode: ISD::AND, .Invert: true};
3633
3634	return std::nullopt;
3635	}
3636
3637	/// Try to convert a i128 comparison to a v16i8 comparison before type
3638	/// legalization splits it up into chunks
3639	static SDValue
3640	combineVectorSizedSetCCEquality(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
3641	const WebAssemblySubtarget *Subtarget) {
3642
3643	SDLoc DL(N);
3644	SDValue X = N->getOperand(Num: `0`);
3645	SDValue Y = N->getOperand(Num: `1`);
3646	EVT VT = N->getValueType(ResNo: `0`);
3647	EVT OpVT = X.getValueType();
3648
3649	SelectionDAG &DAG = DCI.DAG;
3650	if (DCI.DAG.getMachineFunction().getFunction().hasFnAttribute(
3651	Kind: Attribute::NoImplicitFloat))
3652	return SDValue ();
3653
3654	ISD::CondCode CC = cast<CondCodeSDNode>(Val: N->getOperand(Num: `2`))->get();
3655	// We're looking for an oversized integer equality comparison with SIMD
3656	if (!OpVT.isScalarInteger() \|\| !OpVT.isByteSized() \|\| OpVT != MVT::i128 \|\|
3657	!Subtarget->hasSIMD128() \|\| !isIntEqualitySetCC(Code: CC))
3658	return SDValue ();
3659
3660	// Don't perform this combine if constructing the vector will be expensive.
3661	auto IsVectorBitCastCheap = [](SDValue X) {
3662	X = peekThroughBitcasts(V: X);
3663	return isa<ConstantSDNode>(Val: X) \|\| X.getOpcode() == ISD::LOAD;
3664	};
3665
3666	if (!IsVectorBitCastCheap (X) \|\| !IsVectorBitCastCheap (Y))
3667	return SDValue ();
3668
3669	SDValue VecX = DAG.getBitcast(VT: MVT::v16i8, V: X);
3670	SDValue VecY = DAG.getBitcast(VT: MVT::v16i8, V: Y);
3671	SDValue Cmp = DAG.getSetCC(DL, VT: MVT::v16i8, LHS: VecX, RHS: VecY, Cond: CC);
3672
3673	SDValue Intr =
3674	DAG.getNode(Opcode: ISD::INTRINSIC_WO_CHAIN, DL, VT: MVT::i32,
3675	Ops: {DAG.getConstant(Val: CC == ISD::SETEQ ? Intrinsic::wasm_alltrue
3676	: Intrinsic::wasm_anytrue,
3677	DL, VT: MVT::i32),
3678	Cmp});
3679
3680	return DAG.getSetCC(DL, VT, LHS: Intr, RHS: DAG.getConstant(Val: `0`, DL, VT: MVT::i32),
3681	Cond: ISD::SETNE);
3682	}
3683
3684	static SDValue performSETCCCombine(SDNode *N,
3685	TargetLowering::DAGCombinerInfo &DCI,
3686	const WebAssemblySubtarget *Subtarget) {
3687	if (!DCI.isBeforeLegalize())
3688	return SDValue ();
3689
3690	EVT VT = N->getValueType(ResNo: `0`);
3691	if (!VT.isScalarInteger())
3692	return SDValue ();
3693
3694	if (SDValue V = combineVectorSizedSetCCEquality(N, DCI, Subtarget))
3695	return V;
3696
3697	SDValue LHS = N->getOperand(Num: `0`);
3698	if (LHS ->getOpcode() != ISD::BITCAST)
3699	return SDValue ();
3700
3701	EVT FromVT = LHS ->getOperand(Num: `0`).getValueType();
3702	if (!FromVT.isFixedLengthVectorOf(EltVT: MVT::i1))
3703	return SDValue ();
3704
3705	unsigned NumElts = FromVT.getVectorNumElements();
3706	auto Info = classifyMaskReduction(N);
3707	if (!Info)
3708	return SDValue ();
3709
3710	auto &DAG = DCI.DAG;
3711	if (NumElts == `2` \|\| NumElts == `4` \|\| NumElts == `8` \|\| NumElts == `16`)
3712	return combineSmallMaskReduction(N, FromVT, NumElts, Info: *Info, DAG);
3713
3714	if (NumElts == `32` \|\| NumElts == `64`)
3715	return combineWideMaskReduction(N, Mask: LHS.getOperand(i: `0`), MaskVT: FromVT, NumElts,
3716	Info: *Info, DAG);
3717
3718	return SDValue ();
3719	}
3720
3721	static SDValue TryWideExtMulCombine(SDNode *N, SelectionDAG &DAG) {
3722	EVT VT = N->getValueType(ResNo: `0`);
3723	if (VT != MVT::v8i32 && VT != MVT::v16i32)
3724	return SDValue ();
3725
3726	// Mul with extending inputs.
3727	SDValue LHS = N->getOperand(Num: `0`);
3728	SDValue RHS = N->getOperand(Num: `1`);
3729	if (LHS.getOpcode() != RHS.getOpcode())
3730	return SDValue ();
3731
3732	if (LHS.getOpcode() != ISD::SIGN_EXTEND &&
3733	LHS.getOpcode() != ISD::ZERO_EXTEND)
3734	return SDValue ();
3735
3736	if (LHS ->getOperand(Num: `0`).getValueType() != RHS ->getOperand(Num: `0`).getValueType())
3737	return SDValue ();
3738
3739	EVT FromVT = LHS ->getOperand(Num: `0`).getValueType();
3740	EVT EltTy = FromVT.getVectorElementType();
3741	if (EltTy != MVT::i8)
3742	return SDValue ();
3743
3744	// For an input DAG that looks like this
3745	// %a = input_type
3746	// %b = input_type
3747	// %lhs = extend %a to output_type
3748	// %rhs = extend %b to output_type
3749	// %mul = mul %lhs, %rhs
3750
3751	// input_type \| output_type \| instructions
3752	// v16i8 \| v16i32 \| %low = i16x8.extmul_low_i8x16_ %a, %b
3753	// \| \| %high = i16x8.extmul_high_i8x16_, %a, %b
3754	// \| \| %low_low = i32x4.ext_low_i16x8_ %low
3755	// \| \| %low_high = i32x4.ext_high_i16x8_ %low
3756	// \| \| %high_low = i32x4.ext_low_i16x8_ %high
3757	// \| \| %high_high = i32x4.ext_high_i16x8_ %high
3758	// \| \| %res = concat_vector(...)
3759	// v8i8 \| v8i32 \| %low = i16x8.extmul_low_i8x16_ %a, %b
3760	// \| \| %low_low = i32x4.ext_low_i16x8_ %low
3761	// \| \| %low_high = i32x4.ext_high_i16x8_ %low
3762	// \| \| %res = concat_vector(%low_low, %low_high)
3763
3764	SDLoc DL(N);
3765	unsigned NumElts = VT.getVectorNumElements();
3766	SDValue ExtendInLHS = LHS ->getOperand(Num: `0`);
3767	SDValue ExtendInRHS = RHS ->getOperand(Num: `0`);
3768	bool IsSigned = LHS ->getOpcode() == ISD::SIGN_EXTEND;
3769	unsigned ExtendLowOpc =
3770	IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3771	unsigned ExtendHighOpc =
3772	IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3773
3774	auto GetExtendLow = [&DAG, &DL, &ExtendLowOpc](EVT VT, SDValue Op) {
3775	return DAG.getNode(Opcode: ExtendLowOpc, DL, VT, Operand: Op);
3776	};
3777	auto GetExtendHigh = [&DAG, &DL, &ExtendHighOpc](EVT VT, SDValue Op) {
3778	return DAG.getNode(Opcode: ExtendHighOpc, DL, VT, Operand: Op);
3779	};
3780
3781	if (NumElts == `16`) {
3782	SDValue LowLHS = GetExtendLow (MVT::v8i16, ExtendInLHS);
3783	SDValue LowRHS = GetExtendLow (MVT::v8i16, ExtendInRHS);
3784	SDValue MulLow = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v8i16, N1: LowLHS, N2: LowRHS);
3785	SDValue HighLHS = GetExtendHigh (MVT::v8i16, ExtendInLHS);
3786	SDValue HighRHS = GetExtendHigh (MVT::v8i16, ExtendInRHS);
3787	SDValue MulHigh = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v8i16, N1: HighLHS, N2: HighRHS);
3788	SDValue SubVectors[] = {
3789	GetExtendLow (MVT::v4i32, MulLow),
3790	GetExtendHigh (MVT::v4i32, MulLow),
3791	GetExtendLow (MVT::v4i32, MulHigh),
3792	GetExtendHigh (MVT::v4i32, MulHigh),
3793	};
3794	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, Ops: SubVectors);
3795	} else {
3796	assert(NumElts == `8`);
3797	SDValue LowLHS = DAG.getNode(Opcode: LHS ->getOpcode(), DL, VT: MVT::v8i16, Operand: ExtendInLHS);
3798	SDValue LowRHS = DAG.getNode(Opcode: RHS ->getOpcode(), DL, VT: MVT::v8i16, Operand: ExtendInRHS);
3799	SDValue MulLow = DAG.getNode(Opcode: ISD::MUL, DL, VT: MVT::v8i16, N1: LowLHS, N2: LowRHS);
3800	SDValue Lo = GetExtendLow (MVT::v4i32, MulLow);
3801	SDValue Hi = GetExtendHigh (MVT::v4i32, MulLow);
3802	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: Lo, N2: Hi);
3803	}
3804	return SDValue ();
3805	}
3806
3807	static SDValue performMulCombine(SDNode *N,
3808	TargetLowering::DAGCombinerInfo &DCI) {
3809	assert(N->getOpcode() == ISD::MUL);
3810	EVT VT = N->getValueType(ResNo: `0`);
3811	if (!VT.isVector())
3812	return SDValue ();
3813
3814	if (auto Res = TryWideExtMulCombine(N, DAG&: DCI.DAG))
3815	return Res;
3816
3817	// We don't natively support v16i8 or v8i8 mul, but we do support v8i16. So,
3818	// extend them to v8i16.
3819	if (VT != MVT::v8i8 && VT != MVT::v16i8)
3820	return SDValue ();
3821
3822	SDLoc DL(N);
3823	SelectionDAG &DAG = DCI.DAG;
3824	SDValue LHS = N->getOperand(Num: `0`);
3825	SDValue RHS = N->getOperand(Num: `1`);
3826	EVT MulVT = MVT::v8i16;
3827
3828	if (VT == MVT::v8i8) {
3829	SDValue PromotedLHS = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: MVT::v16i8, N1: LHS,
3830	N2: DAG.getUNDEF(VT: MVT::v8i8));
3831	SDValue PromotedRHS = DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: MVT::v16i8, N1: RHS,
3832	N2: DAG.getUNDEF(VT: MVT::v8i8));
3833	SDValue LowLHS =
3834	DAG.getNode(Opcode: WebAssemblyISD::EXTEND_LOW_U, DL, VT: MulVT, Operand: PromotedLHS);
3835	SDValue LowRHS =
3836	DAG.getNode(Opcode: WebAssemblyISD::EXTEND_LOW_U, DL, VT: MulVT, Operand: PromotedRHS);
3837	SDValue MulLow = DAG.getBitcast(
3838	VT: MVT::v16i8, V: DAG.getNode(Opcode: ISD::MUL, DL, VT: MulVT, N1: LowLHS, N2: LowRHS));
3839	// Take the low byte of each lane.
3840	SDValue Shuffle = DAG.getVectorShuffle(
3841	VT: MVT::v16i8, dl: DL, N1: MulLow, N2: DAG.getUNDEF(VT: MVT::v16i8),
3842	Mask: {`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`});
3843	return extractSubVector(Vec: Shuffle, IdxVal: `0`, DAG, DL, VectorWidth: `64`);
3844	} else {
3845	assert(VT == MVT::v16i8 && "Expected v16i8");
3846	SDValue LowLHS = DAG.getNode(Opcode: WebAssemblyISD::EXTEND_LOW_U, DL, VT: MulVT, Operand: LHS);
3847	SDValue LowRHS = DAG.getNode(Opcode: WebAssemblyISD::EXTEND_LOW_U, DL, VT: MulVT, Operand: RHS);
3848	SDValue HighLHS =
3849	DAG.getNode(Opcode: WebAssemblyISD::EXTEND_HIGH_U, DL, VT: MulVT, Operand: LHS);
3850	SDValue HighRHS =
3851	DAG.getNode(Opcode: WebAssemblyISD::EXTEND_HIGH_U, DL, VT: MulVT, Operand: RHS);
3852
3853	SDValue MulLow =
3854	DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::MUL, DL, VT: MulVT, N1: LowLHS, N2: LowRHS));
3855	SDValue MulHigh =
3856	DAG.getBitcast(VT, V: DAG.getNode(Opcode: ISD::MUL, DL, VT: MulVT, N1: HighLHS, N2: HighRHS));
3857
3858	// Take the low byte of each lane.
3859	return DAG.getVectorShuffle(
3860	VT, dl: DL, N1: MulLow, N2: MulHigh,
3861	Mask: {`0`, `2`, `4`, `6`, `8`, `10`, `12`, `14`, `16`, `18`, `20`, `22`, `24`, `26`, `28`, `30`});
3862	}
3863	}
3864
3865	SDValue DoubleVectorWidth(SDValue In, unsigned RequiredNumElems,
3866	SelectionDAG &DAG) {
3867	SDLoc DL(In);
3868	LLVMContext &Ctx = *DAG.getContext();
3869	EVT InVT = In.getValueType();
3870	unsigned NumElems = InVT.getVectorNumElements() * `2`;
3871	EVT OutVT = EVT::getVectorVT(Context&: Ctx, VT: InVT.getVectorElementType(), NumElements: NumElems);
3872	SDValue Concat =
3873	DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT: OutVT, N1: In, N2: DAG.getPOISON(VT: InVT));
3874	if (NumElems < RequiredNumElems) {
3875	return DoubleVectorWidth(In: Concat, RequiredNumElems, DAG);
3876	}
3877	return Concat;
3878	}
3879
3880	SDValue performConvertFPCombine(SDNode *N, SelectionDAG &DAG) {
3881	EVT OutVT = N->getValueType(ResNo: `0`);
3882	if (!OutVT.isVector())
3883	return SDValue ();
3884
3885	EVT OutElTy = OutVT.getVectorElementType();
3886	if (OutElTy != MVT::i8 && OutElTy != MVT::i16)
3887	return SDValue ();
3888
3889	unsigned NumElems = OutVT.getVectorNumElements();
3890	if (!isPowerOf2_32(Value: NumElems))
3891	return SDValue ();
3892
3893	EVT FPVT = N->getOperand(Num: `0`)->getValueType(ResNo: `0`);
3894	if (FPVT.getVectorElementType() != MVT::f32)
3895	return SDValue ();
3896
3897	SDLoc DL(N);
3898
3899	// First, convert to i32.
3900	LLVMContext &Ctx = *DAG.getContext();
3901	EVT IntVT = EVT::getVectorVT(Context&: Ctx, VT: MVT::i32, NumElements: NumElems);
3902	SDValue ToInt = DAG.getNode(Opcode: N->getOpcode(), DL, VT: IntVT, Operand: N->getOperand(Num: `0`));
3903	APInt Mask = APInt::getLowBitsSet(numBits: IntVT.getScalarSizeInBits(),
3904	loBitsSet: OutVT.getScalarSizeInBits());
3905	// Mask out the top MSBs.
3906	SDValue Masked =
3907	DAG.getNode(Opcode: ISD::AND, DL, VT: IntVT, N1: ToInt, N2: DAG.getConstant(Val: Mask, DL, VT: IntVT));
3908
3909	if (OutVT.getSizeInBits() < `128`) {
3910	// Create a wide enough vector that we can use narrow.
3911	EVT NarrowedVT = OutElTy == MVT::i8 ? MVT::v16i8 : MVT::v8i16;
3912	unsigned NumRequiredElems = NarrowedVT.getVectorNumElements();
3913	SDValue WideVector = DoubleVectorWidth(In: Masked, RequiredNumElems: NumRequiredElems, DAG);
3914	SDValue Trunc = truncateVectorWithNARROW(DstVT: NarrowedVT, In: WideVector, DL, DAG);
3915	return DAG.getBitcast(
3916	VT: OutVT, V: extractSubVector(Vec: Trunc, IdxVal: `0`, DAG, DL, VectorWidth: OutVT.getSizeInBits()));
3917	} else {
3918	return truncateVectorWithNARROW(DstVT: OutVT, In: Masked, DL, DAG);
3919	}
3920	return SDValue ();
3921	}
3922
3923	// Wide vector shift operations such as v8i32 with sign-extended
3924	// operands cause Type Legalizer crashes because the target-specific
3925	// extension nodes cannot be directly mapped to the 256-bit size.
3926	//
3927	// To resolve the crash and optimize performance, we intercept the
3928	// illegal v8i32 shift in DAGCombine. We convert the shift amounts
3929	// into multipliers and manually split the vector into two v4i32 halves.
3930	//
3931	// Before: t1: v8i32 = shl (sign_extend v8i16), const_vec
3932	// After : t2: v4i32 = mul (ext_low_s v8i16), (ext_low_s narrow_vec)
3933	// t3: v4i32 = mul (ext_high_s v8i16), (ext_high_s narrow_vec)
3934	// t4: v8i32 = concat_vectors t2, t3
3935	static SDValue performShiftCombine(SDNode *N,
3936	TargetLowering::DAGCombinerInfo &DCI) {
3937	SelectionDAG &DAG = DCI.DAG;
3938	assert(N->getOpcode() == ISD::SHL);
3939	EVT VT = N->getValueType(ResNo: `0`);
3940	if (VT != MVT::v8i32)
3941	return SDValue ();
3942
3943	SDValue LHS = N->getOperand(Num: `0`);
3944	SDValue RHS = N->getOperand(Num: `1`);
3945	unsigned ExtOpc = LHS.getOpcode();
3946	if (ExtOpc != ISD::SIGN_EXTEND && ExtOpc != ISD::ZERO_EXTEND)
3947	return SDValue ();
3948
3949	if (RHS.getOpcode() != ISD::BUILD_VECTOR)
3950	return SDValue ();
3951
3952	SDLoc DL(N);
3953	SDValue ExtendIn = LHS.getOperand(i: `0`);
3954	EVT FromVT = ExtendIn.getValueType();
3955	if (FromVT != MVT::v8i16)
3956	return SDValue ();
3957
3958	unsigned NumElts = VT.getVectorNumElements();
3959	unsigned BitWidth = FromVT.getScalarSizeInBits();
3960	bool IsSigned = (ExtOpc == ISD::SIGN_EXTEND);
3961	unsigned MaxValidShift = IsSigned ? (BitWidth - `1`) : BitWidth;
3962	SmallVector<SDValue, `16`> MulConsts;
3963	for (unsigned I = `0`; I < NumElts; ++I) {
3964	auto *C = dyn_cast<ConstantSDNode>(Val: RHS.getOperand(i: I));
3965	if (!C)
3966	return SDValue ();
3967
3968	const APInt &ShiftAmt = C->getAPIntValue();
3969	if (ShiftAmt.uge(RHS: MaxValidShift))
3970	return SDValue ();
3971
3972	APInt MulAmt = APInt::getOneBitSet(numBits: BitWidth, BitNo: ShiftAmt.getZExtValue());
3973	MulConsts.push_back(Elt: DAG.getConstant(Val: MulAmt, DL, VT: FromVT.getScalarType(),
3974	/isTarget=/false, /isOpaque=/true));
3975	}
3976
3977	SDValue NarrowConst = DAG.getBuildVector(VT: FromVT, DL, Ops: MulConsts);
3978	unsigned ExtLowOpc =
3979	IsSigned ? WebAssemblyISD::EXTEND_LOW_S : WebAssemblyISD::EXTEND_LOW_U;
3980	unsigned ExtHighOpc =
3981	IsSigned ? WebAssemblyISD::EXTEND_HIGH_S : WebAssemblyISD::EXTEND_HIGH_U;
3982
3983	EVT HalfVT = MVT::v4i32;
3984	SDValue LHSLo = DAG.getNode(Opcode: ExtLowOpc, DL, VT: HalfVT, Operand: ExtendIn);
3985	SDValue LHSHi = DAG.getNode(Opcode: ExtHighOpc, DL, VT: HalfVT, Operand: ExtendIn);
3986	SDValue RHSLo = DAG.getNode(Opcode: ExtLowOpc, DL, VT: HalfVT, Operand: NarrowConst);
3987	SDValue RHSHi = DAG.getNode(Opcode: ExtHighOpc, DL, VT: HalfVT, Operand: NarrowConst);
3988	SDValue MulLo = DAG.getNode(Opcode: ISD::MUL, DL, VT: HalfVT, N1: LHSLo, N2: RHSLo);
3989	SDValue MulHi = DAG.getNode(Opcode: ISD::MUL, DL, VT: HalfVT, N1: LHSHi, N2: RHSHi);
3990	return DAG.getNode(Opcode: ISD::CONCAT_VECTORS, DL, VT, N1: MulLo, N2: MulHi);
3991	}
3992
3993	static SDValue performMinMaxF128Combine(SDNode *N, SelectionDAG &DAG) {
3994	if (N->getValueType(ResNo: `0`) != MVT::f128)
3995	return SDValue ();
3996
3997	const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3998	switch (N->getOpcode()) {
3999	// wasi-libc and emscripten do not currently define fminimuml and fmaximuml.
4000	case ISD::FMINIMUM:
4001	case ISD::FMAXIMUM:
4002	return TLI.expandFMINIMUM_FMAXIMUM(N, DAG);
4003
4004	// wasi-libc and emscripten do not currently define fminimum_numl and
4005	// fmaximum_numl.
4006	case ISD::FMINIMUMNUM:
4007	case ISD::FMAXIMUMNUM:
4008	return TLI.expandFMINIMUMNUM_FMAXIMUMNUM(N, DAG);
4009
4010	default:
4011	return SDValue ();
4012	}
4013	}
4014
4015	SDValue
4016	WebAssemblyTargetLowering::PerformDAGCombine(SDNode *N,
4017	DAGCombinerInfo &DCI) const {
4018	switch (N->getOpcode()) {
4019	default:
4020	return SDValue ();
4021	case ISD::BITCAST:
4022	return performBitcastCombine(N, DCI);
4023	case ISD::SETCC:
4024	return performSETCCCombine(N, DCI, Subtarget);
4025	case ISD::VECTOR_SHUFFLE:
4026	return performVECTOR_SHUFFLECombine(N, DCI);
4027	case ISD::SIGN_EXTEND:
4028	case ISD::ZERO_EXTEND:
4029	return performVectorExtendCombine(N, DCI);
4030	case ISD::UINT_TO_FP:
4031	if (auto ExtCombine = performVectorExtendToFPCombine(N, DCI))
4032	return ExtCombine;
4033	return performVectorNonNegToFPCombine(N, DCI);
4034	case ISD::SINT_TO_FP:
4035	return performVectorExtendToFPCombine(N, DCI);
4036	case ISD::FP_TO_SINT_SAT:
4037	case ISD::FP_TO_UINT_SAT:
4038	case ISD::FP_ROUND:
4039	case ISD::CONCAT_VECTORS:
4040	return performVectorTruncZeroCombine(N, DCI);
4041	case ISD::FP_TO_SINT:
4042	case ISD::FP_TO_UINT:
4043	return performConvertFPCombine(N, DAG&: DCI.DAG);
4044	case ISD::TRUNCATE:
4045	return performTruncateCombine(N, DCI);
4046	case ISD::INTRINSIC_WO_CHAIN: {
4047	if (SDValue V = performBitmaskCombine(N, DAG&: DCI.DAG))
4048	return V;
4049	return performAnyAllCombine(N, DAG&: DCI.DAG);
4050	}
4051	case ISD::MUL:
4052	return performMulCombine(N, DCI);
4053	case ISD::SHL:
4054	return performShiftCombine(N, DCI);
4055	case ISD::FMINIMUM:
4056	case ISD::FMAXIMUM:
4057	case ISD::FMINIMUMNUM:
4058	case ISD::FMAXIMUMNUM:
4059	return performMinMaxF128Combine(N, DAG&: DCI.DAG);
4060	}
4061	}
4062

Browse the source code of llvm_projects/llvm/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp