1 | //===---------- PPC.cpp - Emit LLVM Code for builtins ---------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This contains code to emit Builtin calls as LLVM code. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "CGBuiltin.h" |
14 | #include "clang/Basic/TargetBuiltins.h" |
15 | #include "llvm/IR/InlineAsm.h" |
16 | #include "llvm/IR/IntrinsicsPowerPC.h" |
17 | #include "llvm/Support/ScopedPrinter.h" |
18 | |
19 | using namespace clang; |
20 | using namespace CodeGen; |
21 | using namespace llvm; |
22 | |
23 | static llvm::Value *emitPPCLoadReserveIntrinsic(CodeGenFunction &CGF, |
24 | unsigned BuiltinID, |
25 | const CallExpr *E) { |
26 | Value *Addr = CGF.EmitScalarExpr(E: E->getArg(Arg: 0)); |
27 | |
28 | SmallString<64> Asm; |
29 | raw_svector_ostream AsmOS(Asm); |
30 | llvm::IntegerType *RetType = CGF.Int32Ty; |
31 | |
32 | switch (BuiltinID) { |
33 | case clang::PPC::BI__builtin_ppc_ldarx: |
34 | AsmOS << "ldarx " ; |
35 | RetType = CGF.Int64Ty; |
36 | break; |
37 | case clang::PPC::BI__builtin_ppc_lwarx: |
38 | AsmOS << "lwarx " ; |
39 | RetType = CGF.Int32Ty; |
40 | break; |
41 | case clang::PPC::BI__builtin_ppc_lharx: |
42 | AsmOS << "lharx " ; |
43 | RetType = CGF.Int16Ty; |
44 | break; |
45 | case clang::PPC::BI__builtin_ppc_lbarx: |
46 | AsmOS << "lbarx " ; |
47 | RetType = CGF.Int8Ty; |
48 | break; |
49 | default: |
50 | llvm_unreachable("Expected only PowerPC load reserve intrinsics" ); |
51 | } |
52 | |
53 | AsmOS << "$0, ${1:y}" ; |
54 | |
55 | std::string Constraints = "=r,*Z,~{memory}" ; |
56 | std::string_view MachineClobbers = CGF.getTarget().getClobbers(); |
57 | if (!MachineClobbers.empty()) { |
58 | Constraints += ','; |
59 | Constraints += MachineClobbers; |
60 | } |
61 | |
62 | llvm::Type *PtrType = CGF.UnqualPtrTy; |
63 | llvm::FunctionType *FTy = llvm::FunctionType::get(Result: RetType, Params: {PtrType}, isVarArg: false); |
64 | |
65 | llvm::InlineAsm *IA = |
66 | llvm::InlineAsm::get(Ty: FTy, AsmString: Asm, Constraints, /*hasSideEffects=*/true); |
67 | llvm::CallInst *CI = CGF.Builder.CreateCall(Callee: IA, Args: {Addr}); |
68 | CI->addParamAttr( |
69 | ArgNo: 0, Attr: Attribute::get(Context&: CGF.getLLVMContext(), Kind: Attribute::ElementType, Ty: RetType)); |
70 | return CI; |
71 | } |
72 | |
73 | Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, |
74 | const CallExpr *E) { |
75 | // Do not emit the builtin arguments in the arguments of a function call, |
76 | // because the evaluation order of function arguments is not specified in C++. |
77 | // This is important when testing to ensure the arguments are emitted in the |
78 | // same order every time. Eg: |
79 | // Instead of: |
80 | // return Builder.CreateFDiv(EmitScalarExpr(E->getArg(0)), |
81 | // EmitScalarExpr(E->getArg(1)), "swdiv"); |
82 | // Use: |
83 | // Value *Op0 = EmitScalarExpr(E->getArg(0)); |
84 | // Value *Op1 = EmitScalarExpr(E->getArg(1)); |
85 | // return Builder.CreateFDiv(Op0, Op1, "swdiv") |
86 | |
87 | Intrinsic::ID ID = Intrinsic::not_intrinsic; |
88 | |
89 | #include "llvm/TargetParser/PPCTargetParser.def" |
90 | auto GenAIXPPCBuiltinCpuExpr = [&](unsigned SupportMethod, unsigned FieldIdx, |
91 | unsigned Mask, CmpInst::Predicate CompOp, |
92 | unsigned OpValue) -> Value * { |
93 | if (SupportMethod == BUILTIN_PPC_FALSE) |
94 | return llvm::ConstantInt::getFalse(Ty: ConvertType(T: E->getType())); |
95 | |
96 | if (SupportMethod == BUILTIN_PPC_TRUE) |
97 | return llvm::ConstantInt::getTrue(Ty: ConvertType(T: E->getType())); |
98 | |
99 | assert(SupportMethod <= SYS_CALL && "Invalid value for SupportMethod." ); |
100 | |
101 | llvm::Value *FieldValue = nullptr; |
102 | if (SupportMethod == USE_SYS_CONF) { |
103 | llvm::Type *STy = llvm::StructType::get(PPC_SYSTEMCONFIG_TYPE); |
104 | llvm::Constant *SysConf = |
105 | CGM.CreateRuntimeVariable(Ty: STy, Name: "_system_configuration" ); |
106 | |
107 | // Grab the appropriate field from _system_configuration. |
108 | llvm::Value *Idxs[] = {ConstantInt::get(Ty: Int32Ty, V: 0), |
109 | ConstantInt::get(Ty: Int32Ty, V: FieldIdx)}; |
110 | |
111 | FieldValue = Builder.CreateInBoundsGEP(Ty: STy, Ptr: SysConf, IdxList: Idxs); |
112 | FieldValue = Builder.CreateAlignedLoad(Ty: Int32Ty, Addr: FieldValue, |
113 | Align: CharUnits::fromQuantity(Quantity: 4)); |
114 | } else if (SupportMethod == SYS_CALL) { |
115 | llvm::FunctionType *FTy = |
116 | llvm::FunctionType::get(Result: Int64Ty, Params: Int32Ty, isVarArg: false); |
117 | llvm::FunctionCallee Func = |
118 | CGM.CreateRuntimeFunction(Ty: FTy, Name: "getsystemcfg" ); |
119 | |
120 | FieldValue = |
121 | Builder.CreateCall(Callee: Func, Args: {ConstantInt::get(Ty: Int32Ty, V: FieldIdx)}); |
122 | } |
123 | assert(FieldValue && |
124 | "SupportMethod value is not defined in PPCTargetParser.def." ); |
125 | |
126 | if (Mask) |
127 | FieldValue = Builder.CreateAnd(LHS: FieldValue, RHS: Mask); |
128 | |
129 | llvm::Type *ValueType = FieldValue->getType(); |
130 | bool IsValueType64Bit = ValueType->isIntegerTy(Bitwidth: 64); |
131 | assert( |
132 | (IsValueType64Bit || ValueType->isIntegerTy(32)) && |
133 | "Only 32/64-bit integers are supported in GenAIXPPCBuiltinCpuExpr()." ); |
134 | |
135 | return Builder.CreateICmp( |
136 | P: CompOp, LHS: FieldValue, |
137 | RHS: ConstantInt::get(Ty: IsValueType64Bit ? Int64Ty : Int32Ty, V: OpValue)); |
138 | }; |
139 | |
140 | switch (BuiltinID) { |
141 | default: return nullptr; |
142 | |
143 | case Builtin::BI__builtin_cpu_is: { |
144 | const Expr *CPUExpr = E->getArg(Arg: 0)->IgnoreParenCasts(); |
145 | StringRef CPUStr = cast<clang::StringLiteral>(Val: CPUExpr)->getString(); |
146 | llvm::Triple Triple = getTarget().getTriple(); |
147 | |
148 | typedef std::tuple<unsigned, unsigned, unsigned, unsigned> CPUInfo; |
149 | |
150 | auto [LinuxSupportMethod, LinuxIDValue, AIXSupportMethod, AIXIDValue] = |
151 | static_cast<CPUInfo>(StringSwitch<CPUInfo>(CPUStr) |
152 | #define PPC_CPU(NAME, Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, \ |
153 | AIXID) \ |
154 | .Case(NAME, {Linux_SUPPORT_METHOD, LinuxID, AIX_SUPPORT_METHOD, AIXID}) |
155 | #include "llvm/TargetParser/PPCTargetParser.def" |
156 | .Default(Value: {BUILTIN_PPC_UNSUPPORTED, 0, |
157 | BUILTIN_PPC_UNSUPPORTED, 0})); |
158 | |
159 | if (Triple.isOSAIX()) { |
160 | assert((AIXSupportMethod != BUILTIN_PPC_UNSUPPORTED) && |
161 | "Invalid CPU name. Missed by SemaChecking?" ); |
162 | return GenAIXPPCBuiltinCpuExpr(AIXSupportMethod, AIX_SYSCON_IMPL_IDX, 0, |
163 | ICmpInst::ICMP_EQ, AIXIDValue); |
164 | } |
165 | |
166 | assert(Triple.isOSLinux() && |
167 | "__builtin_cpu_is() is only supported for AIX and Linux." ); |
168 | |
169 | assert((LinuxSupportMethod != BUILTIN_PPC_UNSUPPORTED) && |
170 | "Invalid CPU name. Missed by SemaChecking?" ); |
171 | |
172 | if (LinuxSupportMethod == BUILTIN_PPC_FALSE) |
173 | return llvm::ConstantInt::getFalse(Ty: ConvertType(T: E->getType())); |
174 | |
175 | Value *Op0 = llvm::ConstantInt::get(Ty: Int32Ty, PPC_FAWORD_CPUID); |
176 | llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::ppc_fixed_addr_ld); |
177 | Value *TheCall = Builder.CreateCall(Callee: F, Args: {Op0}, Name: "cpu_is" ); |
178 | return Builder.CreateICmpEQ(LHS: TheCall, |
179 | RHS: llvm::ConstantInt::get(Ty: Int32Ty, V: LinuxIDValue)); |
180 | } |
181 | case Builtin::BI__builtin_cpu_supports: { |
182 | llvm::Triple Triple = getTarget().getTriple(); |
183 | const Expr *CPUExpr = E->getArg(Arg: 0)->IgnoreParenCasts(); |
184 | StringRef CPUStr = cast<clang::StringLiteral>(Val: CPUExpr)->getString(); |
185 | if (Triple.isOSAIX()) { |
186 | typedef std::tuple<unsigned, unsigned, unsigned, CmpInst::Predicate, |
187 | unsigned> |
188 | CPUSupportType; |
189 | auto [SupportMethod, FieldIdx, Mask, CompOp, Value] = |
190 | static_cast<CPUSupportType>(StringSwitch<CPUSupportType>(CPUStr) |
191 | #define PPC_AIX_FEATURE(NAME, DESC, SUPPORT_METHOD, INDEX, MASK, COMP_OP, \ |
192 | VALUE) \ |
193 | .Case(NAME, {SUPPORT_METHOD, INDEX, MASK, COMP_OP, VALUE}) |
194 | #include "llvm/TargetParser/PPCTargetParser.def" |
195 | .Default(Value: {BUILTIN_PPC_FALSE, 0, 0, |
196 | CmpInst::Predicate(), 0})); |
197 | return GenAIXPPCBuiltinCpuExpr(SupportMethod, FieldIdx, Mask, CompOp, |
198 | Value); |
199 | } |
200 | |
201 | assert(Triple.isOSLinux() && |
202 | "__builtin_cpu_supports() is only supported for AIX and Linux." ); |
203 | auto [FeatureWord, BitMask] = |
204 | StringSwitch<std::pair<unsigned, unsigned>>(CPUStr) |
205 | #define PPC_LNX_FEATURE(Name, Description, EnumName, Bitmask, FA_WORD) \ |
206 | .Case(Name, {FA_WORD, Bitmask}) |
207 | #include "llvm/TargetParser/PPCTargetParser.def" |
208 | .Default(Value: {0, 0}); |
209 | if (!BitMask) |
210 | return Builder.getFalse(); |
211 | Value *Op0 = llvm::ConstantInt::get(Ty: Int32Ty, V: FeatureWord); |
212 | llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::ppc_fixed_addr_ld); |
213 | Value *TheCall = Builder.CreateCall(Callee: F, Args: {Op0}, Name: "cpu_supports" ); |
214 | Value *Mask = |
215 | Builder.CreateAnd(LHS: TheCall, RHS: llvm::ConstantInt::get(Ty: Int32Ty, V: BitMask)); |
216 | return Builder.CreateICmpNE(LHS: Mask, RHS: llvm::Constant::getNullValue(Ty: Int32Ty)); |
217 | #undef PPC_FAWORD_HWCAP |
218 | #undef PPC_FAWORD_HWCAP2 |
219 | #undef PPC_FAWORD_CPUID |
220 | } |
221 | |
222 | // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we |
223 | // call __builtin_readcyclecounter. |
224 | case PPC::BI__builtin_ppc_get_timebase: |
225 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::readcyclecounter)); |
226 | |
227 | // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr |
228 | case PPC::BI__builtin_altivec_lvx: |
229 | case PPC::BI__builtin_altivec_lvxl: |
230 | case PPC::BI__builtin_altivec_lvebx: |
231 | case PPC::BI__builtin_altivec_lvehx: |
232 | case PPC::BI__builtin_altivec_lvewx: |
233 | case PPC::BI__builtin_altivec_lvsl: |
234 | case PPC::BI__builtin_altivec_lvsr: |
235 | case PPC::BI__builtin_vsx_lxvd2x: |
236 | case PPC::BI__builtin_vsx_lxvw4x: |
237 | case PPC::BI__builtin_vsx_lxvd2x_be: |
238 | case PPC::BI__builtin_vsx_lxvw4x_be: |
239 | case PPC::BI__builtin_vsx_lxvl: |
240 | case PPC::BI__builtin_vsx_lxvll: |
241 | { |
242 | SmallVector<Value *, 2> Ops; |
243 | Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0))); |
244 | Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1))); |
245 | if (!(BuiltinID == PPC::BI__builtin_vsx_lxvl || |
246 | BuiltinID == PPC::BI__builtin_vsx_lxvll)) { |
247 | Ops[0] = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[1], IdxList: Ops[0]); |
248 | Ops.pop_back(); |
249 | } |
250 | |
251 | switch (BuiltinID) { |
252 | default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!" ); |
253 | case PPC::BI__builtin_altivec_lvx: |
254 | ID = Intrinsic::ppc_altivec_lvx; |
255 | break; |
256 | case PPC::BI__builtin_altivec_lvxl: |
257 | ID = Intrinsic::ppc_altivec_lvxl; |
258 | break; |
259 | case PPC::BI__builtin_altivec_lvebx: |
260 | ID = Intrinsic::ppc_altivec_lvebx; |
261 | break; |
262 | case PPC::BI__builtin_altivec_lvehx: |
263 | ID = Intrinsic::ppc_altivec_lvehx; |
264 | break; |
265 | case PPC::BI__builtin_altivec_lvewx: |
266 | ID = Intrinsic::ppc_altivec_lvewx; |
267 | break; |
268 | case PPC::BI__builtin_altivec_lvsl: |
269 | ID = Intrinsic::ppc_altivec_lvsl; |
270 | break; |
271 | case PPC::BI__builtin_altivec_lvsr: |
272 | ID = Intrinsic::ppc_altivec_lvsr; |
273 | break; |
274 | case PPC::BI__builtin_vsx_lxvd2x: |
275 | ID = Intrinsic::ppc_vsx_lxvd2x; |
276 | break; |
277 | case PPC::BI__builtin_vsx_lxvw4x: |
278 | ID = Intrinsic::ppc_vsx_lxvw4x; |
279 | break; |
280 | case PPC::BI__builtin_vsx_lxvd2x_be: |
281 | ID = Intrinsic::ppc_vsx_lxvd2x_be; |
282 | break; |
283 | case PPC::BI__builtin_vsx_lxvw4x_be: |
284 | ID = Intrinsic::ppc_vsx_lxvw4x_be; |
285 | break; |
286 | case PPC::BI__builtin_vsx_lxvl: |
287 | ID = Intrinsic::ppc_vsx_lxvl; |
288 | break; |
289 | case PPC::BI__builtin_vsx_lxvll: |
290 | ID = Intrinsic::ppc_vsx_lxvll; |
291 | break; |
292 | } |
293 | llvm::Function *F = CGM.getIntrinsic(IID: ID); |
294 | return Builder.CreateCall(Callee: F, Args: Ops, Name: "" ); |
295 | } |
296 | |
297 | // vec_st, vec_xst_be |
298 | case PPC::BI__builtin_altivec_stvx: |
299 | case PPC::BI__builtin_altivec_stvxl: |
300 | case PPC::BI__builtin_altivec_stvebx: |
301 | case PPC::BI__builtin_altivec_stvehx: |
302 | case PPC::BI__builtin_altivec_stvewx: |
303 | case PPC::BI__builtin_vsx_stxvd2x: |
304 | case PPC::BI__builtin_vsx_stxvw4x: |
305 | case PPC::BI__builtin_vsx_stxvd2x_be: |
306 | case PPC::BI__builtin_vsx_stxvw4x_be: |
307 | case PPC::BI__builtin_vsx_stxvl: |
308 | case PPC::BI__builtin_vsx_stxvll: |
309 | { |
310 | SmallVector<Value *, 3> Ops; |
311 | Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 0))); |
312 | Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 1))); |
313 | Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: 2))); |
314 | if (!(BuiltinID == PPC::BI__builtin_vsx_stxvl || |
315 | BuiltinID == PPC::BI__builtin_vsx_stxvll)) { |
316 | Ops[1] = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[2], IdxList: Ops[1]); |
317 | Ops.pop_back(); |
318 | } |
319 | |
320 | switch (BuiltinID) { |
321 | default: llvm_unreachable("Unsupported st intrinsic!" ); |
322 | case PPC::BI__builtin_altivec_stvx: |
323 | ID = Intrinsic::ppc_altivec_stvx; |
324 | break; |
325 | case PPC::BI__builtin_altivec_stvxl: |
326 | ID = Intrinsic::ppc_altivec_stvxl; |
327 | break; |
328 | case PPC::BI__builtin_altivec_stvebx: |
329 | ID = Intrinsic::ppc_altivec_stvebx; |
330 | break; |
331 | case PPC::BI__builtin_altivec_stvehx: |
332 | ID = Intrinsic::ppc_altivec_stvehx; |
333 | break; |
334 | case PPC::BI__builtin_altivec_stvewx: |
335 | ID = Intrinsic::ppc_altivec_stvewx; |
336 | break; |
337 | case PPC::BI__builtin_vsx_stxvd2x: |
338 | ID = Intrinsic::ppc_vsx_stxvd2x; |
339 | break; |
340 | case PPC::BI__builtin_vsx_stxvw4x: |
341 | ID = Intrinsic::ppc_vsx_stxvw4x; |
342 | break; |
343 | case PPC::BI__builtin_vsx_stxvd2x_be: |
344 | ID = Intrinsic::ppc_vsx_stxvd2x_be; |
345 | break; |
346 | case PPC::BI__builtin_vsx_stxvw4x_be: |
347 | ID = Intrinsic::ppc_vsx_stxvw4x_be; |
348 | break; |
349 | case PPC::BI__builtin_vsx_stxvl: |
350 | ID = Intrinsic::ppc_vsx_stxvl; |
351 | break; |
352 | case PPC::BI__builtin_vsx_stxvll: |
353 | ID = Intrinsic::ppc_vsx_stxvll; |
354 | break; |
355 | } |
356 | llvm::Function *F = CGM.getIntrinsic(IID: ID); |
357 | return Builder.CreateCall(Callee: F, Args: Ops, Name: "" ); |
358 | } |
359 | case PPC::BI__builtin_vsx_ldrmb: { |
360 | // Essentially boils down to performing an unaligned VMX load sequence so |
361 | // as to avoid crossing a page boundary and then shuffling the elements |
362 | // into the right side of the vector register. |
363 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
364 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
365 | int64_t NumBytes = cast<ConstantInt>(Val: Op1)->getZExtValue(); |
366 | llvm::Type *ResTy = ConvertType(T: E->getType()); |
367 | bool IsLE = getTarget().isLittleEndian(); |
368 | |
369 | // If the user wants the entire vector, just load the entire vector. |
370 | if (NumBytes == 16) { |
371 | Value *LD = |
372 | Builder.CreateLoad(Addr: Address(Op0, ResTy, CharUnits::fromQuantity(Quantity: 1))); |
373 | if (!IsLE) |
374 | return LD; |
375 | |
376 | // Reverse the bytes on LE. |
377 | SmallVector<int, 16> RevMask; |
378 | for (int Idx = 0; Idx < 16; Idx++) |
379 | RevMask.push_back(Elt: 15 - Idx); |
380 | return Builder.CreateShuffleVector(V1: LD, V2: LD, Mask: RevMask); |
381 | } |
382 | |
383 | llvm::Function *Lvx = CGM.getIntrinsic(IID: Intrinsic::ppc_altivec_lvx); |
384 | llvm::Function *Lvs = CGM.getIntrinsic(IID: IsLE ? Intrinsic::ppc_altivec_lvsr |
385 | : Intrinsic::ppc_altivec_lvsl); |
386 | llvm::Function *Vperm = CGM.getIntrinsic(IID: Intrinsic::ppc_altivec_vperm); |
387 | Value *HiMem = Builder.CreateGEP( |
388 | Ty: Int8Ty, Ptr: Op0, IdxList: ConstantInt::get(Ty: Op1->getType(), V: NumBytes - 1)); |
389 | Value *LoLd = Builder.CreateCall(Callee: Lvx, Args: Op0, Name: "ld.lo" ); |
390 | Value *HiLd = Builder.CreateCall(Callee: Lvx, Args: HiMem, Name: "ld.hi" ); |
391 | Value *Mask1 = Builder.CreateCall(Callee: Lvs, Args: Op0, Name: "mask1" ); |
392 | |
393 | Op0 = IsLE ? HiLd : LoLd; |
394 | Op1 = IsLE ? LoLd : HiLd; |
395 | Value *AllElts = Builder.CreateCall(Callee: Vperm, Args: {Op0, Op1, Mask1}, Name: "shuffle1" ); |
396 | Constant *Zero = llvm::Constant::getNullValue(Ty: IsLE ? ResTy : AllElts->getType()); |
397 | |
398 | if (IsLE) { |
399 | SmallVector<int, 16> Consts; |
400 | for (int Idx = 0; Idx < 16; Idx++) { |
401 | int Val = (NumBytes - Idx - 1 >= 0) ? (NumBytes - Idx - 1) |
402 | : 16 - (NumBytes - Idx); |
403 | Consts.push_back(Elt: Val); |
404 | } |
405 | return Builder.CreateShuffleVector(V1: Builder.CreateBitCast(V: AllElts, DestTy: ResTy), |
406 | V2: Zero, Mask: Consts); |
407 | } |
408 | SmallVector<Constant *, 16> Consts; |
409 | for (int Idx = 0; Idx < 16; Idx++) |
410 | Consts.push_back(Elt: Builder.getInt8(C: NumBytes + Idx)); |
411 | Value *Mask2 = ConstantVector::get(V: Consts); |
412 | return Builder.CreateBitCast( |
413 | V: Builder.CreateCall(Callee: Vperm, Args: {Zero, AllElts, Mask2}, Name: "shuffle2" ), DestTy: ResTy); |
414 | } |
415 | case PPC::BI__builtin_vsx_strmb: { |
416 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
417 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
418 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
419 | int64_t NumBytes = cast<ConstantInt>(Val: Op1)->getZExtValue(); |
420 | bool IsLE = getTarget().isLittleEndian(); |
421 | auto StoreSubVec = [&](unsigned Width, unsigned Offset, unsigned EltNo) { |
422 | // Storing the whole vector, simply store it on BE and reverse bytes and |
423 | // store on LE. |
424 | if (Width == 16) { |
425 | Value *StVec = Op2; |
426 | if (IsLE) { |
427 | SmallVector<int, 16> RevMask; |
428 | for (int Idx = 0; Idx < 16; Idx++) |
429 | RevMask.push_back(Elt: 15 - Idx); |
430 | StVec = Builder.CreateShuffleVector(V1: Op2, V2: Op2, Mask: RevMask); |
431 | } |
432 | return Builder.CreateStore( |
433 | Val: StVec, Addr: Address(Op0, Op2->getType(), CharUnits::fromQuantity(Quantity: 1))); |
434 | } |
435 | auto *ConvTy = Int64Ty; |
436 | unsigned NumElts = 0; |
437 | switch (Width) { |
438 | default: |
439 | llvm_unreachable("width for stores must be a power of 2" ); |
440 | case 8: |
441 | ConvTy = Int64Ty; |
442 | NumElts = 2; |
443 | break; |
444 | case 4: |
445 | ConvTy = Int32Ty; |
446 | NumElts = 4; |
447 | break; |
448 | case 2: |
449 | ConvTy = Int16Ty; |
450 | NumElts = 8; |
451 | break; |
452 | case 1: |
453 | ConvTy = Int8Ty; |
454 | NumElts = 16; |
455 | break; |
456 | } |
457 | Value *Vec = Builder.CreateBitCast( |
458 | V: Op2, DestTy: llvm::FixedVectorType::get(ElementType: ConvTy, NumElts)); |
459 | Value *Ptr = |
460 | Builder.CreateGEP(Ty: Int8Ty, Ptr: Op0, IdxList: ConstantInt::get(Ty: Int64Ty, V: Offset)); |
461 | Value *Elt = Builder.CreateExtractElement(Vec, Idx: EltNo); |
462 | if (IsLE && Width > 1) { |
463 | Function *F = CGM.getIntrinsic(IID: Intrinsic::bswap, Tys: ConvTy); |
464 | Elt = Builder.CreateCall(Callee: F, Args: Elt); |
465 | } |
466 | return Builder.CreateStore( |
467 | Val: Elt, Addr: Address(Ptr, ConvTy, CharUnits::fromQuantity(Quantity: 1))); |
468 | }; |
469 | unsigned Stored = 0; |
470 | unsigned RemainingBytes = NumBytes; |
471 | Value *Result; |
472 | if (NumBytes == 16) |
473 | return StoreSubVec(16, 0, 0); |
474 | if (NumBytes >= 8) { |
475 | Result = StoreSubVec(8, NumBytes - 8, IsLE ? 0 : 1); |
476 | RemainingBytes -= 8; |
477 | Stored += 8; |
478 | } |
479 | if (RemainingBytes >= 4) { |
480 | Result = StoreSubVec(4, NumBytes - Stored - 4, |
481 | IsLE ? (Stored >> 2) : 3 - (Stored >> 2)); |
482 | RemainingBytes -= 4; |
483 | Stored += 4; |
484 | } |
485 | if (RemainingBytes >= 2) { |
486 | Result = StoreSubVec(2, NumBytes - Stored - 2, |
487 | IsLE ? (Stored >> 1) : 7 - (Stored >> 1)); |
488 | RemainingBytes -= 2; |
489 | Stored += 2; |
490 | } |
491 | if (RemainingBytes) |
492 | Result = |
493 | StoreSubVec(1, NumBytes - Stored - 1, IsLE ? Stored : 15 - Stored); |
494 | return Result; |
495 | } |
496 | // Square root |
497 | case PPC::BI__builtin_vsx_xvsqrtsp: |
498 | case PPC::BI__builtin_vsx_xvsqrtdp: { |
499 | llvm::Type *ResultType = ConvertType(T: E->getType()); |
500 | Value *X = EmitScalarExpr(E: E->getArg(Arg: 0)); |
501 | if (Builder.getIsFPConstrained()) { |
502 | llvm::Function *F = CGM.getIntrinsic( |
503 | IID: Intrinsic::experimental_constrained_sqrt, Tys: ResultType); |
504 | return Builder.CreateConstrainedFPCall(Callee: F, Args: X); |
505 | } else { |
506 | llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::sqrt, Tys: ResultType); |
507 | return Builder.CreateCall(Callee: F, Args: X); |
508 | } |
509 | } |
510 | // Count leading zeros |
511 | case PPC::BI__builtin_altivec_vclzb: |
512 | case PPC::BI__builtin_altivec_vclzh: |
513 | case PPC::BI__builtin_altivec_vclzw: |
514 | case PPC::BI__builtin_altivec_vclzd: { |
515 | llvm::Type *ResultType = ConvertType(T: E->getType()); |
516 | Value *X = EmitScalarExpr(E: E->getArg(Arg: 0)); |
517 | Value *Undef = ConstantInt::get(Ty: Builder.getInt1Ty(), V: false); |
518 | Function *F = CGM.getIntrinsic(IID: Intrinsic::ctlz, Tys: ResultType); |
519 | return Builder.CreateCall(Callee: F, Args: {X, Undef}); |
520 | } |
521 | case PPC::BI__builtin_altivec_vctzb: |
522 | case PPC::BI__builtin_altivec_vctzh: |
523 | case PPC::BI__builtin_altivec_vctzw: |
524 | case PPC::BI__builtin_altivec_vctzd: { |
525 | llvm::Type *ResultType = ConvertType(T: E->getType()); |
526 | Value *X = EmitScalarExpr(E: E->getArg(Arg: 0)); |
527 | Value *Undef = ConstantInt::get(Ty: Builder.getInt1Ty(), V: false); |
528 | Function *F = CGM.getIntrinsic(IID: Intrinsic::cttz, Tys: ResultType); |
529 | return Builder.CreateCall(Callee: F, Args: {X, Undef}); |
530 | } |
531 | case PPC::BI__builtin_altivec_vinsd: |
532 | case PPC::BI__builtin_altivec_vinsw: |
533 | case PPC::BI__builtin_altivec_vinsd_elt: |
534 | case PPC::BI__builtin_altivec_vinsw_elt: { |
535 | llvm::Type *ResultType = ConvertType(T: E->getType()); |
536 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
537 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
538 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
539 | |
540 | bool IsUnaligned = (BuiltinID == PPC::BI__builtin_altivec_vinsw || |
541 | BuiltinID == PPC::BI__builtin_altivec_vinsd); |
542 | |
543 | bool Is32bit = (BuiltinID == PPC::BI__builtin_altivec_vinsw || |
544 | BuiltinID == PPC::BI__builtin_altivec_vinsw_elt); |
545 | |
546 | // The third argument must be a compile time constant. |
547 | ConstantInt *ArgCI = dyn_cast<ConstantInt>(Val: Op2); |
548 | assert(ArgCI && |
549 | "Third Arg to vinsw/vinsd intrinsic must be a constant integer!" ); |
550 | |
551 | // Valid value for the third argument is dependent on the input type and |
552 | // builtin called. |
553 | int ValidMaxValue = 0; |
554 | if (IsUnaligned) |
555 | ValidMaxValue = (Is32bit) ? 12 : 8; |
556 | else |
557 | ValidMaxValue = (Is32bit) ? 3 : 1; |
558 | |
559 | // Get value of third argument. |
560 | int64_t ConstArg = ArgCI->getSExtValue(); |
561 | |
562 | // Compose range checking error message. |
563 | std::string RangeErrMsg = IsUnaligned ? "byte" : "element" ; |
564 | RangeErrMsg += " number " + llvm::to_string(Value: ConstArg); |
565 | RangeErrMsg += " is outside of the valid range [0, " ; |
566 | RangeErrMsg += llvm::to_string(Value: ValidMaxValue) + "]" ; |
567 | |
568 | // Issue error if third argument is not within the valid range. |
569 | if (ConstArg < 0 || ConstArg > ValidMaxValue) |
570 | CGM.Error(loc: E->getExprLoc(), error: RangeErrMsg); |
571 | |
572 | // Input to vec_replace_elt is an element index, convert to byte index. |
573 | if (!IsUnaligned) { |
574 | ConstArg *= Is32bit ? 4 : 8; |
575 | // Fix the constant according to endianness. |
576 | if (getTarget().isLittleEndian()) |
577 | ConstArg = (Is32bit ? 12 : 8) - ConstArg; |
578 | } |
579 | |
580 | ID = Is32bit ? Intrinsic::ppc_altivec_vinsw : Intrinsic::ppc_altivec_vinsd; |
581 | Op2 = ConstantInt::getSigned(Ty: Int32Ty, V: ConstArg); |
582 | // Casting input to vector int as per intrinsic definition. |
583 | Op0 = |
584 | Is32bit |
585 | ? Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4)) |
586 | : Builder.CreateBitCast(V: Op0, |
587 | DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2)); |
588 | return Builder.CreateBitCast( |
589 | V: Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: {Op0, Op1, Op2}), DestTy: ResultType); |
590 | } |
591 | case PPC::BI__builtin_altivec_vadduqm: |
592 | case PPC::BI__builtin_altivec_vsubuqm: { |
593 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
594 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
595 | llvm::Type *Int128Ty = llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128); |
596 | Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int128Ty, NumElts: 1)); |
597 | Op1 = Builder.CreateBitCast(V: Op1, DestTy: llvm::FixedVectorType::get(ElementType: Int128Ty, NumElts: 1)); |
598 | if (BuiltinID == PPC::BI__builtin_altivec_vadduqm) |
599 | return Builder.CreateAdd(LHS: Op0, RHS: Op1, Name: "vadduqm" ); |
600 | else |
601 | return Builder.CreateSub(LHS: Op0, RHS: Op1, Name: "vsubuqm" ); |
602 | } |
603 | case PPC::BI__builtin_altivec_vaddcuq_c: |
604 | case PPC::BI__builtin_altivec_vsubcuq_c: { |
605 | SmallVector<Value *, 2> Ops; |
606 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
607 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
608 | llvm::Type *V1I128Ty = llvm::FixedVectorType::get( |
609 | ElementType: llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128), NumElts: 1); |
610 | Ops.push_back(Elt: Builder.CreateBitCast(V: Op0, DestTy: V1I128Ty)); |
611 | Ops.push_back(Elt: Builder.CreateBitCast(V: Op1, DestTy: V1I128Ty)); |
612 | ID = (BuiltinID == PPC::BI__builtin_altivec_vaddcuq_c) |
613 | ? Intrinsic::ppc_altivec_vaddcuq |
614 | : Intrinsic::ppc_altivec_vsubcuq; |
615 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: Ops, Name: "" ); |
616 | } |
617 | case PPC::BI__builtin_altivec_vaddeuqm_c: |
618 | case PPC::BI__builtin_altivec_vaddecuq_c: |
619 | case PPC::BI__builtin_altivec_vsubeuqm_c: |
620 | case PPC::BI__builtin_altivec_vsubecuq_c: { |
621 | SmallVector<Value *, 3> Ops; |
622 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
623 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
624 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
625 | llvm::Type *V1I128Ty = llvm::FixedVectorType::get( |
626 | ElementType: llvm::IntegerType::get(C&: getLLVMContext(), NumBits: 128), NumElts: 1); |
627 | Ops.push_back(Elt: Builder.CreateBitCast(V: Op0, DestTy: V1I128Ty)); |
628 | Ops.push_back(Elt: Builder.CreateBitCast(V: Op1, DestTy: V1I128Ty)); |
629 | Ops.push_back(Elt: Builder.CreateBitCast(V: Op2, DestTy: V1I128Ty)); |
630 | switch (BuiltinID) { |
631 | default: |
632 | llvm_unreachable("Unsupported intrinsic!" ); |
633 | case PPC::BI__builtin_altivec_vaddeuqm_c: |
634 | ID = Intrinsic::ppc_altivec_vaddeuqm; |
635 | break; |
636 | case PPC::BI__builtin_altivec_vaddecuq_c: |
637 | ID = Intrinsic::ppc_altivec_vaddecuq; |
638 | break; |
639 | case PPC::BI__builtin_altivec_vsubeuqm_c: |
640 | ID = Intrinsic::ppc_altivec_vsubeuqm; |
641 | break; |
642 | case PPC::BI__builtin_altivec_vsubecuq_c: |
643 | ID = Intrinsic::ppc_altivec_vsubecuq; |
644 | break; |
645 | } |
646 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: ID), Args: Ops, Name: "" ); |
647 | } |
648 | case PPC::BI__builtin_ppc_rldimi: |
649 | case PPC::BI__builtin_ppc_rlwimi: { |
650 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
651 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
652 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
653 | Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3)); |
654 | // rldimi is 64-bit instruction, expand the intrinsic before isel to |
655 | // leverage peephole and avoid legalization efforts. |
656 | if (BuiltinID == PPC::BI__builtin_ppc_rldimi && |
657 | !getTarget().getTriple().isPPC64()) { |
658 | Function *F = CGM.getIntrinsic(IID: Intrinsic::fshl, Tys: Op0->getType()); |
659 | Op2 = Builder.CreateZExt(V: Op2, DestTy: Int64Ty); |
660 | Value *Shift = Builder.CreateCall(Callee: F, Args: {Op0, Op0, Op2}); |
661 | return Builder.CreateOr(LHS: Builder.CreateAnd(LHS: Shift, RHS: Op3), |
662 | RHS: Builder.CreateAnd(LHS: Op1, RHS: Builder.CreateNot(V: Op3))); |
663 | } |
664 | return Builder.CreateCall( |
665 | Callee: CGM.getIntrinsic(IID: BuiltinID == PPC::BI__builtin_ppc_rldimi |
666 | ? Intrinsic::ppc_rldimi |
667 | : Intrinsic::ppc_rlwimi), |
668 | Args: {Op0, Op1, Op2, Op3}); |
669 | } |
670 | case PPC::BI__builtin_ppc_rlwnm: { |
671 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
672 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
673 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
674 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::ppc_rlwnm), |
675 | Args: {Op0, Op1, Op2}); |
676 | } |
677 | case PPC::BI__builtin_ppc_poppar4: |
678 | case PPC::BI__builtin_ppc_poppar8: { |
679 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
680 | llvm::Type *ArgType = Op0->getType(); |
681 | Function *F = CGM.getIntrinsic(IID: Intrinsic::ctpop, Tys: ArgType); |
682 | Value *Tmp = Builder.CreateCall(Callee: F, Args: Op0); |
683 | |
684 | llvm::Type *ResultType = ConvertType(T: E->getType()); |
685 | Value *Result = Builder.CreateAnd(LHS: Tmp, RHS: llvm::ConstantInt::get(Ty: ArgType, V: 1)); |
686 | if (Result->getType() != ResultType) |
687 | Result = Builder.CreateIntCast(V: Result, DestTy: ResultType, /*isSigned*/true, |
688 | Name: "cast" ); |
689 | return Result; |
690 | } |
691 | case PPC::BI__builtin_ppc_cmpb: { |
692 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
693 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
694 | if (getTarget().getTriple().isPPC64()) { |
695 | Function *F = |
696 | CGM.getIntrinsic(IID: Intrinsic::ppc_cmpb, Tys: {Int64Ty, Int64Ty, Int64Ty}); |
697 | return Builder.CreateCall(Callee: F, Args: {Op0, Op1}, Name: "cmpb" ); |
698 | } |
699 | // For 32 bit, emit the code as below: |
700 | // %conv = trunc i64 %a to i32 |
701 | // %conv1 = trunc i64 %b to i32 |
702 | // %shr = lshr i64 %a, 32 |
703 | // %conv2 = trunc i64 %shr to i32 |
704 | // %shr3 = lshr i64 %b, 32 |
705 | // %conv4 = trunc i64 %shr3 to i32 |
706 | // %0 = tail call i32 @llvm.ppc.cmpb32(i32 %conv, i32 %conv1) |
707 | // %conv5 = zext i32 %0 to i64 |
708 | // %1 = tail call i32 @llvm.ppc.cmpb32(i32 %conv2, i32 %conv4) |
709 | // %conv614 = zext i32 %1 to i64 |
710 | // %shl = shl nuw i64 %conv614, 32 |
711 | // %or = or i64 %shl, %conv5 |
712 | // ret i64 %or |
713 | Function *F = |
714 | CGM.getIntrinsic(IID: Intrinsic::ppc_cmpb, Tys: {Int32Ty, Int32Ty, Int32Ty}); |
715 | Value *ArgOneLo = Builder.CreateTrunc(V: Op0, DestTy: Int32Ty); |
716 | Value *ArgTwoLo = Builder.CreateTrunc(V: Op1, DestTy: Int32Ty); |
717 | Constant *ShiftAmt = ConstantInt::get(Ty: Int64Ty, V: 32); |
718 | Value *ArgOneHi = |
719 | Builder.CreateTrunc(V: Builder.CreateLShr(LHS: Op0, RHS: ShiftAmt), DestTy: Int32Ty); |
720 | Value *ArgTwoHi = |
721 | Builder.CreateTrunc(V: Builder.CreateLShr(LHS: Op1, RHS: ShiftAmt), DestTy: Int32Ty); |
722 | Value *ResLo = Builder.CreateZExt( |
723 | V: Builder.CreateCall(Callee: F, Args: {ArgOneLo, ArgTwoLo}, Name: "cmpb" ), DestTy: Int64Ty); |
724 | Value *ResHiShift = Builder.CreateZExt( |
725 | V: Builder.CreateCall(Callee: F, Args: {ArgOneHi, ArgTwoHi}, Name: "cmpb" ), DestTy: Int64Ty); |
726 | Value *ResHi = Builder.CreateShl(LHS: ResHiShift, RHS: ShiftAmt); |
727 | return Builder.CreateOr(LHS: ResLo, RHS: ResHi); |
728 | } |
729 | // Copy sign |
730 | case PPC::BI__builtin_vsx_xvcpsgnsp: |
731 | case PPC::BI__builtin_vsx_xvcpsgndp: { |
732 | llvm::Type *ResultType = ConvertType(T: E->getType()); |
733 | Value *X = EmitScalarExpr(E: E->getArg(Arg: 0)); |
734 | Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1)); |
735 | ID = Intrinsic::copysign; |
736 | llvm::Function *F = CGM.getIntrinsic(IID: ID, Tys: ResultType); |
737 | return Builder.CreateCall(Callee: F, Args: {X, Y}); |
738 | } |
739 | // Rounding/truncation |
740 | case PPC::BI__builtin_vsx_xvrspip: |
741 | case PPC::BI__builtin_vsx_xvrdpip: |
742 | case PPC::BI__builtin_vsx_xvrdpim: |
743 | case PPC::BI__builtin_vsx_xvrspim: |
744 | case PPC::BI__builtin_vsx_xvrdpi: |
745 | case PPC::BI__builtin_vsx_xvrspi: |
746 | case PPC::BI__builtin_vsx_xvrdpic: |
747 | case PPC::BI__builtin_vsx_xvrspic: |
748 | case PPC::BI__builtin_vsx_xvrdpiz: |
749 | case PPC::BI__builtin_vsx_xvrspiz: { |
750 | llvm::Type *ResultType = ConvertType(T: E->getType()); |
751 | Value *X = EmitScalarExpr(E: E->getArg(Arg: 0)); |
752 | if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim || |
753 | BuiltinID == PPC::BI__builtin_vsx_xvrspim) |
754 | ID = Builder.getIsFPConstrained() |
755 | ? Intrinsic::experimental_constrained_floor |
756 | : Intrinsic::floor; |
757 | else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi || |
758 | BuiltinID == PPC::BI__builtin_vsx_xvrspi) |
759 | ID = Builder.getIsFPConstrained() |
760 | ? Intrinsic::experimental_constrained_round |
761 | : Intrinsic::round; |
762 | else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic || |
763 | BuiltinID == PPC::BI__builtin_vsx_xvrspic) |
764 | ID = Builder.getIsFPConstrained() |
765 | ? Intrinsic::experimental_constrained_rint |
766 | : Intrinsic::rint; |
767 | else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip || |
768 | BuiltinID == PPC::BI__builtin_vsx_xvrspip) |
769 | ID = Builder.getIsFPConstrained() |
770 | ? Intrinsic::experimental_constrained_ceil |
771 | : Intrinsic::ceil; |
772 | else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz || |
773 | BuiltinID == PPC::BI__builtin_vsx_xvrspiz) |
774 | ID = Builder.getIsFPConstrained() |
775 | ? Intrinsic::experimental_constrained_trunc |
776 | : Intrinsic::trunc; |
777 | llvm::Function *F = CGM.getIntrinsic(IID: ID, Tys: ResultType); |
778 | return Builder.getIsFPConstrained() ? Builder.CreateConstrainedFPCall(Callee: F, Args: X) |
779 | : Builder.CreateCall(Callee: F, Args: X); |
780 | } |
781 | |
782 | // Absolute value |
783 | case PPC::BI__builtin_vsx_xvabsdp: |
784 | case PPC::BI__builtin_vsx_xvabssp: { |
785 | llvm::Type *ResultType = ConvertType(T: E->getType()); |
786 | Value *X = EmitScalarExpr(E: E->getArg(Arg: 0)); |
787 | llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::fabs, Tys: ResultType); |
788 | return Builder.CreateCall(Callee: F, Args: X); |
789 | } |
790 | |
791 | // Fastmath by default |
792 | case PPC::BI__builtin_ppc_recipdivf: |
793 | case PPC::BI__builtin_ppc_recipdivd: |
794 | case PPC::BI__builtin_ppc_rsqrtf: |
795 | case PPC::BI__builtin_ppc_rsqrtd: { |
796 | FastMathFlags FMF = Builder.getFastMathFlags(); |
797 | Builder.getFastMathFlags().setFast(); |
798 | llvm::Type *ResultType = ConvertType(T: E->getType()); |
799 | Value *X = EmitScalarExpr(E: E->getArg(Arg: 0)); |
800 | |
801 | if (BuiltinID == PPC::BI__builtin_ppc_recipdivf || |
802 | BuiltinID == PPC::BI__builtin_ppc_recipdivd) { |
803 | Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1)); |
804 | Value *FDiv = Builder.CreateFDiv(L: X, R: Y, Name: "recipdiv" ); |
805 | Builder.getFastMathFlags() &= (FMF); |
806 | return FDiv; |
807 | } |
808 | auto *One = ConstantFP::get(Ty: ResultType, V: 1.0); |
809 | llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::sqrt, Tys: ResultType); |
810 | Value *FDiv = Builder.CreateFDiv(L: One, R: Builder.CreateCall(Callee: F, Args: X), Name: "rsqrt" ); |
811 | Builder.getFastMathFlags() &= (FMF); |
812 | return FDiv; |
813 | } |
814 | case PPC::BI__builtin_ppc_alignx: { |
815 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
816 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
817 | ConstantInt *AlignmentCI = cast<ConstantInt>(Val: Op0); |
818 | if (AlignmentCI->getValue().ugt(RHS: llvm::Value::MaximumAlignment)) |
819 | AlignmentCI = ConstantInt::get(Ty: AlignmentCI->getIntegerType(), |
820 | V: llvm::Value::MaximumAlignment); |
821 | |
822 | emitAlignmentAssumption(PtrValue: Op1, E: E->getArg(Arg: 1), |
823 | /*The expr loc is sufficient.*/ AssumptionLoc: SourceLocation(), |
824 | Alignment: AlignmentCI, OffsetValue: nullptr); |
825 | return Op1; |
826 | } |
827 | case PPC::BI__builtin_ppc_rdlam: { |
828 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
829 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
830 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
831 | llvm::Type *Ty = Op0->getType(); |
832 | Value *ShiftAmt = Builder.CreateIntCast(V: Op1, DestTy: Ty, isSigned: false); |
833 | Function *F = CGM.getIntrinsic(IID: Intrinsic::fshl, Tys: Ty); |
834 | Value *Rotate = Builder.CreateCall(Callee: F, Args: {Op0, Op0, ShiftAmt}); |
835 | return Builder.CreateAnd(LHS: Rotate, RHS: Op2); |
836 | } |
837 | case PPC::BI__builtin_ppc_load2r: { |
838 | Function *F = CGM.getIntrinsic(IID: Intrinsic::ppc_load2r); |
839 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
840 | Value *LoadIntrinsic = Builder.CreateCall(Callee: F, Args: {Op0}); |
841 | return Builder.CreateTrunc(V: LoadIntrinsic, DestTy: Int16Ty); |
842 | } |
843 | // FMA variations |
844 | case PPC::BI__builtin_ppc_fnmsub: |
845 | case PPC::BI__builtin_ppc_fnmsubs: |
846 | case PPC::BI__builtin_vsx_xvmaddadp: |
847 | case PPC::BI__builtin_vsx_xvmaddasp: |
848 | case PPC::BI__builtin_vsx_xvnmaddadp: |
849 | case PPC::BI__builtin_vsx_xvnmaddasp: |
850 | case PPC::BI__builtin_vsx_xvmsubadp: |
851 | case PPC::BI__builtin_vsx_xvmsubasp: |
852 | case PPC::BI__builtin_vsx_xvnmsubadp: |
853 | case PPC::BI__builtin_vsx_xvnmsubasp: { |
854 | llvm::Type *ResultType = ConvertType(T: E->getType()); |
855 | Value *X = EmitScalarExpr(E: E->getArg(Arg: 0)); |
856 | Value *Y = EmitScalarExpr(E: E->getArg(Arg: 1)); |
857 | Value *Z = EmitScalarExpr(E: E->getArg(Arg: 2)); |
858 | llvm::Function *F; |
859 | if (Builder.getIsFPConstrained()) |
860 | F = CGM.getIntrinsic(IID: Intrinsic::experimental_constrained_fma, Tys: ResultType); |
861 | else |
862 | F = CGM.getIntrinsic(IID: Intrinsic::fma, Tys: ResultType); |
863 | switch (BuiltinID) { |
864 | case PPC::BI__builtin_vsx_xvmaddadp: |
865 | case PPC::BI__builtin_vsx_xvmaddasp: |
866 | if (Builder.getIsFPConstrained()) |
867 | return Builder.CreateConstrainedFPCall(Callee: F, Args: {X, Y, Z}); |
868 | else |
869 | return Builder.CreateCall(Callee: F, Args: {X, Y, Z}); |
870 | case PPC::BI__builtin_vsx_xvnmaddadp: |
871 | case PPC::BI__builtin_vsx_xvnmaddasp: |
872 | if (Builder.getIsFPConstrained()) |
873 | return Builder.CreateFNeg( |
874 | V: Builder.CreateConstrainedFPCall(Callee: F, Args: {X, Y, Z}), Name: "neg" ); |
875 | else |
876 | return Builder.CreateFNeg(V: Builder.CreateCall(Callee: F, Args: {X, Y, Z}), Name: "neg" ); |
877 | case PPC::BI__builtin_vsx_xvmsubadp: |
878 | case PPC::BI__builtin_vsx_xvmsubasp: |
879 | if (Builder.getIsFPConstrained()) |
880 | return Builder.CreateConstrainedFPCall( |
881 | Callee: F, Args: {X, Y, Builder.CreateFNeg(V: Z, Name: "neg" )}); |
882 | else |
883 | return Builder.CreateCall(Callee: F, Args: {X, Y, Builder.CreateFNeg(V: Z, Name: "neg" )}); |
884 | case PPC::BI__builtin_ppc_fnmsub: |
885 | case PPC::BI__builtin_ppc_fnmsubs: |
886 | case PPC::BI__builtin_vsx_xvnmsubadp: |
887 | case PPC::BI__builtin_vsx_xvnmsubasp: |
888 | if (Builder.getIsFPConstrained()) |
889 | return Builder.CreateFNeg( |
890 | V: Builder.CreateConstrainedFPCall( |
891 | Callee: F, Args: {X, Y, Builder.CreateFNeg(V: Z, Name: "neg" )}), |
892 | Name: "neg" ); |
893 | else |
894 | return Builder.CreateCall( |
895 | Callee: CGM.getIntrinsic(IID: Intrinsic::ppc_fnmsub, Tys: ResultType), Args: {X, Y, Z}); |
896 | } |
897 | llvm_unreachable("Unknown FMA operation" ); |
898 | return nullptr; // Suppress no-return warning |
899 | } |
900 | |
901 | case PPC::BI__builtin_vsx_insertword: { |
902 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
903 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
904 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
905 | llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::ppc_vsx_xxinsertw); |
906 | |
907 | // Third argument is a compile time constant int. It must be clamped to |
908 | // the range [0, 12]. |
909 | ConstantInt *ArgCI = dyn_cast<ConstantInt>(Val: Op2); |
910 | assert(ArgCI && |
911 | "Third arg to xxinsertw intrinsic must be constant integer" ); |
912 | const int64_t MaxIndex = 12; |
913 | int64_t Index = std::clamp(val: ArgCI->getSExtValue(), lo: (int64_t)0, hi: MaxIndex); |
914 | |
915 | // The builtin semantics don't exactly match the xxinsertw instruction's |
916 | // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the |
917 | // word from the first argument, and inserts it in the second argument. The |
918 | // instruction extracts the word from its second input register and inserts |
919 | // it into its first input register, so swap the first and second arguments. |
920 | std::swap(a&: Op0, b&: Op1); |
921 | |
922 | // Need to cast the second argument from a vector of unsigned int to a |
923 | // vector of long long. |
924 | Op1 = Builder.CreateBitCast(V: Op1, DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2)); |
925 | |
926 | if (getTarget().isLittleEndian()) { |
927 | // Reverse the double words in the vector we will extract from. |
928 | Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2)); |
929 | Op0 = Builder.CreateShuffleVector(V1: Op0, V2: Op0, Mask: {1, 0}); |
930 | |
931 | // Reverse the index. |
932 | Index = MaxIndex - Index; |
933 | } |
934 | |
935 | // Intrinsic expects the first arg to be a vector of int. |
936 | Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4)); |
937 | Op2 = ConstantInt::getSigned(Ty: Int32Ty, V: Index); |
938 | return Builder.CreateCall(Callee: F, Args: {Op0, Op1, Op2}); |
939 | } |
940 | |
941 | case PPC::BI__builtin_vsx_extractuword: { |
942 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
943 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
944 | llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::ppc_vsx_xxextractuw); |
945 | |
946 | // Intrinsic expects the first argument to be a vector of doublewords. |
947 | Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2)); |
948 | |
949 | // The second argument is a compile time constant int that needs to |
950 | // be clamped to the range [0, 12]. |
951 | ConstantInt *ArgCI = dyn_cast<ConstantInt>(Val: Op1); |
952 | assert(ArgCI && |
953 | "Second Arg to xxextractuw intrinsic must be a constant integer!" ); |
954 | const int64_t MaxIndex = 12; |
955 | int64_t Index = std::clamp(val: ArgCI->getSExtValue(), lo: (int64_t)0, hi: MaxIndex); |
956 | |
957 | if (getTarget().isLittleEndian()) { |
958 | // Reverse the index. |
959 | Index = MaxIndex - Index; |
960 | Op1 = ConstantInt::getSigned(Ty: Int32Ty, V: Index); |
961 | |
962 | // Emit the call, then reverse the double words of the results vector. |
963 | Value *Call = Builder.CreateCall(Callee: F, Args: {Op0, Op1}); |
964 | |
965 | Value *ShuffleCall = |
966 | Builder.CreateShuffleVector(V1: Call, V2: Call, Mask: {1, 0}); |
967 | return ShuffleCall; |
968 | } else { |
969 | Op1 = ConstantInt::getSigned(Ty: Int32Ty, V: Index); |
970 | return Builder.CreateCall(Callee: F, Args: {Op0, Op1}); |
971 | } |
972 | } |
973 | |
974 | case PPC::BI__builtin_vsx_xxpermdi: { |
975 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
976 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
977 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
978 | ConstantInt *ArgCI = dyn_cast<ConstantInt>(Val: Op2); |
979 | assert(ArgCI && "Third arg must be constant integer!" ); |
980 | |
981 | unsigned Index = ArgCI->getZExtValue(); |
982 | Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2)); |
983 | Op1 = Builder.CreateBitCast(V: Op1, DestTy: llvm::FixedVectorType::get(ElementType: Int64Ty, NumElts: 2)); |
984 | |
985 | // Account for endianness by treating this as just a shuffle. So we use the |
986 | // same indices for both LE and BE in order to produce expected results in |
987 | // both cases. |
988 | int ElemIdx0 = (Index & 2) >> 1; |
989 | int ElemIdx1 = 2 + (Index & 1); |
990 | |
991 | int ShuffleElts[2] = {ElemIdx0, ElemIdx1}; |
992 | Value *ShuffleCall = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: ShuffleElts); |
993 | QualType BIRetType = E->getType(); |
994 | auto RetTy = ConvertType(T: BIRetType); |
995 | return Builder.CreateBitCast(V: ShuffleCall, DestTy: RetTy); |
996 | } |
997 | |
998 | case PPC::BI__builtin_vsx_xxsldwi: { |
999 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1000 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1001 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
1002 | ConstantInt *ArgCI = dyn_cast<ConstantInt>(Val: Op2); |
1003 | assert(ArgCI && "Third argument must be a compile time constant" ); |
1004 | unsigned Index = ArgCI->getZExtValue() & 0x3; |
1005 | Op0 = Builder.CreateBitCast(V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4)); |
1006 | Op1 = Builder.CreateBitCast(V: Op1, DestTy: llvm::FixedVectorType::get(ElementType: Int32Ty, NumElts: 4)); |
1007 | |
1008 | // Create a shuffle mask |
1009 | int ElemIdx0; |
1010 | int ElemIdx1; |
1011 | int ElemIdx2; |
1012 | int ElemIdx3; |
1013 | if (getTarget().isLittleEndian()) { |
1014 | // Little endian element N comes from element 8+N-Index of the |
1015 | // concatenated wide vector (of course, using modulo arithmetic on |
1016 | // the total number of elements). |
1017 | ElemIdx0 = (8 - Index) % 8; |
1018 | ElemIdx1 = (9 - Index) % 8; |
1019 | ElemIdx2 = (10 - Index) % 8; |
1020 | ElemIdx3 = (11 - Index) % 8; |
1021 | } else { |
1022 | // Big endian ElemIdx<N> = Index + N |
1023 | ElemIdx0 = Index; |
1024 | ElemIdx1 = Index + 1; |
1025 | ElemIdx2 = Index + 2; |
1026 | ElemIdx3 = Index + 3; |
1027 | } |
1028 | |
1029 | int ShuffleElts[4] = {ElemIdx0, ElemIdx1, ElemIdx2, ElemIdx3}; |
1030 | Value *ShuffleCall = Builder.CreateShuffleVector(V1: Op0, V2: Op1, Mask: ShuffleElts); |
1031 | QualType BIRetType = E->getType(); |
1032 | auto RetTy = ConvertType(T: BIRetType); |
1033 | return Builder.CreateBitCast(V: ShuffleCall, DestTy: RetTy); |
1034 | } |
1035 | |
1036 | case PPC::BI__builtin_pack_vector_int128: { |
1037 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1038 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1039 | bool isLittleEndian = getTarget().isLittleEndian(); |
1040 | Value *PoisonValue = |
1041 | llvm::PoisonValue::get(T: llvm::FixedVectorType::get(ElementType: Op0->getType(), NumElts: 2)); |
1042 | Value *Res = Builder.CreateInsertElement( |
1043 | Vec: PoisonValue, NewElt: Op0, Idx: (uint64_t)(isLittleEndian ? 1 : 0)); |
1044 | Res = Builder.CreateInsertElement(Vec: Res, NewElt: Op1, |
1045 | Idx: (uint64_t)(isLittleEndian ? 0 : 1)); |
1046 | return Builder.CreateBitCast(V: Res, DestTy: ConvertType(T: E->getType())); |
1047 | } |
1048 | |
1049 | case PPC::BI__builtin_unpack_vector_int128: { |
1050 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1051 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1052 | ConstantInt *Index = cast<ConstantInt>(Val: Op1); |
1053 | Value *Unpacked = Builder.CreateBitCast( |
1054 | V: Op0, DestTy: llvm::FixedVectorType::get(ElementType: ConvertType(T: E->getType()), NumElts: 2)); |
1055 | |
1056 | if (getTarget().isLittleEndian()) |
1057 | Index = |
1058 | ConstantInt::get(Ty: Index->getIntegerType(), V: 1 - Index->getZExtValue()); |
1059 | |
1060 | return Builder.CreateExtractElement(Vec: Unpacked, Idx: Index); |
1061 | } |
1062 | |
1063 | case PPC::BI__builtin_ppc_sthcx: { |
1064 | llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::ppc_sthcx); |
1065 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1066 | Value *Op1 = Builder.CreateSExt(V: EmitScalarExpr(E: E->getArg(Arg: 1)), DestTy: Int32Ty); |
1067 | return Builder.CreateCall(Callee: F, Args: {Op0, Op1}); |
1068 | } |
1069 | |
1070 | // The PPC MMA builtins take a pointer to a __vector_quad as an argument. |
1071 | // Some of the MMA instructions accumulate their result into an existing |
1072 | // accumulator whereas the others generate a new accumulator. So we need to |
1073 | // use custom code generation to expand a builtin call that takes a pointer |
1074 | // into a load (if the corresponding instruction accumulates its result), |
1075 | // followed by the call to the intrinsic and a store of the result. |
1076 | #define CUSTOM_BUILTIN(Name, Intr, Types, Accumulate, Feature) \ |
1077 | case PPC::BI__builtin_##Name: |
1078 | #include "clang/Basic/BuiltinsPPC.def" |
1079 | { |
1080 | SmallVector<Value *, 4> Ops; |
1081 | for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) |
1082 | if (E->getArg(Arg: i)->getType()->isArrayType()) |
1083 | Ops.push_back( |
1084 | Elt: EmitArrayToPointerDecay(Array: E->getArg(Arg: i)).emitRawPointer(CGF&: *this)); |
1085 | else |
1086 | Ops.push_back(Elt: EmitScalarExpr(E: E->getArg(Arg: i))); |
1087 | // The first argument of these builtins is a pointer used to store their |
1088 | // result. However, the llvm intrinsics return their result in multiple |
1089 | // return values. So, here we emit code extracting these values from the |
1090 | // intrinsic results and storing them using that pointer. |
1091 | if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc || |
1092 | BuiltinID == PPC::BI__builtin_vsx_disassemble_pair || |
1093 | BuiltinID == PPC::BI__builtin_mma_disassemble_pair) { |
1094 | unsigned NumVecs = 2; |
1095 | auto Intrinsic = Intrinsic::ppc_vsx_disassemble_pair; |
1096 | if (BuiltinID == PPC::BI__builtin_mma_disassemble_acc) { |
1097 | NumVecs = 4; |
1098 | Intrinsic = Intrinsic::ppc_mma_disassemble_acc; |
1099 | } |
1100 | llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic); |
1101 | Address Addr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1)); |
1102 | Value *Vec = Builder.CreateLoad(Addr); |
1103 | Value *Call = Builder.CreateCall(Callee: F, Args: {Vec}); |
1104 | llvm::Type *VTy = llvm::FixedVectorType::get(ElementType: Int8Ty, NumElts: 16); |
1105 | Value *Ptr = Ops[0]; |
1106 | for (unsigned i=0; i<NumVecs; i++) { |
1107 | Value *Vec = Builder.CreateExtractValue(Agg: Call, Idxs: i); |
1108 | llvm::ConstantInt* Index = llvm::ConstantInt::get(Ty: IntTy, V: i); |
1109 | Value *GEP = Builder.CreateInBoundsGEP(Ty: VTy, Ptr, IdxList: Index); |
1110 | Builder.CreateAlignedStore(Val: Vec, Ptr: GEP, Align: MaybeAlign(16)); |
1111 | } |
1112 | return Call; |
1113 | } |
1114 | if (BuiltinID == PPC::BI__builtin_vsx_build_pair || |
1115 | BuiltinID == PPC::BI__builtin_mma_build_acc) { |
1116 | // Reverse the order of the operands for LE, so the |
1117 | // same builtin call can be used on both LE and BE |
1118 | // without the need for the programmer to swap operands. |
1119 | // The operands are reversed starting from the second argument, |
1120 | // the first operand is the pointer to the pair/accumulator |
1121 | // that is being built. |
1122 | if (getTarget().isLittleEndian()) |
1123 | std::reverse(first: Ops.begin() + 1, last: Ops.end()); |
1124 | } |
1125 | bool Accumulate; |
1126 | switch (BuiltinID) { |
1127 | #define CUSTOM_BUILTIN(Name, Intr, Types, Acc, Feature) \ |
1128 | case PPC::BI__builtin_##Name: \ |
1129 | ID = Intrinsic::ppc_##Intr; \ |
1130 | Accumulate = Acc; \ |
1131 | break; |
1132 | #include "clang/Basic/BuiltinsPPC.def" |
1133 | } |
1134 | if (BuiltinID == PPC::BI__builtin_vsx_lxvp || |
1135 | BuiltinID == PPC::BI__builtin_vsx_stxvp || |
1136 | BuiltinID == PPC::BI__builtin_mma_lxvp || |
1137 | BuiltinID == PPC::BI__builtin_mma_stxvp) { |
1138 | if (BuiltinID == PPC::BI__builtin_vsx_lxvp || |
1139 | BuiltinID == PPC::BI__builtin_mma_lxvp) { |
1140 | Ops[0] = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[1], IdxList: Ops[0]); |
1141 | } else { |
1142 | Ops[1] = Builder.CreateGEP(Ty: Int8Ty, Ptr: Ops[2], IdxList: Ops[1]); |
1143 | } |
1144 | Ops.pop_back(); |
1145 | llvm::Function *F = CGM.getIntrinsic(IID: ID); |
1146 | return Builder.CreateCall(Callee: F, Args: Ops, Name: "" ); |
1147 | } |
1148 | SmallVector<Value*, 4> CallOps; |
1149 | if (Accumulate) { |
1150 | Address Addr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0)); |
1151 | Value *Acc = Builder.CreateLoad(Addr); |
1152 | CallOps.push_back(Elt: Acc); |
1153 | } |
1154 | for (unsigned i=1; i<Ops.size(); i++) |
1155 | CallOps.push_back(Elt: Ops[i]); |
1156 | llvm::Function *F = CGM.getIntrinsic(IID: ID); |
1157 | Value *Call = Builder.CreateCall(Callee: F, Args: CallOps); |
1158 | return Builder.CreateAlignedStore(Val: Call, Ptr: Ops[0], Align: MaybeAlign()); |
1159 | } |
1160 | |
1161 | case PPC::BI__builtin_ppc_compare_and_swap: |
1162 | case PPC::BI__builtin_ppc_compare_and_swaplp: { |
1163 | Address Addr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 0)); |
1164 | Address OldValAddr = EmitPointerWithAlignment(Addr: E->getArg(Arg: 1)); |
1165 | Value *OldVal = Builder.CreateLoad(Addr: OldValAddr); |
1166 | QualType AtomicTy = E->getArg(Arg: 0)->getType()->getPointeeType(); |
1167 | LValue LV = MakeAddrLValue(Addr, T: AtomicTy); |
1168 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
1169 | auto Pair = EmitAtomicCompareExchange( |
1170 | Obj: LV, Expected: RValue::get(V: OldVal), Desired: RValue::get(V: Op2), Loc: E->getExprLoc(), |
1171 | Success: llvm::AtomicOrdering::Monotonic, Failure: llvm::AtomicOrdering::Monotonic, IsWeak: true); |
1172 | // Unlike C11's atomic_compare_exchange, according to |
1173 | // https://www.ibm.com/docs/en/xl-c-and-cpp-aix/16.1?topic=functions-compare-swap-compare-swaplp |
1174 | // > In either case, the contents of the memory location specified by addr |
1175 | // > are copied into the memory location specified by old_val_addr. |
1176 | // The documentation does not specify whether the store to OldValAddr is |
1177 | // atomic or which memory ordering it uses, so we follow XL's codegen and |
1178 | // treat it as a normal store. |
1179 | Value *LoadedVal = Pair.first.getScalarVal(); |
1180 | Builder.CreateStore(Val: LoadedVal, Addr: OldValAddr); |
1181 | return Builder.CreateZExt(V: Pair.second, DestTy: Builder.getInt32Ty()); |
1182 | } |
1183 | case PPC::BI__builtin_ppc_fetch_and_add: |
1184 | case PPC::BI__builtin_ppc_fetch_and_addlp: { |
1185 | return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Add, E, |
1186 | Ordering: llvm::AtomicOrdering::Monotonic); |
1187 | } |
1188 | case PPC::BI__builtin_ppc_fetch_and_and: |
1189 | case PPC::BI__builtin_ppc_fetch_and_andlp: { |
1190 | return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::And, E, |
1191 | Ordering: llvm::AtomicOrdering::Monotonic); |
1192 | } |
1193 | |
1194 | case PPC::BI__builtin_ppc_fetch_and_or: |
1195 | case PPC::BI__builtin_ppc_fetch_and_orlp: { |
1196 | return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Or, E, |
1197 | Ordering: llvm::AtomicOrdering::Monotonic); |
1198 | } |
1199 | case PPC::BI__builtin_ppc_fetch_and_swap: |
1200 | case PPC::BI__builtin_ppc_fetch_and_swaplp: { |
1201 | return MakeBinaryAtomicValue(CGF&: *this, Kind: AtomicRMWInst::Xchg, E, |
1202 | Ordering: llvm::AtomicOrdering::Monotonic); |
1203 | } |
1204 | case PPC::BI__builtin_ppc_ldarx: |
1205 | case PPC::BI__builtin_ppc_lwarx: |
1206 | case PPC::BI__builtin_ppc_lharx: |
1207 | case PPC::BI__builtin_ppc_lbarx: |
1208 | return emitPPCLoadReserveIntrinsic(CGF&: *this, BuiltinID, E); |
1209 | case PPC::BI__builtin_ppc_mfspr: { |
1210 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1211 | llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(Ty: VoidPtrTy) == 32 |
1212 | ? Int32Ty |
1213 | : Int64Ty; |
1214 | Function *F = CGM.getIntrinsic(IID: Intrinsic::ppc_mfspr, Tys: RetType); |
1215 | return Builder.CreateCall(Callee: F, Args: {Op0}); |
1216 | } |
1217 | case PPC::BI__builtin_ppc_mtspr: { |
1218 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1219 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1220 | llvm::Type *RetType = CGM.getDataLayout().getTypeSizeInBits(Ty: VoidPtrTy) == 32 |
1221 | ? Int32Ty |
1222 | : Int64Ty; |
1223 | Function *F = CGM.getIntrinsic(IID: Intrinsic::ppc_mtspr, Tys: RetType); |
1224 | return Builder.CreateCall(Callee: F, Args: {Op0, Op1}); |
1225 | } |
1226 | case PPC::BI__builtin_ppc_popcntb: { |
1227 | Value *ArgValue = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1228 | llvm::Type *ArgType = ArgValue->getType(); |
1229 | Function *F = CGM.getIntrinsic(IID: Intrinsic::ppc_popcntb, Tys: {ArgType, ArgType}); |
1230 | return Builder.CreateCall(Callee: F, Args: {ArgValue}, Name: "popcntb" ); |
1231 | } |
1232 | case PPC::BI__builtin_ppc_mtfsf: { |
1233 | // The builtin takes a uint32 that needs to be cast to an |
1234 | // f64 to be passed to the intrinsic. |
1235 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1236 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1237 | Value *Cast = Builder.CreateUIToFP(V: Op1, DestTy: DoubleTy); |
1238 | llvm::Function *F = CGM.getIntrinsic(IID: Intrinsic::ppc_mtfsf); |
1239 | return Builder.CreateCall(Callee: F, Args: {Op0, Cast}, Name: "" ); |
1240 | } |
1241 | |
1242 | case PPC::BI__builtin_ppc_swdiv_nochk: |
1243 | case PPC::BI__builtin_ppc_swdivs_nochk: { |
1244 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1245 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1246 | FastMathFlags FMF = Builder.getFastMathFlags(); |
1247 | Builder.getFastMathFlags().setFast(); |
1248 | Value *FDiv = Builder.CreateFDiv(L: Op0, R: Op1, Name: "swdiv_nochk" ); |
1249 | Builder.getFastMathFlags() &= (FMF); |
1250 | return FDiv; |
1251 | } |
1252 | case PPC::BI__builtin_ppc_fric: |
1253 | return RValue::get(V: emitUnaryMaybeConstrainedFPBuiltin( |
1254 | CGF&: *this, E, IntrinsicID: Intrinsic::rint, |
1255 | ConstrainedIntrinsicID: Intrinsic::experimental_constrained_rint)) |
1256 | .getScalarVal(); |
1257 | case PPC::BI__builtin_ppc_frim: |
1258 | case PPC::BI__builtin_ppc_frims: |
1259 | return RValue::get(V: emitUnaryMaybeConstrainedFPBuiltin( |
1260 | CGF&: *this, E, IntrinsicID: Intrinsic::floor, |
1261 | ConstrainedIntrinsicID: Intrinsic::experimental_constrained_floor)) |
1262 | .getScalarVal(); |
1263 | case PPC::BI__builtin_ppc_frin: |
1264 | case PPC::BI__builtin_ppc_frins: |
1265 | return RValue::get(V: emitUnaryMaybeConstrainedFPBuiltin( |
1266 | CGF&: *this, E, IntrinsicID: Intrinsic::round, |
1267 | ConstrainedIntrinsicID: Intrinsic::experimental_constrained_round)) |
1268 | .getScalarVal(); |
1269 | case PPC::BI__builtin_ppc_frip: |
1270 | case PPC::BI__builtin_ppc_frips: |
1271 | return RValue::get(V: emitUnaryMaybeConstrainedFPBuiltin( |
1272 | CGF&: *this, E, IntrinsicID: Intrinsic::ceil, |
1273 | ConstrainedIntrinsicID: Intrinsic::experimental_constrained_ceil)) |
1274 | .getScalarVal(); |
1275 | case PPC::BI__builtin_ppc_friz: |
1276 | case PPC::BI__builtin_ppc_frizs: |
1277 | return RValue::get(V: emitUnaryMaybeConstrainedFPBuiltin( |
1278 | CGF&: *this, E, IntrinsicID: Intrinsic::trunc, |
1279 | ConstrainedIntrinsicID: Intrinsic::experimental_constrained_trunc)) |
1280 | .getScalarVal(); |
1281 | case PPC::BI__builtin_ppc_fsqrt: |
1282 | case PPC::BI__builtin_ppc_fsqrts: |
1283 | return RValue::get(V: emitUnaryMaybeConstrainedFPBuiltin( |
1284 | CGF&: *this, E, IntrinsicID: Intrinsic::sqrt, |
1285 | ConstrainedIntrinsicID: Intrinsic::experimental_constrained_sqrt)) |
1286 | .getScalarVal(); |
1287 | case PPC::BI__builtin_ppc_test_data_class: { |
1288 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1289 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1290 | return Builder.CreateCall( |
1291 | Callee: CGM.getIntrinsic(IID: Intrinsic::ppc_test_data_class, Tys: Op0->getType()), |
1292 | Args: {Op0, Op1}, Name: "test_data_class" ); |
1293 | } |
1294 | case PPC::BI__builtin_ppc_maxfe: { |
1295 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1296 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1297 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
1298 | Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3)); |
1299 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::ppc_maxfe), |
1300 | Args: {Op0, Op1, Op2, Op3}); |
1301 | } |
1302 | case PPC::BI__builtin_ppc_maxfl: { |
1303 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1304 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1305 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
1306 | Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3)); |
1307 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::ppc_maxfl), |
1308 | Args: {Op0, Op1, Op2, Op3}); |
1309 | } |
1310 | case PPC::BI__builtin_ppc_maxfs: { |
1311 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1312 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1313 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
1314 | Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3)); |
1315 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::ppc_maxfs), |
1316 | Args: {Op0, Op1, Op2, Op3}); |
1317 | } |
1318 | case PPC::BI__builtin_ppc_minfe: { |
1319 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1320 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1321 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
1322 | Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3)); |
1323 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::ppc_minfe), |
1324 | Args: {Op0, Op1, Op2, Op3}); |
1325 | } |
1326 | case PPC::BI__builtin_ppc_minfl: { |
1327 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1328 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1329 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
1330 | Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3)); |
1331 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::ppc_minfl), |
1332 | Args: {Op0, Op1, Op2, Op3}); |
1333 | } |
1334 | case PPC::BI__builtin_ppc_minfs: { |
1335 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1336 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1337 | Value *Op2 = EmitScalarExpr(E: E->getArg(Arg: 2)); |
1338 | Value *Op3 = EmitScalarExpr(E: E->getArg(Arg: 3)); |
1339 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::ppc_minfs), |
1340 | Args: {Op0, Op1, Op2, Op3}); |
1341 | } |
1342 | case PPC::BI__builtin_ppc_swdiv: |
1343 | case PPC::BI__builtin_ppc_swdivs: { |
1344 | Value *Op0 = EmitScalarExpr(E: E->getArg(Arg: 0)); |
1345 | Value *Op1 = EmitScalarExpr(E: E->getArg(Arg: 1)); |
1346 | return Builder.CreateFDiv(L: Op0, R: Op1, Name: "swdiv" ); |
1347 | } |
1348 | case PPC::BI__builtin_ppc_set_fpscr_rn: |
1349 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::ppc_setrnd), |
1350 | Args: {EmitScalarExpr(E: E->getArg(Arg: 0))}); |
1351 | case PPC::BI__builtin_ppc_mffs: |
1352 | return Builder.CreateCall(Callee: CGM.getIntrinsic(IID: Intrinsic::ppc_readflm)); |
1353 | } |
1354 | } |
1355 | |