//===----- SVEIntrinsicOpts - SVE ACLE Intrinsics Opts --------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Performs general IR level optimizations on SVE intrinsics.
//
// This pass performs the following optimizations:
//
// - removes unnecessary ptrue intrinsics (llvm.aarch64.sve.ptrue), e.g:
//     %1 = @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
//     %2 = @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
//     ; (%1 can be replaced with a reinterpret of %2)
//
// - optimizes ptest intrinsics where the operands are being needlessly
//   converted to and from svbool_t.
//
//===----------------------------------------------------------------------===//

#include "AArch64.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include <optional>

using namespace llvm;
using namespace llvm::PatternMatch;

#define DEBUG_TYPE "aarch64-sve-intrinsic-opts"

namespace {
struct SVEIntrinsicOpts : public ModulePass {
  static char ID; // Pass identification, replacement for typeid
  SVEIntrinsicOpts() : ModulePass(ID) {}

  bool runOnModule(Module &M) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool coalescePTrueIntrinsicCalls(BasicBlock &BB,
                                   SmallSetVector<IntrinsicInst *, 4> &PTrues);
  bool optimizePTrueIntrinsicCalls(SmallSetVector<Function *, 4> &Functions);
  bool optimizePredicateStore(Instruction *I);
  bool optimizePredicateLoad(Instruction *I);

  bool optimizeInstructions(SmallSetVector<Function *, 4> &Functions);

  /// Operates at function scope, i.e. optimizations are applied locally
  /// within each function.
  bool optimizeFunctions(SmallSetVector<Function *, 4> &Functions);
};
} // end anonymous namespace

void SVEIntrinsicOpts::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.setPreservesCFG();
}

char SVEIntrinsicOpts::ID = 0;
static const char *name = "SVE intrinsics optimizations";
INITIALIZE_PASS_BEGIN(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(SVEIntrinsicOpts, DEBUG_TYPE, name, false, false)

ModulePass *llvm::createSVEIntrinsicOptsPass() {
  return new SVEIntrinsicOpts();
}

/// Checks if a ptrue intrinsic call is promoted. The act of promoting a
/// ptrue will introduce zeroing. For example:
///
///     %1 = <vscale x 4 x i1> call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
///     %2 = <vscale x 16 x i1> call @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %1)
///     %3 = <vscale x 8 x i1> call @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> %2)
///
/// %1 is promoted, because it is converted:
///
///     <vscale x 4 x i1> => <vscale x 16 x i1> => <vscale x 8 x i1>
///
/// via a sequence of the SVE reinterpret intrinsics convert.{to,from}.svbool.
static bool isPTruePromoted(IntrinsicInst *PTrue) {
  // Find all users of this intrinsic that are calls to convert-to-svbool
  // reinterpret intrinsics.
  SmallVector<IntrinsicInst *, 4> ConvertToUses;
  for (User *User : PTrue->users()) {
    if (match(User, m_Intrinsic<Intrinsic::aarch64_sve_convert_to_svbool>())) {
      ConvertToUses.push_back(cast<IntrinsicInst>(User));
    }
  }

  // If no such calls were found, this ptrue is not promoted.
  if (ConvertToUses.empty())
    return false;

  // Otherwise, try to find users of the convert-to-svbool intrinsics that are
  // calls to the convert-from-svbool intrinsic, and would result in some lanes
  // being zeroed.
  const auto *PTrueVTy = cast<ScalableVectorType>(PTrue->getType());
  for (IntrinsicInst *ConvertToUse : ConvertToUses) {
    for (User *User : ConvertToUse->users()) {
      auto *IntrUser = dyn_cast<IntrinsicInst>(User);
      if (IntrUser && IntrUser->getIntrinsicID() ==
                          Intrinsic::aarch64_sve_convert_from_svbool) {
        const auto *IntrUserVTy =
            cast<ScalableVectorType>(IntrUser->getType());

        // Would some lanes become zeroed by the conversion?
        if (IntrUserVTy->getElementCount().getKnownMinValue() >
            PTrueVTy->getElementCount().getKnownMinValue())
          // This is a promoted ptrue.
          return true;
      }
    }
  }

  // If no matching calls were found, this is not a promoted ptrue.
  return false;
}

/// Attempts to coalesce ptrues in a basic block.
bool SVEIntrinsicOpts::coalescePTrueIntrinsicCalls(
    BasicBlock &BB, SmallSetVector<IntrinsicInst *, 4> &PTrues) {
  if (PTrues.size() <= 1)
    return false;

  // Find the ptrue with the most lanes.
  auto *MostEncompassingPTrue =
      *llvm::max_element(PTrues, [](auto *PTrue1, auto *PTrue2) {
        auto *PTrue1VTy = cast<ScalableVectorType>(PTrue1->getType());
        auto *PTrue2VTy = cast<ScalableVectorType>(PTrue2->getType());
        return PTrue1VTy->getElementCount().getKnownMinValue() <
               PTrue2VTy->getElementCount().getKnownMinValue();
      });

  // Remove the most encompassing ptrue, as well as any promoted ptrues,
  // leaving behind only the ptrues to be coalesced.
  PTrues.remove(MostEncompassingPTrue);
  PTrues.remove_if(isPTruePromoted);

  // Hoist MostEncompassingPTrue to the start of the basic block. It is always
  // safe to do this, since ptrue intrinsic calls are guaranteed to have no
  // predecessors.
  MostEncompassingPTrue->moveBefore(BB, BB.getFirstInsertionPt());

  LLVMContext &Ctx = BB.getContext();
  IRBuilder<> Builder(Ctx);
  Builder.SetInsertPoint(&BB, ++MostEncompassingPTrue->getIterator());

  auto *MostEncompassingPTrueVTy =
      cast<VectorType>(MostEncompassingPTrue->getType());
  auto *ConvertToSVBool = Builder.CreateIntrinsic(
      Intrinsic::aarch64_sve_convert_to_svbool, {MostEncompassingPTrueVTy},
      {MostEncompassingPTrue});

  bool ConvertFromCreated = false;
  for (auto *PTrue : PTrues) {
    auto *PTrueVTy = cast<VectorType>(PTrue->getType());

    // Only create the converts if the types are not already the same,
    // otherwise just use the most encompassing ptrue.
    if (MostEncompassingPTrueVTy != PTrueVTy) {
      ConvertFromCreated = true;

      Builder.SetInsertPoint(&BB, ++ConvertToSVBool->getIterator());
      auto *ConvertFromSVBool =
          Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_from_svbool,
                                  {PTrueVTy}, {ConvertToSVBool});
      PTrue->replaceAllUsesWith(ConvertFromSVBool);
    } else
      PTrue->replaceAllUsesWith(MostEncompassingPTrue);

    PTrue->eraseFromParent();
  }

  // We never used ConvertToSVBool, so remove it.
  if (!ConvertFromCreated)
    ConvertToSVBool->eraseFromParent();

  return true;
}

/// The goal of this function is to remove redundant calls to the SVE ptrue
/// intrinsic in each basic block within the given functions.
///
/// SVE ptrues have two representations in LLVM IR:
/// - a logical representation -- an arbitrary-width scalable vector of i1s,
///   i.e. <vscale x N x i1>.
/// - a physical representation (svbool, <vscale x 16 x i1>) -- a 16-element
///   scalable vector of i1s, i.e. <vscale x 16 x i1>.
///
/// The SVE ptrue intrinsic is used to create a logical representation of an
/// SVE predicate. Suppose that we have two SVE ptrue intrinsic calls: P1 and
/// P2. If P1 creates a logical SVE predicate that is at least as wide as the
/// logical SVE predicate created by P2, then all of the bits that are true in
/// the physical representation of P2 are necessarily also true in the
/// physical representation of P1. P1 'encompasses' P2; therefore, the
/// intrinsic call to P2 is redundant and can be replaced by an SVE
/// reinterpret of P1 via convert.{to,from}.svbool.
///
/// Currently, this pass only coalesces calls to SVE ptrue intrinsics
/// if they match the following conditions:
///
/// - the call to the intrinsic uses either the SV_ALL or SV_POW2 patterns.
///   SV_ALL indicates that all bits of the predicate vector are to be set to
///   true. SV_POW2 indicates that all bits of the predicate vector up to the
///   largest power-of-two are to be set to true.
/// - the result of the call to the intrinsic is not promoted to a wider
///   predicate. In this case, keeping the extra ptrue leads to better codegen
///   -- coalescing here would create an irreducible chain of SVE reinterprets
///   via convert.{to,from}.svbool.
///
/// EXAMPLE:
///
///     %1 = <vscale x 8 x i1> ptrue(i32 SV_ALL)
///     ; Logical:  <1, 1, 1, 1, 1, 1, 1, 1>
///     ; Physical: <1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0>
///     ...
///
///     %2 = <vscale x 4 x i1> ptrue(i32 SV_ALL)
///     ; Logical:  <1, 1, 1, 1>
///     ; Physical: <1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0>
///     ...
///
/// Here, %2 can be replaced by an SVE reinterpret of %1, giving, for
/// instance:
///
///     %1 = <vscale x 8 x i1> ptrue(i32 31)
///     %2 = <vscale x 16 x i1> convert.to.svbool(<vscale x 8 x i1> %1)
///     %3 = <vscale x 4 x i1> convert.from.svbool(<vscale x 16 x i1> %2)
///
bool SVEIntrinsicOpts::optimizePTrueIntrinsicCalls(
    SmallSetVector<Function *, 4> &Functions) {
  bool Changed = false;

  for (auto *F : Functions) {
    for (auto &BB : *F) {
      SmallSetVector<IntrinsicInst *, 4> SVAllPTrues;
      SmallSetVector<IntrinsicInst *, 4> SVPow2PTrues;

      // For each basic block, collect the used ptrues and try to coalesce
      // them.
      for (Instruction &I : BB) {
        if (I.use_empty())
          continue;

        auto *IntrI = dyn_cast<IntrinsicInst>(&I);
        if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::aarch64_sve_ptrue)
          continue;

        const auto PTruePattern =
            cast<ConstantInt>(IntrI->getOperand(0))->getZExtValue();

        if (PTruePattern == AArch64SVEPredPattern::all)
          SVAllPTrues.insert(IntrI);
        if (PTruePattern == AArch64SVEPredPattern::pow2)
          SVPow2PTrues.insert(IntrI);
      }

      Changed |= coalescePTrueIntrinsicCalls(BB, SVAllPTrues);
      Changed |= coalescePTrueIntrinsicCalls(BB, SVPow2PTrues);
    }
  }

  return Changed;
}

// This is done in SVEIntrinsicOpts rather than InstCombine so that we
// introduce scalable stores as late as possible.
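//
// A minimal sketch of the rewrite, assuming a function carrying the
// vscale_range(2,2) attribute (the value names here are illustrative, not
// taken from real input):
//
//     %bc  = bitcast <vscale x 16 x i1> %pred to <vscale x 2 x i8>
//     %ext = call <4 x i8> @llvm.vector.extract.v4i8.nxv2i8(<vscale x 2 x i8> %bc, i64 0)
//     store <4 x i8> %ext, ptr %addr
//
// ..is replaced with a store of the predicate itself:
//
//     store <vscale x 16 x i1> %pred, ptr %addr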
bool SVEIntrinsicOpts::optimizePredicateStore(Instruction *I) {
  auto *F = I->getFunction();
  auto Attr = F->getFnAttribute(Attribute::VScaleRange);
  if (!Attr.isValid())
    return false;

  unsigned MinVScale = Attr.getVScaleRangeMin();
  std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
  // The transform needs to know the exact runtime length of scalable vectors.
  if (!MaxVScale || MinVScale != MaxVScale)
    return false;

  auto *PredType =
      ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16);
  auto *FixedPredType =
      FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2);

  // If we have a store..
  auto *Store = dyn_cast<StoreInst>(I);
  if (!Store || !Store->isSimple())
    return false;

  // ..that is storing a predicate vector's worth of bits..
  if (Store->getOperand(0)->getType() != FixedPredType)
    return false;

  // ..where the value stored comes from a vector extract..
  auto *IntrI = dyn_cast<IntrinsicInst>(Store->getOperand(0));
  if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_extract)
    return false;

  // ..that is extracting from index 0..
  if (!cast<ConstantInt>(IntrI->getOperand(1))->isZero())
    return false;

  // ..where the value being extracted from comes from a bitcast..
  auto *BitCast = dyn_cast<BitCastInst>(IntrI->getOperand(0));
  if (!BitCast)
    return false;

  // ..and the bitcast is casting from predicate type.
  if (BitCast->getOperand(0)->getType() != PredType)
    return false;

  IRBuilder<> Builder(I->getContext());
  Builder.SetInsertPoint(I);

  Builder.CreateStore(BitCast->getOperand(0), Store->getPointerOperand());

  Store->eraseFromParent();
  if (IntrI->use_empty())
    IntrI->eraseFromParent();
  if (BitCast->use_empty())
    BitCast->eraseFromParent();

  return true;
}

// This is done in SVEIntrinsicOpts rather than InstCombine so that we
// introduce scalable loads as late as possible.
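//
// A minimal sketch of the rewrite, the mirror image of the store case above,
// again assuming vscale_range(2,2) (value names are illustrative, not taken
// from real input):
//
//     %ld  = load <4 x i8>, ptr %addr
//     %ins = call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %ld, i64 0)
//     %bc  = bitcast <vscale x 2 x i8> %ins to <vscale x 16 x i1>
//
// ..is replaced with a load of the predicate itself:
//
//     %bc = load <vscale x 16 x i1>, ptr %addr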
bool SVEIntrinsicOpts::optimizePredicateLoad(Instruction *I) {
  auto *F = I->getFunction();
  auto Attr = F->getFnAttribute(Attribute::VScaleRange);
  if (!Attr.isValid())
    return false;

  unsigned MinVScale = Attr.getVScaleRangeMin();
  std::optional<unsigned> MaxVScale = Attr.getVScaleRangeMax();
  // The transform needs to know the exact runtime length of scalable vectors.
  if (!MaxVScale || MinVScale != MaxVScale)
    return false;

  auto *PredType =
      ScalableVectorType::get(Type::getInt1Ty(I->getContext()), 16);
  auto *FixedPredType =
      FixedVectorType::get(Type::getInt8Ty(I->getContext()), MinVScale * 2);

  // If we have a bitcast..
  auto *BitCast = dyn_cast<BitCastInst>(I);
  if (!BitCast || BitCast->getType() != PredType)
    return false;

  // ..whose operand is a vector_insert..
  auto *IntrI = dyn_cast<IntrinsicInst>(BitCast->getOperand(0));
  if (!IntrI || IntrI->getIntrinsicID() != Intrinsic::vector_insert)
    return false;

  // ..that is inserting into index zero of an undef vector..
  if (!isa<UndefValue>(IntrI->getOperand(0)) ||
      !cast<ConstantInt>(IntrI->getOperand(2))->isZero())
    return false;

  // ..where the value inserted comes from a load..
  auto *Load = dyn_cast<LoadInst>(IntrI->getOperand(1));
  if (!Load || !Load->isSimple())
    return false;

  // ..that is loading a predicate vector's worth of bits..
  if (Load->getType() != FixedPredType)
    return false;

  IRBuilder<> Builder(I->getContext());
  Builder.SetInsertPoint(Load);

  auto *LoadPred = Builder.CreateLoad(PredType, Load->getPointerOperand());

  BitCast->replaceAllUsesWith(LoadPred);
  BitCast->eraseFromParent();
  if (IntrI->use_empty())
    IntrI->eraseFromParent();
  if (Load->use_empty())
    Load->eraseFromParent();

  return true;
}

bool SVEIntrinsicOpts::optimizeInstructions(
    SmallSetVector<Function *, 4> &Functions) {
  bool Changed = false;

  for (auto *F : Functions) {
    DominatorTree *DT =
        &getAnalysis<DominatorTreeWrapperPass>(*F).getDomTree();

    // Traverse the DT with an rpo walk so we see defs before uses, allowing
    // simplification to be done incrementally.
    BasicBlock *Root = DT->getRoot();
    ReversePostOrderTraversal<BasicBlock *> RPOT(Root);
    for (auto *BB : RPOT) {
      for (Instruction &I : make_early_inc_range(*BB)) {
        switch (I.getOpcode()) {
        case Instruction::Store:
          Changed |= optimizePredicateStore(&I);
          break;
        case Instruction::BitCast:
          Changed |= optimizePredicateLoad(&I);
          break;
        }
      }
    }
  }

  return Changed;
}

bool SVEIntrinsicOpts::optimizeFunctions(
    SmallSetVector<Function *, 4> &Functions) {
  bool Changed = false;

  Changed |= optimizePTrueIntrinsicCalls(Functions);
  Changed |= optimizeInstructions(Functions);

  return Changed;
}

bool SVEIntrinsicOpts::runOnModule(Module &M) {
  bool Changed = false;
  SmallSetVector<Function *, 4> Functions;

  // Check for SVE intrinsic declarations first so that we only iterate over
  // relevant functions. Where an appropriate declaration is found, store the
  // function(s) where it is used so we can target these only.
  for (auto &F : M.getFunctionList()) {
    if (!F.isDeclaration())
      continue;

    switch (F.getIntrinsicID()) {
    case Intrinsic::vector_extract:
    case Intrinsic::vector_insert:
    case Intrinsic::aarch64_sve_ptrue:
      for (User *U : F.users())
        Functions.insert(cast<Instruction>(U)->getFunction());
      break;
    default:
      break;
    }
  }

  if (!Functions.empty())
    Changed |= optimizeFunctions(Functions);

  return Changed;
}
| 461 | |