| 1 | //===- BottomUpVec.cpp - A bottom-up vectorizer pass ----------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h" |
| 10 | #include "llvm/ADT/SmallVector.h" |
| 11 | #include "llvm/SandboxIR/Function.h" |
| 12 | #include "llvm/SandboxIR/Instruction.h" |
| 13 | #include "llvm/SandboxIR/Module.h" |
| 14 | #include "llvm/SandboxIR/Region.h" |
| 15 | #include "llvm/SandboxIR/Utils.h" |
| 16 | #include "llvm/Transforms/Vectorize/SandboxVectorizer/Debug.h" |
| 17 | #include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" |
| 18 | |
| 19 | namespace llvm { |
| 20 | |
| 21 | #ifndef NDEBUG |
| 22 | static cl::opt<bool> |
| 23 | AlwaysVerify("sbvec-always-verify" , cl::init(false), cl::Hidden, |
| 24 | cl::desc("Helps find bugs by verifying the IR whenever we " |
| 25 | "emit new instructions (*very* expensive)." )); |
| 26 | #endif // NDEBUG |
| 27 | |
| 28 | static constexpr const unsigned long StopAtDisabled = |
| 29 | std::numeric_limits<unsigned long>::max(); |
| 30 | static cl::opt<unsigned long> |
| 31 | StopAt("sbvec-stop-at" , cl::init(Val: StopAtDisabled), cl::Hidden, |
| 32 | cl::desc("Vectorize if the invocation count is < than this. 0 " |
| 33 | "disables vectorization." )); |
| 34 | |
| 35 | static constexpr const unsigned long StopBundleDisabled = |
| 36 | std::numeric_limits<unsigned long>::max(); |
| 37 | static cl::opt<unsigned long> |
| 38 | StopBundle("sbvec-stop-bndl" , cl::init(Val: StopBundleDisabled), cl::Hidden, |
| 39 | cl::desc("Vectorize up to this many bundles." )); |
| 40 | |
| 41 | namespace sandboxir { |
| 42 | |
| 43 | static SmallVector<Value *, 4> getOperand(ArrayRef<Value *> Bndl, |
| 44 | unsigned OpIdx) { |
| 45 | SmallVector<Value *, 4> Operands; |
| 46 | for (Value *BndlV : Bndl) { |
| 47 | auto *BndlI = cast<Instruction>(Val: BndlV); |
| 48 | Operands.push_back(Elt: BndlI->getOperand(OpIdx)); |
| 49 | } |
| 50 | return Operands; |
| 51 | } |
| 52 | |
| 53 | /// \Returns the BB iterator after the lowest instruction in \p Vals, or the top |
| 54 | /// of BB if no instruction found in \p Vals. |
| 55 | static BasicBlock::iterator getInsertPointAfterInstrs(ArrayRef<Value *> Vals, |
| 56 | BasicBlock *BB) { |
| 57 | auto *BotI = VecUtils::getLastPHIOrSelf(I: VecUtils::getLowest(Vals, BB)); |
| 58 | if (BotI == nullptr) |
| 59 | // We are using BB->begin() (or after PHIs) as the fallback insert point. |
| 60 | return BB->empty() |
| 61 | ? BB->begin() |
| 62 | : std::next( |
| 63 | x: VecUtils::getLastPHIOrSelf(I: &*BB->begin())->getIterator()); |
| 64 | return std::next(x: BotI->getIterator()); |
| 65 | } |
| 66 | |
| 67 | Value *BottomUpVec::createVectorInstr(ArrayRef<Value *> Bndl, |
| 68 | ArrayRef<Value *> Operands) { |
| 69 | auto CreateVectorInstr = [](ArrayRef<Value *> Bndl, |
| 70 | ArrayRef<Value *> Operands) -> Value * { |
| 71 | assert(all_of(Bndl, [](auto *V) { return isa<Instruction>(V); }) && |
| 72 | "Expect Instructions!" ); |
| 73 | auto &Ctx = Bndl[0]->getContext(); |
| 74 | |
| 75 | Type *ScalarTy = VecUtils::getElementType(Ty: Utils::getExpectedType(V: Bndl[0])); |
| 76 | auto *VecTy = VecUtils::getWideType(ElemTy: ScalarTy, NumElts: VecUtils::getNumLanes(Bndl)); |
| 77 | |
| 78 | BasicBlock::iterator WhereIt = getInsertPointAfterInstrs( |
| 79 | Vals: Bndl, BB: cast<Instruction>(Val: Bndl[0])->getParent()); |
| 80 | |
| 81 | auto Opcode = cast<Instruction>(Val: Bndl[0])->getOpcode(); |
| 82 | switch (Opcode) { |
| 83 | case Instruction::Opcode::ZExt: |
| 84 | case Instruction::Opcode::SExt: |
| 85 | case Instruction::Opcode::FPToUI: |
| 86 | case Instruction::Opcode::FPToSI: |
| 87 | case Instruction::Opcode::FPExt: |
| 88 | case Instruction::Opcode::PtrToInt: |
| 89 | case Instruction::Opcode::IntToPtr: |
| 90 | case Instruction::Opcode::SIToFP: |
| 91 | case Instruction::Opcode::UIToFP: |
| 92 | case Instruction::Opcode::Trunc: |
| 93 | case Instruction::Opcode::FPTrunc: |
| 94 | case Instruction::Opcode::BitCast: { |
| 95 | assert(Operands.size() == 1u && "Casts are unary!" ); |
| 96 | return CastInst::create(DestTy: VecTy, Op: Opcode, Operand: Operands[0], Pos: WhereIt, Ctx, |
| 97 | Name: "VCast" ); |
| 98 | } |
| 99 | case Instruction::Opcode::FCmp: |
| 100 | case Instruction::Opcode::ICmp: { |
| 101 | auto Pred = cast<CmpInst>(Val: Bndl[0])->getPredicate(); |
| 102 | assert(all_of(drop_begin(Bndl), |
| 103 | [Pred](auto *SBV) { |
| 104 | return cast<CmpInst>(SBV)->getPredicate() == Pred; |
| 105 | }) && |
| 106 | "Expected same predicate across bundle." ); |
| 107 | return CmpInst::create(Pred, S1: Operands[0], S2: Operands[1], Pos: WhereIt, Ctx, |
| 108 | Name: "VCmp" ); |
| 109 | } |
| 110 | case Instruction::Opcode::Select: { |
| 111 | return SelectInst::create(Cond: Operands[0], True: Operands[1], False: Operands[2], Pos: WhereIt, |
| 112 | Ctx, Name: "Vec" ); |
| 113 | } |
| 114 | case Instruction::Opcode::FNeg: { |
| 115 | auto *UOp0 = cast<UnaryOperator>(Val: Bndl[0]); |
| 116 | auto OpC = UOp0->getOpcode(); |
| 117 | return UnaryOperator::createWithCopiedFlags(Op: OpC, OpV: Operands[0], CopyFrom: UOp0, |
| 118 | Pos: WhereIt, Ctx, Name: "Vec" ); |
| 119 | } |
| 120 | case Instruction::Opcode::Add: |
| 121 | case Instruction::Opcode::FAdd: |
| 122 | case Instruction::Opcode::Sub: |
| 123 | case Instruction::Opcode::FSub: |
| 124 | case Instruction::Opcode::Mul: |
| 125 | case Instruction::Opcode::FMul: |
| 126 | case Instruction::Opcode::UDiv: |
| 127 | case Instruction::Opcode::SDiv: |
| 128 | case Instruction::Opcode::FDiv: |
| 129 | case Instruction::Opcode::URem: |
| 130 | case Instruction::Opcode::SRem: |
| 131 | case Instruction::Opcode::FRem: |
| 132 | case Instruction::Opcode::Shl: |
| 133 | case Instruction::Opcode::LShr: |
| 134 | case Instruction::Opcode::AShr: |
| 135 | case Instruction::Opcode::And: |
| 136 | case Instruction::Opcode::Or: |
| 137 | case Instruction::Opcode::Xor: { |
| 138 | auto *BinOp0 = cast<BinaryOperator>(Val: Bndl[0]); |
| 139 | auto *LHS = Operands[0]; |
| 140 | auto *RHS = Operands[1]; |
| 141 | return BinaryOperator::createWithCopiedFlags( |
| 142 | Op: BinOp0->getOpcode(), LHS, RHS, CopyFrom: BinOp0, Pos: WhereIt, Ctx, Name: "Vec" ); |
| 143 | } |
| 144 | case Instruction::Opcode::Load: { |
| 145 | auto *Ld0 = cast<LoadInst>(Val: Bndl[0]); |
| 146 | Value *Ptr = Ld0->getPointerOperand(); |
| 147 | return LoadInst::create(Ty: VecTy, Ptr, Align: Ld0->getAlign(), Pos: WhereIt, Ctx, |
| 148 | Name: "VecL" ); |
| 149 | } |
| 150 | case Instruction::Opcode::Store: { |
| 151 | auto Align = cast<StoreInst>(Val: Bndl[0])->getAlign(); |
| 152 | Value *Val = Operands[0]; |
| 153 | Value *Ptr = Operands[1]; |
| 154 | return StoreInst::create(V: Val, Ptr, Align, Pos: WhereIt, Ctx); |
| 155 | } |
| 156 | case Instruction::Opcode::Br: |
| 157 | case Instruction::Opcode::Ret: |
| 158 | case Instruction::Opcode::PHI: |
| 159 | case Instruction::Opcode::AddrSpaceCast: |
| 160 | case Instruction::Opcode::Call: |
| 161 | case Instruction::Opcode::GetElementPtr: |
| 162 | llvm_unreachable("Unimplemented" ); |
| 163 | break; |
| 164 | default: |
| 165 | llvm_unreachable("Unimplemented" ); |
| 166 | break; |
| 167 | } |
| 168 | llvm_unreachable("Missing switch case!" ); |
| 169 | // TODO: Propagate debug info. |
| 170 | }; |
| 171 | |
| 172 | auto *NewI = CreateVectorInstr(Bndl, Operands); |
| 173 | LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "New instr: " << *NewI << "\n" ); |
| 174 | return NewI; |
| 175 | } |
| 176 | |
| 177 | void BottomUpVec::tryEraseDeadInstrs() { |
| 178 | DenseMap<BasicBlock *, SmallVector<Instruction *>> SortedDeadInstrCandidates; |
| 179 | // The dead instrs could span BBs, so we need to collect and sort them per BB. |
| 180 | for (auto *DeadI : DeadInstrCandidates) |
| 181 | SortedDeadInstrCandidates[DeadI->getParent()].push_back(Elt: DeadI); |
| 182 | for (auto &Pair : SortedDeadInstrCandidates) |
| 183 | sort(C&: Pair.second, |
| 184 | Comp: [](Instruction *I1, Instruction *I2) { return I1->comesBefore(Other: I2); }); |
| 185 | for (const auto &Pair : SortedDeadInstrCandidates) { |
| 186 | for (Instruction *I : reverse(C: Pair.second)) { |
| 187 | if (I->hasNUses(Num: 0)) { |
| 188 | // Erase the dead instructions bottom-to-top. |
| 189 | LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Erase dead: " << *I << "\n" ); |
| 190 | I->eraseFromParent(); |
| 191 | } |
| 192 | } |
| 193 | } |
| 194 | DeadInstrCandidates.clear(); |
| 195 | } |
| 196 | |
| 197 | Value *BottomUpVec::createShuffle(Value *VecOp, const ShuffleMask &Mask, |
| 198 | BasicBlock *UserBB) { |
| 199 | BasicBlock::iterator WhereIt = getInsertPointAfterInstrs(Vals: {VecOp}, BB: UserBB); |
| 200 | return ShuffleVectorInst::create(V1: VecOp, V2: VecOp, Mask, Pos: WhereIt, |
| 201 | Ctx&: VecOp->getContext(), Name: "VShuf" ); |
| 202 | } |
| 203 | |
| 204 | Value *BottomUpVec::createPack(ArrayRef<Value *> ToPack, BasicBlock *UserBB) { |
| 205 | BasicBlock::iterator WhereIt = getInsertPointAfterInstrs(Vals: ToPack, BB: UserBB); |
| 206 | |
| 207 | Type *ScalarTy = VecUtils::getCommonScalarType(Bndl: ToPack); |
| 208 | unsigned Lanes = VecUtils::getNumLanes(Bndl: ToPack); |
| 209 | Type *VecTy = VecUtils::getWideType(ElemTy: ScalarTy, NumElts: Lanes); |
| 210 | |
| 211 | // Create a series of pack instructions. |
| 212 | Value *LastInsert = PoisonValue::get(T: VecTy); |
| 213 | |
| 214 | Context &Ctx = ToPack[0]->getContext(); |
| 215 | |
| 216 | unsigned InsertIdx = 0; |
| 217 | for (Value *Elm : ToPack) { |
| 218 | // An element can be either scalar or vector. We need to generate different |
| 219 | // IR for each case. |
| 220 | if (Elm->getType()->isVectorTy()) { |
| 221 | unsigned NumElms = |
| 222 | cast<FixedVectorType>(Val: Elm->getType())->getNumElements(); |
| 223 | for (auto ExtrLane : seq<int>(Begin: 0, End: NumElms)) { |
| 224 | // We generate extract-insert pairs, for each lane in `Elm`. |
| 225 | Constant *ExtrLaneC = |
| 226 | ConstantInt::getSigned(Ty: Type::getInt32Ty(Ctx), V: ExtrLane); |
| 227 | // This may return a Constant if Elm is a Constant. |
| 228 | auto *ExtrI = |
| 229 | ExtractElementInst::create(Vec: Elm, Idx: ExtrLaneC, Pos: WhereIt, Ctx, Name: "VPack" ); |
| 230 | if (!isa<Constant>(Val: ExtrI)) |
| 231 | WhereIt = std::next(x: cast<Instruction>(Val: ExtrI)->getIterator()); |
| 232 | Constant *InsertLaneC = |
| 233 | ConstantInt::getSigned(Ty: Type::getInt32Ty(Ctx), V: InsertIdx++); |
| 234 | // This may also return a Constant if ExtrI is a Constant. |
| 235 | auto *InsertI = InsertElementInst::create( |
| 236 | Vec: LastInsert, NewElt: ExtrI, Idx: InsertLaneC, Pos: WhereIt, Ctx, Name: "VPack" ); |
| 237 | LastInsert = InsertI; |
| 238 | if (!isa<Constant>(Val: InsertI)) |
| 239 | WhereIt = std::next(x: cast<Instruction>(Val: LastInsert)->getIterator()); |
| 240 | } |
| 241 | } else { |
| 242 | Constant *InsertLaneC = |
| 243 | ConstantInt::getSigned(Ty: Type::getInt32Ty(Ctx), V: InsertIdx++); |
| 244 | // This may be folded into a Constant if LastInsert is a Constant. In |
| 245 | // that case we only collect the last constant. |
| 246 | LastInsert = InsertElementInst::create(Vec: LastInsert, NewElt: Elm, Idx: InsertLaneC, |
| 247 | Pos: WhereIt, Ctx, Name: "Pack" ); |
| 248 | if (auto *NewI = dyn_cast<Instruction>(Val: LastInsert)) |
| 249 | WhereIt = std::next(x: NewI->getIterator()); |
| 250 | } |
| 251 | } |
| 252 | return LastInsert; |
| 253 | } |
| 254 | |
| 255 | void BottomUpVec::collectPotentiallyDeadInstrs(ArrayRef<Value *> Bndl) { |
| 256 | for (Value *V : Bndl) |
| 257 | DeadInstrCandidates.insert(V: cast<Instruction>(Val: V)); |
| 258 | // Also collect the GEPs of vectorized loads and stores. |
| 259 | auto Opcode = cast<Instruction>(Val: Bndl[0])->getOpcode(); |
| 260 | switch (Opcode) { |
| 261 | case Instruction::Opcode::Load: { |
| 262 | for (Value *V : drop_begin(RangeOrContainer&: Bndl)) |
| 263 | if (auto *Ptr = |
| 264 | dyn_cast<Instruction>(Val: cast<LoadInst>(Val: V)->getPointerOperand())) |
| 265 | DeadInstrCandidates.insert(V: Ptr); |
| 266 | break; |
| 267 | } |
| 268 | case Instruction::Opcode::Store: { |
| 269 | for (Value *V : drop_begin(RangeOrContainer&: Bndl)) |
| 270 | if (auto *Ptr = |
| 271 | dyn_cast<Instruction>(Val: cast<StoreInst>(Val: V)->getPointerOperand())) |
| 272 | DeadInstrCandidates.insert(V: Ptr); |
| 273 | break; |
| 274 | } |
| 275 | default: |
| 276 | break; |
| 277 | } |
| 278 | } |
| 279 | |
| 280 | Action *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl, |
| 281 | ArrayRef<Value *> UserBndl, unsigned Depth, |
| 282 | LegalityAnalysis &Legality) { |
| 283 | bool StopForDebug = |
| 284 | DebugBndlCnt++ >= StopBundle && StopBundle != StopBundleDisabled; |
| 285 | LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "canVectorize() Bundle:\n" ; |
| 286 | VecUtils::dump(Bndl)); |
| 287 | const auto &LegalityRes = StopForDebug ? Legality.getForcedPackForDebugging() |
| 288 | : Legality.canVectorize(Bndl); |
| 289 | LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Legality: " << LegalityRes << "\n" ); |
| 290 | auto ActionPtr = |
| 291 | std::make_unique<Action>(args: &LegalityRes, args&: Bndl, args&: UserBndl, args&: Depth); |
| 292 | SmallVector<Action *> Operands; |
| 293 | switch (LegalityRes.getSubclassID()) { |
| 294 | case LegalityResultID::Widen: { |
| 295 | auto *I = cast<Instruction>(Val: Bndl[0]); |
| 296 | switch (I->getOpcode()) { |
| 297 | case Instruction::Opcode::Load: |
| 298 | break; |
| 299 | case Instruction::Opcode::Store: { |
| 300 | // Don't recurse towards the pointer operand. |
| 301 | Action *OpA = |
| 302 | vectorizeRec(Bndl: getOperand(Bndl, OpIdx: 0), UserBndl: Bndl, Depth: Depth + 1, Legality); |
| 303 | Operands.push_back(Elt: OpA); |
| 304 | break; |
| 305 | } |
| 306 | default: |
| 307 | // Visit all operands. |
| 308 | for (auto OpIdx : seq<unsigned>(Size: I->getNumOperands())) { |
| 309 | Action *OpA = |
| 310 | vectorizeRec(Bndl: getOperand(Bndl, OpIdx), UserBndl: Bndl, Depth: Depth + 1, Legality); |
| 311 | Operands.push_back(Elt: OpA); |
| 312 | } |
| 313 | break; |
| 314 | } |
| 315 | // Update the maps to mark Bndl as "vectorized". |
| 316 | IMaps->registerVector(Origs: Bndl, Vec: ActionPtr.get()); |
| 317 | break; |
| 318 | } |
| 319 | case LegalityResultID::DiamondReuse: |
| 320 | case LegalityResultID::DiamondReuseWithShuffle: |
| 321 | case LegalityResultID::DiamondReuseMultiInput: |
| 322 | case LegalityResultID::Pack: |
| 323 | break; |
| 324 | } |
| 325 | // Create actions in post-order. |
| 326 | ActionPtr->Operands = std::move(Operands); |
| 327 | auto *Action = ActionPtr.get(); |
| 328 | Actions.push_back(ActPtr: std::move(ActionPtr)); |
| 329 | return Action; |
| 330 | } |
| 331 | |
| 332 | #ifndef NDEBUG |
| 333 | void BottomUpVec::ActionsVector::print(raw_ostream &OS) const { |
| 334 | for (auto [Idx, Action] : enumerate(Actions)) { |
| 335 | Action->print(OS); |
| 336 | OS << "\n" ; |
| 337 | } |
| 338 | } |
| 339 | void BottomUpVec::ActionsVector::dump() const { print(dbgs()); } |
| 340 | #endif // NDEBUG |
| 341 | |
| 342 | Value *BottomUpVec::emitVectors() { |
| 343 | Value *NewVec = nullptr; |
| 344 | for (const auto &ActionPtr : Actions) { |
| 345 | ArrayRef<Value *> Bndl = ActionPtr->Bndl; |
| 346 | ArrayRef<Value *> UserBndl = ActionPtr->UserBndl; |
| 347 | const LegalityResult &LegalityRes = *ActionPtr->LegalityRes; |
| 348 | unsigned Depth = ActionPtr->Depth; |
| 349 | auto *UserBB = !UserBndl.empty() |
| 350 | ? cast<Instruction>(Val: UserBndl.front())->getParent() |
| 351 | : cast<Instruction>(Val: Bndl[0])->getParent(); |
| 352 | |
| 353 | switch (LegalityRes.getSubclassID()) { |
| 354 | case LegalityResultID::Widen: { |
| 355 | auto *I = cast<Instruction>(Val: Bndl[0]); |
| 356 | SmallVector<Value *, 2> VecOperands; |
| 357 | switch (I->getOpcode()) { |
| 358 | case Instruction::Opcode::Load: |
| 359 | VecOperands.push_back(Elt: cast<LoadInst>(Val: I)->getPointerOperand()); |
| 360 | break; |
| 361 | case Instruction::Opcode::Store: { |
| 362 | VecOperands.push_back(Elt: ActionPtr->Operands[0]->Vec); |
| 363 | VecOperands.push_back(Elt: cast<StoreInst>(Val: I)->getPointerOperand()); |
| 364 | break; |
| 365 | } |
| 366 | default: |
| 367 | // Visit all operands. |
| 368 | for (Action *OpA : ActionPtr->Operands) { |
| 369 | auto *VecOp = OpA->Vec; |
| 370 | VecOperands.push_back(Elt: VecOp); |
| 371 | } |
| 372 | break; |
| 373 | } |
| 374 | NewVec = createVectorInstr(Bndl: ActionPtr->Bndl, Operands: VecOperands); |
| 375 | // Collect any potentially dead scalar instructions, including the |
| 376 | // original scalars and pointer operands of loads/stores. |
| 377 | if (NewVec != nullptr) |
| 378 | collectPotentiallyDeadInstrs(Bndl); |
| 379 | break; |
| 380 | } |
| 381 | case LegalityResultID::DiamondReuse: { |
| 382 | NewVec = cast<DiamondReuse>(Val: LegalityRes).getVector()->Vec; |
| 383 | break; |
| 384 | } |
| 385 | case LegalityResultID::DiamondReuseWithShuffle: { |
| 386 | auto *VecOp = cast<DiamondReuseWithShuffle>(Val: LegalityRes).getVector()->Vec; |
| 387 | const ShuffleMask &Mask = |
| 388 | cast<DiamondReuseWithShuffle>(Val: LegalityRes).getMask(); |
| 389 | NewVec = createShuffle(VecOp, Mask, UserBB); |
| 390 | assert(NewVec->getType() == VecOp->getType() && |
| 391 | "Expected same type! Bad mask ?" ); |
| 392 | break; |
| 393 | } |
| 394 | case LegalityResultID::DiamondReuseMultiInput: { |
| 395 | const auto &Descr = |
| 396 | cast<DiamondReuseMultiInput>(Val: LegalityRes).getCollectDescr(); |
| 397 | Type *ResTy = VecUtils::getWideType(ElemTy: Bndl[0]->getType(), NumElts: Bndl.size()); |
| 398 | |
| 399 | // TODO: Try to get WhereIt without creating a vector. |
| 400 | SmallVector<Value *, 4> DescrInstrs; |
| 401 | for (const auto &ElmDescr : Descr.getDescrs()) { |
| 402 | auto *V = ElmDescr.needsExtract() ? ElmDescr.getValue()->Vec |
| 403 | : ElmDescr.getScalar(); |
| 404 | if (auto *I = dyn_cast<Instruction>(Val: V)) |
| 405 | DescrInstrs.push_back(Elt: I); |
| 406 | } |
| 407 | BasicBlock::iterator WhereIt = |
| 408 | getInsertPointAfterInstrs(Vals: DescrInstrs, BB: UserBB); |
| 409 | |
| 410 | Value *LastV = PoisonValue::get(T: ResTy); |
| 411 | Context &Ctx = LastV->getContext(); |
| 412 | unsigned Lane = 0; |
| 413 | for (const auto &ElmDescr : Descr.getDescrs()) { |
| 414 | Value *VecOp = nullptr; |
| 415 | Value *ValueToInsert; |
| 416 | if (ElmDescr.needsExtract()) { |
| 417 | VecOp = ElmDescr.getValue()->Vec; |
| 418 | ConstantInt *IdxC = |
| 419 | ConstantInt::get(Ty: Type::getInt32Ty(Ctx), V: ElmDescr.getExtractIdx()); |
| 420 | ValueToInsert = ExtractElementInst::create( |
| 421 | Vec: VecOp, Idx: IdxC, Pos: WhereIt, Ctx&: VecOp->getContext(), Name: "VExt" ); |
| 422 | } else { |
| 423 | ValueToInsert = ElmDescr.getScalar(); |
| 424 | } |
| 425 | auto NumLanesToInsert = VecUtils::getNumLanes(V: ValueToInsert); |
| 426 | if (NumLanesToInsert == 1) { |
| 427 | // If we are inserting a scalar element then we need a single insert. |
| 428 | // %VIns = insert %DstVec, %SrcScalar, Lane |
| 429 | ConstantInt *LaneC = ConstantInt::get(Ty: Type::getInt32Ty(Ctx), V: Lane); |
| 430 | LastV = InsertElementInst::create(Vec: LastV, NewElt: ValueToInsert, Idx: LaneC, |
| 431 | Pos: WhereIt, Ctx, Name: "VIns" ); |
| 432 | } else { |
| 433 | // If we are inserting a vector element then we need to extract and |
| 434 | // insert each vector element one by one with a chain of extracts and |
| 435 | // inserts, for example: |
| 436 | // %VExt0 = extract %SrcVec, 0 |
| 437 | // %VIns0 = insert %DstVec, %Vect0, Lane + 0 |
| 438 | // %VExt1 = extract %SrcVec, 1 |
| 439 | // %VIns1 = insert %VIns0, %Vect0, Lane + 1 |
| 440 | for (unsigned LnCnt = 0; LnCnt != NumLanesToInsert; ++LnCnt) { |
| 441 | auto *ExtrIdxC = ConstantInt::get(Ty: Type::getInt32Ty(Ctx), V: LnCnt); |
| 442 | auto *ExtrI = ExtractElementInst::create(Vec: ValueToInsert, Idx: ExtrIdxC, |
| 443 | Pos: WhereIt, Ctx, Name: "VExt" ); |
| 444 | unsigned InsLane = Lane + LnCnt; |
| 445 | auto *InsLaneC = ConstantInt::get(Ty: Type::getInt32Ty(Ctx), V: InsLane); |
| 446 | LastV = InsertElementInst::create(Vec: LastV, NewElt: ExtrI, Idx: InsLaneC, Pos: WhereIt, |
| 447 | Ctx, Name: "VIns" ); |
| 448 | } |
| 449 | } |
| 450 | Lane += NumLanesToInsert; |
| 451 | } |
| 452 | NewVec = LastV; |
| 453 | break; |
| 454 | } |
| 455 | case LegalityResultID::Pack: { |
| 456 | // If we can't vectorize the seeds then just return. |
| 457 | if (Depth == 0) |
| 458 | return nullptr; |
| 459 | NewVec = createPack(ToPack: Bndl, UserBB); |
| 460 | break; |
| 461 | } |
| 462 | } |
| 463 | if (NewVec != nullptr) { |
| 464 | Change = true; |
| 465 | ActionPtr->Vec = NewVec; |
| 466 | } |
| 467 | #ifndef NDEBUG |
| 468 | if (AlwaysVerify) { |
| 469 | // This helps find broken IR by constantly verifying the function. Note |
| 470 | // that this is very expensive and should only be used for debugging. |
| 471 | Instruction *I0 = isa<Instruction>(Bndl[0]) |
| 472 | ? cast<Instruction>(Bndl[0]) |
| 473 | : cast<Instruction>(UserBndl[0]); |
| 474 | assert(!Utils::verifyFunction(I0->getParent()->getParent(), dbgs()) && |
| 475 | "Broken function!" ); |
| 476 | } |
| 477 | #endif // NDEBUG |
| 478 | } |
| 479 | return NewVec; |
| 480 | } |
| 481 | |
| 482 | bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl, |
| 483 | LegalityAnalysis &Legality) { |
| 484 | Change = false; |
| 485 | if (LLVM_UNLIKELY(BottomUpInvocationCnt++ >= StopAt && |
| 486 | StopAt != StopAtDisabled)) |
| 487 | return false; |
| 488 | DeadInstrCandidates.clear(); |
| 489 | Legality.clear(); |
| 490 | Actions.clear(); |
| 491 | DebugBndlCnt = 0; |
| 492 | vectorizeRec(Bndl, UserBndl: {}, /*Depth=*/0, Legality); |
| 493 | LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "BottomUpVec: Vectorization Actions:\n" ; |
| 494 | Actions.dump()); |
| 495 | emitVectors(); |
| 496 | tryEraseDeadInstrs(); |
| 497 | return Change; |
| 498 | } |
| 499 | |
| 500 | bool BottomUpVec::runOnRegion(Region &Rgn, const Analyses &A) { |
| 501 | const auto &SeedSlice = Rgn.getAux(); |
| 502 | assert(SeedSlice.size() >= 2 && "Bad slice!" ); |
| 503 | Function &F = *SeedSlice[0]->getParent()->getParent(); |
| 504 | IMaps = std::make_unique<InstrMaps>(); |
| 505 | LegalityAnalysis Legality(A.getAA(), A.getScalarEvolution(), |
| 506 | F.getParent()->getDataLayout(), F.getContext(), |
| 507 | *IMaps); |
| 508 | |
| 509 | // TODO: Refactor to remove the unnecessary copy to SeedSliceVals. |
| 510 | SmallVector<Value *> SeedSliceVals(SeedSlice.begin(), SeedSlice.end()); |
| 511 | // Try to vectorize starting from the seed slice. The returned value |
| 512 | // is true if we found vectorizable code and generated some vector |
| 513 | // code for it. It does not mean that the code is profitable. |
| 514 | return tryVectorize(Bndl: SeedSliceVals, Legality); |
| 515 | } |
| 516 | |
| 517 | } // namespace sandboxir |
| 518 | } // namespace llvm |
| 519 | |