1 | //===- BottomUpVec.cpp - A bottom-up vectorizer pass ----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | |
9 | #include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/BottomUpVec.h" |
10 | #include "llvm/ADT/SmallVector.h" |
11 | #include "llvm/SandboxIR/Function.h" |
12 | #include "llvm/SandboxIR/Instruction.h" |
13 | #include "llvm/SandboxIR/Module.h" |
14 | #include "llvm/SandboxIR/Region.h" |
15 | #include "llvm/SandboxIR/Utils.h" |
16 | #include "llvm/Transforms/Vectorize/SandboxVectorizer/Debug.h" |
17 | #include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h" |
18 | |
19 | namespace llvm { |
20 | |
21 | #ifndef NDEBUG |
22 | static cl::opt<bool> |
23 | AlwaysVerify("sbvec-always-verify" , cl::init(false), cl::Hidden, |
24 | cl::desc("Helps find bugs by verifying the IR whenever we " |
25 | "emit new instructions (*very* expensive)." )); |
26 | #endif // NDEBUG |
27 | |
28 | static constexpr const unsigned long StopAtDisabled = |
29 | std::numeric_limits<unsigned long>::max(); |
30 | static cl::opt<unsigned long> |
31 | StopAt("sbvec-stop-at" , cl::init(Val: StopAtDisabled), cl::Hidden, |
32 | cl::desc("Vectorize if the invocation count is < than this. 0 " |
33 | "disables vectorization." )); |
34 | |
35 | static constexpr const unsigned long StopBundleDisabled = |
36 | std::numeric_limits<unsigned long>::max(); |
37 | static cl::opt<unsigned long> |
38 | StopBundle("sbvec-stop-bndl" , cl::init(Val: StopBundleDisabled), cl::Hidden, |
39 | cl::desc("Vectorize up to this many bundles." )); |
40 | |
41 | namespace sandboxir { |
42 | |
43 | static SmallVector<Value *, 4> getOperand(ArrayRef<Value *> Bndl, |
44 | unsigned OpIdx) { |
45 | SmallVector<Value *, 4> Operands; |
46 | for (Value *BndlV : Bndl) { |
47 | auto *BndlI = cast<Instruction>(Val: BndlV); |
48 | Operands.push_back(Elt: BndlI->getOperand(OpIdx)); |
49 | } |
50 | return Operands; |
51 | } |
52 | |
53 | /// \Returns the BB iterator after the lowest instruction in \p Vals, or the top |
54 | /// of BB if no instruction found in \p Vals. |
55 | static BasicBlock::iterator getInsertPointAfterInstrs(ArrayRef<Value *> Vals, |
56 | BasicBlock *BB) { |
57 | auto *BotI = VecUtils::getLastPHIOrSelf(I: VecUtils::getLowest(Vals, BB)); |
58 | if (BotI == nullptr) |
59 | // We are using BB->begin() (or after PHIs) as the fallback insert point. |
60 | return BB->empty() |
61 | ? BB->begin() |
62 | : std::next( |
63 | x: VecUtils::getLastPHIOrSelf(I: &*BB->begin())->getIterator()); |
64 | return std::next(x: BotI->getIterator()); |
65 | } |
66 | |
67 | Value *BottomUpVec::createVectorInstr(ArrayRef<Value *> Bndl, |
68 | ArrayRef<Value *> Operands) { |
69 | auto CreateVectorInstr = [](ArrayRef<Value *> Bndl, |
70 | ArrayRef<Value *> Operands) -> Value * { |
71 | assert(all_of(Bndl, [](auto *V) { return isa<Instruction>(V); }) && |
72 | "Expect Instructions!" ); |
73 | auto &Ctx = Bndl[0]->getContext(); |
74 | |
75 | Type *ScalarTy = VecUtils::getElementType(Ty: Utils::getExpectedType(V: Bndl[0])); |
76 | auto *VecTy = VecUtils::getWideType(ElemTy: ScalarTy, NumElts: VecUtils::getNumLanes(Bndl)); |
77 | |
78 | BasicBlock::iterator WhereIt = getInsertPointAfterInstrs( |
79 | Vals: Bndl, BB: cast<Instruction>(Val: Bndl[0])->getParent()); |
80 | |
81 | auto Opcode = cast<Instruction>(Val: Bndl[0])->getOpcode(); |
82 | switch (Opcode) { |
83 | case Instruction::Opcode::ZExt: |
84 | case Instruction::Opcode::SExt: |
85 | case Instruction::Opcode::FPToUI: |
86 | case Instruction::Opcode::FPToSI: |
87 | case Instruction::Opcode::FPExt: |
88 | case Instruction::Opcode::PtrToInt: |
89 | case Instruction::Opcode::IntToPtr: |
90 | case Instruction::Opcode::SIToFP: |
91 | case Instruction::Opcode::UIToFP: |
92 | case Instruction::Opcode::Trunc: |
93 | case Instruction::Opcode::FPTrunc: |
94 | case Instruction::Opcode::BitCast: { |
95 | assert(Operands.size() == 1u && "Casts are unary!" ); |
96 | return CastInst::create(DestTy: VecTy, Op: Opcode, Operand: Operands[0], Pos: WhereIt, Ctx, |
97 | Name: "VCast" ); |
98 | } |
99 | case Instruction::Opcode::FCmp: |
100 | case Instruction::Opcode::ICmp: { |
101 | auto Pred = cast<CmpInst>(Val: Bndl[0])->getPredicate(); |
102 | assert(all_of(drop_begin(Bndl), |
103 | [Pred](auto *SBV) { |
104 | return cast<CmpInst>(SBV)->getPredicate() == Pred; |
105 | }) && |
106 | "Expected same predicate across bundle." ); |
107 | return CmpInst::create(Pred, S1: Operands[0], S2: Operands[1], Pos: WhereIt, Ctx, |
108 | Name: "VCmp" ); |
109 | } |
110 | case Instruction::Opcode::Select: { |
111 | return SelectInst::create(Cond: Operands[0], True: Operands[1], False: Operands[2], Pos: WhereIt, |
112 | Ctx, Name: "Vec" ); |
113 | } |
114 | case Instruction::Opcode::FNeg: { |
115 | auto *UOp0 = cast<UnaryOperator>(Val: Bndl[0]); |
116 | auto OpC = UOp0->getOpcode(); |
117 | return UnaryOperator::createWithCopiedFlags(Op: OpC, OpV: Operands[0], CopyFrom: UOp0, |
118 | Pos: WhereIt, Ctx, Name: "Vec" ); |
119 | } |
120 | case Instruction::Opcode::Add: |
121 | case Instruction::Opcode::FAdd: |
122 | case Instruction::Opcode::Sub: |
123 | case Instruction::Opcode::FSub: |
124 | case Instruction::Opcode::Mul: |
125 | case Instruction::Opcode::FMul: |
126 | case Instruction::Opcode::UDiv: |
127 | case Instruction::Opcode::SDiv: |
128 | case Instruction::Opcode::FDiv: |
129 | case Instruction::Opcode::URem: |
130 | case Instruction::Opcode::SRem: |
131 | case Instruction::Opcode::FRem: |
132 | case Instruction::Opcode::Shl: |
133 | case Instruction::Opcode::LShr: |
134 | case Instruction::Opcode::AShr: |
135 | case Instruction::Opcode::And: |
136 | case Instruction::Opcode::Or: |
137 | case Instruction::Opcode::Xor: { |
138 | auto *BinOp0 = cast<BinaryOperator>(Val: Bndl[0]); |
139 | auto *LHS = Operands[0]; |
140 | auto *RHS = Operands[1]; |
141 | return BinaryOperator::createWithCopiedFlags( |
142 | Op: BinOp0->getOpcode(), LHS, RHS, CopyFrom: BinOp0, Pos: WhereIt, Ctx, Name: "Vec" ); |
143 | } |
144 | case Instruction::Opcode::Load: { |
145 | auto *Ld0 = cast<LoadInst>(Val: Bndl[0]); |
146 | Value *Ptr = Ld0->getPointerOperand(); |
147 | return LoadInst::create(Ty: VecTy, Ptr, Align: Ld0->getAlign(), Pos: WhereIt, Ctx, |
148 | Name: "VecL" ); |
149 | } |
150 | case Instruction::Opcode::Store: { |
151 | auto Align = cast<StoreInst>(Val: Bndl[0])->getAlign(); |
152 | Value *Val = Operands[0]; |
153 | Value *Ptr = Operands[1]; |
154 | return StoreInst::create(V: Val, Ptr, Align, Pos: WhereIt, Ctx); |
155 | } |
156 | case Instruction::Opcode::Br: |
157 | case Instruction::Opcode::Ret: |
158 | case Instruction::Opcode::PHI: |
159 | case Instruction::Opcode::AddrSpaceCast: |
160 | case Instruction::Opcode::Call: |
161 | case Instruction::Opcode::GetElementPtr: |
162 | llvm_unreachable("Unimplemented" ); |
163 | break; |
164 | default: |
165 | llvm_unreachable("Unimplemented" ); |
166 | break; |
167 | } |
168 | llvm_unreachable("Missing switch case!" ); |
169 | // TODO: Propagate debug info. |
170 | }; |
171 | |
172 | auto *NewI = CreateVectorInstr(Bndl, Operands); |
173 | LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "New instr: " << *NewI << "\n" ); |
174 | return NewI; |
175 | } |
176 | |
177 | void BottomUpVec::tryEraseDeadInstrs() { |
178 | DenseMap<BasicBlock *, SmallVector<Instruction *>> SortedDeadInstrCandidates; |
179 | // The dead instrs could span BBs, so we need to collect and sort them per BB. |
180 | for (auto *DeadI : DeadInstrCandidates) |
181 | SortedDeadInstrCandidates[DeadI->getParent()].push_back(Elt: DeadI); |
182 | for (auto &Pair : SortedDeadInstrCandidates) |
183 | sort(C&: Pair.second, |
184 | Comp: [](Instruction *I1, Instruction *I2) { return I1->comesBefore(Other: I2); }); |
185 | for (const auto &Pair : SortedDeadInstrCandidates) { |
186 | for (Instruction *I : reverse(C: Pair.second)) { |
187 | if (I->hasNUses(Num: 0)) { |
188 | // Erase the dead instructions bottom-to-top. |
189 | LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Erase dead: " << *I << "\n" ); |
190 | I->eraseFromParent(); |
191 | } |
192 | } |
193 | } |
194 | DeadInstrCandidates.clear(); |
195 | } |
196 | |
197 | Value *BottomUpVec::createShuffle(Value *VecOp, const ShuffleMask &Mask, |
198 | BasicBlock *UserBB) { |
199 | BasicBlock::iterator WhereIt = getInsertPointAfterInstrs(Vals: {VecOp}, BB: UserBB); |
200 | return ShuffleVectorInst::create(V1: VecOp, V2: VecOp, Mask, Pos: WhereIt, |
201 | Ctx&: VecOp->getContext(), Name: "VShuf" ); |
202 | } |
203 | |
204 | Value *BottomUpVec::createPack(ArrayRef<Value *> ToPack, BasicBlock *UserBB) { |
205 | BasicBlock::iterator WhereIt = getInsertPointAfterInstrs(Vals: ToPack, BB: UserBB); |
206 | |
207 | Type *ScalarTy = VecUtils::getCommonScalarType(Bndl: ToPack); |
208 | unsigned Lanes = VecUtils::getNumLanes(Bndl: ToPack); |
209 | Type *VecTy = VecUtils::getWideType(ElemTy: ScalarTy, NumElts: Lanes); |
210 | |
211 | // Create a series of pack instructions. |
212 | Value *LastInsert = PoisonValue::get(T: VecTy); |
213 | |
214 | Context &Ctx = ToPack[0]->getContext(); |
215 | |
216 | unsigned InsertIdx = 0; |
217 | for (Value *Elm : ToPack) { |
218 | // An element can be either scalar or vector. We need to generate different |
219 | // IR for each case. |
220 | if (Elm->getType()->isVectorTy()) { |
221 | unsigned NumElms = |
222 | cast<FixedVectorType>(Val: Elm->getType())->getNumElements(); |
223 | for (auto ExtrLane : seq<int>(Begin: 0, End: NumElms)) { |
224 | // We generate extract-insert pairs, for each lane in `Elm`. |
225 | Constant *ExtrLaneC = |
226 | ConstantInt::getSigned(Ty: Type::getInt32Ty(Ctx), V: ExtrLane); |
227 | // This may return a Constant if Elm is a Constant. |
228 | auto *ExtrI = |
229 | ExtractElementInst::create(Vec: Elm, Idx: ExtrLaneC, Pos: WhereIt, Ctx, Name: "VPack" ); |
230 | if (!isa<Constant>(Val: ExtrI)) |
231 | WhereIt = std::next(x: cast<Instruction>(Val: ExtrI)->getIterator()); |
232 | Constant *InsertLaneC = |
233 | ConstantInt::getSigned(Ty: Type::getInt32Ty(Ctx), V: InsertIdx++); |
234 | // This may also return a Constant if ExtrI is a Constant. |
235 | auto *InsertI = InsertElementInst::create( |
236 | Vec: LastInsert, NewElt: ExtrI, Idx: InsertLaneC, Pos: WhereIt, Ctx, Name: "VPack" ); |
237 | LastInsert = InsertI; |
238 | if (!isa<Constant>(Val: InsertI)) |
239 | WhereIt = std::next(x: cast<Instruction>(Val: LastInsert)->getIterator()); |
240 | } |
241 | } else { |
242 | Constant *InsertLaneC = |
243 | ConstantInt::getSigned(Ty: Type::getInt32Ty(Ctx), V: InsertIdx++); |
244 | // This may be folded into a Constant if LastInsert is a Constant. In |
245 | // that case we only collect the last constant. |
246 | LastInsert = InsertElementInst::create(Vec: LastInsert, NewElt: Elm, Idx: InsertLaneC, |
247 | Pos: WhereIt, Ctx, Name: "Pack" ); |
248 | if (auto *NewI = dyn_cast<Instruction>(Val: LastInsert)) |
249 | WhereIt = std::next(x: NewI->getIterator()); |
250 | } |
251 | } |
252 | return LastInsert; |
253 | } |
254 | |
255 | void BottomUpVec::collectPotentiallyDeadInstrs(ArrayRef<Value *> Bndl) { |
256 | for (Value *V : Bndl) |
257 | DeadInstrCandidates.insert(V: cast<Instruction>(Val: V)); |
258 | // Also collect the GEPs of vectorized loads and stores. |
259 | auto Opcode = cast<Instruction>(Val: Bndl[0])->getOpcode(); |
260 | switch (Opcode) { |
261 | case Instruction::Opcode::Load: { |
262 | for (Value *V : drop_begin(RangeOrContainer&: Bndl)) |
263 | if (auto *Ptr = |
264 | dyn_cast<Instruction>(Val: cast<LoadInst>(Val: V)->getPointerOperand())) |
265 | DeadInstrCandidates.insert(V: Ptr); |
266 | break; |
267 | } |
268 | case Instruction::Opcode::Store: { |
269 | for (Value *V : drop_begin(RangeOrContainer&: Bndl)) |
270 | if (auto *Ptr = |
271 | dyn_cast<Instruction>(Val: cast<StoreInst>(Val: V)->getPointerOperand())) |
272 | DeadInstrCandidates.insert(V: Ptr); |
273 | break; |
274 | } |
275 | default: |
276 | break; |
277 | } |
278 | } |
279 | |
280 | Action *BottomUpVec::vectorizeRec(ArrayRef<Value *> Bndl, |
281 | ArrayRef<Value *> UserBndl, unsigned Depth, |
282 | LegalityAnalysis &Legality) { |
283 | bool StopForDebug = |
284 | DebugBndlCnt++ >= StopBundle && StopBundle != StopBundleDisabled; |
285 | LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "canVectorize() Bundle:\n" ; |
286 | VecUtils::dump(Bndl)); |
287 | const auto &LegalityRes = StopForDebug ? Legality.getForcedPackForDebugging() |
288 | : Legality.canVectorize(Bndl); |
289 | LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "Legality: " << LegalityRes << "\n" ); |
290 | auto ActionPtr = |
291 | std::make_unique<Action>(args: &LegalityRes, args&: Bndl, args&: UserBndl, args&: Depth); |
292 | SmallVector<Action *> Operands; |
293 | switch (LegalityRes.getSubclassID()) { |
294 | case LegalityResultID::Widen: { |
295 | auto *I = cast<Instruction>(Val: Bndl[0]); |
296 | switch (I->getOpcode()) { |
297 | case Instruction::Opcode::Load: |
298 | break; |
299 | case Instruction::Opcode::Store: { |
300 | // Don't recurse towards the pointer operand. |
301 | Action *OpA = |
302 | vectorizeRec(Bndl: getOperand(Bndl, OpIdx: 0), UserBndl: Bndl, Depth: Depth + 1, Legality); |
303 | Operands.push_back(Elt: OpA); |
304 | break; |
305 | } |
306 | default: |
307 | // Visit all operands. |
308 | for (auto OpIdx : seq<unsigned>(Size: I->getNumOperands())) { |
309 | Action *OpA = |
310 | vectorizeRec(Bndl: getOperand(Bndl, OpIdx), UserBndl: Bndl, Depth: Depth + 1, Legality); |
311 | Operands.push_back(Elt: OpA); |
312 | } |
313 | break; |
314 | } |
315 | // Update the maps to mark Bndl as "vectorized". |
316 | IMaps->registerVector(Origs: Bndl, Vec: ActionPtr.get()); |
317 | break; |
318 | } |
319 | case LegalityResultID::DiamondReuse: |
320 | case LegalityResultID::DiamondReuseWithShuffle: |
321 | case LegalityResultID::DiamondReuseMultiInput: |
322 | case LegalityResultID::Pack: |
323 | break; |
324 | } |
325 | // Create actions in post-order. |
326 | ActionPtr->Operands = std::move(Operands); |
327 | auto *Action = ActionPtr.get(); |
328 | Actions.push_back(ActPtr: std::move(ActionPtr)); |
329 | return Action; |
330 | } |
331 | |
332 | #ifndef NDEBUG |
333 | void BottomUpVec::ActionsVector::print(raw_ostream &OS) const { |
334 | for (auto [Idx, Action] : enumerate(Actions)) { |
335 | Action->print(OS); |
336 | OS << "\n" ; |
337 | } |
338 | } |
339 | void BottomUpVec::ActionsVector::dump() const { print(dbgs()); } |
340 | #endif // NDEBUG |
341 | |
342 | Value *BottomUpVec::emitVectors() { |
343 | Value *NewVec = nullptr; |
344 | for (const auto &ActionPtr : Actions) { |
345 | ArrayRef<Value *> Bndl = ActionPtr->Bndl; |
346 | ArrayRef<Value *> UserBndl = ActionPtr->UserBndl; |
347 | const LegalityResult &LegalityRes = *ActionPtr->LegalityRes; |
348 | unsigned Depth = ActionPtr->Depth; |
349 | auto *UserBB = !UserBndl.empty() |
350 | ? cast<Instruction>(Val: UserBndl.front())->getParent() |
351 | : cast<Instruction>(Val: Bndl[0])->getParent(); |
352 | |
353 | switch (LegalityRes.getSubclassID()) { |
354 | case LegalityResultID::Widen: { |
355 | auto *I = cast<Instruction>(Val: Bndl[0]); |
356 | SmallVector<Value *, 2> VecOperands; |
357 | switch (I->getOpcode()) { |
358 | case Instruction::Opcode::Load: |
359 | VecOperands.push_back(Elt: cast<LoadInst>(Val: I)->getPointerOperand()); |
360 | break; |
361 | case Instruction::Opcode::Store: { |
362 | VecOperands.push_back(Elt: ActionPtr->Operands[0]->Vec); |
363 | VecOperands.push_back(Elt: cast<StoreInst>(Val: I)->getPointerOperand()); |
364 | break; |
365 | } |
366 | default: |
367 | // Visit all operands. |
368 | for (Action *OpA : ActionPtr->Operands) { |
369 | auto *VecOp = OpA->Vec; |
370 | VecOperands.push_back(Elt: VecOp); |
371 | } |
372 | break; |
373 | } |
374 | NewVec = createVectorInstr(Bndl: ActionPtr->Bndl, Operands: VecOperands); |
375 | // Collect any potentially dead scalar instructions, including the |
376 | // original scalars and pointer operands of loads/stores. |
377 | if (NewVec != nullptr) |
378 | collectPotentiallyDeadInstrs(Bndl); |
379 | break; |
380 | } |
381 | case LegalityResultID::DiamondReuse: { |
382 | NewVec = cast<DiamondReuse>(Val: LegalityRes).getVector()->Vec; |
383 | break; |
384 | } |
385 | case LegalityResultID::DiamondReuseWithShuffle: { |
386 | auto *VecOp = cast<DiamondReuseWithShuffle>(Val: LegalityRes).getVector()->Vec; |
387 | const ShuffleMask &Mask = |
388 | cast<DiamondReuseWithShuffle>(Val: LegalityRes).getMask(); |
389 | NewVec = createShuffle(VecOp, Mask, UserBB); |
390 | assert(NewVec->getType() == VecOp->getType() && |
391 | "Expected same type! Bad mask ?" ); |
392 | break; |
393 | } |
394 | case LegalityResultID::DiamondReuseMultiInput: { |
395 | const auto &Descr = |
396 | cast<DiamondReuseMultiInput>(Val: LegalityRes).getCollectDescr(); |
397 | Type *ResTy = VecUtils::getWideType(ElemTy: Bndl[0]->getType(), NumElts: Bndl.size()); |
398 | |
399 | // TODO: Try to get WhereIt without creating a vector. |
400 | SmallVector<Value *, 4> DescrInstrs; |
401 | for (const auto &ElmDescr : Descr.getDescrs()) { |
402 | auto *V = ElmDescr.needsExtract() ? ElmDescr.getValue()->Vec |
403 | : ElmDescr.getScalar(); |
404 | if (auto *I = dyn_cast<Instruction>(Val: V)) |
405 | DescrInstrs.push_back(Elt: I); |
406 | } |
407 | BasicBlock::iterator WhereIt = |
408 | getInsertPointAfterInstrs(Vals: DescrInstrs, BB: UserBB); |
409 | |
410 | Value *LastV = PoisonValue::get(T: ResTy); |
411 | Context &Ctx = LastV->getContext(); |
412 | unsigned Lane = 0; |
413 | for (const auto &ElmDescr : Descr.getDescrs()) { |
414 | Value *VecOp = nullptr; |
415 | Value *ValueToInsert; |
416 | if (ElmDescr.needsExtract()) { |
417 | VecOp = ElmDescr.getValue()->Vec; |
418 | ConstantInt *IdxC = |
419 | ConstantInt::get(Ty: Type::getInt32Ty(Ctx), V: ElmDescr.getExtractIdx()); |
420 | ValueToInsert = ExtractElementInst::create( |
421 | Vec: VecOp, Idx: IdxC, Pos: WhereIt, Ctx&: VecOp->getContext(), Name: "VExt" ); |
422 | } else { |
423 | ValueToInsert = ElmDescr.getScalar(); |
424 | } |
425 | auto NumLanesToInsert = VecUtils::getNumLanes(V: ValueToInsert); |
426 | if (NumLanesToInsert == 1) { |
427 | // If we are inserting a scalar element then we need a single insert. |
428 | // %VIns = insert %DstVec, %SrcScalar, Lane |
429 | ConstantInt *LaneC = ConstantInt::get(Ty: Type::getInt32Ty(Ctx), V: Lane); |
430 | LastV = InsertElementInst::create(Vec: LastV, NewElt: ValueToInsert, Idx: LaneC, |
431 | Pos: WhereIt, Ctx, Name: "VIns" ); |
432 | } else { |
433 | // If we are inserting a vector element then we need to extract and |
434 | // insert each vector element one by one with a chain of extracts and |
435 | // inserts, for example: |
436 | // %VExt0 = extract %SrcVec, 0 |
437 | // %VIns0 = insert %DstVec, %Vect0, Lane + 0 |
438 | // %VExt1 = extract %SrcVec, 1 |
439 | // %VIns1 = insert %VIns0, %Vect0, Lane + 1 |
440 | for (unsigned LnCnt = 0; LnCnt != NumLanesToInsert; ++LnCnt) { |
441 | auto *ExtrIdxC = ConstantInt::get(Ty: Type::getInt32Ty(Ctx), V: LnCnt); |
442 | auto *ExtrI = ExtractElementInst::create(Vec: ValueToInsert, Idx: ExtrIdxC, |
443 | Pos: WhereIt, Ctx, Name: "VExt" ); |
444 | unsigned InsLane = Lane + LnCnt; |
445 | auto *InsLaneC = ConstantInt::get(Ty: Type::getInt32Ty(Ctx), V: InsLane); |
446 | LastV = InsertElementInst::create(Vec: LastV, NewElt: ExtrI, Idx: InsLaneC, Pos: WhereIt, |
447 | Ctx, Name: "VIns" ); |
448 | } |
449 | } |
450 | Lane += NumLanesToInsert; |
451 | } |
452 | NewVec = LastV; |
453 | break; |
454 | } |
455 | case LegalityResultID::Pack: { |
456 | // If we can't vectorize the seeds then just return. |
457 | if (Depth == 0) |
458 | return nullptr; |
459 | NewVec = createPack(ToPack: Bndl, UserBB); |
460 | break; |
461 | } |
462 | } |
463 | if (NewVec != nullptr) { |
464 | Change = true; |
465 | ActionPtr->Vec = NewVec; |
466 | } |
467 | #ifndef NDEBUG |
468 | if (AlwaysVerify) { |
469 | // This helps find broken IR by constantly verifying the function. Note |
470 | // that this is very expensive and should only be used for debugging. |
471 | Instruction *I0 = isa<Instruction>(Bndl[0]) |
472 | ? cast<Instruction>(Bndl[0]) |
473 | : cast<Instruction>(UserBndl[0]); |
474 | assert(!Utils::verifyFunction(I0->getParent()->getParent(), dbgs()) && |
475 | "Broken function!" ); |
476 | } |
477 | #endif // NDEBUG |
478 | } |
479 | return NewVec; |
480 | } |
481 | |
482 | bool BottomUpVec::tryVectorize(ArrayRef<Value *> Bndl, |
483 | LegalityAnalysis &Legality) { |
484 | Change = false; |
485 | if (LLVM_UNLIKELY(BottomUpInvocationCnt++ >= StopAt && |
486 | StopAt != StopAtDisabled)) |
487 | return false; |
488 | DeadInstrCandidates.clear(); |
489 | Legality.clear(); |
490 | Actions.clear(); |
491 | DebugBndlCnt = 0; |
492 | vectorizeRec(Bndl, UserBndl: {}, /*Depth=*/0, Legality); |
493 | LLVM_DEBUG(dbgs() << DEBUG_PREFIX << "BottomUpVec: Vectorization Actions:\n" ; |
494 | Actions.dump()); |
495 | emitVectors(); |
496 | tryEraseDeadInstrs(); |
497 | return Change; |
498 | } |
499 | |
500 | bool BottomUpVec::runOnRegion(Region &Rgn, const Analyses &A) { |
501 | const auto &SeedSlice = Rgn.getAux(); |
502 | assert(SeedSlice.size() >= 2 && "Bad slice!" ); |
503 | Function &F = *SeedSlice[0]->getParent()->getParent(); |
504 | IMaps = std::make_unique<InstrMaps>(); |
505 | LegalityAnalysis Legality(A.getAA(), A.getScalarEvolution(), |
506 | F.getParent()->getDataLayout(), F.getContext(), |
507 | *IMaps); |
508 | |
509 | // TODO: Refactor to remove the unnecessary copy to SeedSliceVals. |
510 | SmallVector<Value *> SeedSliceVals(SeedSlice.begin(), SeedSlice.end()); |
511 | // Try to vectorize starting from the seed slice. The returned value |
512 | // is true if we found vectorizable code and generated some vector |
513 | // code for it. It does not mean that the code is profitable. |
514 | return tryVectorize(Bndl: SeedSliceVals, Legality); |
515 | } |
516 | |
517 | } // namespace sandboxir |
518 | } // namespace llvm |
519 | |