1//===- SeedCollection.cpp - Seed collection pass --------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9#include "llvm/Transforms/Vectorize/SandboxVectorizer/Passes/SeedCollection.h"
10#include "llvm/Analysis/TargetTransformInfo.h"
11#include "llvm/SandboxIR/Module.h"
12#include "llvm/Transforms/Vectorize/SandboxVectorizer/RegionWithScore.h"
13#include "llvm/Transforms/Vectorize/SandboxVectorizer/SandboxVectorizerPassBuilder.h"
14#include "llvm/Transforms/Vectorize/SandboxVectorizer/SeedCollector.h"
15#include "llvm/Transforms/Vectorize/SandboxVectorizer/VecUtils.h"
16
17namespace llvm {
18
19static cl::opt<unsigned>
20 OverrideVecRegBits("sbvec-vec-reg-bits", cl::init(Val: 0), cl::Hidden,
21 cl::desc("Override the vector register size in bits, "
22 "which is otherwise found by querying TTI."));
23static cl::opt<bool>
24 AllowNonPow2("sbvec-allow-non-pow2", cl::init(Val: false), cl::Hidden,
25 cl::desc("Allow non-power-of-2 vectorization."));
26
27#define LoadSeedsDef "loads"
28#define StoreSeedsDef "stores"
29cl::opt<std::string> CollectSeeds(
30 "sbvec-collect-seeds", cl::init(StoreSeedsDef), cl::Hidden,
31 cl::desc("Collect these seeds. Use empty for none or a comma-separated "
32 "list of '" StoreSeedsDef "' and '" LoadSeedsDef "'."));
33
34namespace sandboxir {
35
36SeedCollection::SeedCollection(StringRef Pipeline, StringRef AuxArg)
37 : FunctionPass("seed-collection"),
38 RPM("rpm", Pipeline, SandboxVectorizerPassBuilder::createRegionPass) {
39 if (!AuxArg.empty()) {
40 if (AuxArg != DiffTypesArgStr) {
41 std::string ErrStr;
42 raw_string_ostream ErrSS(ErrStr);
43 ErrSS << "SeedCollection only supports '" << DiffTypesArgStr
44 << "' aux argument!\n";
45 reportFatalUsageError(reason: ErrStr.c_str());
46 }
47 AllowDiffTypes = true;
48 }
49}
50
51bool SeedCollection::runOnFunction(Function &F, const Analyses &A) {
52 bool Change = false;
53 const auto &DL = F.getParent()->getDataLayout();
54 bool CollectStores = CollectSeeds.find(StoreSeedsDef) != std::string::npos;
55 bool CollectLoads = CollectSeeds.find(LoadSeedsDef) != std::string::npos;
56
57 // TODO: Start from innermost BBs first
58 for (auto &BB : F) {
59 SeedCollector SC(&BB, A.getScalarEvolution(), CollectStores, CollectLoads,
60 AllowDiffTypes);
61 for (auto &SeedRange : {SC.getStoreSeeds(), SC.getLoadSeeds()}) {
62 for (SeedBundle &Seeds : SeedRange) {
63 unsigned ElmBits =
64 Utils::getNumBits(Ty: VecUtils::getElementType(Ty: Utils::getExpectedType(
65 V: Seeds[Seeds.getFirstUnusedElementIdx()])),
66 DL);
67 unsigned AS = getLoadStoreAddressSpace(I: Seeds[0]);
68 unsigned VecRegBits = OverrideVecRegBits != 0
69 ? OverrideVecRegBits
70 : A.getTTI().getLoadStoreVecRegBitWidth(AddrSpace: AS);
71
72 auto DivideBy2 = [](unsigned Num) {
73 auto Floor = VecUtils::getFloorPowerOf2(Num);
74 if (Floor == Num)
75 return Floor / 2;
76 return Floor;
77 };
78 // Try to create the largest vector supported by the target. If it fails
79 // reduce the vector size by half.
80 for (unsigned SliceElms = std::min(a: VecRegBits / ElmBits,
81 b: Seeds.getNumUnusedBits() / ElmBits);
82 SliceElms >= 2u; SliceElms = DivideBy2(SliceElms)) {
83 if (Seeds.allUsed())
84 break;
85 // Keep trying offsets after FirstUnusedElementIdx, until we vectorize
86 // the slice. This could be quite expensive, so we enforce a limit.
87 for (unsigned Offset = Seeds.getFirstUnusedElementIdx(),
88 OE = Seeds.size();
89 Offset + 1 < OE; Offset += 1) {
90 // Seeds are getting used as we vectorize, so skip them.
91 if (Seeds.isUsed(Element: Offset))
92 continue;
93 if (Seeds.allUsed())
94 break;
95
96 auto SeedSlice =
97 Seeds.getSlice(StartIdx: Offset, MaxVecRegBits: SliceElms * ElmBits, ForcePowOf2: !AllowNonPow2);
98 if (SeedSlice.empty())
99 continue;
100
101 assert(SeedSlice.size() >= 2 && "Should have been rejected!");
102
103 // Create a region containing the seed slice.
104 auto &Ctx = F.getContext();
105 RegionWithScore Rgn(Ctx, A.getTTI());
106 Rgn.setAux(SeedSlice);
107 // Run the region pass pipeline.
108 Change |= RPM.runOnRegion(R&: Rgn, A);
109 Rgn.clearAux();
110 }
111 }
112 }
113 }
114 }
115 return Change;
116}
117} // namespace sandboxir
118} // namespace llvm
119