AArch64ExpandImm.cpp source code [llvm_projects/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp]

1	//===- AArch64ExpandImm.h - AArch64 Immediate Expansion -------------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements the AArch64ExpandImm stuff.
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "AArch64.h"
14	#include "AArch64ExpandImm.h"
15	#include "MCTargetDesc/AArch64AddressingModes.h"
16
17	using namespace llvm;
18	using namespace llvm::AArch64_IMM;
19
20	/// Helper function which extracts the specified 16-bit chunk from a
21	/// 64-bit value.
22	static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
23	assert(ChunkIdx < `4` && "Out of range chunk index specified!");
24
25	return (Imm >> (ChunkIdx * `16`)) & `0xFFFF`;
26	}
27
28	/// Check whether the given 16-bit chunk replicated to full 64-bit width
29	/// can be materialized with an ORR instruction.
30	static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
31	Chunk = (Chunk << `48`) \| (Chunk << `32`) \| (Chunk << `16`) \| Chunk;
32
33	return AArch64_AM::processLogicalImmediate(Imm: Chunk, RegSize: `64`, Encoding);
34	}
35
36	/// Check for identical 16-bit chunks within the constant and if so
37	/// materialize them with a single ORR instruction. The remaining one or two
38	/// 16-bit chunks will be materialized with MOVK instructions.
39	///
40	/// This allows us to materialize constants like \|A\|B\|A\|A\| or \|A\|B\|C\|A\| (order
41	/// of the chunks doesn't matter), assuming \|A\|A\|A\|A\| can be materialized with
42	/// an ORR instruction.
43	static bool tryToreplicateChunks(uint64_t UImm,
44	SmallVectorImpl<ImmInsnModel> &Insn) {
45	using CountMap = DenseMap<uint64_t, unsigned>;
46
47	CountMap Counts;
48
49	// Scan the constant and count how often every chunk occurs.
50	for (unsigned Idx = `0`; Idx < `4`; ++Idx)
51	++Counts [getChunk(Imm: UImm, ChunkIdx: Idx)];
52
53	// Traverse the chunks to find one which occurs more than once.
54	for (const auto &Chunk : Counts) {
55	const uint64_t ChunkVal = Chunk.first;
56	const unsigned Count = Chunk.second;
57
58	uint64_t Encoding = `0`;
59
60	// We are looking for chunks which have two or three instances and can be
61	// materialized with an ORR instruction.
62	if ((Count != `2` && Count != `3`) \|\| !canUseOrr(Chunk: ChunkVal, Encoding))
63	continue;
64
65	const bool CountThree = Count == `3`;
66
67	Insn.push_back(Elt: { .Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: Encoding });
68
69	unsigned ShiftAmt = `0`;
70	uint64_t Imm16 = `0`;
71	// Find the first chunk not materialized with the ORR instruction.
72	for (; ShiftAmt < `64`; ShiftAmt += `16`) {
73	Imm16 = (UImm >> ShiftAmt) & `0xFFFF`;
74
75	if (Imm16 != ChunkVal)
76	break;
77	}
78
79	// Create the first MOVK instruction.
80	Insn.push_back(Elt: { .Opcode: AArch64::MOVKXi, .Op1: Imm16,
81	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt) });
82
83	// In case we have three instances the whole constant is now materialized
84	// and we can exit.
85	if (CountThree)
86	return true;
87
88	// Find the remaining chunk which needs to be materialized.
89	for (ShiftAmt += `16`; ShiftAmt < `64`; ShiftAmt += `16`) {
90	Imm16 = (UImm >> ShiftAmt) & `0xFFFF`;
91
92	if (Imm16 != ChunkVal)
93	break;
94	}
95	Insn.push_back(Elt: { .Opcode: AArch64::MOVKXi, .Op1: Imm16,
96	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt) });
97	return true;
98	}
99
100	return false;
101	}
102
103	/// Check whether this chunk matches the pattern '1...0...'. This pattern
104	/// starts a contiguous sequence of ones if we look at the bits from the LSB
105	/// towards the MSB.
106	static bool isStartChunk(uint64_t Chunk) {
107	if (Chunk == `0` \|\| Chunk == std::numeric_limits<uint64_t>::max())
108	return false;
109
110	return isMask_64(Value: ~Chunk);
111	}
112
113	/// Check whether this chunk matches the pattern '0...1...' This pattern
114	/// ends a contiguous sequence of ones if we look at the bits from the LSB
115	/// towards the MSB.
116	static bool isEndChunk(uint64_t Chunk) {
117	if (Chunk == `0` \|\| Chunk == std::numeric_limits<uint64_t>::max())
118	return false;
119
120	return isMask_64(Value: Chunk);
121	}
122
123	/// Clear or set all bits in the chunk at the given index.
124	static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
125	const uint64_t Mask = `0xFFFF`;
126
127	if (Clear)
128	// Clear chunk in the immediate.
129	Imm &= ~(Mask << (Idx * `16`));
130	else
131	// Set all bits in the immediate for the particular chunk.
132	Imm \|= Mask << (Idx * `16`);
133
134	return Imm;
135	}
136
137	/// Check whether the constant contains a sequence of contiguous ones,
138	/// which might be interrupted by one or two chunks. If so, materialize the
139	/// sequence of contiguous ones with an ORR instruction.
140	/// Materialize the chunks which are either interrupting the sequence or outside
141	/// of the sequence with a MOVK instruction.
142	///
143	/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
144	/// which ends the sequence (0...1...). Then we are looking for constants which
145	/// contain at least one S and E chunk.
146	/// E.g. \|E\|A\|B\|S\|, \|A\|E\|B\|S\| or \|A\|B\|E\|S\|.
147	///
148	/// We are also looking for constants like \|S\|A\|B\|E\| where the contiguous
149	/// sequence of ones wraps around the MSB into the LSB.
150	static bool trySequenceOfOnes(uint64_t UImm,
151	SmallVectorImpl<ImmInsnModel> &Insn) {
152	const int NotSet = -`1`;
153	const uint64_t Mask = `0xFFFF`;
154
155	int StartIdx = NotSet;
156	int EndIdx = NotSet;
157	// Try to find the chunks which start/end a contiguous sequence of ones.
158	for (int Idx = `0`; Idx < `4`; ++Idx) {
159	int64_t Chunk = getChunk(Imm: UImm, ChunkIdx: Idx);
160	// Sign extend the 16-bit chunk to 64-bit.
161	Chunk = (Chunk << `48`) >> `48`;
162
163	if (isStartChunk(Chunk))
164	StartIdx = Idx;
165	else if (isEndChunk(Chunk))
166	EndIdx = Idx;
167	}
168
169	// Early exit in case we can't find a start/end chunk.
170	if (StartIdx == NotSet \|\| EndIdx == NotSet)
171	return false;
172
173	// Outside of the contiguous sequence of ones everything needs to be zero.
174	uint64_t Outside = `0`;
175	// Chunks between the start and end chunk need to have all their bits set.
176	uint64_t Inside = Mask;
177
178	// If our contiguous sequence of ones wraps around from the MSB into the LSB,
179	// just swap indices and pretend we are materializing a contiguous sequence
180	// of zeros surrounded by a contiguous sequence of ones.
181	if (StartIdx > EndIdx) {
182	std::swap(a&: StartIdx, b&: EndIdx);
183	std::swap(a&: Outside, b&: Inside);
184	}
185
186	uint64_t OrrImm = UImm;
187	int FirstMovkIdx = NotSet;
188	int SecondMovkIdx = NotSet;
189
190	// Find out which chunks we need to patch up to obtain a contiguous sequence
191	// of ones.
192	for (int Idx = `0`; Idx < `4`; ++Idx) {
193	const uint64_t Chunk = getChunk(Imm: UImm, ChunkIdx: Idx);
194
195	// Check whether we are looking at a chunk which is not part of the
196	// contiguous sequence of ones.
197	if ((Idx < StartIdx \|\| EndIdx < Idx) && Chunk != Outside) {
198	OrrImm = updateImm(Imm: OrrImm, Idx, Clear: Outside == `0`);
199
200	// Remember the index we need to patch.
201	if (FirstMovkIdx == NotSet)
202	FirstMovkIdx = Idx;
203	else
204	SecondMovkIdx = Idx;
205
206	// Check whether we are looking a chunk which is part of the contiguous
207	// sequence of ones.
208	} else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
209	OrrImm = updateImm(Imm: OrrImm, Idx, Clear: Inside != Mask);
210
211	// Remember the index we need to patch.
212	if (FirstMovkIdx == NotSet)
213	FirstMovkIdx = Idx;
214	else
215	SecondMovkIdx = Idx;
216	}
217	}
218	assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
219
220	// Create the ORR-immediate instruction.
221	uint64_t Encoding = `0`;
222	AArch64_AM::processLogicalImmediate(Imm: OrrImm, RegSize: `64`, Encoding);
223	Insn.push_back(Elt: { .Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: Encoding });
224
225	const bool SingleMovk = SecondMovkIdx == NotSet;
226	Insn.push_back(Elt: { .Opcode: AArch64::MOVKXi, .Op1: getChunk(Imm: UImm, ChunkIdx: FirstMovkIdx),
227	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL,
228	Imm: FirstMovkIdx * `16`) });
229
230	// Early exit in case we only need to emit a single MOVK instruction.
231	if (SingleMovk)
232	return true;
233
234	// Create the second MOVK instruction.
235	Insn.push_back(Elt: { .Opcode: AArch64::MOVKXi, .Op1: getChunk(Imm: UImm, ChunkIdx: SecondMovkIdx),
236	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL,
237	Imm: SecondMovkIdx * `16`) });
238
239	return true;
240	}
241
242	static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
243	uint64_t NumOnes = llvm::countr_one(Value: V >> StartPosition);
244
245	uint64_t UnshiftedOnes;
246	if (NumOnes == `64`) {
247	UnshiftedOnes = ~`0ULL`;
248	} else {
249	UnshiftedOnes = (`1ULL` << NumOnes) - `1`;
250	}
251	return UnshiftedOnes << StartPosition;
252	}
253
254	static uint64_t MaximallyReplicateSubImmediate(uint64_t V, uint64_t Subset) {
255	uint64_t Result = Subset;
256
257	// 64, 32, 16, 8, 4, 2
258	for (uint64_t i = `0`; i < `6`; ++i) {
259	uint64_t Rotation = `1ULL` << (`6` - i);
260	uint64_t Closure = Result \| llvm::rotl<uint64_t>(V: Result, R: Rotation);
261	if (Closure != (Closure & V)) {
262	break;
263	}
264	Result = Closure;
265	}
266
267	return Result;
268	}
269
270	// Find the logical immediate that covers the most bits in RemainingBits,
271	// allowing for additional bits to be set that were set in OriginalBits.
272	static uint64_t maximalLogicalImmWithin(uint64_t RemainingBits,
273	uint64_t OriginalBits) {
274	// Find the first set bit.
275	uint32_t Position = llvm::countr_zero(Val: RemainingBits);
276
277	// Get the first run of set bits.
278	uint64_t FirstRun = GetRunOfOnesStartingAt(V: OriginalBits, StartPosition: Position);
279
280	// Replicate the run as many times as possible, as long as the bits are set in
281	// RemainingBits.
282	uint64_t MaximalImm = MaximallyReplicateSubImmediate(V: OriginalBits, Subset: FirstRun);
283
284	return MaximalImm;
285	}
286
287	static std::optional<std::pair<uint64_t, uint64_t>>
288	decomposeIntoOrrOfLogicalImmediates(uint64_t UImm) {
289	if (UImm == `0` \|\| ~UImm == `0`)
290	return std::nullopt;
291
292	// Make sure we don't have a run of ones split around the rotation boundary.
293	uint32_t InitialTrailingOnes = llvm::countr_one(Value: UImm);
294	uint64_t RotatedBits = llvm::rotr<uint64_t>(V: UImm, R: InitialTrailingOnes);
295
296	// Find the largest logical immediate that fits within the full immediate.
297	uint64_t MaximalImm1 = maximalLogicalImmWithin(RemainingBits: RotatedBits, OriginalBits: RotatedBits);
298
299	// Remove all bits that are set by this mask.
300	uint64_t RemainingBits = RotatedBits & ~MaximalImm1;
301
302	// Find the largest logical immediate covering the remaining bits, allowing
303	// for additional bits to be set that were also set in the original immediate.
304	uint64_t MaximalImm2 = maximalLogicalImmWithin(RemainingBits, OriginalBits: RotatedBits);
305
306	// If any bits still haven't been covered, then give up.
307	if (RemainingBits & ~MaximalImm2)
308	return std::nullopt;
309
310	// Make sure to un-rotate the immediates.
311	return std::make_pair(x: rotl(V: MaximalImm1, R: InitialTrailingOnes),
312	y: rotl(V: MaximalImm2, R: InitialTrailingOnes));
313	}
314
315	// Attempt to expand an immediate as the ORR of a pair of logical immediates.
316	static bool tryOrrOfLogicalImmediates(uint64_t UImm,
317	SmallVectorImpl<ImmInsnModel> &Insn) {
318	auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(UImm);
319	if (MaybeDecomposition == std::nullopt)
320	return false;
321	uint64_t Imm1 = MaybeDecomposition ->first;
322	uint64_t Imm2 = MaybeDecomposition ->second;
323
324	uint64_t Encoding1, Encoding2;
325	bool Imm1Success = AArch64_AM::processLogicalImmediate(Imm: Imm1, RegSize: `64`, Encoding&: Encoding1);
326	bool Imm2Success = AArch64_AM::processLogicalImmediate(Imm: Imm2, RegSize: `64`, Encoding&: Encoding2);
327
328	if (Imm1Success && Imm2Success) {
329	// Create the ORR-immediate instructions.
330	Insn.push_back(Elt: {.Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: Encoding1});
331	Insn.push_back(Elt: {.Opcode: AArch64::ORRXri, .Op1: `1`, .Op2: Encoding2});
332	return true;
333	}
334
335	return false;
336	}
337
338	// Attempt to expand an immediate as the AND of a pair of logical immediates.
339	// This is done by applying DeMorgan's law, under which logical immediates
340	// are closed.
341	static bool tryAndOfLogicalImmediates(uint64_t UImm,
342	SmallVectorImpl<ImmInsnModel> &Insn) {
343	// Apply DeMorgan's law to turn this into an ORR problem.
344	auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(UImm: ~UImm);
345	if (MaybeDecomposition == std::nullopt)
346	return false;
347	uint64_t Imm1 = MaybeDecomposition ->first;
348	uint64_t Imm2 = MaybeDecomposition ->second;
349
350	uint64_t Encoding1, Encoding2;
351	bool Imm1Success = AArch64_AM::processLogicalImmediate(Imm: ~Imm1, RegSize: `64`, Encoding&: Encoding1);
352	bool Imm2Success = AArch64_AM::processLogicalImmediate(Imm: ~Imm2, RegSize: `64`, Encoding&: Encoding2);
353
354	if (Imm1Success && Imm2Success) {
355	// Materialize Imm1, the LHS of the AND
356	Insn.push_back(Elt: {.Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: Encoding1});
357	// AND Imm1 with Imm2
358	Insn.push_back(Elt: {.Opcode: AArch64::ANDXri, .Op1: `1`, .Op2: Encoding2});
359	return true;
360	}
361
362	return false;
363	}
364
365	// Check whether the constant can be represented by exclusive-or of two 64-bit
366	// logical immediates. If so, materialize it with an ORR instruction followed
367	// by an EOR instruction.
368	//
369	// This encoding allows all remaining repeated byte patterns, and many repeated
370	// 16-bit values, to be encoded without needing four instructions. It can also
371	// represent some irregular bitmasks (although those would mostly only need
372	// three instructions otherwise).
373	static bool tryEorOfLogicalImmediates(uint64_t Imm,
374	SmallVectorImpl<ImmInsnModel> &Insn) {
375	// Determine the larger repetition size of the two possible logical
376	// immediates, by finding the repetition size of Imm.
377	unsigned BigSize = `64`;
378
379	do {
380	BigSize /= `2`;
381	uint64_t Mask = (`1ULL` << BigSize) - `1`;
382
383	if ((Imm & Mask) != ((Imm >> BigSize) & Mask)) {
384	BigSize *= `2`;
385	break;
386	}
387	} while (BigSize > `2`);
388
389	uint64_t BigMask = ((uint64_t)-`1LL`) >> (`64` - BigSize);
390
391	// Find the last bit of each run of ones, circularly. For runs which wrap
392	// around from bit 0 to bit 63, this is the bit before the most-significant
393	// zero, otherwise it is the least-significant bit in the run of ones.
394	uint64_t RunStarts = Imm & ~rotl<uint64_t>(V: Imm, R: `1`);
395
396	// Find the smaller repetition size of the two possible logical immediates by
397	// counting the number of runs of one-bits within the BigSize-bit value. Both
398	// sizes may be the same. The EOR may add one or subtract one from the
399	// power-of-two count that can be represented by a logical immediate, or it
400	// may be left unchanged.
401	int RunsPerBigChunk = popcount(Value: RunStarts & BigMask);
402
403	static const int8_t BigToSmallSizeTable[`32`] = {
404	-`1`, -`1`, `0`, `1`, `2`, `2`, -`1`, `3`, `3`, `3`, -`1`, -`1`, -`1`, -`1`, -`1`, `4`,
405	`4`, `4`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, `5`,
406	};
407
408	int BigToSmallShift = BigToSmallSizeTable[RunsPerBigChunk];
409
410	// Early-exit if the big chunk couldn't be a power-of-two number of runs
411	// EORed with another single run.
412	if (BigToSmallShift == -`1`)
413	return false;
414
415	unsigned SmallSize = BigSize >> BigToSmallShift;
416
417	// 64-bit values with a bit set every (1 << index) bits.
418	static const uint64_t RepeatedOnesTable[] = {
419	`0xffffffffffffffff`, `0x5555555555555555`, `0x1111111111111111`,
420	`0x0101010101010101`, `0x0001000100010001`, `0x0000000100000001`,
421	`0x0000000000000001`,
422	};
423
424	// This RepeatedOnesTable lookup is a faster implementation of the division
425	// 0xffffffffffffffff / ((1 << SmallSize) - 1), and can be thought of as
426	// dividing the 64-bit value into fields of width SmallSize, and placing a
427	// one in the least significant bit of each field.
428	uint64_t SmallOnes = RepeatedOnesTable[countr_zero(Val: SmallSize)];
429
430	// Now we try to find the number of ones in each of the smaller repetitions,
431	// by looking at runs of ones in Imm. This can take three attempts, as the
432	// EOR may have changed the length of the first two runs we find.
433
434	// Rotate a run of ones so we can count the number of trailing set bits.
435	int Rotation = countr_zero(Val: RunStarts);
436	uint64_t RotatedImm = rotr<uint64_t>(V: Imm, R: Rotation);
437	for (int Attempt = `0`; Attempt < `3`; ++Attempt) {
438	unsigned RunLength = countr_one(Value: RotatedImm);
439
440	// Construct candidate values BigImm and SmallImm, such that if these two
441	// values are encodable, we have a solution. (SmallImm is constructed to be
442	// encodable, but this isn't guaranteed when RunLength >= SmallSize)
443	uint64_t SmallImm =
444	rotl<uint64_t>(V: (SmallOnes << RunLength) - SmallOnes, R: Rotation);
445	uint64_t BigImm = Imm ^ SmallImm;
446
447	uint64_t BigEncoding = `0`;
448	uint64_t SmallEncoding = `0`;
449	if (AArch64_AM::processLogicalImmediate(Imm: BigImm, RegSize: `64`, Encoding&: BigEncoding) &&
450	AArch64_AM::processLogicalImmediate(Imm: SmallImm, RegSize: `64`, Encoding&: SmallEncoding)) {
451	Insn.push_back(Elt: {.Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: SmallEncoding});
452	Insn.push_back(Elt: {.Opcode: AArch64::EORXri, .Op1: `1`, .Op2: BigEncoding});
453	return true;
454	}
455
456	// Rotate to the next run of ones
457	Rotation += countr_zero(Val: rotr<uint64_t>(V: RunStarts, R: Rotation) & ~`1`);
458	RotatedImm = rotr<uint64_t>(V: Imm, R: Rotation);
459	}
460
461	return false;
462	}
463
464	/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
465	/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
466	static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
467	unsigned OneChunks, unsigned ZeroChunks,
468	SmallVectorImpl<ImmInsnModel> &Insn) {
469	const unsigned Mask = `0xFFFF`;
470
471	// Use a MOVZ or MOVN instruction to set the high bits, followed by one or
472	// more MOVK instructions to insert additional 16-bit portions into the
473	// lower bits.
474	bool isNeg = false;
475
476	// Use MOVN to materialize the high bits if we have more all one chunks
477	// than all zero chunks.
478	if (OneChunks > ZeroChunks) {
479	isNeg = true;
480	Imm = ~Imm;
481	}
482
483	unsigned FirstOpc;
484	if (BitSize == `32`) {
485	Imm &= (`1LL` << `32`) - `1`;
486	FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
487	} else {
488	FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
489	}
490	unsigned Shift = `0`; // LSL amount for high bits with MOVZ/MOVN
491	unsigned LastShift = `0`; // LSL amount for last MOVK
492	if (Imm != `0`) {
493	unsigned LZ = llvm::countl_zero(Val: Imm);
494	unsigned TZ = llvm::countr_zero(Val: Imm);
495	Shift = (TZ / `16`) * `16`;
496	LastShift = ((`63` - LZ) / `16`) * `16`;
497	}
498	unsigned Imm16 = (Imm >> Shift) & Mask;
499
500	Insn.push_back(Elt: { .Opcode: FirstOpc, .Op1: Imm16,
501	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: Shift) });
502
503	if (Shift == LastShift)
504	return;
505
506	// If a MOVN was used for the high bits of a negative value, flip the rest
507	// of the bits back for use with MOVK.
508	if (isNeg)
509	Imm = ~Imm;
510
511	unsigned Opc = (BitSize == `32` ? AArch64::MOVKWi : AArch64::MOVKXi);
512	while (Shift < LastShift) {
513	Shift += `16`;
514	Imm16 = (Imm >> Shift) & Mask;
515	if (Imm16 == (isNeg ? Mask : `0`))
516	continue; // This 16-bit portion is already set correctly.
517
518	Insn.push_back(Elt: { .Opcode: Opc, .Op1: Imm16,
519	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: Shift) });
520	}
521
522	// Now, we get 16-bit divided Imm. If high and low bits are same in
523	// 32-bit, there is an opportunity to reduce instruction.
524	if (Insn.size() > `2` && (Imm >> `32`) == (Imm & `0xffffffffULL`)) {
525	for (int Size = Insn.size(); Size > `2`; Size--)
526	Insn.pop_back();
527	Insn.push_back(Elt: {.Opcode: AArch64::ORRXrs, .Op1: `0`, .Op2: `32`});
528	}
529	}
530
531	/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
532	/// real move-immediate instructions to synthesize the immediate.
533	void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
534	SmallVectorImpl<ImmInsnModel> &Insn) {
535	const unsigned Mask = `0xFFFF`;
536
537	// Scan the immediate and count the number of 16-bit chunks which are either
538	// all ones or all zeros.
539	unsigned OneChunks = `0`;
540	unsigned ZeroChunks = `0`;
541	for (unsigned Shift = `0`; Shift < BitSize; Shift += `16`) {
542	const unsigned Chunk = (Imm >> Shift) & Mask;
543	if (Chunk == Mask)
544	OneChunks++;
545	else if (Chunk == `0`)
546	ZeroChunks++;
547	}
548
549	// Prefer MOVZ/MOVN over ORR because of the rules for the "mov" alias.
550	if ((BitSize / `16`) - OneChunks <= `1` \|\| (BitSize / `16`) - ZeroChunks <= `1`) {
551	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
552	return;
553	}
554
555	// Try a single ORR.
556	uint64_t UImm = Imm << (`64` - BitSize) >> (`64` - BitSize);
557	uint64_t Encoding;
558	if (AArch64_AM::processLogicalImmediate(Imm: UImm, RegSize: BitSize, Encoding)) {
559	unsigned Opc = (BitSize == `32` ? AArch64::ORRWri : AArch64::ORRXri);
560	Insn.push_back(Elt: { .Opcode: Opc, .Op1: `0`, .Op2: Encoding });
561	return;
562	}
563
564	// One to up three instruction sequences.
565	//
566	// Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
567	// fastest sequence with fast literal generation.
568	if (OneChunks >= (BitSize / `16`) - `2` \|\| ZeroChunks >= (BitSize / `16`) - `2`) {
569	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
570	return;
571	}
572
573	assert(BitSize == `64` && "All 32-bit immediates can be expanded with a"
574	"MOVZ/MOVK pair");
575
576	// Try other two-instruction sequences.
577
578	// 64-bit ORR followed by MOVK.
579	// We try to construct the ORR immediate in three different ways: either we
580	// zero out the chunk which will be replaced, we fill the chunk which will
581	// be replaced with ones, or we take the bit pattern from the other half of
582	// the 64-bit immediate. This is comprehensive because of the way ORR
583	// immediates are constructed.
584	for (unsigned Shift = `0`; Shift < BitSize; Shift += `16`) {
585	uint64_t ShiftedMask = (`0xFFFFULL` << Shift);
586	uint64_t ZeroChunk = UImm & ~ShiftedMask;
587	uint64_t OneChunk = UImm \| ShiftedMask;
588	uint64_t RotatedImm = (UImm << `32`) \| (UImm >> `32`);
589	uint64_t ReplicateChunk = ZeroChunk \| (RotatedImm & ShiftedMask);
590	if (AArch64_AM::processLogicalImmediate(Imm: ZeroChunk, RegSize: BitSize, Encoding) \|\|
591	AArch64_AM::processLogicalImmediate(Imm: OneChunk, RegSize: BitSize, Encoding) \|\|
592	AArch64_AM::processLogicalImmediate(Imm: ReplicateChunk, RegSize: BitSize,
593	Encoding)) {
594	// Create the ORR-immediate instruction.
595	Insn.push_back(Elt: { .Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: Encoding });
596
597	// Create the MOVK instruction.
598	const unsigned Imm16 = getChunk(Imm: UImm, ChunkIdx: Shift / `16`);
599	Insn.push_back(Elt: { .Opcode: AArch64::MOVKXi, .Op1: Imm16,
600	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: Shift) });
601	return;
602	}
603	}
604
605	// Attempt to use a sequence of two ORR-immediate instructions.
606	if (tryOrrOfLogicalImmediates(UImm: Imm, Insn))
607	return;
608
609	// Attempt to use a sequence of ORR-immediate followed by AND-immediate.
610	if (tryAndOfLogicalImmediates(UImm: Imm, Insn))
611	return;
612
613	// Attempt to use a sequence of ORR-immediate followed by EOR-immediate.
614	if (tryEorOfLogicalImmediates(Imm: UImm, Insn))
615	return;
616
617	// FIXME: Add more two-instruction sequences.
618
619	// Three instruction sequences.
620	//
621	// Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
622	// the fastest sequence with fast literal generation. (If neither MOVK is
623	// part of a fast literal generation pair, it could be slower than the
624	// four-instruction sequence, but we won't worry about that for now.)
625	if (OneChunks \|\| ZeroChunks) {
626	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
627	return;
628	}
629
630	// Check for identical 16-bit chunks within the constant and if so materialize
631	// them with a single ORR instruction. The remaining one or two 16-bit chunks
632	// will be materialized with MOVK instructions.
633	if (BitSize == `64` && tryToreplicateChunks(UImm, Insn))
634	return;
635
636	// Check whether the constant contains a sequence of contiguous ones, which
637	// might be interrupted by one or two chunks. If so, materialize the sequence
638	// of contiguous ones with an ORR instruction. Materialize the chunks which
639	// are either interrupting the sequence or outside of the sequence with a
640	// MOVK instruction.
641	if (BitSize == `64` && trySequenceOfOnes(UImm, Insn))
642	return;
643
644	// We found no possible two or three instruction sequence; use the general
645	// four-instruction sequence.
646	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
647	}
648

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp