AArch64ExpandImm.cpp source code [llvm_projects/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp]

1	//===- AArch64ExpandImm.cpp - AArch64 Immediate Expansion -----------------===//
2	//
3	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4	// See https://llvm.org/LICENSE.txt for license information.
5	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6	//
7	//===----------------------------------------------------------------------===//
8	//
9	// This file implements AArch64 immediate materialization (expandMOVImm).
10	//
11	//===----------------------------------------------------------------------===//
12
13	#include "AArch64.h"
14	#include "AArch64ExpandImm.h"
15	#include "MCTargetDesc/AArch64AddressingModes.h"
16
17	using namespace llvm;
18	using namespace llvm::AArch64_IMM;
19
/// Helper function which extracts the specified 16-bit chunk from a
/// 64-bit value.
///
/// \param Imm      The 64-bit value to slice.
/// \param ChunkIdx Index of the chunk to extract; 0 selects bits [15:0] and
///                 3 selects bits [63:48].
/// \returns The selected chunk, zero-extended to 64 bits.
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
  assert(ChunkIdx < 4 && "Out of range chunk index specified!");

  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
}
27
28	/// Check whether the given 16-bit chunk replicated to full 64-bit width
29	/// can be materialized with an ORR instruction.
30	static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
31	Chunk = (Chunk << `48`) \| (Chunk << `32`) \| (Chunk << `16`) \| Chunk;
32
33	return AArch64_AM::processLogicalImmediate(Imm: Chunk, RegSize: `64`, Encoding);
34	}
35
36	/// Check for identical 16-bit chunks within the constant and if so
37	/// materialize them with a single ORR instruction. The remaining one or two
38	/// 16-bit chunks will be materialized with MOVK instructions.
39	///
40	/// This allows us to materialize constants like \|A\|B\|A\|A\| or \|A\|B\|C\|A\| (order
41	/// of the chunks doesn't matter), assuming \|A\|A\|A\|A\| can be materialized with
42	/// an ORR instruction.
43	static bool tryToreplicateChunks(uint64_t UImm,
44	SmallVectorImpl<ImmInsnModel> &Insn) {
45	using CountMap = DenseMap<uint64_t, unsigned>;
46
47	CountMap Counts;
48
49	// Scan the constant and count how often every chunk occurs.
50	for (unsigned Idx = `0`; Idx < `4`; ++Idx)
51	++Counts [getChunk(Imm: UImm, ChunkIdx: Idx)];
52
53	// Traverse the chunks to find one which occurs more than once.
54	for (const auto &Chunk : Counts) {
55	const uint64_t ChunkVal = Chunk.first;
56	const unsigned Count = Chunk.second;
57
58	uint64_t Encoding = `0`;
59
60	// We are looking for chunks which have two or three instances and can be
61	// materialized with an ORR instruction.
62	if ((Count != `2` && Count != `3`) \|\| !canUseOrr(Chunk: ChunkVal, Encoding))
63	continue;
64
65	const bool CountThree = Count == `3`;
66
67	Insn.push_back(Elt: { .Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: Encoding });
68
69	unsigned ShiftAmt = `0`;
70	uint64_t Imm16 = `0`;
71	// Find the first chunk not materialized with the ORR instruction.
72	for (; ShiftAmt < `64`; ShiftAmt += `16`) {
73	Imm16 = (UImm >> ShiftAmt) & `0xFFFF`;
74
75	if (Imm16 != ChunkVal)
76	break;
77	}
78
79	// Create the first MOVK instruction.
80	Insn.push_back(Elt: { .Opcode: AArch64::MOVKXi, .Op1: Imm16,
81	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt) });
82
83	// In case we have three instances the whole constant is now materialized
84	// and we can exit.
85	if (CountThree)
86	return true;
87
88	// Find the remaining chunk which needs to be materialized.
89	for (ShiftAmt += `16`; ShiftAmt < `64`; ShiftAmt += `16`) {
90	Imm16 = (UImm >> ShiftAmt) & `0xFFFF`;
91
92	if (Imm16 != ChunkVal)
93	break;
94	}
95	Insn.push_back(Elt: { .Opcode: AArch64::MOVKXi, .Op1: Imm16,
96	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: ShiftAmt) });
97	return true;
98	}
99
100	return false;
101	}
102
103	/// Check whether this chunk matches the pattern '1...0...'. This pattern
104	/// starts a contiguous sequence of ones if we look at the bits from the LSB
105	/// towards the MSB.
106	static bool isStartChunk(uint64_t Chunk) {
107	if (Chunk == `0` \|\| Chunk == std::numeric_limits<uint64_t>::max())
108	return false;
109
110	return isMask_64(Value: ~Chunk);
111	}
112
113	/// Check whether this chunk matches the pattern '0...1...' This pattern
114	/// ends a contiguous sequence of ones if we look at the bits from the LSB
115	/// towards the MSB.
116	static bool isEndChunk(uint64_t Chunk) {
117	if (Chunk == `0` \|\| Chunk == std::numeric_limits<uint64_t>::max())
118	return false;
119
120	return isMask_64(Value: Chunk);
121	}
122
/// Clear or set all bits in the chunk at the given index.
///
/// \param Imm   The 64-bit immediate to modify.
/// \param Idx   Index of the 16-bit chunk to overwrite (0 = bits [15:0]).
/// \param Clear If true the chunk becomes 0x0000, otherwise 0xFFFF.
/// \returns \p Imm with the selected chunk overwritten; other chunks are
///          unchanged.
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
  assert(Idx < 4 && "Out of range chunk index specified!");

  const uint64_t Mask = 0xFFFF;

  if (Clear)
    // Clear chunk in the immediate.
    Imm &= ~(Mask << (Idx * 16));
  else
    // Set all bits in the immediate for the particular chunk.
    Imm |= Mask << (Idx * 16);

  return Imm;
}
136
137	/// Check whether the constant contains a sequence of contiguous ones,
138	/// which might be interrupted by one or two chunks. If so, materialize the
139	/// sequence of contiguous ones with an ORR instruction.
140	/// Materialize the chunks which are either interrupting the sequence or outside
141	/// of the sequence with a MOVK instruction.
142	///
143	/// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
144	/// which ends the sequence (0...1...). Then we are looking for constants which
145	/// contain at least one S and E chunk.
146	/// E.g. \|E\|A\|B\|S\|, \|A\|E\|B\|S\| or \|A\|B\|E\|S\|.
147	///
148	/// We are also looking for constants like \|S\|A\|B\|E\| where the contiguous
149	/// sequence of ones wraps around the MSB into the LSB.
150	static bool trySequenceOfOnes(uint64_t UImm,
151	SmallVectorImpl<ImmInsnModel> &Insn) {
152	const int NotSet = -`1`;
153	const uint64_t Mask = `0xFFFF`;
154
155	int StartIdx = NotSet;
156	int EndIdx = NotSet;
157	// Try to find the chunks which start/end a contiguous sequence of ones.
158	for (int Idx = `0`; Idx < `4`; ++Idx) {
159	int64_t Chunk = getChunk(Imm: UImm, ChunkIdx: Idx);
160	// Sign extend the 16-bit chunk to 64-bit.
161	Chunk = (Chunk << `48`) >> `48`;
162
163	if (isStartChunk(Chunk))
164	StartIdx = Idx;
165	else if (isEndChunk(Chunk))
166	EndIdx = Idx;
167	}
168
169	// Early exit in case we can't find a start/end chunk.
170	if (StartIdx == NotSet \|\| EndIdx == NotSet)
171	return false;
172
173	// Outside of the contiguous sequence of ones everything needs to be zero.
174	uint64_t Outside = `0`;
175	// Chunks between the start and end chunk need to have all their bits set.
176	uint64_t Inside = Mask;
177
178	// If our contiguous sequence of ones wraps around from the MSB into the LSB,
179	// just swap indices and pretend we are materializing a contiguous sequence
180	// of zeros surrounded by a contiguous sequence of ones.
181	if (StartIdx > EndIdx) {
182	std::swap(a&: StartIdx, b&: EndIdx);
183	std::swap(a&: Outside, b&: Inside);
184	}
185
186	uint64_t OrrImm = UImm;
187	int FirstMovkIdx = NotSet;
188	int SecondMovkIdx = NotSet;
189
190	// Find out which chunks we need to patch up to obtain a contiguous sequence
191	// of ones.
192	for (int Idx = `0`; Idx < `4`; ++Idx) {
193	const uint64_t Chunk = getChunk(Imm: UImm, ChunkIdx: Idx);
194
195	// Check whether we are looking at a chunk which is not part of the
196	// contiguous sequence of ones.
197	if ((Idx < StartIdx \|\| EndIdx < Idx) && Chunk != Outside) {
198	OrrImm = updateImm(Imm: OrrImm, Idx, Clear: Outside == `0`);
199
200	// Remember the index we need to patch.
201	if (FirstMovkIdx == NotSet)
202	FirstMovkIdx = Idx;
203	else
204	SecondMovkIdx = Idx;
205
206	// Check whether we are looking a chunk which is part of the contiguous
207	// sequence of ones.
208	} else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
209	OrrImm = updateImm(Imm: OrrImm, Idx, Clear: Inside != Mask);
210
211	// Remember the index we need to patch.
212	if (FirstMovkIdx == NotSet)
213	FirstMovkIdx = Idx;
214	else
215	SecondMovkIdx = Idx;
216	}
217	}
218	assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
219
220	// Create the ORR-immediate instruction.
221	uint64_t Encoding = `0`;
222	AArch64_AM::processLogicalImmediate(Imm: OrrImm, RegSize: `64`, Encoding);
223	Insn.push_back(Elt: { .Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: Encoding });
224
225	const bool SingleMovk = SecondMovkIdx == NotSet;
226	Insn.push_back(Elt: { .Opcode: AArch64::MOVKXi, .Op1: getChunk(Imm: UImm, ChunkIdx: FirstMovkIdx),
227	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL,
228	Imm: FirstMovkIdx * `16`) });
229
230	// Early exit in case we only need to emit a single MOVK instruction.
231	if (SingleMovk)
232	return true;
233
234	// Create the second MOVK instruction.
235	Insn.push_back(Elt: { .Opcode: AArch64::MOVKXi, .Op1: getChunk(Imm: UImm, ChunkIdx: SecondMovkIdx),
236	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL,
237	Imm: SecondMovkIdx * `16`) });
238
239	return true;
240	}
241
242	// Attempt to expand 64-bit immediate values that consist of shifted negated
243	// components such as 0x1234'5678'edcb'a987, where the upper half is the
244	// negation of the lower half. Immediates of this form can generally be
245	// expanded via a sequence of MOVN+MOVK to expand the lower half, followed by
246	// an EOR or EON to shift and negate the result to the upper half, for example:
247	// mov x0, #-22137 // =0xffffffffffffa987
248	// movk x0, #60875, lsl #16 // =0xffffffffedcba987
249	// eor x0, x0, x0, lsl #32 // =0xffffffffedcba987 ^ 0xedcba98700000000
250	// =0x12345678edcba987.
251	// The logic extends to other shift amounts in the range [17, 48) (outside that
252	// range we get runs of ones/zeros that are optimised separately).
253	//
254	// When the lower half contains a 16-bit chunk of ones, such as
255	// 0x0000'5678'ffff'a987, the intermediate MOVK is redundant.
256	// Similarly, when it contains a 16-bit chunk of zeros, such as
257	// 0xffff'5678'0000'a987, the expansion can instead be effected by expanding
258	// the negation of the lower half and negating the result with an EON, e.g.:
259	// mov x0, #-43400 // =0xffffffffffff5678
260	// eon x0, x0, x0, lsl #32 // =0xffffffffffff5678 ^ ~0xffff567800000000
261	// =0xffffffffffff5678 ^ 0x0000a987ffffffff
262	// =0xffff56780000a987.
263	// In any of these cases, the expansion with EOR/EON saves an instruction
264	// compared to the default expansion based on MOV and MOVKs.
265	static bool tryCopyWithNegation(uint64_t Imm, bool AllowThreeSequence,
266	SmallVectorImpl<ImmInsnModel> &Insn) {
267	// Degenerate cases where Imm is a run of ones should be handled separately.
268	if (!Imm \|\| llvm::isShiftedMask_64(Value: Imm))
269	return false;
270
271	const unsigned Mask = `0xffff`;
272
273	auto tryExpansion = [&](unsigned Opc, uint64_t C, unsigned N) {
274	assert((C >> `32`) == `0xffffffffULL` && "Invalid immediate");
275	const unsigned Imm0 = C & Mask;
276	const unsigned Imm16 = (C >> `16`) & Mask;
277	if (Imm0 != Mask && Imm16 != Mask && !AllowThreeSequence)
278	return false;
279
280	if (Imm0 != Mask) {
281	Insn.push_back(Elt: {.Opcode: AArch64::MOVNXi, .Op1: Imm0 ^ Mask, .Op2: `0`});
282	if (Imm16 != Mask)
283	Insn.push_back(Elt: {.Opcode: AArch64::MOVKXi, .Op1: Imm16, .Op2: `16`});
284	} else {
285	Insn.push_back(Elt: {.Opcode: AArch64::MOVNXi, .Op1: Imm16 ^ Mask, .Op2: `16`});
286	}
287
288	Insn.push_back(Elt: {.Opcode: Opc, .Op1: `0`, .Op2: N});
289	return true;
290	};
291
292	for (unsigned N = `17`; N < `48`; ++N) {
293	// Attempt EOR.
294	uint64_t C = `0xffffffff00000000ULL` \| (Imm ^ (Imm << N));
295	if ((C ^ (C << N)) == Imm && tryExpansion (AArch64::EORXrs, C, N))
296	return true;
297
298	// Attempt EON.
299	C = `0xffffffff00000000ULL` \| (Imm ^ ~(~Imm << N));
300	if ((C ^ ~(C << N)) == Imm && tryExpansion (AArch64::EONXrs, C, N))
301	return true;
302	}
303
304	return false;
305	}
306
307	static uint64_t GetRunOfOnesStartingAt(uint64_t V, uint64_t StartPosition) {
308	uint64_t NumOnes = llvm::countr_one(Value: V >> StartPosition);
309
310	uint64_t UnshiftedOnes;
311	if (NumOnes == `64`) {
312	UnshiftedOnes = ~`0ULL`;
313	} else {
314	UnshiftedOnes = (`1ULL` << NumOnes) - `1`;
315	}
316	return UnshiftedOnes << StartPosition;
317	}
318
319	static uint64_t MaximallyReplicateSubImmediate(uint64_t V, uint64_t Subset) {
320	uint64_t Result = Subset;
321
322	// 64, 32, 16, 8, 4, 2
323	for (uint64_t i = `0`; i < `6`; ++i) {
324	uint64_t Rotation = `1ULL` << (`6` - i);
325	uint64_t Closure = Result \| llvm::rotl<uint64_t>(V: Result, R: Rotation);
326	if (Closure != (Closure & V)) {
327	break;
328	}
329	Result = Closure;
330	}
331
332	return Result;
333	}
334
335	// Find the logical immediate that covers the most bits in RemainingBits,
336	// allowing for additional bits to be set that were set in OriginalBits.
337	static uint64_t maximalLogicalImmWithin(uint64_t RemainingBits,
338	uint64_t OriginalBits) {
339	// Find the first set bit.
340	uint32_t Position = llvm::countr_zero(Val: RemainingBits);
341
342	// Get the first run of set bits.
343	uint64_t FirstRun = GetRunOfOnesStartingAt(V: OriginalBits, StartPosition: Position);
344
345	// Replicate the run as many times as possible, as long as the bits are set in
346	// RemainingBits.
347	uint64_t MaximalImm = MaximallyReplicateSubImmediate(V: OriginalBits, Subset: FirstRun);
348
349	return MaximalImm;
350	}
351
352	static std::optional<std::pair<uint64_t, uint64_t>>
353	decomposeIntoOrrOfLogicalImmediates(uint64_t UImm) {
354	if (UImm == `0` \|\| ~UImm == `0`)
355	return std::nullopt;
356
357	// Make sure we don't have a run of ones split around the rotation boundary.
358	uint32_t InitialTrailingOnes = llvm::countr_one(Value: UImm);
359	uint64_t RotatedBits = llvm::rotr<uint64_t>(V: UImm, R: InitialTrailingOnes);
360
361	// Find the largest logical immediate that fits within the full immediate.
362	uint64_t MaximalImm1 = maximalLogicalImmWithin(RemainingBits: RotatedBits, OriginalBits: RotatedBits);
363
364	// Remove all bits that are set by this mask.
365	uint64_t RemainingBits = RotatedBits & ~MaximalImm1;
366
367	// Find the largest logical immediate covering the remaining bits, allowing
368	// for additional bits to be set that were also set in the original immediate.
369	uint64_t MaximalImm2 = maximalLogicalImmWithin(RemainingBits, OriginalBits: RotatedBits);
370
371	// If any bits still haven't been covered, then give up.
372	if (RemainingBits & ~MaximalImm2)
373	return std::nullopt;
374
375	// Make sure to un-rotate the immediates.
376	return std::make_pair(x: rotl(V: MaximalImm1, R: InitialTrailingOnes),
377	y: rotl(V: MaximalImm2, R: InitialTrailingOnes));
378	}
379
380	// Attempt to expand an immediate as the ORR of a pair of logical immediates.
381	static bool tryOrrOfLogicalImmediates(uint64_t UImm,
382	SmallVectorImpl<ImmInsnModel> &Insn) {
383	auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(UImm);
384	if (MaybeDecomposition == std::nullopt)
385	return false;
386	uint64_t Imm1 = MaybeDecomposition ->first;
387	uint64_t Imm2 = MaybeDecomposition ->second;
388
389	uint64_t Encoding1, Encoding2;
390	bool Imm1Success = AArch64_AM::processLogicalImmediate(Imm: Imm1, RegSize: `64`, Encoding&: Encoding1);
391	bool Imm2Success = AArch64_AM::processLogicalImmediate(Imm: Imm2, RegSize: `64`, Encoding&: Encoding2);
392
393	if (Imm1Success && Imm2Success) {
394	// Create the ORR-immediate instructions.
395	Insn.push_back(Elt: {.Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: Encoding1});
396	Insn.push_back(Elt: {.Opcode: AArch64::ORRXri, .Op1: `1`, .Op2: Encoding2});
397	return true;
398	}
399
400	return false;
401	}
402
403	// Attempt to expand an immediate as the AND of a pair of logical immediates.
404	// This is done by applying DeMorgan's law, under which logical immediates
405	// are closed.
406	static bool tryAndOfLogicalImmediates(uint64_t UImm,
407	SmallVectorImpl<ImmInsnModel> &Insn) {
408	// Apply DeMorgan's law to turn this into an ORR problem.
409	auto MaybeDecomposition = decomposeIntoOrrOfLogicalImmediates(UImm: ~UImm);
410	if (MaybeDecomposition == std::nullopt)
411	return false;
412	uint64_t Imm1 = MaybeDecomposition ->first;
413	uint64_t Imm2 = MaybeDecomposition ->second;
414
415	uint64_t Encoding1, Encoding2;
416	bool Imm1Success = AArch64_AM::processLogicalImmediate(Imm: ~Imm1, RegSize: `64`, Encoding&: Encoding1);
417	bool Imm2Success = AArch64_AM::processLogicalImmediate(Imm: ~Imm2, RegSize: `64`, Encoding&: Encoding2);
418
419	if (Imm1Success && Imm2Success) {
420	// Materialize Imm1, the LHS of the AND
421	Insn.push_back(Elt: {.Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: Encoding1});
422	// AND Imm1 with Imm2
423	Insn.push_back(Elt: {.Opcode: AArch64::ANDXri, .Op1: `1`, .Op2: Encoding2});
424	return true;
425	}
426
427	return false;
428	}
429
430	// Check whether the constant can be represented by exclusive-or of two 64-bit
431	// logical immediates. If so, materialize it with an ORR instruction followed
432	// by an EOR instruction.
433	//
434	// This encoding allows all remaining repeated byte patterns, and many repeated
435	// 16-bit values, to be encoded without needing four instructions. It can also
436	// represent some irregular bitmasks (although those would mostly only need
437	// three instructions otherwise).
438	static bool tryEorOfLogicalImmediates(uint64_t Imm,
439	SmallVectorImpl<ImmInsnModel> &Insn) {
440	// Determine the larger repetition size of the two possible logical
441	// immediates, by finding the repetition size of Imm.
442	unsigned BigSize = `64`;
443
444	do {
445	BigSize /= `2`;
446	uint64_t Mask = (`1ULL` << BigSize) - `1`;
447
448	if ((Imm & Mask) != ((Imm >> BigSize) & Mask)) {
449	BigSize *= `2`;
450	break;
451	}
452	} while (BigSize > `2`);
453
454	uint64_t BigMask = ((uint64_t)-`1LL`) >> (`64` - BigSize);
455
456	// Find the last bit of each run of ones, circularly. For runs which wrap
457	// around from bit 0 to bit 63, this is the bit before the most-significant
458	// zero, otherwise it is the least-significant bit in the run of ones.
459	uint64_t RunStarts = Imm & ~rotl<uint64_t>(V: Imm, R: `1`);
460
461	// Find the smaller repetition size of the two possible logical immediates by
462	// counting the number of runs of one-bits within the BigSize-bit value. Both
463	// sizes may be the same. The EOR may add one or subtract one from the
464	// power-of-two count that can be represented by a logical immediate, or it
465	// may be left unchanged.
466	int RunsPerBigChunk = popcount(Value: RunStarts & BigMask);
467
468	static const int8_t BigToSmallSizeTable[`32`] = {
469	-`1`, -`1`, `0`, `1`, `2`, `2`, -`1`, `3`, `3`, `3`, -`1`, -`1`, -`1`, -`1`, -`1`, `4`,
470	`4`, `4`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, -`1`, `5`,
471	};
472
473	int BigToSmallShift = BigToSmallSizeTable[RunsPerBigChunk];
474
475	// Early-exit if the big chunk couldn't be a power-of-two number of runs
476	// EORed with another single run.
477	if (BigToSmallShift == -`1`)
478	return false;
479
480	unsigned SmallSize = BigSize >> BigToSmallShift;
481
482	// 64-bit values with a bit set every (1 << index) bits.
483	static const uint64_t RepeatedOnesTable[] = {
484	`0xffffffffffffffff`, `0x5555555555555555`, `0x1111111111111111`,
485	`0x0101010101010101`, `0x0001000100010001`, `0x0000000100000001`,
486	`0x0000000000000001`,
487	};
488
489	// This RepeatedOnesTable lookup is a faster implementation of the division
490	// 0xffffffffffffffff / ((1 << SmallSize) - 1), and can be thought of as
491	// dividing the 64-bit value into fields of width SmallSize, and placing a
492	// one in the least significant bit of each field.
493	uint64_t SmallOnes = RepeatedOnesTable[countr_zero(Val: SmallSize)];
494
495	// Now we try to find the number of ones in each of the smaller repetitions,
496	// by looking at runs of ones in Imm. This can take three attempts, as the
497	// EOR may have changed the length of the first two runs we find.
498
499	// Rotate a run of ones so we can count the number of trailing set bits.
500	int Rotation = countr_zero(Val: RunStarts);
501	uint64_t RotatedImm = rotr<uint64_t>(V: Imm, R: Rotation);
502	for (int Attempt = `0`; Attempt < `3`; ++Attempt) {
503	unsigned RunLength = countr_one(Value: RotatedImm);
504
505	// Construct candidate values BigImm and SmallImm, such that if these two
506	// values are encodable, we have a solution. (SmallImm is constructed to be
507	// encodable, but this isn't guaranteed when RunLength >= SmallSize)
508	uint64_t SmallImm =
509	rotl<uint64_t>(V: (SmallOnes << RunLength) - SmallOnes, R: Rotation);
510	uint64_t BigImm = Imm ^ SmallImm;
511
512	uint64_t BigEncoding = `0`;
513	uint64_t SmallEncoding = `0`;
514	if (AArch64_AM::processLogicalImmediate(Imm: BigImm, RegSize: `64`, Encoding&: BigEncoding) &&
515	AArch64_AM::processLogicalImmediate(Imm: SmallImm, RegSize: `64`, Encoding&: SmallEncoding)) {
516	Insn.push_back(Elt: {.Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: SmallEncoding});
517	Insn.push_back(Elt: {.Opcode: AArch64::EORXri, .Op1: `1`, .Op2: BigEncoding});
518	return true;
519	}
520
521	// Rotate to the next run of ones
522	Rotation += countr_zero(Val: rotr<uint64_t>(V: RunStarts, R: Rotation) & ~`1`);
523	RotatedImm = rotr<uint64_t>(V: Imm, R: Rotation);
524	}
525
526	return false;
527	}
528
529	/// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
530	/// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
531	static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
532	unsigned OneChunks, unsigned ZeroChunks,
533	SmallVectorImpl<ImmInsnModel> &Insn) {
534	const unsigned Mask = `0xFFFF`;
535
536	// Use a MOVZ or MOVN instruction to set the high bits, followed by one or
537	// more MOVK instructions to insert additional 16-bit portions into the
538	// lower bits.
539	bool isNeg = false;
540
541	// Use MOVN to materialize the high bits if we have more all one chunks
542	// than all zero chunks.
543	if (OneChunks > ZeroChunks) {
544	isNeg = true;
545	Imm = ~Imm;
546	}
547
548	unsigned FirstOpc;
549	if (BitSize == `32`) {
550	Imm &= (`1LL` << `32`) - `1`;
551	FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
552	} else {
553	FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
554	}
555	unsigned Shift = `0`; // LSL amount for high bits with MOVZ/MOVN
556	unsigned LastShift = `0`; // LSL amount for last MOVK
557	if (Imm != `0`) {
558	unsigned LZ = llvm::countl_zero(Val: Imm);
559	unsigned TZ = llvm::countr_zero(Val: Imm);
560	Shift = (TZ / `16`) * `16`;
561	LastShift = ((`63` - LZ) / `16`) * `16`;
562	}
563	unsigned Imm16 = (Imm >> Shift) & Mask;
564
565	Insn.push_back(Elt: { .Opcode: FirstOpc, .Op1: Imm16,
566	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: Shift) });
567
568	if (Shift == LastShift)
569	return;
570
571	// If a MOVN was used for the high bits of a negative value, flip the rest
572	// of the bits back for use with MOVK.
573	if (isNeg)
574	Imm = ~Imm;
575
576	unsigned Opc = (BitSize == `32` ? AArch64::MOVKWi : AArch64::MOVKXi);
577	while (Shift < LastShift) {
578	Shift += `16`;
579	Imm16 = (Imm >> Shift) & Mask;
580	if (Imm16 == (isNeg ? Mask : `0`))
581	continue; // This 16-bit portion is already set correctly.
582
583	Insn.push_back(Elt: { .Opcode: Opc, .Op1: Imm16,
584	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: Shift) });
585	}
586
587	// Now, we get 16-bit divided Imm. If high and low bits are same in
588	// 32-bit, there is an opportunity to reduce instruction.
589	if (Insn.size() > `2` && (Imm >> `32`) == (Imm & `0xffffffffULL`)) {
590	for (int Size = Insn.size(); Size > `2`; Size--)
591	Insn.pop_back();
592	Insn.push_back(Elt: {.Opcode: AArch64::ORRXrs, .Op1: `0`, .Op2: `32`});
593	}
594	}
595
596	/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
597	/// real move-immediate instructions to synthesize the immediate.
598	void AArch64_IMM::expandMOVImm(uint64_t Imm, unsigned BitSize,
599	SmallVectorImpl<ImmInsnModel> &Insn) {
600	const unsigned Mask = `0xFFFF`;
601
602	// Scan the immediate and count the number of 16-bit chunks which are either
603	// all ones or all zeros.
604	unsigned OneChunks = `0`;
605	unsigned ZeroChunks = `0`;
606	for (unsigned Shift = `0`; Shift < BitSize; Shift += `16`) {
607	const unsigned Chunk = (Imm >> Shift) & Mask;
608	if (Chunk == Mask)
609	OneChunks++;
610	else if (Chunk == `0`)
611	ZeroChunks++;
612	}
613
614	// Prefer MOVZ/MOVN over ORR because of the rules for the "mov" alias.
615	if ((BitSize / `16`) - OneChunks <= `1` \|\| (BitSize / `16`) - ZeroChunks <= `1`) {
616	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
617	assert(Insn.size() == `1` &&
618	"Move of immediate should have expanded to a single MOVZ/MOVN");
619	return;
620	}
621
622	// Try a single ORR.
623	uint64_t UImm = Imm << (`64` - BitSize) >> (`64` - BitSize);
624	uint64_t Encoding;
625	if (AArch64_AM::processLogicalImmediate(Imm: UImm, RegSize: BitSize, Encoding)) {
626	unsigned Opc = (BitSize == `32` ? AArch64::ORRWri : AArch64::ORRXri);
627	Insn.push_back(Elt: { .Opcode: Opc, .Op1: `0`, .Op2: Encoding });
628	return;
629	}
630
631	// One to up three instruction sequences.
632	//
633	// Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
634	// fastest sequence with fast literal generation.
635	if (OneChunks >= (BitSize / `16`) - `2` \|\| ZeroChunks >= (BitSize / `16`) - `2`) {
636	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
637	return;
638	}
639
640	assert(BitSize == `64` && "All 32-bit immediates can be expanded with a"
641	"MOVZ/MOVK pair");
642
643	// Try other two-instruction sequences.
644
645	// 64-bit ORR followed by MOVK.
646	// We try to construct the ORR immediate in three different ways: either we
647	// zero out the chunk which will be replaced, we fill the chunk which will
648	// be replaced with ones, or we take the bit pattern from the other half of
649	// the 64-bit immediate. This is comprehensive because of the way ORR
650	// immediates are constructed.
651	for (unsigned Shift = `0`; Shift < BitSize; Shift += `16`) {
652	uint64_t ShiftedMask = (`0xFFFFULL` << Shift);
653	uint64_t ZeroChunk = UImm & ~ShiftedMask;
654	uint64_t OneChunk = UImm \| ShiftedMask;
655	uint64_t RotatedImm = llvm::rotl(V: UImm, R: `32`);
656	uint64_t ReplicateChunk = ZeroChunk \| (RotatedImm & ShiftedMask);
657	if (AArch64_AM::processLogicalImmediate(Imm: ZeroChunk, RegSize: BitSize, Encoding) \|\|
658	AArch64_AM::processLogicalImmediate(Imm: OneChunk, RegSize: BitSize, Encoding) \|\|
659	AArch64_AM::processLogicalImmediate(Imm: ReplicateChunk, RegSize: BitSize,
660	Encoding)) {
661	// Create the ORR-immediate instruction.
662	Insn.push_back(Elt: { .Opcode: AArch64::ORRXri, .Op1: `0`, .Op2: Encoding });
663
664	// Create the MOVK instruction.
665	const unsigned Imm16 = getChunk(Imm: UImm, ChunkIdx: Shift / `16`);
666	Insn.push_back(Elt: { .Opcode: AArch64::MOVKXi, .Op1: Imm16,
667	.Op2: AArch64_AM::getShifterImm(ST: AArch64_AM::LSL, Imm: Shift) });
668	return;
669	}
670	}
671
672	// Attempt to use a sequence of two ORR-immediate instructions.
673	if (tryOrrOfLogicalImmediates(UImm: Imm, Insn))
674	return;
675
676	// Attempt to use a sequence of ORR-immediate followed by AND-immediate.
677	if (tryAndOfLogicalImmediates(UImm: Imm, Insn))
678	return;
679
680	// Attempt to use a sequence of ORR-immediate followed by EOR-immediate.
681	if (tryEorOfLogicalImmediates(Imm: UImm, Insn))
682	return;
683
684	// Attempt to use a sequence of MOVN+EOR/EON (shifted register).
685	if (tryCopyWithNegation(Imm, /AllowThreeSequence=/false, Insn))
686	return;
687
688	// FIXME: Add more two-instruction sequences.
689
690	// Three instruction sequences.
691	//
692	// Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
693	// the fastest sequence with fast literal generation. (If neither MOVK is
694	// part of a fast literal generation pair, it could be slower than the
695	// four-instruction sequence, but we won't worry about that for now.)
696	if (OneChunks \|\| ZeroChunks) {
697	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
698	return;
699	}
700
701	// Check for identical 16-bit chunks within the constant and if so materialize
702	// them with a single ORR instruction. The remaining one or two 16-bit chunks
703	// will be materialized with MOVK instructions.
704	if (BitSize == `64` && tryToreplicateChunks(UImm, Insn))
705	return;
706
707	// Check whether the constant contains a sequence of contiguous ones, which
708	// might be interrupted by one or two chunks. If so, materialize the sequence
709	// of contiguous ones with an ORR instruction. Materialize the chunks which
710	// are either interrupting the sequence or outside of the sequence with a
711	// MOVK instruction.
712	if (BitSize == `64` && trySequenceOfOnes(UImm, Insn))
713	return;
714
715	// Attempt to use a sequence of MOVN+MOVK+EOR/EON (shifted register).
716	if (tryCopyWithNegation(Imm, /AllowThreeSequence=/true, Insn))
717	return;
718
719	// We found no possible two or three instruction sequence; use the general
720	// four-instruction sequence.
721	expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
722	}
723

Browse the source code of llvm_projects/llvm/lib/Target/AArch64/AArch64ExpandImm.cpp