| 1 | //===--- AMDGPUMCKernelDescriptor.cpp -------------------------------------===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | #include "AMDGPUMCKernelDescriptor.h" |
| 10 | #include "AMDGPUMCTargetDesc.h" |
| 11 | #include "Utils/AMDGPUBaseInfo.h" |
| 12 | #include "llvm/MC/MCContext.h" |
| 13 | #include "llvm/MC/MCExpr.h" |
| 14 | #include "llvm/MC/MCSubtargetInfo.h" |
| 15 | #include "llvm/TargetParser/TargetParser.h" |
| 16 | |
| 17 | using namespace llvm; |
| 18 | using namespace llvm::AMDGPU; |
| 19 | |
| 20 | MCKernelDescriptor |
| 21 | MCKernelDescriptor::getDefaultAmdhsaKernelDescriptor(const MCSubtargetInfo *STI, |
| 22 | MCContext &Ctx) { |
| 23 | IsaVersion Version = getIsaVersion(GPU: STI->getCPU()); |
| 24 | |
| 25 | MCKernelDescriptor KD; |
| 26 | const MCExpr *ZeroMCExpr = MCConstantExpr::create(Value: 0, Ctx); |
| 27 | const MCExpr *OneMCExpr = MCConstantExpr::create(Value: 1, Ctx); |
| 28 | |
| 29 | KD.group_segment_fixed_size = ZeroMCExpr; |
| 30 | KD.private_segment_fixed_size = ZeroMCExpr; |
| 31 | KD.compute_pgm_rsrc1 = ZeroMCExpr; |
| 32 | KD.compute_pgm_rsrc2 = ZeroMCExpr; |
| 33 | KD.compute_pgm_rsrc3 = ZeroMCExpr; |
| 34 | KD.kernarg_size = ZeroMCExpr; |
| 35 | KD.kernel_code_properties = ZeroMCExpr; |
| 36 | KD.kernarg_preload = ZeroMCExpr; |
| 37 | |
| 38 | MCKernelDescriptor::bits_set( |
| 39 | Dst&: KD.compute_pgm_rsrc1, |
| 40 | Value: MCConstantExpr::create(Value: amdhsa::FLOAT_DENORM_MODE_FLUSH_NONE, Ctx), |
| 41 | Shift: amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64_SHIFT, |
| 42 | Mask: amdhsa::COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Ctx); |
| 43 | if (Version.Major < 12) { |
| 44 | MCKernelDescriptor::bits_set( |
| 45 | Dst&: KD.compute_pgm_rsrc1, Value: OneMCExpr, |
| 46 | Shift: amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP_SHIFT, |
| 47 | Mask: amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_DX10_CLAMP, Ctx); |
| 48 | MCKernelDescriptor::bits_set( |
| 49 | Dst&: KD.compute_pgm_rsrc1, Value: OneMCExpr, |
| 50 | Shift: amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE_SHIFT, |
| 51 | Mask: amdhsa::COMPUTE_PGM_RSRC1_GFX6_GFX11_ENABLE_IEEE_MODE, Ctx); |
| 52 | } |
| 53 | MCKernelDescriptor::bits_set( |
| 54 | Dst&: KD.compute_pgm_rsrc2, Value: OneMCExpr, |
| 55 | Shift: amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X_SHIFT, |
| 56 | Mask: amdhsa::COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Ctx); |
| 57 | if (Version.Major >= 10) { |
| 58 | if (STI->getFeatureBits().test(I: FeatureWavefrontSize32)) |
| 59 | MCKernelDescriptor::bits_set( |
| 60 | Dst&: KD.kernel_code_properties, Value: OneMCExpr, |
| 61 | Shift: amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32_SHIFT, |
| 62 | Mask: amdhsa::KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32, Ctx); |
| 63 | if (!STI->getFeatureBits().test(I: FeatureCuMode)) |
| 64 | MCKernelDescriptor::bits_set( |
| 65 | Dst&: KD.compute_pgm_rsrc1, Value: OneMCExpr, |
| 66 | Shift: amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE_SHIFT, |
| 67 | Mask: amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_WGP_MODE, Ctx); |
| 68 | |
| 69 | MCKernelDescriptor::bits_set( |
| 70 | Dst&: KD.compute_pgm_rsrc1, Value: OneMCExpr, |
| 71 | Shift: amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED_SHIFT, |
| 72 | Mask: amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_MEM_ORDERED, Ctx); |
| 73 | |
| 74 | MCKernelDescriptor::bits_set( |
| 75 | Dst&: KD.compute_pgm_rsrc1, Value: OneMCExpr, |
| 76 | Shift: amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS_SHIFT, |
| 77 | Mask: amdhsa::COMPUTE_PGM_RSRC1_GFX10_PLUS_FWD_PROGRESS, Ctx); |
| 78 | } |
| 79 | if (AMDGPU::isGFX90A(STI: *STI) && STI->getFeatureBits().test(I: FeatureTgSplit)) |
| 80 | MCKernelDescriptor::bits_set( |
| 81 | Dst&: KD.compute_pgm_rsrc3, Value: OneMCExpr, |
| 82 | Shift: amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT_SHIFT, |
| 83 | Mask: amdhsa::COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Ctx); |
| 84 | return KD; |
| 85 | } |
| 86 | |
| 87 | void MCKernelDescriptor::bits_set(const MCExpr *&Dst, const MCExpr *Value, |
| 88 | uint32_t Shift, uint32_t Mask, |
| 89 | MCContext &Ctx) { |
| 90 | const auto *Sft = MCConstantExpr::create(Value: Shift, Ctx); |
| 91 | const auto *Msk = MCConstantExpr::create(Value: Mask, Ctx); |
| 92 | Dst = MCBinaryExpr::createAnd(LHS: Dst, RHS: MCUnaryExpr::createNot(Expr: Msk, Ctx), Ctx); |
| 93 | Dst = MCBinaryExpr::createOr(LHS: Dst, RHS: MCBinaryExpr::createShl(LHS: Value, RHS: Sft, Ctx), |
| 94 | Ctx); |
| 95 | } |
| 96 | |
| 97 | const MCExpr *MCKernelDescriptor::bits_get(const MCExpr *Src, uint32_t Shift, |
| 98 | uint32_t Mask, MCContext &Ctx) { |
| 99 | const auto *Sft = MCConstantExpr::create(Value: Shift, Ctx); |
| 100 | const auto *Msk = MCConstantExpr::create(Value: Mask, Ctx); |
| 101 | return MCBinaryExpr::createLShr(LHS: MCBinaryExpr::createAnd(LHS: Src, RHS: Msk, Ctx), RHS: Sft, |
| 102 | Ctx); |
| 103 | } |
| 104 | |