| 1 | /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */ |
| 2 | #ifndef _LINUX_RSEQ_H |
| 3 | #define _LINUX_RSEQ_H |
| 4 | |
| 5 | /* |
| 6 | * linux/rseq.h |
| 7 | * |
| 8 | * Restartable sequences system call API |
| 9 | * |
| 10 | * Copyright (c) 2015-2018 Mathieu Desnoyers <mathieu.desnoyers@efficios.com> |
| 11 | */ |
| 12 | |
| 13 | #include <linux/types.h> |
| 14 | #include <asm/byteorder.h> |
| 15 | |
/*
 * Special (negative) values that the cpu_id field of struct rseq can hold
 * instead of a CPU number.
 */
enum rseq_cpu_id_state {
	RSEQ_CPU_ID_UNINITIALIZED		= -1,	/* rseq uninitialized */
	RSEQ_CPU_ID_REGISTRATION_FAILED		= -2,	/* rseq initialization failed */
};
| 20 | |
/*
 * Flag masks of the restartable sequences system call API.
 * NOTE(review): presumably passed as the flags argument of the rseq
 * system call - confirm against rseq(2).
 */
enum rseq_flags {
	RSEQ_FLAG_UNREGISTER			= 0x1,	/* bit 0 */
	RSEQ_FLAG_SLICE_EXT_DEFAULT_ON		= 0x2,	/* bit 1 */
};
| 25 | |
/*
 * Bit positions of the flags; the matching masks are built from these
 * values in enum rseq_cs_flags.
 */
enum rseq_cs_flags_bit {
	/* Bits 0-2: historical and unsupported */
	RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT	= 0,
	RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT	= 1,
	RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT	= 2,

	/*
	 * Bit 3 is an intentional gap: it keeps the newer bits apart from
	 * the historical ones by starting them at bit 4, i.e. in the upper
	 * nibble of the low byte.
	 */

	/* Bits 4-5: user read only feature flags */
	RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT	= 4,
	RSEQ_CS_FLAG_SLICE_EXT_ENABLED_BIT	= 5,
};
| 37 | |
| 38 | enum rseq_cs_flags { |
| 39 | RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT = |
| 40 | (1U << RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT), |
| 41 | RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL = |
| 42 | (1U << RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT), |
| 43 | RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE = |
| 44 | (1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT), |
| 45 | |
| 46 | RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE = |
| 47 | (1U << RSEQ_CS_FLAG_SLICE_EXT_AVAILABLE_BIT), |
| 48 | RSEQ_CS_FLAG_SLICE_EXT_ENABLED = |
| 49 | (1U << RSEQ_CS_FLAG_SLICE_EXT_ENABLED_BIT), |
| 50 | }; |
| 51 | |
/*
 * struct rseq_cs - descriptor of one restartable sequence critical section.
 *
 * The rseq_cs field of struct rseq holds a pointer to the currently active
 * instance of this structure.
 *
 * struct rseq_cs is aligned on 4 * 8 bytes to ensure it is always
 * contained within a single cache-line. Its fields add up to exactly
 * 4 * 8 bytes as well (4 + 4 + 8 + 8 + 8). It is usually declared as
 * link-time constant data.
 */
struct rseq_cs {
	/* Version of this structure. */
	__u32 version;
	/* Bit mask built from enum rseq_cs_flags. */
	__u32 flags;
	/*
	 * NOTE(review): presumably the address of the first instruction of
	 * the critical section - confirm against rseq(2).
	 */
	__u64 start_ip;
	/* Offset from start_ip. */
	__u64 post_commit_offset;
	/*
	 * NOTE(review): presumably the address at which execution resumes
	 * when the kernel restarts the sequence - confirm against rseq(2).
	 */
	__u64 abort_ip;
} __attribute__((aligned(4 * sizeof(__u64))));
| 67 | |
/**
 * struct rseq_slice_ctrl - Time slice extension control structure
 * @all: Compound value covering @request, @granted and @__reserved
 * @request: Request for a time slice extension
 * @granted: Granted time slice extension
 * @__reserved: Reserved; fills the remaining 16 bits of @all
 *
 * @request is set by user space and can be cleared by user space or kernel
 * space. @granted is set and cleared by the kernel and must only be read
 * by user space.
 *
 * The byte-sized members overlay @all in memory order, so which bits of
 * @all they correspond to depends on the byte order of the architecture.
 */
struct rseq_slice_ctrl {
	union {
		__u32 all;
		struct {
			__u8 request;
			__u8 granted;
			__u16 __reserved;
		};
	};
};
| 88 | |
/*
 * struct rseq - per-thread restartable sequences area shared between
 * user-space and the kernel.
 *
 * The original size and alignment of the allocation for struct rseq is
 * 32 bytes.
 *
 * The allocation size needs to be greater than or equal to
 * max(getauxval(AT_RSEQ_FEATURE_SIZE), 32), and the allocation needs to
 * be aligned on max(getauxval(AT_RSEQ_ALIGN), 32).
 *
 * As an alternative, userspace is allowed to use both the original size
 * and alignment of 32 bytes for backward compatibility.
 *
 * A single active struct rseq registration per thread is allowed.
 */
struct rseq {
	/*
	 * Restartable sequences cpu_id_start field. Updated by the
	 * kernel. Read by user-space with single-copy atomicity
	 * semantics. This field should only be read by the thread which
	 * registered this data structure. Aligned on 32-bit. Always
	 * contains a value in the range of possible CPUs, although the
	 * value may not be the actual current CPU (e.g. if rseq is not
	 * initialized). This CPU number value should always be compared
	 * against the value of the cpu_id field before performing a rseq
	 * commit or returning a value read from a data structure indexed
	 * using the cpu_id_start value.
	 */
	__u32 cpu_id_start;
	/*
	 * Restartable sequences cpu_id field. Updated by the kernel.
	 * Read by user-space with single-copy atomicity semantics. This
	 * field should only be read by the thread which registered this
	 * data structure. Aligned on 32-bit. Values
	 * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
	 * have a special semantic: the former means "rseq uninitialized",
	 * and the latter means "rseq initialization failed". This value is
	 * meant to be read within rseq critical sections and compared
	 * with the cpu_id_start value previously read, before performing
	 * the commit instruction, or read and compared with the
	 * cpu_id_start value before returning a value loaded from a data
	 * structure indexed using the cpu_id_start value.
	 */
	__u32 cpu_id;
	/*
	 * Restartable sequences rseq_cs field.
	 *
	 * Contains NULL when no critical section is active for the current
	 * thread, or holds a pointer to the currently active struct rseq_cs.
	 *
	 * Updated by user-space, which sets the address of the currently
	 * active rseq_cs at the beginning of an assembly instruction
	 * sequence block, and set to NULL by the kernel when it restarts an
	 * assembly instruction sequence block, as well as when the kernel
	 * detects that it is preempting or delivering a signal outside of
	 * the range targeted by the rseq_cs. Also needs to be set to NULL by
	 * user-space before reclaiming memory that contains the targeted
	 * struct rseq_cs.
	 *
	 * Read and set by the kernel. Set by user-space with single-copy
	 * atomicity semantics. This field should only be updated by the
	 * thread which registered this data structure. Aligned on 64-bit.
	 *
	 * The field is 64 bits wide on every architecture. 32-bit
	 * architectures should update the low order bits of the
	 * rseq_cs field, leaving the high order bits initialized to 0.
	 */
	__u64 rseq_cs;

	/*
	 * Restartable sequences flags field.
	 *
	 * This field was initially intended to allow event masking for
	 * single-stepping through rseq critical sections with debuggers.
	 * The kernel does not support this anymore and checks that the
	 * relevant bits are always clear:
	 * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
	 * - RSEQ_CS_FLAG_NO_RESTART_ON_SIGNAL
	 * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
	 *
	 * NOTE(review): the RSEQ_CS_FLAG_SLICE_EXT_* bits are labelled
	 * "User read only feature flags" in enum rseq_cs_flags_bit;
	 * presumably they are reported through this field - confirm.
	 */
	__u32 flags;

	/*
	 * Restartable sequences node_id field. Updated by the kernel. Read by
	 * user-space with single-copy atomicity semantics. This field should
	 * only be read by the thread which registered this data structure.
	 * Aligned on 32-bit. Contains the current NUMA node ID.
	 */
	__u32 node_id;

	/*
	 * Restartable sequences mm_cid field. Updated by the kernel. Read by
	 * user-space with single-copy atomicity semantics. This field should
	 * only be read by the thread which registered this data structure.
	 * Aligned on 32-bit. Contains the current thread's concurrency ID
	 * (allocated uniquely within a memory map).
	 */
	__u32 mm_cid;

	/*
	 * Time slice extension control structure (see struct
	 * rseq_slice_ctrl). CPU local updates from kernel and user space.
	 */
	struct rseq_slice_ctrl slice_ctrl;

	/*
	 * Before rseq became extensible, its original size was 32 bytes even
	 * though the active rseq area was only 20 bytes.
	 * Exposing a 32-byte feature size would make life needlessly painful
	 * for userspace. Therefore, add a reserved byte after the first
	 * 32 bytes (which the fields above fill completely) to bump the rseq
	 * feature size from 32 to 33.
	 * The next field to be added to the rseq area will be larger
	 * than one byte, and will replace this reserved byte.
	 */
	__u8 __reserved;

	/*
	 * Flexible array member at end of structure, after last feature field.
	 * It occupies no storage itself; it only marks where the feature
	 * fields end.
	 */
	char end[];
} __attribute__((aligned(32)));
| 206 | |
| 207 | #endif /* _LINUX_RSEQ_H */ |
| 208 | |