1/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
2#ifndef _LINUX_FS_H
3#define _LINUX_FS_H
4
5/*
6 * This file has definitions for some important file table structures
7 * and constants and structures used by various generic file system
8 * ioctl's. Please do not make any changes in this file before
9 * sending patches for review to linux-fsdevel@vger.kernel.org and
10 * linux-api@vger.kernel.org.
11 */
12
13#include <linux/limits.h>
14#include <linux/ioctl.h>
15#include <linux/types.h>
16#include <linux/fscrypt.h>
17
18/* Use of MS_* flags within the kernel is restricted to core mount(2) code. */
19#include <linux/mount.h>
20
21/*
22 * It's silly to have NR_OPEN bigger than NR_FILE, but you can change
23 * the file limit at runtime and only root can increase the per-process
24 * nr_file rlimit, so it's safe to set up a ridiculously high absolute
25 * upper limit on files-per-process.
26 *
27 * Some programs (notably those using select()) may have to be
 * recompiled to take full advantage of the new limits.
29 */
30
/* Fixed constants first: */
/* Drop any NR_OPEN definition picked up from the headers included above. */
#undef NR_OPEN
#define INR_OPEN_CUR 1024	/* Initial setting for nfile rlimits */
#define INR_OPEN_MAX 4096	/* Hard limit for nfile rlimits */

/* Block size as a shift count and as the resulting value (1 << 10 == 1024). */
#define BLOCK_SIZE_BITS 10
#define BLOCK_SIZE (1<<BLOCK_SIZE_BITS)
38
/* flags for integrity meta; each flag occupies its own bit */
#define IO_INTEGRITY_CHK_GUARD (1U << 0) /* enforce guard check */
#define IO_INTEGRITY_CHK_REFTAG (1U << 1) /* enforce ref check */
#define IO_INTEGRITY_CHK_APPTAG (1U << 2) /* enforce app check */

/* Mask of every IO_INTEGRITY_CHK_* flag defined above. */
#define IO_INTEGRITY_VALID_FLAGS (IO_INTEGRITY_CHK_GUARD | \
				 IO_INTEGRITY_CHK_REFTAG | \
				 IO_INTEGRITY_CHK_APPTAG)
47
/* Seek modes. */
#define SEEK_SET 0 /* seek relative to beginning of file */
#define SEEK_CUR 1 /* seek relative to current file position */
#define SEEK_END 2 /* seek relative to end of file */
#define SEEK_DATA 3 /* seek to the next data */
#define SEEK_HOLE 4 /* seek to the next hole */
/* Highest seek mode defined above; must follow the last SEEK_* value. */
#define SEEK_MAX SEEK_HOLE
54
/* Rename flags; each one occupies its own bit. */
#define RENAME_NOREPLACE (1 << 0) /* Don't overwrite target */
#define RENAME_EXCHANGE (1 << 1) /* Exchange source and dest */
#define RENAME_WHITEOUT (1 << 2) /* Whiteout source */
58
/*
 * procfs guarantees that its root inode always carries the same inode
 * number. Programs that rely heavily on procfs can check that the root they
 * hold is a genuine procfs root and then resolve paths with
 * openat2(RESOLVE_{NO_{XDEV,MAGICLINKS},BENEATH}); doing so makes sure they
 * are never tricked into operating on the wrong procfs file.
 */
enum procfs_ino {
	/* Inode number of the procfs root. */
	PROCFS_ROOT_INO = 1,
};
69
/*
 * Argument of the FICLONERANGE ioctl: one signed and three unsigned 64-bit
 * fields, 32 bytes in total with no padding.
 */
struct file_clone_range {
	__s64 src_fd;
	__u64 src_offset, src_length;
	__u64 dest_offset;
};
76
/* Argument of the FITRIM ioctl: three unsigned 64-bit fields (24 bytes). */
struct fstrim_range {
	__u64 start, len, minlen;
};
82
/*
 * Result buffer of FS_IOC_GETFSUUID.
 *
 * A length field is included because some filesystems (vfat) have an
 * identifier that we do want to expose as a UUID even though it does not
 * have the standard length.
 *
 * The buffer has a fixed size because this interface will, by fiat, never
 * support "UUIDs" longer than 16 bytes; we don't want to force all
 * downstream users to have to deal with that.
 */
struct fsuuid2 {
	__u8 len;	/* length of the identifier stored in uuid[] */
	__u8 uuid[16];
};
95
/* Result buffer of FS_IOC_GETFSSYSFSPATH: a length byte plus 128 name bytes. */
struct fs_sysfs_path {
	__u8 len;
	__u8 name[128];
};
100
/* Protection info capability flags; each one occupies its own bit */
#define LBMD_PI_CAP_INTEGRITY (1 << 0)
#define LBMD_PI_CAP_REFTAG (1 << 1)

/*
 * Checksum types for Protection Information.
 * Note that the values are not consecutive: 3 is not assigned here.
 */
#define LBMD_PI_CSUM_NONE 0
#define LBMD_PI_CSUM_IP 1
#define LBMD_PI_CSUM_CRC16_T10DIF 2
#define LBMD_PI_CSUM_CRC64_NVME 4

/*
 * sizeof first published struct
 * (presumably struct logical_block_metadata_cap, whose fields add up to
 * 4 + 2 + 10 * 1 = 16 bytes -- confirm before relying on it)
 */
#define LBMD_SIZE_VER0 16
113
/*
 * struct logical_block_metadata_cap - logical block metadata capability
 *				       descriptor
 * @lbmd_flags:		   bitmask of logical block metadata capability flags
 * @lbmd_interval:	   amount of data described by each unit of logical
 *			   block metadata
 * @lbmd_size:		   size in bytes of the logical block metadata
 *			   associated with each interval
 * @lbmd_opaque_size:	   size in bytes of the opaque block tag associated
 *			   with each interval
 * @lbmd_opaque_offset:	   offset in bytes of the opaque block tag within the
 *			   logical block metadata
 * @lbmd_pi_size:	   size in bytes of the T10 PI tuple associated with
 *			   each interval
 * @lbmd_pi_offset:	   offset in bytes of the T10 PI tuple within the
 *			   logical block metadata
 * @lbmd_guard_tag_type:   T10 PI guard tag type
 * @lbmd_app_tag_size:	   size in bytes of the T10 PI application tag
 * @lbmd_ref_tag_size:	   size in bytes of the T10 PI reference tag
 * @lbmd_storage_tag_size: size in bytes of the T10 PI storage tag
 * @pad:		   trailing byte that rounds the structure up to
 *			   16 bytes
 *
 * All fields are zero when the device does not support metadata, so
 * applications must check @lbmd_flags to determine whether metadata is
 * supported or not.
 */
struct logical_block_metadata_cap {
	__u32 lbmd_flags;
	__u16 lbmd_interval;
	__u8 lbmd_size;
	__u8 lbmd_opaque_size;
	__u8 lbmd_opaque_offset;
	__u8 lbmd_pi_size;
	__u8 lbmd_pi_offset;
	__u8 lbmd_guard_tag_type;
	__u8 lbmd_app_tag_size;
	__u8 lbmd_ref_tag_size;
	__u8 lbmd_storage_tag_size;
	__u8 pad;
};
157
/*
 * extent-same (dedupe) ioctls; these MUST match the btrfs ioctl definitions.
 * Both values are reported through file_dedupe_range_info.status.
 */
#define FILE_DEDUPE_RANGE_SAME 0	/* dedupe succeeds */
#define FILE_DEDUPE_RANGE_DIFFERS 1	/* data differs */
161
/*
 * struct file_dedupe_range_info - one destination of a dedupe request
 * (from struct btrfs_ioctl_file_extent_same_info)
 * @dest_fd:	   in - destination file
 * @dest_offset:   in - start of extent in destination
 * @bytes_deduped: out - total # of bytes we were able to dedupe from this
 *		   file
 * @status:	   out - status of this dedupe operation:
 *		     < 0 for error
 *		     == FILE_DEDUPE_RANGE_SAME if dedupe succeeds
 *		     == FILE_DEDUPE_RANGE_DIFFERS if data differs
 * @reserved:	   must be zero
 */
struct file_dedupe_range_info {
	__s64 dest_fd;
	__u64 dest_offset;
	__u64 bytes_deduped;
	__s32 status;
	__u32 reserved;
};
176
177/* from struct btrfs_ioctl_file_extent_same_args */
178struct file_dedupe_range {
179 __u64 src_offset; /* in - start of extent in source */
180 __u64 src_length; /* in - length of extent */
181 __u16 dest_count; /* in - total elements in info array */
182 __u16 reserved1; /* must be zero */
183 __u32 reserved2; /* must be zero */
184 struct file_dedupe_range_info info[];
185};
186
/*
 * And dynamically-tunable limits and defaults:
 * the two counters are read only, max_files is the tunable.
 */
struct files_stat_struct {
	unsigned long nr_files, nr_free_files;	/* read only */
	unsigned long max_files;		/* tunable */
};
193
/* Inode counters, followed by padding kept for sysctl ABI compatibility. */
struct inodes_stat_t {
	long nr_inodes, nr_unused;
	long dummy[5];	/* padding only */
};
199
200
/* Fixed NR_FILE value; see the files-per-process note near the top of this header. */
#define NR_FILE 8192 /* this can well be larger on a larger system */
202
/*
 * struct fsxattr - argument of FS_IOC_FSGETXATTR[A] and FS_IOC_FSSETXATTR
 * @fsx_xflags:	    xflags field value (get/set)
 * @fsx_extsize:    extsize field value (get/set)
 * @fsx_nextents:   nextents field value (get)
 * @fsx_projid:	    project identifier (get/set)
 * @fsx_cowextsize: CoW extsize field value (get/set)
 * @fsx_pad:	    eight bytes of padding
 */
struct fsxattr {
	__u32 fsx_xflags;
	__u32 fsx_extsize;
	__u32 fsx_nextents;
	__u32 fsx_projid;
	__u32 fsx_cowextsize;
	unsigned char fsx_pad[8];
};
214
/*
 * struct file_attr - variable size structure for file_[sg]et_attr()
 * @fa_xflags:	   xflags field value (get/set)
 * @fa_extsize:	   extsize field value (get/set)
 * @fa_nextents:   nextents field value (get)
 * @fa_projid:	   project identifier (get/set)
 * @fa_cowextsize: CoW extsize field value (get/set)
 *
 * Note: this is an alternative to 'struct file_kattr'/'struct fsxattr'.
 * Because the structure is passed to/from userspace together with its size,
 * it can be versioned based on the size.
 */
struct file_attr {
	__u64 fa_xflags;
	__u32 fa_extsize;
	__u32 fa_nextents;
	__u32 fa_projid;
	__u32 fa_cowextsize;
};
229
/*
 * Sizes of struct file_attr. 24 equals the size of the structure as it is
 * declared in this header (one __u64 plus four __u32 fields).
 */
#define FILE_ATTR_SIZE_VER0 24
/* Alias for the newest size; currently the same as VER0. */
#define FILE_ATTR_SIZE_LATEST FILE_ATTR_SIZE_VER0
232
/*
 * Flags for the fsx_xflags field
 *
 * Each flag occupies its own bit. Bit 0x00000004 is not assigned in this
 * list, and neither are the bits between FS_XFLAG_VERITY and
 * FS_XFLAG_HASATTR.
 */
#define FS_XFLAG_REALTIME 0x00000001 /* data in realtime volume */
#define FS_XFLAG_PREALLOC 0x00000002 /* preallocated file extents */
#define FS_XFLAG_IMMUTABLE 0x00000008 /* file cannot be modified */
#define FS_XFLAG_APPEND 0x00000010 /* all writes append */
#define FS_XFLAG_SYNC 0x00000020 /* all writes synchronous */
#define FS_XFLAG_NOATIME 0x00000040 /* do not update access time */
#define FS_XFLAG_NODUMP 0x00000080 /* do not include in backups */
#define FS_XFLAG_RTINHERIT 0x00000100 /* create with rt bit set */
#define FS_XFLAG_PROJINHERIT 0x00000200 /* create with parents projid */
#define FS_XFLAG_NOSYMLINKS 0x00000400 /* disallow symlink creation */
#define FS_XFLAG_EXTSIZE 0x00000800 /* extent size allocator hint */
#define FS_XFLAG_EXTSZINHERIT 0x00001000 /* inherit inode extent size */
#define FS_XFLAG_NODEFRAG 0x00002000 /* do not defragment */
#define FS_XFLAG_FILESTREAM 0x00004000 /* use filestream allocator */
#define FS_XFLAG_DAX 0x00008000 /* use DAX for IO */
#define FS_XFLAG_COWEXTSIZE 0x00010000 /* CoW extent size allocator hint */
#define FS_XFLAG_VERITY 0x00020000 /* fs-verity enabled */
/* Highest bit of a 32-bit flags word. */
#define FS_XFLAG_HASATTR 0x80000000 /* no DIFLAG for this */
254
/* the read-only stuff doesn't really belong here, but any other place is
   probably as bad and I don't want to create yet another include file. */

/* Block-device requests; all of them use ioctl type 0x12. */
#define BLKROSET _IO(0x12,93) /* set device read-only (0 = read-write) */
#define BLKROGET _IO(0x12,94) /* get read-only status (0 = read-write) */
#define BLKRRPART _IO(0x12,95) /* re-read partition table */
#define BLKGETSIZE _IO(0x12,96) /* return device size /512 (long *arg) */
#define BLKFLSBUF _IO(0x12,97) /* flush buffer cache */
#define BLKRASET _IO(0x12,98) /* set read ahead for block device */
#define BLKRAGET _IO(0x12,99) /* get current read ahead setting */
#define BLKFRASET _IO(0x12,100)/* set filesystem (mm/filemap.c) read-ahead */
#define BLKFRAGET _IO(0x12,101)/* get filesystem (mm/filemap.c) read-ahead */
#define BLKSECTSET _IO(0x12,102)/* set max sectors per request (ll_rw_blk.c) */
#define BLKSECTGET _IO(0x12,103)/* get max sectors per request (ll_rw_blk.c) */
#define BLKSSZGET _IO(0x12,104)/* get block device sector size */
/* Compiled out: the region below only records that 105-107 are taken. */
#if 0
#define BLKPG _IO(0x12,105)/* See blkpg.h */

/* Some people are morons. Do not use sizeof! */

#define BLKELVGET _IOR(0x12,106,size_t)/* elevator get */
#define BLKELVSET _IOW(0x12,107,size_t)/* elevator set */
/* This was here just to show that the number is taken -
   probably all these _IO(0x12,*) ioctls should be moved to blkpg.h. */
#endif
/* A jump here: 108-111 have been used for various private purposes. */
#define BLKBSZGET _IOR(0x12,112,size_t)
#define BLKBSZSET _IOW(0x12,113,size_t)
/*
 * NOTE(review): encoded with size_t although the argument is documented as
 * u64 *. The encoded value is what userspace was built against, so
 * presumably it must stay exactly as written -- confirm before touching it.
 */
#define BLKGETSIZE64 _IOR(0x12,114,size_t) /* return device size in bytes (u64 *arg) */
/*
 * struct blk_user_trace_setup is not declared in this header; users of
 * BLKTRACESETUP have to bring its definition into scope themselves.
 */
#define BLKTRACESETUP _IOWR(0x12,115,struct blk_user_trace_setup)
#define BLKTRACESTART _IO(0x12,116)
#define BLKTRACESTOP _IO(0x12,117)
#define BLKTRACETEARDOWN _IO(0x12,118)
#define BLKDISCARD _IO(0x12,119)
#define BLKIOMIN _IO(0x12,120)
#define BLKIOOPT _IO(0x12,121)
#define BLKALIGNOFF _IO(0x12,122)
#define BLKPBSZGET _IO(0x12,123)
#define BLKDISCARDZEROES _IO(0x12,124)
#define BLKSECDISCARD _IO(0x12,125)
#define BLKROTATIONAL _IO(0x12,126)
#define BLKZEROOUT _IO(0x12,127)
#define BLKGETDISKSEQ _IOR(0x12,128,__u64)
/* 130-136 and 142 are used by zoned block device ioctls (uapi/linux/blkzoned.h) */
/* 137-141 are used by blk-crypto ioctls (uapi/linux/blk-crypto.h) */
/*
 * NOTE(review): command number 142 is listed above as used by the zoned
 * block device ioctls and is used again here. Presumably the argument size
 * encoded by _IOWR() keeps the resulting request values distinct -- confirm
 * against blkzoned.h. struct blk_user_trace_setup2 is not declared in this
 * header either.
 */
#define BLKTRACESETUP2 _IOWR(0x12, 142, struct blk_user_trace_setup2)
301
#define BMAP_IOCTL 1 /* obsolete - kept for compatibility */
/* Requests with ioctl type 0x00. */
#define FIBMAP _IO(0x00,1) /* bmap access */
#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
/* Requests with ioctl type 'X'. */
#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
#define FITHAW _IOWR('X', 120, int) /* Thaw */
#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
/*
 * Requests with ioctl type 0x94. FICLONE encodes an int argument; the two
 * range requests take the structures declared earlier in this header.
 */
#define FICLONE _IOW(0x94, 9, int)
#define FICLONERANGE _IOW(0x94, 13, struct file_clone_range)
#define FIDEDUPERANGE _IOWR(0x94, 54, struct file_dedupe_range)
311
/* Size of the char array encoded in FS_IOC_GETFSLABEL / FS_IOC_SETFSLABEL. */
#define FSLABEL_MAX 256 /* Max chars for the interface; each fs may differ */

/*
 * The FS_IOC32_* requests below reuse the command numbers of their FS_IOC_*
 * counterparts but encode an int argument instead of a long one.
 */
#define FS_IOC_GETFLAGS _IOR('f', 1, long)
#define FS_IOC_SETFLAGS _IOW('f', 2, long)
#define FS_IOC_GETVERSION _IOR('v', 1, long)
#define FS_IOC_SETVERSION _IOW('v', 2, long)
/*
 * struct fiemap is not declared in this header; users of FS_IOC_FIEMAP have
 * to bring its definition into scope themselves.
 */
#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap)
#define FS_IOC32_GETFLAGS _IOR('f', 1, int)
#define FS_IOC32_SETFLAGS _IOW('f', 2, int)
#define FS_IOC32_GETVERSION _IOR('v', 1, int)
#define FS_IOC32_SETVERSION _IOW('v', 2, int)
#define FS_IOC_FSGETXATTR _IOR('X', 31, struct fsxattr)
#define FS_IOC_FSSETXATTR _IOW('X', 32, struct fsxattr)
#define FS_IOC_GETFSLABEL _IOR(0x94, 49, char[FSLABEL_MAX])
#define FS_IOC_SETFSLABEL _IOW(0x94, 50, char[FSLABEL_MAX])
/* Returns the external filesystem UUID, the same one blkid returns */
#define FS_IOC_GETFSUUID _IOR(0x15, 0, struct fsuuid2)
/*
 * Returns the path component under /sys/fs/ that refers to this filesystem;
 * also /sys/kernel/debug/ for filesystems with debugfs exports
 */
#define FS_IOC_GETFSSYSFSPATH _IOR(0x15, 1, struct fs_sysfs_path)
/* Get logical block metadata capability details */
#define FS_IOC_GETLBMD_CAP _IOWR(0x15, 2, struct logical_block_metadata_cap)
336
337/*
338 * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS)
339 *
340 * Note: for historical reasons, these flags were originally used and
341 * defined for use by ext2/ext3, and then other file systems started
342 * using these flags so they wouldn't need to write their own version
343 * of chattr/lsattr (which was shipped as part of e2fsprogs). You
344 * should think twice before trying to use these flags in new
345 * contexts, or trying to assign these flags, since they are used both
346 * as the UAPI and the on-disk encoding for ext2/3/4. Also, we are
347 * almost out of 32-bit flags. :-)
348 *
349 * We have recently hoisted FS_IOC_FSGETXATTR / FS_IOC_FSSETXATTR from
350 * XFS to the generic FS level interface. This uses a structure that
351 * has padding and hence has more room to grow, so it may be more
352 * appropriate for many new use cases.
353 *
354 * Please do not change these flags or interfaces before checking with
355 * linux-fsdevel@vger.kernel.org and linux-api@vger.kernel.org.
356 */
#define FS_SECRM_FL 0x00000001 /* Secure deletion */
#define FS_UNRM_FL 0x00000002 /* Undelete */
#define FS_COMPR_FL 0x00000004 /* Compress file */
#define FS_SYNC_FL 0x00000008 /* Synchronous updates */
#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */
#define FS_APPEND_FL 0x00000020 /* writes to file may only append */
#define FS_NODUMP_FL 0x00000040 /* do not dump file */
#define FS_NOATIME_FL 0x00000080 /* do not update atime */
/* Reserved for compression usage... */
#define FS_DIRTY_FL 0x00000100
#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */
#define FS_NOCOMP_FL 0x00000400 /* Don't compress */
/* End compression flags --- maybe not all used */
#define FS_ENCRYPT_FL 0x00000800 /* Encrypted file */
/* FS_BTREE_FL and FS_INDEX_FL are two names for the same bit. */
#define FS_BTREE_FL 0x00001000 /* btree format dir */
#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */
#define FS_IMAGIC_FL 0x00002000 /* AFS directory */
#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */
#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */
#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */
#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/
#define FS_HUGE_FILE_FL 0x00040000 /* Reserved for ext4 */
#define FS_EXTENT_FL 0x00080000 /* Extents */
#define FS_VERITY_FL 0x00100000 /* Verity protected inode */
#define FS_EA_INODE_FL 0x00200000 /* Inode used for large EA */
#define FS_EOFBLOCKS_FL 0x00400000 /* Reserved for ext4 */
#define FS_NOCOW_FL 0x00800000 /* Do not cow file */
/* 0x01000000 is not assigned in this header. */
#define FS_DAX_FL 0x02000000 /* Inode is DAX */
/* 0x04000000 and 0x08000000 are not assigned in this header. */
#define FS_INLINE_DATA_FL 0x10000000 /* Reserved for ext4 */
#define FS_PROJINHERIT_FL 0x20000000 /* Create with parents projid */
#define FS_CASEFOLD_FL 0x40000000 /* Folder is case insensitive */
#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */

/*
 * Both masks are plain literals, not expressions built from the names above:
 *   0x0003DFFF = bits 0x00000001..0x00020000 except 0x00002000 (FS_IMAGIC_FL)
 *   0x000380FF = bits 0x00000001..0x00000080 plus FS_NOTAIL_FL,
 *		  FS_DIRSYNC_FL and FS_TOPDIR_FL
 */
#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */
#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */
392
393
/* SYNC_FILE_RANGE_* flags; each of the first three occupies its own bit. */
#define SYNC_FILE_RANGE_WAIT_BEFORE 1
#define SYNC_FILE_RANGE_WRITE 2
#define SYNC_FILE_RANGE_WAIT_AFTER 4
/* All three flags above combined (1 | 2 | 4 == 7). */
#define SYNC_FILE_RANGE_WRITE_AND_WAIT (SYNC_FILE_RANGE_WRITE | \
					SYNC_FILE_RANGE_WAIT_BEFORE | \
					SYNC_FILE_RANGE_WAIT_AFTER)
400
/*
 * Flags for preadv2/pwritev2.
 *
 * Every flag is a single bit cast to __kernel_rwf_t, an int-based __bitwise
 * type.
 */

typedef int __bitwise __kernel_rwf_t;

#define RWF_HIPRI	((__kernel_rwf_t)0x00000001)	/* high priority request, poll if possible */
#define RWF_DSYNC	((__kernel_rwf_t)0x00000002)	/* per-IO O_DSYNC */
#define RWF_SYNC	((__kernel_rwf_t)0x00000004)	/* per-IO O_SYNC */
#define RWF_NOWAIT	((__kernel_rwf_t)0x00000008)	/* per-IO, return -EAGAIN if operation would block */
#define RWF_APPEND	((__kernel_rwf_t)0x00000010)	/* per-IO O_APPEND */
#define RWF_NOAPPEND	((__kernel_rwf_t)0x00000020)	/* per-IO negation of O_APPEND */
#define RWF_ATOMIC	((__kernel_rwf_t)0x00000040)	/* Atomic Write */
#define RWF_DONTCACHE	((__kernel_rwf_t)0x00000080)	/* buffered IO that drops the cache after reading or writing data */
#define RWF_NOSIGNAL	((__kernel_rwf_t)0x00000100)	/* prevent pipe and socket writes from raising SIGPIPE */

/* mask of flags supported by the kernel: every RWF_* flag listed above */
#define RWF_SUPPORTED	(RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT | \
			 RWF_APPEND | RWF_NOAPPEND | RWF_ATOMIC | \
			 RWF_DONTCACHE | RWF_NOSIGNAL)
438
/* ioctl type shared by the procfs requests (PAGEMAP_SCAN and PROCMAP_QUERY). */
#define PROCFS_IOCTL_MAGIC 'f'

/* Pagemap ioctl */
#define PAGEMAP_SCAN _IOWR(PROCFS_IOCTL_MAGIC, 16, struct pm_scan_arg)

/*
 * Bitmasks provided in pm_scan_arg masks and reported in
 * page_region.categories. Each category occupies its own bit.
 */
#define PAGE_IS_WPALLOWED (1 << 0)
#define PAGE_IS_WRITTEN (1 << 1)
#define PAGE_IS_FILE (1 << 2)
#define PAGE_IS_PRESENT (1 << 3)
#define PAGE_IS_SWAPPED (1 << 4)
#define PAGE_IS_PFNZERO (1 << 5)
#define PAGE_IS_HUGE (1 << 6)
#define PAGE_IS_SOFT_DIRTY (1 << 7)
#define PAGE_IS_GUARD (1 << 8)
454
/*
 * struct page_region - Page region with flags
 * @start:	Start of the region
 * @end:	End of the region (exclusive)
 * @categories:	PAGE_IS_* category bitmask for the region
 *
 * The region therefore covers the half-open range [@start, @end).
 */
struct page_region {
	__u64 start, end;
	__u64 categories;
};
466
/* Flags for PAGEMAP_SCAN ioctl (pm_scan_arg.flags); each occupies its own bit */
#define PM_SCAN_WP_MATCHING (1 << 0) /* Write protect the pages matched. */
#define PM_SCAN_CHECK_WPASYNC (1 << 1) /* Abort the scan when a non-WP-enabled page is found. */
470
/*
 * struct pm_scan_arg - Pagemap ioctl argument
 * @size:		 Size of the structure
 * @flags:		 Flags for the IOCTL
 * @start:		 Starting address of the region
 * @end:		 Ending address of the region
 * @walk_end:		 Address where the scan stopped (written by kernel).
 *			 walk_end == end (address tags cleared) informs that
 *			 the scan completed on entire range.
 * @vec:		 Address of page_region struct array for output
 * @vec_len:		 Length of the page_region struct array
 * @max_pages:		 Optional limit for number of returned pages
 *			 (0 = disabled)
 * @category_inverted:	 PAGE_IS_* categories which values match if 0
 *			 instead of 1
 * @category_mask:	 Skip pages for which any category doesn't match
 * @category_anyof_mask: Skip pages for which no category matches
 * @return_mask:	 PAGE_IS_* categories that are to be reported in
 *			 `page_region`s returned
 *
 * Every member is a __u64, so the structure is 12 * 8 = 96 bytes.
 */
struct pm_scan_arg {
	__u64 size;
	__u64 flags;
	__u64 start, end;
	__u64 walk_end;
	__u64 vec, vec_len;
	__u64 max_pages;
	__u64 category_inverted;
	__u64 category_mask;
	__u64 category_anyof_mask;
	__u64 return_mask;
};
501
/*
 * /proc/<pid>/maps ioctl
 * Read/write request with type PROCFS_IOCTL_MAGIC and command number 17,
 * taking a struct procmap_query.
 */
#define PROCMAP_QUERY _IOWR(PROCFS_IOCTL_MAGIC, 17, struct procmap_query)
504
/*
 * Flags of the PROCMAP_QUERY ioctl.
 *
 * VMA permission flags (READABLE, WRITABLE, EXECUTABLE, SHARED):
 *   Can be used as part of the procmap_query.query_flags field to look up
 *   only VMAs satisfying the specified subset of permissions. E.g.,
 *   specifying PROCMAP_QUERY_VMA_READABLE only will return both readable and
 *   read/write VMAs, while PROCMAP_QUERY_VMA_READABLE |
 *   PROCMAP_QUERY_VMA_WRITABLE will only return read/write VMAs, though both
 *   executable/non-executable and private/shared will be ignored.
 *
 *   PROCMAP_QUERY_VMA_* flags are also returned in the
 *   procmap_query.vma_flags field to specify actual VMA permissions.
 *
 * Query modifier flags (COVERING_OR_NEXT_VMA, FILE_BACKED_VMA):
 *   By default the VMA that covers the provided address is returned, or
 *   -ENOENT is returned. With PROCMAP_QUERY_COVERING_OR_NEXT_VMA set, the
 *   closest VMA with vma_start > addr will be returned if no covering VMA
 *   is found.
 *
 *   PROCMAP_QUERY_FILE_BACKED_VMA instructs the query to consider only VMAs
 *   that have file backing. It can be combined with
 *   PROCMAP_QUERY_COVERING_OR_NEXT_VMA to iterate all VMAs with file
 *   backing.
 */
enum procmap_query_flags {
	/* VMA permission flags */
	PROCMAP_QUERY_VMA_READABLE		= 1 << 0,
	PROCMAP_QUERY_VMA_WRITABLE		= 1 << 1,
	PROCMAP_QUERY_VMA_EXECUTABLE		= 1 << 2,
	PROCMAP_QUERY_VMA_SHARED		= 1 << 3,
	/* query modifier flags */
	PROCMAP_QUERY_COVERING_OR_NEXT_VMA	= 1 << 4,
	PROCMAP_QUERY_FILE_BACKED_VMA		= 1 << 5,
};
538
/*
 * Input/output argument structure passed into the PROCMAP_QUERY ioctl()
 * call. It can be used to query a set of VMAs (Virtual Memory Areas) of a
 * process.
 *
 * A short comment to the right of each field tells which of three kinds the
 * field is:
 *   - "in": input argument; the user has to provide this value and the
 *     kernel doesn't modify it;
 *   - "out": output argument; the kernel sets this field with VMA data;
 *   - "in/out": input and output argument; the user provides the initial
 *     value (used to specify the maximum allowable buffer size), and the
 *     kernel sets it to the actual amount of data written (or zero, if
 *     there is no data).
 *
 * If a matching VMA is found (according to the criteria specified by
 * query_addr/query_flags), all the out fields are filled out and ioctl()
 * returns 0. If there is no matching VMA, -ENOENT is returned. In case of
 * any other error, a negative error code other than -ENOENT is returned.
 *
 * Most of the data is similar to what /proc/<pid>/maps returns as text, but
 * procmap_query provides more querying flexibility. There are no
 * consistency guarantees between subsequent ioctl() calls, but the data
 * returned for a matched VMA is self-consistent.
 */
struct procmap_query {
	/* Size of this query struct, for backwards/forward compatibility. */
	__u64 size;

	/*
	 * Combination of enum procmap_query_flags values that defines query
	 * filtering and behavior (see enum procmap_query_flags).
	 *
	 * Provided by the user; the kernel doesn't modify it.
	 */
	__u64 query_flags;		/* in */

	/*
	 * Address to query. By default the VMA that covers this address is
	 * looked up; the PROCMAP_QUERY_* flags above modify this default
	 * behavior further.
	 *
	 * Provided by the user; the kernel doesn't modify it.
	 */
	__u64 query_addr;		/* in */

	/* Start (inclusive) and end (exclusive) address of the VMA, if found. */
	__u64 vma_start;		/* out */
	__u64 vma_end;			/* out */

	/* VMA permissions: a combination of PROCMAP_QUERY_VMA_* flags. */
	__u64 vma_flags;		/* out */

	/* Page size granularity of the VMA backing. */
	__u64 vma_page_size;		/* out */

	/*
	 * File offset of the VMA. If the VMA has file backing, this is the
	 * offset within the file that the VMA's start address corresponds to.
	 * Zero if the VMA has no backing file.
	 */
	__u64 vma_offset;		/* out */

	/* Inode number of the backing file, or zero if there is none. */
	__u64 inode;			/* out */

	/* Device major/minor number of the backing file, or zero if there is none. */
	__u32 dev_major;		/* out */
	__u32 dev_minor;		/* out */

	/*
	 * A non-zero value requests the VMA name of the matched VMA, i.e. the
	 * absolute path of the VMA's backing file, with a " (deleted)" suffix
	 * appended if the file was unlinked from the FS. The VMA name can also
	 * be some special name (e.g., "[heap]", "[stack]") or could even be
	 * user-supplied with prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME).
	 *
	 * The kernel sets this field to zero if the VMA has no associated
	 * name. Otherwise it returns the actual number of bytes filled in the
	 * user-supplied buffer (see vma_name_addr below), including the
	 * terminating zero.
	 *
	 * If the VMA name is longer than the user-supplied maximum buffer
	 * size, -E2BIG is returned.
	 *
	 * When non-zero, vma_name_addr should point to a valid user space
	 * memory buffer of at least vma_name_size bytes. When zero,
	 * vma_name_addr should be set to zero as well.
	 */
	__u32 vma_name_size;		/* in/out */

	/*
	 * A non-zero value requests extraction of the build ID of the VMA's
	 * backing file, if the backing file is an ELF file and it contains an
	 * embedded build ID.
	 *
	 * The kernel sets this field to zero if the VMA has no backing file,
	 * the backing file is not an ELF file, or the ELF file has no build
	 * ID embedded.
	 *
	 * The build ID is a binary value (not a string). The kernel sets
	 * build_id_size to the exact number of bytes used for the build ID.
	 * If a build ID is requested and present, but needs more bytes than
	 * the user-supplied maximum buffer size (see build_id_addr below),
	 * -E2BIG is returned.
	 *
	 * When non-zero, build_id_addr should point to a valid user space
	 * memory buffer of at least build_id_size bytes. When zero,
	 * build_id_addr should be set to zero as well.
	 */
	__u32 build_id_size;		/* in/out */

	/*
	 * User-supplied address of a buffer of at least vma_name_size bytes
	 * that the kernel fills with the matched VMA's name (see the
	 * vma_name_size description above for details).
	 *
	 * Should be zero if the VMA name should not be returned.
	 */
	__u64 vma_name_addr;		/* in */

	/*
	 * User-supplied address of a buffer of at least build_id_size bytes
	 * that the kernel fills with the matched VMA's ELF build ID, if
	 * available (see the build_id_size description above for details).
	 *
	 * Should be zero if the build ID should not be returned.
	 */
	__u64 build_id_addr;		/* in */
};
655
656#endif /* _LINUX_FS_H */
657