/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES.
 */
#ifndef _UAPI_IOMMUFD_H
#define _UAPI_IOMMUFD_H

#include <linux/ioctl.h>
#include <linux/types.h>

#define IOMMUFD_TYPE (';')

/**
 * DOC: General ioctl format
 *
 * The ioctl interface follows a general format to allow for extensibility. Each
 * ioctl is passed in a structure pointer as the argument providing the size of
 * the structure in the first u32. The kernel checks that any structure space
 * beyond what it understands is 0. This allows userspace to use the backward
 * compatible portion while consistently using the newer, larger structures.
 *
 * ioctls use a standard meaning for common errnos:
 *
 *  - ENOTTY: The IOCTL number itself is not supported at all
 *  - E2BIG: The IOCTL number is supported, but the provided structure has
 *    non-zero in a part the kernel does not understand.
 *  - EOPNOTSUPP: The IOCTL number is supported, and the structure is
 *    understood, however a known field has a value the kernel does not
 *    understand or support.
 *  - EINVAL: Everything about the IOCTL was understood, but a field is not
 *    correct.
 *  - ENOENT: An ID or IOVA provided does not exist.
 *  - ENOMEM: Out of memory.
 *  - EOVERFLOW: Mathematics overflowed.
 *
 * Individual ioctls may additionally return errnos beyond this list.
 */
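
/*
 * A minimal sketch of this calling convention from userspace. The "iommufd"
 * fd (opened from /dev/iommu), "obj_id", and the error handling are
 * illustrative assumptions, not part of this header:
 *
 *	struct iommu_destroy cmd = { .size = sizeof(cmd), .id = obj_id };
 *
 *	if (ioctl(iommufd, IOMMU_DESTROY, &cmd))
 *		err(1, "IOMMU_DESTROY");
 *
 * Because the size is carried in the structure itself, a binary built against
 * a newer header keeps working on an older kernel as long as the bytes beyond
 * what that kernel understands are zero.
 */
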
enum {
	IOMMUFD_CMD_BASE = 0x80,
	IOMMUFD_CMD_DESTROY = IOMMUFD_CMD_BASE,
	IOMMUFD_CMD_IOAS_ALLOC = 0x81,
	IOMMUFD_CMD_IOAS_ALLOW_IOVAS = 0x82,
	IOMMUFD_CMD_IOAS_COPY = 0x83,
	IOMMUFD_CMD_IOAS_IOVA_RANGES = 0x84,
	IOMMUFD_CMD_IOAS_MAP = 0x85,
	IOMMUFD_CMD_IOAS_UNMAP = 0x86,
	IOMMUFD_CMD_OPTION = 0x87,
	IOMMUFD_CMD_VFIO_IOAS = 0x88,
	IOMMUFD_CMD_HWPT_ALLOC = 0x89,
	IOMMUFD_CMD_GET_HW_INFO = 0x8a,
	IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING = 0x8b,
	IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP = 0x8c,
	IOMMUFD_CMD_HWPT_INVALIDATE = 0x8d,
	IOMMUFD_CMD_FAULT_QUEUE_ALLOC = 0x8e,
	IOMMUFD_CMD_IOAS_MAP_FILE = 0x8f,
	IOMMUFD_CMD_VIOMMU_ALLOC = 0x90,
	IOMMUFD_CMD_VDEVICE_ALLOC = 0x91,
	IOMMUFD_CMD_IOAS_CHANGE_PROCESS = 0x92,
	IOMMUFD_CMD_VEVENTQ_ALLOC = 0x93,
};

/**
 * struct iommu_destroy - ioctl(IOMMU_DESTROY)
 * @size: sizeof(struct iommu_destroy)
 * @id: iommufd object ID to destroy. Can be any destroyable object type.
 *
 * Destroy any object held within iommufd.
 */
struct iommu_destroy {
	__u32 size;
	__u32 id;
};
#define IOMMU_DESTROY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_DESTROY)

/**
 * struct iommu_ioas_alloc - ioctl(IOMMU_IOAS_ALLOC)
 * @size: sizeof(struct iommu_ioas_alloc)
 * @flags: Must be 0
 * @out_ioas_id: Output IOAS ID for the allocated object
 *
 * Allocate an IO Address Space (IOAS) which holds an IO Virtual Address (IOVA)
 * to memory mapping.
 */
struct iommu_ioas_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_ioas_id;
};
#define IOMMU_IOAS_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOC)
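
/*
 * An illustrative sketch (assuming an open iommufd fd, which is not part of
 * this header) of allocating an IOAS:
 *
 *	struct iommu_ioas_alloc alloc_cmd = { .size = sizeof(alloc_cmd) };
 *
 *	if (ioctl(iommufd, IOMMU_IOAS_ALLOC, &alloc_cmd))
 *		err(1, "IOMMU_IOAS_ALLOC");
 *
 * On success alloc_cmd.out_ioas_id names the new IOAS.
 */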

/**
 * struct iommu_iova_range - ioctl(IOMMU_IOVA_RANGE)
 * @start: First IOVA
 * @last: Inclusive last IOVA
 *
 * An interval in IOVA space.
 */
struct iommu_iova_range {
	__aligned_u64 start;
	__aligned_u64 last;
};

/**
 * struct iommu_ioas_iova_ranges - ioctl(IOMMU_IOAS_IOVA_RANGES)
 * @size: sizeof(struct iommu_ioas_iova_ranges)
 * @ioas_id: IOAS ID to read ranges from
 * @num_iovas: Input/Output total number of ranges in the IOAS
 * @__reserved: Must be 0
 * @allowed_iovas: Pointer to the output array of struct iommu_iova_range
 * @out_iova_alignment: Minimum alignment required for mapping IOVA
 *
 * Query an IOAS for ranges of allowed IOVAs. Mapping IOVA outside these ranges
 * is not allowed. num_iovas will be set to the total number of iovas and
 * the allowed_iovas[] will be filled in as space permits.
 *
 * The allowed ranges are dependent on the HW path the DMA operation takes, and
 * can change during the lifetime of the IOAS. A fresh empty IOAS will have a
 * full range, and each attached device will narrow the ranges based on that
 * device's HW restrictions. Detaching a device can widen the ranges. Userspace
 * should query ranges after every attach/detach to know what IOVAs are valid
 * for mapping.
 *
 * On input num_iovas is the length of the allowed_iovas array. On output it is
 * the total number of iovas filled in. The ioctl will return -EMSGSIZE and set
 * num_iovas to the required value if num_iovas is too small. In this case the
 * caller should allocate a larger output array and re-issue the ioctl.
 *
 * out_iova_alignment returns the minimum IOVA alignment that can be given
 * to IOMMU_IOAS_MAP/COPY. IOVAs must satisfy::
 *
 *   starting_iova % out_iova_alignment == 0
 *   (starting_iova + length) % out_iova_alignment == 0
 *
 * out_iova_alignment can be 1, indicating any IOVA is allowed. It cannot
 * be higher than the system PAGE_SIZE.
 */
struct iommu_ioas_iova_ranges {
	__u32 size;
	__u32 ioas_id;
	__u32 num_iovas;
	__u32 __reserved;
	__aligned_u64 allowed_iovas;
	__aligned_u64 out_iova_alignment;
};
#define IOMMU_IOAS_IOVA_RANGES _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_IOVA_RANGES)
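
/*
 * An illustrative sketch of the sizing/retry protocol described above
 * (iommufd and ioas_id are assumptions of the example). Starting with
 * num_iovas == 0 asks the kernel for the required array length:
 *
 *	struct iommu_ioas_iova_ranges ranges_cmd = {
 *		.size = sizeof(ranges_cmd),
 *		.ioas_id = ioas_id,
 *	};
 *	struct iommu_iova_range *ranges;
 *
 *	if (ioctl(iommufd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd) &&
 *	    errno != EMSGSIZE)
 *		err(1, "IOMMU_IOAS_IOVA_RANGES");
 *	ranges = calloc(ranges_cmd.num_iovas, sizeof(*ranges));
 *	ranges_cmd.allowed_iovas = (uintptr_t)ranges;
 *	if (ioctl(iommufd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd))
 *		err(1, "IOMMU_IOAS_IOVA_RANGES");
 */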

/**
 * struct iommu_ioas_allow_iovas - ioctl(IOMMU_IOAS_ALLOW_IOVAS)
 * @size: sizeof(struct iommu_ioas_allow_iovas)
 * @ioas_id: IOAS ID to allow IOVAs from
 * @num_iovas: Input/Output total number of ranges in the IOAS
 * @__reserved: Must be 0
 * @allowed_iovas: Pointer to array of struct iommu_iova_range
 *
 * Ensure a range of IOVAs are always available for allocation. If this call
 * succeeds then IOMMU_IOAS_IOVA_RANGES will never return a list of IOVA ranges
 * that are narrower than the ranges provided here. This call will fail if
 * IOMMU_IOAS_IOVA_RANGES is currently narrower than the given ranges.
 *
 * When an IOAS is first created the IOVA_RANGES will be maximally sized, and as
 * devices are attached the IOVA will narrow based on the device restrictions.
 * When an allowed range is specified any narrowing will be refused, i.e. device
 * attachment can fail if the device requires limiting within the allowed range.
 *
 * Automatic IOVA allocation is also impacted by this call. MAP will only
 * allocate within the allowed IOVAs if they are present.
 *
 * This call replaces the entire allowed list with the given list.
 */
struct iommu_ioas_allow_iovas {
	__u32 size;
	__u32 ioas_id;
	__u32 num_iovas;
	__u32 __reserved;
	__aligned_u64 allowed_iovas;
};
#define IOMMU_IOAS_ALLOW_IOVAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_ALLOW_IOVAS)
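
/*
 * An illustrative sketch reserving one range so that later device attachment
 * cannot narrow it away (iommufd and ioas_id are assumptions of the example):
 *
 *	struct iommu_iova_range range = {
 *		.start = 0x100000,
 *		.last = 0x1fffff,
 *	};
 *	struct iommu_ioas_allow_iovas allow_cmd = {
 *		.size = sizeof(allow_cmd),
 *		.ioas_id = ioas_id,
 *		.num_iovas = 1,
 *		.allowed_iovas = (uintptr_t)&range,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd))
 *		err(1, "IOMMU_IOAS_ALLOW_IOVAS");
 */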

/**
 * enum iommufd_ioas_map_flags - Flags for map and copy
 * @IOMMU_IOAS_MAP_FIXED_IOVA: If clear the kernel will compute an appropriate
 *                             IOVA to place the mapping at
 * @IOMMU_IOAS_MAP_WRITEABLE: DMA is allowed to write to this mapping
 * @IOMMU_IOAS_MAP_READABLE: DMA is allowed to read from this mapping
 */
enum iommufd_ioas_map_flags {
	IOMMU_IOAS_MAP_FIXED_IOVA = 1 << 0,
	IOMMU_IOAS_MAP_WRITEABLE = 1 << 1,
	IOMMU_IOAS_MAP_READABLE = 1 << 2,
};

/**
 * struct iommu_ioas_map - ioctl(IOMMU_IOAS_MAP)
 * @size: sizeof(struct iommu_ioas_map)
 * @flags: Combination of enum iommufd_ioas_map_flags
 * @ioas_id: IOAS ID to change the mapping of
 * @__reserved: Must be 0
 * @user_va: Userspace pointer to start mapping from
 * @length: Number of bytes to map
 * @iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is set
 *        then this must be provided as input.
 *
 * Set an IOVA mapping from a user pointer. If FIXED_IOVA is specified then the
 * mapping will be established at iova, otherwise a suitable location based on
 * the reserved and allowed lists will be automatically selected and returned in
 * iova.
 *
 * If IOMMU_IOAS_MAP_FIXED_IOVA is specified then the iova range must currently
 * be unused; an existing IOVA cannot be replaced.
 */
struct iommu_ioas_map {
	__u32 size;
	__u32 flags;
	__u32 ioas_id;
	__u32 __reserved;
	__aligned_u64 user_va;
	__aligned_u64 length;
	__aligned_u64 iova;
};
#define IOMMU_IOAS_MAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP)
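
/*
 * An illustrative sketch mapping a buffer at a kernel-chosen IOVA (iommufd,
 * ioas_id, buffer and buffer_len are assumptions of the example):
 *
 *	struct iommu_ioas_map map_cmd = {
 *		.size = sizeof(map_cmd),
 *		.flags = IOMMU_IOAS_MAP_READABLE | IOMMU_IOAS_MAP_WRITEABLE,
 *		.ioas_id = ioas_id,
 *		.user_va = (uintptr_t)buffer,
 *		.length = buffer_len,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_IOAS_MAP, &map_cmd))
 *		err(1, "IOMMU_IOAS_MAP");
 *
 * On success map_cmd.iova reports where the mapping was placed.
 */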

/**
 * struct iommu_ioas_map_file - ioctl(IOMMU_IOAS_MAP_FILE)
 * @size: sizeof(struct iommu_ioas_map_file)
 * @flags: same as for iommu_ioas_map
 * @ioas_id: same as for iommu_ioas_map
 * @fd: the memfd to map
 * @start: byte offset from start of file to map from
 * @length: same as for iommu_ioas_map
 * @iova: same as for iommu_ioas_map
 *
 * Set an IOVA mapping from a memfd file. All other arguments and semantics
 * match those of IOMMU_IOAS_MAP.
 */
struct iommu_ioas_map_file {
	__u32 size;
	__u32 flags;
	__u32 ioas_id;
	__s32 fd;
	__aligned_u64 start;
	__aligned_u64 length;
	__aligned_u64 iova;
};
#define IOMMU_IOAS_MAP_FILE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_MAP_FILE)
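
/*
 * An illustrative sketch mapping the first "len" bytes of a sized memfd
 * (creating and sizing the memfd with ftruncate() is elided; iommufd,
 * ioas_id and len are assumptions of the example):
 *
 *	struct iommu_ioas_map_file mapf_cmd = {
 *		.size = sizeof(mapf_cmd),
 *		.flags = IOMMU_IOAS_MAP_READABLE | IOMMU_IOAS_MAP_WRITEABLE,
 *		.ioas_id = ioas_id,
 *		.fd = memfd,
 *		.start = 0,
 *		.length = len,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_IOAS_MAP_FILE, &mapf_cmd))
 *		err(1, "IOMMU_IOAS_MAP_FILE");
 */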

/**
 * struct iommu_ioas_copy - ioctl(IOMMU_IOAS_COPY)
 * @size: sizeof(struct iommu_ioas_copy)
 * @flags: Combination of enum iommufd_ioas_map_flags
 * @dst_ioas_id: IOAS ID to change the mapping of
 * @src_ioas_id: IOAS ID to copy from
 * @length: Number of bytes to copy and map
 * @dst_iova: IOVA the mapping was placed at. If IOMMU_IOAS_MAP_FIXED_IOVA is
 *            set then this must be provided as input.
 * @src_iova: IOVA to start the copy
 *
 * Copy an already existing mapping from src_ioas_id and establish it in
 * dst_ioas_id. The src iova/length must exactly match a range used with
 * IOMMU_IOAS_MAP.
 *
 * This may be used to efficiently clone a subset of an IOAS to another, or as a
 * kind of 'cache' to speed up mapping. Copy has an efficiency advantage over
 * establishing equivalent new mappings, as internal resources are shared, and
 * the kernel will pin the user memory only once.
 */
struct iommu_ioas_copy {
	__u32 size;
	__u32 flags;
	__u32 dst_ioas_id;
	__u32 src_ioas_id;
	__aligned_u64 length;
	__aligned_u64 dst_iova;
	__aligned_u64 src_iova;
};
#define IOMMU_IOAS_COPY _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_COPY)
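
/*
 * An illustrative sketch cloning an already-mapped range into a second IOAS
 * at the same IOVA (the ids, iova and length are assumed to come from a
 * prior IOMMU_IOAS_MAP):
 *
 *	struct iommu_ioas_copy copy_cmd = {
 *		.size = sizeof(copy_cmd),
 *		.flags = IOMMU_IOAS_MAP_READABLE | IOMMU_IOAS_MAP_WRITEABLE |
 *			 IOMMU_IOAS_MAP_FIXED_IOVA,
 *		.dst_ioas_id = dst_ioas_id,
 *		.src_ioas_id = src_ioas_id,
 *		.length = length,
 *		.dst_iova = iova,
 *		.src_iova = iova,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_IOAS_COPY, &copy_cmd))
 *		err(1, "IOMMU_IOAS_COPY");
 */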

/**
 * struct iommu_ioas_unmap - ioctl(IOMMU_IOAS_UNMAP)
 * @size: sizeof(struct iommu_ioas_unmap)
 * @ioas_id: IOAS ID to change the mapping of
 * @iova: IOVA to start the unmapping at
 * @length: Number of bytes to unmap, and return back the bytes unmapped
 *
 * Unmap an IOVA range. The iova/length must be a superset of a previously
 * mapped range used with IOMMU_IOAS_MAP or IOMMU_IOAS_COPY. Splitting or
 * truncating ranges is not allowed. The values 0 to U64_MAX will unmap
 * everything.
 */
struct iommu_ioas_unmap {
	__u32 size;
	__u32 ioas_id;
	__aligned_u64 iova;
	__aligned_u64 length;
};
#define IOMMU_IOAS_UNMAP _IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_UNMAP)
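
/*
 * An illustrative sketch using the 0 to U64_MAX convention described above
 * to unmap everything in an IOAS (iommufd and ioas_id are assumptions of the
 * example):
 *
 *	struct iommu_ioas_unmap unmap_cmd = {
 *		.size = sizeof(unmap_cmd),
 *		.ioas_id = ioas_id,
 *		.iova = 0,
 *		.length = UINT64_MAX,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_IOAS_UNMAP, &unmap_cmd))
 *		err(1, "IOMMU_IOAS_UNMAP");
 *
 * On success unmap_cmd.length holds the number of bytes unmapped.
 */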

/**
 * enum iommufd_option - ioctl(IOMMU_OPTION_RLIMIT_MODE) and
 *                       ioctl(IOMMU_OPTION_HUGE_PAGES)
 * @IOMMU_OPTION_RLIMIT_MODE:
 *    Change how RLIMIT_MEMLOCK accounting works. The caller must have
 *    privilege to invoke this. Value 0 (default) is user-based accounting, 1
 *    uses process-based accounting. Global option, object_id must be 0
 * @IOMMU_OPTION_HUGE_PAGES:
 *    Value 1 (default) allows contiguous pages to be combined when generating
 *    iommu mappings. Value 0 disables combining, everything is mapped to
 *    PAGE_SIZE. This can be useful for benchmarking. This is a per-IOAS
 *    option, the object_id must be the IOAS ID.
 */
enum iommufd_option {
	IOMMU_OPTION_RLIMIT_MODE = 0,
	IOMMU_OPTION_HUGE_PAGES = 1,
};

/**
 * enum iommufd_option_ops - ioctl(IOMMU_OPTION_OP_SET) and
 *                           ioctl(IOMMU_OPTION_OP_GET)
 * @IOMMU_OPTION_OP_SET: Set the option's value
 * @IOMMU_OPTION_OP_GET: Get the option's value
 */
enum iommufd_option_ops {
	IOMMU_OPTION_OP_SET = 0,
	IOMMU_OPTION_OP_GET = 1,
};

/**
 * struct iommu_option - iommu option multiplexer
 * @size: sizeof(struct iommu_option)
 * @option_id: One of enum iommufd_option
 * @op: One of enum iommufd_option_ops
 * @__reserved: Must be 0
 * @object_id: ID of the object if required
 * @val64: Option value to set or value returned on get
 *
 * Change a simple option value. This multiplexor allows controlling options
 * on objects. IOMMU_OPTION_OP_SET will load an option and IOMMU_OPTION_OP_GET
 * will return the current value.
 */
struct iommu_option {
	__u32 size;
	__u32 option_id;
	__u16 op;
	__u16 __reserved;
	__u32 object_id;
	__aligned_u64 val64;
};
#define IOMMU_OPTION _IO(IOMMUFD_TYPE, IOMMUFD_CMD_OPTION)
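
/*
 * An illustrative sketch disabling huge page combining on an IOAS, e.g. for
 * benchmarking (iommufd and ioas_id are assumptions of the example):
 *
 *	struct iommu_option opt_cmd = {
 *		.size = sizeof(opt_cmd),
 *		.option_id = IOMMU_OPTION_HUGE_PAGES,
 *		.op = IOMMU_OPTION_OP_SET,
 *		.object_id = ioas_id,
 *		.val64 = 0,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_OPTION, &opt_cmd))
 *		err(1, "IOMMU_OPTION");
 */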

/**
 * enum iommufd_vfio_ioas_op - IOMMU_VFIO_IOAS_* ioctls
 * @IOMMU_VFIO_IOAS_GET: Get the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_SET: Change the current compatibility IOAS
 * @IOMMU_VFIO_IOAS_CLEAR: Disable VFIO compatibility
 */
enum iommufd_vfio_ioas_op {
	IOMMU_VFIO_IOAS_GET = 0,
	IOMMU_VFIO_IOAS_SET = 1,
	IOMMU_VFIO_IOAS_CLEAR = 2,
};

/**
 * struct iommu_vfio_ioas - ioctl(IOMMU_VFIO_IOAS)
 * @size: sizeof(struct iommu_vfio_ioas)
 * @ioas_id: For IOMMU_VFIO_IOAS_SET the input IOAS ID to set
 *           For IOMMU_VFIO_IOAS_GET will output the IOAS ID
 * @op: One of enum iommufd_vfio_ioas_op
 * @__reserved: Must be 0
 *
 * The VFIO compatibility support uses a single ioas because VFIO APIs do not
 * support the ID field. Set or Get the IOAS that VFIO compatibility will use.
 * When VFIO_GROUP_SET_CONTAINER is used on an iommufd it will get the
 * compatibility ioas, either by taking what is already set, or auto creating
 * one. From then on VFIO will continue to use that ioas and is not affected by
 * this ioctl. SET or CLEAR does not destroy any auto-created IOAS.
 */
struct iommu_vfio_ioas {
	__u32 size;
	__u32 ioas_id;
	__u16 op;
	__u16 __reserved;
};
#define IOMMU_VFIO_IOAS _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VFIO_IOAS)

/**
 * enum iommufd_hwpt_alloc_flags - Flags for HWPT allocation
 * @IOMMU_HWPT_ALLOC_NEST_PARENT: If set, allocate a HWPT that can serve as
 *                                the parent HWPT in a nesting configuration.
 * @IOMMU_HWPT_ALLOC_DIRTY_TRACKING: Dirty tracking support for device IOMMU is
 *                                   enforced on device attachment
 * @IOMMU_HWPT_FAULT_ID_VALID: The fault_id field of hwpt allocation data is
 *                             valid.
 * @IOMMU_HWPT_ALLOC_PASID: Requests a domain that can be used with PASID. The
 *                          domain can be attached to any PASID on the device.
 *                          Any domain attached to the non-PASID part of the
 *                          device must also be flagged, otherwise attaching a
 *                          PASID will be blocked.
 *                          For the user that wants to attach a PASID, an IOAS
 *                          is not recommended for either the non-PASID part
 *                          or the PASID part of the device.
 *                          If the IOMMU does not support PASID, an error
 *                          (-EOPNOTSUPP) will be returned.
 */
enum iommufd_hwpt_alloc_flags {
	IOMMU_HWPT_ALLOC_NEST_PARENT = 1 << 0,
	IOMMU_HWPT_ALLOC_DIRTY_TRACKING = 1 << 1,
	IOMMU_HWPT_FAULT_ID_VALID = 1 << 2,
	IOMMU_HWPT_ALLOC_PASID = 1 << 3,
};

/**
 * enum iommu_hwpt_vtd_s1_flags - Intel VT-d stage-1 page table
 *                                entry attributes
 * @IOMMU_VTD_S1_SRE: Supervisor request
 * @IOMMU_VTD_S1_EAFE: Extended access enable
 * @IOMMU_VTD_S1_WPE: Write protect enable
 */
enum iommu_hwpt_vtd_s1_flags {
	IOMMU_VTD_S1_SRE = 1 << 0,
	IOMMU_VTD_S1_EAFE = 1 << 1,
	IOMMU_VTD_S1_WPE = 1 << 2,
};

/**
 * struct iommu_hwpt_vtd_s1 - Intel VT-d stage-1 page table
 *                            info (IOMMU_HWPT_DATA_VTD_S1)
 * @flags: Combination of enum iommu_hwpt_vtd_s1_flags
 * @pgtbl_addr: The base address of the stage-1 page table
 * @addr_width: The address width of the stage-1 page table
 * @__reserved: Must be 0
 */
struct iommu_hwpt_vtd_s1 {
	__aligned_u64 flags;
	__aligned_u64 pgtbl_addr;
	__u32 addr_width;
	__u32 __reserved;
};

/**
 * struct iommu_hwpt_arm_smmuv3 - ARM SMMUv3 nested STE
 *                                (IOMMU_HWPT_DATA_ARM_SMMUV3)
 *
 * @ste: The first two double words of the user space Stream Table Entry for
 *       the translation. Must be little-endian.
 *       Allowed fields: (Refer to "5.2 Stream Table Entry" in SMMUv3 HW Spec)
 *       - word-0: V, Cfg, S1Fmt, S1ContextPtr, S1CDMax
 *       - word-1: EATS, S1DSS, S1CIR, S1COR, S1CSH, S1STALLD
 *
 * -EIO will be returned if @ste is not legal or contains any non-allowed field.
 * Cfg can be used to select a S1, Bypass or Abort configuration. A Bypass
 * nested domain will translate the same as the nesting parent. The S1 will
 * install a Context Descriptor Table pointing at userspace memory translated
 * by the nesting parent.
 */
struct iommu_hwpt_arm_smmuv3 {
	__aligned_le64 ste[2];
};

/**
 * enum iommu_hwpt_data_type - IOMMU HWPT Data Type
 * @IOMMU_HWPT_DATA_NONE: no data
 * @IOMMU_HWPT_DATA_VTD_S1: Intel VT-d stage-1 page table
 * @IOMMU_HWPT_DATA_ARM_SMMUV3: ARM SMMUv3 Context Descriptor Table
 */
enum iommu_hwpt_data_type {
	IOMMU_HWPT_DATA_NONE = 0,
	IOMMU_HWPT_DATA_VTD_S1 = 1,
	IOMMU_HWPT_DATA_ARM_SMMUV3 = 2,
};

/**
 * struct iommu_hwpt_alloc - ioctl(IOMMU_HWPT_ALLOC)
 * @size: sizeof(struct iommu_hwpt_alloc)
 * @flags: Combination of enum iommufd_hwpt_alloc_flags
 * @dev_id: The device to allocate this HWPT for
 * @pt_id: The IOAS or HWPT or vIOMMU to connect this HWPT to
 * @out_hwpt_id: The ID of the new HWPT
 * @__reserved: Must be 0
 * @data_type: One of enum iommu_hwpt_data_type
 * @data_len: Length of the type specific data
 * @data_uptr: User pointer to the type specific data
 * @fault_id: The ID of IOMMUFD_FAULT object. Valid only if flags field of
 *            IOMMU_HWPT_FAULT_ID_VALID is set.
 * @__reserved2: Padding to 64-bit alignment. Must be 0.
 *
 * Explicitly allocate a hardware page table object. This is the same object
 * type that is returned by iommufd_device_attach() and represents the
 * underlying iommu driver's iommu_domain kernel object.
 *
 * A kernel-managed HWPT will be created with the mappings from the given
 * IOAS via the @pt_id. The @data_type for this allocation must be set to
 * IOMMU_HWPT_DATA_NONE. The HWPT can be allocated as a parent HWPT for a
 * nesting configuration by passing IOMMU_HWPT_ALLOC_NEST_PARENT via @flags.
 *
 * A user-managed nested HWPT will be created from a given vIOMMU (wrapping a
 * parent HWPT) or a parent HWPT via @pt_id, in which the parent HWPT must be
 * allocated previously via the same ioctl from a given IOAS (@pt_id). In this
 * case, the @data_type must be set to a pre-defined type corresponding to an
 * I/O page table type supported by the underlying IOMMU hardware. The device
 * via @dev_id and the vIOMMU via @pt_id must be associated with the same IOMMU
 * instance.
 *
 * If the @data_type is set to IOMMU_HWPT_DATA_NONE, @data_len and
 * @data_uptr should be zero. Otherwise, both @data_len and @data_uptr
 * must be given.
 */
struct iommu_hwpt_alloc {
	__u32 size;
	__u32 flags;
	__u32 dev_id;
	__u32 pt_id;
	__u32 out_hwpt_id;
	__u32 __reserved;
	__u32 data_type;
	__u32 data_len;
	__aligned_u64 data_uptr;
	__u32 fault_id;
	__u32 __reserved2;
};
#define IOMMU_HWPT_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_ALLOC)
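
/*
 * An illustrative sketch allocating a kernel-managed HWPT from an IOAS
 * (dev_id comes from binding a device to the iommufd, which is outside this
 * header; iommufd and ioas_id are assumptions of the example):
 *
 *	struct iommu_hwpt_alloc hwpt_cmd = {
 *		.size = sizeof(hwpt_cmd),
 *		.dev_id = dev_id,
 *		.pt_id = ioas_id,
 *		.data_type = IOMMU_HWPT_DATA_NONE,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_HWPT_ALLOC, &hwpt_cmd))
 *		err(1, "IOMMU_HWPT_ALLOC");
 *
 * On success hwpt_cmd.out_hwpt_id names the new HWPT.
 */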

/**
 * enum iommu_hw_info_vtd_flags - Flags for VT-d hw_info
 * @IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17: If set, disallow read-only mappings
 *                                         on a nested_parent domain.
 *                                         https://www.intel.com/content/www/us/en/content-details/772415/content-details.html
 */
enum iommu_hw_info_vtd_flags {
	IOMMU_HW_INFO_VTD_ERRATA_772415_SPR17 = 1 << 0,
};

/**
 * struct iommu_hw_info_vtd - Intel VT-d hardware information
 *
 * @flags: Combination of enum iommu_hw_info_vtd_flags
 * @__reserved: Must be 0
 *
 * @cap_reg: Value of Intel VT-d capability register defined in VT-d spec
 *           section 11.4.2 Capability Register.
 * @ecap_reg: Value of Intel VT-d extended capability register defined in
 *            VT-d spec section 11.4.3 Extended Capability Register.
 *
 * Userspace needs to understand the Intel VT-d specification to decode the
 * register values.
 */
struct iommu_hw_info_vtd {
	__u32 flags;
	__u32 __reserved;
	__aligned_u64 cap_reg;
	__aligned_u64 ecap_reg;
};

/**
 * struct iommu_hw_info_arm_smmuv3 - ARM SMMUv3 hardware information
 *                                   (IOMMU_HW_INFO_TYPE_ARM_SMMUV3)
 *
 * @flags: Must be set to 0
 * @__reserved: Must be 0
 * @idr: Implemented features for ARM SMMU Non-secure programming interface
 * @iidr: Information about the implementation and implementer of ARM SMMU,
 *        and architecture version supported
 * @aidr: ARM SMMU architecture version
 *
 * For the details of @idr, @iidr and @aidr, please refer to the chapters
 * from 6.3.1 to 6.3.6 in the SMMUv3 Spec.
 *
 * This reports the raw HW capability, and not all bits are meaningful to be
 * read by userspace. Only the following fields should be used:
 *
 * idr[0]: ST_LEVEL, TERM_MODEL, STALL_MODEL, TTENDIAN, CD2L, ASID16, TTF
 * idr[1]: SIDSIZE, SSIDSIZE
 * idr[3]: BBML, RIL
 * idr[5]: VAX, GRAN64K, GRAN16K, GRAN4K
 *
 * - S1P should be assumed to be true if a NESTED HWPT can be created
 * - VFIO/iommufd only support platforms with COHACC, so it should be assumed
 *   to be true.
 * - ATS is a per-device property. If the VMM describes any devices as ATS
 *   capable in ACPI/DT it should set the corresponding idr.
 *
 * This list may expand in the future (e.g. E0PD, AIE, PBHA, D128, DS, etc.).
 * It is important that VMMs do not read bits outside the list to allow for
 * compatibility with future kernels. Several features in the SMMUv3
 * architecture are not currently supported by the kernel for nesting: HTTU,
 * BTM, MPAM and others.
 */
struct iommu_hw_info_arm_smmuv3 {
	__u32 flags;
	__u32 __reserved;
	__u32 idr[6];
	__u32 iidr;
	__u32 aidr;
};

/**
 * enum iommu_hw_info_type - IOMMU Hardware Info Types
 * @IOMMU_HW_INFO_TYPE_NONE: Used by the drivers that do not report hardware
 *                           info
 * @IOMMU_HW_INFO_TYPE_INTEL_VTD: Intel VT-d iommu info type
 * @IOMMU_HW_INFO_TYPE_ARM_SMMUV3: ARM SMMUv3 iommu info type
 */
enum iommu_hw_info_type {
	IOMMU_HW_INFO_TYPE_NONE = 0,
	IOMMU_HW_INFO_TYPE_INTEL_VTD = 1,
	IOMMU_HW_INFO_TYPE_ARM_SMMUV3 = 2,
};

/**
 * enum iommufd_hw_capabilities
 * @IOMMU_HW_CAP_DIRTY_TRACKING: IOMMU hardware support for dirty tracking
 *                               If available, it means the following APIs
 *                               are supported:
 *
 *                                   IOMMU_HWPT_GET_DIRTY_BITMAP
 *                                   IOMMU_HWPT_SET_DIRTY_TRACKING
 *
 * @IOMMU_HW_CAP_PCI_PASID_EXEC: Execute Permission Supported; ignored by the
 *                               user when struct
 *                               iommu_hw_info::out_max_pasid_log2 is zero.
 * @IOMMU_HW_CAP_PCI_PASID_PRIV: Privileged Mode Supported; ignored by the
 *                               user when struct
 *                               iommu_hw_info::out_max_pasid_log2 is zero.
 */
enum iommufd_hw_capabilities {
	IOMMU_HW_CAP_DIRTY_TRACKING = 1 << 0,
	IOMMU_HW_CAP_PCI_PASID_EXEC = 1 << 1,
	IOMMU_HW_CAP_PCI_PASID_PRIV = 1 << 2,
};

/**
 * struct iommu_hw_info - ioctl(IOMMU_GET_HW_INFO)
 * @size: sizeof(struct iommu_hw_info)
 * @flags: Must be 0
 * @dev_id: The device bound to the iommufd
 * @data_len: Input the length of a user buffer in bytes. Output the length of
 *            data that kernel supports
 * @data_uptr: User pointer to a user-space buffer used by the kernel to fill
 *             the iommu type specific hardware information data
 * @out_data_type: Output the iommu hardware info type as defined in the enum
 *                 iommu_hw_info_type.
 * @out_capabilities: Output the generic iommu capability info type as defined
 *                    in the enum iommu_hw_capabilities.
 * @out_max_pasid_log2: Output the width of PASIDs. 0 means no PASID support.
 *                      PCI devices should check @out_capabilities to see
 *                      whether the specific capabilities are supported.
 * @__reserved: Must be 0
 *
 * Query an iommu type specific hardware information data from an iommu behind
 * a given device that has been bound to iommufd. This hardware info data will
 * be used to sync capabilities between the virtual iommu and the physical
 * iommu, e.g. a nested translation setup needs to check the hardware info, so
 * a guest stage-1 page table can be compatible with the physical iommu.
 *
 * To capture an iommu type specific hardware information data, @data_uptr and
 * its length @data_len must be provided. Trailing bytes will be zeroed if the
 * user buffer is larger than the data that kernel has. Otherwise, the kernel
 * only fills the buffer using the given length in @data_len. If the ioctl
 * succeeds, @data_len will be updated to the length that kernel actually
 * supports, and @out_data_type will be filled to decode the data filled in
 * the buffer pointed by @data_uptr. Input @data_len == zero is allowed.
 */
struct iommu_hw_info {
	__u32 size;
	__u32 flags;
	__u32 dev_id;
	__u32 data_len;
	__aligned_u64 data_uptr;
	__u32 out_data_type;
	__u8 out_max_pasid_log2;
	__u8 __reserved[3];
	__aligned_u64 out_capabilities;
};
#define IOMMU_GET_HW_INFO _IO(IOMMUFD_TYPE, IOMMUFD_CMD_GET_HW_INFO)
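
/*
 * An illustrative sketch of the sizing pattern described above: query once
 * with @data_len == 0 to learn the supported length and type, then query
 * again with a buffer (iommufd and dev_id are assumptions of the example):
 *
 *	struct iommu_hw_info info_cmd = {
 *		.size = sizeof(info_cmd),
 *		.dev_id = dev_id,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_GET_HW_INFO, &info_cmd))
 *		err(1, "IOMMU_GET_HW_INFO");
 *
 * info_cmd.data_len now holds the length the kernel supports; allocate that
 * many bytes, point info_cmd.data_uptr at them, re-issue the ioctl, and
 * decode the buffer according to info_cmd.out_data_type.
 */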

/**
 * enum iommufd_hwpt_set_dirty_tracking_flags - Flags for steering dirty
 *                                              tracking
 * @IOMMU_HWPT_DIRTY_TRACKING_ENABLE: Enable dirty tracking
 */
enum iommufd_hwpt_set_dirty_tracking_flags {
	IOMMU_HWPT_DIRTY_TRACKING_ENABLE = 1,
};

/**
 * struct iommu_hwpt_set_dirty_tracking - ioctl(IOMMU_HWPT_SET_DIRTY_TRACKING)
 * @size: sizeof(struct iommu_hwpt_set_dirty_tracking)
 * @flags: Combination of enum iommufd_hwpt_set_dirty_tracking_flags
 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
 * @__reserved: Must be 0
 *
 * Toggle dirty tracking on an HW pagetable.
 */
struct iommu_hwpt_set_dirty_tracking {
	__u32 size;
	__u32 flags;
	__u32 hwpt_id;
	__u32 __reserved;
};
#define IOMMU_HWPT_SET_DIRTY_TRACKING _IO(IOMMUFD_TYPE, \
					  IOMMUFD_CMD_HWPT_SET_DIRTY_TRACKING)

/**
 * enum iommufd_hwpt_get_dirty_bitmap_flags - Flags for getting dirty bits
 * @IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR: Just read the PTEs without clearing
 *                                        any dirty bits metadata. This flag
 *                                        can be passed in the expectation
 *                                        where the next operation is an unmap
 *                                        of the same IOVA range.
 */
enum iommufd_hwpt_get_dirty_bitmap_flags {
	IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR = 1,
};

/**
 * struct iommu_hwpt_get_dirty_bitmap - ioctl(IOMMU_HWPT_GET_DIRTY_BITMAP)
 * @size: sizeof(struct iommu_hwpt_get_dirty_bitmap)
 * @hwpt_id: HW pagetable ID that represents the IOMMU domain
 * @flags: Combination of enum iommufd_hwpt_get_dirty_bitmap_flags
 * @__reserved: Must be 0
 * @iova: base IOVA of the bitmap first bit
 * @length: IOVA range size
 * @page_size: page size granularity of each bit in the bitmap
 * @data: bitmap where to set the dirty bits. Each bit in the bitmap
 *        represents one page_size granule of IOVA, offset from an arbitrary
 *        base iova.
 *
 * Checking a given IOVA is dirty:
 *
 *  data[(iova / page_size) / 64] & (1ULL << ((iova / page_size) % 64))
 *
 * Walk the IOMMU pagetables for a given IOVA range to return a bitmap
 * with the dirty IOVAs. In doing so it will also by default clear any
 * dirty bit metadata set in the IOPTE.
 */
struct iommu_hwpt_get_dirty_bitmap {
	__u32 size;
	__u32 hwpt_id;
	__u32 flags;
	__u32 __reserved;
	__aligned_u64 iova;
	__aligned_u64 length;
	__aligned_u64 page_size;
	__aligned_u64 data;
};
#define IOMMU_HWPT_GET_DIRTY_BITMAP _IO(IOMMUFD_TYPE, \
					IOMMUFD_CMD_HWPT_GET_DIRTY_BITMAP)
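
/*
 * An illustrative sketch reading (and clearing) the dirty bits for the first
 * "length" bytes of IOVA space, then testing one IOVA with the formula
 * above. The bitmap sizing, iommufd, hwpt_id, length and test_iova are
 * assumptions of the example:
 *
 *	uint64_t bitmap[(length / 4096) / 64];
 *	struct iommu_hwpt_get_dirty_bitmap get_cmd = {
 *		.size = sizeof(get_cmd),
 *		.hwpt_id = hwpt_id,
 *		.iova = 0,
 *		.length = length,
 *		.page_size = 4096,
 *		.data = (uintptr_t)bitmap,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_HWPT_GET_DIRTY_BITMAP, &get_cmd))
 *		err(1, "IOMMU_HWPT_GET_DIRTY_BITMAP");
 *	dirty = bitmap[(test_iova / 4096) / 64] &
 *		(1ULL << ((test_iova / 4096) % 64));
 */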

/**
 * enum iommu_hwpt_invalidate_data_type - IOMMU HWPT Cache Invalidation
 *                                        Data Type
 * @IOMMU_HWPT_INVALIDATE_DATA_VTD_S1: Invalidation data for VTD_S1
 * @IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3: Invalidation data for ARM SMMUv3
 */
enum iommu_hwpt_invalidate_data_type {
	IOMMU_HWPT_INVALIDATE_DATA_VTD_S1 = 0,
	IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3 = 1,
};

/**
 * enum iommu_hwpt_vtd_s1_invalidate_flags - Flags for Intel VT-d
 *                                           stage-1 cache invalidation
 * @IOMMU_VTD_INV_FLAGS_LEAF: Indicates whether the invalidation applies
 *                            to all-levels page structure cache or just
 *                            the leaf PTE cache.
 */
enum iommu_hwpt_vtd_s1_invalidate_flags {
	IOMMU_VTD_INV_FLAGS_LEAF = 1 << 0,
};

/**
 * struct iommu_hwpt_vtd_s1_invalidate - Intel VT-d cache invalidation
 *                                       (IOMMU_HWPT_INVALIDATE_DATA_VTD_S1)
 * @addr: The start address of the range to be invalidated. It needs to
 *        be 4KB aligned.
 * @npages: Number of contiguous 4K pages to be invalidated.
 * @flags: Combination of enum iommu_hwpt_vtd_s1_invalidate_flags
 * @__reserved: Must be 0
 *
 * The Intel VT-d specific invalidation data for user-managed stage-1 cache
 * invalidation in nested translation. Userspace uses this structure to
 * tell the impacted cache scope after modifying the stage-1 page table.
 *
 * Invalidate all the caches related to the page table by setting @addr
 * to 0 and @npages to U64_MAX.
 *
 * The device TLB will be invalidated automatically if ATS is enabled.
 */
struct iommu_hwpt_vtd_s1_invalidate {
	__aligned_u64 addr;
	__aligned_u64 npages;
	__u32 flags;
	__u32 __reserved;
};

/**
 * struct iommu_viommu_arm_smmuv3_invalidate - ARM SMMUv3 cache invalidation
 *                                             (IOMMU_VIOMMU_INVALIDATE_DATA_ARM_SMMUV3)
 * @cmd: 128-bit cache invalidation command that runs in SMMU CMDQ.
 *       Must be little-endian.
 *
 * Supported command list only when passing in a vIOMMU via @hwpt_id:
 *
 *     CMDQ_OP_TLBI_NSNH_ALL
 *     CMDQ_OP_TLBI_NH_VA
 *     CMDQ_OP_TLBI_NH_VAA
 *     CMDQ_OP_TLBI_NH_ALL
 *     CMDQ_OP_TLBI_NH_ASID
 *     CMDQ_OP_ATC_INV
 *     CMDQ_OP_CFGI_CD
 *     CMDQ_OP_CFGI_CD_ALL
 *
 * -EIO will be returned if the command is not supported.
 */
struct iommu_viommu_arm_smmuv3_invalidate {
	__aligned_le64 cmd[2];
};

/**
 * struct iommu_hwpt_invalidate - ioctl(IOMMU_HWPT_INVALIDATE)
 * @size: sizeof(struct iommu_hwpt_invalidate)
 * @hwpt_id: ID of a nested HWPT or a vIOMMU, for cache invalidation
 * @data_uptr: User pointer to an array of driver-specific cache invalidation
 *             data.
 * @data_type: One of enum iommu_hwpt_invalidate_data_type, defining the data
 *             type of all the entries in the invalidation request array. It
 *             should be a type supported by the hwpt pointed to by @hwpt_id.
 * @entry_len: Length (in bytes) of a request entry in the request array
 * @entry_num: Input the number of cache invalidation requests in the array.
 *             Output the number of requests successfully handled by kernel.
 * @__reserved: Must be 0.
 *
 * Invalidate iommu cache for user-managed page table or vIOMMU. Modifications
 * on a user-managed page table should be followed by this operation, if a HWPT
 * is passed in via @hwpt_id. Other caches, such as the device cache or
 * descriptor cache, can be flushed if a vIOMMU is passed in via the @hwpt_id
 * field.
 *
 * Each ioctl can support one or more cache invalidation requests in the array
 * that has a total size of @entry_len * @entry_num.
 *
 * An empty invalidation request array by setting @entry_num==0 is allowed, and
 * @entry_len and @data_uptr would be ignored in this case. This can be used to
 * check if the given @data_type is supported or not by kernel.
 */
struct iommu_hwpt_invalidate {
	__u32 size;
	__u32 hwpt_id;
	__aligned_u64 data_uptr;
	__u32 data_type;
	__u32 entry_len;
	__u32 entry_num;
	__u32 __reserved;
};
#define IOMMU_HWPT_INVALIDATE _IO(IOMMUFD_TYPE, IOMMUFD_CMD_HWPT_INVALIDATE)
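
/*
 * An illustrative sketch of a single VT-d stage-1 invalidation covering the
 * whole address space (iommufd is an assumption; hwpt_id names a nested
 * VTD_S1 HWPT from a prior IOMMU_HWPT_ALLOC):
 *
 *	struct iommu_hwpt_vtd_s1_invalidate inv = {
 *		.addr = 0,
 *		.npages = UINT64_MAX,
 *	};
 *	struct iommu_hwpt_invalidate inv_cmd = {
 *		.size = sizeof(inv_cmd),
 *		.hwpt_id = hwpt_id,
 *		.data_uptr = (uintptr_t)&inv,
 *		.data_type = IOMMU_HWPT_INVALIDATE_DATA_VTD_S1,
 *		.entry_len = sizeof(inv),
 *		.entry_num = 1,
 *	};
 *
 *	if (ioctl(iommufd, IOMMU_HWPT_INVALIDATE, &inv_cmd))
 *		err(1, "IOMMU_HWPT_INVALIDATE");
 */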

/**
 * enum iommu_hwpt_pgfault_flags - flags for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_FLAGS_PASID_VALID: The pasid field of the fault data is
 *                                   valid.
 * @IOMMU_PGFAULT_FLAGS_LAST_PAGE: It's the last fault of a fault group.
 */
enum iommu_hwpt_pgfault_flags {
	IOMMU_PGFAULT_FLAGS_PASID_VALID = (1 << 0),
	IOMMU_PGFAULT_FLAGS_LAST_PAGE = (1 << 1),
};

/**
 * enum iommu_hwpt_pgfault_perm - perm bits for struct iommu_hwpt_pgfault
 * @IOMMU_PGFAULT_PERM_READ: request for read permission
 * @IOMMU_PGFAULT_PERM_WRITE: request for write permission
 * @IOMMU_PGFAULT_PERM_EXEC: (PCIE 10.4.1) request with a PASID that has the
 *                           Execute Requested bit set in PASID TLP Prefix.
 * @IOMMU_PGFAULT_PERM_PRIV: (PCIE 10.4.1) request with a PASID that has the
 *                           Privileged Mode Requested bit set in PASID TLP
 *                           Prefix.
 */
enum iommu_hwpt_pgfault_perm {
	IOMMU_PGFAULT_PERM_READ = (1 << 0),
	IOMMU_PGFAULT_PERM_WRITE = (1 << 1),
	IOMMU_PGFAULT_PERM_EXEC = (1 << 2),
	IOMMU_PGFAULT_PERM_PRIV = (1 << 3),
};

/**
 * struct iommu_hwpt_pgfault - iommu page fault data
 * @flags: Combination of enum iommu_hwpt_pgfault_flags
 * @dev_id: id of the originated device
 * @pasid: Process Address Space ID
 * @grpid: Page Request Group Index
 * @perm: Combination of enum iommu_hwpt_pgfault_perm
 * @__reserved: Must be 0.
 * @addr: Fault address
 * @length: a hint of how much data the requestor is expecting to fetch. For
 *          example, if the PRI initiator knows it is going to do a 10MB
 *          transfer, it could fill in 10MB and the OS could pre-fault in
 *          10MB of IOVA. It defaults to 0 if there's no such hint.
 * @cookie: kernel-managed cookie identifying a group of fault messages. The
 *          cookie number encoded in the last page fault of the group should
 *          be echoed back in the response message.
 */
struct iommu_hwpt_pgfault {
	__u32 flags;
	__u32 dev_id;
	__u32 pasid;
	__u32 grpid;
	__u32 perm;
	__u32 __reserved;
	__aligned_u64 addr;
	__u32 length;
	__u32 cookie;
};

/**
 * enum iommufd_page_response_code - Return status of fault handlers
 * @IOMMUFD_PAGE_RESP_SUCCESS: Fault has been handled and the page tables
 *                             populated, retry the access. This is the
 *                             "Success" defined in PCI 10.4.2.1.
 * @IOMMUFD_PAGE_RESP_INVALID: Could not handle this fault, don't retry the
 *                             access. This is the "Invalid Request" in PCI
 *                             10.4.2.1.
 */
enum iommufd_page_response_code {
	IOMMUFD_PAGE_RESP_SUCCESS = 0,
	IOMMUFD_PAGE_RESP_INVALID = 1,
};

/**
 * struct iommu_hwpt_page_response - IOMMU page fault response
 * @cookie: The kernel-managed cookie reported in the fault message.
 * @code: One of response code in enum iommufd_page_response_code.
 */
struct iommu_hwpt_page_response {
	__u32 cookie;
	__u32 code;
};

/**
 * struct iommu_fault_alloc - ioctl(IOMMU_FAULT_QUEUE_ALLOC)
 * @size: sizeof(struct iommu_fault_alloc)
 * @flags: Must be 0
 * @out_fault_id: The ID of the new FAULT
 * @out_fault_fd: The fd of the new FAULT
 *
 * Explicitly allocate a fault handling object.
 */
struct iommu_fault_alloc {
	__u32 size;
	__u32 flags;
	__u32 out_fault_id;
	__u32 out_fault_fd;
};
#define IOMMU_FAULT_QUEUE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_FAULT_QUEUE_ALLOC)
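
/*
 * An illustrative sketch of the fault path: allocate a fault queue, pass
 * out_fault_id as @fault_id in IOMMU_HWPT_ALLOC together with
 * IOMMU_HWPT_FAULT_ID_VALID, then service faults on the returned fd. The
 * read()/write() framing below follows struct iommu_hwpt_pgfault and
 * struct iommu_hwpt_page_response; the fault handling itself is an
 * assumption of the example:
 *
 *	struct iommu_hwpt_pgfault fault;
 *	struct iommu_hwpt_page_response resp;
 *
 *	if (read(fault_fd, &fault, sizeof(fault)) == sizeof(fault) &&
 *	    (fault.flags & IOMMU_PGFAULT_FLAGS_LAST_PAGE)) {
 *		...resolve the fault at fault.addr...
 *		resp.cookie = fault.cookie;
 *		resp.code = IOMMUFD_PAGE_RESP_SUCCESS;
 *		write(fault_fd, &resp, sizeof(resp));
 *	}
 */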

/**
 * enum iommu_viommu_type - Virtual IOMMU Type
 * @IOMMU_VIOMMU_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_VIOMMU_TYPE_ARM_SMMUV3: ARM SMMUv3 driver specific type
 */
enum iommu_viommu_type {
	IOMMU_VIOMMU_TYPE_DEFAULT = 0,
	IOMMU_VIOMMU_TYPE_ARM_SMMUV3 = 1,
};

/**
 * struct iommu_viommu_alloc - ioctl(IOMMU_VIOMMU_ALLOC)
 * @size: sizeof(struct iommu_viommu_alloc)
 * @flags: Must be 0
 * @type: Type of the virtual IOMMU. Must be defined in enum iommu_viommu_type
 * @dev_id: The device's physical IOMMU will be used to back the virtual IOMMU
 * @hwpt_id: ID of a nesting parent HWPT to associate to
 * @out_viommu_id: Output virtual IOMMU ID for the allocated object
 *
 * Allocate a virtual IOMMU object, representing the underlying physical
 * IOMMU's virtualization support: a security-isolated slice of the real IOMMU
 * HW that is unique to a specific VM. Operations global to the IOMMU are
 * connected to the vIOMMU, such as:
 * - Security namespace for guest owned ID, e.g. guest-controlled cache tags
 * - Non-device-affiliated event reporting, e.g. invalidation queue errors
 * - Access to a sharable nesting parent pagetable across physical IOMMUs
 * - Virtualization of various platform IDs, e.g. RIDs and others
 * - Delivery of paravirtualized invalidation
 * - Direct assigned invalidation queues
 * - Direct assigned interrupts
 */
struct iommu_viommu_alloc {
	__u32 size;
	__u32 flags;
	__u32 type;
	__u32 dev_id;
	__u32 hwpt_id;
	__u32 out_viommu_id;
};
#define IOMMU_VIOMMU_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VIOMMU_ALLOC)

/**
 * struct iommu_vdevice_alloc - ioctl(IOMMU_VDEVICE_ALLOC)
 * @size: sizeof(struct iommu_vdevice_alloc)
 * @viommu_id: vIOMMU ID to associate with the virtual device
 * @dev_id: The physical device to allocate a virtual instance on the vIOMMU
 * @out_vdevice_id: Object handle for the vDevice. Pass to IOMMU_DESTROY
 * @virt_id: Virtual device ID per vIOMMU, e.g. vSID of ARM SMMUv3, vDeviceID
 *           of AMD IOMMU, and vRID of a nested Intel VT-d to a Context Table
 *
 * Allocate a virtual device instance (for a physical device) against a vIOMMU.
 * This instance holds the device's information (related to its vIOMMU) in a VM.
 */
struct iommu_vdevice_alloc {
	__u32 size;
	__u32 viommu_id;
	__u32 dev_id;
	__u32 out_vdevice_id;
	__aligned_u64 virt_id;
};
#define IOMMU_VDEVICE_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VDEVICE_ALLOC)

/**
 * struct iommu_ioas_change_process - ioctl(IOMMU_IOAS_CHANGE_PROCESS)
 * @size: sizeof(struct iommu_ioas_change_process)
 * @__reserved: Must be 0
 *
 * This transfers pinned memory counts for every memory map in every IOAS
 * in the context to the current process. This only supports maps created
 * with IOMMU_IOAS_MAP_FILE, and returns EINVAL if other maps are present.
 * If the ioctl returns a failure status, then nothing is changed.
 *
 * This API is useful for transferring operation of a device from one process
 * to another, such as during userland live update.
 */
struct iommu_ioas_change_process {
	__u32 size;
	__u32 __reserved;
};

#define IOMMU_IOAS_CHANGE_PROCESS \
	_IO(IOMMUFD_TYPE, IOMMUFD_CMD_IOAS_CHANGE_PROCESS)

/**
 * enum iommu_veventq_flag - flag for struct iommufd_vevent_header
 * @IOMMU_VEVENTQ_FLAG_LOST_EVENTS: vEVENTQ has lost vEVENTs
 */
enum iommu_veventq_flag {
	IOMMU_VEVENTQ_FLAG_LOST_EVENTS = (1U << 0),
};

/**
 * struct iommufd_vevent_header - Virtual Event Header for a vEVENTQ Status
 * @flags: Combination of enum iommu_veventq_flag
 * @sequence: The sequence index of a vEVENT in the vEVENTQ, with a range of
 *            [0, INT_MAX] where the following index of INT_MAX is 0
 *
 * Each iommufd_vevent_header reports a sequence index of the following vEVENT:
 *
 * +----------------------+-------+----------------------+-------+---+-------+
 * | header0 {sequence=0} | data0 | header1 {sequence=1} | data1 |...| dataN |
 * +----------------------+-------+----------------------+-------+---+-------+
 *
 * And this sequence index is expected to be monotonic with respect to the
 * sequence index of the previous vEVENT. If two adjacent sequence indexes
 * have a delta larger than 1, it means that delta - 1 vEVENTs have been
 * lost, e.g. two lost vEVENTs:
 *
 * +-----+----------------------+-------+----------------------+-------+-----+
 * | ... | header3 {sequence=3} | data3 | header6 {sequence=6} | data6 | ... |
 * +-----+----------------------+-------+----------------------+-------+-----+
 *
 * If a vEVENT is lost at the tail of the vEVENTQ and there is no following
 * vEVENT providing the next sequence index, an IOMMU_VEVENTQ_FLAG_LOST_EVENTS
 * header will be added to the tail, and no data will follow this header:
 *
 * +--+----------------------+-------+-----------------------------------------+
 * |..| header3 {sequence=3} | data3 | header4 {flags=LOST_EVENTS, sequence=4} |
 * +--+----------------------+-------+-----------------------------------------+
 */
struct iommufd_vevent_header {
	__u32 flags;
	__u32 sequence;
};

/**
 * enum iommu_veventq_type - Virtual Event Queue Type
 * @IOMMU_VEVENTQ_TYPE_DEFAULT: Reserved for future use
 * @IOMMU_VEVENTQ_TYPE_ARM_SMMUV3: ARM SMMUv3 Virtual Event Queue
 */
enum iommu_veventq_type {
	IOMMU_VEVENTQ_TYPE_DEFAULT = 0,
	IOMMU_VEVENTQ_TYPE_ARM_SMMUV3 = 1,
};

/**
 * struct iommu_vevent_arm_smmuv3 - ARM SMMUv3 Virtual Event
 *                                  (IOMMU_VEVENTQ_TYPE_ARM_SMMUV3)
 * @evt: 256-bit ARM SMMUv3 Event record, little-endian.
 *       Reported event records: (Refer to "7.3 Event records" in SMMUv3 HW
 *       Spec)
 *       - 0x04 C_BAD_STE
 *       - 0x06 F_STREAM_DISABLED
 *       - 0x08 C_BAD_SUBSTREAMID
 *       - 0x0a C_BAD_CD
 *       - 0x10 F_TRANSLATION
 *       - 0x11 F_ADDR_SIZE
 *       - 0x12 F_ACCESS
 *       - 0x13 F_PERMISSION
 *
 * StreamID field reports a virtual device ID. To receive a virtual event for a
 * device, a vDEVICE must be allocated via IOMMU_VDEVICE_ALLOC.
 */
struct iommu_vevent_arm_smmuv3 {
	__aligned_le64 evt[4];
};

/**
 * struct iommu_veventq_alloc - ioctl(IOMMU_VEVENTQ_ALLOC)
 * @size: sizeof(struct iommu_veventq_alloc)
 * @flags: Must be 0
 * @viommu_id: virtual IOMMU ID to associate the vEVENTQ with
 * @type: Type of the vEVENTQ. Must be defined in enum iommu_veventq_type
 * @veventq_depth: Maximum number of events in the vEVENTQ
 * @out_veventq_id: The ID of the new vEVENTQ
 * @out_veventq_fd: The fd of the new vEVENTQ. User space must close the
 *                  successfully returned fd after using it
 * @__reserved: Must be 0
 *
 * Explicitly allocate a virtual event queue interface for a vIOMMU. A vIOMMU
 * can have multiple FDs for different types, but is confined to one per @type.
 * User space should open the @out_veventq_fd to read vEVENTs out of a vEVENTQ,
 * if there are vEVENTs available. A vEVENTQ will lose events due to overflow
 * if the number of vEVENTs hits @veventq_depth.
 *
 * Each vEVENT in a vEVENTQ encloses a struct iommufd_vevent_header followed by
 * a type-specific data structure, in a normal case:
 *
 * +-+---------+-------+---------+-------+-----+---------+-------+-+
 * | | header0 | data0 | header1 | data1 | ... | headerN | dataN | |
 * +-+---------+-------+---------+-------+-----+---------+-------+-+
 *
 * unless a trailing IOMMU_VEVENTQ_FLAG_LOST_EVENTS header is logged (refer to
 * struct iommufd_vevent_header).
 */
struct iommu_veventq_alloc {
	__u32 size;
	__u32 flags;
	__u32 viommu_id;
	__u32 type;
	__u32 veventq_depth;
	__u32 out_veventq_id;
	__u32 out_veventq_fd;
	__u32 __reserved;
};
#define IOMMU_VEVENTQ_ALLOC _IO(IOMMUFD_TYPE, IOMMUFD_CMD_VEVENTQ_ALLOC)
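
/*
 * An illustrative sketch draining a vEVENTQ fd for an ARM SMMUv3 vIOMMU.
 * Each record is a struct iommufd_vevent_header followed by type-specific
 * data, except for a trailing LOST_EVENTS header; the buffer handling is an
 * assumption of the example:
 *
 *	char buf[4096];
 *	ssize_t len = read(veventq_fd, buf, sizeof(buf));
 *	char *p = buf;
 *
 *	while (len >= (ssize_t)sizeof(struct iommufd_vevent_header)) {
 *		struct iommufd_vevent_header *hdr = (void *)p;
 *		size_t rec = sizeof(*hdr) +
 *			     sizeof(struct iommu_vevent_arm_smmuv3);
 *
 *		if (hdr->flags & IOMMU_VEVENTQ_FLAG_LOST_EVENTS)
 *			break;
 *		...consume the struct iommu_vevent_arm_smmuv3 after hdr...
 *		p += rec;
 *		len -= rec;
 *	}
 */
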
#endif