/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_NET_QUEUES_H
#define _LINUX_NET_QUEUES_H

#include <linux/netdevice.h>

/**
 * struct netdev_config - queue-related configuration for a netdev
 * @hds_thresh:	header-data split (HDS) threshold value.
 * @hds_config:	HDS setting requested by userspace.
 */
struct netdev_config {
	u32	hds_thresh;
	u8	hds_config;
};
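
/* A minimal illustrative sketch (not part of this API): a driver consulting
 * the HDS configuration when setting up an Rx ring. The ring structure and
 * its fields are hypothetical; the config is assumed to be reachable through
 * the netdev's current config pointer (dev->cfg).
 *
 *	static void example_setup_hds(struct net_device *dev,
 *				      struct example_rx_ring *ring)
 *	{
 *		const struct netdev_config *cfg = dev->cfg;
 *
 *		ring->hds_enabled =
 *			cfg->hds_config == ETHTOOL_TCP_DATA_SPLIT_ENABLED;
 *		ring->hds_thresh = cfg->hds_thresh;
 *	}
 */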

/* See the netdev.yaml spec for definition of each statistic */
struct netdev_queue_stats_rx {
	u64 bytes;
	u64 packets;
	u64 alloc_fail;

	u64 hw_drops;
	u64 hw_drop_overruns;

	u64 csum_complete;
	u64 csum_unnecessary;
	u64 csum_none;
	u64 csum_bad;

	u64 hw_gro_packets;
	u64 hw_gro_bytes;
	u64 hw_gro_wire_packets;
	u64 hw_gro_wire_bytes;

	u64 hw_drop_ratelimits;
};

struct netdev_queue_stats_tx {
	u64 bytes;
	u64 packets;

	u64 hw_drops;
	u64 hw_drop_errors;

	u64 csum_none;
	u64 needs_csum;

	u64 hw_gso_packets;
	u64 hw_gso_bytes;
	u64 hw_gso_wire_packets;
	u64 hw_gso_wire_bytes;

	u64 hw_drop_ratelimits;

	u64 stop;
	u64 wake;
};

/**
 * struct netdev_stat_ops - netdev ops for fine grained stats
 * @get_queue_stats_rx:	get stats for a given Rx queue
 * @get_queue_stats_tx:	get stats for a given Tx queue
 * @get_base_stats:	get base stats (not belonging to any live instance)
 *
 * Query stats for a given object. The values of the statistics are undefined
 * on entry (specifically they are *not* zero-initialized). Drivers should
 * assign values only to the statistics they collect. Statistics which are not
 * collected must be left undefined.
 *
 * Queue objects are not necessarily persistent, and only currently active
 * queues are queried by the per-queue callbacks. This means that per-queue
 * statistics will not generally add up to the total number of events for
 * the device. The @get_base_stats callback allows filling in the delta
 * between events for currently live queues and overall device history.
 * @get_base_stats can also be used to report any miscellaneous packets
 * transferred outside of the main set of queues used by the networking stack.
 * When the statistics for the entire device are queried, first @get_base_stats
 * is issued to collect the delta, and then a series of per-queue callbacks.
 * Only statistics which are set in @get_base_stats will be reported
 * at the device level, meaning that unlike in queue callbacks, setting
 * a statistic to zero in @get_base_stats is a legitimate thing to do.
 * This is because @get_base_stats has a second function of designating which
 * statistics are in fact correct for the entire device (e.g. when history
 * for some of the events is not maintained, and a reliable "total" cannot
 * be provided).
 *
 * Ops are called under the instance lock if netdev_need_ops_lock()
 * returns true, otherwise under rtnl_lock.
 * Device drivers can assume that when collecting total device stats,
 * the @get_base_stats and subsequent per-queue calls are performed
 * "atomically" (without releasing the relevant lock).
 *
 * Device drivers are encouraged to reset the per-queue statistics when
 * the number of queues changes. This is because the primary use case for
 * per-queue statistics is currently to detect traffic imbalance.
 */
struct netdev_stat_ops {
	void (*get_queue_stats_rx)(struct net_device *dev, int idx,
				   struct netdev_queue_stats_rx *stats);
	void (*get_queue_stats_tx)(struct net_device *dev, int idx,
				   struct netdev_queue_stats_tx *stats);
	void (*get_base_stats)(struct net_device *dev,
			       struct netdev_queue_stats_rx *rx,
			       struct netdev_queue_stats_tx *tx);
};
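
/* Illustrative sketch of a driver implementation following the rules above:
 * the per-queue callback assigns only the statistics it collects, while the
 * base callback sets (possibly to zero) every statistic it wants reported at
 * the device level. All "example_" names are hypothetical.
 *
 *	static void
 *	example_get_queue_stats_rx(struct net_device *dev, int idx,
 *				   struct netdev_queue_stats_rx *rx)
 *	{
 *		struct example_priv *priv = netdev_priv(dev);
 *		struct example_rx_ring *ring = &priv->rx_rings[idx];
 *
 *		rx->packets = ring->packets;
 *		rx->bytes = ring->bytes;
 *		rx->alloc_fail = ring->alloc_fail;
 *		// csum_*, hw_gro_*, etc. left untouched - not collected
 *	}
 *
 *	static void
 *	example_get_base_stats(struct net_device *dev,
 *			       struct netdev_queue_stats_rx *rx,
 *			       struct netdev_queue_stats_tx *tx)
 *	{
 *		struct example_priv *priv = netdev_priv(dev);
 *
 *		// Counts accumulated from queues which no longer exist;
 *		// writing 0 marks a statistic as valid for the whole device.
 *		rx->packets = priv->base_rx_packets;
 *		rx->bytes = priv->base_rx_bytes;
 *		rx->alloc_fail = 0;
 *		tx->packets = priv->base_tx_packets;
 *		tx->bytes = priv->base_tx_bytes;
 *	}
 *
 *	static const struct netdev_stat_ops example_stat_ops = {
 *		.get_queue_stats_rx	= example_get_queue_stats_rx,
 *		.get_base_stats		= example_get_base_stats,
 *	};
 *
 * The ops are then hooked up through the netdev's stat_ops pointer
 * (typically before register_netdev()).
 */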

/* Sum the per-queue statistics reported by the device's stat_ops over the
 * given ranges of Rx and Tx queue indexes into rx_sum and tx_sum.
 */
void netdev_stat_queue_sum(struct net_device *netdev,
			   int rx_start, int rx_end,
			   struct netdev_queue_stats_rx *rx_sum,
			   int tx_start, int tx_end,
			   struct netdev_queue_stats_tx *tx_sum);

/**
 * struct netdev_queue_mgmt_ops - netdev ops for queue management
 *
 * @ndo_queue_mem_size: Size of the struct that describes a queue's memory.
 *
 * @ndo_queue_mem_alloc: Allocate memory for an RX queue at the specified index.
 *			 The new memory is written at the specified address.
 *
 * @ndo_queue_mem_free: Free memory from an RX queue.
 *
 * @ndo_queue_start: Start an RX queue with the specified memory and at the
 *		     specified index.
 *
 * @ndo_queue_stop: Stop the RX queue at the specified index. The stopped
 *		    queue's memory is written at the specified address.
 *
 * Note that @ndo_queue_mem_alloc and @ndo_queue_mem_free may be called while
 * the interface is closed. @ndo_queue_start and @ndo_queue_stop will only
 * be called for an interface which is open.
 */
struct netdev_queue_mgmt_ops {
	size_t	ndo_queue_mem_size;
	int	(*ndo_queue_mem_alloc)(struct net_device *dev,
				       void *per_queue_mem,
				       int idx);
	void	(*ndo_queue_mem_free)(struct net_device *dev,
				      void *per_queue_mem);
	int	(*ndo_queue_start)(struct net_device *dev,
				   void *per_queue_mem,
				   int idx);
	int	(*ndo_queue_stop)(struct net_device *dev,
				  void *per_queue_mem,
				  int idx);
};
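
/* Illustrative sketch of how these ops are intended to be used: roughly the
 * sequence the core follows when it needs to restart a running Rx queue
 * (e.g. when the queue's memory provider changes). Error handling and the
 * closed-interface case are omitted; names outside this header (the
 * queue_mgmt_ops pointer, idx) are assumptions.
 *
 *	const struct netdev_queue_mgmt_ops *qops = dev->queue_mgmt_ops;
 *	void *new_mem, *old_mem;
 *
 *	new_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
 *	old_mem = kvzalloc(qops->ndo_queue_mem_size, GFP_KERNEL);
 *
 *	qops->ndo_queue_mem_alloc(dev, new_mem, idx);	// allowed while closed
 *
 *	if (netif_running(dev)) {
 *		qops->ndo_queue_stop(dev, old_mem, idx);  // old state -> old_mem
 *		qops->ndo_queue_start(dev, new_mem, idx); // bring up new memory
 *		qops->ndo_queue_mem_free(dev, old_mem);
 *	}
 *
 *	kvfree(old_mem);
 *	kvfree(new_mem);
 */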

/**
 * DOC: Lockless queue stopping / waking helpers.
 *
 * The netif_txq_maybe_stop() and __netif_txq_completed_wake()
 * macros are designed to safely implement stopping
 * and waking netdev queues without full lock protection.
 *
 * We assume that there can be no concurrent stop attempts and no concurrent
 * wake attempts. The try-stop should happen from the xmit handler,
 * while wake up should be triggered from NAPI poll context.
 * The two may run concurrently (single producer, single consumer).
 *
 * The try-stop side is expected to run from the xmit handler and therefore
 * it does not reschedule Tx (netif_tx_start_queue() instead of
 * netif_tx_wake_queue()). Uses of the ``stop`` macros outside of the xmit
 * handler may lead to the xmit queue being enabled but not run.
 * The waking side does not have similar context restrictions.
 *
 * The macros guarantee that rings will not remain stopped if there's
 * space available, but they do *not* prevent false wake ups when
 * the ring is full! Drivers should check for ring full at the start
 * of the xmit handler.
 *
 * All descriptor ring indexes (and other relevant shared state) must
 * be updated before invoking the macros.
 */

#define netif_txq_try_stop(txq, get_desc, start_thrs)			\
	({								\
		int _res;						\
									\
		netif_tx_stop_queue(txq);				\
		/* Producer index and stop bit must be visible		\
		 * to consumer before we recheck.			\
		 * Pairs with a barrier in __netif_txq_completed_wake(). \
		 */							\
		smp_mb__after_atomic();					\
									\
		/* We need to check again in case another		\
		 * CPU has just made room available.			\
		 */							\
		_res = 0;						\
		if (unlikely(get_desc >= start_thrs)) {			\
			netif_tx_start_queue(txq);			\
			_res = -1;					\
		}							\
		_res;							\
	})

/**
 * netif_txq_maybe_stop() - locklessly stop a Tx queue, if needed
 * @txq:	struct netdev_queue to stop/start
 * @get_desc:	get current number of free descriptors (see requirements below!)
 * @stop_thrs:	minimal number of available descriptors for queue to be left
 *		enabled
 * @start_thrs:	minimal number of descriptors to re-enable the queue, can be
 *		equal to @stop_thrs or higher to avoid frequent waking
 *
 * All arguments may be evaluated multiple times; beware of side effects.
 * @get_desc must be a formula or a function call; it must always
 * return up-to-date information when evaluated!
 * Expected to be used from ndo_start_xmit; see the comment at the top of
 * the file.
 *
 * Returns:
 *	 0 if the queue was stopped
 *	 1 if the queue was left enabled
 *	-1 if the queue was re-enabled (raced with waking)
 */
#define netif_txq_maybe_stop(txq, get_desc, stop_thrs, start_thrs)	\
	({								\
		int _res;						\
									\
		_res = 1;						\
		if (unlikely(get_desc < stop_thrs))			\
			_res = netif_txq_try_stop(txq, get_desc, start_thrs); \
		_res;							\
	})
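
/* Illustrative sketch: using the stop side from an ndo_start_xmit
 * implementation, after the descriptors for the skb have been posted.
 * The ring structure, its free-descriptor helper and the exact thresholds
 * are hypothetical.
 *
 *	static netdev_tx_t example_start_xmit(struct sk_buff *skb,
 *					      struct net_device *dev)
 *	{
 *		struct example_tx_ring *ring = example_select_ring(dev, skb);
 *		struct netdev_queue *txq;
 *
 *		txq = netdev_get_tx_queue(dev, ring->index);
 *
 *		// ... check for ring full, post descriptors,
 *		//     update the producer index ...
 *
 *		netif_txq_maybe_stop(txq, example_free_descs(ring),
 *				     MAX_SKB_FRAGS + 1,
 *				     2 * (MAX_SKB_FRAGS + 1));
 *		return NETDEV_TX_OK;
 *	}
 */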

/* Variant of netdev_tx_completed_queue() which guarantees smp_mb() if
 * @bytes != 0, regardless of kernel config.
 */
static inline void
netdev_txq_completed_mb(struct netdev_queue *dev_queue,
			unsigned int pkts, unsigned int bytes)
{
	if (IS_ENABLED(CONFIG_BQL))
		netdev_tx_completed_queue(dev_queue, pkts, bytes);
	else if (bytes)
		smp_mb();
}

/**
 * __netif_txq_completed_wake() - locklessly wake a Tx queue, if needed
 * @txq:	struct netdev_queue to stop/start
 * @pkts:	number of packets completed
 * @bytes:	number of bytes completed
 * @get_desc:	get current number of free descriptors (see requirements below!)
 * @start_thrs:	minimal number of descriptors to re-enable the queue
 * @down_cond:	down condition, predicate indicating that the queue should
 *		not be woken up even if descriptors are available
 *
 * All arguments may be evaluated multiple times.
 * @get_desc must be a formula or a function call; it must always
 * return up-to-date information when evaluated!
 * Reports completed pkts/bytes to BQL.
 *
 * Returns:
 *	 0 if the queue was woken up
 *	 1 if the queue was already enabled (or disabled but @down_cond is true)
 *	-1 if the queue was left unchanged (@start_thrs not reached)
 */
#define __netif_txq_completed_wake(txq, pkts, bytes,			\
				   get_desc, start_thrs, down_cond)	\
	({								\
		int _res;						\
									\
		/* Report to BQL and piggy back on its barrier.		\
		 * Barrier makes sure that anybody stopping the queue	\
		 * after this point sees the new consumer index.	\
		 * Pairs with barrier in netif_txq_try_stop().		\
		 */							\
		netdev_txq_completed_mb(txq, pkts, bytes);		\
									\
		_res = -1;						\
		if (pkts && likely(get_desc >= start_thrs)) {		\
			_res = 1;					\
			if (unlikely(netif_tx_queue_stopped(txq)) &&	\
			    !(down_cond)) {				\
				netif_tx_wake_queue(txq);		\
				_res = 0;				\
			}						\
		}							\
		_res;							\
	})

#define netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs) \
	__netif_txq_completed_wake(txq, pkts, bytes, get_desc, start_thrs, false)
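
/* Illustrative sketch: the wake side, called from the driver's Tx completion
 * processing in NAPI poll context after the consumer index has been updated.
 * Names other than the helpers from this file are hypothetical; here
 * ring->resetting stands in for a driver-specific @down_cond.
 *
 *	static void example_clean_tx(struct example_tx_ring *ring)
 *	{
 *		unsigned int pkts = 0, bytes = 0;
 *		struct netdev_queue *txq;
 *
 *		// ... reclaim completed descriptors, accumulating pkts/bytes,
 *		//     and publish the new consumer index ...
 *
 *		txq = netdev_get_tx_queue(ring->netdev, ring->index);
 *		__netif_txq_completed_wake(txq, pkts, bytes,
 *					   example_free_descs(ring),
 *					   2 * (MAX_SKB_FRAGS + 1),
 *					   ring->resetting);
 *	}
 */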

/* subqueue variants follow */

#define netif_subqueue_try_stop(dev, idx, get_desc, start_thrs)	\
	({								\
		struct netdev_queue *_txq;				\
									\
		_txq = netdev_get_tx_queue(dev, idx);			\
		netif_txq_try_stop(_txq, get_desc, start_thrs);		\
	})

#define netif_subqueue_maybe_stop(dev, idx, get_desc, stop_thrs, start_thrs) \
	({								\
		struct netdev_queue *_txq;				\
									\
		_txq = netdev_get_tx_queue(dev, idx);			\
		netif_txq_maybe_stop(_txq, get_desc, stop_thrs, start_thrs); \
	})

#define netif_subqueue_completed_wake(dev, idx, pkts, bytes,		\
				      get_desc, start_thrs)		\
	({								\
		struct netdev_queue *_txq;				\
									\
		_txq = netdev_get_tx_queue(dev, idx);			\
		netif_txq_completed_wake(_txq, pkts, bytes,		\
					 get_desc, start_thrs);		\
	})
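
/* The subqueue variants differ only in taking (dev, queue index) and looking
 * up the struct netdev_queue themselves, e.g. (hypothetical names):
 *
 *	netif_subqueue_maybe_stop(dev, ring->index, example_free_descs(ring),
 *				  MAX_SKB_FRAGS + 1, 2 * (MAX_SKB_FRAGS + 1));
 */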

#endif