| 1 | /* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ |
| 2 | /* Copyright (c) 2019 Mellanox Technologies. */ |
| 3 | |
| 4 | #ifndef DIM_H |
| 5 | #define DIM_H |
| 6 | |
| 7 | #include <linux/bits.h> |
| 8 | #include <linux/kernel.h> |
| 9 | #include <linux/module.h> |
| 10 | #include <linux/types.h> |
| 11 | #include <linux/workqueue.h> |
| 12 | |
| 13 | struct net_device; |
| 14 | |
/*
 * Net DIM tunables: number of moderation profiles, default packet counts
 * and default profile indices.
 * NOTE(review): the _CQE/_EQE suffixes presumably correspond to the CQ period
 * modes of enum dim_cq_period_mode - confirm against the users of these
 * constants.
 */
#define NET_DIM_PARAMS_NUM_PROFILES 5
#define NET_DIM_DEFAULT_RX_CQ_PKTS_FROM_EQE 256
#define NET_DIM_DEFAULT_TX_CQ_PKTS_FROM_EQE 128
#define NET_DIM_DEF_PROFILE_CQE 1
#define NET_DIM_DEF_PROFILE_EQE 1
| 21 | |
/*
 * Number of events between DIM iterations.
 * Moderates how often the algorithm itself is run.
 */
#define DIM_NEVENTS 64
| 27 | |
/*
 * Does the difference between two values justify taking an action?
 * A difference of more than 10% of @ref is considered significant.
 *
 * A zero @ref is never significant: the leading (ref) test short-circuits
 * the expression, so the division by @ref is not evaluated in that case.
 * @ref is expanded more than once, so do not pass arguments with side
 * effects.
 */
#define IS_SIGNIFICANT_DIFF(val, ref) \
	((ref) && (((100UL * abs((val) - (ref))) / (ref)) > 10))
| 34 | |
/*
 * Calculate the gap between two values of a counter that is @bits wide.
 * Takes wrap-around and variable size into consideration: BIT_ULL(bits) is
 * added to (@end - @start) and the sum is masked with BIT_ULL(bits) - 1, so
 * the result is the distance from @start to @end modulo 2^bits.
 */
#define BIT_GAP(bits, end, start) ((((end) - (start)) + BIT_ULL(bits)) \
		& (BIT_ULL(bits) - 1))
| 41 | |
/**
 * struct dim_cq_moder - Structure for CQ moderation values.
 * Used for communications between DIM and its consumer.
 *
 * @usec: CQ timer suggestion (by DIM)
 * @pkts: CQ packet counter suggestion (by DIM)
 * @comps: Completion counter
 * @cq_period_mode: CQ period count mode (from CQE/EQE)
 * @rcu: RCU head, for asynchronous freeing with kfree_rcu()
 */
struct dim_cq_moder {
	u16 usec;
	u16 pkts;
	u16 comps;
	u8 cq_period_mode;
	struct rcu_head rcu;
};
| 59 | |
/* Flag bits for the profile_flags field of struct dim_irq_moder. */
#define DIM_PROFILE_RX BIT(0) /* support rx profile modification */
#define DIM_PROFILE_TX BIT(1) /* support tx profile modification */
| 62 | |
/* Flag bits for the coal_flags field of struct dim_irq_moder. */
#define DIM_COALESCE_USEC BIT(0) /* support usec field modification */
#define DIM_COALESCE_PKTS BIT(1) /* support pkts field modification */
#define DIM_COALESCE_COMPS BIT(2) /* support comps field modification */
| 66 | |
/**
 * struct dim_irq_moder - Structure for irq moderation information.
 * Used to collect irq moderation related information.
 *
 * @profile_flags: DIM_PROFILE_* flags
 * @coal_flags: DIM_COALESCE_* flags, shared by Rx and Tx
 * @dim_rx_mode: Rx DIM period count mode: CQE or EQE
 * @dim_tx_mode: Tx DIM period count mode: CQE or EQE
 * @rx_profile: DIM profile list for Rx (RCU-annotated pointer)
 * @tx_profile: DIM profile list for Tx (RCU-annotated pointer)
 * @rx_dim_work: Rx DIM worker scheduled by net_dim()
 * @tx_dim_work: Tx DIM worker scheduled by net_dim()
 */
struct dim_irq_moder {
	u8 profile_flags;
	u8 coal_flags;
	u8 dim_rx_mode;
	u8 dim_tx_mode;
	struct dim_cq_moder __rcu *rx_profile;
	struct dim_cq_moder __rcu *tx_profile;
	void (*rx_dim_work)(struct work_struct *work);
	void (*tx_dim_work)(struct work_struct *work);
};
| 90 | |
/**
 * struct dim_sample - Structure for DIM sample data.
 * Used for communications between DIM and its consumer.
 *
 * @time: Sample timestamp
 * @pkt_ctr: Number of packets (32-bit counter)
 * @byte_ctr: Number of bytes (32-bit counter)
 * @event_ctr: Number of events (16-bit counter)
 * @comp_ctr: Current completion counter (32-bit counter)
 */
struct dim_sample {
	ktime_t time;
	u32 pkt_ctr;
	u32 byte_ctr;
	u16 event_ctr;
	u32 comp_ctr;
};
| 108 | |
/**
 * struct dim_stats - Structure for DIM stats.
 * Used for holding the currently measured rates.
 *
 * @ppms: Packets per msec
 * @bpms: Bytes per msec
 * @epms: Events per msec
 * @cpms: Completions per msec
 * @cpe_ratio: Ratio of completions to events
 */
struct dim_stats {
	int ppms; /* packets per msec */
	int bpms; /* bytes per msec */
	int epms; /* events per msec */
	int cpms; /* completions per msec */
	int cpe_ratio; /* ratio of completions to events */
};
| 126 | |
/**
 * struct dim - Main structure for dynamic interrupt moderation (DIM).
 * Used for holding all information about a specific DIM instance.
 *
 * @state: Algorithm state (see &enum dim_state)
 * @prev_stats: Measured rates from previous iteration (for comparison)
 * @start_sample: Sampled data at start of current iteration
 * @measuring_sample: A &dim_sample that is used to update the current events
 * @work: Work to perform on action required
 * @priv: A pointer to the struct that points to dim
 * @profile_ix: Current moderation profile
 * @mode: CQ period count mode
 * @tune_state: Algorithm tuning state (see &enum dim_tune_state)
 * @steps_right: Number of steps taken towards higher moderation
 * @steps_left: Number of steps taken towards lower moderation
 * @tired: Parking depth counter
 */
struct dim {
	u8 state;
	struct dim_stats prev_stats;
	struct dim_sample start_sample;
	struct dim_sample measuring_sample;
	struct work_struct work;
	void *priv;
	u8 profile_ix;
	u8 mode;
	u8 tune_state;
	u8 steps_right;
	u8 steps_left;
	u8 tired;
};
| 158 | |
/**
 * enum dim_cq_period_mode - Modes for CQ period count
 *
 * @DIM_CQ_PERIOD_MODE_START_FROM_EQE: Start counting from EQE
 * @DIM_CQ_PERIOD_MODE_START_FROM_CQE: Start counting from CQE (implies timer reset)
 * @DIM_CQ_PERIOD_NUM_MODES: Number of modes (keep as the last enumerator)
 */
enum dim_cq_period_mode {
	DIM_CQ_PERIOD_MODE_START_FROM_EQE = 0x0,
	DIM_CQ_PERIOD_MODE_START_FROM_CQE = 0x1,
	DIM_CQ_PERIOD_NUM_MODES
};
| 171 | |
/**
 * enum dim_state - DIM algorithm states
 *
 * These determine whether the algorithm is in a valid state to start an
 * iteration.
 *
 * @DIM_START_MEASURE: This is the first iteration (also after applying a new profile)
 * @DIM_MEASURE_IN_PROGRESS: Algorithm is already in progress - check whether
 * an action needs to be performed
 * @DIM_APPLY_NEW_PROFILE: DIM consumer is currently applying a profile - no need to measure
 */
enum dim_state {
	DIM_START_MEASURE,
	DIM_MEASURE_IN_PROGRESS,
	DIM_APPLY_NEW_PROFILE,
};
| 187 | |
/**
 * enum dim_tune_state - DIM algorithm tune states
 *
 * These determine which action the algorithm should perform.
 *
 * @DIM_PARKING_ON_TOP: Algorithm found a local top point - exit on significant difference
 * @DIM_PARKING_TIRED: Algorithm found a deep top point - don't exit if tired > 0
 * @DIM_GOING_RIGHT: Algorithm is currently trying higher moderation levels
 * @DIM_GOING_LEFT: Algorithm is currently trying lower moderation levels
 */
enum dim_tune_state {
	DIM_PARKING_ON_TOP,
	DIM_PARKING_TIRED,
	DIM_GOING_RIGHT,
	DIM_GOING_LEFT,
};
| 204 | |
/**
 * enum dim_stats_state - DIM algorithm statistics states
 *
 * These determine the verdict of the current iteration.
 *
 * @DIM_STATS_WORSE: Current iteration shows worse performance than before
 * @DIM_STATS_SAME: Current iteration shows the same performance as before
 * @DIM_STATS_BETTER: Current iteration shows better performance than before
 */
enum dim_stats_state {
	DIM_STATS_WORSE,
	DIM_STATS_SAME,
	DIM_STATS_BETTER,
};
| 219 | |
/**
 * enum dim_step_result - DIM algorithm step results
 *
 * These describe the result of a step.
 *
 * @DIM_STEPPED: Performed a regular step
 * @DIM_TOO_TIRED: The same kind of step was done multiple times - should go to
 * tired parking
 * @DIM_ON_EDGE: Stepped to the leftmost/rightmost profile
 */
enum dim_step_result {
	DIM_STEPPED,
	DIM_TOO_TIRED,
	DIM_ON_EDGE,
};
| 235 | |
/**
 * net_dim_init_irq_moder - collect information to initialize irq moderation
 * @dev: target network device
 * @profile_flags: Rx or Tx profile modification capability (DIM_PROFILE_*)
 * @coal_flags: irq moderation params flags (DIM_COALESCE_*)
 * @rx_mode: CQ period mode for Rx
 * @tx_mode: CQ period mode for Tx
 * @rx_dim_work: Rx worker called after dim decision
 * @tx_dim_work: Tx worker called after dim decision
 *
 * Return: 0 on success or a negative error code.
 */
int net_dim_init_irq_moder(struct net_device *dev, u8 profile_flags,
			   u8 coal_flags, u8 rx_mode, u8 tx_mode,
			   void (*rx_dim_work)(struct work_struct *work),
			   void (*tx_dim_work)(struct work_struct *work));
| 252 | |
/**
 * net_dim_free_irq_moder - free the fields used for irq moderation
 * @dev: target network device
 */
void net_dim_free_irq_moder(struct net_device *dev);
| 258 | |
/**
 * net_dim_setting - initialize DIM's cq mode and schedule worker
 * @dev: target network device
 * @dim: DIM context
 * @is_tx: true selects the tx direction, false selects the rx direction
 */
void net_dim_setting(struct net_device *dev, struct dim *dim, bool is_tx);
| 266 | |
/**
 * net_dim_work_cancel - synchronously cancel the DIM worker
 * @dim: DIM context whose worker is cancelled
 */
void net_dim_work_cancel(struct dim *dim);
| 272 | |
/**
 * net_dim_get_rx_irq_moder - get DIM rx results based on profile_ix
 * @dev: target network device
 * @dim: DIM context
 *
 * Return: DIM irq moderation for Rx, returned by value
 */
struct dim_cq_moder
net_dim_get_rx_irq_moder(struct net_device *dev, struct dim *dim);
| 282 | |
/**
 * net_dim_get_tx_irq_moder - get DIM tx results based on profile_ix
 * @dev: target network device
 * @dim: DIM context
 *
 * Return: DIM irq moderation for Tx, returned by value
 */
struct dim_cq_moder
net_dim_get_tx_irq_moder(struct net_device *dev, struct dim *dim);
| 292 | |
/**
 * net_dim_set_rx_mode - set the DIM rx cq mode
 * @dev: target network device
 * @rx_mode: target rx cq mode
 */
void net_dim_set_rx_mode(struct net_device *dev, u8 rx_mode);
| 299 | |
/**
 * net_dim_set_tx_mode - set the DIM tx cq mode
 * @dev: target network device
 * @tx_mode: target tx cq mode
 */
void net_dim_set_tx_mode(struct net_device *dev, u8 tx_mode);
| 306 | |
/**
 * dim_on_top - check if current state is a good place to stop (top location)
 * @dim: DIM context
 *
 * Check if the current profile is a good place to park at.
 * Parking reduces the frequency of DIM checks, as we assume that we probably
 * should not change profiles unless the traffic pattern has changed.
 *
 * Return: true if the current state is a good place to stop.
 */
bool dim_on_top(struct dim *dim);
| 316 | |
/**
 * dim_turn - reverse the direction in which profiles are being changed
 * @dim: DIM context
 *
 * Go left if we were going right and vice-versa.
 * Do nothing if currently parking.
 */
void dim_turn(struct dim *dim);
| 325 | |
/**
 * dim_park_on_top - enter a parking state on a top location
 * @dim: DIM context
 *
 * Enter the parking state.
 * Clear all movement history.
 */
void dim_park_on_top(struct dim *dim);
| 334 | |
/**
 * dim_park_tired - enter a tired parking state
 * @dim: DIM context
 *
 * Enter the parking state.
 * Clear all movement history and reduce the frequency of DIM checks.
 */
void dim_park_tired(struct dim *dim);
| 343 | |
/**
 * dim_calc_stats - calculate the difference between two samples
 * @start: start sample
 * @end: end sample
 * @curr_stats: output, delta between the samples
 *
 * Calculate the delta between two samples (in data rates).
 * Takes counter wrap-around into consideration.
 *
 * Return: boolean indicating whether @curr_stats are reliable.
 */
bool dim_calc_stats(const struct dim_sample *start,
		    const struct dim_sample *end,
		    struct dim_stats *curr_stats);
| 357 | |
| 358 | /** |
| 359 | * dim_update_sample - set a sample's fields with given values |
| 360 | * @event_ctr: number of events to set |
| 361 | * @packets: number of packets to set |
| 362 | * @bytes: number of bytes to set |
| 363 | * @s: DIM sample |
| 364 | */ |
| 365 | static inline void |
| 366 | dim_update_sample(u16 event_ctr, u64 packets, u64 bytes, struct dim_sample *s) |
| 367 | { |
| 368 | s->time = ktime_get(); |
| 369 | s->pkt_ctr = packets; |
| 370 | s->byte_ctr = bytes; |
| 371 | s->event_ctr = event_ctr; |
| 372 | } |
| 373 | |
| 374 | /** |
| 375 | * dim_update_sample_with_comps - set a sample's fields with given |
| 376 | * values including the completion parameter |
| 377 | * @event_ctr: number of events to set |
| 378 | * @packets: number of packets to set |
| 379 | * @bytes: number of bytes to set |
| 380 | * @comps: number of completions to set |
| 381 | * @s: DIM sample |
| 382 | */ |
| 383 | static inline void |
| 384 | dim_update_sample_with_comps(u16 event_ctr, u64 packets, u64 bytes, u64 comps, |
| 385 | struct dim_sample *s) |
| 386 | { |
| 387 | dim_update_sample(event_ctr, packets, bytes, s); |
| 388 | s->comp_ctr = comps; |
| 389 | } |
| 390 | |
| 391 | /* Net DIM */ |
| 392 | |
/**
 * net_dim_get_rx_moderation - provide a CQ moderation object for the given RX profile
 * @cq_period_mode: CQ period mode
 * @ix: Profile index
 *
 * Return: CQ moderation object for the given RX profile, returned by value
 */
struct dim_cq_moder net_dim_get_rx_moderation(u8 cq_period_mode, int ix);
| 399 | |
/**
 * net_dim_get_def_rx_moderation - provide the default RX moderation
 * @cq_period_mode: CQ period mode
 *
 * Return: the default RX CQ moderation object, returned by value
 */
struct dim_cq_moder net_dim_get_def_rx_moderation(u8 cq_period_mode);
| 405 | |
/**
 * net_dim_get_tx_moderation - provide a CQ moderation object for the given TX profile
 * @cq_period_mode: CQ period mode
 * @ix: Profile index
 *
 * Return: CQ moderation object for the given TX profile, returned by value
 */
struct dim_cq_moder net_dim_get_tx_moderation(u8 cq_period_mode, int ix);
| 412 | |
/**
 * net_dim_get_def_tx_moderation - provide the default TX moderation
 * @cq_period_mode: CQ period mode
 *
 * Return: the default TX CQ moderation object, returned by value
 */
struct dim_cq_moder net_dim_get_def_tx_moderation(u8 cq_period_mode);
| 418 | |
/**
 * net_dim - main DIM algorithm entry point
 * @dim: DIM instance information
 * @end_sample: Current data measurement
 *
 * Called by the consumer.
 * This is the main logic of the algorithm, where data is processed in order
 * to decide on the next required action.
 */
void net_dim(struct dim *dim, const struct dim_sample *end_sample);
| 429 | |
| 430 | /* RDMA DIM */ |
| 431 | |
/*
 * RDMA DIM profile:
 * the profile table must hold exactly RDMA_DIM_PARAMS_NUM_PROFILES entries.
 * NOTE(review): RDMA_DIM_START_PROFILE is presumably the index of the profile
 * the algorithm starts from - confirm against the RDMA DIM implementation.
 */
#define RDMA_DIM_PARAMS_NUM_PROFILES 9
#define RDMA_DIM_START_PROFILE 0
| 438 | |
/**
 * rdma_dim - Runs the adaptive moderation.
 * @dim: The moderation struct.
 * @completions: The number of completions collected in this round.
 *
 * Each call to rdma_dim takes the latest number of completions that
 * have been collected and counts them as a new event.
 * Once enough events have been collected, the algorithm decides on a new
 * moderation level.
 */
void rdma_dim(struct dim *dim, u64 completions);
| 450 | |
| 451 | #endif /* DIM_H */ |
| 452 | |