1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * PCIe bandwidth controller
4 *
5 * Author: Alexandru Gagniuc <[email protected]>
6 *
7 * Copyright (C) 2019 Dell Inc
8 * Copyright (C) 2023-2024 Intel Corporation
9 *
10 * The PCIe bandwidth controller provides a way to alter PCIe Link Speeds
11 * and notify the operating system when the Link Width or Speed changes. The
12 * notification capability is required for all Root Ports and Downstream
13 * Ports supporting Link Width wider than x1 and/or multiple Link Speeds.
14 *
15 * This service port driver hooks into the Bandwidth Notification interrupt
16 * watching for changes or links becoming degraded in operation. It updates
17 * the cached Current Link Speed that is exposed to user space through sysfs.
18 */
19
/*
 * Prefix for the format strings of this file's device log messages.
 * NOTE(review): presumably picked up by the pci_*()/dev_*() print helpers
 * (e.g. the pci_dbg() call in the probe routine) - confirm.
 */
#define dev_fmt(fmt) "bwctrl: " fmt
21
22 #include <linux/atomic.h>
23 #include <linux/bitops.h>
24 #include <linux/bits.h>
25 #include <linux/cleanup.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/mutex.h>
29 #include <linux/pci.h>
30 #include <linux/pci-bwctrl.h>
31 #include <linux/rwsem.h>
32 #include <linux/slab.h>
33 #include <linux/types.h>
34
35 #include "../pci.h"
36 #include "portdrv.h"
37
/**
 * struct pcie_bwctrl_data - PCIe bandwidth controller
 * @set_speed_mutex:	Serializes link speed changes; taken by
 *			pcie_set_target_speed() around the actual speed change
 * @lbms_count:		Count for LBMS (since last reset); incremented when
 *			the LBMS bit is seen set in the Link Status register
 *			and zeroed by pcie_reset_lbms_count()
 * @cdev:		Thermal cooling device associated with the port, or
 *			NULL if registering the cooling device failed
 */
struct pcie_bwctrl_data {
	struct mutex set_speed_mutex;
	atomic_t lbms_count;
	struct thermal_cooling_device *cdev;
};
49
/*
 * Prevent port removal during LBMS count accessors and Link Speed changes.
 *
 * Probe and remove take both semaphores for writing while they set or clear
 * port->link_bwctrl. pcie_reset_lbms_count() and pcie_lbms_count() take
 * pcie_bwctrl_lbms_rwsem for reading; pcie_set_target_speed() takes
 * pcie_bwctrl_setspeed_rwsem for reading.
 *
 * These have to be differentiated because pcie_bwctrl_change_speed() calls
 * pcie_retrain_link() which uses LBMS count reset accessor on success
 * (using just one rwsem triggers "possible recursive locking detected"
 * warning).
 */
static DECLARE_RWSEM(pcie_bwctrl_lbms_rwsem);
static DECLARE_RWSEM(pcie_bwctrl_setspeed_rwsem);
60
pcie_valid_speed(enum pci_bus_speed speed)61 static bool pcie_valid_speed(enum pci_bus_speed speed)
62 {
63 return (speed >= PCIE_SPEED_2_5GT) && (speed <= PCIE_SPEED_64_0GT);
64 }
65
pci_bus_speed2lnkctl2(enum pci_bus_speed speed)66 static u16 pci_bus_speed2lnkctl2(enum pci_bus_speed speed)
67 {
68 static const u8 speed_conv[] = {
69 [PCIE_SPEED_2_5GT] = PCI_EXP_LNKCTL2_TLS_2_5GT,
70 [PCIE_SPEED_5_0GT] = PCI_EXP_LNKCTL2_TLS_5_0GT,
71 [PCIE_SPEED_8_0GT] = PCI_EXP_LNKCTL2_TLS_8_0GT,
72 [PCIE_SPEED_16_0GT] = PCI_EXP_LNKCTL2_TLS_16_0GT,
73 [PCIE_SPEED_32_0GT] = PCI_EXP_LNKCTL2_TLS_32_0GT,
74 [PCIE_SPEED_64_0GT] = PCI_EXP_LNKCTL2_TLS_64_0GT,
75 };
76
77 if (WARN_ON_ONCE(!pcie_valid_speed(speed)))
78 return 0;
79
80 return speed_conv[speed];
81 }
82
/*
 * Convert a Supported Link Speeds bitmask into a Target Link Speed value by
 * taking the position of its highest set bit with __fls().
 *
 * NOTE(review): __fls() is documented as undefined for a zero argument, so
 * callers are expected to pass a non-empty mask - confirm at each call site.
 */
static inline u16 pcie_supported_speeds2target_speed(u8 supported_speeds)
{
	return __fls(supported_speeds);
}
87
88 /**
89 * pcie_bwctrl_select_speed - Select Target Link Speed
90 * @port: PCIe Port
91 * @speed_req: Requested PCIe Link Speed
92 *
93 * Select Target Link Speed by take into account Supported Link Speeds of
94 * both the Root Port and the Endpoint.
95 *
96 * Return: Target Link Speed (1=2.5GT/s, 2=5GT/s, 3=8GT/s, etc.)
97 */
pcie_bwctrl_select_speed(struct pci_dev * port,enum pci_bus_speed speed_req)98 static u16 pcie_bwctrl_select_speed(struct pci_dev *port, enum pci_bus_speed speed_req)
99 {
100 struct pci_bus *bus = port->subordinate;
101 u8 desired_speeds, supported_speeds;
102 struct pci_dev *dev;
103
104 desired_speeds = GENMASK(pci_bus_speed2lnkctl2(speed_req),
105 __fls(PCI_EXP_LNKCAP2_SLS_2_5GB));
106
107 supported_speeds = port->supported_speeds;
108 if (bus) {
109 down_read(&pci_bus_sem);
110 dev = list_first_entry_or_null(&bus->devices, struct pci_dev, bus_list);
111 if (dev)
112 supported_speeds &= dev->supported_speeds;
113 up_read(&pci_bus_sem);
114 }
115 if (!supported_speeds)
116 supported_speeds = PCI_EXP_LNKCAP2_SLS_2_5GB;
117
118 return pcie_supported_speeds2target_speed(supported_speeds & desired_speeds);
119 }
120
/*
 * Program @target_speed into the Target Link Speed field of Link Control 2,
 * retrain the link and refresh the cached link speed of the subordinate bus.
 *
 * Returns 0 on success or a negative errno if the register update or the
 * retraining fails.
 */
static int pcie_bwctrl_change_speed(struct pci_dev *port, u16 target_speed, bool use_lt)
{
	struct pci_bus *child;
	int err;

	err = pcie_capability_clear_and_set_word(port, PCI_EXP_LNKCTL2,
						 PCI_EXP_LNKCTL2_TLS, target_speed);
	if (err != PCIBIOS_SUCCESSFUL)
		return pcibios_err_to_errno(err);

	err = pcie_retrain_link(port, use_lt);
	if (err < 0)
		return err;

	/*
	 * Do not rely solely on the notification interrupt: some platforms
	 * have problems delivering it, so update the link speed here too.
	 */
	child = port->subordinate;
	if (child)
		pcie_update_link_speed(child);

	return 0;
}
143
144 /**
145 * pcie_set_target_speed - Set downstream Link Speed for PCIe Port
146 * @port: PCIe Port
147 * @speed_req: Requested PCIe Link Speed
148 * @use_lt: Wait for the LT or DLLLA bit to detect the end of link training
149 *
150 * Attempt to set PCIe Port Link Speed to @speed_req. @speed_req may be
151 * adjusted downwards to the best speed supported by both the Port and PCIe
152 * Device underneath it.
153 *
154 * Return:
155 * * 0 - on success
156 * * -EINVAL - @speed_req is not a PCIe Link Speed
157 * * -ENODEV - @port is not controllable
158 * * -ETIMEDOUT - changing Link Speed took too long
159 * * -EAGAIN - Link Speed was changed but @speed_req was not achieved
160 */
pcie_set_target_speed(struct pci_dev * port,enum pci_bus_speed speed_req,bool use_lt)161 int pcie_set_target_speed(struct pci_dev *port, enum pci_bus_speed speed_req,
162 bool use_lt)
163 {
164 struct pci_bus *bus = port->subordinate;
165 u16 target_speed;
166 int ret;
167
168 if (WARN_ON_ONCE(!pcie_valid_speed(speed_req)))
169 return -EINVAL;
170
171 if (bus && bus->cur_bus_speed == speed_req)
172 return 0;
173
174 target_speed = pcie_bwctrl_select_speed(port, speed_req);
175
176 scoped_guard(rwsem_read, &pcie_bwctrl_setspeed_rwsem) {
177 struct pcie_bwctrl_data *data = port->link_bwctrl;
178
179 /*
180 * port->link_bwctrl is NULL during initial scan when called
181 * e.g. from the Target Speed quirk.
182 */
183 if (data)
184 mutex_lock(&data->set_speed_mutex);
185
186 ret = pcie_bwctrl_change_speed(port, target_speed, use_lt);
187
188 if (data)
189 mutex_unlock(&data->set_speed_mutex);
190 }
191
192 /*
193 * Despite setting higher speed into the Target Link Speed, empty
194 * bus won't train to 5GT+ speeds.
195 */
196 if (!ret && bus && bus->cur_bus_speed != speed_req &&
197 !list_empty(&bus->devices))
198 ret = -EAGAIN;
199
200 return ret;
201 }
202
pcie_bwnotif_enable(struct pcie_device * srv)203 static void pcie_bwnotif_enable(struct pcie_device *srv)
204 {
205 struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
206 struct pci_dev *port = srv->port;
207 u16 link_status;
208 int ret;
209
210 /* Count LBMS seen so far as one */
211 ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
212 if (ret == PCIBIOS_SUCCESSFUL && link_status & PCI_EXP_LNKSTA_LBMS)
213 atomic_inc(&data->lbms_count);
214
215 pcie_capability_set_word(port, PCI_EXP_LNKCTL,
216 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
217 pcie_capability_write_word(port, PCI_EXP_LNKSTA,
218 PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
219
220 /*
221 * Update after enabling notifications & clearing status bits ensures
222 * link speed is up to date.
223 */
224 pcie_update_link_speed(port->subordinate);
225 }
226
pcie_bwnotif_disable(struct pci_dev * port)227 static void pcie_bwnotif_disable(struct pci_dev *port)
228 {
229 pcie_capability_clear_word(port, PCI_EXP_LNKCTL,
230 PCI_EXP_LNKCTL_LBMIE | PCI_EXP_LNKCTL_LABIE);
231 }
232
pcie_bwnotif_irq(int irq,void * context)233 static irqreturn_t pcie_bwnotif_irq(int irq, void *context)
234 {
235 struct pcie_device *srv = context;
236 struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
237 struct pci_dev *port = srv->port;
238 u16 link_status, events;
239 int ret;
240
241 ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
242 if (ret != PCIBIOS_SUCCESSFUL)
243 return IRQ_NONE;
244
245 events = link_status & (PCI_EXP_LNKSTA_LBMS | PCI_EXP_LNKSTA_LABS);
246 if (!events)
247 return IRQ_NONE;
248
249 if (events & PCI_EXP_LNKSTA_LBMS)
250 atomic_inc(&data->lbms_count);
251
252 pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
253
254 /*
255 * Interrupts will not be triggered from any further Link Speed
256 * change until LBMS is cleared by the write. Therefore, re-read the
257 * speed (inside pcie_update_link_speed()) after LBMS has been
258 * cleared to avoid missing link speed changes.
259 */
260 pcie_update_link_speed(port->subordinate);
261
262 return IRQ_HANDLED;
263 }
264
pcie_reset_lbms_count(struct pci_dev * port)265 void pcie_reset_lbms_count(struct pci_dev *port)
266 {
267 struct pcie_bwctrl_data *data;
268
269 guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
270 data = port->link_bwctrl;
271 if (data)
272 atomic_set(&data->lbms_count, 0);
273 else
274 pcie_capability_write_word(port, PCI_EXP_LNKSTA,
275 PCI_EXP_LNKSTA_LBMS);
276 }
277
pcie_lbms_count(struct pci_dev * port,unsigned long * val)278 int pcie_lbms_count(struct pci_dev *port, unsigned long *val)
279 {
280 struct pcie_bwctrl_data *data;
281
282 guard(rwsem_read)(&pcie_bwctrl_lbms_rwsem);
283 data = port->link_bwctrl;
284 if (!data)
285 return -ENOTTY;
286
287 *val = atomic_read(&data->lbms_count);
288
289 return 0;
290 }
291
pcie_bwnotif_probe(struct pcie_device * srv)292 static int pcie_bwnotif_probe(struct pcie_device *srv)
293 {
294 struct pci_dev *port = srv->port;
295 int ret;
296
297 /* Can happen if we run out of bus numbers during enumeration. */
298 if (!port->subordinate)
299 return -ENODEV;
300
301 struct pcie_bwctrl_data *data = devm_kzalloc(&srv->device,
302 sizeof(*data), GFP_KERNEL);
303 if (!data)
304 return -ENOMEM;
305
306 ret = devm_mutex_init(&srv->device, &data->set_speed_mutex);
307 if (ret)
308 return ret;
309
310 scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
311 scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
312 port->link_bwctrl = data;
313
314 ret = request_irq(srv->irq, pcie_bwnotif_irq,
315 IRQF_SHARED, "PCIe bwctrl", srv);
316 if (ret) {
317 port->link_bwctrl = NULL;
318 return ret;
319 }
320
321 pcie_bwnotif_enable(srv);
322 }
323 }
324
325 pci_dbg(port, "enabled with IRQ %d\n", srv->irq);
326
327 /* Don't fail on errors. Don't leave IS_ERR() "pointer" into ->cdev */
328 port->link_bwctrl->cdev = pcie_cooling_device_register(port);
329 if (IS_ERR(port->link_bwctrl->cdev))
330 port->link_bwctrl->cdev = NULL;
331
332 return 0;
333 }
334
pcie_bwnotif_remove(struct pcie_device * srv)335 static void pcie_bwnotif_remove(struct pcie_device *srv)
336 {
337 struct pcie_bwctrl_data *data = srv->port->link_bwctrl;
338
339 pcie_cooling_device_unregister(data->cdev);
340
341 scoped_guard(rwsem_write, &pcie_bwctrl_setspeed_rwsem) {
342 scoped_guard(rwsem_write, &pcie_bwctrl_lbms_rwsem) {
343 pcie_bwnotif_disable(srv->port);
344
345 free_irq(srv->irq, srv);
346
347 srv->port->link_bwctrl = NULL;
348 }
349 }
350 }
351
pcie_bwnotif_suspend(struct pcie_device * srv)352 static int pcie_bwnotif_suspend(struct pcie_device *srv)
353 {
354 pcie_bwnotif_disable(srv->port);
355 return 0;
356 }
357
/*
 * Resume callback: re-enable bandwidth notifications through
 * pcie_bwnotif_enable(). Always returns 0.
 */
static int pcie_bwnotif_resume(struct pcie_device *srv)
{
	pcie_bwnotif_enable(srv);
	return 0;
}
363
364 static struct pcie_port_service_driver pcie_bwctrl_driver = {
365 .name = "pcie_bwctrl",
366 .port_type = PCIE_ANY_PORT,
367 .service = PCIE_PORT_SERVICE_BWCTRL,
368 .probe = pcie_bwnotif_probe,
369 .suspend = pcie_bwnotif_suspend,
370 .resume = pcie_bwnotif_resume,
371 .remove = pcie_bwnotif_remove,
372 };
373
/*
 * Register the bandwidth controller port service driver.
 *
 * Returns whatever pcie_port_service_register() returns.
 */
int __init pcie_bwctrl_init(void)
{
	return pcie_port_service_register(&pcie_bwctrl_driver);
}
378