Douglas Thompson | 7c9281d | 2007-07-19 01:49:33 -0700 | [diff] [blame^] | 1 | /* edac_mc kernel module |
| 2 | * (C) 2005, 2006 Linux Networx (http://lnxi.com) |
| 3 | * This file may be distributed under the terms of the |
| 4 | * GNU General Public License. |
| 5 | * |
| 6 | * Written by Doug Thompson <norsk5@xmission.com> |
| 7 | * |
| 8 | */ |
| 9 | #include <linux/module.h> |
| 10 | #include <linux/sysdev.h> |
| 11 | #include <linux/ctype.h> |
| 12 | |
| 13 | #include "edac_mc.h" |
| 14 | #include "edac_module.h" |
| 15 | |
| 16 | |
| 17 | #ifdef CONFIG_PCI |
| 18 | static int check_pci_parity = 0; /* default YES check PCI parity */ |
| 19 | static int panic_on_pci_parity; /* default no panic on PCI Parity */ |
| 20 | static atomic_t pci_parity_count = ATOMIC_INIT(0); |
| 21 | |
| 22 | static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */ |
| 23 | static struct completion edac_pci_kobj_complete; |
| 24 | |
| 25 | |
/*
 * Format the int that @ptr points at as decimal text followed by a
 * newline.
 *
 * @ptr:    address of the int to display
 * @buffer: destination for the formatted text
 *
 * Returns the number of characters sprintf() wrote to @buffer.
 */
static ssize_t edac_pci_int_show(void *ptr, char *buffer)
{
	const int *src = ptr;
	int val = *src;

	return sprintf(buffer, "%d\n", val);
}
| 31 | |
| 32 | static ssize_t edac_pci_int_store(void *ptr, const char *buffer, size_t count) |
| 33 | { |
| 34 | int *value = ptr; |
| 35 | |
| 36 | if (isdigit(*buffer)) |
| 37 | *value = simple_strtoul(buffer,NULL,0); |
| 38 | |
| 39 | return count; |
| 40 | } |
| 41 | |
| 42 | struct edac_pci_dev_attribute { |
| 43 | struct attribute attr; |
| 44 | void *value; |
| 45 | ssize_t (*show)(void *,char *); |
| 46 | ssize_t (*store)(void *, const char *,size_t); |
| 47 | }; |
| 48 | |
| 49 | /* Set of show/store abstract level functions for PCI Parity object */ |
| 50 | static ssize_t edac_pci_dev_show(struct kobject *kobj, struct attribute *attr, |
| 51 | char *buffer) |
| 52 | { |
| 53 | struct edac_pci_dev_attribute *edac_pci_dev; |
| 54 | edac_pci_dev= (struct edac_pci_dev_attribute*)attr; |
| 55 | |
| 56 | if (edac_pci_dev->show) |
| 57 | return edac_pci_dev->show(edac_pci_dev->value, buffer); |
| 58 | return -EIO; |
| 59 | } |
| 60 | |
| 61 | static ssize_t edac_pci_dev_store(struct kobject *kobj, |
| 62 | struct attribute *attr, const char *buffer, size_t count) |
| 63 | { |
| 64 | struct edac_pci_dev_attribute *edac_pci_dev; |
| 65 | edac_pci_dev= (struct edac_pci_dev_attribute*)attr; |
| 66 | |
| 67 | if (edac_pci_dev->show) |
| 68 | return edac_pci_dev->store(edac_pci_dev->value, buffer, count); |
| 69 | return -EIO; |
| 70 | } |
| 71 | |
| 72 | static struct sysfs_ops edac_pci_sysfs_ops = { |
| 73 | .show = edac_pci_dev_show, |
| 74 | .store = edac_pci_dev_store |
| 75 | }; |
| 76 | |
/*
 * Define a static edac_pci_dev_attribute named edac_pci_attr_<_name> whose
 * sysfs file name is the stringified _name and whose value pointer is the
 * address of the variable called _name.
 *
 * The expansion deliberately has no trailing semicolon; the user supplies
 * it, so "EDAC_PCI_ATTR(...);" reads like a normal declaration and does
 * not leave a stray empty declaration behind.
 */
#define EDAC_PCI_ATTR(_name, _mode, _show, _store)			\
static struct edac_pci_dev_attribute edac_pci_attr_##_name = {		\
	.attr	= { .name = __stringify(_name), .mode = _mode },	\
	.value	= &_name,						\
	.show	= _show,						\
	.store	= _store,						\
}

/*
 * Same as EDAC_PCI_ATTR, except that the value pointer is given
 * explicitly as _data instead of being derived from _name.
 */
#define EDAC_PCI_STRING_ATTR(_name, _data, _mode, _show, _store)	\
static struct edac_pci_dev_attribute edac_pci_attr_##_name = {		\
	.attr	= { .name = __stringify(_name), .mode = _mode },	\
	.value	= _data,						\
	.show	= _show,						\
	.store	= _store,						\
}
| 92 | |
| 93 | /* PCI Parity control files */ |
| 94 | EDAC_PCI_ATTR(check_pci_parity, S_IRUGO|S_IWUSR, edac_pci_int_show, |
| 95 | edac_pci_int_store); |
| 96 | EDAC_PCI_ATTR(panic_on_pci_parity, S_IRUGO|S_IWUSR, edac_pci_int_show, |
| 97 | edac_pci_int_store); |
| 98 | EDAC_PCI_ATTR(pci_parity_count, S_IRUGO, edac_pci_int_show, NULL); |
| 99 | |
| 100 | /* Base Attributes of the memory ECC object */ |
| 101 | static struct edac_pci_dev_attribute *edac_pci_attr[] = { |
| 102 | &edac_pci_attr_check_pci_parity, |
| 103 | &edac_pci_attr_panic_on_pci_parity, |
| 104 | &edac_pci_attr_pci_parity_count, |
| 105 | NULL, |
| 106 | }; |
| 107 | |
| 108 | /* No memory to release */ |
| 109 | static void edac_pci_release(struct kobject *kobj) |
| 110 | { |
| 111 | debugf1("%s()\n", __func__); |
| 112 | complete(&edac_pci_kobj_complete); |
| 113 | } |
| 114 | |
| 115 | static struct kobj_type ktype_edac_pci = { |
| 116 | .release = edac_pci_release, |
| 117 | .sysfs_ops = &edac_pci_sysfs_ops, |
| 118 | .default_attrs = (struct attribute **) edac_pci_attr, |
| 119 | }; |
| 120 | |
| 121 | /** |
| 122 | * edac_sysfs_pci_setup() |
| 123 | * |
| 124 | * setup the sysfs for EDAC PCI attributes |
| 125 | * assumes edac_class has already been initialized |
| 126 | */ |
| 127 | int edac_sysfs_pci_setup(void) |
| 128 | { |
| 129 | int err; |
| 130 | struct sysdev_class *edac_class; |
| 131 | |
| 132 | debugf1("%s()\n", __func__); |
| 133 | |
| 134 | edac_class = edac_get_edac_class(); |
| 135 | |
| 136 | memset(&edac_pci_kobj, 0, sizeof(edac_pci_kobj)); |
| 137 | edac_pci_kobj.parent = &edac_class->kset.kobj; |
| 138 | edac_pci_kobj.ktype = &ktype_edac_pci; |
| 139 | err = kobject_set_name(&edac_pci_kobj, "pci"); |
| 140 | |
| 141 | if (!err) { |
| 142 | /* Instanstiate the pci object */ |
| 143 | /* FIXME: maybe new sysdev_create_subdir() */ |
| 144 | err = kobject_register(&edac_pci_kobj); |
| 145 | |
| 146 | if (err) |
| 147 | debugf1("Failed to register '.../edac/pci'\n"); |
| 148 | else |
| 149 | debugf1("Registered '.../edac/pci' kobject\n"); |
| 150 | } |
| 151 | |
| 152 | return err; |
| 153 | } |
| 154 | |
| 155 | /* |
| 156 | * edac_sysfs_pci_teardown |
| 157 | * |
| 158 | * perform the sysfs teardown for the PCI attributes |
| 159 | */ |
| 160 | void edac_sysfs_pci_teardown(void) |
| 161 | { |
| 162 | debugf0("%s()\n", __func__); |
| 163 | init_completion(&edac_pci_kobj_complete); |
| 164 | kobject_unregister(&edac_pci_kobj); |
| 165 | wait_for_completion(&edac_pci_kobj_complete); |
| 166 | } |
| 167 | |
| 168 | |
| 169 | static u16 get_pci_parity_status(struct pci_dev *dev, int secondary) |
| 170 | { |
| 171 | int where; |
| 172 | u16 status; |
| 173 | |
| 174 | where = secondary ? PCI_SEC_STATUS : PCI_STATUS; |
| 175 | pci_read_config_word(dev, where, &status); |
| 176 | |
| 177 | /* If we get back 0xFFFF then we must suspect that the card has been |
| 178 | * pulled but the Linux PCI layer has not yet finished cleaning up. |
| 179 | * We don't want to report on such devices |
| 180 | */ |
| 181 | |
| 182 | if (status == 0xFFFF) { |
| 183 | u32 sanity; |
| 184 | |
| 185 | pci_read_config_dword(dev, 0, &sanity); |
| 186 | |
| 187 | if (sanity == 0xFFFFFFFF) |
| 188 | return 0; |
| 189 | } |
| 190 | |
| 191 | status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR | |
| 192 | PCI_STATUS_PARITY; |
| 193 | |
| 194 | if (status) |
| 195 | /* reset only the bits we are interested in */ |
| 196 | pci_write_config_word(dev, where, status); |
| 197 | |
| 198 | return status; |
| 199 | } |
| 200 | |
| 201 | typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev); |
| 202 | |
| 203 | /* Clear any PCI parity errors logged by this device. */ |
| 204 | static void edac_pci_dev_parity_clear(struct pci_dev *dev) |
| 205 | { |
| 206 | u8 header_type; |
| 207 | |
| 208 | get_pci_parity_status(dev, 0); |
| 209 | |
| 210 | /* read the device TYPE, looking for bridges */ |
| 211 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); |
| 212 | |
| 213 | if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) |
| 214 | get_pci_parity_status(dev, 1); |
| 215 | } |
| 216 | |
| 217 | /* |
| 218 | * PCI Parity polling |
| 219 | * |
| 220 | */ |
| 221 | static void edac_pci_dev_parity_test(struct pci_dev *dev) |
| 222 | { |
| 223 | u16 status; |
| 224 | u8 header_type; |
| 225 | |
| 226 | /* read the STATUS register on this device |
| 227 | */ |
| 228 | status = get_pci_parity_status(dev, 0); |
| 229 | |
| 230 | debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id ); |
| 231 | |
| 232 | /* check the status reg for errors */ |
| 233 | if (status) { |
| 234 | if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) |
| 235 | edac_printk(KERN_CRIT, EDAC_PCI, |
| 236 | "Signaled System Error on %s\n", |
| 237 | pci_name(dev)); |
| 238 | |
| 239 | if (status & (PCI_STATUS_PARITY)) { |
| 240 | edac_printk(KERN_CRIT, EDAC_PCI, |
| 241 | "Master Data Parity Error on %s\n", |
| 242 | pci_name(dev)); |
| 243 | |
| 244 | atomic_inc(&pci_parity_count); |
| 245 | } |
| 246 | |
| 247 | if (status & (PCI_STATUS_DETECTED_PARITY)) { |
| 248 | edac_printk(KERN_CRIT, EDAC_PCI, |
| 249 | "Detected Parity Error on %s\n", |
| 250 | pci_name(dev)); |
| 251 | |
| 252 | atomic_inc(&pci_parity_count); |
| 253 | } |
| 254 | } |
| 255 | |
| 256 | /* read the device TYPE, looking for bridges */ |
| 257 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); |
| 258 | |
| 259 | debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id ); |
| 260 | |
| 261 | if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { |
| 262 | /* On bridges, need to examine secondary status register */ |
| 263 | status = get_pci_parity_status(dev, 1); |
| 264 | |
| 265 | debugf2("PCI SEC_STATUS= 0x%04x %s\n", |
| 266 | status, dev->dev.bus_id ); |
| 267 | |
| 268 | /* check the secondary status reg for errors */ |
| 269 | if (status) { |
| 270 | if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) |
| 271 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " |
| 272 | "Signaled System Error on %s\n", |
| 273 | pci_name(dev)); |
| 274 | |
| 275 | if (status & (PCI_STATUS_PARITY)) { |
| 276 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " |
| 277 | "Master Data Parity Error on " |
| 278 | "%s\n", pci_name(dev)); |
| 279 | |
| 280 | atomic_inc(&pci_parity_count); |
| 281 | } |
| 282 | |
| 283 | if (status & (PCI_STATUS_DETECTED_PARITY)) { |
| 284 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " |
| 285 | "Detected Parity Error on %s\n", |
| 286 | pci_name(dev)); |
| 287 | |
| 288 | atomic_inc(&pci_parity_count); |
| 289 | } |
| 290 | } |
| 291 | } |
| 292 | } |
| 293 | |
| 294 | /* |
| 295 | * pci_dev parity list iterator |
| 296 | * Scan the PCI device list for one iteration, looking for SERRORs |
| 297 | * Master Parity ERRORS or Parity ERRORs on primary or secondary devices |
| 298 | */ |
| 299 | static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) |
| 300 | { |
| 301 | struct pci_dev *dev = NULL; |
| 302 | |
| 303 | /* request for kernel access to the next PCI device, if any, |
| 304 | * and while we are looking at it have its reference count |
| 305 | * bumped until we are done with it |
| 306 | */ |
| 307 | while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { |
| 308 | fn(dev); |
| 309 | } |
| 310 | } |
| 311 | |
| 312 | /* |
| 313 | * edac_pci_do_parity_check |
| 314 | * |
| 315 | * performs the actual PCI parity check operation |
| 316 | */ |
| 317 | void edac_pci_do_parity_check(void) |
| 318 | { |
| 319 | unsigned long flags; |
| 320 | int before_count; |
| 321 | |
| 322 | debugf3("%s()\n", __func__); |
| 323 | |
| 324 | if (!check_pci_parity) |
| 325 | return; |
| 326 | |
| 327 | before_count = atomic_read(&pci_parity_count); |
| 328 | |
| 329 | /* scan all PCI devices looking for a Parity Error on devices and |
| 330 | * bridges |
| 331 | */ |
| 332 | local_irq_save(flags); |
| 333 | edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); |
| 334 | local_irq_restore(flags); |
| 335 | |
| 336 | /* Only if operator has selected panic on PCI Error */ |
| 337 | if (panic_on_pci_parity) { |
| 338 | /* If the count is different 'after' from 'before' */ |
| 339 | if (before_count != atomic_read(&pci_parity_count)) |
| 340 | panic("EDAC: PCI Parity Error"); |
| 341 | } |
| 342 | } |
| 343 | |
| 344 | void edac_pci_clear_parity_errors(void) |
| 345 | { |
| 346 | /* Clear any PCI bus parity errors that devices initially have logged |
| 347 | * in their registers. |
| 348 | */ |
| 349 | edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); |
| 350 | } |
| 351 | |
| 352 | |
| 353 | /* |
| 354 | * Define the PCI parameter to the module |
| 355 | */ |
| 356 | module_param(check_pci_parity, int, 0644); |
| 357 | MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on"); |
| 358 | module_param(panic_on_pci_parity, int, 0644); |
| 359 | MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on"); |
| 360 | |
| 361 | #endif /* CONFIG_PCI */ |