/* Intel Sandy Bridge -EN/-EP/-EX Memory Controller kernel module
 *
 * This driver supports the memory controllers found on the Intel
 * processor family Sandy Bridge.
 *
 * This file may be distributed under the terms of the
 * GNU General Public License version 2 only.
 *
 * Copyright (c) 2011 by:
 *	 Mauro Carvalho Chehab <mchehab@redhat.com>
 */
12
13#include <linux/module.h>
14#include <linux/init.h>
15#include <linux/pci.h>
16#include <linux/pci_ids.h>
17#include <linux/slab.h>
18#include <linux/delay.h>
19#include <linux/edac.h>
20#include <linux/mmzone.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020021#include <linux/smp.h>
22#include <linux/bitmap.h>
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -030023#include <linux/math64.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020024#include <asm/processor.h>
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -020025#include <asm/mce.h>
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -020026
27#include "edac_core.h"
28
29/* Static vars */
30static LIST_HEAD(sbridge_edac_list);
31static DEFINE_MUTEX(sbridge_edac_lock);
32static int probed;
33
34/*
35 * Alter this version for the module when modifications are made
36 */
37#define SBRIDGE_REVISION " Ver: 1.0.0 "
38#define EDAC_MOD_STR "sbridge_edac"
39
/*
 * Logging helpers: thin wrappers around edac_printk() and
 * edac_mc_chipset_printk() that tag every message with "sbridge".
 */
#define sbridge_printk(level, fmt, ...)					\
	edac_printk(level, "sbridge", fmt, ##__VA_ARGS__)

#define sbridge_mc_printk(mci, level, fmt, ...)				\
	edac_mc_chipset_printk(mci, level, "sbridge", fmt, ##__VA_ARGS__)
48
/*
 * Get a bit field at register value <v>, from bit <lo> to bit <hi>
 * (both inclusive, with 0 <= lo <= hi <= 63). The field is returned
 * right-aligned, as an unsigned long long.
 *
 * The mask is built as (2 << (hi - lo)) - 1 instead of
 * (1 << (hi - lo + 1)) - 1, so a full 64-bit wide field never shifts by
 * the width of the type (which would be undefined behaviour).
 */
#define GET_BITFIELD(v, lo, hi)	\
	((((unsigned long long)(v)) >> (lo)) & ((2ULL << ((hi) - (lo))) - 1))
54
55/*
56 * sbridge Memory Controller Registers
57 */
58
/*
 * FIXME: For now, the IDs are ordered by PCI device/function, as that
 *	  makes the driver's development process easier. This table should
 *	  be moved to pci_ids.h when submitted upstream.
 *
 * The trailing comment on each line is the <device>.<function> pair.
 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0	0x3cf4	/* 12.6 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1	0x3cf6	/* 12.7 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_BR		0x3cf5	/* 13.6 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0	0x3ca0	/* 14.0 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA	0x3ca8	/* 15.0 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS	0x3c71	/* 15.1 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0	0x3caa	/* 15.2 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1	0x3cab	/* 15.3 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2	0x3cac	/* 15.4 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3	0x3cad	/* 15.5 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO	0x3cb8	/* 17.0 */

/*
 * Currently unused, but will be needed by future implementations,
 * as these devices hold the error counters.
 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR0	0x3c72	/* 16.2 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR1	0x3c73	/* 16.3 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR2	0x3c76	/* 16.6 */
#define PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_ERR3	0x3c77	/* 16.7 */
84
85/* Devices 12 Function 6, Offsets 0x80 to 0xcc */
86static const u32 dram_rule[] = {
87 0x80, 0x88, 0x90, 0x98, 0xa0,
88 0xa8, 0xb0, 0xb8, 0xc0, 0xc8,
89};
90#define MAX_SAD ARRAY_SIZE(dram_rule)
91
92#define SAD_LIMIT(reg) ((GET_BITFIELD(reg, 6, 25) << 26) | 0x3ffffff)
93#define DRAM_ATTR(reg) GET_BITFIELD(reg, 2, 3)
94#define INTERLEAVE_MODE(reg) GET_BITFIELD(reg, 1, 1)
95#define DRAM_RULE_ENABLE(reg) GET_BITFIELD(reg, 0, 0)
96
97static char *get_dram_attr(u32 reg)
98{
99 switch(DRAM_ATTR(reg)) {
100 case 0:
101 return "DRAM";
102 case 1:
103 return "MMCFG";
104 case 2:
105 return "NXM";
106 default:
107 return "unknown";
108 }
109}
110
111static const u32 interleave_list[] = {
112 0x84, 0x8c, 0x94, 0x9c, 0xa4,
113 0xac, 0xb4, 0xbc, 0xc4, 0xcc,
114};
115#define MAX_INTERLEAVE ARRAY_SIZE(interleave_list)
116
117#define SAD_PKG0(reg) GET_BITFIELD(reg, 0, 2)
118#define SAD_PKG1(reg) GET_BITFIELD(reg, 3, 5)
119#define SAD_PKG2(reg) GET_BITFIELD(reg, 8, 10)
120#define SAD_PKG3(reg) GET_BITFIELD(reg, 11, 13)
121#define SAD_PKG4(reg) GET_BITFIELD(reg, 16, 18)
122#define SAD_PKG5(reg) GET_BITFIELD(reg, 19, 21)
123#define SAD_PKG6(reg) GET_BITFIELD(reg, 24, 26)
124#define SAD_PKG7(reg) GET_BITFIELD(reg, 27, 29)
125
126static inline int sad_pkg(u32 reg, int interleave)
127{
128 switch (interleave) {
129 case 0:
130 return SAD_PKG0(reg);
131 case 1:
132 return SAD_PKG1(reg);
133 case 2:
134 return SAD_PKG2(reg);
135 case 3:
136 return SAD_PKG3(reg);
137 case 4:
138 return SAD_PKG4(reg);
139 case 5:
140 return SAD_PKG5(reg);
141 case 6:
142 return SAD_PKG6(reg);
143 case 7:
144 return SAD_PKG7(reg);
145 default:
146 return -EINVAL;
147 }
148}
149
/* Device 12 Function 7: top of low/high memory */

#define TOLM			0x80
#define TOHM			0x84

/*
 * NOTE(review): both macros fill the low bits with 0x3ffffff although the
 * fields are shifted by 28 and 25 bits respectively - confirm against the
 * datasheet that this filler is the intended granularity.
 */
#define GET_TOLM(reg)		((GET_BITFIELD(reg, 0,  3) << 28) | 0x3ffffff)
#define GET_TOHM(reg)		((GET_BITFIELD(reg, 0, 20) << 25) | 0x3ffffff)

/* Device 13 Function 6: source and node IDs */

#define SAD_TARGET		0xf0

#define SOURCE_ID(reg)		GET_BITFIELD(reg, 9, 11)

#define SAD_CONTROL		0xf4

#define NODE_ID(reg)		GET_BITFIELD(reg, 0, 2)
167
168/* Device 14 function 0 */
169
170static const u32 tad_dram_rule[] = {
171 0x40, 0x44, 0x48, 0x4c,
172 0x50, 0x54, 0x58, 0x5c,
173 0x60, 0x64, 0x68, 0x6c,
174};
175#define MAX_TAD ARRAY_SIZE(tad_dram_rule)
176
177#define TAD_LIMIT(reg) ((GET_BITFIELD(reg, 12, 31) << 26) | 0x3ffffff)
178#define TAD_SOCK(reg) GET_BITFIELD(reg, 10, 11)
179#define TAD_CH(reg) GET_BITFIELD(reg, 8, 9)
180#define TAD_TGT3(reg) GET_BITFIELD(reg, 6, 7)
181#define TAD_TGT2(reg) GET_BITFIELD(reg, 4, 5)
182#define TAD_TGT1(reg) GET_BITFIELD(reg, 2, 3)
183#define TAD_TGT0(reg) GET_BITFIELD(reg, 0, 1)
184
/* Device 15, function 0: memory controller mode flags */

#define MCMTR				0x7c

#define IS_ECC_ENABLED(mcmtr)		GET_BITFIELD(mcmtr, 2, 2)
#define IS_LOCKSTEP_ENABLED(mcmtr)	GET_BITFIELD(mcmtr, 1, 1)
#define IS_CLOSE_PG(mcmtr)		GET_BITFIELD(mcmtr, 0, 0)

/* Device 15, function 1: RAS features */

#define RASENABLES			0xac
#define IS_MIRROR_ENABLED(reg)		GET_BITFIELD(reg, 0, 0)
197
198/* Device 15, functions 2-5 */
199
200static const int mtr_regs[] = {
201 0x80, 0x84, 0x88,
202};
203
204#define RANK_DISABLE(mtr) GET_BITFIELD(mtr, 16, 19)
205#define IS_DIMM_PRESENT(mtr) GET_BITFIELD(mtr, 14, 14)
206#define RANK_CNT_BITS(mtr) GET_BITFIELD(mtr, 12, 13)
207#define RANK_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 2, 4)
208#define COL_WIDTH_BITS(mtr) GET_BITFIELD(mtr, 0, 1)
209
210static const u32 tad_ch_nilv_offset[] = {
211 0x90, 0x94, 0x98, 0x9c,
212 0xa0, 0xa4, 0xa8, 0xac,
213 0xb0, 0xb4, 0xb8, 0xbc,
214};
215#define CHN_IDX_OFFSET(reg) GET_BITFIELD(reg, 28, 29)
216#define TAD_OFFSET(reg) (GET_BITFIELD(reg, 6, 25) << 26)
217
218static const u32 rir_way_limit[] = {
219 0x108, 0x10c, 0x110, 0x114, 0x118,
220};
221#define MAX_RIR_RANGES ARRAY_SIZE(rir_way_limit)
222
223#define IS_RIR_VALID(reg) GET_BITFIELD(reg, 31, 31)
224#define RIR_WAY(reg) GET_BITFIELD(reg, 28, 29)
225#define RIR_LIMIT(reg) ((GET_BITFIELD(reg, 1, 10) << 29)| 0x1fffffff)
226
227#define MAX_RIR_WAY 8
228
229static const u32 rir_offset[MAX_RIR_RANGES][MAX_RIR_WAY] = {
230 { 0x120, 0x124, 0x128, 0x12c, 0x130, 0x134, 0x138, 0x13c },
231 { 0x140, 0x144, 0x148, 0x14c, 0x150, 0x154, 0x158, 0x15c },
232 { 0x160, 0x164, 0x168, 0x16c, 0x170, 0x174, 0x178, 0x17c },
233 { 0x180, 0x184, 0x188, 0x18c, 0x190, 0x194, 0x198, 0x19c },
234 { 0x1a0, 0x1a4, 0x1a8, 0x1ac, 0x1b0, 0x1b4, 0x1b8, 0x1bc },
235};
236
237#define RIR_RNK_TGT(reg) GET_BITFIELD(reg, 16, 19)
238#define RIR_OFFSET(reg) GET_BITFIELD(reg, 2, 14)
239
240/* Device 16, functions 2-7 */
241
242/*
243 * FIXME: Implement the error count reads directly
244 */
245
246static const u32 correrrcnt[] = {
247 0x104, 0x108, 0x10c, 0x110,
248};
249
250#define RANK_ODD_OV(reg) GET_BITFIELD(reg, 31, 31)
251#define RANK_ODD_ERR_CNT(reg) GET_BITFIELD(reg, 16, 30)
252#define RANK_EVEN_OV(reg) GET_BITFIELD(reg, 15, 15)
253#define RANK_EVEN_ERR_CNT(reg) GET_BITFIELD(reg, 0, 14)
254
255static const u32 correrrthrsld[] = {
256 0x11c, 0x120, 0x124, 0x128,
257};
258
259#define RANK_ODD_ERR_THRSLD(reg) GET_BITFIELD(reg, 16, 30)
260#define RANK_EVEN_ERR_THRSLD(reg) GET_BITFIELD(reg, 0, 14)
261
262
/* Device 17, function 0: DIMM type (registered or not) */

#define RANK_CFG_A			0x0328

#define IS_RDIMM_ENABLED(reg)		GET_BITFIELD(reg, 11, 11)
268
269/*
270 * sbridge structs
271 */
272
273#define NUM_CHANNELS 4
274#define MAX_DIMMS 3 /* Max DIMMS per channel */
275
276struct sbridge_info {
277 u32 mcmtr;
278};
279
280struct sbridge_channel {
281 u32 ranks;
282 u32 dimms;
283};
284
/* One PCI device the driver looks for: slot, function and device ID */
struct pci_id_descr {
	int	dev;
	int	func;
	int	dev_id;
	int	optional;
};

/* An array of pci_id_descr entries, together with its length */
struct pci_id_table {
	const struct pci_id_descr	*descr;
	int				n_devs;
};
296
297struct sbridge_dev {
298 struct list_head list;
299 u8 bus, mc;
300 u8 node_id, source_id;
301 struct pci_dev **pdev;
302 int n_devs;
303 struct mem_ctl_info *mci;
304};
305
306struct sbridge_pvt {
307 struct pci_dev *pci_ta, *pci_ddrio, *pci_ras;
308 struct pci_dev *pci_sad0, *pci_sad1, *pci_ha0;
309 struct pci_dev *pci_br;
310 struct pci_dev *pci_tad[NUM_CHANNELS];
311
312 struct sbridge_dev *sbridge_dev;
313
314 struct sbridge_info info;
315 struct sbridge_channel channel[NUM_CHANNELS];
316
317 int csrow_map[NUM_CHANNELS][MAX_DIMMS];
318
319 /* Memory type detection */
320 bool is_mirrored, is_lockstep, is_close_pg;
321
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200322 /* Fifo double buffers */
323 struct mce mce_entry[MCE_LOG_LEN];
324 struct mce mce_outentry[MCE_LOG_LEN];
325
326 /* Fifo in/out counters */
327 unsigned mce_in, mce_out;
328
329 /* Count indicator to show errors not got */
330 unsigned mce_overrun;
331
332 /* Memory description */
333 u64 tolm, tohm;
334};
335
336#define PCI_DESCR(device, function, device_id) \
337 .dev = (device), \
338 .func = (function), \
339 .dev_id = (device_id)
340
341static const struct pci_id_descr pci_dev_descr_sbridge[] = {
342 /* Processor Home Agent */
343 { PCI_DESCR(14, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_HA0) },
344
345 /* Memory controller */
346 { PCI_DESCR(15, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA) },
347 { PCI_DESCR(15, 1, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_RAS) },
348 { PCI_DESCR(15, 2, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD0) },
349 { PCI_DESCR(15, 3, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD1) },
350 { PCI_DESCR(15, 4, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD2) },
351 { PCI_DESCR(15, 5, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TAD3) },
352 { PCI_DESCR(17, 0, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_DDRIO) },
353
354 /* System Address Decoder */
355 { PCI_DESCR(12, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD0) },
356 { PCI_DESCR(12, 7, PCI_DEVICE_ID_INTEL_SBRIDGE_SAD1) },
357
358 /* Broadcast Registers */
359 { PCI_DESCR(13, 6, PCI_DEVICE_ID_INTEL_SBRIDGE_BR) },
360};
361
362#define PCI_ID_TABLE_ENTRY(A) { .descr=A, .n_devs = ARRAY_SIZE(A) }
363static const struct pci_id_table pci_dev_descr_sbridge_table[] = {
364 PCI_ID_TABLE_ENTRY(pci_dev_descr_sbridge),
365 {0,} /* 0 terminated list. */
366};
367
368/*
369 * pci_device_id table for which devices we are looking for
370 */
371static const struct pci_device_id sbridge_pci_tbl[] __devinitdata = {
372 {PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_SBRIDGE_IMC_TA)},
373 {0,} /* 0 terminated list. */
374};
375
376
/****************************************************************************
			Ancillary status routines
 ****************************************************************************/
380
381static inline int numrank(u32 mtr)
382{
383 int ranks = (1 << RANK_CNT_BITS(mtr));
384
385 if (ranks > 4) {
386 debugf0("Invalid number of ranks: %d (max = 4) raw value = %x (%04x)",
387 ranks, (unsigned int)RANK_CNT_BITS(mtr), mtr);
388 return -EINVAL;
389 }
390
391 return ranks;
392}
393
394static inline int numrow(u32 mtr)
395{
396 int rows = (RANK_WIDTH_BITS(mtr) + 12);
397
398 if (rows < 13 || rows > 18) {
399 debugf0("Invalid number of rows: %d (should be between 14 and 17) raw value = %x (%04x)",
400 rows, (unsigned int)RANK_WIDTH_BITS(mtr), mtr);
401 return -EINVAL;
402 }
403
404 return 1 << rows;
405}
406
407static inline int numcol(u32 mtr)
408{
409 int cols = (COL_WIDTH_BITS(mtr) + 10);
410
411 if (cols > 12) {
412 debugf0("Invalid number of cols: %d (max = 4) raw value = %x (%04x)",
413 cols, (unsigned int)COL_WIDTH_BITS(mtr), mtr);
414 return -EINVAL;
415 }
416
417 return 1 << cols;
418}
419
420static struct sbridge_dev *get_sbridge_dev(u8 bus)
421{
422 struct sbridge_dev *sbridge_dev;
423
424 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
425 if (sbridge_dev->bus == bus)
426 return sbridge_dev;
427 }
428
429 return NULL;
430}
431
432static struct sbridge_dev *alloc_sbridge_dev(u8 bus,
433 const struct pci_id_table *table)
434{
435 struct sbridge_dev *sbridge_dev;
436
437 sbridge_dev = kzalloc(sizeof(*sbridge_dev), GFP_KERNEL);
438 if (!sbridge_dev)
439 return NULL;
440
441 sbridge_dev->pdev = kzalloc(sizeof(*sbridge_dev->pdev) * table->n_devs,
442 GFP_KERNEL);
443 if (!sbridge_dev->pdev) {
444 kfree(sbridge_dev);
445 return NULL;
446 }
447
448 sbridge_dev->bus = bus;
449 sbridge_dev->n_devs = table->n_devs;
450 list_add_tail(&sbridge_dev->list, &sbridge_edac_list);
451
452 return sbridge_dev;
453}
454
455static void free_sbridge_dev(struct sbridge_dev *sbridge_dev)
456{
457 list_del(&sbridge_dev->list);
458 kfree(sbridge_dev->pdev);
459 kfree(sbridge_dev);
460}
461
462/****************************************************************************
463 Memory check routines
464 ****************************************************************************/
465static struct pci_dev *get_pdev_slot_func(u8 bus, unsigned slot,
466 unsigned func)
467{
468 struct sbridge_dev *sbridge_dev = get_sbridge_dev(bus);
469 int i;
470
471 if (!sbridge_dev)
472 return NULL;
473
474 for (i = 0; i < sbridge_dev->n_devs; i++) {
475 if (!sbridge_dev->pdev[i])
476 continue;
477
478 if (PCI_SLOT(sbridge_dev->pdev[i]->devfn) == slot &&
479 PCI_FUNC(sbridge_dev->pdev[i]->devfn) == func) {
480 debugf1("Associated %02x.%02x.%d with %p\n",
481 bus, slot, func, sbridge_dev->pdev[i]);
482 return sbridge_dev->pdev[i];
483 }
484 }
485
486 return NULL;
487}
488
489/**
490 * sbridge_get_active_channels() - gets the number of channels and csrows
491 * bus: Device bus
492 * @channels: Number of channels that will be returned
493 * @csrows: Number of csrows found
494 *
495 * Since EDAC core needs to know in advance the number of available channels
496 * and csrows, in order to allocate memory for csrows/channels, it is needed
497 * to run two similar steps. At the first step, implemented on this function,
498 * it checks the number of csrows/channels present at one socket, identified
499 * by the associated PCI bus.
500 * this is used in order to properly allocate the size of mci components.
501 * Note: one csrow is one dimm.
502 */
503static int sbridge_get_active_channels(const u8 bus, unsigned *channels,
504 unsigned *csrows)
505{
506 struct pci_dev *pdev = NULL;
507 int i, j;
508 u32 mcmtr;
509
510 *channels = 0;
511 *csrows = 0;
512
513 pdev = get_pdev_slot_func(bus, 15, 0);
514 if (!pdev) {
515 sbridge_printk(KERN_ERR, "Couldn't find PCI device "
516 "%2x.%02d.%d!!!\n",
517 bus, 15, 0);
518 return -ENODEV;
519 }
520
521 pci_read_config_dword(pdev, MCMTR, &mcmtr);
522 if (!IS_ECC_ENABLED(mcmtr)) {
523 sbridge_printk(KERN_ERR, "ECC is disabled. Aborting\n");
524 return -ENODEV;
525 }
526
527 for (i = 0; i < NUM_CHANNELS; i++) {
528 u32 mtr;
529
530 /* Device 15 functions 2 - 5 */
531 pdev = get_pdev_slot_func(bus, 15, 2 + i);
532 if (!pdev) {
533 sbridge_printk(KERN_ERR, "Couldn't find PCI device "
534 "%2x.%02d.%d!!!\n",
535 bus, 15, 2 + i);
536 return -ENODEV;
537 }
538 (*channels)++;
539
540 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
541 pci_read_config_dword(pdev, mtr_regs[j], &mtr);
542 debugf1("Bus#%02x channel #%d MTR%d = %x\n", bus, i, j, mtr);
543 if (IS_DIMM_PRESENT(mtr))
544 (*csrows)++;
545 }
546 }
547
548 debugf0("Number of active channels: %d, number of active dimms: %d\n",
549 *channels, *csrows);
550
551 return 0;
552}
553
554static int get_dimm_config(const struct mem_ctl_info *mci)
555{
556 struct sbridge_pvt *pvt = mci->pvt_info;
557 struct csrow_info *csr;
558 int i, j, banks, ranks, rows, cols, size, npages;
559 int csrow = 0;
560 unsigned long last_page = 0;
561 u32 reg;
562 enum edac_type mode;
Mark A. Grondonac6e13b52011-10-18 11:02:58 -0200563 enum mem_type mtype;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200564
565 pci_read_config_dword(pvt->pci_br, SAD_TARGET, &reg);
566 pvt->sbridge_dev->source_id = SOURCE_ID(reg);
567
568 pci_read_config_dword(pvt->pci_br, SAD_CONTROL, &reg);
569 pvt->sbridge_dev->node_id = NODE_ID(reg);
570 debugf0("mc#%d: Node ID: %d, source ID: %d\n",
571 pvt->sbridge_dev->mc,
572 pvt->sbridge_dev->node_id,
573 pvt->sbridge_dev->source_id);
574
575 pci_read_config_dword(pvt->pci_ras, RASENABLES, &reg);
576 if (IS_MIRROR_ENABLED(reg)) {
577 debugf0("Memory mirror is enabled\n");
578 pvt->is_mirrored = true;
579 } else {
580 debugf0("Memory mirror is disabled\n");
581 pvt->is_mirrored = false;
582 }
583
584 pci_read_config_dword(pvt->pci_ta, MCMTR, &pvt->info.mcmtr);
585 if (IS_LOCKSTEP_ENABLED(pvt->info.mcmtr)) {
586 debugf0("Lockstep is enabled\n");
587 mode = EDAC_S8ECD8ED;
588 pvt->is_lockstep = true;
589 } else {
590 debugf0("Lockstep is disabled\n");
591 mode = EDAC_S4ECD4ED;
592 pvt->is_lockstep = false;
593 }
594 if (IS_CLOSE_PG(pvt->info.mcmtr)) {
595 debugf0("address map is on closed page mode\n");
596 pvt->is_close_pg = true;
597 } else {
598 debugf0("address map is on open page mode\n");
599 pvt->is_close_pg = false;
600 }
601
602 pci_read_config_dword(pvt->pci_ta, RANK_CFG_A, &reg);
603 if (IS_RDIMM_ENABLED(reg)) {
604 /* FIXME: Can also be LRDIMM */
605 debugf0("Memory is registered\n");
Mark A. Grondonac6e13b52011-10-18 11:02:58 -0200606 mtype = MEM_RDDR3;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200607 } else {
608 debugf0("Memory is unregistered\n");
Mark A. Grondonac6e13b52011-10-18 11:02:58 -0200609 mtype = MEM_DDR3;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200610 }
611
612 /* On all supported DDR3 DIMM types, there are 8 banks available */
613 banks = 8;
614
615 for (i = 0; i < NUM_CHANNELS; i++) {
616 u32 mtr;
617
618 for (j = 0; j < ARRAY_SIZE(mtr_regs); j++) {
619 pci_read_config_dword(pvt->pci_tad[i],
620 mtr_regs[j], &mtr);
621 debugf4("Channel #%d MTR%d = %x\n", i, j, mtr);
622 if (IS_DIMM_PRESENT(mtr)) {
623 pvt->channel[i].dimms++;
624
625 ranks = numrank(mtr);
626 rows = numrow(mtr);
627 cols = numcol(mtr);
628
629 /* DDR3 has 8 I/O banks */
630 size = (rows * cols * banks * ranks) >> (20 - 3);
631 npages = MiB_TO_PAGES(size);
632
633 debugf0("mc#%d: channel %d, dimm %d, %d Mb (%d pages) bank: %d, rank: %d, row: %#x, col: %#x\n",
634 pvt->sbridge_dev->mc, i, j,
635 size, npages,
636 banks, ranks, rows, cols);
637 csr = &mci->csrows[csrow];
638
639 csr->first_page = last_page;
640 csr->last_page = last_page + npages - 1;
641 csr->page_mask = 0UL; /* Unused */
642 csr->nr_pages = npages;
643 csr->grain = 32;
644 csr->csrow_idx = csrow;
645 csr->dtype = (banks == 8) ? DEV_X8 : DEV_X4;
646 csr->ce_count = 0;
647 csr->ue_count = 0;
Mark A. Grondonac6e13b52011-10-18 11:02:58 -0200648 csr->mtype = mtype;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200649 csr->edac_mode = mode;
650 csr->nr_channels = 1;
651 csr->channels[0].chan_idx = i;
652 csr->channels[0].ce_count = 0;
653 pvt->csrow_map[i][j] = csrow;
654 snprintf(csr->channels[0].label,
655 sizeof(csr->channels[0].label),
656 "CPU_SrcID#%u_Channel#%u_DIMM#%u",
657 pvt->sbridge_dev->source_id, i, j);
658 last_page += npages;
659 csrow++;
660 }
661 }
662 }
663
664 return 0;
665}
666
667static void get_memory_layout(const struct mem_ctl_info *mci)
668{
669 struct sbridge_pvt *pvt = mci->pvt_info;
670 int i, j, k, n_sads, n_tads, sad_interl;
671 u32 reg;
672 u64 limit, prv = 0;
673 u64 tmp_mb;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300674 u32 mb, kb;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200675 u32 rir_way;
676
677 /*
678 * Step 1) Get TOLM/TOHM ranges
679 */
680
681 /* Address range is 32:28 */
682 pci_read_config_dword(pvt->pci_sad1, TOLM,
683 &reg);
684 pvt->tolm = GET_TOLM(reg);
685 tmp_mb = (1 + pvt->tolm) >> 20;
686
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300687 mb = div_u64_rem(tmp_mb, 1000, &kb);
688 debugf0("TOLM: %u.%03u GB (0x%016Lx)\n",
689 mb, kb, (u64)pvt->tolm);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200690
691 /* Address range is already 45:25 */
692 pci_read_config_dword(pvt->pci_sad1, TOHM,
693 &reg);
694 pvt->tohm = GET_TOHM(reg);
695 tmp_mb = (1 + pvt->tohm) >> 20;
696
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300697 mb = div_u64_rem(tmp_mb, 1000, &kb);
698 debugf0("TOHM: %u.%03u GB (0x%016Lx)",
699 mb, kb, (u64)pvt->tohm);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200700
701 /*
702 * Step 2) Get SAD range and SAD Interleave list
703 * TAD registers contain the interleave wayness. However, it
704 * seems simpler to just discover it indirectly, with the
705 * algorithm bellow.
706 */
707 prv = 0;
708 for (n_sads = 0; n_sads < MAX_SAD; n_sads++) {
709 /* SAD_LIMIT Address range is 45:26 */
710 pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads],
711 &reg);
712 limit = SAD_LIMIT(reg);
713
714 if (!DRAM_RULE_ENABLE(reg))
715 continue;
716
717 if (limit <= prv)
718 break;
719
720 tmp_mb = (limit + 1) >> 20;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300721 mb = div_u64_rem(tmp_mb, 1000, &kb);
722 debugf0("SAD#%d %s up to %u.%03u GB (0x%016Lx) %s reg=0x%08x\n",
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200723 n_sads,
724 get_dram_attr(reg),
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300725 mb, kb,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200726 ((u64)tmp_mb) << 20L,
727 INTERLEAVE_MODE(reg) ? "Interleave: 8:6" : "Interleave: [8:6]XOR[18:16]",
728 reg);
729 prv = limit;
730
731 pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
732 &reg);
733 sad_interl = sad_pkg(reg, 0);
734 for (j = 0; j < 8; j++) {
735 if (j > 0 && sad_interl == sad_pkg(reg, j))
736 break;
737
738 debugf0("SAD#%d, interleave #%d: %d\n",
739 n_sads, j, sad_pkg(reg, j));
740 }
741 }
742
743 /*
744 * Step 3) Get TAD range
745 */
746 prv = 0;
747 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
748 pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
749 &reg);
750 limit = TAD_LIMIT(reg);
751 if (limit <= prv)
752 break;
753 tmp_mb = (limit + 1) >> 20;
754
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300755 mb = div_u64_rem(tmp_mb, 1000, &kb);
756 debugf0("TAD#%d: up to %u.%03u GB (0x%016Lx), socket interleave %d, memory interleave %d, TGT: %d, %d, %d, %d, reg=0x%08x\n",
757 n_tads, mb, kb,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200758 ((u64)tmp_mb) << 20L,
759 (u32)TAD_SOCK(reg),
760 (u32)TAD_CH(reg),
761 (u32)TAD_TGT0(reg),
762 (u32)TAD_TGT1(reg),
763 (u32)TAD_TGT2(reg),
764 (u32)TAD_TGT3(reg),
765 reg);
Hui Wang7fae0db2012-02-06 04:11:01 -0300766 prv = limit;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200767 }
768
769 /*
770 * Step 4) Get TAD offsets, per each channel
771 */
772 for (i = 0; i < NUM_CHANNELS; i++) {
773 if (!pvt->channel[i].dimms)
774 continue;
775 for (j = 0; j < n_tads; j++) {
776 pci_read_config_dword(pvt->pci_tad[i],
777 tad_ch_nilv_offset[j],
778 &reg);
779 tmp_mb = TAD_OFFSET(reg) >> 20;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300780 mb = div_u64_rem(tmp_mb, 1000, &kb);
781 debugf0("TAD CH#%d, offset #%d: %u.%03u GB (0x%016Lx), reg=0x%08x\n",
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200782 i, j,
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300783 mb, kb,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200784 ((u64)tmp_mb) << 20L,
785 reg);
786 }
787 }
788
789 /*
790 * Step 6) Get RIR Wayness/Limit, per each channel
791 */
792 for (i = 0; i < NUM_CHANNELS; i++) {
793 if (!pvt->channel[i].dimms)
794 continue;
795 for (j = 0; j < MAX_RIR_RANGES; j++) {
796 pci_read_config_dword(pvt->pci_tad[i],
797 rir_way_limit[j],
798 &reg);
799
800 if (!IS_RIR_VALID(reg))
801 continue;
802
803 tmp_mb = RIR_LIMIT(reg) >> 20;
804 rir_way = 1 << RIR_WAY(reg);
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300805 mb = div_u64_rem(tmp_mb, 1000, &kb);
806 debugf0("CH#%d RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d, reg=0x%08x\n",
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200807 i, j,
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300808 mb, kb,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200809 ((u64)tmp_mb) << 20L,
810 rir_way,
811 reg);
812
813 for (k = 0; k < rir_way; k++) {
814 pci_read_config_dword(pvt->pci_tad[i],
815 rir_offset[j][k],
816 &reg);
817 tmp_mb = RIR_OFFSET(reg) << 6;
818
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300819 mb = div_u64_rem(tmp_mb, 1000, &kb);
820 debugf0("CH#%d RIR#%d INTL#%d, offset %u.%03u GB (0x%016Lx), tgt: %d, reg=0x%08x\n",
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200821 i, j, k,
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300822 mb, kb,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200823 ((u64)tmp_mb) << 20L,
824 (u32)RIR_RNK_TGT(reg),
825 reg);
826 }
827 }
828 }
829}
830
831struct mem_ctl_info *get_mci_for_node_id(u8 node_id)
832{
833 struct sbridge_dev *sbridge_dev;
834
835 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
836 if (sbridge_dev->node_id == node_id)
837 return sbridge_dev->mci;
838 }
839 return NULL;
840}
841
842static int get_memory_error_data(struct mem_ctl_info *mci,
843 u64 addr,
844 u8 *socket,
845 long *channel_mask,
846 u8 *rank,
847 char *area_type)
848{
849 struct mem_ctl_info *new_mci;
850 struct sbridge_pvt *pvt = mci->pvt_info;
851 char msg[256];
852 int n_rir, n_sads, n_tads, sad_way, sck_xch;
853 int sad_interl, idx, base_ch;
854 int interleave_mode;
855 unsigned sad_interleave[MAX_INTERLEAVE];
856 u32 reg;
857 u8 ch_way,sck_way;
858 u32 tad_offset;
859 u32 rir_way;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300860 u32 mb, kb;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200861 u64 ch_addr, offset, limit, prv = 0;
862
863
864 /*
865 * Step 0) Check if the address is at special memory ranges
866 * The check bellow is probably enough to fill all cases where
867 * the error is not inside a memory, except for the legacy
868 * range (e. g. VGA addresses). It is unlikely, however, that the
869 * memory controller would generate an error on that range.
870 */
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -0300871 if ((addr > (u64) pvt->tolm) && (addr < (1LL << 32))) {
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200872 sprintf(msg, "Error at TOLM area, on addr 0x%08Lx", addr);
873 edac_mc_handle_ce_no_info(mci, msg);
874 return -EINVAL;
875 }
876 if (addr >= (u64)pvt->tohm) {
877 sprintf(msg, "Error at MMIOH area, on addr 0x%016Lx", addr);
878 edac_mc_handle_ce_no_info(mci, msg);
879 return -EINVAL;
880 }
881
882 /*
883 * Step 1) Get socket
884 */
885 for (n_sads = 0; n_sads < MAX_SAD; n_sads++) {
886 pci_read_config_dword(pvt->pci_sad0, dram_rule[n_sads],
887 &reg);
888
889 if (!DRAM_RULE_ENABLE(reg))
890 continue;
891
892 limit = SAD_LIMIT(reg);
893 if (limit <= prv) {
894 sprintf(msg, "Can't discover the memory socket");
895 edac_mc_handle_ce_no_info(mci, msg);
896 return -EINVAL;
897 }
898 if (addr <= limit)
899 break;
900 prv = limit;
901 }
902 if (n_sads == MAX_SAD) {
903 sprintf(msg, "Can't discover the memory socket");
904 edac_mc_handle_ce_no_info(mci, msg);
905 return -EINVAL;
906 }
907 area_type = get_dram_attr(reg);
908 interleave_mode = INTERLEAVE_MODE(reg);
909
910 pci_read_config_dword(pvt->pci_sad0, interleave_list[n_sads],
911 &reg);
912 sad_interl = sad_pkg(reg, 0);
913 for (sad_way = 0; sad_way < 8; sad_way++) {
914 if (sad_way > 0 && sad_interl == sad_pkg(reg, sad_way))
915 break;
916 sad_interleave[sad_way] = sad_pkg(reg, sad_way);
917 debugf0("SAD interleave #%d: %d\n",
918 sad_way, sad_interleave[sad_way]);
919 }
920 debugf0("mc#%d: Error detected on SAD#%d: address 0x%016Lx < 0x%016Lx, Interleave [%d:6]%s\n",
921 pvt->sbridge_dev->mc,
922 n_sads,
923 addr,
924 limit,
925 sad_way + 7,
Hui Wangad9c40b2012-02-06 04:11:00 -0300926 interleave_mode ? "" : "XOR[18:16]");
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -0200927 if (interleave_mode)
928 idx = ((addr >> 6) ^ (addr >> 16)) & 7;
929 else
930 idx = (addr >> 6) & 7;
931 switch (sad_way) {
932 case 1:
933 idx = 0;
934 break;
935 case 2:
936 idx = idx & 1;
937 break;
938 case 4:
939 idx = idx & 3;
940 break;
941 case 8:
942 break;
943 default:
944 sprintf(msg, "Can't discover socket interleave");
945 edac_mc_handle_ce_no_info(mci, msg);
946 return -EINVAL;
947 }
948 *socket = sad_interleave[idx];
949 debugf0("SAD interleave index: %d (wayness %d) = CPU socket %d\n",
950 idx, sad_way, *socket);
951
952 /*
953 * Move to the proper node structure, in order to access the
954 * right PCI registers
955 */
956 new_mci = get_mci_for_node_id(*socket);
957 if (!new_mci) {
958 sprintf(msg, "Struct for socket #%u wasn't initialized",
959 *socket);
960 edac_mc_handle_ce_no_info(mci, msg);
961 return -EINVAL;
962 }
963 mci = new_mci;
964 pvt = mci->pvt_info;
965
966 /*
967 * Step 2) Get memory channel
968 */
969 prv = 0;
970 for (n_tads = 0; n_tads < MAX_TAD; n_tads++) {
971 pci_read_config_dword(pvt->pci_ha0, tad_dram_rule[n_tads],
972 &reg);
973 limit = TAD_LIMIT(reg);
974 if (limit <= prv) {
975 sprintf(msg, "Can't discover the memory channel");
976 edac_mc_handle_ce_no_info(mci, msg);
977 return -EINVAL;
978 }
979 if (addr <= limit)
980 break;
981 prv = limit;
982 }
983 ch_way = TAD_CH(reg) + 1;
984 sck_way = TAD_SOCK(reg) + 1;
985 /*
986 * FIXME: Is it right to always use channel 0 for offsets?
987 */
988 pci_read_config_dword(pvt->pci_tad[0],
989 tad_ch_nilv_offset[n_tads],
990 &tad_offset);
991
992 if (ch_way == 3)
993 idx = addr >> 6;
994 else
995 idx = addr >> (6 + sck_way);
996 idx = idx % ch_way;
997
998 /*
999 * FIXME: Shouldn't we use CHN_IDX_OFFSET() here, when ch_way == 3 ???
1000 */
1001 switch (idx) {
1002 case 0:
1003 base_ch = TAD_TGT0(reg);
1004 break;
1005 case 1:
1006 base_ch = TAD_TGT1(reg);
1007 break;
1008 case 2:
1009 base_ch = TAD_TGT2(reg);
1010 break;
1011 case 3:
1012 base_ch = TAD_TGT3(reg);
1013 break;
1014 default:
1015 sprintf(msg, "Can't discover the TAD target");
1016 edac_mc_handle_ce_no_info(mci, msg);
1017 return -EINVAL;
1018 }
1019 *channel_mask = 1 << base_ch;
1020
1021 if (pvt->is_mirrored) {
1022 *channel_mask |= 1 << ((base_ch + 2) % 4);
1023 switch(ch_way) {
1024 case 2:
1025 case 4:
1026 sck_xch = 1 << sck_way * (ch_way >> 1);
1027 break;
1028 default:
1029 sprintf(msg, "Invalid mirror set. Can't decode addr");
1030 edac_mc_handle_ce_no_info(mci, msg);
1031 return -EINVAL;
1032 }
1033 } else
1034 sck_xch = (1 << sck_way) * ch_way;
1035
1036 if (pvt->is_lockstep)
1037 *channel_mask |= 1 << ((base_ch + 1) % 4);
1038
1039 offset = TAD_OFFSET(tad_offset);
1040
1041 debugf0("TAD#%d: address 0x%016Lx < 0x%016Lx, socket interleave %d, channel interleave %d (offset 0x%08Lx), index %d, base ch: %d, ch mask: 0x%02lx\n",
1042 n_tads,
1043 addr,
1044 limit,
1045 (u32)TAD_SOCK(reg),
1046 ch_way,
1047 offset,
1048 idx,
1049 base_ch,
1050 *channel_mask);
1051
1052 /* Calculate channel address */
1053 /* Remove the TAD offset */
1054
1055 if (offset > addr) {
1056 sprintf(msg, "Can't calculate ch addr: TAD offset 0x%08Lx is too high for addr 0x%08Lx!",
1057 offset, addr);
1058 edac_mc_handle_ce_no_info(mci, msg);
1059 return -EINVAL;
1060 }
1061 addr -= offset;
1062 /* Store the low bits [0:6] of the addr */
1063 ch_addr = addr & 0x7f;
1064 /* Remove socket wayness and remove 6 bits */
1065 addr >>= 6;
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -03001066 addr = div_u64(addr, sck_xch);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001067#if 0
1068 /* Divide by channel way */
1069 addr = addr / ch_way;
1070#endif
1071 /* Recover the last 6 bits */
1072 ch_addr |= addr << 6;
1073
1074 /*
1075 * Step 3) Decode rank
1076 */
1077 for (n_rir = 0; n_rir < MAX_RIR_RANGES; n_rir++) {
1078 pci_read_config_dword(pvt->pci_tad[base_ch],
1079 rir_way_limit[n_rir],
1080 &reg);
1081
1082 if (!IS_RIR_VALID(reg))
1083 continue;
1084
1085 limit = RIR_LIMIT(reg);
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -03001086 mb = div_u64_rem(limit >> 20, 1000, &kb);
1087 debugf0("RIR#%d, limit: %u.%03u GB (0x%016Lx), way: %d\n",
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001088 n_rir,
Mauro Carvalho Chehab5b889e32011-11-07 18:26:53 -03001089 mb, kb,
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001090 limit,
1091 1 << RIR_WAY(reg));
1092 if (ch_addr <= limit)
1093 break;
1094 }
1095 if (n_rir == MAX_RIR_RANGES) {
1096 sprintf(msg, "Can't discover the memory rank for ch addr 0x%08Lx",
1097 ch_addr);
1098 edac_mc_handle_ce_no_info(mci, msg);
1099 return -EINVAL;
1100 }
1101 rir_way = RIR_WAY(reg);
1102 if (pvt->is_close_pg)
1103 idx = (ch_addr >> 6);
1104 else
1105 idx = (ch_addr >> 13); /* FIXME: Datasheet says to shift by 15 */
1106 idx %= 1 << rir_way;
1107
1108 pci_read_config_dword(pvt->pci_tad[base_ch],
1109 rir_offset[n_rir][idx],
1110 &reg);
1111 *rank = RIR_RNK_TGT(reg);
1112
1113 debugf0("RIR#%d: channel address 0x%08Lx < 0x%08Lx, RIR interleave %d, index %d\n",
1114 n_rir,
1115 ch_addr,
1116 limit,
1117 rir_way,
1118 idx);
1119
1120 return 0;
1121}
1122
1123/****************************************************************************
1124 Device initialization routines: put/get, init/exit
1125 ****************************************************************************/
1126
1127/*
1128 * sbridge_put_all_devices 'put' all the devices that we have
1129 * reserved via 'get'
1130 */
1131static void sbridge_put_devices(struct sbridge_dev *sbridge_dev)
1132{
1133 int i;
1134
1135 debugf0(__FILE__ ": %s()\n", __func__);
1136 for (i = 0; i < sbridge_dev->n_devs; i++) {
1137 struct pci_dev *pdev = sbridge_dev->pdev[i];
1138 if (!pdev)
1139 continue;
1140 debugf0("Removing dev %02x:%02x.%d\n",
1141 pdev->bus->number,
1142 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
1143 pci_dev_put(pdev);
1144 }
1145}
1146
1147static void sbridge_put_all_devices(void)
1148{
1149 struct sbridge_dev *sbridge_dev, *tmp;
1150
1151 list_for_each_entry_safe(sbridge_dev, tmp, &sbridge_edac_list, list) {
1152 sbridge_put_devices(sbridge_dev);
1153 free_sbridge_dev(sbridge_dev);
1154 }
1155}
1156
1157/*
1158 * sbridge_get_all_devices Find and perform 'get' operation on the MCH's
1159 * device/functions we want to reference for this driver
1160 *
1161 * Need to 'get' device 16 func 1 and func 2
1162 */
1163static int sbridge_get_onedevice(struct pci_dev **prev,
1164 u8 *num_mc,
1165 const struct pci_id_table *table,
1166 const unsigned devno)
1167{
1168 struct sbridge_dev *sbridge_dev;
1169 const struct pci_id_descr *dev_descr = &table->descr[devno];
1170
1171 struct pci_dev *pdev = NULL;
1172 u8 bus = 0;
1173
1174 sbridge_printk(KERN_INFO,
1175 "Seeking for: dev %02x.%d PCI ID %04x:%04x\n",
1176 dev_descr->dev, dev_descr->func,
1177 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1178
1179 pdev = pci_get_device(PCI_VENDOR_ID_INTEL,
1180 dev_descr->dev_id, *prev);
1181
1182 if (!pdev) {
1183 if (*prev) {
1184 *prev = pdev;
1185 return 0;
1186 }
1187
1188 if (dev_descr->optional)
1189 return 0;
1190
1191 if (devno == 0)
1192 return -ENODEV;
1193
1194 sbridge_printk(KERN_INFO,
1195 "Device not found: dev %02x.%d PCI ID %04x:%04x\n",
1196 dev_descr->dev, dev_descr->func,
1197 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1198
1199 /* End of list, leave */
1200 return -ENODEV;
1201 }
1202 bus = pdev->bus->number;
1203
1204 sbridge_dev = get_sbridge_dev(bus);
1205 if (!sbridge_dev) {
1206 sbridge_dev = alloc_sbridge_dev(bus, table);
1207 if (!sbridge_dev) {
1208 pci_dev_put(pdev);
1209 return -ENOMEM;
1210 }
1211 (*num_mc)++;
1212 }
1213
1214 if (sbridge_dev->pdev[devno]) {
1215 sbridge_printk(KERN_ERR,
1216 "Duplicated device for "
1217 "dev %02x:%d.%d PCI ID %04x:%04x\n",
1218 bus, dev_descr->dev, dev_descr->func,
1219 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1220 pci_dev_put(pdev);
1221 return -ENODEV;
1222 }
1223
1224 sbridge_dev->pdev[devno] = pdev;
1225
1226 /* Sanity check */
1227 if (unlikely(PCI_SLOT(pdev->devfn) != dev_descr->dev ||
1228 PCI_FUNC(pdev->devfn) != dev_descr->func)) {
1229 sbridge_printk(KERN_ERR,
1230 "Device PCI ID %04x:%04x "
1231 "has dev %02x:%d.%d instead of dev %02x:%02x.%d\n",
1232 PCI_VENDOR_ID_INTEL, dev_descr->dev_id,
1233 bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1234 bus, dev_descr->dev, dev_descr->func);
1235 return -ENODEV;
1236 }
1237
1238 /* Be sure that the device is enabled */
1239 if (unlikely(pci_enable_device(pdev) < 0)) {
1240 sbridge_printk(KERN_ERR,
1241 "Couldn't enable "
1242 "dev %02x:%d.%d PCI ID %04x:%04x\n",
1243 bus, dev_descr->dev, dev_descr->func,
1244 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1245 return -ENODEV;
1246 }
1247
1248 debugf0("Detected dev %02x:%d.%d PCI ID %04x:%04x\n",
1249 bus, dev_descr->dev,
1250 dev_descr->func,
1251 PCI_VENDOR_ID_INTEL, dev_descr->dev_id);
1252
1253 /*
1254 * As stated on drivers/pci/search.c, the reference count for
1255 * @from is always decremented if it is not %NULL. So, as we need
1256 * to get all devices up to null, we need to do a get for the device
1257 */
1258 pci_dev_get(pdev);
1259
1260 *prev = pdev;
1261
1262 return 0;
1263}
1264
1265static int sbridge_get_all_devices(u8 *num_mc)
1266{
1267 int i, rc;
1268 struct pci_dev *pdev = NULL;
1269 const struct pci_id_table *table = pci_dev_descr_sbridge_table;
1270
1271 while (table && table->descr) {
1272 for (i = 0; i < table->n_devs; i++) {
1273 pdev = NULL;
1274 do {
1275 rc = sbridge_get_onedevice(&pdev, num_mc,
1276 table, i);
1277 if (rc < 0) {
1278 if (i == 0) {
1279 i = table->n_devs;
1280 break;
1281 }
1282 sbridge_put_all_devices();
1283 return -ENODEV;
1284 }
1285 } while (pdev);
1286 }
1287 table++;
1288 }
1289
1290 return 0;
1291}
1292
1293static int mci_bind_devs(struct mem_ctl_info *mci,
1294 struct sbridge_dev *sbridge_dev)
1295{
1296 struct sbridge_pvt *pvt = mci->pvt_info;
1297 struct pci_dev *pdev;
1298 int i, func, slot;
1299
1300 for (i = 0; i < sbridge_dev->n_devs; i++) {
1301 pdev = sbridge_dev->pdev[i];
1302 if (!pdev)
1303 continue;
1304 slot = PCI_SLOT(pdev->devfn);
1305 func = PCI_FUNC(pdev->devfn);
1306 switch (slot) {
1307 case 12:
1308 switch (func) {
1309 case 6:
1310 pvt->pci_sad0 = pdev;
1311 break;
1312 case 7:
1313 pvt->pci_sad1 = pdev;
1314 break;
1315 default:
1316 goto error;
1317 }
1318 break;
1319 case 13:
1320 switch (func) {
1321 case 6:
1322 pvt->pci_br = pdev;
1323 break;
1324 default:
1325 goto error;
1326 }
1327 break;
1328 case 14:
1329 switch (func) {
1330 case 0:
1331 pvt->pci_ha0 = pdev;
1332 break;
1333 default:
1334 goto error;
1335 }
1336 break;
1337 case 15:
1338 switch (func) {
1339 case 0:
1340 pvt->pci_ta = pdev;
1341 break;
1342 case 1:
1343 pvt->pci_ras = pdev;
1344 break;
1345 case 2:
1346 case 3:
1347 case 4:
1348 case 5:
1349 pvt->pci_tad[func - 2] = pdev;
1350 break;
1351 default:
1352 goto error;
1353 }
1354 break;
1355 case 17:
1356 switch (func) {
1357 case 0:
1358 pvt->pci_ddrio = pdev;
1359 break;
1360 default:
1361 goto error;
1362 }
1363 break;
1364 default:
1365 goto error;
1366 }
1367
1368 debugf0("Associated PCI %02x.%02d.%d with dev = %p\n",
1369 sbridge_dev->bus,
1370 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
1371 pdev);
1372 }
1373
1374 /* Check if everything were registered */
1375 if (!pvt->pci_sad0 || !pvt->pci_sad1 || !pvt->pci_ha0 ||
1376 !pvt-> pci_tad || !pvt->pci_ras || !pvt->pci_ta ||
1377 !pvt->pci_ddrio)
1378 goto enodev;
1379
1380 for (i = 0; i < NUM_CHANNELS; i++) {
1381 if (!pvt->pci_tad[i])
1382 goto enodev;
1383 }
1384 return 0;
1385
1386enodev:
1387 sbridge_printk(KERN_ERR, "Some needed devices are missing\n");
1388 return -ENODEV;
1389
1390error:
1391 sbridge_printk(KERN_ERR, "Device %d, function %d "
1392 "is out of the expected range\n",
1393 slot, func);
1394 return -EINVAL;
1395}
1396
1397/****************************************************************************
1398 Error check routines
1399 ****************************************************************************/
1400
1401/*
1402 * While Sandy Bridge has error count registers, SMI BIOS read values from
1403 * and resets the counters. So, they are not reliable for the OS to read
1404 * from them. So, we have no option but to just trust on whatever MCE is
1405 * telling us about the errors.
1406 */
1407static void sbridge_mce_output_error(struct mem_ctl_info *mci,
1408 const struct mce *m)
1409{
1410 struct mem_ctl_info *new_mci;
1411 struct sbridge_pvt *pvt = mci->pvt_info;
1412 char *type, *optype, *msg, *recoverable_msg;
1413 bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
1414 bool overflow = GET_BITFIELD(m->status, 62, 62);
1415 bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
1416 bool recoverable = GET_BITFIELD(m->status, 56, 56);
1417 u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
1418 u32 mscod = GET_BITFIELD(m->status, 16, 31);
1419 u32 errcode = GET_BITFIELD(m->status, 0, 15);
1420 u32 channel = GET_BITFIELD(m->status, 0, 3);
1421 u32 optypenum = GET_BITFIELD(m->status, 4, 6);
1422 long channel_mask, first_channel;
1423 u8 rank, socket;
1424 int csrow, rc, dimm;
1425 char *area_type = "Unknown";
1426
1427 if (ripv)
1428 type = "NON_FATAL";
1429 else
1430 type = "FATAL";
1431
1432 /*
1433 * According with Table 15-9 of the Intel Archictecture spec vol 3A,
1434 * memory errors should fit in this mask:
1435 * 000f 0000 1mmm cccc (binary)
1436 * where:
1437 * f = Correction Report Filtering Bit. If 1, subsequent errors
1438 * won't be shown
1439 * mmm = error type
1440 * cccc = channel
1441 * If the mask doesn't match, report an error to the parsing logic
1442 */
1443 if (! ((errcode & 0xef80) == 0x80)) {
1444 optype = "Can't parse: it is not a mem";
1445 } else {
1446 switch (optypenum) {
1447 case 0:
1448 optype = "generic undef request";
1449 break;
1450 case 1:
1451 optype = "memory read";
1452 break;
1453 case 2:
1454 optype = "memory write";
1455 break;
1456 case 3:
1457 optype = "addr/cmd";
1458 break;
1459 case 4:
1460 optype = "memory scrubbing";
1461 break;
1462 default:
1463 optype = "reserved";
1464 break;
1465 }
1466 }
1467
1468 rc = get_memory_error_data(mci, m->addr, &socket,
1469 &channel_mask, &rank, area_type);
1470 if (rc < 0)
1471 return;
1472 new_mci = get_mci_for_node_id(socket);
1473 if (!new_mci) {
1474 edac_mc_handle_ce_no_info(mci, "Error: socket got corrupted!");
1475 return;
1476 }
1477 mci = new_mci;
1478 pvt = mci->pvt_info;
1479
1480 first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);
1481
1482 if (rank < 4)
1483 dimm = 0;
1484 else if (rank < 8)
1485 dimm = 1;
1486 else
1487 dimm = 2;
1488
1489 csrow = pvt->csrow_map[first_channel][dimm];
1490
1491 if (uncorrected_error && recoverable)
1492 recoverable_msg = " recoverable";
1493 else
1494 recoverable_msg = "";
1495
1496 /*
1497 * FIXME: What should we do with "channel" information on mcelog?
1498 * Probably, we can just discard it, as the channel information
1499 * comes from the get_memory_error_data() address decoding
1500 */
1501 msg = kasprintf(GFP_ATOMIC,
1502 "%d %s error(s): %s on %s area %s%s: cpu=%d Err=%04x:%04x (ch=%d), "
1503 "addr = 0x%08llx => socket=%d, Channel=%ld(mask=%ld), rank=%d\n",
1504 core_err_cnt,
1505 area_type,
1506 optype,
1507 type,
1508 recoverable_msg,
1509 overflow ? "OVERFLOW" : "",
1510 m->cpu,
1511 mscod, errcode,
1512 channel, /* 1111b means not specified */
1513 (long long) m->addr,
1514 socket,
1515 first_channel, /* This is the real channel on SB */
1516 channel_mask,
1517 rank);
1518
1519 debugf0("%s", msg);
1520
1521 /* Call the helper to output message */
1522 if (uncorrected_error)
1523 edac_mc_handle_fbd_ue(mci, csrow, 0, 0, msg);
1524 else
1525 edac_mc_handle_fbd_ce(mci, csrow, 0, msg);
1526
1527 kfree(msg);
1528}
1529
1530/*
1531 * sbridge_check_error Retrieve and process errors reported by the
1532 * hardware. Called by the Core module.
1533 */
1534static void sbridge_check_error(struct mem_ctl_info *mci)
1535{
1536 struct sbridge_pvt *pvt = mci->pvt_info;
1537 int i;
1538 unsigned count = 0;
1539 struct mce *m;
1540
1541 /*
1542 * MCE first step: Copy all mce errors into a temporary buffer
1543 * We use a double buffering here, to reduce the risk of
1544 * loosing an error.
1545 */
1546 smp_rmb();
1547 count = (pvt->mce_out + MCE_LOG_LEN - pvt->mce_in)
1548 % MCE_LOG_LEN;
1549 if (!count)
1550 return;
1551
1552 m = pvt->mce_outentry;
1553 if (pvt->mce_in + count > MCE_LOG_LEN) {
1554 unsigned l = MCE_LOG_LEN - pvt->mce_in;
1555
1556 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * l);
1557 smp_wmb();
1558 pvt->mce_in = 0;
1559 count -= l;
1560 m += l;
1561 }
1562 memcpy(m, &pvt->mce_entry[pvt->mce_in], sizeof(*m) * count);
1563 smp_wmb();
1564 pvt->mce_in += count;
1565
1566 smp_rmb();
1567 if (pvt->mce_overrun) {
1568 sbridge_printk(KERN_ERR, "Lost %d memory errors\n",
1569 pvt->mce_overrun);
1570 smp_wmb();
1571 pvt->mce_overrun = 0;
1572 }
1573
1574 /*
1575 * MCE second step: parse errors and display
1576 */
1577 for (i = 0; i < count; i++)
1578 sbridge_mce_output_error(mci, &pvt->mce_outentry[i]);
1579}
1580
1581/*
1582 * sbridge_mce_check_error Replicates mcelog routine to get errors
1583 * This routine simply queues mcelog errors, and
1584 * return. The error itself should be handled later
1585 * by sbridge_check_error.
1586 * WARNING: As this routine should be called at NMI time, extra care should
1587 * be taken to avoid deadlocks, and to be as fast as possible.
1588 */
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001589static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val,
1590 void *data)
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001591{
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001592 struct mce *mce = (struct mce *)data;
1593 struct mem_ctl_info *mci;
1594 struct sbridge_pvt *pvt;
1595
1596 mci = get_mci_for_node_id(mce->socketid);
1597 if (!mci)
1598 return NOTIFY_BAD;
1599 pvt = mci->pvt_info;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001600
1601 /*
1602 * Just let mcelog handle it if the error is
1603 * outside the memory controller. A memory error
1604 * is indicated by bit 7 = 1 and bits = 8-11,13-15 = 0.
1605 * bit 12 has an special meaning.
1606 */
1607 if ((mce->status & 0xefff) >> 7 != 1)
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001608 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001609
1610 printk("sbridge: HANDLING MCE MEMORY ERROR\n");
1611
1612 printk("CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
1613 mce->extcpu, mce->mcgstatus, mce->bank, mce->status);
1614 printk("TSC %llx ", mce->tsc);
1615 printk("ADDR %llx ", mce->addr);
1616 printk("MISC %llx ", mce->misc);
1617
1618 printk("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
1619 mce->cpuvendor, mce->cpuid, mce->time,
1620 mce->socketid, mce->apicid);
1621
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001622 /* Only handle if it is the right mc controller */
1623 if (cpu_data(mce->cpu).phys_proc_id != pvt->sbridge_dev->mc)
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001624 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001625
1626 smp_rmb();
1627 if ((pvt->mce_out + 1) % MCE_LOG_LEN == pvt->mce_in) {
1628 smp_wmb();
1629 pvt->mce_overrun++;
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001630 return NOTIFY_DONE;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001631 }
1632
1633 /* Copy memory error at the ringbuffer */
1634 memcpy(&pvt->mce_entry[pvt->mce_out], mce, sizeof(*mce));
1635 smp_wmb();
1636 pvt->mce_out = (pvt->mce_out + 1) % MCE_LOG_LEN;
1637
1638 /* Handle fatal errors immediately */
1639 if (mce->mcgstatus & 1)
1640 sbridge_check_error(mci);
1641
1642 /* Advice mcelog that the error were handled */
Mauro Carvalho Chehab3d78c9a2011-10-20 19:33:46 -02001643 return NOTIFY_STOP;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001644}
1645
/*
 * Notifier block whose callback is sbridge_mce_check_error(). It is added to
 * the MCE decode chain by sbridge_register_mci() and removed again by
 * sbridge_unregister_mci().
 */
static struct notifier_block sbridge_mce_dec = {
	.notifier_call = sbridge_mce_check_error,
};
1649
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001650/****************************************************************************
1651 EDAC register/unregister logic
1652 ****************************************************************************/
1653
1654static void sbridge_unregister_mci(struct sbridge_dev *sbridge_dev)
1655{
1656 struct mem_ctl_info *mci = sbridge_dev->mci;
1657 struct sbridge_pvt *pvt;
1658
1659 if (unlikely(!mci || !mci->pvt_info)) {
1660 debugf0("MC: " __FILE__ ": %s(): dev = %p\n",
1661 __func__, &sbridge_dev->pdev[0]->dev);
1662
1663 sbridge_printk(KERN_ERR, "Couldn't find mci handler\n");
1664 return;
1665 }
1666
1667 pvt = mci->pvt_info;
1668
1669 debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
1670 __func__, mci, &sbridge_dev->pdev[0]->dev);
1671
Borislav Petkov3653ada2011-12-04 15:12:09 +01001672 mce_unregister_decode_chain(&sbridge_mce_dec);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001673
1674 /* Remove MC sysfs nodes */
1675 edac_mc_del_mc(mci->dev);
1676
1677 debugf1("%s: free mci struct\n", mci->ctl_name);
1678 kfree(mci->ctl_name);
1679 edac_mc_free(mci);
1680 sbridge_dev->mci = NULL;
1681}
1682
1683static int sbridge_register_mci(struct sbridge_dev *sbridge_dev)
1684{
1685 struct mem_ctl_info *mci;
1686 struct sbridge_pvt *pvt;
1687 int rc, channels, csrows;
1688
1689 /* Check the number of active and not disabled channels */
1690 rc = sbridge_get_active_channels(sbridge_dev->bus, &channels, &csrows);
1691 if (unlikely(rc < 0))
1692 return rc;
1693
1694 /* allocate a new MC control structure */
1695 mci = edac_mc_alloc(sizeof(*pvt), csrows, channels, sbridge_dev->mc);
1696 if (unlikely(!mci))
1697 return -ENOMEM;
1698
1699 debugf0("MC: " __FILE__ ": %s(): mci = %p, dev = %p\n",
1700 __func__, mci, &sbridge_dev->pdev[0]->dev);
1701
1702 pvt = mci->pvt_info;
1703 memset(pvt, 0, sizeof(*pvt));
1704
1705 /* Associate sbridge_dev and mci for future usage */
1706 pvt->sbridge_dev = sbridge_dev;
1707 sbridge_dev->mci = mci;
1708
1709 mci->mtype_cap = MEM_FLAG_DDR3;
1710 mci->edac_ctl_cap = EDAC_FLAG_NONE;
1711 mci->edac_cap = EDAC_FLAG_NONE;
1712 mci->mod_name = "sbridge_edac.c";
1713 mci->mod_ver = SBRIDGE_REVISION;
1714 mci->ctl_name = kasprintf(GFP_KERNEL, "Sandy Bridge Socket#%d", mci->mc_idx);
1715 mci->dev_name = pci_name(sbridge_dev->pdev[0]);
1716 mci->ctl_page_to_phys = NULL;
1717
1718 /* Set the function pointer to an actual operation function */
1719 mci->edac_check = sbridge_check_error;
1720
1721 /* Store pci devices at mci for faster access */
1722 rc = mci_bind_devs(mci, sbridge_dev);
1723 if (unlikely(rc < 0))
1724 goto fail0;
1725
1726 /* Get dimm basic config and the memory layout */
1727 get_dimm_config(mci);
1728 get_memory_layout(mci);
1729
1730 /* record ptr to the generic device */
1731 mci->dev = &sbridge_dev->pdev[0]->dev;
1732
1733 /* add this new MC control structure to EDAC's list of MCs */
1734 if (unlikely(edac_mc_add_mc(mci))) {
1735 debugf0("MC: " __FILE__
1736 ": %s(): failed edac_mc_add_mc()\n", __func__);
1737 rc = -EINVAL;
1738 goto fail0;
1739 }
1740
Borislav Petkov3653ada2011-12-04 15:12:09 +01001741 mce_register_decode_chain(&sbridge_mce_dec);
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001742 return 0;
Mauro Carvalho Chehabeebf11a2011-10-20 19:18:01 -02001743
1744fail0:
1745 kfree(mci->ctl_name);
1746 edac_mc_free(mci);
1747 sbridge_dev->mci = NULL;
1748 return rc;
1749}
1750
1751/*
1752 * sbridge_probe Probe for ONE instance of device to see if it is
1753 * present.
1754 * return:
1755 * 0 for FOUND a device
1756 * < 0 for error code
1757 */
1758
1759static int __devinit sbridge_probe(struct pci_dev *pdev,
1760 const struct pci_device_id *id)
1761{
1762 int rc;
1763 u8 mc, num_mc = 0;
1764 struct sbridge_dev *sbridge_dev;
1765
1766 /* get the pci devices we want to reserve for our use */
1767 mutex_lock(&sbridge_edac_lock);
1768
1769 /*
1770 * All memory controllers are allocated at the first pass.
1771 */
1772 if (unlikely(probed >= 1)) {
1773 mutex_unlock(&sbridge_edac_lock);
1774 return -ENODEV;
1775 }
1776 probed++;
1777
1778 rc = sbridge_get_all_devices(&num_mc);
1779 if (unlikely(rc < 0))
1780 goto fail0;
1781 mc = 0;
1782
1783 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list) {
1784 debugf0("Registering MC#%d (%d of %d)\n", mc, mc + 1, num_mc);
1785 sbridge_dev->mc = mc++;
1786 rc = sbridge_register_mci(sbridge_dev);
1787 if (unlikely(rc < 0))
1788 goto fail1;
1789 }
1790
1791 sbridge_printk(KERN_INFO, "Driver loaded.\n");
1792
1793 mutex_unlock(&sbridge_edac_lock);
1794 return 0;
1795
1796fail1:
1797 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
1798 sbridge_unregister_mci(sbridge_dev);
1799
1800 sbridge_put_all_devices();
1801fail0:
1802 mutex_unlock(&sbridge_edac_lock);
1803 return rc;
1804}
1805
1806/*
1807 * sbridge_remove destructor for one instance of device
1808 *
1809 */
1810static void __devexit sbridge_remove(struct pci_dev *pdev)
1811{
1812 struct sbridge_dev *sbridge_dev;
1813
1814 debugf0(__FILE__ ": %s()\n", __func__);
1815
1816 /*
1817 * we have a trouble here: pdev value for removal will be wrong, since
1818 * it will point to the X58 register used to detect that the machine
1819 * is a Nehalem or upper design. However, due to the way several PCI
1820 * devices are grouped together to provide MC functionality, we need
1821 * to use a different method for releasing the devices
1822 */
1823
1824 mutex_lock(&sbridge_edac_lock);
1825
1826 if (unlikely(!probed)) {
1827 mutex_unlock(&sbridge_edac_lock);
1828 return;
1829 }
1830
1831 list_for_each_entry(sbridge_dev, &sbridge_edac_list, list)
1832 sbridge_unregister_mci(sbridge_dev);
1833
1834 /* Release PCI resources */
1835 sbridge_put_all_devices();
1836
1837 probed--;
1838
1839 mutex_unlock(&sbridge_edac_lock);
1840}
1841
1842MODULE_DEVICE_TABLE(pci, sbridge_pci_tbl);
1843
1844/*
1845 * sbridge_driver pci_driver structure for this module
1846 *
1847 */
1848static struct pci_driver sbridge_driver = {
1849 .name = "sbridge_edac",
1850 .probe = sbridge_probe,
1851 .remove = __devexit_p(sbridge_remove),
1852 .id_table = sbridge_pci_tbl,
1853};
1854
1855/*
1856 * sbridge_init Module entry function
1857 * Try to initialize this module for its devices
1858 */
1859static int __init sbridge_init(void)
1860{
1861 int pci_rc;
1862
1863 debugf2("MC: " __FILE__ ": %s()\n", __func__);
1864
1865 /* Ensure that the OPSTATE is set correctly for POLL or NMI */
1866 opstate_init();
1867
1868 pci_rc = pci_register_driver(&sbridge_driver);
1869
1870 if (pci_rc >= 0)
1871 return 0;
1872
1873 sbridge_printk(KERN_ERR, "Failed to register device with error %d.\n",
1874 pci_rc);
1875
1876 return pci_rc;
1877}
1878
/*
 *	sbridge_exit()		Module exit function
 *			Unregisters sbridge_driver from the PCI core.
 */
static void __exit sbridge_exit(void)
{
	debugf2("MC: " __FILE__ ": %s()\n", __func__);
	pci_unregister_driver(&sbridge_driver);
}
1888
/* Module entry and exit points */
module_init(sbridge_init);
module_exit(sbridge_exit);

/* edac_op_state is exposed as a read-only (0444) module parameter */
module_param(edac_op_state, int, 0444);
MODULE_PARM_DESC(edac_op_state, "EDAC Error Reporting state: 0=Poll,1=NMI");

/* Module metadata */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@redhat.com>");
MODULE_AUTHOR("Red Hat Inc. (http://www.redhat.com)");
MODULE_DESCRIPTION("MC Driver for Intel Sandy Bridge memory controllers - "
		   SBRIDGE_REVISION);