/*
 * edac_mc kernel module
 * (C) 2005, 2006 Linux Networx (http://lnxi.com)
 * This file may be distributed under the terms of the
 * GNU General Public License.
 *
 * Written by Thayne Harbaugh
 * Based on work by Dan Hollis <goemon at anime dot net> and others.
 *	http://www.anime.net/~goemon/linux-ecc/
 *
 * Modified by Dave Peterson and Doug Thompson
 *
 */

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/sysctl.h>
#include <linux/highmem.h>
#include <linux/timer.h>
#include <linux/slab.h>
#include <linux/jiffies.h>
#include <linux/spinlock.h>
#include <linux/list.h>
#include <linux/ctype.h>
#include <linux/edac.h>
#include <asm/uaccess.h>
#include <asm/page.h>
#include <asm/edac.h>
#include "edac_core.h"
#include "edac_module.h"

/* lock to memory controller's control array */
static DEFINE_MUTEX(mem_ctls_mutex);
static LIST_HEAD(mc_devices);

#ifdef CONFIG_EDAC_DEBUG

static void edac_mc_dump_channel(struct rank_info *chan)
{
	debugf4("\tchannel = %p\n", chan);
	debugf4("\tchannel->chan_idx = %d\n", chan->chan_idx);
	debugf4("\tchannel->csrow = %p\n\n", chan->csrow);
	debugf4("\tchannel->dimm = %p\n", chan->dimm);
}

static void edac_mc_dump_dimm(struct dimm_info *dimm)
{
	int i;

	debugf4("\tdimm = %p\n", dimm);
	debugf4("\tdimm->label = '%s'\n", dimm->label);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
	debugf4("\tdimm location ");
	for (i = 0; i < dimm->mci->n_layers; i++) {
		printk(KERN_CONT "%d", dimm->location[i]);
		if (i < dimm->mci->n_layers - 1)
			printk(KERN_CONT ".");
	}
	printk(KERN_CONT "\n");
	debugf4("\tdimm->grain = %d\n", dimm->grain);
	debugf4("\tdimm->nr_pages = 0x%x\n", dimm->nr_pages);
}

static void edac_mc_dump_csrow(struct csrow_info *csrow)
{
	debugf4("\tcsrow = %p\n", csrow);
	debugf4("\tcsrow->csrow_idx = %d\n", csrow->csrow_idx);
	debugf4("\tcsrow->first_page = 0x%lx\n", csrow->first_page);
	debugf4("\tcsrow->last_page = 0x%lx\n", csrow->last_page);
	debugf4("\tcsrow->page_mask = 0x%lx\n", csrow->page_mask);
	debugf4("\tcsrow->nr_channels = %d\n", csrow->nr_channels);
	debugf4("\tcsrow->channels = %p\n", csrow->channels);
	debugf4("\tcsrow->mci = %p\n\n", csrow->mci);
}

static void edac_mc_dump_mci(struct mem_ctl_info *mci)
{
	debugf3("\tmci = %p\n", mci);
	debugf3("\tmci->mtype_cap = %lx\n", mci->mtype_cap);
	debugf3("\tmci->edac_ctl_cap = %lx\n", mci->edac_ctl_cap);
	debugf3("\tmci->edac_cap = %lx\n", mci->edac_cap);
	debugf4("\tmci->edac_check = %p\n", mci->edac_check);
	debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
		mci->nr_csrows, mci->csrows);
	debugf3("\tmci->nr_dimms = %d, dimms = %p\n",
		mci->tot_dimms, mci->dimms);
	debugf3("\tdev = %p\n", mci->dev);
	debugf3("\tmod_name:ctl_name = %s:%s\n", mci->mod_name, mci->ctl_name);
	debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
}

#endif /* CONFIG_EDAC_DEBUG */

/*
 * keep those in sync with the enum mem_type
 */
const char *edac_mem_types[] = {
	"Empty csrow",
	"Reserved csrow type",
	"Unknown csrow type",
	"Fast page mode RAM",
	"Extended data out RAM",
	"Burst Extended data out RAM",
	"Single data rate SDRAM",
	"Registered single data rate SDRAM",
	"Double data rate SDRAM",
	"Registered Double data rate SDRAM",
	"Rambus DRAM",
	"Unbuffered DDR2 RAM",
	"Fully buffered DDR2",
	"Registered DDR2 RAM",
	"Rambus XDR",
	"Unbuffered DDR3 RAM",
	"Registered DDR3 RAM",
};
EXPORT_SYMBOL_GPL(edac_mem_types);

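/*
 * Example: assuming the enum mem_type ordering in <linux/edac.h> that this
 * table mirrors, edac_mem_types[MEM_DDR3] evaluates to "Unbuffered DDR3 RAM".
 */
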
/**
 * edac_align_ptr - Prepares the pointer offsets for a single-shot allocation
 * @p: pointer to a pointer with the memory offset to be used. At
 *     return, this will be incremented to point to the next offset
 * @size: Size of the data structure to be reserved
 * @n_elems: Number of elements that should be reserved
 *
 * If 'size' is a constant, the compiler will optimize this whole function
 * down to either a no-op or the addition of a constant to the value of '*p'.
 *
 * The 'p' pointer is needed so that consecutive calls keep advancing to the
 * proper offsets within the memory block when allocating a struct along
 * with its embedded structs, as edac_device_alloc_ctl_info() does,
 * for example.
 *
 * At return, the pointer 'p' will be incremented to be used on a next call
 * to this function.
 */
void *edac_align_ptr(void **p, unsigned size, int n_elems)
{
	unsigned align, r;
	void *ptr = *p;

	*p += size * n_elems;

	/*
	 * 'p' can possibly be an unaligned item X such that sizeof(X) is
	 * 'size'.  Adjust 'p' so that its alignment is at least as
	 * stringent as what the compiler would provide for X and return
	 * the aligned result.
	 * Here we assume that the alignment of a "long long" is the most
	 * stringent alignment that the compiler will ever provide by default.
	 * As far as I know, this is a reasonable assumption.
	 */
	if (size > sizeof(long))
		align = sizeof(long long);
	else if (size > sizeof(int))
		align = sizeof(long);
	else if (size > sizeof(short))
		align = sizeof(int);
	else if (size > sizeof(char))
		align = sizeof(short);
	else
		return (char *)ptr;

	r = (unsigned long)ptr % align;

	if (r == 0)
		return (char *)ptr;

	*p += align - r;

	return (void *)(((unsigned long)ptr) + align - r);
}
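
/*
 * Illustrative sketch only (the struct names foo/bar are hypothetical, not
 * part of this file): the single-shot allocation pattern edac_align_ptr()
 * enables, as used by edac_mc_alloc() below.  Offsets are first accumulated
 * against a NULL base, one chunk is allocated, and the offsets are then
 * rebased onto the real allocation:
 *
 *	void *ptr = NULL, *base;
 *	struct foo *f;
 *	struct bar *b;
 *	unsigned size;
 *
 *	f = edac_align_ptr(&ptr, sizeof(*f), 1);
 *	b = edac_align_ptr(&ptr, sizeof(*b), n_bars);
 *	size = (unsigned long)ptr;
 *
 *	base = kzalloc(size, GFP_KERNEL);
 *	f = (struct foo *)(((char *)base) + ((unsigned long)f));
 *	b = (struct bar *)(((char *)base) + ((unsigned long)b));
 */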

/**
 * edac_mc_alloc: Allocate and partially fill a struct mem_ctl_info structure
 * @mc_num:	Memory controller number
 * @n_layers:	Number of MC hierarchy layers
 * @layers:	Describes each layer as seen by the Memory Controller
 * @sz_pvt:	size of private storage needed
 *
 * Everything is kmalloc'ed as one big chunk - more efficient.
 * It can only be used if all structures have the same lifetime - otherwise
 * you have to allocate and initialize your own structures.
 *
 * Use edac_mc_free() to free mc structures allocated by this function.
 *
 * NOTE: drivers handle multi-rank memories in different ways: in some
 * drivers, one multi-rank memory stick is mapped as one entry, while, in
 * others, a single multi-rank memory stick would be mapped into several
 * entries. Currently, this function will allocate multiple struct dimm_info
 * on such scenarios, as grouping the multiple ranks requires driver changes.
 *
 * Returns:
 *	On failure: NULL
 *	On success: struct mem_ctl_info pointer
 */
struct mem_ctl_info *edac_mc_alloc(unsigned mc_num,
				   unsigned n_layers,
				   struct edac_mc_layer *layers,
				   unsigned sz_pvt)
{
	struct mem_ctl_info *mci;
	struct edac_mc_layer *layer;
	struct csrow_info *csi, *csr;
	struct rank_info *chi, *chp, *chan;
	struct dimm_info *dimm;
	u32 *ce_per_layer[EDAC_MAX_LAYERS], *ue_per_layer[EDAC_MAX_LAYERS];
	unsigned pos[EDAC_MAX_LAYERS];
	unsigned size, tot_dimms = 1, count = 1;
	unsigned tot_csrows = 1, tot_channels = 1, tot_errcount = 0;
	void *pvt, *p, *ptr = NULL;
	int i, j, err, row, chn, n, len;
	bool per_rank = false;

	BUG_ON(n_layers > EDAC_MAX_LAYERS || n_layers == 0);
	/*
	 * Calculate the total amount of dimms and csrows/cschannels while
	 * in the old API emulation mode
	 */
	for (i = 0; i < n_layers; i++) {
		tot_dimms *= layers[i].size;
		if (layers[i].is_virt_csrow)
			tot_csrows *= layers[i].size;
		else
			tot_channels *= layers[i].size;

		if (layers[i].type == EDAC_MC_LAYER_CHIP_SELECT)
			per_rank = true;
	}

	/* Figure out the offsets of the various items from the start of an mc
	 * structure.  We want the alignment of each item to be at least as
	 * stringent as what the compiler would provide if we could simply
	 * hardcode everything into a single struct.
	 */
	mci = edac_align_ptr(&ptr, sizeof(*mci), 1);
	layer = edac_align_ptr(&ptr, sizeof(*layer), n_layers);
	csi = edac_align_ptr(&ptr, sizeof(*csi), tot_csrows);
	chi = edac_align_ptr(&ptr, sizeof(*chi), tot_csrows * tot_channels);
	dimm = edac_align_ptr(&ptr, sizeof(*dimm), tot_dimms);
	for (i = 0; i < n_layers; i++) {
		count *= layers[i].size;
		debugf4("%s: errcount layer %d size %d\n", __func__, i, count);
		ce_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		ue_per_layer[i] = edac_align_ptr(&ptr, sizeof(u32), count);
		tot_errcount += 2 * count;
	}

	debugf4("%s: allocating %d error counters\n", __func__, tot_errcount);
	pvt = edac_align_ptr(&ptr, sz_pvt, 1);
	size = ((unsigned long)pvt) + sz_pvt;

	debugf1("%s(): allocating %u bytes for mci data (%d %s, %d csrows/channels)\n",
		__func__, size,
		tot_dimms,
		per_rank ? "ranks" : "dimms",
		tot_csrows * tot_channels);
	mci = kzalloc(size, GFP_KERNEL);
	if (mci == NULL)
		return NULL;

	/* Adjust pointers so they point within the memory we just allocated
	 * rather than an imaginary chunk of memory located at address 0.
	 */
	layer = (struct edac_mc_layer *)(((char *)mci) + ((unsigned long)layer));
	csi = (struct csrow_info *)(((char *)mci) + ((unsigned long)csi));
	chi = (struct rank_info *)(((char *)mci) + ((unsigned long)chi));
	dimm = (struct dimm_info *)(((char *)mci) + ((unsigned long)dimm));
	for (i = 0; i < n_layers; i++) {
		mci->ce_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ce_per_layer[i]));
		mci->ue_per_layer[i] = (u32 *)((char *)mci + ((unsigned long)ue_per_layer[i]));
	}
	pvt = sz_pvt ? (((char *)mci) + ((unsigned long)pvt)) : NULL;

	/* setup index and various internal pointers */
	mci->mc_idx = mc_num;
	mci->csrows = csi;
	mci->dimms = dimm;
	mci->tot_dimms = tot_dimms;
	mci->pvt_info = pvt;
	mci->n_layers = n_layers;
	mci->layers = layer;
	memcpy(mci->layers, layers, sizeof(*layer) * n_layers);
	mci->nr_csrows = tot_csrows;
	mci->num_cschannel = tot_channels;
	mci->mem_is_per_rank = per_rank;

	/*
	 * Fill the csrow struct
	 */
	for (row = 0; row < tot_csrows; row++) {
		csr = &csi[row];
		csr->csrow_idx = row;
		csr->mci = mci;
		csr->nr_channels = tot_channels;
		chp = &chi[row * tot_channels];
		csr->channels = chp;

		for (chn = 0; chn < tot_channels; chn++) {
			chan = &chp[chn];
			chan->chan_idx = chn;
			chan->csrow = csr;
		}
	}

	/*
	 * Fill the dimm struct
	 */
	memset(&pos, 0, sizeof(pos));
	row = 0;
	chn = 0;
	debugf4("%s: initializing %d %s\n", __func__, tot_dimms,
		per_rank ? "ranks" : "dimms");
	for (i = 0; i < tot_dimms; i++) {
		chan = &csi[row].channels[chn];
		dimm = EDAC_DIMM_PTR(layer, mci->dimms, n_layers,
				     pos[0], pos[1], pos[2]);
		dimm->mci = mci;

		debugf2("%s: %d: %s%zd (%d:%d:%d): row %d, chan %d\n", __func__,
			i, per_rank ? "rank" : "dimm", (dimm - mci->dimms),
			pos[0], pos[1], pos[2], row, chn);

		/*
		 * Copy DIMM location and initialize it.
		 */
		len = sizeof(dimm->label);
		p = dimm->label;
		n = snprintf(p, len, "mc#%u", mc_num);
		p += n;
		len -= n;
		for (j = 0; j < n_layers; j++) {
			n = snprintf(p, len, "%s#%u",
				     edac_layer_name[layers[j].type],
				     pos[j]);
			p += n;
			len -= n;
			dimm->location[j] = pos[j];

			if (len <= 0)
				break;
		}

		/* Link it to the csrows old API data */
		chan->dimm = dimm;
		dimm->csrow = row;
		dimm->cschannel = chn;

		/* Increment csrow location */
		row++;
		if (row == tot_csrows) {
			row = 0;
			chn++;
		}

		/* Increment dimm location */
		for (j = n_layers - 1; j >= 0; j--) {
			pos[j]++;
			if (pos[j] < layers[j].size)
				break;
			pos[j] = 0;
		}
	}

	mci->op_state = OP_ALLOC;
	INIT_LIST_HEAD(&mci->grp_kobj_list);

	/*
	 * Initialize the 'root' kobj for the edac_mc controller
	 */
	err = edac_mc_register_sysfs_main_kobj(mci);
	if (err) {
		kfree(mci);
		return NULL;
	}

	/* At this point, the root kobj is valid; in order to 'free' the
	 * object, edac_mc_unregister_sysfs_main_kobj() must be called,
	 * which performs the kobj unregistration.  The actual free then
	 * happens in the kobject release callback.
	 */
	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_alloc);
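
/*
 * Illustrative sketch only (my_pvt_info is a hypothetical driver-private
 * struct): a driver describing a controller with 2 chip-select rows and
 * 2 channels would typically build the layer array and allocate the mci
 * like this:
 *
 *	struct edac_mc_layer layers[2];
 *	struct mem_ctl_info *mci;
 *
 *	layers[0].type = EDAC_MC_LAYER_CHIP_SELECT;
 *	layers[0].size = 2;
 *	layers[0].is_virt_csrow = true;
 *	layers[1].type = EDAC_MC_LAYER_CHANNEL;
 *	layers[1].size = 2;
 *	layers[1].is_virt_csrow = false;
 *
 *	mci = edac_mc_alloc(0, ARRAY_SIZE(layers), layers,
 *			    sizeof(struct my_pvt_info));
 */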

/**
 * edac_mc_free
 *	'Free' a previously allocated 'mci' structure
 * @mci: pointer to a struct mem_ctl_info structure
 */
void edac_mc_free(struct mem_ctl_info *mci)
{
	debugf1("%s()\n", __func__);

	edac_mc_unregister_sysfs_main_kobj(mci);

	/* free the mci instance memory here */
	kfree(mci);
}
EXPORT_SYMBOL_GPL(edac_mc_free);


/**
 * find_mci_by_dev
 *
 *	scan list of controllers looking for the one that manages
 *	the 'dev' device
 * @dev: pointer to a struct device related with the MCI
 */
struct mem_ctl_info *find_mci_by_dev(struct device *dev)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	debugf3("%s()\n", __func__);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->dev == dev)
			return mci;
	}

	return NULL;
}
EXPORT_SYMBOL_GPL(find_mci_by_dev);

/*
 * handler for EDAC to check if NMI type handler has asserted interrupt
 */
static int edac_mc_assert_error_check_and_clear(void)
{
	int old_state;

	if (edac_op_state == EDAC_OPSTATE_POLL)
		return 1;

	old_state = edac_err_assert;
	edac_err_assert = 0;

	return old_state;
}

/*
 * edac_mc_workq_function
 *	performs the operation scheduled by a workq request
 */
static void edac_mc_workq_function(struct work_struct *work_req)
{
	struct delayed_work *d_work = to_delayed_work(work_req);
	struct mem_ctl_info *mci = to_edac_mem_ctl_work(d_work);

	mutex_lock(&mem_ctls_mutex);

	/* if this control struct has moved to offline state, we are done */
	if (mci->op_state == OP_OFFLINE) {
		mutex_unlock(&mem_ctls_mutex);
		return;
	}

	/* Only poll controllers that are running polled and have a check */
	if (edac_mc_assert_error_check_and_clear() && (mci->edac_check != NULL))
		mci->edac_check(mci);

	mutex_unlock(&mem_ctls_mutex);

	/* Reschedule */
	queue_delayed_work(edac_workqueue, &mci->work,
			   msecs_to_jiffies(edac_mc_get_poll_msec()));
}

/*
 * edac_mc_workq_setup
 *	initialize a workq item for this mci
 *	passing in the new delay period in msec
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex held
 */
static void edac_mc_workq_setup(struct mem_ctl_info *mci, unsigned msec)
{
	debugf0("%s()\n", __func__);

	/* if this instance is not in the POLL state, then simply return */
	if (mci->op_state != OP_RUNNING_POLL)
		return;

	INIT_DELAYED_WORK(&mci->work, edac_mc_workq_function);
	queue_delayed_work(edac_workqueue, &mci->work, msecs_to_jiffies(msec));
}

/*
 * edac_mc_workq_teardown
 *	stop the workq processing on this mci
 *
 *	locking model:
 *
 *		called WITHOUT lock held
 */
static void edac_mc_workq_teardown(struct mem_ctl_info *mci)
{
	int status;

	if (mci->op_state != OP_RUNNING_POLL)
		return;

	status = cancel_delayed_work(&mci->work);
	if (status == 0) {
		debugf0("%s() not canceled, flush the queue\n",
			__func__);

		/* workq instance might be running, wait for it */
		flush_workqueue(edac_workqueue);
	}
}

/*
 * edac_mc_reset_delay_period(int value)
 *
 *	user space has updated our poll period value, need to
 *	reset our workq delays
 */
void edac_mc_reset_delay_period(int value)
{
	struct mem_ctl_info *mci;
	struct list_head *item;

	mutex_lock(&mem_ctls_mutex);

	/* scan the list and turn off all workq timers, doing so under lock
	 */
	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->op_state == OP_RUNNING_POLL)
			cancel_delayed_work(&mci->work);
	}

	mutex_unlock(&mem_ctls_mutex);


	/* re-walk the list, and reset the poll delay */
	mutex_lock(&mem_ctls_mutex);

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		edac_mc_workq_setup(mci, (unsigned long) value);
	}

	mutex_unlock(&mem_ctls_mutex);
}


/* Return 0 on success, 1 on failure.
 * Before calling this function, caller must
 * assign a unique value to mci->mc_idx.
 *
 *	locking model:
 *
 *		called with the mem_ctls_mutex lock held
 */
static int add_mc_to_global_list(struct mem_ctl_info *mci)
{
	struct list_head *item, *insert_before;
	struct mem_ctl_info *p;

	insert_before = &mc_devices;

	p = find_mci_by_dev(mci->dev);
	if (unlikely(p != NULL))
		goto fail0;

	list_for_each(item, &mc_devices) {
		p = list_entry(item, struct mem_ctl_info, link);

		if (p->mc_idx >= mci->mc_idx) {
			if (unlikely(p->mc_idx == mci->mc_idx))
				goto fail1;

			insert_before = item;
			break;
		}
	}

	list_add_tail_rcu(&mci->link, insert_before);
	atomic_inc(&edac_handlers);
	return 0;

fail0:
	edac_printk(KERN_WARNING, EDAC_MC,
		"%s (%s) %s %s already assigned %d\n", dev_name(p->dev),
		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
	return 1;

fail1:
	edac_printk(KERN_WARNING, EDAC_MC,
		"bug in low-level driver: attempt to assign\n"
		"    duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
	return 1;
}

static void del_mc_from_global_list(struct mem_ctl_info *mci)
{
	atomic_dec(&edac_handlers);
	list_del_rcu(&mci->link);

	/* these are for safe removal of devices from global list while
	 * NMI handlers may be traversing list
	 */
	synchronize_rcu();
	INIT_LIST_HEAD(&mci->link);
}

/**
 * edac_mc_find: Search for a mem_ctl_info structure whose index is 'idx'.
 *
 * If found, return a pointer to the structure.
 * Else return NULL.
 *
 * Caller must hold mem_ctls_mutex.
 */
struct mem_ctl_info *edac_mc_find(int idx)
{
	struct list_head *item;
	struct mem_ctl_info *mci;

	list_for_each(item, &mc_devices) {
		mci = list_entry(item, struct mem_ctl_info, link);

		if (mci->mc_idx >= idx) {
			if (mci->mc_idx == idx)
				return mci;

			break;
		}
	}

	return NULL;
}
EXPORT_SYMBOL(edac_mc_find);

/**
 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
 *	create sysfs entries associated with mci structure
 * @mci: pointer to the mci structure to be added to the list
 *
 * Return:
 *	0	Success
 *	!0	Failure
 */

/* FIXME - should a warning be printed if no error detection? correction? */
int edac_mc_add_mc(struct mem_ctl_info *mci)
{
	debugf0("%s()\n", __func__);

#ifdef CONFIG_EDAC_DEBUG
	if (edac_debug_level >= 3)
		edac_mc_dump_mci(mci);

	if (edac_debug_level >= 4) {
		int i;

		for (i = 0; i < mci->nr_csrows; i++) {
			int j;

			edac_mc_dump_csrow(&mci->csrows[i]);
			for (j = 0; j < mci->csrows[i].nr_channels; j++)
				edac_mc_dump_channel(&mci->csrows[i].
						channels[j]);
		}
		for (i = 0; i < mci->tot_dimms; i++)
			edac_mc_dump_dimm(&mci->dimms[i]);
	}
#endif
	mutex_lock(&mem_ctls_mutex);

	if (add_mc_to_global_list(mci))
		goto fail0;

	/* set load time so that error rate can be tracked */
	mci->start_time = jiffies;

	if (edac_create_sysfs_mci_device(mci)) {
		edac_mc_printk(mci, KERN_WARNING,
			"failed to create sysfs device\n");
		goto fail1;
	}

	/* If there IS a check routine, then we are running POLLED */
	if (mci->edac_check != NULL) {
		/* This instance is NOW RUNNING */
		mci->op_state = OP_RUNNING_POLL;

		edac_mc_workq_setup(mci, edac_mc_get_poll_msec());
	} else {
		mci->op_state = OP_RUNNING_INTERRUPT;
	}

	/* Report action taken */
	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	mutex_unlock(&mem_ctls_mutex);
	return 0;

fail1:
	del_mc_from_global_list(mci);

fail0:
	mutex_unlock(&mem_ctls_mutex);
	return 1;
}
EXPORT_SYMBOL_GPL(edac_mc_add_mc);
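
/*
 * Illustrative probe-time sequence (pdev, my_check_function and the string
 * values are hypothetical driver details): after edac_mc_alloc(), the driver
 * fills in the identification fields and, optionally, an edac_check hook
 * (which switches the instance to polled operation), and only then registers
 * the controller:
 *
 *	mci->dev = &pdev->dev;
 *	mci->mod_name = "my_edac_driver";
 *	mci->ctl_name = "my_controller";
 *	mci->edac_check = my_check_function;
 *
 *	if (edac_mc_add_mc(mci)) {
 *		edac_mc_free(mci);
 *		return -ENODEV;
 *	}
 */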

/**
 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
 *	remove mci structure from global list
 * @dev: Pointer to 'struct device' representing mci structure to remove.
 *
 * Return pointer to removed mci structure, or NULL if device not found.
 */
struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
{
	struct mem_ctl_info *mci;

	debugf0("%s()\n", __func__);

	mutex_lock(&mem_ctls_mutex);

	/* find the requested mci struct in the global list */
	mci = find_mci_by_dev(dev);
	if (mci == NULL) {
		mutex_unlock(&mem_ctls_mutex);
		return NULL;
	}

	del_mc_from_global_list(mci);
	mutex_unlock(&mem_ctls_mutex);

	/* flush workq processes */
	edac_mc_workq_teardown(mci);

	/* marking MCI offline */
	mci->op_state = OP_OFFLINE;

	/* remove from sysfs */
	edac_remove_sysfs_mci_device(mci);

	edac_printk(KERN_INFO, EDAC_MC,
		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
		mci->mod_name, mci->ctl_name, edac_dev_name(mci));

	return mci;
}
EXPORT_SYMBOL_GPL(edac_mc_del_mc);
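
/*
 * Illustrative remove-time sequence (pdev is a hypothetical driver device):
 * the mci is looked up and unregistered by its device, then released:
 *
 *	struct mem_ctl_info *mci = edac_mc_del_mc(&pdev->dev);
 *
 *	if (mci)
 *		edac_mc_free(mci);
 */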

static void edac_mc_scrub_block(unsigned long page, unsigned long offset,
				u32 size)
{
	struct page *pg;
	void *virt_addr;
	unsigned long flags = 0;

	debugf3("%s()\n", __func__);

	/* ECC error page was not in our memory. Ignore it. */
	if (!pfn_valid(page))
		return;

	/* Find the actual page structure then map it and fix */
	pg = pfn_to_page(page);

	if (PageHighMem(pg))
		local_irq_save(flags);

	virt_addr = kmap_atomic(pg);

	/* Perform architecture specific atomic scrub operation */
	atomic_scrub(virt_addr + offset, size);

	/* Unmap and complete */
	kunmap_atomic(virt_addr);

	if (PageHighMem(pg))
		local_irq_restore(flags);
}

/* FIXME - should return -1 */
int edac_mc_find_csrow_by_page(struct mem_ctl_info *mci, unsigned long page)
{
	struct csrow_info *csrows = mci->csrows;
	int row, i, j, n;

	debugf1("MC%d: %s(): 0x%lx\n", mci->mc_idx, __func__, page);
	row = -1;

	for (i = 0; i < mci->nr_csrows; i++) {
		struct csrow_info *csrow = &csrows[i];
		n = 0;
		for (j = 0; j < csrow->nr_channels; j++) {
			struct dimm_info *dimm = csrow->channels[j].dimm;
			n += dimm->nr_pages;
		}
		if (n == 0)
			continue;

		debugf3("MC%d: %s(): first(0x%lx) page(0x%lx) last(0x%lx) "
			"mask(0x%lx)\n", mci->mc_idx, __func__,
			csrow->first_page, page, csrow->last_page,
			csrow->page_mask);

		if ((page >= csrow->first_page) &&
		    (page <= csrow->last_page) &&
		    ((page & csrow->page_mask) ==
		     (csrow->first_page & csrow->page_mask))) {
			row = i;
			break;
		}
	}

	if (row == -1)
		edac_mc_printk(mci, KERN_ERR,
			"could not look up page error address %lx\n",
			(unsigned long)page);

	return row;
}
EXPORT_SYMBOL_GPL(edac_mc_find_csrow_by_page);

const char *edac_layer_name[] = {
	[EDAC_MC_LAYER_BRANCH] = "branch",
	[EDAC_MC_LAYER_CHANNEL] = "channel",
	[EDAC_MC_LAYER_SLOT] = "slot",
	[EDAC_MC_LAYER_CHIP_SELECT] = "csrow",
};
EXPORT_SYMBOL_GPL(edac_layer_name);

static void edac_inc_ce_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ce_mc++;

	if (!enable_per_layer_report) {
		mci->ce_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ce_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_inc_ue_error(struct mem_ctl_info *mci,
			      bool enable_per_layer_report,
			      const int pos[EDAC_MAX_LAYERS])
{
	int i, index = 0;

	mci->ue_mc++;

	if (!enable_per_layer_report) {
		mci->ue_noinfo_count++;
		return;
	}

	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			break;
		index += pos[i];
		mci->ue_per_layer[i][index]++;

		if (i < mci->n_layers - 1)
			index *= mci->layers[i + 1].size;
	}
}

static void edac_ce_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  u32 grain)
{
	unsigned long remapped_page;

	if (edac_mc_get_log_ce()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s - %s)\n",
				       msg, label, location,
				       detail, other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "CE %s on %s (%s%s)\n",
				       msg, label, location,
				       detail);
	}
	edac_inc_ce_error(mci, enable_per_layer_report, pos);

	if (mci->scrub_mode & SCRUB_SW_SRC) {
		/*
		 * Some memory controllers (called MCs below) can remap
		 * memory so that it is still available at a different
		 * address when PCI devices map into memory.
		 * MC's that can't do this, lose the memory where PCI
		 * devices are mapped. This mapping is MC-dependent
		 * and so we call back into the MC driver for it to
		 * map the MC page to a physical (CPU) page which can
		 * then be mapped to a virtual page - which can then
		 * be scrubbed.
		 */
		remapped_page = mci->ctl_page_to_phys ?
			mci->ctl_page_to_phys(mci, page_frame_number) :
			page_frame_number;

		edac_mc_scrub_block(remapped_page,
				    offset_in_page, grain);
	}
}

static void edac_ue_error(struct mem_ctl_info *mci,
			  const int pos[EDAC_MAX_LAYERS],
			  const char *msg,
			  const char *location,
			  const char *label,
			  const char *detail,
			  const char *other_detail,
			  const bool enable_per_layer_report)
{
	if (edac_mc_get_log_ue()) {
		if (other_detail && *other_detail)
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s - %s)\n",
				       msg, label, location, detail,
				       other_detail);
		else
			edac_mc_printk(mci, KERN_WARNING,
				       "UE %s on %s (%s%s)\n",
				       msg, label, location, detail);
	}

	if (edac_mc_get_panic_on_ue()) {
		if (other_detail && *other_detail)
			panic("UE %s on %s (%s%s - %s)\n",
			      msg, label, location, detail, other_detail);
		else
			panic("UE %s on %s (%s%s)\n",
			      msg, label, location, detail);
	}

	edac_inc_ue_error(mci, enable_per_layer_report, pos);
}

#define OTHER_LABEL " or "
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
			  struct mem_ctl_info *mci,
			  const unsigned long page_frame_number,
			  const unsigned long offset_in_page,
			  const unsigned long syndrome,
			  const int layer0,
			  const int layer1,
			  const int layer2,
			  const char *msg,
			  const char *other_detail,
			  const void *mcelog)
{
	/* FIXME: too much for stack: move it to some pre-allocated area */
	char detail[80], location[80];
	char label[(EDAC_MC_LABEL_LEN + 1 + sizeof(OTHER_LABEL)) * mci->tot_dimms];
	char *p;
	int row = -1, chan = -1;
	int pos[EDAC_MAX_LAYERS] = { layer0, layer1, layer2 };
	int i;
	u32 grain;
	bool enable_per_layer_report = false;

	debugf3("MC%d: %s()\n", mci->mc_idx, __func__);

	/*
	 * Check if the event report is consistent and if the memory
	 * location is known. If it is known, enable_per_layer_report will be
	 * true, the DIMM(s) label info will be filled and the per-layer
	 * error counters will be incremented.
	 */
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] >= (int)mci->layers[i].size) {
			if (type == HW_EVENT_ERR_CORRECTED)
				p = "CE";
			else
				p = "UE";

			edac_mc_printk(mci, KERN_ERR,
				       "INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
				       edac_layer_name[mci->layers[i].type],
				       pos[i], mci->layers[i].size);
			/*
			 * Instead of just returning it, let's use what's
			 * known about the error. The increment routines and
			 * the DIMM filter logic will do the right thing by
			 * pointing to the likely damaged DIMMs.
			 */
			pos[i] = -1;
		}
		if (pos[i] >= 0)
			enable_per_layer_report = true;
	}

	/*
	 * Get the dimm label/grain that applies to the match criteria.
	 * As the error algorithm may not be able to point to just one memory
	 * stick, the logic here will get all possible labels that could
	 * potentially be affected by the error.
	 * On FB-DIMM memory controllers, for uncorrected errors, it is common
	 * to have only the MC channel and the MC dimm (also called "branch")
	 * but the channel is not known, as the memory is arranged in pairs,
	 * where each memory belongs to a separate channel within the same
	 * branch.
	 */
	grain = 0;
	p = label;
	*p = '\0';
	for (i = 0; i < mci->tot_dimms; i++) {
		struct dimm_info *dimm = &mci->dimms[i];

		if (layer0 >= 0 && layer0 != dimm->location[0])
			continue;
		if (layer1 >= 0 && layer1 != dimm->location[1])
			continue;
		if (layer2 >= 0 && layer2 != dimm->location[2])
			continue;

		/* get the max grain, over the error match range */
		if (dimm->grain > grain)
			grain = dimm->grain;

		/*
		 * If the error is memory-controller wide, there's no need to
		 * seek for the affected DIMMs because the whole
		 * channel/memory controller/... may be affected.
		 * Also, don't show errors for empty DIMM slots.
		 */
		if (enable_per_layer_report && dimm->nr_pages) {
			if (p != label) {
				strcpy(p, OTHER_LABEL);
				p += strlen(OTHER_LABEL);
			}
			strcpy(p, dimm->label);
			p += strlen(p);
			*p = '\0';

			/*
			 * get csrow/channel of the DIMM, in order to allow
			 * incrementing the compat API counters
			 */
			debugf4("%s: %s csrows map: (%d,%d)\n",
				__func__,
				mci->mem_is_per_rank ? "rank" : "dimm",
				dimm->csrow, dimm->cschannel);

			if (row == -1)
				row = dimm->csrow;
			else if (row >= 0 && row != dimm->csrow)
				row = -2;

			if (chan == -1)
				chan = dimm->cschannel;
			else if (chan >= 0 && chan != dimm->cschannel)
				chan = -2;
		}
	}

	if (!enable_per_layer_report) {
		strcpy(label, "any memory");
	} else {
		debugf4("%s: csrow/channel to increment: (%d,%d)\n",
			__func__, row, chan);
		if (p == label)
			strcpy(label, "unknown memory");
		if (type == HW_EVENT_ERR_CORRECTED) {
			if (row >= 0) {
				mci->csrows[row].ce_count++;
				if (chan >= 0)
					mci->csrows[row].channels[chan].ce_count++;
			}
		} else
			if (row >= 0)
				mci->csrows[row].ue_count++;
	}

	/* Fill the RAM location data */
	p = location;
	for (i = 0; i < mci->n_layers; i++) {
		if (pos[i] < 0)
			continue;

		p += sprintf(p, "%s:%d ",
			     edac_layer_name[mci->layers[i].type],
			     pos[i]);
	}

	/* Memory type dependent details about the error */
	if (type == HW_EVENT_ERR_CORRECTED) {
		snprintf(detail, sizeof(detail),
			 "page:0x%lx offset:0x%lx grain:%d syndrome:0x%lx",
			 page_frame_number, offset_in_page,
			 grain, syndrome);
		edac_ce_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report,
			      page_frame_number, offset_in_page, grain);
	} else {
		snprintf(detail, sizeof(detail),
			 "page:0x%lx offset:0x%lx grain:%d",
			 page_frame_number, offset_in_page, grain);

		edac_ue_error(mci, pos, msg, location, label, detail,
			      other_detail, enable_per_layer_report);
	}
}
EXPORT_SYMBOL_GPL(edac_mc_handle_error);
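
/*
 * Illustrative call only (the layer indices and message are hypothetical
 * driver values): a driver using a channel/slot layout that detected a
 * corrected error on channel 0, slot 1, with no page/offset/syndrome data
 * and no driver-specific detail, could report it as:
 *
 *	edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci,
 *			     0, 0, 0,
 *			     0, 1, -1,
 *			     "ECC error", "", NULL);
 */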