Blame - arch/ppc64/kernel/eeh.c - kernel/hikey-linaro

blob: d63d41f3eecf788b6107269d36df2097075a637f [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame^]	1	/*
				2	* eeh.c
				3	* Copyright (C) 2001 Dave Engebretsen & Todd Inglett IBM Corporation
				4	*
				5	* This program is free software; you can redistribute it and/or modify
				6	* it under the terms of the GNU General Public License as published by
				7	* the Free Software Foundation; either version 2 of the License, or
				8	* (at your option) any later version.
				9	*
				10	* This program is distributed in the hope that it will be useful,
				11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				13	* GNU General Public License for more details.
				14	*
				15	* You should have received a copy of the GNU General Public License
				16	* along with this program; if not, write to the Free Software
				17	* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
				18	*/
				19
				20	#include <linux/bootmem.h>
				21	#include <linux/init.h>
				22	#include <linux/list.h>
				23	#include <linux/mm.h>
				24	#include <linux/notifier.h>
				25	#include <linux/pci.h>
				26	#include <linux/proc_fs.h>
				27	#include <linux/rbtree.h>
				28	#include <linux/seq_file.h>
				29	#include <linux/spinlock.h>
				30	#include <asm/eeh.h>
				31	#include <asm/io.h>
				32	#include <asm/machdep.h>
				33	#include <asm/rtas.h>
				34	#include <asm/atomic.h>
				35	#include <asm/systemcfg.h>
				36	#include "pci.h"
				37
				38	#undef DEBUG
				39
				40	/** Overview:
				41	* EEH, or "Extended Error Handling" is a PCI bridge technology for
				42	* dealing with PCI bus errors that can't be dealt with within the
				43	* usual PCI framework, except by check-stopping the CPU. Systems
				44	* that are designed for high-availability/reliability cannot afford
				45	* to crash due to a "mere" PCI error, thus the need for EEH.
				46	* An EEH-capable bridge operates by converting a detected error
				47	* into a "slot freeze", taking the PCI adapter off-line, making
				48	* the slot behave, from the OS'es point of view, as if the slot
				49	* were "empty": all reads return 0xff's and all writes are silently
				50	* ignored. EEH slot isolation events can be triggered by parity
				51	* errors on the address or data busses (e.g. during posted writes),
				52	* which in turn might be caused by dust, vibration, humidity,
				53	* radioactivity or plain-old failed hardware.
				54	*
				55	* Note, however, that one of the leading causes of EEH slot
				56	* freeze events are buggy device drivers, buggy device microcode,
				57	* or buggy device hardware. This is because any attempt by the
				58	* device to bus-master data to a memory address that is not
				59	* assigned to the device will trigger a slot freeze. (The idea
				60	* is to prevent devices-gone-wild from corrupting system memory).
				61	* Buggy hardware/drivers will have a miserable time co-existing
				62	* with EEH.
				63	*
				64	* Ideally, a PCI device driver, when suspecting that an isolation
				65	* event has occurred (e.g. by reading 0xff's), will then ask EEH
				66	* whether this is the case, and then take appropriate steps to
				67	* reset the PCI slot, the PCI device, and then resume operations.
				68	* However, until that day, the checking is done here, with the
				69	* eeh_check_failure() routine embedded in the MMIO macros. If
				70	* the slot is found to be isolated, an "EEH Event" is synthesized
				71	* and sent out for processing.
				72	*/
				73
				/*
				 * Split a 64-bit Bus Unit ID (BUID) into its upper and lower
				 * 32-bit words.
				 */
				#define BUID_HI(id)	((id) >> 32)
				#define BUID_LO(id)	((id) & 0xffffffff)
				77
				/*
				 * Deferred EEH event processing.  Detected events are queued on
				 * eeh_eventlist under eeh_eventlist_lock and are later drained by
				 * eeh_event_handler(), which is run as the eeh_event_wq work item.
				 */
				static void eeh_event_handler(void *dummy);

				LIST_HEAD(eeh_eventlist);
				static DEFINE_SPINLOCK(eeh_eventlist_lock);
				DECLARE_WORK(eeh_event_wq, eeh_event_handler, NULL);

				/* Chain of callbacks registered through eeh_register_notifier(). */
				static struct notifier_block *eeh_notifier_chain;
				85
				86	/*
				87	* If a device driver keeps reading an MMIO register in an interrupt
				88	* handler after a slot isolation event has occurred, we assume it
				89	* is broken and panic. This sets the threshold for how many read
				90	* attempts we allow before panicking.
				91	*/
				92	#define EEH_MAX_FAILS 1000
				93	static atomic_t eeh_fail_count;
				94
				95	/* RTAS tokens */
				96	static int ibm_set_eeh_option;
				97	static int ibm_set_slot_reset;
				98	static int ibm_read_slot_reset_state;
				99	static int ibm_read_slot_reset_state2;
				100	static int ibm_slot_error_detail;
				101
				102	static int eeh_subsystem_enabled;
				103
				104	/* Buffer for reporting slot-error-detail rtas calls */
				105	static unsigned char slot_errbuf[RTAS_ERROR_LOG_MAX];
				106	static DEFINE_SPINLOCK(slot_errbuf_lock);
				107	static int eeh_error_buf_size;
				108
				109	/* System monitoring statistics */
				110	static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);
				111	static DEFINE_PER_CPU(unsigned long, false_positives);
				112	static DEFINE_PER_CPU(unsigned long, ignored_failures);
				113	static DEFINE_PER_CPU(unsigned long, slot_resets);
				114
				115	/**
				116	* The pci address cache subsystem. This subsystem places
				117	* PCI device address resources into a red-black tree, sorted
				118	* according to the address range, so that given only an i/o
				119	* address, the corresponding PCI device can be quickly
				120	* found. It is safe to perform an address lookup in an interrupt
				121	* context; this ability is an important feature.
				122	*
				123	* Currently, the only customer of this code is the EEH subsystem;
				124	* thus, this code has been somewhat tailored to suit EEH better.
				125	* In particular, the cache does not hold the addresses of devices
				126	* for which EEH is not enabled.
				127	*
				128	* (Implementation Note: The RB tree seems to be better/faster
				129	* than any hash algo I could think of for this problem, even
				130	* with the penalty of slow pointer chases for d-cache misses).
				131	*/
				132	struct pci_io_addr_range
				133	{
				134	struct rb_node rb_node;
				135	unsigned long addr_lo;
				136	unsigned long addr_hi;
				137	struct pci_dev *pcidev;
				138	unsigned int flags;
				139	};
				140
				141	static struct pci_io_addr_cache
				142	{
				143	struct rb_root rb_root;
				144	spinlock_t piar_lock;
				145	} pci_io_addr_cache_root;
				146
				147	static inline struct pci_dev *__pci_get_device_by_addr(unsigned long addr)
				148	{
				149	struct rb_node *n = pci_io_addr_cache_root.rb_root.rb_node;
				150
				151	while (n) {
				152	struct pci_io_addr_range *piar;
				153	piar = rb_entry(n, struct pci_io_addr_range, rb_node);
				154
				155	if (addr < piar->addr_lo) {
				156	n = n->rb_left;
				157	} else {
				158	if (addr > piar->addr_hi) {
				159	n = n->rb_right;
				160	} else {
				161	pci_dev_get(piar->pcidev);
				162	return piar->pcidev;
				163	}
				164	}
				165	}
				166
				167	return NULL;
				168	}
				169
				170	/**
				171	* pci_get_device_by_addr - Get device, given only address
				172	* @addr: mmio (PIO) phys address or i/o port number
				173	*
				174	* Given an mmio phys address, or a port number, find a pci device
				175	* that implements this address. Be sure to pci_dev_put the device
				176	* when finished. I/O port numbers are assumed to be offset
				177	* from zero (that is, they do not have pci_io_addr added in).
				178	* It is safe to call this function within an interrupt.
				179	*/
				180	static struct pci_dev *pci_get_device_by_addr(unsigned long addr)
				181	{
				182	struct pci_dev *dev;
				183	unsigned long flags;
				184
				185	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
				186	dev = __pci_get_device_by_addr(addr);
				187	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
				188	return dev;
				189	}
				190
				#ifdef DEBUG
				/*
				 * Debug aid: print every range held in the address cache, in the
				 * order produced by rb_first()/rb_next().
				 */
				static void pci_addr_cache_print(struct pci_io_addr_cache *cache)
				{
					struct rb_node *node;
					int idx;

					for (node = rb_first(&cache->rb_root), idx = 0; node;
					     node = rb_next(node), idx++) {
						struct pci_io_addr_range *range =
							rb_entry(node, struct pci_io_addr_range, rb_node);
						const char *kind =
							(range->flags & IORESOURCE_IO) ? "i/o" : "mem";

						printk(KERN_DEBUG "PCI: %s addr range %d [%lx-%lx]: %s %s\n",
						       kind, idx,
						       range->addr_lo, range->addr_hi,
						       pci_name(range->pcidev),
						       pci_pretty_name(range->pcidev));
					}
				}
				#endif
				214
				215	/* Insert address range into the rb tree. */
				216	static struct pci_io_addr_range *
				217	pci_addr_cache_insert(struct pci_dev *dev, unsigned long alo,
				218	unsigned long ahi, unsigned int flags)
				219	{
				220	struct rb_node **p = &pci_io_addr_cache_root.rb_root.rb_node;
				221	struct rb_node *parent = NULL;
				222	struct pci_io_addr_range *piar;
				223
				224	/* Walk tree, find a place to insert into tree */
				225	while (*p) {
				226	parent = *p;
				227	piar = rb_entry(parent, struct pci_io_addr_range, rb_node);
				228	if (alo < piar->addr_lo) {
				229	p = &parent->rb_left;
				230	} else if (ahi > piar->addr_hi) {
				231	p = &parent->rb_right;
				232	} else {
				233	if (dev != piar->pcidev \|\|
				234	alo != piar->addr_lo \|\| ahi != piar->addr_hi) {
				235	printk(KERN_WARNING "PIAR: overlapping address range\n");
				236	}
				237	return piar;
				238	}
				239	}
				240	piar = (struct pci_io_addr_range *)kmalloc(sizeof(struct pci_io_addr_range), GFP_ATOMIC);
				241	if (!piar)
				242	return NULL;
				243
				244	piar->addr_lo = alo;
				245	piar->addr_hi = ahi;
				246	piar->pcidev = dev;
				247	piar->flags = flags;
				248
				249	rb_link_node(&piar->rb_node, parent, p);
				250	rb_insert_color(&piar->rb_node, &pci_io_addr_cache_root.rb_root);
				251
				252	return piar;
				253	}
				254
				255	static void __pci_addr_cache_insert_device(struct pci_dev *dev)
				256	{
				257	struct device_node *dn;
				258	int i;
				259	int inserted = 0;
				260
				261	dn = pci_device_to_OF_node(dev);
				262	if (!dn) {
				263	printk(KERN_WARNING "PCI: no pci dn found for dev=%s %s\n",
				264	pci_name(dev), pci_pretty_name(dev));
				265	return;
				266	}
				267
				268	/* Skip any devices for which EEH is not enabled. */
				269	if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) \|\|
				270	dn->eeh_mode & EEH_MODE_NOCHECK) {
				271	#ifdef DEBUG
				272	printk(KERN_INFO "PCI: skip building address cache for=%s %s\n",
				273	pci_name(dev), pci_pretty_name(dev));
				274	#endif
				275	return;
				276	}
				277
				278	/* The cache holds a reference to the device... */
				279	pci_dev_get(dev);
				280
				281	/* Walk resources on this device, poke them into the tree */
				282	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
				283	unsigned long start = pci_resource_start(dev,i);
				284	unsigned long end = pci_resource_end(dev,i);
				285	unsigned int flags = pci_resource_flags(dev,i);
				286
				287	/* We are interested only bus addresses, not dma or other stuff */
				288	if (0 == (flags & (IORESOURCE_IO \| IORESOURCE_MEM)))
				289	continue;
				290	if (start == 0 \|\| ~start == 0 \|\| end == 0 \|\| ~end == 0)
				291	continue;
				292	pci_addr_cache_insert(dev, start, end, flags);
				293	inserted = 1;
				294	}
				295
				296	/* If there was nothing to add, the cache has no reference... */
				297	if (!inserted)
				298	pci_dev_put(dev);
				299	}
				300
				301	/**
				302	* pci_addr_cache_insert_device - Add a device to the address cache
				303	* @dev: PCI device whose I/O addresses we are interested in.
				304	*
				305	* In order to support the fast lookup of devices based on addresses,
				306	* we maintain a cache of devices that can be quickly searched.
				307	* This routine adds a device to that cache.
				308	*/
				309	void pci_addr_cache_insert_device(struct pci_dev *dev)
				310	{
				311	unsigned long flags;
				312
				313	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
				314	__pci_addr_cache_insert_device(dev);
				315	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
				316	}
				317
				318	static inline void __pci_addr_cache_remove_device(struct pci_dev *dev)
				319	{
				320	struct rb_node *n;
				321	int removed = 0;
				322
				323	restart:
				324	n = rb_first(&pci_io_addr_cache_root.rb_root);
				325	while (n) {
				326	struct pci_io_addr_range *piar;
				327	piar = rb_entry(n, struct pci_io_addr_range, rb_node);
				328
				329	if (piar->pcidev == dev) {
				330	rb_erase(n, &pci_io_addr_cache_root.rb_root);
				331	removed = 1;
				332	kfree(piar);
				333	goto restart;
				334	}
				335	n = rb_next(n);
				336	}
				337
				338	/* The cache no longer holds its reference to this device... */
				339	if (removed)
				340	pci_dev_put(dev);
				341	}
				342
				343	/**
				344	* pci_addr_cache_remove_device - remove pci device from addr cache
				345	* @dev: device to remove
				346	*
				347	* Remove a device from the addr-cache tree.
				348	* This is potentially expensive, since it will walk
				349	* the tree multiple times (once per resource).
				350	* But so what; device removal doesn't need to be that fast.
				351	*/
				352	void pci_addr_cache_remove_device(struct pci_dev *dev)
				353	{
				354	unsigned long flags;
				355
				356	spin_lock_irqsave(&pci_io_addr_cache_root.piar_lock, flags);
				357	__pci_addr_cache_remove_device(dev);
				358	spin_unlock_irqrestore(&pci_io_addr_cache_root.piar_lock, flags);
				359	}
				360
				361	/**
				362	* pci_addr_cache_build - Build a cache of I/O addresses
				363	*
				364	* Build a cache of pci i/o addresses. This cache will be used to
				365	* find the pci device that corresponds to a given address.
				366	* This routine scans all pci busses to build the cache.
				367	* Must be run late in boot process, after the pci controllers
				368	* have been scaned for devices (after all device resources are known).
				369	*/
				370	void __init pci_addr_cache_build(void)
				371	{
				372	struct pci_dev *dev = NULL;
				373
				374	spin_lock_init(&pci_io_addr_cache_root.piar_lock);
				375
				376	while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
				377	/* Ignore PCI bridges ( XXX why ??) */
				378	if ((dev->class >> 16) == PCI_BASE_CLASS_BRIDGE) {
				379	continue;
				380	}
				381	pci_addr_cache_insert_device(dev);
				382	}
				383
				384	#ifdef DEBUG
				385	/* Verify tree built up above, echo back the list of addrs. */
				386	pci_addr_cache_print(&pci_io_addr_cache_root);
				387	#endif
				388	}
				389
				390	/* --------------------------------------------------------------- */
				391	/* Above lies the PCI Address Cache. Below lies the EEH event infrastructure */
				392
				393	/**
				394	* eeh_register_notifier - Register to find out about EEH events.
				395	* @nb: notifier block to callback on events
				396	*/
				397	int eeh_register_notifier(struct notifier_block *nb)
				398	{
				399	return notifier_chain_register(&eeh_notifier_chain, nb);
				400	}
				401
				402	/**
				403	* eeh_unregister_notifier - Unregister to an EEH event notifier.
				404	* @nb: notifier block to callback on events
				405	*/
				406	int eeh_unregister_notifier(struct notifier_block *nb)
				407	{
				408	return notifier_chain_unregister(&eeh_notifier_chain, nb);
				409	}
				410
				411	/**
				412	* read_slot_reset_state - Read the reset state of a device node's slot
				413	* @dn: device node to read
				414	* @rets: array to return results in
				415	*/
				416	static int read_slot_reset_state(struct device_node *dn, int rets[])
				417	{
				418	int token, outputs;
				419
				420	if (ibm_read_slot_reset_state2 != RTAS_UNKNOWN_SERVICE) {
				421	token = ibm_read_slot_reset_state2;
				422	outputs = 4;
				423	} else {
				424	token = ibm_read_slot_reset_state;
				425	outputs = 3;
				426	}
				427
				428	return rtas_call(token, 3, outputs, rets, dn->eeh_config_addr,
				429	BUID_HI(dn->phb->buid), BUID_LO(dn->phb->buid));
				430	}
				431
				432	/**
				433	* eeh_panic - call panic() for an eeh event that cannot be handled.
				434	* The philosophy of this routine is that it is better to panic and
				435	* halt the OS than it is to risk possible data corruption by
				436	* oblivious device drivers that don't know better.
				437	*
				438	* @dev pci device that had an eeh event
				439	* @reset_state current reset state of the device slot
				440	*/
				441	static void eeh_panic(struct pci_dev *dev, int reset_state)
				442	{
				443	/*
				444	* XXX We should create a separate sysctl for this.
				445	*
				446	* Since the panic_on_oops sysctl is used to halt the system
				447	* in light of potential corruption, we can use it here.
				448	*/
				449	if (panic_on_oops)
				450	panic("EEH: MMIO failure (%d) on device:%s %s\n", reset_state,
				451	pci_name(dev), pci_pretty_name(dev));
				452	else {
				453	__get_cpu_var(ignored_failures)++;
				454	printk(KERN_INFO "EEH: Ignored MMIO failure (%d) on device:%s %s\n",
				455	reset_state, pci_name(dev), pci_pretty_name(dev));
				456	}
				457	}
				458
				459	/**
				460	* eeh_event_handler - dispatch EEH events. The detection of a frozen
				461	* slot can occur inside an interrupt, where it can be hard to do
				462	* anything about it. The goal of this routine is to pull these
				463	* detection events out of the context of the interrupt handler, and
				464	* re-dispatch them for processing at a later time in a normal context.
				465	*
				466	* @dummy - unused
				467	*/
				468	static void eeh_event_handler(void *dummy)
				469	{
				470	unsigned long flags;
				471	struct eeh_event *event;
				472
				473	while (1) {
				474	spin_lock_irqsave(&eeh_eventlist_lock, flags);
				475	event = NULL;
				476	if (!list_empty(&eeh_eventlist)) {
				477	event = list_entry(eeh_eventlist.next, struct eeh_event, list);
				478	list_del(&event->list);
				479	}
				480	spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
				481	if (event == NULL)
				482	break;
				483
				484	printk(KERN_INFO "EEH: MMIO failure (%d), notifiying device "
				485	"%s %s\n", event->reset_state,
				486	pci_name(event->dev), pci_pretty_name(event->dev));
				487
				488	atomic_set(&eeh_fail_count, 0);
				489	notifier_call_chain (&eeh_notifier_chain,
				490	EEH_NOTIFY_FREEZE, event);
				491
				492	__get_cpu_var(slot_resets)++;
				493
				494	pci_dev_put(event->dev);
				495	kfree(event);
				496	}
				497	}
				498
				499	/**
				500	* eeh_token_to_phys - convert EEH address token to phys address
				501	* @token i/o token, should be address in the form 0xE....
				502	*/
				503	static inline unsigned long eeh_token_to_phys(unsigned long token)
				504	{
				505	pte_t *ptep;
				506	unsigned long pa;
				507
				508	ptep = find_linux_pte(ioremap_mm.pgd, token);
				509	if (!ptep)
				510	return token;
				511	pa = pte_pfn(*ptep) << PAGE_SHIFT;
				512
				513	return pa \| (token & (PAGE_SIZE-1));
				514	}
				515
				516	/**
				517	* eeh_dn_check_failure - check if all 1's data is due to EEH slot freeze
				518	* @dn device node
				519	* @dev pci device, if known
				520	*
				521	* Check for an EEH failure for the given device node. Call this
				522	* routine if the result of a read was all 0xff's and you want to
				523	* find out if this is due to an EEH slot freeze. This routine
				524	* will query firmware for the EEH status.
				525	*
				526	* Returns 0 if there has not been an EEH error; otherwise returns
				527	* a non-zero value and queues up a solt isolation event notification.
				528	*
				529	* It is safe to call this routine in an interrupt context.
				530	*/
				531	int eeh_dn_check_failure(struct device_node dn, struct pci_dev dev)
				532	{
				533	int ret;
				534	int rets[3];
				535	unsigned long flags;
				536	int rc, reset_state;
				537	struct eeh_event *event;
				538
				539	__get_cpu_var(total_mmio_ffs)++;
				540
				541	if (!eeh_subsystem_enabled)
				542	return 0;
				543
				544	if (!dn)
				545	return 0;
				546
				547	/* Access to IO BARs might get this far and still not want checking. */
				548	if (!(dn->eeh_mode & EEH_MODE_SUPPORTED) \|\|
				549	dn->eeh_mode & EEH_MODE_NOCHECK) {
				550	return 0;
				551	}
				552
				553	if (!dn->eeh_config_addr) {
				554	return 0;
				555	}
				556
				557	/*
				558	* If we already have a pending isolation event for this
				559	* slot, we know it's bad already, we don't need to check...
				560	*/
				561	if (dn->eeh_mode & EEH_MODE_ISOLATED) {
				562	atomic_inc(&eeh_fail_count);
				563	if (atomic_read(&eeh_fail_count) >= EEH_MAX_FAILS) {
				564	/* re-read the slot reset state */
				565	if (read_slot_reset_state(dn, rets) != 0)
				566	rets[0] = -1; /* reset state unknown */
				567	eeh_panic(dev, rets[0]);
				568	}
				569	return 0;
				570	}
				571
				572	/*
				573	* Now test for an EEH failure. This is VERY expensive.
				574	* Note that the eeh_config_addr may be a parent device
				575	* in the case of a device behind a bridge, or it may be
				576	* function zero of a multi-function device.
				577	* In any case they must share a common PHB.
				578	*/
				579	ret = read_slot_reset_state(dn, rets);
				580	if (!(ret == 0 && rets[1] == 1 && (rets[0] == 2 \|\| rets[0] == 4))) {
				581	__get_cpu_var(false_positives)++;
				582	return 0;
				583	}
				584
				585	/* prevent repeated reports of this failure */
				586	dn->eeh_mode \|= EEH_MODE_ISOLATED;
				587
				588	reset_state = rets[0];
				589
				590	spin_lock_irqsave(&slot_errbuf_lock, flags);
				591	memset(slot_errbuf, 0, eeh_error_buf_size);
				592
				593	rc = rtas_call(ibm_slot_error_detail,
				594	8, 1, NULL, dn->eeh_config_addr,
				595	BUID_HI(dn->phb->buid),
				596	BUID_LO(dn->phb->buid), NULL, 0,
				597	virt_to_phys(slot_errbuf),
				598	eeh_error_buf_size,
				599	1 /* Temporary Error */);
				600
				601	if (rc == 0)
				602	log_error(slot_errbuf, ERR_TYPE_RTAS_LOG, 0);
				603	spin_unlock_irqrestore(&slot_errbuf_lock, flags);
				604
				605	printk(KERN_INFO "EEH: MMIO failure (%d) on device: %s %s\n",
				606	rets[0], dn->name, dn->full_name);
				607	event = kmalloc(sizeof(*event), GFP_ATOMIC);
				608	if (event == NULL) {
				609	eeh_panic(dev, reset_state);
				610	return 1;
				611	}
				612
				613	event->dev = dev;
				614	event->dn = dn;
				615	event->reset_state = reset_state;
				616
				617	/* We may or may not be called in an interrupt context */
				618	spin_lock_irqsave(&eeh_eventlist_lock, flags);
				619	list_add(&event->list, &eeh_eventlist);
				620	spin_unlock_irqrestore(&eeh_eventlist_lock, flags);
				621
				622	/* Most EEH events are due to device driver bugs. Having
				623	* a stack trace will help the device-driver authors figure
				624	* out what happened. So print that out. */
				625	dump_stack();
				626	schedule_work(&eeh_event_wq);
				627
				628	return 0;
				629	}
				630
				631	EXPORT_SYMBOL(eeh_dn_check_failure);
				632
				633	/**
				634	* eeh_check_failure - check if all 1's data is due to EEH slot freeze
				635	* @token i/o token, should be address in the form 0xA....
				636	* @val value, should be all 1's (XXX why do we need this arg??)
				637	*
				638	* Check for an eeh failure at the given token address.
				639	* Check for an EEH failure at the given token address. Call this
				640	* routine if the result of a read was all 0xff's and you want to
				641	* find out if this is due to an EEH slot freeze event. This routine
				642	* will query firmware for the EEH status.
				643	*
				644	* Note this routine is safe to call in an interrupt context.
				645	*/
				646	unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned long val)
				647	{
				648	unsigned long addr;
				649	struct pci_dev *dev;
				650	struct device_node *dn;
				651
				652	/* Finding the phys addr + pci device; this is pretty quick. */
				653	addr = eeh_token_to_phys((unsigned long __force) token);
				654	dev = pci_get_device_by_addr(addr);
				655	if (!dev)
				656	return val;
				657
				658	dn = pci_device_to_OF_node(dev);
				659	eeh_dn_check_failure (dn, dev);
				660
				661	pci_dev_put(dev);
				662	return val;
				663	}
				664
				665	EXPORT_SYMBOL(eeh_check_failure);
				666
				/* BUID of the PHB being scanned, split into words for early_enable_eeh(). */
				struct eeh_early_enable_info {
					unsigned int buid_hi;	/* upper 32 bits, from BUID_HI() */
					unsigned int buid_lo;	/* lower 32 bits, from BUID_LO() */
				};
				671
				672	/* Enable eeh for the given device node. */
				673	static void early_enable_eeh(struct device_node dn, void *data)
				674	{
				675	struct eeh_early_enable_info *info = data;
				676	int ret;
				677	char *status = get_property(dn, "status", NULL);
				678	u32 class_code = (u32 )get_property(dn, "class-code", NULL);
				679	u32 vendor_id = (u32 )get_property(dn, "vendor-id", NULL);
				680	u32 device_id = (u32 )get_property(dn, "device-id", NULL);
				681	u32 *regs;
				682	int enable;
				683
				684	dn->eeh_mode = 0;
				685
				686	if (status && strcmp(status, "ok") != 0)
				687	return NULL; /* ignore devices with bad status */
				688
				689	/* Ignore bad nodes. */
				690	if (!class_code \|\| !vendor_id \|\| !device_id)
				691	return NULL;
				692
				693	/* There is nothing to check on PCI to ISA bridges */
				694	if (dn->type && !strcmp(dn->type, "isa")) {
				695	dn->eeh_mode \|= EEH_MODE_NOCHECK;
				696	return NULL;
				697	}
				698
				699	/*
				700	* Now decide if we are going to "Disable" EEH checking
				701	* for this device. We still run with the EEH hardware active,
				702	* but we won't be checking for ff's. This means a driver
				703	* could return bad data (very bad!), an interrupt handler could
				704	* hang waiting on status bits that won't change, etc.
				705	* But there are a few cases like display devices that make sense.
				706	*/
				707	enable = 1; /* i.e. we will do checking */
				708	if ((*class_code >> 16) == PCI_BASE_CLASS_DISPLAY)
				709	enable = 0;
				710
				711	if (!enable)
				712	dn->eeh_mode \|= EEH_MODE_NOCHECK;
				713
				714	/* Ok... see if this device supports EEH. Some do, some don't,
				715	* and the only way to find out is to check each and every one. */
				716	regs = (u32 *)get_property(dn, "reg", NULL);
				717	if (regs) {
				718	/* First register entry is addr (00BBSS00) */
				719	/* Try to enable eeh */
				720	ret = rtas_call(ibm_set_eeh_option, 4, 1, NULL,
				721	regs[0], info->buid_hi, info->buid_lo,
				722	EEH_ENABLE);
				723	if (ret == 0) {
				724	eeh_subsystem_enabled = 1;
				725	dn->eeh_mode \|= EEH_MODE_SUPPORTED;
				726	dn->eeh_config_addr = regs[0];
				727	#ifdef DEBUG
				728	printk(KERN_DEBUG "EEH: %s: eeh enabled\n", dn->full_name);
				729	#endif
				730	} else {
				731
				732	/* This device doesn't support EEH, but it may have an
				733	* EEH parent, in which case we mark it as supported. */
				734	if (dn->parent && (dn->parent->eeh_mode & EEH_MODE_SUPPORTED)) {
				735	/* Parent supports EEH. */
				736	dn->eeh_mode \|= EEH_MODE_SUPPORTED;
				737	dn->eeh_config_addr = dn->parent->eeh_config_addr;
				738	return NULL;
				739	}
				740	}
				741	} else {
				742	printk(KERN_WARNING "EEH: %s: unable to get reg property.\n",
				743	dn->full_name);
				744	}
				745
				746	return NULL;
				747	}
				748
				749	/*
				750	* Initialize EEH by trying to enable it for all of the adapters in the system.
				751	* As a side effect we can determine here if eeh is supported at all.
				752	* Note that we leave EEH on so failed config cycles won't cause a machine
				753	* check. If a user turns off EEH for a particular adapter they are really
				754	* telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
				755	* grant access to a slot if EEH isn't enabled, and so we always enable
				756	* EEH for all slots/all devices.
				757	*
				758	* The eeh-force-off option disables EEH checking globally, for all slots.
				759	* Even if force-off is set, the EEH hardware is still enabled, so that
				760	* newer systems can boot.
				761	*/
				762	void __init eeh_init(void)
				763	{
				764	struct device_node phb, np;
				765	struct eeh_early_enable_info info;
				766
				767	np = of_find_node_by_path("/rtas");
				768	if (np == NULL)
				769	return;
				770
				771	ibm_set_eeh_option = rtas_token("ibm,set-eeh-option");
				772	ibm_set_slot_reset = rtas_token("ibm,set-slot-reset");
				773	ibm_read_slot_reset_state2 = rtas_token("ibm,read-slot-reset-state2");
				774	ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
				775	ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
				776
				777	if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)
				778	return;
				779
				780	eeh_error_buf_size = rtas_token("rtas-error-log-max");
				781	if (eeh_error_buf_size == RTAS_UNKNOWN_SERVICE) {
				782	eeh_error_buf_size = 1024;
				783	}
				784	if (eeh_error_buf_size > RTAS_ERROR_LOG_MAX) {
				785	printk(KERN_WARNING "EEH: rtas-error-log-max is bigger than allocated "
				786	"buffer ! (%d vs %d)", eeh_error_buf_size, RTAS_ERROR_LOG_MAX);
				787	eeh_error_buf_size = RTAS_ERROR_LOG_MAX;
				788	}
				789
				790	/* Enable EEH for all adapters. Note that eeh requires buid's */
				791	for (phb = of_find_node_by_name(NULL, "pci"); phb;
				792	phb = of_find_node_by_name(phb, "pci")) {
				793	unsigned long buid;
				794
				795	buid = get_phb_buid(phb);
				796	if (buid == 0)
				797	continue;
				798
				799	info.buid_lo = BUID_LO(buid);
				800	info.buid_hi = BUID_HI(buid);
				801	traverse_pci_devices(phb, early_enable_eeh, &info);
				802	}
				803
				804	if (eeh_subsystem_enabled)
				805	printk(KERN_INFO "EEH: PCI Enhanced I/O Error Handling Enabled\n");
				806	else
				807	printk(KERN_WARNING "EEH: No capable adapters found\n");
				808	}
				809
				810	/**
				811	* eeh_add_device_early - enable EEH for the indicated device_node
				812	* @dn: device node for which to set up EEH
				813	*
				814	* This routine must be used to perform EEH initialization for PCI
				815	* devices that were added after system boot (e.g. hotplug, dlpar).
				816	* This routine must be called before any i/o is performed to the
				817	* adapter (inluding any config-space i/o).
				818	* Whether this actually enables EEH or not for this device depends
				819	* on the CEC architecture, type of the device, on earlier boot
				820	* command-line arguments & etc.
				821	*/
				822	void eeh_add_device_early(struct device_node *dn)
				823	{
				824	struct pci_controller *phb;
				825	struct eeh_early_enable_info info;
				826
				827	if (!dn)
				828	return;
				829	phb = dn->phb;
				830	if (NULL == phb \|\| 0 == phb->buid) {
				831	printk(KERN_WARNING "EEH: Expected buid but found none\n");
				832	return;
				833	}
				834
				835	info.buid_hi = BUID_HI(phb->buid);
				836	info.buid_lo = BUID_LO(phb->buid);
				837	early_enable_eeh(dn, &info);
				838	}
				839	EXPORT_SYMBOL(eeh_add_device_early);
				840
				841	/**
				842	* eeh_add_device_late - perform EEH initialization for the indicated pci device
				843	* @dev: pci device for which to set up EEH
				844	*
				845	* This routine must be used to complete EEH initialization for PCI
				846	* devices that were added after system boot (e.g. hotplug, dlpar).
				847	*/
				848	void eeh_add_device_late(struct pci_dev *dev)
				849	{
				850	if (!dev \|\| !eeh_subsystem_enabled)
				851	return;
				852
				853	#ifdef DEBUG
				854	printk(KERN_DEBUG "EEH: adding device %s %s\n", pci_name(dev),
				855	pci_pretty_name(dev));
				856	#endif
				857
				858	pci_addr_cache_insert_device (dev);
				859	}
				860	EXPORT_SYMBOL(eeh_add_device_late);
				861
				862	/**
				863	* eeh_remove_device - undo EEH setup for the indicated pci device
				864	* @dev: pci device to be removed
				865	*
				866	* This routine should be when a device is removed from a running
				867	* system (e.g. by hotplug or dlpar).
				868	*/
				869	void eeh_remove_device(struct pci_dev *dev)
				870	{
				871	if (!dev \|\| !eeh_subsystem_enabled)
				872	return;
				873
				874	/* Unregister the device with the EEH/PCI address search system */
				875	#ifdef DEBUG
				876	printk(KERN_DEBUG "EEH: remove device %s %s\n", pci_name(dev),
				877	pci_pretty_name(dev));
				878	#endif
				879	pci_addr_cache_remove_device(dev);
				880	}
				881	EXPORT_SYMBOL(eeh_remove_device);
				882
				883	static int proc_eeh_show(struct seq_file m, void v)
				884	{
				885	unsigned int cpu;
				886	unsigned long ffs = 0, positives = 0, failures = 0;
				887	unsigned long resets = 0;
				888
				889	for_each_cpu(cpu) {
				890	ffs += per_cpu(total_mmio_ffs, cpu);
				891	positives += per_cpu(false_positives, cpu);
				892	failures += per_cpu(ignored_failures, cpu);
				893	resets += per_cpu(slot_resets, cpu);
				894	}
				895
				896	if (0 == eeh_subsystem_enabled) {
				897	seq_printf(m, "EEH Subsystem is globally disabled\n");
				898	seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
				899	} else {
				900	seq_printf(m, "EEH Subsystem is enabled\n");
				901	seq_printf(m, "eeh_total_mmio_ffs=%ld\n"
				902	"eeh_false_positives=%ld\n"
				903	"eeh_ignored_failures=%ld\n"
				904	"eeh_slot_resets=%ld\n"
				905	"eeh_fail_count=%d\n",
				906	ffs, positives, failures, resets,
				907	eeh_fail_count.counter);
				908	}
				909
				910	return 0;
				911	}
				912
				913	static int proc_eeh_open(struct inode inode, struct file file)
				914	{
				915	return single_open(file, proc_eeh_show, NULL);
				916	}
				917
				918	static struct file_operations proc_eeh_operations = {
				919	.open = proc_eeh_open,
				920	.read = seq_read,
				921	.llseek = seq_lseek,
				922	.release = single_release,
				923	};
				924
				925	static int __init eeh_init_proc(void)
				926	{
				927	struct proc_dir_entry *e;
				928
				929	if (systemcfg->platform & PLATFORM_PSERIES) {
				930	e = create_proc_entry("ppc64/eeh", 0, NULL);
				931	if (e)
				932	e->proc_fops = &proc_eeh_operations;
				933	}
				934
				935	return 0;
				936	}
				937	__initcall(eeh_init_proc);