Blame - drivers/scsi/scsi_error.c - kernel/linaro-android

blob: dd6a9f61bdf1bc42a829587d57508ef0afe7b9c3 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* scsi_error.c Copyright (C) 1997 Eric Youngdale
				3	*
				4	* SCSI error/timeout handling
				5	* Initial versions: Eric Youngdale. Based upon conversations with
				6	* Leonard Zubkoff and David Miller at Linux Expo,
				7	* ideas originating from all over the place.
				8	*
				9	* Restructured scsi_unjam_host and associated functions.
				10	* September 04, 2002 Mike Anderson (andmike@us.ibm.com)
				11	*
				12	* Forward port of Russell King's (rmk@arm.linux.org.uk) changes and
				13	* minor cleanups.
				14	* September 30, 2002 Mike Anderson (andmike@us.ibm.com)
				15	*/
				16
				17	#include <linux/module.h>
				18	#include <linux/sched.h>
				19	#include <linux/timer.h>
				20	#include <linux/string.h>
				21	#include <linux/slab.h>
				22	#include <linux/kernel.h>
Christoph Hellwig	c5478de	2005-09-06 14:04:26 +0200	[diff] [blame]	23	#include <linux/kthread.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	24	#include <linux/interrupt.h>
				25	#include <linux/blkdev.h>
				26	#include <linux/delay.h>
				27
				28	#include <scsi/scsi.h>
				29	#include <scsi/scsi_dbg.h>
				30	#include <scsi/scsi_device.h>
				31	#include <scsi/scsi_eh.h>
				32	#include <scsi/scsi_host.h>
				33	#include <scsi/scsi_ioctl.h>
				34	#include <scsi/scsi_request.h>
				35
				36	#include "scsi_priv.h"
				37	#include "scsi_logging.h"
				38
				39	#define SENSE_TIMEOUT (10*HZ)
				40	#define START_UNIT_TIMEOUT (30*HZ)
				41
				42	/*
				43	* These should probably be handled by the host itself.
				44	* Since it is allowed to sleep, it probably should.
				45	*/
				46	#define BUS_RESET_SETTLE_TIME (10)
				47	#define HOST_RESET_SETTLE_TIME (10)
				48
				49	/* called with shost->host_lock held */
				50	void scsi_eh_wakeup(struct Scsi_Host *shost)
				51	{
				52	if (shost->host_busy == shost->host_failed) {
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	53	wake_up_process(shost->ehandler);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	54	SCSI_LOG_ERROR_RECOVERY(5,
				55	printk("Waking error handler thread\n"));
				56	}
				57	}
				58
				59	/**
				60	* scsi_eh_scmd_add - add scsi cmd to error handling.
				61	* @scmd: scmd to run eh on.
				62	* @eh_flag: optional SCSI_EH flag.
				63	*
				64	* Return value:
				65	* 0 on failure.
				66	**/
				67	int scsi_eh_scmd_add(struct scsi_cmnd *scmd, int eh_flag)
				68	{
				69	struct Scsi_Host *shost = scmd->device->host;
				70	unsigned long flags;
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	71	int ret = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	72
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	73	if (!shost->ehandler)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	74	return 0;
				75
				76	spin_lock_irqsave(shost->host_lock, flags);
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	77	if (scsi_host_set_state(shost, SHOST_RECOVERY))
				78	if (scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
				79	goto out_unlock;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	80
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	81	ret = 1;
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	82	scmd->eh_eflags \|= eh_flag;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	83	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	84	shost->host_failed++;
				85	scsi_eh_wakeup(shost);
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	86	out_unlock:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	87	spin_unlock_irqrestore(shost->host_lock, flags);
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	88	return ret;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	89	}
				90
				91	/**
				92	* scsi_add_timer - Start timeout timer for a single scsi command.
				93	* @scmd: scsi command that is about to start running.
				94	* @timeout: amount of time to allow this command to run.
				95	* @complete: timeout function to call if timer isn't canceled.
				96	*
				97	* Notes:
				98	* This should be turned into an inline function. Each scsi command
				99	* has its own timer, and as it is added to the queue, we set up the
				100	* timer. When the command completes, we cancel the timer.
				101	**/
				102	void scsi_add_timer(struct scsi_cmnd *scmd, int timeout,
				103	void (complete)(struct scsi_cmnd ))
				104	{
				105
				106	/*
				107	* If the clock was already running for this command, then
				108	* first delete the timer. The timer handling code gets rather
				109	* confused if we don't do this.
				110	*/
				111	if (scmd->eh_timeout.function)
				112	del_timer(&scmd->eh_timeout);
				113
				114	scmd->eh_timeout.data = (unsigned long)scmd;
				115	scmd->eh_timeout.expires = jiffies + timeout;
				116	scmd->eh_timeout.function = (void (*)(unsigned long)) complete;
				117
				118	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p, time:"
				119	" %d, (%p)\n", __FUNCTION__,
				120	scmd, timeout, complete));
				121
				122	add_timer(&scmd->eh_timeout);
				123	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	124
				125	/**
				126	* scsi_delete_timer - Delete/cancel timer for a given function.
				127	* @scmd: Cmd that we are canceling timer for
				128	*
				129	* Notes:
				130	* This should be turned into an inline function.
				131	*
				132	* Return value:
				133	* 1 if we were able to detach the timer. 0 if we blew it, and the
				134	* timer function has already started to run.
				135	**/
				136	int scsi_delete_timer(struct scsi_cmnd *scmd)
				137	{
				138	int rtn;
				139
				140	rtn = del_timer(&scmd->eh_timeout);
				141
				142	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: scmd: %p,"
				143	" rtn: %d\n", __FUNCTION__,
				144	scmd, rtn));
				145
				146	scmd->eh_timeout.data = (unsigned long)NULL;
				147	scmd->eh_timeout.function = NULL;
				148
				149	return rtn;
				150	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	151
				152	/**
				153	* scsi_times_out - Timeout function for normal scsi commands.
				154	* @scmd: Cmd that is timing out.
				155	*
				156	* Notes:
				157	* We do not need to lock this. There is the potential for a race
				158	* only in that the normal completion handling might run, but if the
				159	* normal completion function determines that the timer has already
				160	* fired, then it mustn't do anything.
				161	**/
				162	void scsi_times_out(struct scsi_cmnd *scmd)
				163	{
				164	scsi_log_completion(scmd, TIMEOUT_ERROR);
				165
				166	if (scmd->device->host->hostt->eh_timed_out)
				167	switch (scmd->device->host->hostt->eh_timed_out(scmd)) {
				168	case EH_HANDLED:
				169	__scsi_done(scmd);
				170	return;
				171	case EH_RESET_TIMER:
				172	/* This allows a single retry even of a command
				173	* with allowed == 0 */
				174	if (scmd->retries++ > scmd->allowed)
				175	break;
				176	scsi_add_timer(scmd, scmd->timeout_per_command,
				177	scsi_times_out);
				178	return;
				179	case EH_NOT_HANDLED:
				180	break;
				181	}
				182
				183	if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	184	scmd->result \|= DID_TIME_OUT << 16;
				185	__scsi_done(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	186	}
				187	}
				188
				189	/**
				190	* scsi_block_when_processing_errors - Prevent cmds from being queued.
				191	* @sdev: Device on which we are performing recovery.
				192	*
				193	* Description:
				194	* We block until the host is out of error recovery, and then check to
				195	* see whether the host or the device is offline.
				196	*
				197	* Return value:
				198	* 0 when dev was taken offline by error recovery. 1 OK to proceed.
				199	**/
				200	int scsi_block_when_processing_errors(struct scsi_device *sdev)
				201	{
				202	int online;
				203
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	204	wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	205
				206	online = scsi_device_online(sdev);
				207
				208	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: rtn: %d\n", __FUNCTION__,
				209	online));
				210
				211	return online;
				212	}
				213	EXPORT_SYMBOL(scsi_block_when_processing_errors);
				214
				215	#ifdef CONFIG_SCSI_LOGGING
				216	/**
				217	* scsi_eh_prt_fail_stats - Log info on failures.
				218	* @shost: scsi host being recovered.
				219	* @work_q: Queue of scsi cmds to process.
				220	**/
				221	static inline void scsi_eh_prt_fail_stats(struct Scsi_Host *shost,
				222	struct list_head *work_q)
				223	{
				224	struct scsi_cmnd *scmd;
				225	struct scsi_device *sdev;
				226	int total_failures = 0;
				227	int cmd_failed = 0;
				228	int cmd_cancel = 0;
				229	int devices_failed = 0;
				230
				231	shost_for_each_device(sdev, shost) {
				232	list_for_each_entry(scmd, work_q, eh_entry) {
				233	if (scmd->device == sdev) {
				234	++total_failures;
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	235	if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	236	++cmd_cancel;
				237	else
				238	++cmd_failed;
				239	}
				240	}
				241
				242	if (cmd_cancel \|\| cmd_failed) {
				243	SCSI_LOG_ERROR_RECOVERY(3,
James Bottomley	9ccfc75	2005-10-02 11:45:08 -0500	[diff] [blame^]	244	sdev_printk(KERN_INFO, sdev,
				245	"%s: cmds failed: %d, cancel: %d\n",
				246	__FUNCTION__, cmd_failed,
				247	cmd_cancel));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	248	cmd_cancel = 0;
				249	cmd_failed = 0;
				250	++devices_failed;
				251	}
				252	}
				253
				254	SCSI_LOG_ERROR_RECOVERY(2, printk("Total of %d commands on %d"
				255	" devices require eh work\n",
				256	total_failures, devices_failed));
				257	}
				258	#endif
				259
				260	/**
				261	* scsi_check_sense - Examine scsi cmd sense
				262	* @scmd: Cmd to have sense checked.
				263	*
				264	* Return value:
				265	* SUCCESS or FAILED or NEEDS_RETRY
				266	*
				267	* Notes:
				268	* When a deferred error is detected the current command has
				269	* not been executed and needs retrying.
				270	**/
				271	static int scsi_check_sense(struct scsi_cmnd *scmd)
				272	{
				273	struct scsi_sense_hdr sshdr;
				274
				275	if (! scsi_command_normalize_sense(scmd, &sshdr))
				276	return FAILED; /* no valid sense data */
				277
				278	if (scsi_sense_is_deferred(&sshdr))
				279	return NEEDS_RETRY;
				280
				281	/*
				282	* Previous logic looked for FILEMARK, EOM or ILI which are
				283	* mainly associated with tapes and returned SUCCESS.
				284	*/
				285	if (sshdr.response_code == 0x70) {
				286	/* fixed format */
				287	if (scmd->sense_buffer[2] & 0xe0)
				288	return SUCCESS;
				289	} else {
				290	/*
				291	* descriptor format: look for "stream commands sense data
				292	* descriptor" (see SSC-3). Assume single sense data
				293	* descriptor. Ignore ILI from SBC-2 READ LONG and WRITE LONG.
				294	*/
				295	if ((sshdr.additional_length > 3) &&
				296	(scmd->sense_buffer[8] == 0x4) &&
				297	(scmd->sense_buffer[11] & 0xe0))
				298	return SUCCESS;
				299	}
				300
				301	switch (sshdr.sense_key) {
				302	case NO_SENSE:
				303	return SUCCESS;
				304	case RECOVERED_ERROR:
				305	return /* soft_error */ SUCCESS;
				306
				307	case ABORTED_COMMAND:
				308	return NEEDS_RETRY;
				309	case NOT_READY:
				310	case UNIT_ATTENTION:
				311	/*
				312	* if we are expecting a cc/ua because of a bus reset that we
				313	* performed, treat this just as a retry. otherwise this is
				314	* information that we should pass up to the upper-level driver
				315	* so that we can deal with it there.
				316	*/
				317	if (scmd->device->expecting_cc_ua) {
				318	scmd->device->expecting_cc_ua = 0;
				319	return NEEDS_RETRY;
				320	}
				321	/*
				322	* if the device is in the process of becoming ready, we
				323	* should retry.
				324	*/
				325	if ((sshdr.asc == 0x04) && (sshdr.ascq == 0x01))
				326	return NEEDS_RETRY;
				327	/*
				328	* if the device is not started, we need to wake
				329	* the error handler to start the motor
				330	*/
				331	if (scmd->device->allow_restart &&
				332	(sshdr.asc == 0x04) && (sshdr.ascq == 0x02))
				333	return FAILED;
				334	return SUCCESS;
				335
				336	/* these three are not supported */
				337	case COPY_ABORTED:
				338	case VOLUME_OVERFLOW:
				339	case MISCOMPARE:
				340	return SUCCESS;
				341
				342	case MEDIUM_ERROR:
				343	return NEEDS_RETRY;
				344
				345	case HARDWARE_ERROR:
				346	if (scmd->device->retry_hwerror)
				347	return NEEDS_RETRY;
				348	else
				349	return SUCCESS;
				350
				351	case ILLEGAL_REQUEST:
				352	case BLANK_CHECK:
				353	case DATA_PROTECT:
				354	default:
				355	return SUCCESS;
				356	}
				357	}
				358
				359	/**
				360	* scsi_eh_completed_normally - Disposition a eh cmd on return from LLD.
				361	* @scmd: SCSI cmd to examine.
				362	*
				363	* Notes:
				364	* This is only called when we are examining the status of commands
				365	* queued during error recovery. the main difference here is that we
				366	* don't allow for the possibility of retries here, and we are a lot
				367	* more restrictive about what we consider acceptable.
				368	**/
				369	static int scsi_eh_completed_normally(struct scsi_cmnd *scmd)
				370	{
				371	/*
				372	* first check the host byte, to see if there is anything in there
				373	* that would indicate what we need to do.
				374	*/
				375	if (host_byte(scmd->result) == DID_RESET) {
				376	/*
				377	* rats. we are already in the error handler, so we now
				378	* get to try and figure out what to do next. if the sense
				379	* is valid, we have a pretty good idea of what to do.
				380	* if not, we mark it as FAILED.
				381	*/
				382	return scsi_check_sense(scmd);
				383	}
				384	if (host_byte(scmd->result) != DID_OK)
				385	return FAILED;
				386
				387	/*
				388	* next, check the message byte.
				389	*/
				390	if (msg_byte(scmd->result) != COMMAND_COMPLETE)
				391	return FAILED;
				392
				393	/*
				394	* now, check the status byte to see if this indicates
				395	* anything special.
				396	*/
				397	switch (status_byte(scmd->result)) {
				398	case GOOD:
				399	case COMMAND_TERMINATED:
				400	return SUCCESS;
				401	case CHECK_CONDITION:
				402	return scsi_check_sense(scmd);
				403	case CONDITION_GOOD:
				404	case INTERMEDIATE_GOOD:
				405	case INTERMEDIATE_C_GOOD:
				406	/*
				407	* who knows? FIXME(eric)
				408	*/
				409	return SUCCESS;
				410	case BUSY:
				411	case QUEUE_FULL:
				412	case RESERVATION_CONFLICT:
				413	default:
				414	return FAILED;
				415	}
				416	return FAILED;
				417	}
				418
				419	/**
				420	* scsi_eh_times_out - timeout function for error handling.
				421	* @scmd: Cmd that is timing out.
				422	*
				423	* Notes:
				424	* During error handling, the kernel thread will be sleeping waiting
				425	* for some action to complete on the device. our only job is to
				426	* record that it timed out, and to wake up the thread.
				427	**/
				428	static void scsi_eh_times_out(struct scsi_cmnd *scmd)
				429	{
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	430	scmd->eh_eflags \|= SCSI_EH_REC_TIMEOUT;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	431	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd:%p\n", __FUNCTION__,
				432	scmd));
				433
Tejun Heo	5b8ef84	2005-05-14 00:46:18 +0900	[diff] [blame]	434	up(scmd->device->host->eh_action);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	435	}
				436
				437	/**
				438	* scsi_eh_done - Completion function for error handling.
				439	* @scmd: Cmd that is done.
				440	**/
				441	static void scsi_eh_done(struct scsi_cmnd *scmd)
				442	{
				443	/*
				444	* if the timeout handler is already running, then just set the
				445	* flag which says we finished late, and return. we have no
				446	* way of stopping the timeout handler from running, so we must
				447	* always defer to it.
				448	*/
				449	if (del_timer(&scmd->eh_timeout)) {
				450	scmd->request->rq_status = RQ_SCSI_DONE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	451
				452	SCSI_LOG_ERROR_RECOVERY(3, printk("%s scmd: %p result: %x\n",
				453	__FUNCTION__, scmd, scmd->result));
				454
Tejun Heo	5b8ef84	2005-05-14 00:46:18 +0900	[diff] [blame]	455	up(scmd->device->host->eh_action);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	456	}
				457	}
				458
				459	/**
				460	* scsi_send_eh_cmnd - send a cmd to a device as part of error recovery.
				461	* @scmd: SCSI Cmd to send.
				462	* @timeout: Timeout for cmd.
				463	*
				464	* Notes:
				465	* The initialization of the structures is quite a bit different in
				466	* this case, and furthermore, there is a different completion handler
				467	* vs scsi_dispatch_cmd.
				468	* Return value:
				469	* SUCCESS or FAILED or NEEDS_RETRY
				470	**/
				471	static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, int timeout)
				472	{
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	473	struct scsi_device *sdev = scmd->device;
				474	struct Scsi_Host *shost = sdev->host;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	475	DECLARE_MUTEX_LOCKED(sem);
				476	unsigned long flags;
				477	int rtn = SUCCESS;
				478
				479	/*
				480	* we will use a queued command if possible, otherwise we will
				481	* emulate the queuing and calling of completion function ourselves.
				482	*/
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	483	if (sdev->scsi_level <= SCSI_2)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	484	scmd->cmnd[1] = (scmd->cmnd[1] & 0x1f) \|
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	485	(sdev->lun << 5 & 0xe0);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	486
				487	scsi_add_timer(scmd, timeout, scsi_eh_times_out);
				488
				489	/*
				490	* set up the semaphore so we wait for the command to complete.
				491	*/
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	492	shost->eh_action = &sem;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	493	scmd->request->rq_status = RQ_SCSI_BUSY;
				494
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	495	spin_lock_irqsave(shost->host_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	496	scsi_log_send(scmd);
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	497	shost->hostt->queuecommand(scmd, scsi_eh_done);
				498	spin_unlock_irqrestore(shost->host_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	499
				500	down(&sem);
				501	scsi_log_completion(scmd, SUCCESS);
				502
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	503	shost->eh_action = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	504
				505	/*
				506	* see if timeout. if so, tell the host to forget about it.
				507	* in other words, we don't want a callback any more.
				508	*/
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	509	if (scmd->eh_eflags & SCSI_EH_REC_TIMEOUT) {
				510	scmd->eh_eflags &= ~SCSI_EH_REC_TIMEOUT;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	511
				512	/*
				513	* as far as the low level driver is
				514	* concerned, this command is still active, so
				515	* we must give the low level driver a chance
				516	* to abort it. (db)
				517	*
				518	* FIXME(eric) - we are not tracking whether we could
				519	* abort a timed out command or not. not sure how
				520	* we should treat them differently anyways.
				521	*/
	f59114b	2005-04-17 15:00:23 -0500	[diff] [blame]	522	if (shost->hostt->eh_abort_handler)
				523	shost->hostt->eh_abort_handler(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	524
				525	scmd->request->rq_status = RQ_SCSI_DONE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	526	rtn = FAILED;
				527	}
				528
				529	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd: %p, rtn:%x\n",
				530	__FUNCTION__, scmd, rtn));
				531
				532	/*
				533	* now examine the actual status codes to see whether the command
				534	* actually did complete normally.
				535	*/
				536	if (rtn == SUCCESS) {
				537	rtn = scsi_eh_completed_normally(scmd);
				538	SCSI_LOG_ERROR_RECOVERY(3,
				539	printk("%s: scsi_eh_completed_normally %x\n",
				540	__FUNCTION__, rtn));
				541	switch (rtn) {
				542	case SUCCESS:
				543	case NEEDS_RETRY:
				544	case FAILED:
				545	break;
				546	default:
				547	rtn = FAILED;
				548	break;
				549	}
				550	}
				551
				552	return rtn;
				553	}
				554
				555	/**
				556	* scsi_request_sense - Request sense data from a particular target.
				557	* @scmd: SCSI cmd for request sense.
				558	*
				559	* Notes:
				560	* Some hosts automatically obtain this information, others require
				561	* that we obtain it on our own. This function will not return until
				562	* the command either times out, or it completes.
				563	**/
				564	static int scsi_request_sense(struct scsi_cmnd *scmd)
				565	{
				566	static unsigned char generic_sense[6] =
				567	{REQUEST_SENSE, 0, 0, 0, 252, 0};
				568	unsigned char *scsi_result;
				569	int saved_result;
				570	int rtn;
				571
				572	memcpy(scmd->cmnd, generic_sense, sizeof(generic_sense));
				573
Al Viro	bc86120	2005-04-24 12:28:34 -0700	[diff] [blame]	574	scsi_result = kmalloc(252, GFP_ATOMIC \| ((scmd->device->host->hostt->unchecked_isa_dma) ? __GFP_DMA : 0));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	575
				576
				577	if (unlikely(!scsi_result)) {
				578	printk(KERN_ERR "%s: cannot allocate scsi_result.\n",
				579	__FUNCTION__);
				580	return FAILED;
				581	}
				582
				583	/*
				584	* zero the sense buffer. some host adapters automatically always
				585	* request sense, so it is not a good idea that
				586	* scmd->request_buffer and scmd->sense_buffer point to the same
				587	* address (db). 0 is not a valid sense code.
				588	*/
				589	memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
				590	memset(scsi_result, 0, 252);
				591
				592	saved_result = scmd->result;
				593	scmd->request_buffer = scsi_result;
				594	scmd->request_bufflen = 252;
				595	scmd->use_sg = 0;
				596	scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
				597	scmd->sc_data_direction = DMA_FROM_DEVICE;
				598	scmd->underflow = 0;
				599
				600	rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
				601
				602	/* last chance to have valid sense data */
				603	if(!SCSI_SENSE_VALID(scmd)) {
				604	memcpy(scmd->sense_buffer, scmd->request_buffer,
				605	sizeof(scmd->sense_buffer));
				606	}
				607
				608	kfree(scsi_result);
				609
				610	/*
				611	* when we eventually call scsi_finish, we really wish to complete
				612	* the original request, so let's restore the original data. (db)
				613	*/
				614	scsi_setup_cmd_retry(scmd);
				615	scmd->result = saved_result;
				616	return rtn;
				617	}
				618
				619	/**
				620	* scsi_eh_finish_cmd - Handle a cmd that eh is finished with.
				621	* @scmd: Original SCSI cmd that eh has finished.
				622	* @done_q: Queue for processed commands.
				623	*
				624	* Notes:
				625	* We don't want to use the normal command completion while we are are
				626	* still handling errors - it may cause other commands to be queued,
				627	* and that would disturb what we are doing. thus we really want to
				628	* keep a list of pending commands for final completion, and once we
				629	* are ready to leave error handling we handle completion for real.
				630	**/
				631	static void scsi_eh_finish_cmd(struct scsi_cmnd *scmd,
				632	struct list_head *done_q)
				633	{
				634	scmd->device->host->host_failed--;
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	635	scmd->eh_eflags = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	636
				637	/*
				638	* set this back so that the upper level can correctly free up
				639	* things.
				640	*/
				641	scsi_setup_cmd_retry(scmd);
				642	list_move_tail(&scmd->eh_entry, done_q);
				643	}
				644
				645	/**
				646	* scsi_eh_get_sense - Get device sense data.
				647	* @work_q: Queue of commands to process.
				648	* @done_q: Queue of proccessed commands..
				649	*
				650	* Description:
				651	* See if we need to request sense information. if so, then get it
				652	* now, so we have a better idea of what to do.
				653	*
				654	* Notes:
				655	* This has the unfortunate side effect that if a shost adapter does
				656	* not automatically request sense information, that we end up shutting
				657	* it down before we request it.
				658	*
				659	* All drivers should request sense information internally these days,
				660	* so for now all I have to say is tough noogies if you end up in here.
				661	*
				662	* XXX: Long term this code should go away, but that needs an audit of
				663	* all LLDDs first.
				664	**/
				665	static int scsi_eh_get_sense(struct list_head *work_q,
				666	struct list_head *done_q)
				667	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	668	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	669	int rtn;
				670
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	671	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	672	if ((scmd->eh_eflags & SCSI_EH_CANCEL_CMD) \|\|
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	673	SCSI_SENSE_VALID(scmd))
				674	continue;
				675
				676	SCSI_LOG_ERROR_RECOVERY(2, printk("%s: requesting sense"
				677	" for id: %d\n",
				678	current->comm,
				679	scmd->device->id));
				680	rtn = scsi_request_sense(scmd);
				681	if (rtn != SUCCESS)
				682	continue;
				683
				684	SCSI_LOG_ERROR_RECOVERY(3, printk("sense requested for %p"
				685	" result %x\n", scmd,
				686	scmd->result));
				687	SCSI_LOG_ERROR_RECOVERY(3, scsi_print_sense("bh", scmd));
				688
				689	rtn = scsi_decide_disposition(scmd);
				690
				691	/*
				692	* if the result was normal, then just pass it along to the
				693	* upper level.
				694	*/
				695	if (rtn == SUCCESS)
				696	/* we don't want this command reissued, just
				697	* finished with the sense data, so set
				698	* retries to the max allowed to ensure it
				699	* won't get reissued */
				700	scmd->retries = scmd->allowed;
				701	else if (rtn != NEEDS_RETRY)
				702	continue;
				703
				704	scsi_eh_finish_cmd(scmd, done_q);
				705	}
				706
				707	return list_empty(work_q);
				708	}
				709
				710	/**
				711	* scsi_try_to_abort_cmd - Ask host to abort a running command.
				712	* @scmd: SCSI cmd to abort from Lower Level.
				713	*
				714	* Notes:
				715	* This function will not return until the user's completion function
				716	* has been called. there is no timeout on this operation. if the
				717	* author of the low-level driver wishes this operation to be timed,
				718	* they can provide this facility themselves. helper functions in
				719	* scsi_error.c can be supplied to make this easier to do.
				720	**/
				721	static int scsi_try_to_abort_cmd(struct scsi_cmnd *scmd)
				722	{
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	723	if (!scmd->device->host->hostt->eh_abort_handler)
Jeff Garzik	8fa728a	2005-05-28 07:54:40 -0400	[diff] [blame]	724	return FAILED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	725
				726	/*
				727	* scsi_done was called just after the command timed out and before
				728	* we had a chance to process it. (db)
				729	*/
				730	if (scmd->serial_number == 0)
				731	return SUCCESS;
Jeff Garzik	8fa728a	2005-05-28 07:54:40 -0400	[diff] [blame]	732	return scmd->device->host->hostt->eh_abort_handler(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	733	}
				734
				735	/**
				736	* scsi_eh_tur - Send TUR to device.
				737	* @scmd: Scsi cmd to send TUR
				738	*
				739	* Return value:
				740	* 0 - Device is ready. 1 - Device NOT ready.
				741	**/
				742	static int scsi_eh_tur(struct scsi_cmnd *scmd)
				743	{
				744	static unsigned char tur_command[6] = {TEST_UNIT_READY, 0, 0, 0, 0, 0};
				745	int retry_cnt = 1, rtn;
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	746	int saved_result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	747
				748	retry_tur:
				749	memcpy(scmd->cmnd, tur_command, sizeof(tur_command));
				750
				751	/*
				752	* zero the sense buffer. the scsi spec mandates that any
				753	* untransferred sense data should be interpreted as being zero.
				754	*/
				755	memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
				756
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	757	saved_result = scmd->result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	758	scmd->request_buffer = NULL;
				759	scmd->request_bufflen = 0;
				760	scmd->use_sg = 0;
				761	scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
				762	scmd->underflow = 0;
				763	scmd->sc_data_direction = DMA_NONE;
				764
				765	rtn = scsi_send_eh_cmnd(scmd, SENSE_TIMEOUT);
				766
				767	/*
				768	* when we eventually call scsi_finish, we really wish to complete
				769	* the original request, so let's restore the original data. (db)
				770	*/
				771	scsi_setup_cmd_retry(scmd);
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	772	scmd->result = saved_result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	773
				774	/*
				775	* hey, we are done. let's look to see what happened.
				776	*/
				777	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
				778	__FUNCTION__, scmd, rtn));
				779	if (rtn == SUCCESS)
				780	return 0;
Alan Stern	e47373e	2005-03-30 15:05:45 -0500	[diff] [blame]	781	else if (rtn == NEEDS_RETRY) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	782	if (retry_cnt--)
				783	goto retry_tur;
Alan Stern	e47373e	2005-03-30 15:05:45 -0500	[diff] [blame]	784	return 0;
				785	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	786	return 1;
				787	}
				788
				789	/**
				790	* scsi_eh_abort_cmds - abort canceled commands.
				791	* @shost: scsi host being recovered.
				792	* @eh_done_q: list_head for processed commands.
				793	*
				794	* Decription:
				795	* Try and see whether or not it makes sense to try and abort the
				796	* running command. this only works out to be the case if we have one
				797	* command that has timed out. if the command simply failed, it makes
				798	* no sense to try and abort the command, since as far as the shost
				799	* adapter is concerned, it isn't running.
				800	**/
				801	static int scsi_eh_abort_cmds(struct list_head *work_q,
				802	struct list_head *done_q)
				803	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	804	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	805	int rtn;
				806
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	807	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	808	if (!(scmd->eh_eflags & SCSI_EH_CANCEL_CMD))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	809	continue;
				810	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting cmd:"
				811	"0x%p\n", current->comm,
				812	scmd));
				813	rtn = scsi_try_to_abort_cmd(scmd);
				814	if (rtn == SUCCESS) {
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	815	scmd->eh_eflags &= ~SCSI_EH_CANCEL_CMD;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	816	if (!scsi_device_online(scmd->device) \|\|
				817	!scsi_eh_tur(scmd)) {
				818	scsi_eh_finish_cmd(scmd, done_q);
				819	}
				820
				821	} else
				822	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: aborting"
				823	" cmd failed:"
				824	"0x%p\n",
				825	current->comm,
				826	scmd));
				827	}
				828
				829	return list_empty(work_q);
				830	}
				831
				832	/**
				833	* scsi_try_bus_device_reset - Ask host to perform a BDR on a dev
				834	* @scmd: SCSI cmd used to send BDR
				835	*
				836	* Notes:
				837	* There is no timeout for this operation. if this operation is
				838	* unreliable for a given host, then the host itself needs to put a
				839	* timer on it, and set the host back to a consistent state prior to
				840	* returning.
				841	**/
				842	static int scsi_try_bus_device_reset(struct scsi_cmnd *scmd)
				843	{
Jeff Garzik	94d0e7b8	2005-05-28 07:55:48 -0400	[diff] [blame]	844	int rtn;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	845
				846	if (!scmd->device->host->hostt->eh_device_reset_handler)
Jeff Garzik	94d0e7b8	2005-05-28 07:55:48 -0400	[diff] [blame]	847	return FAILED;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	848
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	849	rtn = scmd->device->host->hostt->eh_device_reset_handler(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	850	if (rtn == SUCCESS) {
				851	scmd->device->was_reset = 1;
				852	scmd->device->expecting_cc_ua = 1;
				853	}
				854
				855	return rtn;
				856	}
				857
				858	/**
				859	* scsi_eh_try_stu - Send START_UNIT to device.
				860	* @scmd: Scsi cmd to send START_UNIT
				861	*
				862	* Return value:
				863	* 0 - Device is ready. 1 - Device NOT ready.
				864	**/
				865	static int scsi_eh_try_stu(struct scsi_cmnd *scmd)
				866	{
				867	static unsigned char stu_command[6] = {START_STOP, 0, 0, 0, 1, 0};
				868	int rtn;
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	869	int saved_result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	870
				871	if (!scmd->device->allow_restart)
				872	return 1;
				873
				874	memcpy(scmd->cmnd, stu_command, sizeof(stu_command));
				875
				876	/*
				877	* zero the sense buffer. the scsi spec mandates that any
				878	* untransferred sense data should be interpreted as being zero.
				879	*/
				880	memset(scmd->sense_buffer, 0, sizeof(scmd->sense_buffer));
				881
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	882	saved_result = scmd->result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	883	scmd->request_buffer = NULL;
				884	scmd->request_bufflen = 0;
				885	scmd->use_sg = 0;
				886	scmd->cmd_len = COMMAND_SIZE(scmd->cmnd[0]);
				887	scmd->underflow = 0;
				888	scmd->sc_data_direction = DMA_NONE;
				889
				890	rtn = scsi_send_eh_cmnd(scmd, START_UNIT_TIMEOUT);
				891
				892	/*
				893	* when we eventually call scsi_finish, we really wish to complete
				894	* the original request, so let's restore the original data. (db)
				895	*/
				896	scsi_setup_cmd_retry(scmd);
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	897	scmd->result = saved_result;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	898
				899	/*
				900	* hey, we are done. let's look to see what happened.
				901	*/
				902	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: scmd %p rtn %x\n",
				903	__FUNCTION__, scmd, rtn));
				904	if (rtn == SUCCESS)
				905	return 0;
				906	return 1;
				907	}
				908
				909	/**
				910	* scsi_eh_stu - send START_UNIT if needed
				911	* @shost: scsi host being recovered.
				912	* @eh_done_q: list_head for processed commands.
				913	*
				914	* Notes:
				915	* If commands are failing due to not ready, initializing command required,
				916	* try revalidating the device, which will end up sending a start unit.
				917	**/
				918	static int scsi_eh_stu(struct Scsi_Host *shost,
				919	struct list_head *work_q,
				920	struct list_head *done_q)
				921	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	922	struct scsi_cmnd scmd, stu_scmd, *next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	923	struct scsi_device *sdev;
				924
				925	shost_for_each_device(sdev, shost) {
				926	stu_scmd = NULL;
				927	list_for_each_entry(scmd, work_q, eh_entry)
				928	if (scmd->device == sdev && SCSI_SENSE_VALID(scmd) &&
				929	scsi_check_sense(scmd) == FAILED ) {
				930	stu_scmd = scmd;
				931	break;
				932	}
				933
				934	if (!stu_scmd)
				935	continue;
				936
				937	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending START_UNIT to sdev:"
				938	" 0x%p\n", current->comm, sdev));
				939
				940	if (!scsi_eh_try_stu(stu_scmd)) {
				941	if (!scsi_device_online(sdev) \|\|
				942	!scsi_eh_tur(stu_scmd)) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	943	list_for_each_entry_safe(scmd, next,
				944	work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	945	if (scmd->device == sdev)
				946	scsi_eh_finish_cmd(scmd, done_q);
				947	}
				948	}
				949	} else {
				950	SCSI_LOG_ERROR_RECOVERY(3,
				951	printk("%s: START_UNIT failed to sdev:"
				952	" 0x%p\n", current->comm, sdev));
				953	}
				954	}
				955
				956	return list_empty(work_q);
				957	}
				958
				959
				960	/**
				961	* scsi_eh_bus_device_reset - send bdr if needed
				962	* @shost: scsi host being recovered.
				963	* @eh_done_q: list_head for processed commands.
				964	*
				965	* Notes:
				966	* Try a bus device reset. still, look to see whether we have multiple
				967	* devices that are jammed or not - if we have multiple devices, it
				968	* makes no sense to try bus_device_reset - we really would need to try
				969	* a bus_reset instead.
				970	**/
				971	static int scsi_eh_bus_device_reset(struct Scsi_Host *shost,
				972	struct list_head *work_q,
				973	struct list_head *done_q)
				974	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	975	struct scsi_cmnd scmd, bdr_scmd, *next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	976	struct scsi_device *sdev;
				977	int rtn;
				978
				979	shost_for_each_device(sdev, shost) {
				980	bdr_scmd = NULL;
				981	list_for_each_entry(scmd, work_q, eh_entry)
				982	if (scmd->device == sdev) {
				983	bdr_scmd = scmd;
				984	break;
				985	}
				986
				987	if (!bdr_scmd)
				988	continue;
				989
				990	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BDR sdev:"
				991	" 0x%p\n", current->comm,
				992	sdev));
				993	rtn = scsi_try_bus_device_reset(bdr_scmd);
				994	if (rtn == SUCCESS) {
				995	if (!scsi_device_online(sdev) \|\|
				996	!scsi_eh_tur(bdr_scmd)) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	997	list_for_each_entry_safe(scmd, next,
				998	work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	999	if (scmd->device == sdev)
				1000	scsi_eh_finish_cmd(scmd,
				1001	done_q);
				1002	}
				1003	}
				1004	} else {
				1005	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BDR"
				1006	" failed sdev:"
				1007	"0x%p\n",
				1008	current->comm,
				1009	sdev));
				1010	}
				1011	}
				1012
				1013	return list_empty(work_q);
				1014	}
				1015
				1016	/**
				1017	* scsi_try_bus_reset - ask host to perform a bus reset
				1018	* @scmd: SCSI cmd to send bus reset.
				1019	**/
				1020	static int scsi_try_bus_reset(struct scsi_cmnd *scmd)
				1021	{
				1022	unsigned long flags;
				1023	int rtn;
				1024
				1025	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Bus RST\n",
				1026	__FUNCTION__));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1027
				1028	if (!scmd->device->host->hostt->eh_bus_reset_handler)
				1029	return FAILED;
				1030
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1031	rtn = scmd->device->host->hostt->eh_bus_reset_handler(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1032
				1033	if (rtn == SUCCESS) {
				1034	if (!scmd->device->host->hostt->skip_settle_delay)
				1035	ssleep(BUS_RESET_SETTLE_TIME);
				1036	spin_lock_irqsave(scmd->device->host->host_lock, flags);
				1037	scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
				1038	spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
				1039	}
				1040
				1041	return rtn;
				1042	}
				1043
				1044	/**
				1045	* scsi_try_host_reset - ask host adapter to reset itself
				1046	* @scmd: SCSI cmd to send hsot reset.
				1047	**/
				1048	static int scsi_try_host_reset(struct scsi_cmnd *scmd)
				1049	{
				1050	unsigned long flags;
				1051	int rtn;
				1052
				1053	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Snd Host RST\n",
				1054	__FUNCTION__));
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1055
				1056	if (!scmd->device->host->hostt->eh_host_reset_handler)
				1057	return FAILED;
				1058
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1059	rtn = scmd->device->host->hostt->eh_host_reset_handler(scmd);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1060
				1061	if (rtn == SUCCESS) {
				1062	if (!scmd->device->host->hostt->skip_settle_delay)
				1063	ssleep(HOST_RESET_SETTLE_TIME);
				1064	spin_lock_irqsave(scmd->device->host->host_lock, flags);
				1065	scsi_report_bus_reset(scmd->device->host, scmd->device->channel);
				1066	spin_unlock_irqrestore(scmd->device->host->host_lock, flags);
				1067	}
				1068
				1069	return rtn;
				1070	}
				1071
				1072	/**
				1073	* scsi_eh_bus_reset - send a bus reset
				1074	* @shost: scsi host being recovered.
				1075	* @eh_done_q: list_head for processed commands.
				1076	**/
				1077	static int scsi_eh_bus_reset(struct Scsi_Host *shost,
				1078	struct list_head *work_q,
				1079	struct list_head *done_q)
				1080	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1081	struct scsi_cmnd scmd, chan_scmd, *next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1082	unsigned int channel;
				1083	int rtn;
				1084
				1085	/*
				1086	* we really want to loop over the various channels, and do this on
				1087	* a channel by channel basis. we should also check to see if any
				1088	* of the failed commands are on soft_reset devices, and if so, skip
				1089	* the reset.
				1090	*/
				1091
				1092	for (channel = 0; channel <= shost->max_channel; channel++) {
				1093	chan_scmd = NULL;
				1094	list_for_each_entry(scmd, work_q, eh_entry) {
				1095	if (channel == scmd->device->channel) {
				1096	chan_scmd = scmd;
				1097	break;
				1098	/*
				1099	* FIXME add back in some support for
				1100	* soft_reset devices.
				1101	*/
				1102	}
				1103	}
				1104
				1105	if (!chan_scmd)
				1106	continue;
				1107	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending BRST chan:"
				1108	" %d\n", current->comm,
				1109	channel));
				1110	rtn = scsi_try_bus_reset(chan_scmd);
				1111	if (rtn == SUCCESS) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1112	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1113	if (channel == scmd->device->channel)
				1114	if (!scsi_device_online(scmd->device) \|\|
				1115	!scsi_eh_tur(scmd))
				1116	scsi_eh_finish_cmd(scmd,
				1117	done_q);
				1118	}
				1119	} else {
				1120	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: BRST"
				1121	" failed chan: %d\n",
				1122	current->comm,
				1123	channel));
				1124	}
				1125	}
				1126	return list_empty(work_q);
				1127	}
				1128
				1129	/**
				1130	* scsi_eh_host_reset - send a host reset
				1131	* @work_q: list_head for processed commands.
				1132	* @done_q: list_head for processed commands.
				1133	**/
				1134	static int scsi_eh_host_reset(struct list_head *work_q,
				1135	struct list_head *done_q)
				1136	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1137	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1138	int rtn;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1139
				1140	if (!list_empty(work_q)) {
				1141	scmd = list_entry(work_q->next,
				1142	struct scsi_cmnd, eh_entry);
				1143
				1144	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: Sending HRST\n"
				1145	, current->comm));
				1146
				1147	rtn = scsi_try_host_reset(scmd);
				1148	if (rtn == SUCCESS) {
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1149	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1150	if (!scsi_device_online(scmd->device) \|\|
				1151	(!scsi_eh_try_stu(scmd) && !scsi_eh_tur(scmd)) \|\|
				1152	!scsi_eh_tur(scmd))
				1153	scsi_eh_finish_cmd(scmd, done_q);
				1154	}
				1155	} else {
				1156	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: HRST"
				1157	" failed\n",
				1158	current->comm));
				1159	}
				1160	}
				1161	return list_empty(work_q);
				1162	}
				1163
				1164	/**
				1165	* scsi_eh_offline_sdevs - offline scsi devices that fail to recover
				1166	* @work_q: list_head for processed commands.
				1167	* @done_q: list_head for processed commands.
				1168	*
				1169	**/
				1170	static void scsi_eh_offline_sdevs(struct list_head *work_q,
				1171	struct list_head *done_q)
				1172	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1173	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1174
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1175	list_for_each_entry_safe(scmd, next, work_q, eh_entry) {
James Bottomley	9ccfc75	2005-10-02 11:45:08 -0500	[diff] [blame^]	1176	sdev_printk(KERN_INFO, scmd->device,
				1177	"scsi: Device offlined - not"
				1178	" ready after error recovery\n");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1179	scsi_device_set_state(scmd->device, SDEV_OFFLINE);
Christoph Hellwig	3111b0d	2005-06-19 13:43:26 +0200	[diff] [blame]	1180	if (scmd->eh_eflags & SCSI_EH_CANCEL_CMD) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1181	/*
				1182	* FIXME: Handle lost cmds.
				1183	*/
				1184	}
				1185	scsi_eh_finish_cmd(scmd, done_q);
				1186	}
				1187	return;
				1188	}
				1189
				1190	/**
				1191	* scsi_decide_disposition - Disposition a cmd on return from LLD.
				1192	* @scmd: SCSI cmd to examine.
				1193	*
				1194	* Notes:
				1195	* This is only called when we are examining the status after sending
				1196	* out the actual data command. any commands that are queued for error
				1197	* recovery (e.g. test_unit_ready) do not come through here.
				1198	*
				1199	* When this routine returns failed, it means the error handler thread
				1200	* is woken. In cases where the error code indicates an error that
				1201	* doesn't require the error handler read (i.e. we don't need to
				1202	* abort/reset), this function should return SUCCESS.
				1203	**/
				1204	int scsi_decide_disposition(struct scsi_cmnd *scmd)
				1205	{
				1206	int rtn;
				1207
				1208	/*
				1209	* if the device is offline, then we clearly just pass the result back
				1210	* up to the top level.
				1211	*/
				1212	if (!scsi_device_online(scmd->device)) {
				1213	SCSI_LOG_ERROR_RECOVERY(5, printk("%s: device offline - report"
				1214	" as SUCCESS\n",
				1215	__FUNCTION__));
				1216	return SUCCESS;
				1217	}
				1218
				1219	/*
				1220	* first check the host byte, to see if there is anything in there
				1221	* that would indicate what we need to do.
				1222	*/
				1223	switch (host_byte(scmd->result)) {
				1224	case DID_PASSTHROUGH:
				1225	/*
				1226	* no matter what, pass this through to the upper layer.
				1227	* nuke this special code so that it looks like we are saying
				1228	* did_ok.
				1229	*/
				1230	scmd->result &= 0xff00ffff;
				1231	return SUCCESS;
				1232	case DID_OK:
				1233	/*
				1234	* looks good. drop through, and check the next byte.
				1235	*/
				1236	break;
				1237	case DID_NO_CONNECT:
				1238	case DID_BAD_TARGET:
				1239	case DID_ABORT:
				1240	/*
				1241	* note - this means that we just report the status back
				1242	* to the top level driver, not that we actually think
				1243	* that it indicates SUCCESS.
				1244	*/
				1245	return SUCCESS;
				1246	/*
				1247	* when the low level driver returns did_soft_error,
				1248	* it is responsible for keeping an internal retry counter
				1249	* in order to avoid endless loops (db)
				1250	*
				1251	* actually this is a bug in this function here. we should
				1252	* be mindful of the maximum number of retries specified
				1253	* and not get stuck in a loop.
				1254	*/
				1255	case DID_SOFT_ERROR:
				1256	goto maybe_retry;
				1257	case DID_IMM_RETRY:
				1258	return NEEDS_RETRY;
				1259
	bf34191	2005-04-12 17:49:09 -0500	[diff] [blame]	1260	case DID_REQUEUE:
				1261	return ADD_TO_MLQUEUE;
				1262
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1263	case DID_ERROR:
				1264	if (msg_byte(scmd->result) == COMMAND_COMPLETE &&
				1265	status_byte(scmd->result) == RESERVATION_CONFLICT)
				1266	/*
				1267	* execute reservation conflict processing code
				1268	* lower down
				1269	*/
				1270	break;
				1271	/* fallthrough */
				1272
				1273	case DID_BUS_BUSY:
				1274	case DID_PARITY:
				1275	goto maybe_retry;
				1276	case DID_TIME_OUT:
				1277	/*
				1278	* when we scan the bus, we get timeout messages for
				1279	* these commands if there is no device available.
				1280	* other hosts report did_no_connect for the same thing.
				1281	*/
				1282	if ((scmd->cmnd[0] == TEST_UNIT_READY \|\|
				1283	scmd->cmnd[0] == INQUIRY)) {
				1284	return SUCCESS;
				1285	} else {
				1286	return FAILED;
				1287	}
				1288	case DID_RESET:
				1289	return SUCCESS;
				1290	default:
				1291	return FAILED;
				1292	}
				1293
				1294	/*
				1295	* next, check the message byte.
				1296	*/
				1297	if (msg_byte(scmd->result) != COMMAND_COMPLETE)
				1298	return FAILED;
				1299
				1300	/*
				1301	* check the status byte to see if this indicates anything special.
				1302	*/
				1303	switch (status_byte(scmd->result)) {
				1304	case QUEUE_FULL:
				1305	/*
				1306	* the case of trying to send too many commands to a
				1307	* tagged queueing device.
				1308	*/
				1309	case BUSY:
				1310	/*
				1311	* device can't talk to us at the moment. Should only
				1312	* occur (SAM-3) when the task queue is empty, so will cause
				1313	* the empty queue handling to trigger a stall in the
				1314	* device.
				1315	*/
				1316	return ADD_TO_MLQUEUE;
				1317	case GOOD:
				1318	case COMMAND_TERMINATED:
				1319	case TASK_ABORTED:
				1320	return SUCCESS;
				1321	case CHECK_CONDITION:
				1322	rtn = scsi_check_sense(scmd);
				1323	if (rtn == NEEDS_RETRY)
				1324	goto maybe_retry;
				1325	/* if rtn == FAILED, we have no sense information;
				1326	* returning FAILED will wake the error handler thread
				1327	* to collect the sense and redo the decide
				1328	* disposition */
				1329	return rtn;
				1330	case CONDITION_GOOD:
				1331	case INTERMEDIATE_GOOD:
				1332	case INTERMEDIATE_C_GOOD:
				1333	case ACA_ACTIVE:
				1334	/*
				1335	* who knows? FIXME(eric)
				1336	*/
				1337	return SUCCESS;
				1338
				1339	case RESERVATION_CONFLICT:
James Bottomley	9ccfc75	2005-10-02 11:45:08 -0500	[diff] [blame^]	1340	sdev_printk(KERN_INFO, scmd->device,
				1341	"reservation conflict\n");
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1342	return SUCCESS; /* causes immediate i/o error */
				1343	default:
				1344	return FAILED;
				1345	}
				1346	return FAILED;
				1347
				1348	maybe_retry:
				1349
				1350	/* we requeue for retry because the error was retryable, and
				1351	* the request was not marked fast fail. Note that above,
				1352	* even if the request is marked fast fail, we still requeue
				1353	* for queue congestion conditions (QUEUE_FULL or BUSY) */
				1354	if ((++scmd->retries) < scmd->allowed
				1355	&& !blk_noretry_request(scmd->request)) {
				1356	return NEEDS_RETRY;
				1357	} else {
				1358	/*
				1359	* no more retries - report this one back to upper level.
				1360	*/
				1361	return SUCCESS;
				1362	}
				1363	}
				1364
				1365	/**
				1366	* scsi_eh_lock_done - done function for eh door lock request
				1367	* @scmd: SCSI command block for the door lock request
				1368	*
				1369	* Notes:
				1370	* We completed the asynchronous door lock request, and it has either
				1371	* locked the door or failed. We must free the command structures
				1372	* associated with this request.
				1373	**/
				1374	static void scsi_eh_lock_done(struct scsi_cmnd *scmd)
				1375	{
				1376	struct scsi_request *sreq = scmd->sc_request;
				1377
				1378	scsi_release_request(sreq);
				1379	}
				1380
				1381
				1382	/**
				1383	* scsi_eh_lock_door - Prevent medium removal for the specified device
				1384	* @sdev: SCSI device to prevent medium removal
				1385	*
				1386	* Locking:
				1387	* We must be called from process context; scsi_allocate_request()
				1388	* may sleep.
				1389	*
				1390	* Notes:
				1391	* We queue up an asynchronous "ALLOW MEDIUM REMOVAL" request on the
				1392	* head of the devices request queue, and continue.
				1393	*
				1394	* Bugs:
				1395	* scsi_allocate_request() may sleep waiting for existing requests to
				1396	* be processed. However, since we haven't kicked off any request
				1397	* processing for this host, this may deadlock.
				1398	*
				1399	* If scsi_allocate_request() fails for what ever reason, we
				1400	* completely forget to lock the door.
				1401	**/
				1402	static void scsi_eh_lock_door(struct scsi_device *sdev)
				1403	{
				1404	struct scsi_request *sreq = scsi_allocate_request(sdev, GFP_KERNEL);
				1405
				1406	if (unlikely(!sreq)) {
				1407	printk(KERN_ERR "%s: request allocate failed,"
				1408	"prevent media removal cmd not sent\n", __FUNCTION__);
				1409	return;
				1410	}
				1411
				1412	sreq->sr_cmnd[0] = ALLOW_MEDIUM_REMOVAL;
				1413	sreq->sr_cmnd[1] = 0;
				1414	sreq->sr_cmnd[2] = 0;
				1415	sreq->sr_cmnd[3] = 0;
				1416	sreq->sr_cmnd[4] = SCSI_REMOVAL_PREVENT;
				1417	sreq->sr_cmnd[5] = 0;
				1418	sreq->sr_data_direction = DMA_NONE;
				1419	sreq->sr_bufflen = 0;
				1420	sreq->sr_buffer = NULL;
				1421	sreq->sr_allowed = 5;
				1422	sreq->sr_done = scsi_eh_lock_done;
				1423	sreq->sr_timeout_per_command = 10 * HZ;
				1424	sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]);
				1425
				1426	scsi_insert_special_req(sreq, 1);
				1427	}
				1428
				1429
				1430	/**
				1431	* scsi_restart_operations - restart io operations to the specified host.
				1432	* @shost: Host we are restarting.
				1433	*
				1434	* Notes:
				1435	* When we entered the error handler, we blocked all further i/o to
				1436	* this device. we need to 'reverse' this process.
				1437	**/
				1438	static void scsi_restart_operations(struct Scsi_Host *shost)
				1439	{
				1440	struct scsi_device *sdev;
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	1441	unsigned long flags;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1442
				1443	/*
				1444	* If the door was locked, we need to insert a door lock request
				1445	* onto the head of the SCSI request queue for the device. There
				1446	* is no point trying to lock the door of an off-line device.
				1447	*/
				1448	shost_for_each_device(sdev, shost) {
				1449	if (scsi_device_online(sdev) && sdev->locked)
				1450	scsi_eh_lock_door(sdev);
				1451	}
				1452
				1453	/*
				1454	* next free up anything directly waiting upon the host. this
				1455	* will be requests for character device operations, and also for
				1456	* ioctls to queued block devices.
				1457	*/
				1458	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
				1459	__FUNCTION__));
				1460
James Bottomley	939647e	2005-09-18 15:05:20 -0500	[diff] [blame]	1461	spin_lock_irqsave(shost->host_lock, flags);
				1462	if (scsi_host_set_state(shost, SHOST_RUNNING))
				1463	if (scsi_host_set_state(shost, SHOST_CANCEL))
				1464	BUG_ON(scsi_host_set_state(shost, SHOST_DEL));
				1465	spin_unlock_irqrestore(shost->host_lock, flags);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1466
				1467	wake_up(&shost->host_wait);
				1468
				1469	/*
				1470	* finally we need to re-initiate requests that may be pending. we will
				1471	* have had everything blocked while error handling is taking place, and
				1472	* now that error recovery is done, we will need to ensure that these
				1473	* requests are started.
				1474	*/
				1475	scsi_run_host_queues(shost);
				1476	}
				1477
				1478	/**
				1479	* scsi_eh_ready_devs - check device ready state and recover if not.
				1480	* @shost: host to be recovered.
				1481	* @eh_done_q: list_head for processed commands.
				1482	*
				1483	**/
				1484	static void scsi_eh_ready_devs(struct Scsi_Host *shost,
				1485	struct list_head *work_q,
				1486	struct list_head *done_q)
				1487	{
				1488	if (!scsi_eh_stu(shost, work_q, done_q))
				1489	if (!scsi_eh_bus_device_reset(shost, work_q, done_q))
				1490	if (!scsi_eh_bus_reset(shost, work_q, done_q))
				1491	if (!scsi_eh_host_reset(work_q, done_q))
				1492	scsi_eh_offline_sdevs(work_q, done_q);
				1493	}
				1494
				1495	/**
				1496	* scsi_eh_flush_done_q - finish processed commands or retry them.
				1497	* @done_q: list_head of processed commands.
				1498	*
				1499	**/
				1500	static void scsi_eh_flush_done_q(struct list_head *done_q)
				1501	{
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1502	struct scsi_cmnd scmd, next;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1503
Christoph Hellwig	937abeaa	2005-06-19 13:43:56 +0200	[diff] [blame]	1504	list_for_each_entry_safe(scmd, next, done_q, eh_entry) {
				1505	list_del_init(&scmd->eh_entry);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1506	if (scsi_device_online(scmd->device) &&
				1507	!blk_noretry_request(scmd->request) &&
				1508	(++scmd->retries < scmd->allowed)) {
				1509	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush"
				1510	" retry cmd: %p\n",
				1511	current->comm,
				1512	scmd));
				1513	scsi_queue_insert(scmd, SCSI_MLQUEUE_EH_RETRY);
				1514	} else {
Patrick Mansfield	793698c	2005-05-16 17:42:15 -0700	[diff] [blame]	1515	/*
				1516	* If just we got sense for the device (called
				1517	* scsi_eh_get_sense), scmd->result is already
				1518	* set, do not set DRIVER_TIMEOUT.
				1519	*/
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1520	if (!scmd->result)
				1521	scmd->result \|= (DRIVER_TIMEOUT << 24);
				1522	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: flush finish"
				1523	" cmd: %p\n",
				1524	current->comm, scmd));
				1525	scsi_finish_command(scmd);
				1526	}
				1527	}
				1528	}
				1529
				1530	/**
				1531	* scsi_unjam_host - Attempt to fix a host which has a cmd that failed.
				1532	* @shost: Host to unjam.
				1533	*
				1534	* Notes:
				1535	* When we come in here, we know that all commands on the bus have
				1536	* either completed, failed or timed out. we also know that no further
				1537	* commands are being sent to the host, so things are relatively quiet
				1538	* and we have freedom to fiddle with things as we wish.
				1539	*
				1540	* This is only the default implementation. it is possible for
				1541	* individual drivers to supply their own version of this function, and
				1542	* if the maintainer wishes to do this, it is strongly suggested that
				1543	* this function be taken as a template and modified. this function
				1544	* was designed to correctly handle problems for about 95% of the
				1545	* different cases out there, and it should always provide at least a
				1546	* reasonable amount of error recovery.
				1547	*
				1548	* Any command marked 'failed' or 'timeout' must eventually have
				1549	* scsi_finish_cmd() called for it. we do all of the retry stuff
				1550	* here, so when we restart the host after we return it should have an
				1551	* empty queue.
				1552	**/
				1553	static void scsi_unjam_host(struct Scsi_Host *shost)
				1554	{
				1555	unsigned long flags;
				1556	LIST_HEAD(eh_work_q);
				1557	LIST_HEAD(eh_done_q);
				1558
				1559	spin_lock_irqsave(shost->host_lock, flags);
				1560	list_splice_init(&shost->eh_cmd_q, &eh_work_q);
				1561	spin_unlock_irqrestore(shost->host_lock, flags);
				1562
				1563	SCSI_LOG_ERROR_RECOVERY(1, scsi_eh_prt_fail_stats(shost, &eh_work_q));
				1564
				1565	if (!scsi_eh_get_sense(&eh_work_q, &eh_done_q))
				1566	if (!scsi_eh_abort_cmds(&eh_work_q, &eh_done_q))
				1567	scsi_eh_ready_devs(shost, &eh_work_q, &eh_done_q);
				1568
				1569	scsi_eh_flush_done_q(&eh_done_q);
				1570	}
				1571
				1572	/**
				1573	* scsi_error_handler - Handle errors/timeouts of SCSI cmds.
				1574	* @data: Host for which we are running.
				1575	*
				1576	* Notes:
				1577	* This is always run in the context of a kernel thread. The idea is
				1578	* that we start this thing up when the kernel starts up (one per host
				1579	* that we detect), and it immediately goes to sleep and waits for some
				1580	* event (i.e. failure). When this takes place, we have the job of
				1581	* trying to unjam the bus and restarting things.
				1582	**/
				1583	int scsi_error_handler(void *data)
				1584	{
				1585	struct Scsi_Host shost = (struct Scsi_Host ) data;
				1586	int rtn;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1587
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1588	current->flags \|= PF_NOFREEZE;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1589
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1590
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1591	/*
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1592	* Note - we always use TASK_INTERRUPTIBLE even if the module
				1593	* was loaded as part of the kernel. The reason is that
				1594	* UNINTERRUPTIBLE would cause this thread to be counted in
				1595	* the load average as a running process, and an interruptible
				1596	* wait doesn't.
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1597	*/
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1598	set_current_state(TASK_INTERRUPTIBLE);
				1599	while (!kthread_should_stop()) {
				1600	if (shost->host_failed == 0 \|\|
				1601	shost->host_failed != shost->host_busy) {
				1602	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
				1603	" scsi_eh_%d"
				1604	" sleeping\n",
				1605	shost->host_no));
				1606	schedule();
				1607	set_current_state(TASK_INTERRUPTIBLE);
				1608	continue;
				1609	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1610
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1611	__set_current_state(TASK_RUNNING);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1612	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler"
				1613	" scsi_eh_%d waking"
				1614	" up\n",shost->host_no));
				1615
				1616	shost->eh_active = 1;
				1617
				1618	/*
				1619	* We have a host that is failing for some reason. Figure out
				1620	* what we need to do to get it up and online again (if we can).
				1621	* If we fail, we end up taking the thing offline.
				1622	*/
				1623	if (shost->hostt->eh_strategy_handler)
				1624	rtn = shost->hostt->eh_strategy_handler(shost);
				1625	else
				1626	scsi_unjam_host(shost);
				1627
				1628	shost->eh_active = 0;
				1629
				1630	/*
				1631	* Note - if the above fails completely, the action is to take
				1632	* individual devices offline and flush the queue of any
				1633	* outstanding requests that may have been pending. When we
				1634	* restart, we restart any I/O to any other devices on the bus
				1635	* which are still online.
				1636	*/
				1637	scsi_restart_operations(shost);
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1638	set_current_state(TASK_INTERRUPTIBLE);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1639	}
				1640
Steven Rostedt	461a0ff	2005-10-19 08:22:13 -0400	[diff] [blame]	1641	__set_current_state(TASK_RUNNING);
				1642
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1643	SCSI_LOG_ERROR_RECOVERY(1, printk("Error handler scsi_eh_%d"
				1644	" exiting\n",shost->host_no));
				1645
				1646	/*
				1647	* Make sure that nobody tries to wake us up again.
				1648	*/
James Bottomley	3ed7a47	2005-09-19 09:50:04 -0500	[diff] [blame]	1649	shost->ehandler = NULL;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1650	return 0;
				1651	}
				1652
				1653	/*
				1654	* Function: scsi_report_bus_reset()
				1655	*
				1656	* Purpose: Utility function used by low-level drivers to report that
				1657	* they have observed a bus reset on the bus being handled.
				1658	*
				1659	* Arguments: shost - Host in question
				1660	* channel - channel on which reset was observed.
				1661	*
				1662	* Returns: Nothing
				1663	*
				1664	* Lock status: Host lock must be held.
				1665	*
				1666	* Notes: This only needs to be called if the reset is one which
				1667	* originates from an unknown location. Resets originated
				1668	* by the mid-level itself don't need to call this, but there
				1669	* should be no harm.
				1670	*
				1671	* The main purpose of this is to make sure that a CHECK_CONDITION
				1672	* is properly treated.
				1673	*/
				1674	void scsi_report_bus_reset(struct Scsi_Host *shost, int channel)
				1675	{
				1676	struct scsi_device *sdev;
				1677
				1678	__shost_for_each_device(sdev, shost) {
				1679	if (channel == sdev->channel) {
				1680	sdev->was_reset = 1;
				1681	sdev->expecting_cc_ua = 1;
				1682	}
				1683	}
				1684	}
				1685	EXPORT_SYMBOL(scsi_report_bus_reset);
				1686
				1687	/*
				1688	* Function: scsi_report_device_reset()
				1689	*
				1690	* Purpose: Utility function used by low-level drivers to report that
				1691	* they have observed a device reset on the device being handled.
				1692	*
				1693	* Arguments: shost - Host in question
				1694	* channel - channel on which reset was observed
				1695	* target - target on which reset was observed
				1696	*
				1697	* Returns: Nothing
				1698	*
				1699	* Lock status: Host lock must be held
				1700	*
				1701	* Notes: This only needs to be called if the reset is one which
				1702	* originates from an unknown location. Resets originated
				1703	* by the mid-level itself don't need to call this, but there
				1704	* should be no harm.
				1705	*
				1706	* The main purpose of this is to make sure that a CHECK_CONDITION
				1707	* is properly treated.
				1708	*/
				1709	void scsi_report_device_reset(struct Scsi_Host *shost, int channel, int target)
				1710	{
				1711	struct scsi_device *sdev;
				1712
				1713	__shost_for_each_device(sdev, shost) {
				1714	if (channel == sdev->channel &&
				1715	target == sdev->id) {
				1716	sdev->was_reset = 1;
				1717	sdev->expecting_cc_ua = 1;
				1718	}
				1719	}
				1720	}
				1721	EXPORT_SYMBOL(scsi_report_device_reset);
				1722
/*
 * Completion callback that scsi_reset_provider() installs as
 * scmd->scsi_done on its scratch command.  It intentionally does nothing.
 */
static void
scsi_reset_provider_done_command(struct scsi_cmnd *scmd)
{
}
				1727
				1728	/*
				1729	* Function: scsi_reset_provider
				1730	*
				1731	* Purpose: Send requested reset to a bus or device at any phase.
				1732	*
				1733	* Arguments: device - device to send reset to
				1734	* flag - reset type (see scsi.h)
				1735	*
				1736	* Returns: SUCCESS/FAILURE.
				1737	*
				1738	* Notes: This is used by the SCSI Generic driver to provide
				1739	* Bus/Device reset capability.
				1740	*/
				1741	int
				1742	scsi_reset_provider(struct scsi_device *dev, int flag)
				1743	{
				1744	struct scsi_cmnd *scmd = scsi_get_command(dev, GFP_KERNEL);
				1745	struct request req;
				1746	int rtn;
				1747
				1748	scmd->request = &req;
				1749	memset(&scmd->eh_timeout, 0, sizeof(scmd->eh_timeout));
				1750	scmd->request->rq_status = RQ_SCSI_BUSY;
Christoph Hellwig	b4edcbc	2005-06-19 13:40:52 +0200	[diff] [blame]	1751
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1752	memset(&scmd->cmnd, '\0', sizeof(scmd->cmnd));
				1753
				1754	scmd->scsi_done = scsi_reset_provider_done_command;
				1755	scmd->done = NULL;
				1756	scmd->buffer = NULL;
				1757	scmd->bufflen = 0;
				1758	scmd->request_buffer = NULL;
				1759	scmd->request_bufflen = 0;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1760
				1761	scmd->cmd_len = 0;
				1762
				1763	scmd->sc_data_direction = DMA_BIDIRECTIONAL;
				1764	scmd->sc_request = NULL;
				1765	scmd->sc_magic = SCSI_CMND_MAGIC;
				1766
				1767	init_timer(&scmd->eh_timeout);
				1768
				1769	/*
				1770	* Sometimes the command can get back into the timer chain,
				1771	* so use the pid as an identifier.
				1772	*/
				1773	scmd->pid = 0;
				1774
				1775	switch (flag) {
				1776	case SCSI_TRY_RESET_DEVICE:
				1777	rtn = scsi_try_bus_device_reset(scmd);
				1778	if (rtn == SUCCESS)
				1779	break;
				1780	/* FALLTHROUGH */
				1781	case SCSI_TRY_RESET_BUS:
				1782	rtn = scsi_try_bus_reset(scmd);
				1783	if (rtn == SUCCESS)
				1784	break;
				1785	/* FALLTHROUGH */
				1786	case SCSI_TRY_RESET_HOST:
				1787	rtn = scsi_try_host_reset(scmd);
				1788	break;
				1789	default:
				1790	rtn = FAILED;
				1791	}
				1792
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1793	scsi_next_command(scmd);
				1794	return rtn;
				1795	}
				1796	EXPORT_SYMBOL(scsi_reset_provider);
				1797
				1798	/**
				1799	* scsi_normalize_sense - normalize main elements from either fixed or
				1800	* descriptor sense data format into a common format.
				1801	*
				1802	* @sense_buffer: byte array containing sense data returned by device
				1803	* @sb_len: number of valid bytes in sense_buffer
				1804	* @sshdr: pointer to instance of structure that common
				1805	* elements are written to.
				1806	*
				1807	* Notes:
				1808	* The "main elements" from sense data are: response_code, sense_key,
				1809	* asc, ascq and additional_length (only for descriptor format).
				1810	*
				1811	* Typically this function can be called after a device has
				1812	* responded to a SCSI command with the CHECK_CONDITION status.
				1813	*
				1814	* Return value:
				1815	* 1 if valid sense data information found, else 0;
				1816	**/
				1817	int scsi_normalize_sense(const u8 *sense_buffer, int sb_len,
				1818	struct scsi_sense_hdr *sshdr)
				1819	{
James Bottomley	33aa687	2005-08-28 11:31:14 -0500	[diff] [blame]	1820	if (!sense_buffer \|\| !sb_len)
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1821	return 0;
				1822
				1823	memset(sshdr, 0, sizeof(struct scsi_sense_hdr));
				1824
				1825	sshdr->response_code = (sense_buffer[0] & 0x7f);
James Bottomley	33aa687	2005-08-28 11:31:14 -0500	[diff] [blame]	1826
				1827	if (!scsi_sense_valid(sshdr))
				1828	return 0;
				1829
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1830	if (sshdr->response_code >= 0x72) {
				1831	/*
				1832	* descriptor format
				1833	*/
				1834	if (sb_len > 1)
				1835	sshdr->sense_key = (sense_buffer[1] & 0xf);
				1836	if (sb_len > 2)
				1837	sshdr->asc = sense_buffer[2];
				1838	if (sb_len > 3)
				1839	sshdr->ascq = sense_buffer[3];
				1840	if (sb_len > 7)
				1841	sshdr->additional_length = sense_buffer[7];
				1842	} else {
				1843	/*
				1844	* fixed format
				1845	*/
				1846	if (sb_len > 2)
				1847	sshdr->sense_key = (sense_buffer[2] & 0xf);
				1848	if (sb_len > 7) {
				1849	sb_len = (sb_len < (sense_buffer[7] + 8)) ?
				1850	sb_len : (sense_buffer[7] + 8);
				1851	if (sb_len > 12)
				1852	sshdr->asc = sense_buffer[12];
				1853	if (sb_len > 13)
				1854	sshdr->ascq = sense_buffer[13];
				1855	}
				1856	}
				1857
				1858	return 1;
				1859	}
				1860	EXPORT_SYMBOL(scsi_normalize_sense);
				1861
				1862	int scsi_request_normalize_sense(struct scsi_request *sreq,
				1863	struct scsi_sense_hdr *sshdr)
				1864	{
				1865	return scsi_normalize_sense(sreq->sr_sense_buffer,
				1866	sizeof(sreq->sr_sense_buffer), sshdr);
				1867	}
				1868	EXPORT_SYMBOL(scsi_request_normalize_sense);
				1869
				1870	int scsi_command_normalize_sense(struct scsi_cmnd *cmd,
				1871	struct scsi_sense_hdr *sshdr)
				1872	{
				1873	return scsi_normalize_sense(cmd->sense_buffer,
				1874	sizeof(cmd->sense_buffer), sshdr);
				1875	}
				1876	EXPORT_SYMBOL(scsi_command_normalize_sense);
				1877
				1878	/**
				1879	* scsi_sense_desc_find - search for a given descriptor type in
				1880	* descriptor sense data format.
				1881	*
				1882	* @sense_buffer: byte array of descriptor format sense data
				1883	* @sb_len: number of valid bytes in sense_buffer
				1884	* @desc_type: value of descriptor type to find
				1885	* (e.g. 0 -> information)
				1886	*
				1887	* Notes:
				1888	* only valid when sense data is in descriptor format
				1889	*
				1890	* Return value:
				1891	* pointer to start of (first) descriptor if found else NULL
				1892	**/
				1893	const u8 * scsi_sense_desc_find(const u8 * sense_buffer, int sb_len,
				1894	int desc_type)
				1895	{
				1896	int add_sen_len, add_len, desc_len, k;
				1897	const u8 * descp;
				1898
				1899	if ((sb_len < 8) \|\| (0 == (add_sen_len = sense_buffer[7])))
				1900	return NULL;
				1901	if ((sense_buffer[0] < 0x72) \|\| (sense_buffer[0] > 0x73))
				1902	return NULL;
				1903	add_sen_len = (add_sen_len < (sb_len - 8)) ?
				1904	add_sen_len : (sb_len - 8);
				1905	descp = &sense_buffer[8];
				1906	for (desc_len = 0, k = 0; k < add_sen_len; k += desc_len) {
				1907	descp += desc_len;
				1908	add_len = (k < (add_sen_len - 1)) ? descp[1]: -1;
				1909	desc_len = add_len + 2;
				1910	if (descp[0] == desc_type)
				1911	return descp;
				1912	if (add_len < 0) // short descriptor ??
				1913	break;
				1914	}
				1915	return NULL;
				1916	}
				1917	EXPORT_SYMBOL(scsi_sense_desc_find);
				1918
				1919	/**
				1920	* scsi_get_sense_info_fld - attempts to get information field from
				1921	* sense data (either fixed or descriptor format)
				1922	*
				1923	* @sense_buffer: byte array of sense data
				1924	* @sb_len: number of valid bytes in sense_buffer
				1925	* @info_out: pointer to 64 integer where 8 or 4 byte information
				1926	* field will be placed if found.
				1927	*
				1928	* Return value:
				1929	* 1 if information field found, 0 if not found.
				1930	**/
				1931	int scsi_get_sense_info_fld(const u8 * sense_buffer, int sb_len,
				1932	u64 * info_out)
				1933	{
				1934	int j;
				1935	const u8 * ucp;
				1936	u64 ull;
				1937
				1938	if (sb_len < 7)
				1939	return 0;
				1940	switch (sense_buffer[0] & 0x7f) {
				1941	case 0x70:
				1942	case 0x71:
				1943	if (sense_buffer[0] & 0x80) {
				1944	*info_out = (sense_buffer[3] << 24) +
				1945	(sense_buffer[4] << 16) +
				1946	(sense_buffer[5] << 8) + sense_buffer[6];
				1947	return 1;
				1948	} else
				1949	return 0;
				1950	case 0x72:
				1951	case 0x73:
				1952	ucp = scsi_sense_desc_find(sense_buffer, sb_len,
				1953	0 /* info desc */);
				1954	if (ucp && (0xa == ucp[1])) {
				1955	ull = 0;
				1956	for (j = 0; j < 8; ++j) {
				1957	if (j > 0)
				1958	ull <<= 8;
				1959	ull \|= ucp[4 + j];
				1960	}
				1961	*info_out = ull;
				1962	return 1;
				1963	} else
				1964	return 0;
				1965	default:
				1966	return 0;
				1967	}
				1968	}
				1969	EXPORT_SYMBOL(scsi_get_sense_info_fld);