Blame - arch/ppc64/mm/fault.c - kernel/hikey-linaro

blob: 20b0f37e8bf8fbe3ac28c2f0113a97b6aa951675 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame^]	1	/*
				2	* arch/ppc64/mm/fault.c
				3	*
				4	* PowerPC version
				5	* Copyright (C) 1995-1996 Gary Thomas (gdt@linuxppc.org)
				6	*
				7	* Derived from "arch/i386/mm/fault.c"
				8	* Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds
				9	*
				10	* Modified by Cort Dougan and Paul Mackerras.
				11	*
				12	* Modified for PPC64 by Dave Engebretsen (engebret@ibm.com)
				13	*
				14	* This program is free software; you can redistribute it and/or
				15	* modify it under the terms of the GNU General Public License
				16	* as published by the Free Software Foundation; either version
				17	* 2 of the License, or (at your option) any later version.
				18	*/
				19
				20	#include <linux/config.h>
				21	#include <linux/signal.h>
				22	#include <linux/sched.h>
				23	#include <linux/kernel.h>
				24	#include <linux/errno.h>
				25	#include <linux/string.h>
				26	#include <linux/types.h>
				27	#include <linux/mman.h>
				28	#include <linux/mm.h>
				29	#include <linux/interrupt.h>
				30	#include <linux/smp_lock.h>
				31	#include <linux/module.h>
				32
				33	#include <asm/page.h>
				34	#include <asm/pgtable.h>
				35	#include <asm/mmu.h>
				36	#include <asm/mmu_context.h>
				37	#include <asm/system.h>
				38	#include <asm/uaccess.h>
				39	#include <asm/kdebug.h>
				40
				41	/*
				42	* Check whether the instruction at regs->nip is a store using
				43	* an update addressing form which will update r1.
				44	*/
				45	static int store_updates_sp(struct pt_regs *regs)
				46	{
				47	unsigned int inst;
				48
				49	if (get_user(inst, (unsigned int __user *)regs->nip))
				50	return 0;
				51	/* check for 1 in the rA field */
				52	if (((inst >> 16) & 0x1f) != 1)
				53	return 0;
				54	/* check major opcode */
				55	switch (inst >> 26) {
				56	case 37: /* stwu */
				57	case 39: /* stbu */
				58	case 45: /* sthu */
				59	case 53: /* stfsu */
				60	case 55: /* stfdu */
				61	return 1;
				62	case 62: /* std or stdu */
				63	return (inst & 3) == 1;
				64	case 31:
				65	/* check minor opcode */
				66	switch ((inst >> 1) & 0x3ff) {
				67	case 181: /* stdux */
				68	case 183: /* stwux */
				69	case 247: /* stbux */
				70	case 439: /* sthux */
				71	case 695: /* stfsux */
				72	case 759: /* stfdux */
				73	return 1;
				74	}
				75	}
				76	return 0;
				77	}
				78
				79	/*
				80	* The error_code parameter is
				81	* - DSISR for a non-SLB data access fault,
				82	* - SRR1 & 0x08000000 for a non-SLB instruction access fault
				83	* - 0 any SLB fault.
				84	* The return value is 0 if the fault was handled, or the signal
				85	* number if this is a kernel fault that can't be handled here.
				86	*/
				87	int do_page_fault(struct pt_regs *regs, unsigned long address,
				88	unsigned long error_code)
				89	{
				90	struct vm_area_struct * vma;
				91	struct mm_struct *mm = current->mm;
				92	siginfo_t info;
				93	unsigned long code = SEGV_MAPERR;
				94	unsigned long is_write = error_code & DSISR_ISSTORE;
				95	unsigned long trap = TRAP(regs);
				96	unsigned long is_exec = trap == 0x400;
				97
				98	BUG_ON((trap == 0x380) \|\| (trap == 0x480));
				99
				100	if (notify_die(DIE_PAGE_FAULT, "page_fault", regs, error_code,
				101	11, SIGSEGV) == NOTIFY_STOP)
				102	return 0;
				103
				104	if (trap == 0x300) {
				105	if (debugger_fault_handler(regs))
				106	return 0;
				107	}
				108
				109	/* On a kernel SLB miss we can only check for a valid exception entry */
				110	if (!user_mode(regs) && (address >= TASK_SIZE))
				111	return SIGSEGV;
				112
				113	if (error_code & DSISR_DABRMATCH) {
				114	if (notify_die(DIE_DABR_MATCH, "dabr_match", regs, error_code,
				115	11, SIGSEGV) == NOTIFY_STOP)
				116	return 0;
				117	if (debugger_dabr_match(regs))
				118	return 0;
				119	}
				120
				121	if (in_atomic() \|\| mm == NULL) {
				122	if (!user_mode(regs))
				123	return SIGSEGV;
				124	/* in_atomic() in user mode is really bad,
				125	as is current->mm == NULL. */
				126	printk(KERN_EMERG "Page fault in user mode with"
				127	"in_atomic() = %d mm = %p\n", in_atomic(), mm);
				128	printk(KERN_EMERG "NIP = %lx MSR = %lx\n",
				129	regs->nip, regs->msr);
				130	die("Weird page fault", regs, SIGSEGV);
				131	}
				132
				133	/* When running in the kernel we expect faults to occur only to
				134	* addresses in user space. All other faults represent errors in the
				135	* kernel and should generate an OOPS. Unfortunatly, in the case of an
				136	* erroneous fault occuring in a code path which already holds mmap_sem
				137	* we will deadlock attempting to validate the fault against the
				138	* address space. Luckily the kernel only validly references user
				139	* space from well defined areas of code, which are listed in the
				140	* exceptions table.
				141	*
				142	* As the vast majority of faults will be valid we will only perform
				143	* the source reference check when there is a possibilty of a deadlock.
				144	* Attempt to lock the address space, if we cannot we then validate the
				145	* source. If this is invalid we can skip the address space check,
				146	* thus avoiding the deadlock.
				147	*/
				148	if (!down_read_trylock(&mm->mmap_sem)) {
				149	if (!user_mode(regs) && !search_exception_tables(regs->nip))
				150	goto bad_area_nosemaphore;
				151
				152	down_read(&mm->mmap_sem);
				153	}
				154
				155	vma = find_vma(mm, address);
				156	if (!vma)
				157	goto bad_area;
				158
				159	if (vma->vm_start <= address) {
				160	goto good_area;
				161	}
				162	if (!(vma->vm_flags & VM_GROWSDOWN))
				163	goto bad_area;
				164
				165	/*
				166	* N.B. The POWER/Open ABI allows programs to access up to
				167	* 288 bytes below the stack pointer.
				168	* The kernel signal delivery code writes up to about 1.5kB
				169	* below the stack pointer (r1) before decrementing it.
				170	* The exec code can write slightly over 640kB to the stack
				171	* before setting the user r1. Thus we allow the stack to
				172	* expand to 1MB without further checks.
				173	*/
				174	if (address + 0x100000 < vma->vm_end) {
				175	/* get user regs even if this fault is in kernel mode */
				176	struct pt_regs *uregs = current->thread.regs;
				177	if (uregs == NULL)
				178	goto bad_area;
				179
				180	/*
				181	* A user-mode access to an address a long way below
				182	* the stack pointer is only valid if the instruction
				183	* is one which would update the stack pointer to the
				184	* address accessed if the instruction completed,
				185	* i.e. either stwu rs,n(r1) or stwux rs,r1,rb
				186	* (or the byte, halfword, float or double forms).
				187	*
				188	* If we don't check this then any write to the area
				189	* between the last mapped region and the stack will
				190	* expand the stack rather than segfaulting.
				191	*/
				192	if (address + 2048 < uregs->gpr[1]
				193	&& (!user_mode(regs) \|\| !store_updates_sp(regs)))
				194	goto bad_area;
				195	}
				196
				197	if (expand_stack(vma, address))
				198	goto bad_area;
				199
				200	good_area:
				201	code = SEGV_ACCERR;
				202
				203	if (is_exec) {
				204	/* protection fault */
				205	if (error_code & DSISR_PROTFAULT)
				206	goto bad_area;
				207	if (!(vma->vm_flags & VM_EXEC))
				208	goto bad_area;
				209	/* a write */
				210	} else if (is_write) {
				211	if (!(vma->vm_flags & VM_WRITE))
				212	goto bad_area;
				213	/* a read */
				214	} else {
				215	if (!(vma->vm_flags & VM_READ))
				216	goto bad_area;
				217	}
				218
				219	survive:
				220	/*
				221	* If for any reason at all we couldn't handle the fault,
				222	* make sure we exit gracefully rather than endlessly redo
				223	* the fault.
				224	*/
				225	switch (handle_mm_fault(mm, vma, address, is_write)) {
				226
				227	case VM_FAULT_MINOR:
				228	current->min_flt++;
				229	break;
				230	case VM_FAULT_MAJOR:
				231	current->maj_flt++;
				232	break;
				233	case VM_FAULT_SIGBUS:
				234	goto do_sigbus;
				235	case VM_FAULT_OOM:
				236	goto out_of_memory;
				237	default:
				238	BUG();
				239	}
				240
				241	up_read(&mm->mmap_sem);
				242	return 0;
				243
				244	bad_area:
				245	up_read(&mm->mmap_sem);
				246
				247	bad_area_nosemaphore:
				248	/* User mode accesses cause a SIGSEGV */
				249	if (user_mode(regs)) {
				250	info.si_signo = SIGSEGV;
				251	info.si_errno = 0;
				252	info.si_code = code;
				253	info.si_addr = (void __user *) address;
				254	force_sig_info(SIGSEGV, &info, current);
				255	return 0;
				256	}
				257
				258	if (trap == 0x400 && (error_code & DSISR_PROTFAULT)
				259	&& printk_ratelimit())
				260	printk(KERN_CRIT "kernel tried to execute NX-protected"
				261	" page (%lx) - exploit attempt? (uid: %d)\n",
				262	address, current->uid);
				263
				264	return SIGSEGV;
				265
				266	/*
				267	* We ran out of memory, or some other thing happened to us that made
				268	* us unable to handle the page fault gracefully.
				269	*/
				270	out_of_memory:
				271	up_read(&mm->mmap_sem);
				272	if (current->pid == 1) {
				273	yield();
				274	down_read(&mm->mmap_sem);
				275	goto survive;
				276	}
				277	printk("VM: killing process %s\n", current->comm);
				278	if (user_mode(regs))
				279	do_exit(SIGKILL);
				280	return SIGKILL;
				281
				282	do_sigbus:
				283	up_read(&mm->mmap_sem);
				284	if (user_mode(regs)) {
				285	info.si_signo = SIGBUS;
				286	info.si_errno = 0;
				287	info.si_code = BUS_ADRERR;
				288	info.si_addr = (void __user *)address;
				289	force_sig_info(SIGBUS, &info, current);
				290	return 0;
				291	}
				292	return SIGBUS;
				293	}
				294
				295	/*
				296	* bad_page_fault is called when we have a bad access from the kernel.
				297	* It is called from do_page_fault above and from some of the procedures
				298	* in traps.c.
				299	*/
				300	void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig)
				301	{
				302	const struct exception_table_entry *entry;
				303
				304	/* Are we prepared to handle this fault? */
				305	if ((entry = search_exception_tables(regs->nip)) != NULL) {
				306	regs->nip = entry->fixup;
				307	return;
				308	}
				309
				310	/* kernel has accessed a bad area */
				311	die("Kernel access of bad area", regs, sig);
				312	}