Thomas Gleixner0793a612008-12-04 20:12:29 +01001/*
2 * Performance counter core code
3 *
4 * Copyright(C) 2008 Thomas Gleixner <tglx@linutronix.de>
5 * Copyright(C) 2008 Red Hat, Inc., Ingo Molnar
6 *
 7 * For licensing details see kernel-base/COPYING
8 */
9
10#include <linux/fs.h>
11#include <linux/cpu.h>
12#include <linux/smp.h>
Ingo Molnar04289bb2008-12-11 08:38:42 +010013#include <linux/file.h>
Thomas Gleixner0793a612008-12-04 20:12:29 +010014#include <linux/poll.h>
15#include <linux/sysfs.h>
16#include <linux/ptrace.h>
17#include <linux/percpu.h>
18#include <linux/uaccess.h>
19#include <linux/syscalls.h>
20#include <linux/anon_inodes.h>
Ingo Molnaraa9c4c02008-12-17 14:10:57 +010021#include <linux/kernel_stat.h>
Thomas Gleixner0793a612008-12-04 20:12:29 +010022#include <linux/perf_counter.h>
23
24/*
25 * Each CPU has a list of per CPU counters:
26 */
27DEFINE_PER_CPU(struct perf_cpu_context, perf_cpu_context);
28
Ingo Molnar088e2852008-12-14 20:21:00 +010029int perf_max_counters __read_mostly = 1;
Thomas Gleixner0793a612008-12-04 20:12:29 +010030static int perf_reserved_percpu __read_mostly;
31static int perf_overcommit __read_mostly = 1;
32
33/*
34 * Mutex for (sysadmin-configurable) counter reservations:
35 */
36static DEFINE_MUTEX(perf_resource_mutex);
37
38/*
39 * Architecture provided APIs - weak aliases:
40 */
Ingo Molnar5c92d122008-12-11 13:21:10 +010041extern __weak const struct hw_perf_counter_ops *
Ingo Molnar621a01e2008-12-11 12:46:46 +010042hw_perf_counter_init(struct perf_counter *counter)
Thomas Gleixner0793a612008-12-04 20:12:29 +010043{
Paul Mackerrasff6f0542009-01-09 16:19:25 +110044 return NULL;
Thomas Gleixner0793a612008-12-04 20:12:29 +010045}
46
Ingo Molnar01b28382008-12-11 13:45:51 +010047u64 __weak hw_perf_save_disable(void) { return 0; }
Yinghai Lu01ea1cc2008-12-26 21:05:06 -080048void __weak hw_perf_restore(u64 ctrl) { barrier(); }
Paul Mackerras01d02872009-01-14 13:44:19 +110049void __weak hw_perf_counter_setup(int cpu) { barrier(); }
Paul Mackerras3cbed422009-01-09 16:43:42 +110050int __weak hw_perf_group_sched_in(struct perf_counter *group_leader,
51 struct perf_cpu_context *cpuctx,
52 struct perf_counter_context *ctx, int cpu)
53{
54 return 0;
55}
Thomas Gleixner0793a612008-12-04 20:12:29 +010056
Paul Mackerras4eb96fc2009-01-09 17:24:34 +110057void __weak perf_counter_print_debug(void) { }
58
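/*
 * Counter list management. Both helpers below are expected to be
 * called with ctx->lock held.
 */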
Ingo Molnar04289bb2008-12-11 08:38:42 +010059static void
60list_add_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
61{
62 struct perf_counter *group_leader = counter->group_leader;
63
64 /*
65 * Depending on whether it is a standalone or sibling counter,
66 * add it straight to the context's counter list, or to the group
67 * leader's sibling list:
68 */
69 if (counter->group_leader == counter)
70 list_add_tail(&counter->list_entry, &ctx->counter_list);
71 else
72 list_add_tail(&counter->list_entry, &group_leader->sibling_list);
73}
74
75static void
76list_del_counter(struct perf_counter *counter, struct perf_counter_context *ctx)
77{
78 struct perf_counter *sibling, *tmp;
79
80 list_del_init(&counter->list_entry);
81
Ingo Molnar04289bb2008-12-11 08:38:42 +010082 /*
83 * If this was a group counter with sibling counters then
84 * upgrade the siblings to singleton counters by adding them
85 * to the context list directly:
86 */
87 list_for_each_entry_safe(sibling, tmp,
88 &counter->sibling_list, list_entry) {
89
90 list_del_init(&sibling->list_entry);
91 list_add_tail(&sibling->list_entry, &ctx->counter_list);
Ingo Molnar04289bb2008-12-11 08:38:42 +010092 sibling->group_leader = sibling;
93 }
94}
95
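/*
 * Take one active counter off the PMU: mark it inactive, call its
 * hw disable method and update the active-counter bookkeeping.
 */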
Paul Mackerras3b6f9e52009-01-14 21:00:30 +110096static void
97counter_sched_out(struct perf_counter *counter,
98 struct perf_cpu_context *cpuctx,
99 struct perf_counter_context *ctx)
100{
101 if (counter->state != PERF_COUNTER_STATE_ACTIVE)
102 return;
103
104 counter->state = PERF_COUNTER_STATE_INACTIVE;
105 counter->hw_ops->disable(counter);
106 counter->oncpu = -1;
107
108 if (!is_software_counter(counter))
109 cpuctx->active_oncpu--;
110 ctx->nr_active--;
111 if (counter->hw_event.exclusive || !cpuctx->active_oncpu)
112 cpuctx->exclusive = 0;
113}
114
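/*
 * Take a whole group off the PMU: the leader first, then each sibling;
 * an exclusive group also releases cpuctx->exclusive.
 */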
Paul Mackerrasd859e292009-01-17 18:10:22 +1100115static void
116group_sched_out(struct perf_counter *group_counter,
117 struct perf_cpu_context *cpuctx,
118 struct perf_counter_context *ctx)
119{
120 struct perf_counter *counter;
121
122 if (group_counter->state != PERF_COUNTER_STATE_ACTIVE)
123 return;
124
125 counter_sched_out(group_counter, cpuctx, ctx);
126
127 /*
128 * Schedule out siblings (if any):
129 */
130 list_for_each_entry(counter, &group_counter->sibling_list, list_entry)
131 counter_sched_out(counter, cpuctx, ctx);
132
133 if (group_counter->hw_event.exclusive)
134 cpuctx->exclusive = 0;
135}
136
Thomas Gleixner0793a612008-12-04 20:12:29 +0100137/*
138 * Cross CPU call to remove a performance counter
139 *
140 * We disable the counter on the hardware level first. After that we
141 * remove it from the context list.
142 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100143static void __perf_counter_remove_from_context(void *info)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100144{
145 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
146 struct perf_counter *counter = info;
147 struct perf_counter_context *ctx = counter->ctx;
Ingo Molnar9b51f662008-12-12 13:49:45 +0100148 unsigned long flags;
Ingo Molnar5c92d122008-12-11 13:21:10 +0100149 u64 perf_flags;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100150
151 /*
152 * If this is a task context, we need to check whether it is
153 * the current task context of this cpu. If not it has been
154 * scheduled out before the smp call arrived.
155 */
156 if (ctx->task && cpuctx->task_ctx != ctx)
157 return;
158
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100159 curr_rq_lock_irq_save(&flags);
160 spin_lock(&ctx->lock);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100161
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100162 counter_sched_out(counter, cpuctx, ctx);
163
164 counter->task = NULL;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100165 ctx->nr_counters--;
166
167 /*
168 * Protect the list operation against NMI by disabling the
169 * counters on a global level. NOP for non NMI based counters.
170 */
Ingo Molnar01b28382008-12-11 13:45:51 +0100171 perf_flags = hw_perf_save_disable();
Ingo Molnar04289bb2008-12-11 08:38:42 +0100172 list_del_counter(counter, ctx);
Ingo Molnar01b28382008-12-11 13:45:51 +0100173 hw_perf_restore(perf_flags);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100174
175 if (!ctx->task) {
176 /*
177 * Allow more per task counters with respect to the
178 * reservation:
179 */
180 cpuctx->max_pertask =
181 min(perf_max_counters - ctx->nr_counters,
182 perf_max_counters - perf_reserved_percpu);
183 }
184
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100185 spin_unlock(&ctx->lock);
186 curr_rq_unlock_irq_restore(&flags);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100187}
188
189
190/*
191 * Remove the counter from a task's (or a CPU's) list of counters.
192 *
Paul Mackerrasd859e292009-01-17 18:10:22 +1100193 * Must be called with counter->mutex and ctx->mutex held.
Thomas Gleixner0793a612008-12-04 20:12:29 +0100194 *
 195 * CPU counters are removed with an smp call. For task counters we only
196 * call when the task is on a CPU.
197 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100198static void perf_counter_remove_from_context(struct perf_counter *counter)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100199{
200 struct perf_counter_context *ctx = counter->ctx;
201 struct task_struct *task = ctx->task;
202
203 if (!task) {
204 /*
205 * Per cpu counters are removed via an smp call and
 206 * the removal is always successful.
207 */
208 smp_call_function_single(counter->cpu,
Ingo Molnar04289bb2008-12-11 08:38:42 +0100209 __perf_counter_remove_from_context,
Thomas Gleixner0793a612008-12-04 20:12:29 +0100210 counter, 1);
211 return;
212 }
213
214retry:
Ingo Molnar04289bb2008-12-11 08:38:42 +0100215 task_oncpu_function_call(task, __perf_counter_remove_from_context,
Thomas Gleixner0793a612008-12-04 20:12:29 +0100216 counter);
217
218 spin_lock_irq(&ctx->lock);
219 /*
220 * If the context is active we need to retry the smp call.
221 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100222 if (ctx->nr_active && !list_empty(&counter->list_entry)) {
Thomas Gleixner0793a612008-12-04 20:12:29 +0100223 spin_unlock_irq(&ctx->lock);
224 goto retry;
225 }
226
227 /*
 228 * The lock prevents this context from being scheduled in, so we
Ingo Molnar04289bb2008-12-11 08:38:42 +0100229 * can remove the counter safely if the call above did not
Thomas Gleixner0793a612008-12-04 20:12:29 +0100230 * succeed.
231 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100232 if (!list_empty(&counter->list_entry)) {
Thomas Gleixner0793a612008-12-04 20:12:29 +0100233 ctx->nr_counters--;
Ingo Molnar04289bb2008-12-11 08:38:42 +0100234 list_del_counter(counter, ctx);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100235 counter->task = NULL;
236 }
237 spin_unlock_irq(&ctx->lock);
238}
239
Paul Mackerrasd859e292009-01-17 18:10:22 +1100240/*
241 * Cross CPU call to disable a performance counter
242 */
243static void __perf_counter_disable(void *info)
244{
245 struct perf_counter *counter = info;
246 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
247 struct perf_counter_context *ctx = counter->ctx;
248 unsigned long flags;
249
250 /*
 251 * If this is a per-task counter, we need to check whether this
252 * counter's task is the current task on this cpu.
253 */
254 if (ctx->task && cpuctx->task_ctx != ctx)
255 return;
256
257 curr_rq_lock_irq_save(&flags);
258 spin_lock(&ctx->lock);
259
260 /*
261 * If the counter is on, turn it off.
262 * If it is in error state, leave it in error state.
263 */
264 if (counter->state >= PERF_COUNTER_STATE_INACTIVE) {
265 if (counter == counter->group_leader)
266 group_sched_out(counter, cpuctx, ctx);
267 else
268 counter_sched_out(counter, cpuctx, ctx);
269 counter->state = PERF_COUNTER_STATE_OFF;
270 }
271
272 spin_unlock(&ctx->lock);
273 curr_rq_unlock_irq_restore(&flags);
274}
275
276/*
277 * Disable a counter.
278 */
279static void perf_counter_disable(struct perf_counter *counter)
280{
281 struct perf_counter_context *ctx = counter->ctx;
282 struct task_struct *task = ctx->task;
283
284 if (!task) {
285 /*
286 * Disable the counter on the cpu that it's on
287 */
288 smp_call_function_single(counter->cpu, __perf_counter_disable,
289 counter, 1);
290 return;
291 }
292
293 retry:
294 task_oncpu_function_call(task, __perf_counter_disable, counter);
295
296 spin_lock_irq(&ctx->lock);
297 /*
298 * If the counter is still active, we need to retry the cross-call.
299 */
300 if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
301 spin_unlock_irq(&ctx->lock);
302 goto retry;
303 }
304
305 /*
306 * Since we have the lock this context can't be scheduled
307 * in, so we can change the state safely.
308 */
309 if (counter->state == PERF_COUNTER_STATE_INACTIVE)
310 counter->state = PERF_COUNTER_STATE_OFF;
311
312 spin_unlock_irq(&ctx->lock);
313}
314
315/*
316 * Disable a counter and all its children.
317 */
318static void perf_counter_disable_family(struct perf_counter *counter)
319{
320 struct perf_counter *child;
321
322 perf_counter_disable(counter);
323
324 /*
325 * Lock the mutex to protect the list of children
326 */
327 mutex_lock(&counter->mutex);
328 list_for_each_entry(child, &counter->child_list, child_list)
329 perf_counter_disable(child);
330 mutex_unlock(&counter->mutex);
331}
332
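/*
 * Try to put a single counter on the PMU. Returns -EAGAIN if the
 * hardware refuses it, in which case the counter stays inactive.
 */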
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100333static int
334counter_sched_in(struct perf_counter *counter,
335 struct perf_cpu_context *cpuctx,
336 struct perf_counter_context *ctx,
337 int cpu)
338{
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100339 if (counter->state <= PERF_COUNTER_STATE_OFF)
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100340 return 0;
341
342 counter->state = PERF_COUNTER_STATE_ACTIVE;
343 counter->oncpu = cpu; /* TODO: put 'cpu' into cpuctx->cpu */
344 /*
345 * The new state must be visible before we turn it on in the hardware:
346 */
347 smp_wmb();
348
349 if (counter->hw_ops->enable(counter)) {
350 counter->state = PERF_COUNTER_STATE_INACTIVE;
351 counter->oncpu = -1;
352 return -EAGAIN;
353 }
354
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100355 if (!is_software_counter(counter))
356 cpuctx->active_oncpu++;
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100357 ctx->nr_active++;
358
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100359 if (counter->hw_event.exclusive)
360 cpuctx->exclusive = 1;
361
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100362 return 0;
363}
364
Thomas Gleixner0793a612008-12-04 20:12:29 +0100365/*
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100366 * Return 1 for a group consisting entirely of software counters,
367 * 0 if the group contains any hardware counters.
368 */
369static int is_software_only_group(struct perf_counter *leader)
370{
371 struct perf_counter *counter;
372
373 if (!is_software_counter(leader))
374 return 0;
375 list_for_each_entry(counter, &leader->sibling_list, list_entry)
376 if (!is_software_counter(counter))
377 return 0;
378 return 1;
379}
380
381/*
382 * Work out whether we can put this counter group on the CPU now.
383 */
384static int group_can_go_on(struct perf_counter *counter,
385 struct perf_cpu_context *cpuctx,
386 int can_add_hw)
387{
388 /*
389 * Groups consisting entirely of software counters can always go on.
390 */
391 if (is_software_only_group(counter))
392 return 1;
393 /*
394 * If an exclusive group is already on, no other hardware
395 * counters can go on.
396 */
397 if (cpuctx->exclusive)
398 return 0;
399 /*
400 * If this group is exclusive and there are already
401 * counters on the CPU, it can't go on.
402 */
403 if (counter->hw_event.exclusive && cpuctx->active_oncpu)
404 return 0;
405 /*
406 * Otherwise, try to add it if all previous groups were able
407 * to go on.
408 */
409 return can_add_hw;
410}
411
412/*
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100413 * Cross CPU call to install and enable a performance counter
Thomas Gleixner0793a612008-12-04 20:12:29 +0100414 */
415static void __perf_install_in_context(void *info)
416{
417 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
418 struct perf_counter *counter = info;
419 struct perf_counter_context *ctx = counter->ctx;
Paul Mackerrasd859e292009-01-17 18:10:22 +1100420 struct perf_counter *leader = counter->group_leader;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100421 int cpu = smp_processor_id();
Ingo Molnar9b51f662008-12-12 13:49:45 +0100422 unsigned long flags;
Ingo Molnar5c92d122008-12-11 13:21:10 +0100423 u64 perf_flags;
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100424 int err;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100425
426 /*
427 * If this is a task context, we need to check whether it is
428 * the current task context of this cpu. If not it has been
429 * scheduled out before the smp call arrived.
430 */
431 if (ctx->task && cpuctx->task_ctx != ctx)
432 return;
433
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100434 curr_rq_lock_irq_save(&flags);
435 spin_lock(&ctx->lock);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100436
437 /*
438 * Protect the list operation against NMI by disabling the
439 * counters on a global level. NOP for non NMI based counters.
440 */
Ingo Molnar01b28382008-12-11 13:45:51 +0100441 perf_flags = hw_perf_save_disable();
Thomas Gleixner0793a612008-12-04 20:12:29 +0100442
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100443 list_add_counter(counter, ctx);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100444 ctx->nr_counters++;
445
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100446 /*
Paul Mackerrasd859e292009-01-17 18:10:22 +1100447 * Don't put the counter on if it is disabled or if
448 * it is in a group and the group isn't on.
449 */
450 if (counter->state != PERF_COUNTER_STATE_INACTIVE ||
451 (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE))
452 goto unlock;
453
454 /*
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100455 * An exclusive counter can't go on if there are already active
456 * hardware counters, and no hardware counter can go on if there
457 * is already an exclusive counter on.
458 */
Paul Mackerrasd859e292009-01-17 18:10:22 +1100459 if (!group_can_go_on(counter, cpuctx, 1))
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100460 err = -EEXIST;
461 else
462 err = counter_sched_in(counter, cpuctx, ctx, cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100463
Paul Mackerrasd859e292009-01-17 18:10:22 +1100464 if (err) {
465 /*
466 * This counter couldn't go on. If it is in a group
467 * then we have to pull the whole group off.
468 * If the counter group is pinned then put it in error state.
469 */
470 if (leader != counter)
471 group_sched_out(leader, cpuctx, ctx);
472 if (leader->hw_event.pinned)
473 leader->state = PERF_COUNTER_STATE_ERROR;
474 }
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100475
476 if (!err && !ctx->task && cpuctx->max_pertask)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100477 cpuctx->max_pertask--;
478
Paul Mackerrasd859e292009-01-17 18:10:22 +1100479 unlock:
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100480 hw_perf_restore(perf_flags);
481
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100482 spin_unlock(&ctx->lock);
483 curr_rq_unlock_irq_restore(&flags);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100484}
485
486/*
487 * Attach a performance counter to a context
488 *
489 * First we add the counter to the list with the hardware enable bit
490 * in counter->hw_config cleared.
491 *
 492 * If the counter is attached to a task which is on a CPU we use an smp
493 * call to enable it in the task context. The task might have been
494 * scheduled away, but we check this in the smp call again.
Paul Mackerrasd859e292009-01-17 18:10:22 +1100495 *
496 * Must be called with ctx->mutex held.
Thomas Gleixner0793a612008-12-04 20:12:29 +0100497 */
498static void
499perf_install_in_context(struct perf_counter_context *ctx,
500 struct perf_counter *counter,
501 int cpu)
502{
503 struct task_struct *task = ctx->task;
504
505 counter->ctx = ctx;
506 if (!task) {
507 /*
508 * Per cpu counters are installed via an smp call and
 509 * the install is always successful.
510 */
511 smp_call_function_single(cpu, __perf_install_in_context,
512 counter, 1);
513 return;
514 }
515
516 counter->task = task;
517retry:
518 task_oncpu_function_call(task, __perf_install_in_context,
519 counter);
520
521 spin_lock_irq(&ctx->lock);
522 /*
Thomas Gleixner0793a612008-12-04 20:12:29 +0100523 * If the context is active we need to retry the smp call.
524 */
Paul Mackerrasd859e292009-01-17 18:10:22 +1100525 if (ctx->is_active && list_empty(&counter->list_entry)) {
Thomas Gleixner0793a612008-12-04 20:12:29 +0100526 spin_unlock_irq(&ctx->lock);
527 goto retry;
528 }
529
530 /*
 531 * The lock prevents this context from being scheduled in, so we
 532 * can add the counter safely if the call above did not
533 * succeed.
534 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100535 if (list_empty(&counter->list_entry)) {
536 list_add_counter(counter, ctx);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100537 ctx->nr_counters++;
538 }
539 spin_unlock_irq(&ctx->lock);
540}
541
Paul Mackerrasd859e292009-01-17 18:10:22 +1100542/*
543 * Cross CPU call to enable a performance counter
544 */
545static void __perf_counter_enable(void *info)
Ingo Molnar04289bb2008-12-11 08:38:42 +0100546{
Paul Mackerrasd859e292009-01-17 18:10:22 +1100547 struct perf_counter *counter = info;
548 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
549 struct perf_counter_context *ctx = counter->ctx;
550 struct perf_counter *leader = counter->group_leader;
551 unsigned long flags;
552 int err;
Ingo Molnar04289bb2008-12-11 08:38:42 +0100553
554 /*
Paul Mackerrasd859e292009-01-17 18:10:22 +1100555 * If this is a per-task counter, we need to check whether this
556 * counter's task is the current task on this cpu.
Ingo Molnar04289bb2008-12-11 08:38:42 +0100557 */
Paul Mackerrasd859e292009-01-17 18:10:22 +1100558 if (ctx->task && cpuctx->task_ctx != ctx)
559 return;
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100560
Paul Mackerrasd859e292009-01-17 18:10:22 +1100561 curr_rq_lock_irq_save(&flags);
562 spin_lock(&ctx->lock);
563
564 if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
565 goto unlock;
566 counter->state = PERF_COUNTER_STATE_INACTIVE;
567
568 /*
569 * If the counter is in a group and isn't the group leader,
570 * then don't put it on unless the group is on.
571 */
572 if (leader != counter && leader->state != PERF_COUNTER_STATE_ACTIVE)
573 goto unlock;
574
575 if (!group_can_go_on(counter, cpuctx, 1))
576 err = -EEXIST;
577 else
578 err = counter_sched_in(counter, cpuctx, ctx,
579 smp_processor_id());
580
581 if (err) {
582 /*
583 * If this counter can't go on and it's part of a
584 * group, then the whole group has to come off.
585 */
586 if (leader != counter)
587 group_sched_out(leader, cpuctx, ctx);
588 if (leader->hw_event.pinned)
589 leader->state = PERF_COUNTER_STATE_ERROR;
590 }
591
592 unlock:
593 spin_unlock(&ctx->lock);
594 curr_rq_unlock_irq_restore(&flags);
595}
596
597/*
598 * Enable a counter.
599 */
600static void perf_counter_enable(struct perf_counter *counter)
601{
602 struct perf_counter_context *ctx = counter->ctx;
603 struct task_struct *task = ctx->task;
604
605 if (!task) {
606 /*
607 * Enable the counter on the cpu that it's on
608 */
609 smp_call_function_single(counter->cpu, __perf_counter_enable,
610 counter, 1);
611 return;
612 }
613
614 spin_lock_irq(&ctx->lock);
615 if (counter->state >= PERF_COUNTER_STATE_INACTIVE)
616 goto out;
617
618 /*
619 * If the counter is in error state, clear that first.
620 * That way, if we see the counter in error state below, we
621 * know that it has gone back into error state, as distinct
622 * from the task having been scheduled away before the
623 * cross-call arrived.
624 */
625 if (counter->state == PERF_COUNTER_STATE_ERROR)
626 counter->state = PERF_COUNTER_STATE_OFF;
627
628 retry:
629 spin_unlock_irq(&ctx->lock);
630 task_oncpu_function_call(task, __perf_counter_enable, counter);
631
632 spin_lock_irq(&ctx->lock);
633
634 /*
635 * If the context is active and the counter is still off,
636 * we need to retry the cross-call.
637 */
638 if (ctx->is_active && counter->state == PERF_COUNTER_STATE_OFF)
639 goto retry;
640
641 /*
642 * Since we have the lock this context can't be scheduled
643 * in, so we can change the state safely.
644 */
645 if (counter->state == PERF_COUNTER_STATE_OFF)
646 counter->state = PERF_COUNTER_STATE_INACTIVE;
647 out:
648 spin_unlock_irq(&ctx->lock);
649}
650
651/*
652 * Enable a counter and all its children.
653 */
654static void perf_counter_enable_family(struct perf_counter *counter)
655{
656 struct perf_counter *child;
657
658 perf_counter_enable(counter);
659
660 /*
661 * Lock the mutex to protect the list of children
662 */
663 mutex_lock(&counter->mutex);
664 list_for_each_entry(child, &counter->child_list, child_list)
665 perf_counter_enable(child);
666 mutex_unlock(&counter->mutex);
Ingo Molnar04289bb2008-12-11 08:38:42 +0100667}
668
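/*
 * Schedule all counters of a context out: mark the context inactive and
 * take every active group off the PMU, with hardware counters globally
 * disabled around the list walk.
 */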
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100669void __perf_counter_sched_out(struct perf_counter_context *ctx,
670 struct perf_cpu_context *cpuctx)
671{
672 struct perf_counter *counter;
Paul Mackerras3cbed422009-01-09 16:43:42 +1100673 u64 flags;
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100674
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100675 spin_lock(&ctx->lock);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100676 ctx->is_active = 0;
677 if (likely(!ctx->nr_counters))
678 goto out;
679
Paul Mackerras3cbed422009-01-09 16:43:42 +1100680 flags = hw_perf_save_disable();
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100681 if (ctx->nr_active) {
682 list_for_each_entry(counter, &ctx->counter_list, list_entry)
683 group_sched_out(counter, cpuctx, ctx);
684 }
Paul Mackerras3cbed422009-01-09 16:43:42 +1100685 hw_perf_restore(flags);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100686 out:
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100687 spin_unlock(&ctx->lock);
688}
689
Thomas Gleixner0793a612008-12-04 20:12:29 +0100690/*
691 * Called from scheduler to remove the counters of the current task,
692 * with interrupts disabled.
693 *
694 * We stop each counter and update the counter value in counter->count.
695 *
Ingo Molnar76715812008-12-17 14:20:28 +0100696 * This does not protect us against NMI, but disable()
Thomas Gleixner0793a612008-12-04 20:12:29 +0100697 * sets the disabled bit in the control field of counter _before_
 698 * accessing the counter control register. If an NMI hits, then it will
699 * not restart the counter.
700 */
701void perf_counter_task_sched_out(struct task_struct *task, int cpu)
702{
703 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
704 struct perf_counter_context *ctx = &task->perf_counter_ctx;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100705
706 if (likely(!cpuctx->task_ctx))
707 return;
708
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100709 __perf_counter_sched_out(ctx, cpuctx);
710
Thomas Gleixner0793a612008-12-04 20:12:29 +0100711 cpuctx->task_ctx = NULL;
712}
713
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100714static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
Ingo Molnar04289bb2008-12-11 08:38:42 +0100715{
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100716 __perf_counter_sched_out(&cpuctx->ctx, cpuctx);
Ingo Molnar04289bb2008-12-11 08:38:42 +0100717}
718
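/*
 * Schedule a whole group onto the PMU as one unit. The architecture may
 * do this atomically via hw_perf_group_sched_in(); otherwise the leader
 * and siblings are added one by one and any partial group is undone on
 * failure.
 */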
Ingo Molnar79958882008-12-17 08:54:56 +0100719static int
Ingo Molnar04289bb2008-12-11 08:38:42 +0100720group_sched_in(struct perf_counter *group_counter,
721 struct perf_cpu_context *cpuctx,
722 struct perf_counter_context *ctx,
723 int cpu)
724{
Ingo Molnar95cdd2e2008-12-21 13:50:42 +0100725 struct perf_counter *counter, *partial_group;
Paul Mackerras3cbed422009-01-09 16:43:42 +1100726 int ret;
727
728 if (group_counter->state == PERF_COUNTER_STATE_OFF)
729 return 0;
730
731 ret = hw_perf_group_sched_in(group_counter, cpuctx, ctx, cpu);
732 if (ret)
733 return ret < 0 ? ret : 0;
Ingo Molnar04289bb2008-12-11 08:38:42 +0100734
Ingo Molnar95cdd2e2008-12-21 13:50:42 +0100735 if (counter_sched_in(group_counter, cpuctx, ctx, cpu))
736 return -EAGAIN;
Ingo Molnar04289bb2008-12-11 08:38:42 +0100737
738 /*
739 * Schedule in siblings as one group (if any):
740 */
Ingo Molnar79958882008-12-17 08:54:56 +0100741 list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
Ingo Molnar95cdd2e2008-12-21 13:50:42 +0100742 if (counter_sched_in(counter, cpuctx, ctx, cpu)) {
743 partial_group = counter;
744 goto group_error;
745 }
Ingo Molnar79958882008-12-17 08:54:56 +0100746 }
747
Paul Mackerras3cbed422009-01-09 16:43:42 +1100748 return 0;
Ingo Molnar95cdd2e2008-12-21 13:50:42 +0100749
750group_error:
751 /*
752 * Groups can be scheduled in as one unit only, so undo any
753 * partial group before returning:
754 */
755 list_for_each_entry(counter, &group_counter->sibling_list, list_entry) {
756 if (counter == partial_group)
757 break;
758 counter_sched_out(counter, cpuctx, ctx);
759 }
760 counter_sched_out(group_counter, cpuctx, ctx);
761
762 return -EAGAIN;
Ingo Molnar04289bb2008-12-11 08:38:42 +0100763}
764
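/*
 * Schedule a context's counters in on this CPU: pinned groups go first
 * (and are put into error state if they cannot go on), then the
 * remaining groups are added while the hardware has room.
 */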
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100765static void
766__perf_counter_sched_in(struct perf_counter_context *ctx,
767 struct perf_cpu_context *cpuctx, int cpu)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100768{
Thomas Gleixner0793a612008-12-04 20:12:29 +0100769 struct perf_counter *counter;
Paul Mackerras3cbed422009-01-09 16:43:42 +1100770 u64 flags;
Paul Mackerrasdd0e6ba2009-01-12 15:11:00 +1100771 int can_add_hw = 1;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100772
Thomas Gleixner0793a612008-12-04 20:12:29 +0100773 spin_lock(&ctx->lock);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100774 ctx->is_active = 1;
775 if (likely(!ctx->nr_counters))
776 goto out;
777
Paul Mackerras3cbed422009-01-09 16:43:42 +1100778 flags = hw_perf_save_disable();
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100779
780 /*
781 * First go through the list and put on any pinned groups
782 * in order to give them the best chance of going on.
783 */
Ingo Molnar04289bb2008-12-11 08:38:42 +0100784 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100785 if (counter->state <= PERF_COUNTER_STATE_OFF ||
786 !counter->hw_event.pinned)
787 continue;
788 if (counter->cpu != -1 && counter->cpu != cpu)
789 continue;
790
791 if (group_can_go_on(counter, cpuctx, 1))
792 group_sched_in(counter, cpuctx, ctx, cpu);
793
794 /*
795 * If this pinned group hasn't been scheduled,
796 * put it in error state.
797 */
798 if (counter->state == PERF_COUNTER_STATE_INACTIVE)
799 counter->state = PERF_COUNTER_STATE_ERROR;
800 }
801
802 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
803 /*
804 * Ignore counters in OFF or ERROR state, and
805 * ignore pinned counters since we did them already.
806 */
807 if (counter->state <= PERF_COUNTER_STATE_OFF ||
808 counter->hw_event.pinned)
809 continue;
810
Ingo Molnar04289bb2008-12-11 08:38:42 +0100811 /*
812 * Listen to the 'cpu' scheduling filter constraint
813 * of counters:
814 */
Thomas Gleixner0793a612008-12-04 20:12:29 +0100815 if (counter->cpu != -1 && counter->cpu != cpu)
816 continue;
817
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100818 if (group_can_go_on(counter, cpuctx, can_add_hw)) {
Paul Mackerrasdd0e6ba2009-01-12 15:11:00 +1100819 if (group_sched_in(counter, cpuctx, ctx, cpu))
820 can_add_hw = 0;
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100821 }
Thomas Gleixner0793a612008-12-04 20:12:29 +0100822 }
Paul Mackerras3cbed422009-01-09 16:43:42 +1100823 hw_perf_restore(flags);
Paul Mackerrasd859e292009-01-17 18:10:22 +1100824 out:
Thomas Gleixner0793a612008-12-04 20:12:29 +0100825 spin_unlock(&ctx->lock);
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100826}
Ingo Molnar04289bb2008-12-11 08:38:42 +0100827
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100828/*
829 * Called from scheduler to add the counters of the current task
830 * with interrupts disabled.
831 *
832 * We restore the counter value and then enable it.
833 *
834 * This does not protect us against NMI, but enable()
835 * sets the enabled bit in the control field of counter _before_
 836 * accessing the counter control register. If an NMI hits, then it will
837 * keep the counter running.
838 */
839void perf_counter_task_sched_in(struct task_struct *task, int cpu)
840{
841 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
842 struct perf_counter_context *ctx = &task->perf_counter_ctx;
843
844 __perf_counter_sched_in(ctx, cpuctx, cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100845 cpuctx->task_ctx = ctx;
846}
847
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100848static void perf_counter_cpu_sched_in(struct perf_cpu_context *cpuctx, int cpu)
849{
850 struct perf_counter_context *ctx = &cpuctx->ctx;
851
852 __perf_counter_sched_in(ctx, cpuctx, cpu);
853}
854
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100855int perf_counter_task_disable(void)
856{
857 struct task_struct *curr = current;
858 struct perf_counter_context *ctx = &curr->perf_counter_ctx;
859 struct perf_counter *counter;
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100860 unsigned long flags;
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100861 u64 perf_flags;
862 int cpu;
863
864 if (likely(!ctx->nr_counters))
865 return 0;
866
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100867 curr_rq_lock_irq_save(&flags);
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100868 cpu = smp_processor_id();
869
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100870 /* force the update of the task clock: */
871 __task_delta_exec(curr, 1);
872
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100873 perf_counter_task_sched_out(curr, cpu);
874
875 spin_lock(&ctx->lock);
876
877 /*
878 * Disable all the counters:
879 */
880 perf_flags = hw_perf_save_disable();
881
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100882 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
883 if (counter->state != PERF_COUNTER_STATE_ERROR)
884 counter->state = PERF_COUNTER_STATE_OFF;
885 }
Ingo Molnar9b51f662008-12-12 13:49:45 +0100886
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100887 hw_perf_restore(perf_flags);
888
889 spin_unlock(&ctx->lock);
890
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100891 curr_rq_unlock_irq_restore(&flags);
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100892
893 return 0;
894}
895
896int perf_counter_task_enable(void)
897{
898 struct task_struct *curr = current;
899 struct perf_counter_context *ctx = &curr->perf_counter_ctx;
900 struct perf_counter *counter;
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100901 unsigned long flags;
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100902 u64 perf_flags;
903 int cpu;
904
905 if (likely(!ctx->nr_counters))
906 return 0;
907
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100908 curr_rq_lock_irq_save(&flags);
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100909 cpu = smp_processor_id();
910
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100911 /* force the update of the task clock: */
912 __task_delta_exec(curr, 1);
913
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100914 perf_counter_task_sched_out(curr, cpu);
915
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100916 spin_lock(&ctx->lock);
917
918 /*
 919 * Enable all the counters:
920 */
921 perf_flags = hw_perf_save_disable();
922
923 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
Paul Mackerras3b6f9e52009-01-14 21:00:30 +1100924 if (counter->state > PERF_COUNTER_STATE_OFF)
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100925 continue;
Ingo Molnar6a930702008-12-11 15:17:03 +0100926 counter->state = PERF_COUNTER_STATE_INACTIVE;
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100927 counter->hw_event.disabled = 0;
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100928 }
929 hw_perf_restore(perf_flags);
930
931 spin_unlock(&ctx->lock);
932
933 perf_counter_task_sched_in(curr, cpu);
934
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100935 curr_rq_unlock_irq_restore(&flags);
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +0100936
937 return 0;
938}
939
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100940/*
941 * Round-robin a context's counters:
942 */
943static void rotate_ctx(struct perf_counter_context *ctx)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100944{
Thomas Gleixner0793a612008-12-04 20:12:29 +0100945 struct perf_counter *counter;
Ingo Molnar5c92d122008-12-11 13:21:10 +0100946 u64 perf_flags;
Thomas Gleixner0793a612008-12-04 20:12:29 +0100947
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100948 if (!ctx->nr_counters)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100949 return;
950
Thomas Gleixner0793a612008-12-04 20:12:29 +0100951 spin_lock(&ctx->lock);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100952 /*
Ingo Molnar04289bb2008-12-11 08:38:42 +0100953 * Rotate the first entry last (works just fine for group counters too):
Thomas Gleixner0793a612008-12-04 20:12:29 +0100954 */
Ingo Molnar01b28382008-12-11 13:45:51 +0100955 perf_flags = hw_perf_save_disable();
Ingo Molnar04289bb2008-12-11 08:38:42 +0100956 list_for_each_entry(counter, &ctx->counter_list, list_entry) {
957 list_del(&counter->list_entry);
958 list_add_tail(&counter->list_entry, &ctx->counter_list);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100959 break;
960 }
Ingo Molnar01b28382008-12-11 13:45:51 +0100961 hw_perf_restore(perf_flags);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100962
963 spin_unlock(&ctx->lock);
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100964}
Thomas Gleixner0793a612008-12-04 20:12:29 +0100965
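/*
 * Called from the scheduler tick: schedule the current task's counters
 * out, rotate the counter list(s) so every counter gets PMU time, then
 * schedule them back in.
 */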
Ingo Molnar235c7fc2008-12-21 14:43:25 +0100966void perf_counter_task_tick(struct task_struct *curr, int cpu)
967{
968 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
969 struct perf_counter_context *ctx = &curr->perf_counter_ctx;
970 const int rotate_percpu = 0;
971
972 if (rotate_percpu)
973 perf_counter_cpu_sched_out(cpuctx);
974 perf_counter_task_sched_out(curr, cpu);
975
976 if (rotate_percpu)
977 rotate_ctx(&cpuctx->ctx);
978 rotate_ctx(ctx);
979
980 if (rotate_percpu)
981 perf_counter_cpu_sched_in(cpuctx, cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100982 perf_counter_task_sched_in(curr, cpu);
983}
984
985/*
Thomas Gleixner0793a612008-12-04 20:12:29 +0100986 * Cross CPU call to read the hardware counter
987 */
Ingo Molnar76715812008-12-17 14:20:28 +0100988static void __read(void *info)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100989{
Ingo Molnar621a01e2008-12-11 12:46:46 +0100990 struct perf_counter *counter = info;
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100991 unsigned long flags;
Ingo Molnar621a01e2008-12-11 12:46:46 +0100992
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100993 curr_rq_lock_irq_save(&flags);
Ingo Molnar76715812008-12-17 14:20:28 +0100994 counter->hw_ops->read(counter);
Ingo Molnaraa9c4c02008-12-17 14:10:57 +0100995 curr_rq_unlock_irq_restore(&flags);
Thomas Gleixner0793a612008-12-04 20:12:29 +0100996}
997
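/*
 * Read a counter's current value. If the counter is active, ask the CPU
 * it is running on to update counter->count first.
 */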
Ingo Molnar04289bb2008-12-11 08:38:42 +0100998static u64 perf_counter_read(struct perf_counter *counter)
Thomas Gleixner0793a612008-12-04 20:12:29 +0100999{
1000 /*
1001 * If counter is enabled and currently active on a CPU, update the
1002 * value in the counter structure:
1003 */
Ingo Molnar6a930702008-12-11 15:17:03 +01001004 if (counter->state == PERF_COUNTER_STATE_ACTIVE) {
Thomas Gleixner0793a612008-12-04 20:12:29 +01001005 smp_call_function_single(counter->oncpu,
Ingo Molnar76715812008-12-17 14:20:28 +01001006 __read, counter, 1);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001007 }
1008
Ingo Molnaree060942008-12-13 09:00:03 +01001009 return atomic64_read(&counter->count);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001010}
1011
1012/*
1013 * Cross CPU call to switch performance data pointers
1014 */
1015static void __perf_switch_irq_data(void *info)
1016{
1017 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
1018 struct perf_counter *counter = info;
1019 struct perf_counter_context *ctx = counter->ctx;
1020 struct perf_data *oldirqdata = counter->irqdata;
1021
1022 /*
1023 * If this is a task context, we need to check whether it is
1024 * the current task context of this cpu. If not it has been
1025 * scheduled out before the smp call arrived.
1026 */
1027 if (ctx->task) {
1028 if (cpuctx->task_ctx != ctx)
1029 return;
1030 spin_lock(&ctx->lock);
1031 }
1032
 1033 /* Change the pointer in an NMI-safe way */
1034 atomic_long_set((atomic_long_t *)&counter->irqdata,
1035 (unsigned long) counter->usrdata);
1036 counter->usrdata = oldirqdata;
1037
1038 if (ctx->task)
1039 spin_unlock(&ctx->lock);
1040}
1041
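/*
 * Swap the counter's irqdata and usrdata buffers so pending IRQ data can
 * be drained: directly under ctx->lock if the counter is not active,
 * otherwise via a cross-call to the CPU the task is running on.
 */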
1042static struct perf_data *perf_switch_irq_data(struct perf_counter *counter)
1043{
1044 struct perf_counter_context *ctx = counter->ctx;
1045 struct perf_data *oldirqdata = counter->irqdata;
1046 struct task_struct *task = ctx->task;
1047
1048 if (!task) {
1049 smp_call_function_single(counter->cpu,
1050 __perf_switch_irq_data,
1051 counter, 1);
1052 return counter->usrdata;
1053 }
1054
1055retry:
1056 spin_lock_irq(&ctx->lock);
Ingo Molnar6a930702008-12-11 15:17:03 +01001057 if (counter->state != PERF_COUNTER_STATE_ACTIVE) {
Thomas Gleixner0793a612008-12-04 20:12:29 +01001058 counter->irqdata = counter->usrdata;
1059 counter->usrdata = oldirqdata;
1060 spin_unlock_irq(&ctx->lock);
1061 return oldirqdata;
1062 }
1063 spin_unlock_irq(&ctx->lock);
1064 task_oncpu_function_call(task, __perf_switch_irq_data, counter);
1065 /* Might have failed, because task was scheduled out */
1066 if (counter->irqdata == oldirqdata)
1067 goto retry;
1068
1069 return counter->usrdata;
1070}
1071
1072static void put_context(struct perf_counter_context *ctx)
1073{
1074 if (ctx->task)
1075 put_task_struct(ctx->task);
1076}
1077
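/*
 * Find the context a new counter should attach to: the per-CPU context
 * for cpu != -1 (root only, CPU must be online), otherwise the context
 * of the task identified by pid, subject to ptrace-style permission
 * checks.
 */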
1078static struct perf_counter_context *find_get_context(pid_t pid, int cpu)
1079{
1080 struct perf_cpu_context *cpuctx;
1081 struct perf_counter_context *ctx;
1082 struct task_struct *task;
1083
1084 /*
1085 * If cpu is not a wildcard then this is a percpu counter:
1086 */
1087 if (cpu != -1) {
1088 /* Must be root to operate on a CPU counter: */
1089 if (!capable(CAP_SYS_ADMIN))
1090 return ERR_PTR(-EACCES);
1091
1092 if (cpu < 0 || cpu > num_possible_cpus())
1093 return ERR_PTR(-EINVAL);
1094
1095 /*
 1096 * We could be clever and allow attaching a counter to an
1097 * offline CPU and activate it when the CPU comes up, but
1098 * that's for later.
1099 */
1100 if (!cpu_isset(cpu, cpu_online_map))
1101 return ERR_PTR(-ENODEV);
1102
1103 cpuctx = &per_cpu(perf_cpu_context, cpu);
1104 ctx = &cpuctx->ctx;
1105
Thomas Gleixner0793a612008-12-04 20:12:29 +01001106 return ctx;
1107 }
1108
1109 rcu_read_lock();
1110 if (!pid)
1111 task = current;
1112 else
1113 task = find_task_by_vpid(pid);
1114 if (task)
1115 get_task_struct(task);
1116 rcu_read_unlock();
1117
1118 if (!task)
1119 return ERR_PTR(-ESRCH);
1120
1121 ctx = &task->perf_counter_ctx;
1122 ctx->task = task;
1123
1124 /* Reuse ptrace permission checks for now. */
1125 if (!ptrace_may_access(task, PTRACE_MODE_READ)) {
1126 put_context(ctx);
1127 return ERR_PTR(-EACCES);
1128 }
1129
1130 return ctx;
1131}
1132
1133/*
1134 * Called when the last reference to the file is gone.
1135 */
1136static int perf_release(struct inode *inode, struct file *file)
1137{
1138 struct perf_counter *counter = file->private_data;
1139 struct perf_counter_context *ctx = counter->ctx;
1140
1141 file->private_data = NULL;
1142
Paul Mackerrasd859e292009-01-17 18:10:22 +11001143 mutex_lock(&ctx->mutex);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001144 mutex_lock(&counter->mutex);
1145
Ingo Molnar04289bb2008-12-11 08:38:42 +01001146 perf_counter_remove_from_context(counter);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001147 put_context(ctx);
1148
1149 mutex_unlock(&counter->mutex);
Paul Mackerrasd859e292009-01-17 18:10:22 +11001150 mutex_unlock(&ctx->mutex);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001151
1152 kfree(counter);
1153
1154 return 0;
1155}
1156
1157/*
 1158 * Read the performance counter - simple non-blocking version for now
1159 */
1160static ssize_t
1161perf_read_hw(struct perf_counter *counter, char __user *buf, size_t count)
1162{
1163 u64 cntval;
1164
1165 if (count != sizeof(cntval))
1166 return -EINVAL;
1167
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11001168 /*
1169 * Return end-of-file for a read on a counter that is in
1170 * error state (i.e. because it was pinned but it couldn't be
1171 * scheduled on to the CPU at some point).
1172 */
1173 if (counter->state == PERF_COUNTER_STATE_ERROR)
1174 return 0;
1175
Thomas Gleixner0793a612008-12-04 20:12:29 +01001176 mutex_lock(&counter->mutex);
Ingo Molnar04289bb2008-12-11 08:38:42 +01001177 cntval = perf_counter_read(counter);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001178 mutex_unlock(&counter->mutex);
1179
1180 return put_user(cntval, (u64 __user *) buf) ? -EFAULT : sizeof(cntval);
1181}
1182
1183static ssize_t
1184perf_copy_usrdata(struct perf_data *usrdata, char __user *buf, size_t count)
1185{
1186 if (!usrdata->len)
1187 return 0;
1188
1189 count = min(count, (size_t)usrdata->len);
1190 if (copy_to_user(buf, usrdata->data + usrdata->rd_idx, count))
1191 return -EFAULT;
1192
1193 /* Adjust the counters */
1194 usrdata->len -= count;
1195 if (!usrdata->len)
1196 usrdata->rd_idx = 0;
1197 else
1198 usrdata->rd_idx += count;
1199
1200 return count;
1201}
1202
1203static ssize_t
1204perf_read_irq_data(struct perf_counter *counter,
1205 char __user *buf,
1206 size_t count,
1207 int nonblocking)
1208{
1209 struct perf_data *irqdata, *usrdata;
1210 DECLARE_WAITQUEUE(wait, current);
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11001211 ssize_t res, res2;
Thomas Gleixner0793a612008-12-04 20:12:29 +01001212
1213 irqdata = counter->irqdata;
1214 usrdata = counter->usrdata;
1215
1216 if (usrdata->len + irqdata->len >= count)
1217 goto read_pending;
1218
1219 if (nonblocking)
1220 return -EAGAIN;
1221
1222 spin_lock_irq(&counter->waitq.lock);
1223 __add_wait_queue(&counter->waitq, &wait);
1224 for (;;) {
1225 set_current_state(TASK_INTERRUPTIBLE);
1226 if (usrdata->len + irqdata->len >= count)
1227 break;
1228
1229 if (signal_pending(current))
1230 break;
1231
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11001232 if (counter->state == PERF_COUNTER_STATE_ERROR)
1233 break;
1234
Thomas Gleixner0793a612008-12-04 20:12:29 +01001235 spin_unlock_irq(&counter->waitq.lock);
1236 schedule();
1237 spin_lock_irq(&counter->waitq.lock);
1238 }
1239 __remove_wait_queue(&counter->waitq, &wait);
1240 __set_current_state(TASK_RUNNING);
1241 spin_unlock_irq(&counter->waitq.lock);
1242
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11001243 if (usrdata->len + irqdata->len < count &&
1244 counter->state != PERF_COUNTER_STATE_ERROR)
Thomas Gleixner0793a612008-12-04 20:12:29 +01001245 return -ERESTARTSYS;
1246read_pending:
1247 mutex_lock(&counter->mutex);
1248
1249 /* Drain pending data first: */
1250 res = perf_copy_usrdata(usrdata, buf, count);
1251 if (res < 0 || res == count)
1252 goto out;
1253
1254 /* Switch irq buffer: */
1255 usrdata = perf_switch_irq_data(counter);
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11001256 res2 = perf_copy_usrdata(usrdata, buf + res, count - res);
1257 if (res2 < 0) {
Thomas Gleixner0793a612008-12-04 20:12:29 +01001258 if (!res)
1259 res = -EFAULT;
1260 } else {
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11001261 res += res2;
Thomas Gleixner0793a612008-12-04 20:12:29 +01001262 }
1263out:
1264 mutex_unlock(&counter->mutex);
1265
1266 return res;
1267}
1268
1269static ssize_t
1270perf_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
1271{
1272 struct perf_counter *counter = file->private_data;
1273
Ingo Molnar9f66a382008-12-10 12:33:23 +01001274 switch (counter->hw_event.record_type) {
Thomas Gleixner0793a612008-12-04 20:12:29 +01001275 case PERF_RECORD_SIMPLE:
1276 return perf_read_hw(counter, buf, count);
1277
1278 case PERF_RECORD_IRQ:
1279 case PERF_RECORD_GROUP:
1280 return perf_read_irq_data(counter, buf, count,
1281 file->f_flags & O_NONBLOCK);
1282 }
1283 return -EINVAL;
1284}
1285
1286static unsigned int perf_poll(struct file *file, poll_table *wait)
1287{
1288 struct perf_counter *counter = file->private_data;
1289 unsigned int events = 0;
1290 unsigned long flags;
1291
1292 poll_wait(file, &counter->waitq, wait);
1293
1294 spin_lock_irqsave(&counter->waitq.lock, flags);
1295 if (counter->usrdata->len || counter->irqdata->len)
1296 events |= POLLIN;
1297 spin_unlock_irqrestore(&counter->waitq.lock, flags);
1298
1299 return events;
1300}
1301
Paul Mackerrasd859e292009-01-17 18:10:22 +11001302static long perf_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
1303{
1304 struct perf_counter *counter = file->private_data;
1305 int err = 0;
1306
1307 switch (cmd) {
1308 case PERF_COUNTER_IOC_ENABLE:
1309 perf_counter_enable_family(counter);
1310 break;
1311 case PERF_COUNTER_IOC_DISABLE:
1312 perf_counter_disable_family(counter);
1313 break;
1314 default:
1315 err = -ENOTTY;
1316 }
1317 return err;
1318}
1319
Thomas Gleixner0793a612008-12-04 20:12:29 +01001320static const struct file_operations perf_fops = {
1321 .release = perf_release,
1322 .read = perf_read,
1323 .poll = perf_poll,
Paul Mackerrasd859e292009-01-17 18:10:22 +11001324 .unlocked_ioctl = perf_ioctl,
1325 .compat_ioctl = perf_ioctl,
Thomas Gleixner0793a612008-12-04 20:12:29 +01001326};
1327
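/*
 * Software counter: CPU clock. The enable/read/disable methods below
 * accumulate the cpu_clock() delta since the previous reading into
 * counter->count.
 */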
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01001328static int cpu_clock_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar5c92d122008-12-11 13:21:10 +01001329{
Paul Mackerras9abf8a02009-01-09 16:26:43 +11001330 int cpu = raw_smp_processor_id();
1331
1332 atomic64_set(&counter->hw.prev_count, cpu_clock(cpu));
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01001333 return 0;
Ingo Molnar5c92d122008-12-11 13:21:10 +01001334}
1335
Paul Mackerras9abf8a02009-01-09 16:26:43 +11001336static void cpu_clock_perf_counter_update(struct perf_counter *counter)
1337{
1338 int cpu = raw_smp_processor_id();
1339 s64 prev;
1340 u64 now;
1341
1342 now = cpu_clock(cpu);
1343 prev = atomic64_read(&counter->hw.prev_count);
1344 atomic64_set(&counter->hw.prev_count, now);
1345 atomic64_add(now - prev, &counter->count);
1346}
1347
Ingo Molnar5c92d122008-12-11 13:21:10 +01001348static void cpu_clock_perf_counter_disable(struct perf_counter *counter)
1349{
Paul Mackerras9abf8a02009-01-09 16:26:43 +11001350 cpu_clock_perf_counter_update(counter);
Ingo Molnar5c92d122008-12-11 13:21:10 +01001351}
1352
1353static void cpu_clock_perf_counter_read(struct perf_counter *counter)
1354{
Paul Mackerras9abf8a02009-01-09 16:26:43 +11001355 cpu_clock_perf_counter_update(counter);
Ingo Molnar5c92d122008-12-11 13:21:10 +01001356}
1357
1358static const struct hw_perf_counter_ops perf_ops_cpu_clock = {
Ingo Molnar76715812008-12-17 14:20:28 +01001359 .enable = cpu_clock_perf_counter_enable,
1360 .disable = cpu_clock_perf_counter_disable,
1361 .read = cpu_clock_perf_counter_read,
Ingo Molnar5c92d122008-12-11 13:21:10 +01001362};
1363
Ingo Molnaraa9c4c02008-12-17 14:10:57 +01001364/*
1365 * Called from within the scheduler:
1366 */
1367static u64 task_clock_perf_counter_val(struct perf_counter *counter, int update)
Ingo Molnarbae43c92008-12-11 14:03:20 +01001368{
Ingo Molnaraa9c4c02008-12-17 14:10:57 +01001369 struct task_struct *curr = counter->task;
1370 u64 delta;
1371
Ingo Molnaraa9c4c02008-12-17 14:10:57 +01001372 delta = __task_delta_exec(curr, update);
1373
1374 return curr->se.sum_exec_runtime + delta;
1375}
1376
1377static void task_clock_perf_counter_update(struct perf_counter *counter, u64 now)
1378{
1379 u64 prev;
Ingo Molnar8cb391e2008-12-14 12:22:31 +01001380 s64 delta;
Ingo Molnarbae43c92008-12-11 14:03:20 +01001381
Ingo Molnar8cb391e2008-12-14 12:22:31 +01001382 prev = atomic64_read(&counter->hw.prev_count);
Ingo Molnar8cb391e2008-12-14 12:22:31 +01001383
1384 atomic64_set(&counter->hw.prev_count, now);
1385
1386 delta = now - prev;
Ingo Molnar8cb391e2008-12-14 12:22:31 +01001387
1388 atomic64_add(delta, &counter->count);
Ingo Molnarbae43c92008-12-11 14:03:20 +01001389}
1390
1391static void task_clock_perf_counter_read(struct perf_counter *counter)
1392{
Ingo Molnaraa9c4c02008-12-17 14:10:57 +01001393 u64 now = task_clock_perf_counter_val(counter, 1);
1394
1395 task_clock_perf_counter_update(counter, now);
Ingo Molnar8cb391e2008-12-14 12:22:31 +01001396}
1397
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01001398static int task_clock_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar8cb391e2008-12-14 12:22:31 +01001399{
Ingo Molnaraa9c4c02008-12-17 14:10:57 +01001400 u64 now = task_clock_perf_counter_val(counter, 0);
1401
1402 atomic64_set(&counter->hw.prev_count, now);
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01001403
1404 return 0;
Ingo Molnar8cb391e2008-12-14 12:22:31 +01001405}
1406
1407static void task_clock_perf_counter_disable(struct perf_counter *counter)
1408{
Ingo Molnaraa9c4c02008-12-17 14:10:57 +01001409 u64 now = task_clock_perf_counter_val(counter, 0);
1410
1411 task_clock_perf_counter_update(counter, now);
Ingo Molnarbae43c92008-12-11 14:03:20 +01001412}
1413
1414static const struct hw_perf_counter_ops perf_ops_task_clock = {
Ingo Molnar76715812008-12-17 14:20:28 +01001415 .enable = task_clock_perf_counter_enable,
1416 .disable = task_clock_perf_counter_disable,
1417 .read = task_clock_perf_counter_read,
Ingo Molnarbae43c92008-12-11 14:03:20 +01001418};
1419
Ingo Molnare06c61a2008-12-14 14:44:31 +01001420static u64 get_page_faults(void)
1421{
1422 struct task_struct *curr = current;
1423
1424 return curr->maj_flt + curr->min_flt;
1425}
1426
1427static void page_faults_perf_counter_update(struct perf_counter *counter)
1428{
1429 u64 prev, now;
1430 s64 delta;
1431
1432 prev = atomic64_read(&counter->hw.prev_count);
1433 now = get_page_faults();
1434
1435 atomic64_set(&counter->hw.prev_count, now);
1436
1437 delta = now - prev;
Ingo Molnare06c61a2008-12-14 14:44:31 +01001438
1439 atomic64_add(delta, &counter->count);
1440}
1441
1442static void page_faults_perf_counter_read(struct perf_counter *counter)
1443{
1444 page_faults_perf_counter_update(counter);
1445}
1446
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01001447static int page_faults_perf_counter_enable(struct perf_counter *counter)
Ingo Molnare06c61a2008-12-14 14:44:31 +01001448{
1449 /*
1450 * page-faults is a per-task value already,
 1451 * so we don't have to clear it on switch-in.
1452 */
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01001453
1454 return 0;
Ingo Molnare06c61a2008-12-14 14:44:31 +01001455}
1456
1457static void page_faults_perf_counter_disable(struct perf_counter *counter)
1458{
1459 page_faults_perf_counter_update(counter);
1460}
1461
1462static const struct hw_perf_counter_ops perf_ops_page_faults = {
Ingo Molnar76715812008-12-17 14:20:28 +01001463 .enable = page_faults_perf_counter_enable,
1464 .disable = page_faults_perf_counter_disable,
1465 .read = page_faults_perf_counter_read,
Ingo Molnare06c61a2008-12-14 14:44:31 +01001466};
1467
Ingo Molnar5d6a27d2008-12-14 12:28:33 +01001468static u64 get_context_switches(void)
1469{
1470 struct task_struct *curr = current;
1471
1472 return curr->nvcsw + curr->nivcsw;
1473}
1474
1475static void context_switches_perf_counter_update(struct perf_counter *counter)
1476{
1477 u64 prev, now;
1478 s64 delta;
1479
1480 prev = atomic64_read(&counter->hw.prev_count);
1481 now = get_context_switches();
1482
1483 atomic64_set(&counter->hw.prev_count, now);
1484
1485 delta = now - prev;
Ingo Molnar5d6a27d2008-12-14 12:28:33 +01001486
1487 atomic64_add(delta, &counter->count);
1488}
1489
1490static void context_switches_perf_counter_read(struct perf_counter *counter)
1491{
1492 context_switches_perf_counter_update(counter);
1493}
1494
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01001495static int context_switches_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar5d6a27d2008-12-14 12:28:33 +01001496{
1497 /*
 1498 * curr->nvcsw + curr->nivcsw is a per-task value already,
 1499 * so we don't have to clear it on switch-in.
1500 */
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01001501
1502 return 0;
Ingo Molnar5d6a27d2008-12-14 12:28:33 +01001503}
1504
1505static void context_switches_perf_counter_disable(struct perf_counter *counter)
1506{
1507 context_switches_perf_counter_update(counter);
1508}
1509
1510static const struct hw_perf_counter_ops perf_ops_context_switches = {
Ingo Molnar76715812008-12-17 14:20:28 +01001511 .enable = context_switches_perf_counter_enable,
1512 .disable = context_switches_perf_counter_disable,
1513 .read = context_switches_perf_counter_read,
Ingo Molnar5d6a27d2008-12-14 12:28:33 +01001514};
1515
Ingo Molnar6c594c22008-12-14 12:34:15 +01001516static inline u64 get_cpu_migrations(void)
1517{
1518 return current->se.nr_migrations;
1519}
1520
1521static void cpu_migrations_perf_counter_update(struct perf_counter *counter)
1522{
1523 u64 prev, now;
1524 s64 delta;
1525
1526 prev = atomic64_read(&counter->hw.prev_count);
1527 now = get_cpu_migrations();
1528
1529 atomic64_set(&counter->hw.prev_count, now);
1530
1531 delta = now - prev;
Ingo Molnar6c594c22008-12-14 12:34:15 +01001532
1533 atomic64_add(delta, &counter->count);
1534}
1535
1536static void cpu_migrations_perf_counter_read(struct perf_counter *counter)
1537{
1538 cpu_migrations_perf_counter_update(counter);
1539}
1540
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01001541static int cpu_migrations_perf_counter_enable(struct perf_counter *counter)
Ingo Molnar6c594c22008-12-14 12:34:15 +01001542{
1543 /*
1544 * se.nr_migrations is a per-task value already,
 1545 * so we don't have to clear it on switch-in.
1546 */
Ingo Molnar95cdd2e2008-12-21 13:50:42 +01001547
1548 return 0;
Ingo Molnar6c594c22008-12-14 12:34:15 +01001549}
1550
1551static void cpu_migrations_perf_counter_disable(struct perf_counter *counter)
1552{
1553 cpu_migrations_perf_counter_update(counter);
1554}
1555
1556static const struct hw_perf_counter_ops perf_ops_cpu_migrations = {
Ingo Molnar76715812008-12-17 14:20:28 +01001557 .enable = cpu_migrations_perf_counter_enable,
1558 .disable = cpu_migrations_perf_counter_disable,
1559 .read = cpu_migrations_perf_counter_read,
Ingo Molnar6c594c22008-12-14 12:34:15 +01001560};
1561
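/*
 * Pick the software-counter ops for generic (non-raw) event types;
 * returns NULL for types that need hardware support.
 */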
Ingo Molnar5c92d122008-12-11 13:21:10 +01001562static const struct hw_perf_counter_ops *
1563sw_perf_counter_init(struct perf_counter *counter)
1564{
1565 const struct hw_perf_counter_ops *hw_ops = NULL;
1566
1567 switch (counter->hw_event.type) {
1568 case PERF_COUNT_CPU_CLOCK:
1569 hw_ops = &perf_ops_cpu_clock;
1570 break;
Ingo Molnarbae43c92008-12-11 14:03:20 +01001571 case PERF_COUNT_TASK_CLOCK:
1572 hw_ops = &perf_ops_task_clock;
1573 break;
Ingo Molnare06c61a2008-12-14 14:44:31 +01001574 case PERF_COUNT_PAGE_FAULTS:
1575 hw_ops = &perf_ops_page_faults;
1576 break;
Ingo Molnar5d6a27d2008-12-14 12:28:33 +01001577 case PERF_COUNT_CONTEXT_SWITCHES:
1578 hw_ops = &perf_ops_context_switches;
1579 break;
Ingo Molnar6c594c22008-12-14 12:34:15 +01001580 case PERF_COUNT_CPU_MIGRATIONS:
1581 hw_ops = &perf_ops_cpu_migrations;
1582 break;
Ingo Molnar5c92d122008-12-11 13:21:10 +01001583 default:
1584 break;
1585 }
1586 return hw_ops;
1587}
1588
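/*
 * Illustrative sketch, not part of the original file: each software
 * counter above follows the same pattern - a snapshot helper, an update
 * function that folds the delta since the last snapshot into
 * counter->count, and a const struct hw_perf_counter_ops that
 * sw_perf_counter_init() can hand back.  A hypothetical counter for
 * voluntary context switches only (current->nvcsw) would look like the
 * code below; it would additionally need a new event type and a matching
 * case in sw_perf_counter_init(), neither of which exists here.  Kept
 * under #if 0 since it is only a sketch.
 */
#if 0
static inline u64 get_voluntary_cswitches(void)
{
	return current->nvcsw;
}

static void voluntary_cswitches_perf_counter_update(struct perf_counter *counter)
{
	u64 prev, now;
	s64 delta;

	prev = atomic64_read(&counter->hw.prev_count);
	now = get_voluntary_cswitches();

	atomic64_set(&counter->hw.prev_count, now);

	delta = now - prev;

	atomic64_add(delta, &counter->count);
}

static void voluntary_cswitches_perf_counter_read(struct perf_counter *counter)
{
	voluntary_cswitches_perf_counter_update(counter);
}

static int voluntary_cswitches_perf_counter_enable(struct perf_counter *counter)
{
	/* ->nvcsw is a per-task value already, nothing to clear on switch-in */
	return 0;
}

static void voluntary_cswitches_perf_counter_disable(struct perf_counter *counter)
{
	voluntary_cswitches_perf_counter_update(counter);
}

static const struct hw_perf_counter_ops perf_ops_voluntary_cswitches = {
	.enable		= voluntary_cswitches_perf_counter_enable,
	.disable	= voluntary_cswitches_perf_counter_disable,
	.read		= voluntary_cswitches_perf_counter_read,
};
#endif
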
Thomas Gleixner0793a612008-12-04 20:12:29 +01001589/*
1590 * Allocate and initialize a counter structure
1591 */
1592static struct perf_counter *
Ingo Molnar04289bb2008-12-11 08:38:42 +01001593perf_counter_alloc(struct perf_counter_hw_event *hw_event,
1594 int cpu,
Ingo Molnar9b51f662008-12-12 13:49:45 +01001595 struct perf_counter *group_leader,
1596 gfp_t gfpflags)
Thomas Gleixner0793a612008-12-04 20:12:29 +01001597{
Ingo Molnar5c92d122008-12-11 13:21:10 +01001598 const struct hw_perf_counter_ops *hw_ops;
Ingo Molnar621a01e2008-12-11 12:46:46 +01001599 struct perf_counter *counter;
Thomas Gleixner0793a612008-12-04 20:12:29 +01001600
Ingo Molnar9b51f662008-12-12 13:49:45 +01001601 counter = kzalloc(sizeof(*counter), gfpflags);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001602 if (!counter)
1603 return NULL;
1604
Ingo Molnar04289bb2008-12-11 08:38:42 +01001605 /*
1606 * Single counters are their own group leaders, with an
1607 * empty sibling list:
1608 */
1609 if (!group_leader)
1610 group_leader = counter;
1611
Thomas Gleixner0793a612008-12-04 20:12:29 +01001612 mutex_init(&counter->mutex);
Ingo Molnar04289bb2008-12-11 08:38:42 +01001613 INIT_LIST_HEAD(&counter->list_entry);
1614 INIT_LIST_HEAD(&counter->sibling_list);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001615 init_waitqueue_head(&counter->waitq);
1616
Paul Mackerrasd859e292009-01-17 18:10:22 +11001617 INIT_LIST_HEAD(&counter->child_list);
1618
Ingo Molnar9f66a382008-12-10 12:33:23 +01001619 counter->irqdata = &counter->data[0];
1620 counter->usrdata = &counter->data[1];
1621 counter->cpu = cpu;
1622 counter->hw_event = *hw_event;
1623 counter->wakeup_pending = 0;
Ingo Molnar04289bb2008-12-11 08:38:42 +01001624 counter->group_leader = group_leader;
Ingo Molnar621a01e2008-12-11 12:46:46 +01001625 counter->hw_ops = NULL;
1626
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001627 counter->state = PERF_COUNTER_STATE_INACTIVE;
Ingo Molnara86ed502008-12-17 00:43:10 +01001628 if (hw_event->disabled)
1629 counter->state = PERF_COUNTER_STATE_OFF;
1630
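	/*
	 * Software counters are selected by a negative, non-raw event type
	 * (see the switch in sw_perf_counter_init() above); raw and
	 * non-negative types fall through to the architecture's
	 * hw_perf_counter_init():
	 */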
Ingo Molnar5c92d122008-12-11 13:21:10 +01001631 hw_ops = NULL;
1632 if (!hw_event->raw && hw_event->type < 0)
1633 hw_ops = sw_perf_counter_init(counter);
Ingo Molnar9b51f662008-12-12 13:49:45 +01001634 if (!hw_ops)
Ingo Molnar5c92d122008-12-11 13:21:10 +01001635 hw_ops = hw_perf_counter_init(counter);
Ingo Molnar5c92d122008-12-11 13:21:10 +01001636
Ingo Molnar621a01e2008-12-11 12:46:46 +01001637 if (!hw_ops) {
1638 kfree(counter);
1639 return NULL;
1640 }
1641 counter->hw_ops = hw_ops;
Thomas Gleixner0793a612008-12-04 20:12:29 +01001642
1643 return counter;
1644}
1645
1646/**
Ingo Molnar9f66a382008-12-10 12:33:23 +01001647 * sys_perf_counter_open - open a performance counter, associate it with a task/cpu
1648 *
1649 * @hw_event_uptr: event type attributes for monitoring/sampling
Thomas Gleixner0793a612008-12-04 20:12:29 +01001650 * @pid: target pid
Ingo Molnar9f66a382008-12-10 12:33:23 +01001651 * @cpu: target cpu
1652 * @group_fd: group leader counter fd
Thomas Gleixner0793a612008-12-04 20:12:29 +01001653 */
Ingo Molnar1d1c7dd2008-12-11 14:59:31 +01001654asmlinkage int
1655sys_perf_counter_open(struct perf_counter_hw_event __user *hw_event_uptr,
1656 pid_t pid, int cpu, int group_fd)
Thomas Gleixner0793a612008-12-04 20:12:29 +01001657{
Ingo Molnar04289bb2008-12-11 08:38:42 +01001658 struct perf_counter *counter, *group_leader;
Ingo Molnar9f66a382008-12-10 12:33:23 +01001659 struct perf_counter_hw_event hw_event;
Ingo Molnar04289bb2008-12-11 08:38:42 +01001660 struct perf_counter_context *ctx;
Ingo Molnar9b51f662008-12-12 13:49:45 +01001661 struct file *counter_file = NULL;
Ingo Molnar04289bb2008-12-11 08:38:42 +01001662 struct file *group_file = NULL;
1663 int fput_needed = 0;
Ingo Molnar9b51f662008-12-12 13:49:45 +01001664 int fput_needed2 = 0;
Thomas Gleixner0793a612008-12-04 20:12:29 +01001665 int ret;
1666
Ingo Molnar9f66a382008-12-10 12:33:23 +01001667 if (copy_from_user(&hw_event, hw_event_uptr, sizeof(hw_event)) != 0)
Thomas Gleixnereab656a2008-12-08 19:26:59 +01001668 return -EFAULT;
1669
Ingo Molnar04289bb2008-12-11 08:38:42 +01001670 /*
Ingo Molnarccff2862008-12-11 11:26:29 +01001671 * Get the target context (task or percpu):
1672 */
1673 ctx = find_get_context(pid, cpu);
1674 if (IS_ERR(ctx))
1675 return PTR_ERR(ctx);
1676
1677 /*
1678 * Look up the group leader (we will attach this counter to it):
Ingo Molnar04289bb2008-12-11 08:38:42 +01001679 */
1680 group_leader = NULL;
1681 if (group_fd != -1) {
1682 ret = -EINVAL;
1683 group_file = fget_light(group_fd, &fput_needed);
1684 if (!group_file)
Ingo Molnarccff2862008-12-11 11:26:29 +01001685 goto err_put_context;
Ingo Molnar04289bb2008-12-11 08:38:42 +01001686 if (group_file->f_op != &perf_fops)
Ingo Molnarccff2862008-12-11 11:26:29 +01001687 goto err_put_context;
Ingo Molnar04289bb2008-12-11 08:38:42 +01001688
1689 group_leader = group_file->private_data;
1690 /*
Ingo Molnarccff2862008-12-11 11:26:29 +01001691 * Do not allow a recursive hierarchy (this new sibling
1692 * becoming part of another group-sibling):
Ingo Molnar04289bb2008-12-11 08:38:42 +01001693 */
Ingo Molnarccff2862008-12-11 11:26:29 +01001694 if (group_leader->group_leader != group_leader)
1695 goto err_put_context;
1696 /*
1697		 * Do not allow attaching to a group in a different
1698 * task or CPU context:
1699 */
1700 if (group_leader->ctx != ctx)
1701 goto err_put_context;
Paul Mackerras3b6f9e52009-01-14 21:00:30 +11001702 /*
1703 * Only a group leader can be exclusive or pinned
1704 */
1705 if (hw_event.exclusive || hw_event.pinned)
1706 goto err_put_context;
Ingo Molnar04289bb2008-12-11 08:38:42 +01001707 }
1708
Ingo Molnar5c92d122008-12-11 13:21:10 +01001709 ret = -EINVAL;
Ingo Molnar9b51f662008-12-12 13:49:45 +01001710 counter = perf_counter_alloc(&hw_event, cpu, group_leader, GFP_KERNEL);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001711 if (!counter)
1712 goto err_put_context;
1713
Thomas Gleixner0793a612008-12-04 20:12:29 +01001714 ret = anon_inode_getfd("[perf_counter]", &perf_fops, counter, 0);
1715 if (ret < 0)
Ingo Molnar9b51f662008-12-12 13:49:45 +01001716 goto err_free_put_context;
1717
1718 counter_file = fget_light(ret, &fput_needed2);
1719 if (!counter_file)
1720 goto err_free_put_context;
1721
1722 counter->filp = counter_file;
Paul Mackerrasd859e292009-01-17 18:10:22 +11001723 mutex_lock(&ctx->mutex);
Ingo Molnar9b51f662008-12-12 13:49:45 +01001724 perf_install_in_context(ctx, counter, cpu);
Paul Mackerrasd859e292009-01-17 18:10:22 +11001725 mutex_unlock(&ctx->mutex);
Ingo Molnar9b51f662008-12-12 13:49:45 +01001726
1727 fput_light(counter_file, fput_needed2);
Thomas Gleixner0793a612008-12-04 20:12:29 +01001728
Ingo Molnar04289bb2008-12-11 08:38:42 +01001729out_fput:
1730 fput_light(group_file, fput_needed);
1731
Thomas Gleixner0793a612008-12-04 20:12:29 +01001732 return ret;
1733
Ingo Molnar9b51f662008-12-12 13:49:45 +01001734err_free_put_context:
Thomas Gleixner0793a612008-12-04 20:12:29 +01001735 kfree(counter);
1736
1737err_put_context:
1738 put_context(ctx);
1739
Ingo Molnar04289bb2008-12-11 08:38:42 +01001740 goto out_fput;
Thomas Gleixner0793a612008-12-04 20:12:29 +01001741}
1742
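/*
 * Illustrative sketch, not part of the original file: a minimal stand-alone
 * user-space program exercising the syscall above, kept under #if 0 so it
 * stays out of the kernel build.  Assumptions are flagged explicitly:
 * __NR_perf_counter_open is whatever syscall number the tree carrying this
 * code assigns, pid 0 is taken to select the current task, cpu -1 a
 * per-task (any CPU) counter, group_fd -1 a new group leader, and read()
 * is assumed to hand back the raw 64-bit count.
 */
#if 0
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/perf_counter.h>

int main(void)
{
	struct perf_counter_hw_event hw_event;
	unsigned long long count;
	int fd;

	memset(&hw_event, 0, sizeof(hw_event));
	hw_event.type = PERF_COUNT_CONTEXT_SWITCHES;	/* software counter */

	fd = syscall(__NR_perf_counter_open, &hw_event, 0, -1, -1);
	if (fd < 0) {
		perror("perf_counter_open");
		return 1;
	}

	sleep(1);					/* work to be measured */

	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("context switches: %llu\n", count);

	close(fd);
	return 0;
}
#endif
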
Ingo Molnar9b51f662008-12-12 13:49:45 +01001743/*
1744 * Initialize the perf_counter context in a task_struct:
1745 */
1746static void
1747__perf_counter_init_context(struct perf_counter_context *ctx,
1748 struct task_struct *task)
1749{
1750 memset(ctx, 0, sizeof(*ctx));
1751 spin_lock_init(&ctx->lock);
Paul Mackerrasd859e292009-01-17 18:10:22 +11001752 mutex_init(&ctx->mutex);
Ingo Molnar9b51f662008-12-12 13:49:45 +01001753 INIT_LIST_HEAD(&ctx->counter_list);
1754 ctx->task = task;
1755}
1756
1757/*
1758 * inherit a counter from parent task to child task:
1759 */
Paul Mackerrasd859e292009-01-17 18:10:22 +11001760static struct perf_counter *
Ingo Molnar9b51f662008-12-12 13:49:45 +01001761inherit_counter(struct perf_counter *parent_counter,
1762 struct task_struct *parent,
1763 struct perf_counter_context *parent_ctx,
1764 struct task_struct *child,
Paul Mackerrasd859e292009-01-17 18:10:22 +11001765 struct perf_counter *group_leader,
Ingo Molnar9b51f662008-12-12 13:49:45 +01001766 struct perf_counter_context *child_ctx)
1767{
1768 struct perf_counter *child_counter;
1769
Paul Mackerrasd859e292009-01-17 18:10:22 +11001770 /*
1771 * Instead of creating recursive hierarchies of counters,
1772 * we link inherited counters back to the original parent,
1773	 * which is guaranteed to have a filp; we use that file's
1774	 * reference count to keep the parent counter alive:
1775 */
1776 if (parent_counter->parent)
1777 parent_counter = parent_counter->parent;
1778
Ingo Molnar9b51f662008-12-12 13:49:45 +01001779 child_counter = perf_counter_alloc(&parent_counter->hw_event,
Paul Mackerrasd859e292009-01-17 18:10:22 +11001780 parent_counter->cpu, group_leader,
1781 GFP_KERNEL);
Ingo Molnar9b51f662008-12-12 13:49:45 +01001782 if (!child_counter)
Paul Mackerrasd859e292009-01-17 18:10:22 +11001783 return NULL;
Ingo Molnar9b51f662008-12-12 13:49:45 +01001784
1785 /*
1786 * Link it up in the child's context:
1787 */
1788 child_counter->ctx = child_ctx;
1789 child_counter->task = child;
1790 list_add_counter(child_counter, child_ctx);
1791 child_ctx->nr_counters++;
1792
1793 child_counter->parent = parent_counter;
Ingo Molnar9b51f662008-12-12 13:49:45 +01001794 /*
1795 * inherit into child's child as well:
1796 */
1797 child_counter->hw_event.inherit = 1;
1798
1799 /*
1800 * Get a reference to the parent filp - we will fput it
1801 * when the child counter exits. This is safe to do because
1802 * we are in the parent and we know that the filp still
1803 * exists and has a nonzero count:
1804 */
1805 atomic_long_inc(&parent_counter->filp->f_count);
1806
Paul Mackerrasd859e292009-01-17 18:10:22 +11001807 /*
1808 * Link this into the parent counter's child list
1809 */
1810 mutex_lock(&parent_counter->mutex);
1811 list_add_tail(&child_counter->child_list, &parent_counter->child_list);
1812
1813 /*
1814 * Make the child state follow the state of the parent counter,
1815 * not its hw_event.disabled bit. We hold the parent's mutex,
1816 * so we won't race with perf_counter_{en,dis}able_family.
1817 */
1818 if (parent_counter->state >= PERF_COUNTER_STATE_INACTIVE)
1819 child_counter->state = PERF_COUNTER_STATE_INACTIVE;
1820 else
1821 child_counter->state = PERF_COUNTER_STATE_OFF;
1822
1823 mutex_unlock(&parent_counter->mutex);
1824
1825 return child_counter;
1826}
1827
1828static int inherit_group(struct perf_counter *parent_counter,
1829 struct task_struct *parent,
1830 struct perf_counter_context *parent_ctx,
1831 struct task_struct *child,
1832 struct perf_counter_context *child_ctx)
1833{
1834 struct perf_counter *leader;
1835 struct perf_counter *sub;
1836
1837 leader = inherit_counter(parent_counter, parent, parent_ctx,
1838 child, NULL, child_ctx);
1839 if (!leader)
1840 return -ENOMEM;
1841 list_for_each_entry(sub, &parent_counter->sibling_list, list_entry) {
1842 if (!inherit_counter(sub, parent, parent_ctx,
1843 child, leader, child_ctx))
1844 return -ENOMEM;
1845 }
Ingo Molnar9b51f662008-12-12 13:49:45 +01001846 return 0;
1847}
1848
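/*
 * Illustrative sketch, not part of the original file: what the inheritance
 * code above means for user space.  A counter opened with
 * hw_event.inherit = 1 before fork() is cloned into the child by
 * perf_counter_init_task()/inherit_group(), and the child's count is
 * folded back into the parent counter by sync_child_counter() when the
 * child exits, so a single read() in the parent covers both tasks.
 * As before, __NR_perf_counter_open and the pid/cpu/group_fd conventions
 * are assumptions about the surrounding tree; kept under #if 0.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/syscall.h>
#include <linux/perf_counter.h>

int main(void)
{
	struct perf_counter_hw_event hw_event;
	unsigned long long count;
	int fd;

	memset(&hw_event, 0, sizeof(hw_event));
	hw_event.type = PERF_COUNT_PAGE_FAULTS;
	hw_event.inherit = 1;			/* clone into children at fork() */

	fd = syscall(__NR_perf_counter_open, &hw_event, 0, -1, -1);
	if (fd < 0)
		return 1;

	if (fork() == 0) {
		char *buf = malloc(1 << 20);	/* child: fault in some memory */

		memset(buf, 1, 1 << 20);
		free(buf);
		_exit(0);
	}
	wait(NULL);

	/* the child's faults were synced back into this counter at its exit */
	if (read(fd, &count, sizeof(count)) == sizeof(count))
		printf("page faults including child: %llu\n", count);

	close(fd);
	return 0;
}
#endif
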
Paul Mackerrasd859e292009-01-17 18:10:22 +11001849static void sync_child_counter(struct perf_counter *child_counter,
1850 struct perf_counter *parent_counter)
1851{
1852 u64 parent_val, child_val;
1853
1854 parent_val = atomic64_read(&parent_counter->count);
1855 child_val = atomic64_read(&child_counter->count);
1856
1857 /*
1858 * Add back the child's count to the parent's count:
1859 */
1860 atomic64_add(child_val, &parent_counter->count);
1861
1862 /*
1863 * Remove this counter from the parent's list
1864 */
1865 mutex_lock(&parent_counter->mutex);
1866 list_del_init(&child_counter->child_list);
1867 mutex_unlock(&parent_counter->mutex);
1868
1869 /*
1870 * Release the parent counter, if this was the last
1871 * reference to it.
1872 */
1873 fput(parent_counter->filp);
1874}
1875
Ingo Molnar9b51f662008-12-12 13:49:45 +01001876static void
1877__perf_counter_exit_task(struct task_struct *child,
1878 struct perf_counter *child_counter,
1879 struct perf_counter_context *child_ctx)
1880{
1881 struct perf_counter *parent_counter;
Paul Mackerrasd859e292009-01-17 18:10:22 +11001882 struct perf_counter *sub, *tmp;
Ingo Molnar9b51f662008-12-12 13:49:45 +01001883
1884 /*
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001885	 * If the child is not self-reaping then we have to wait
1886	 * for it to unschedule (which is guaranteed to happen), so
1887	 * that its counter has reached its final count.  (This
1888	 * condition triggers rarely - child tasks usually go off
1889	 * their CPU before the parent gets this far into the
1890	 * reaping action.)
Ingo Molnar9b51f662008-12-12 13:49:45 +01001891 */
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001892 if (child != current) {
1893 wait_task_inactive(child, 0);
1894 list_del_init(&child_counter->list_entry);
1895 } else {
Ingo Molnar0cc0c022008-12-14 23:20:36 +01001896 struct perf_cpu_context *cpuctx;
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001897 unsigned long flags;
1898 u64 perf_flags;
1899
1900 /*
1901 * Disable and unlink this counter.
1902 *
1903 * Be careful about zapping the list - IRQ/NMI context
1904 * could still be processing it:
1905 */
1906 curr_rq_lock_irq_save(&flags);
1907 perf_flags = hw_perf_save_disable();
Ingo Molnar0cc0c022008-12-14 23:20:36 +01001908
1909 cpuctx = &__get_cpu_var(perf_cpu_context);
1910
Paul Mackerrasd859e292009-01-17 18:10:22 +11001911 group_sched_out(child_counter, cpuctx, child_ctx);
Ingo Molnar0cc0c022008-12-14 23:20:36 +01001912
Ingo Molnar235c7fc2008-12-21 14:43:25 +01001913 list_del_init(&child_counter->list_entry);
1914
1915 child_ctx->nr_counters--;
1916
1917 hw_perf_restore(perf_flags);
1918 curr_rq_unlock_irq_restore(&flags);
Ingo Molnar0cc0c022008-12-14 23:20:36 +01001919 }
1920
Ingo Molnar9b51f662008-12-12 13:49:45 +01001921 parent_counter = child_counter->parent;
1922 /*
1923	 * It can happen that the parent exits first, and its counters are
1924	 * then kept alive only by this child's reference.  Sync the child's
1925	 * count back and drop that reference so they do not linger.
1926 */
Paul Mackerrasd859e292009-01-17 18:10:22 +11001927 if (parent_counter) {
1928 sync_child_counter(child_counter, parent_counter);
1929 list_for_each_entry_safe(sub, tmp, &child_counter->sibling_list,
1930 list_entry) {
1931 if (sub->parent)
1932 sync_child_counter(sub, sub->parent);
1933 kfree(sub);
1934 }
1935 }
Ingo Molnar9b51f662008-12-12 13:49:45 +01001936
1937 kfree(child_counter);
1938}
1939
1940/*
Paul Mackerrasd859e292009-01-17 18:10:22 +11001941 * When a child task exits, feed back counter values to parent counters.
Ingo Molnar9b51f662008-12-12 13:49:45 +01001942 *
Paul Mackerrasd859e292009-01-17 18:10:22 +11001943 * Note: we may be running in child context, but the PID is not hashed
Ingo Molnar9b51f662008-12-12 13:49:45 +01001944 * anymore so new counters will not be added.
1945 */
1946void perf_counter_exit_task(struct task_struct *child)
1947{
1948 struct perf_counter *child_counter, *tmp;
1949 struct perf_counter_context *child_ctx;
1950
1951 child_ctx = &child->perf_counter_ctx;
1952
1953 if (likely(!child_ctx->nr_counters))
1954 return;
1955
1956 list_for_each_entry_safe(child_counter, tmp, &child_ctx->counter_list,
1957 list_entry)
1958 __perf_counter_exit_task(child, child_counter, child_ctx);
1959}
1960
1961/*
1962 * Initialize the perf_counter context in task_struct
1963 */
1964void perf_counter_init_task(struct task_struct *child)
1965{
1966 struct perf_counter_context *child_ctx, *parent_ctx;
Paul Mackerrasd859e292009-01-17 18:10:22 +11001967 struct perf_counter *counter;
Ingo Molnar9b51f662008-12-12 13:49:45 +01001968 struct task_struct *parent = current;
Ingo Molnar9b51f662008-12-12 13:49:45 +01001969
1970 child_ctx = &child->perf_counter_ctx;
1971 parent_ctx = &parent->perf_counter_ctx;
1972
1973 __perf_counter_init_context(child_ctx, child);
1974
1975 /*
1976 * This is executed from the parent task context, so inherit
1977 * counters that have been marked for cloning:
1978 */
1979
1980 if (likely(!parent_ctx->nr_counters))
1981 return;
1982
1983 /*
1984	 * Lock the parent's list.  No need to lock the child - it is not
1985	 * PID-hashed yet and not running, so nobody can access it.
1986 */
Paul Mackerrasd859e292009-01-17 18:10:22 +11001987 mutex_lock(&parent_ctx->mutex);
Ingo Molnar9b51f662008-12-12 13:49:45 +01001988
1989 /*
1990	 * We don't have to disable NMIs - we are only looking at
1991 * the list, not manipulating it:
1992 */
1993 list_for_each_entry(counter, &parent_ctx->counter_list, list_entry) {
Paul Mackerrasd859e292009-01-17 18:10:22 +11001994 if (!counter->hw_event.inherit)
Ingo Molnar9b51f662008-12-12 13:49:45 +01001995 continue;
1996
Paul Mackerrasd859e292009-01-17 18:10:22 +11001997 if (inherit_group(counter, parent,
Ingo Molnar9b51f662008-12-12 13:49:45 +01001998 parent_ctx, child, child_ctx))
1999 break;
2000 }
2001
Paul Mackerrasd859e292009-01-17 18:10:22 +11002002 mutex_unlock(&parent_ctx->mutex);
Ingo Molnar9b51f662008-12-12 13:49:45 +01002003}
2004
Ingo Molnar04289bb2008-12-11 08:38:42 +01002005static void __cpuinit perf_counter_init_cpu(int cpu)
Thomas Gleixner0793a612008-12-04 20:12:29 +01002006{
Ingo Molnar04289bb2008-12-11 08:38:42 +01002007 struct perf_cpu_context *cpuctx;
Thomas Gleixner0793a612008-12-04 20:12:29 +01002008
Ingo Molnar04289bb2008-12-11 08:38:42 +01002009 cpuctx = &per_cpu(perf_cpu_context, cpu);
2010 __perf_counter_init_context(&cpuctx->ctx, NULL);
Thomas Gleixner0793a612008-12-04 20:12:29 +01002011
2012 mutex_lock(&perf_resource_mutex);
Ingo Molnar04289bb2008-12-11 08:38:42 +01002013 cpuctx->max_pertask = perf_max_counters - perf_reserved_percpu;
Thomas Gleixner0793a612008-12-04 20:12:29 +01002014 mutex_unlock(&perf_resource_mutex);
Ingo Molnar04289bb2008-12-11 08:38:42 +01002015
Paul Mackerras01d02872009-01-14 13:44:19 +11002016 hw_perf_counter_setup(cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +01002017}
2018
2019#ifdef CONFIG_HOTPLUG_CPU
Ingo Molnar04289bb2008-12-11 08:38:42 +01002020static void __perf_counter_exit_cpu(void *info)
Thomas Gleixner0793a612008-12-04 20:12:29 +01002021{
2022 struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
2023 struct perf_counter_context *ctx = &cpuctx->ctx;
2024 struct perf_counter *counter, *tmp;
2025
Ingo Molnar04289bb2008-12-11 08:38:42 +01002026 list_for_each_entry_safe(counter, tmp, &ctx->counter_list, list_entry)
2027 __perf_counter_remove_from_context(counter);
Thomas Gleixner0793a612008-12-04 20:12:29 +01002028}
Ingo Molnar04289bb2008-12-11 08:38:42 +01002029static void perf_counter_exit_cpu(int cpu)
Thomas Gleixner0793a612008-12-04 20:12:29 +01002030{
Paul Mackerrasd859e292009-01-17 18:10:22 +11002031 struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu);
2032 struct perf_counter_context *ctx = &cpuctx->ctx;
2033
2034 mutex_lock(&ctx->mutex);
Ingo Molnar04289bb2008-12-11 08:38:42 +01002035 smp_call_function_single(cpu, __perf_counter_exit_cpu, NULL, 1);
Paul Mackerrasd859e292009-01-17 18:10:22 +11002036 mutex_unlock(&ctx->mutex);
Thomas Gleixner0793a612008-12-04 20:12:29 +01002037}
2038#else
Ingo Molnar04289bb2008-12-11 08:38:42 +01002039static inline void perf_counter_exit_cpu(int cpu) { }
Thomas Gleixner0793a612008-12-04 20:12:29 +01002040#endif
2041
2042static int __cpuinit
2043perf_cpu_notify(struct notifier_block *self, unsigned long action, void *hcpu)
2044{
2045 unsigned int cpu = (long)hcpu;
2046
2047 switch (action) {
2048
2049 case CPU_UP_PREPARE:
2050 case CPU_UP_PREPARE_FROZEN:
Ingo Molnar04289bb2008-12-11 08:38:42 +01002051 perf_counter_init_cpu(cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +01002052 break;
2053
2054 case CPU_DOWN_PREPARE:
2055 case CPU_DOWN_PREPARE_FROZEN:
Ingo Molnar04289bb2008-12-11 08:38:42 +01002056 perf_counter_exit_cpu(cpu);
Thomas Gleixner0793a612008-12-04 20:12:29 +01002057 break;
2058
2059 default:
2060 break;
2061 }
2062
2063 return NOTIFY_OK;
2064}
2065
2066static struct notifier_block __cpuinitdata perf_cpu_nb = {
2067 .notifier_call = perf_cpu_notify,
2068};
2069
2070static int __init perf_counter_init(void)
2071{
2072 perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_UP_PREPARE,
2073 (void *)(long)smp_processor_id());
2074 register_cpu_notifier(&perf_cpu_nb);
2075
2076 return 0;
2077}
2078early_initcall(perf_counter_init);
2079
2080static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, char *buf)
2081{
2082 return sprintf(buf, "%d\n", perf_reserved_percpu);
2083}
2084
2085static ssize_t
2086perf_set_reserve_percpu(struct sysdev_class *class,
2087 const char *buf,
2088 size_t count)
2089{
2090 struct perf_cpu_context *cpuctx;
2091 unsigned long val;
2092 int err, cpu, mpt;
2093
2094 err = strict_strtoul(buf, 10, &val);
2095 if (err)
2096 return err;
2097 if (val > perf_max_counters)
2098 return -EINVAL;
2099
2100 mutex_lock(&perf_resource_mutex);
2101 perf_reserved_percpu = val;
2102 for_each_online_cpu(cpu) {
2103 cpuctx = &per_cpu(perf_cpu_context, cpu);
2104 spin_lock_irq(&cpuctx->ctx.lock);
2105 mpt = min(perf_max_counters - cpuctx->ctx.nr_counters,
2106 perf_max_counters - perf_reserved_percpu);
2107 cpuctx->max_pertask = mpt;
2108 spin_unlock_irq(&cpuctx->ctx.lock);
2109 }
2110 mutex_unlock(&perf_resource_mutex);
2111
2112 return count;
2113}
2114
2115static ssize_t perf_show_overcommit(struct sysdev_class *class, char *buf)
2116{
2117 return sprintf(buf, "%d\n", perf_overcommit);
2118}
2119
2120static ssize_t
2121perf_set_overcommit(struct sysdev_class *class, const char *buf, size_t count)
2122{
2123 unsigned long val;
2124 int err;
2125
2126 err = strict_strtoul(buf, 10, &val);
2127 if (err)
2128 return err;
2129 if (val > 1)
2130 return -EINVAL;
2131
2132 mutex_lock(&perf_resource_mutex);
2133 perf_overcommit = val;
2134 mutex_unlock(&perf_resource_mutex);
2135
2136 return count;
2137}
2138
2139static SYSDEV_CLASS_ATTR(
2140 reserve_percpu,
2141 0644,
2142 perf_show_reserve_percpu,
2143 perf_set_reserve_percpu
2144 );
2145
2146static SYSDEV_CLASS_ATTR(
2147 overcommit,
2148 0644,
2149 perf_show_overcommit,
2150 perf_set_overcommit
2151 );
2152
2153static struct attribute *perfclass_attrs[] = {
2154 &attr_reserve_percpu.attr,
2155 &attr_overcommit.attr,
2156 NULL
2157};
2158
2159static struct attribute_group perfclass_attr_group = {
2160 .attrs = perfclass_attrs,
2161 .name = "perf_counters",
2162};
2163
2164static int __init perf_counter_sysfs_init(void)
2165{
2166 return sysfs_create_group(&cpu_sysdev_class.kset.kobj,
2167 &perfclass_attr_group);
2168}
2169device_initcall(perf_counter_sysfs_init);
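
/*
 * Illustrative note, not part of the original file: the attribute group
 * above hangs off the cpu sysdev class, so the two knobs are expected to
 * show up as /sys/devices/system/cpu/perf_counters/reserve_percpu and
 * .../overcommit (the exact path is an assumption - verify on a running
 * system).  reserve_percpu accepts 0..perf_max_counters and shrinks the
 * per-task quota on every CPU; overcommit accepts 0 or 1.  A minimal
 * stand-alone sketch of poking the knob from user space, under #if 0:
 */
#if 0
#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/devices/system/cpu/perf_counters/reserve_percpu", "w");

	if (!f)
		return 1;
	fprintf(f, "2\n");	/* reserve two counters per CPU for per-CPU users */
	return fclose(f) ? 1 : 0;
}
#endif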