perf_counter: don't count scheduler ticks as context switches

The context-switch software counter gives inflated values at present
because each scheduler tick and each process-wide counter
enable/disable prctl gets counted as a context switch.

This happens because perf_counter_task_tick, perf_counter_task_disable
and perf_counter_task_enable all call perf_counter_task_sched_out,
which calls perf_swcounter_event to record a context switch event.

This fixes it by introducing a variant of perf_counter_task_sched_out
with two underscores in front for internal use within the perf_counter
code, and making perf_counter_task_{tick,disable,enable} call it.  This
variant doesn't record a context switch event, and takes a struct
perf_counter_context *.  This adds the new variant rather than
changing the behaviour or interface of perf_counter_task_sched_out
because that is called from other code.

[ Impact: fix inflated context-switch event counts ]

Signed-off-by: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Corey Ashford <cjashfor@linux.vnet.ibm.com>
LKML-Reference: <18951.48034.485580.498953@drongo.ozlabs.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index a5bdc93..7373b96 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -837,6 +837,14 @@
 	cpuctx->task_ctx = NULL;
 }
 
+static void __perf_counter_task_sched_out(struct perf_counter_context *ctx)
+{
+	struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context);
+
+	__perf_counter_sched_out(ctx, cpuctx);	/* internal variant: no context-switch event is recorded */
+	cpuctx->task_ctx = NULL;		/* clear the per-CPU task_ctx pointer */
+}
+
 static void perf_counter_cpu_sched_out(struct perf_cpu_context *cpuctx)
 {
 	__perf_counter_sched_out(&cpuctx->ctx, cpuctx);
@@ -943,15 +951,13 @@
 	struct perf_counter *counter;
 	unsigned long flags;
 	u64 perf_flags;
-	int cpu;
 
 	if (likely(!ctx->nr_counters))
 		return 0;
 
 	local_irq_save(flags);
-	cpu = smp_processor_id();
 
-	perf_counter_task_sched_out(curr, cpu);
+	__perf_counter_task_sched_out(ctx);
 
 	spin_lock(&ctx->lock);
 
@@ -989,7 +995,7 @@
 	local_irq_save(flags);
 	cpu = smp_processor_id();
 
-	perf_counter_task_sched_out(curr, cpu);
+	__perf_counter_task_sched_out(ctx);
 
 	spin_lock(&ctx->lock);
 
@@ -1054,7 +1060,7 @@
 	ctx = &curr->perf_counter_ctx;
 
 	perf_counter_cpu_sched_out(cpuctx);
-	perf_counter_task_sched_out(curr, cpu);
+	__perf_counter_task_sched_out(ctx);
 
 	rotate_ctx(&cpuctx->ctx);
 	rotate_ctx(ctx);