[LIB]: Consolidate _atomic_dec_and_lock()

Several implementations were essentialy a common piece of C code using
the cmpxchg() macro.  Put the implementation in one spot that everyone
can share, and convert sparc64 over to using this.

Alpha is the lone arch-specific implementation, which codes up a
special fast path for the common case in order to avoid GP reloading
which a pure C version would require.

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c
index 2377af0..305a966 100644
--- a/lib/dec_and_lock.c
+++ b/lib/dec_and_lock.c
@@ -1,7 +1,41 @@
 #include <linux/module.h>
 #include <linux/spinlock.h>
 #include <asm/atomic.h>
+#include <asm/system.h>
 
+#ifdef __HAVE_ARCH_CMPXCHG
+/*
+ * This is an implementation of the notion of "decrement a
+ * reference count, and return locked if it decremented to zero".
+ *
+ * This implementation can be used on any architecture that
+ * has a cmpxchg, and where atomic->value is an int holding
+ * the value of the atomic (i.e. the high bits aren't used
+ * for a lock or anything like that).
+ */
+int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock)
+{
+	int counter;
+	int newcount;
+
+	for (;;) {
+		counter = atomic_read(atomic);
+		newcount = counter - 1;
+		if (!newcount)
+			break;		/* do it the slow way */
+
+		newcount = cmpxchg(&atomic->counter, counter, newcount);
+		if (newcount == counter)
+			return 0;
+	}
+
+	spin_lock(lock);
+	if (atomic_dec_and_test(atomic))
+		return 1;
+	spin_unlock(lock);
+	return 0;
+}
+#else
 /*
  * This is an architecture-neutral, but slow,
  * implementation of the notion of "decrement
@@ -33,5 +67,6 @@
 	spin_unlock(lock);
 	return 0;
 }
+#endif
 
 EXPORT_SYMBOL(_atomic_dec_and_lock);