userns: Add a more complete capability subset test to commit_creds

When unsharing a user namespace we reduce our credentials to just what
can be done in that user namespace.  This is a subset of the credentials
we previously had.  Teach commit_creds to recognize this is a subset
of the credentials we have had before and don't clear the dumpability flag.

This allows an unprivileged  program to do:
unshare(CLONE_NEWUSER);
fd = open("/proc/self/uid_map", O_RDWR);

Where previously opening the uid_map writable would fail because
the the task had been made non-dumpable.

Acked-by: Serge Hallyn <serge.hallyn@canonical.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
diff --git a/kernel/cred.c b/kernel/cred.c
index 48cea3d..709d521 100644
--- a/kernel/cred.c
+++ b/kernel/cred.c
@@ -455,6 +455,31 @@
 	return ret;
 }
 
+static bool cred_cap_issubset(const struct cred *set, const struct cred *subset)
+{
+	const struct user_namespace *set_ns = set->user_ns;
+	const struct user_namespace *subset_ns = subset->user_ns;
+
+	/* If the two credentials are in the same user namespace see if
+	 * the capabilities of subset are a subset of set.
+	 */
+	if (set_ns == subset_ns)
+		return cap_issubset(subset->cap_permitted, set->cap_permitted);
+
+	/* The credentials are in a different user namespaces
+	 * therefore one is a subset of the other only if a set is an
+	 * ancestor of subset and set->euid is owner of subset or one
+	 * of subsets ancestors.
+	 */
+	for (;subset_ns != &init_user_ns; subset_ns = subset_ns->parent) {
+		if ((set_ns == subset_ns->parent)  &&
+		    uid_eq(subset_ns->owner, set->euid))
+			return true;
+	}
+
+	return false;
+}
+
 /**
  * commit_creds - Install new credentials upon the current task
  * @new: The credentials to be assigned
@@ -493,7 +518,7 @@
 	    !gid_eq(old->egid, new->egid) ||
 	    !uid_eq(old->fsuid, new->fsuid) ||
 	    !gid_eq(old->fsgid, new->fsgid) ||
-	    !cap_issubset(new->cap_permitted, old->cap_permitted)) {
+	    !cred_cap_issubset(old, new)) {
 		if (task->mm)
 			set_dumpable(task->mm, suid_dumpable);
 		task->pdeath_signal = 0;