namei: be careful with mountpoint crossings in follow_dotdot_rcu()

Otherwise we are risking a hard error where nonlazy restart would be the right
thing to do; it's a very narrow race with mount --move and most of the time it
ends up being completely harmless, but it's possible to construct a case when
we'll get a bogus hard error instead of falling back to non-lazy walk...

For one thing, when crossing _into_ overmount of parent we need to check for
mount_lock bumps when we get NULL from __lookup_mnt() as well.

For another, and less exotically, we need to make sure that the data fetched
in follow_up_rcu() had been consistent.  ->mnt_mountpoint is pinned for as
long as it is a mountpoint, but we need to check mount_lock after fetching
to verify that.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
diff --git a/fs/namei.c b/fs/namei.c
index 0cccb24..d5f4568 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1018,21 +1018,6 @@
 	return res;
 }
 
-static int follow_up_rcu(struct path *path)
-{
-	struct mount *mnt = real_mount(path->mnt);
-	struct mount *parent;
-	struct dentry *mountpoint;
-
-	parent = mnt->mnt_parent;
-	if (&parent->mnt == path->mnt)
-		return 0;
-	mountpoint = mnt->mnt_mountpoint;
-	path->dentry = mountpoint;
-	path->mnt = &parent->mnt;
-	return 1;
-}
-
 /*
  * follow_up - Find the mountpoint of path's vfsmount
  *
@@ -1289,10 +1274,8 @@
 		set_root_rcu(nd);
 
 	while (1) {
-		if (nd->path.dentry == nd->root.dentry &&
-		    nd->path.mnt == nd->root.mnt) {
+		if (path_equal(&nd->path, &nd->root))
 			break;
-		}
 		if (nd->path.dentry != nd->path.mnt->mnt_root) {
 			struct dentry *old = nd->path.dentry;
 			struct dentry *parent = old->d_parent;
@@ -1300,34 +1283,42 @@
 
 			inode = parent->d_inode;
 			seq = read_seqcount_begin(&parent->d_seq);
-			if (read_seqcount_retry(&old->d_seq, nd->seq))
-				goto failed;
+			if (unlikely(read_seqcount_retry(&old->d_seq, nd->seq)))
+				return -ECHILD;
 			nd->path.dentry = parent;
 			nd->seq = seq;
 			break;
+		} else {
+			struct mount *mnt = real_mount(nd->path.mnt);
+			struct mount *mparent = mnt->mnt_parent;
+			struct dentry *mountpoint = mnt->mnt_mountpoint;
+			struct inode *inode2 = mountpoint->d_inode;
+			unsigned seq = read_seqcount_begin(&mountpoint->d_seq);
+			if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
+				return -ECHILD;
+			if (&mparent->mnt == nd->path.mnt)
+				break;
+			/* we know that mountpoint was pinned */
+			nd->path.dentry = mountpoint;
+			nd->path.mnt = &mparent->mnt;
+			inode = inode2;
+			nd->seq = seq;
 		}
-		if (!follow_up_rcu(&nd->path))
-			break;
-		inode = nd->path.dentry->d_inode;
-		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
 	}
-	while (d_mountpoint(nd->path.dentry)) {
+	while (unlikely(d_mountpoint(nd->path.dentry))) {
 		struct mount *mounted;
 		mounted = __lookup_mnt(nd->path.mnt, nd->path.dentry);
+		if (unlikely(read_seqretry(&mount_lock, nd->m_seq)))
+			return -ECHILD;
 		if (!mounted)
 			break;
 		nd->path.mnt = &mounted->mnt;
 		nd->path.dentry = mounted->mnt.mnt_root;
 		inode = nd->path.dentry->d_inode;
 		nd->seq = read_seqcount_begin(&nd->path.dentry->d_seq);
-		if (read_seqretry(&mount_lock, nd->m_seq))
-			goto failed;
 	}
 	nd->inode = inode;
 	return 0;
-
-failed:
-	return -ECHILD;
 }
 
 /*