Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 updates from Martin Schwidefsky:
 "The new features and main improvements in this merge for v4.9

   - Support for the UBSAN sanitizer

   - Set HAVE_EFFICIENT_UNALIGNED_ACCESS; this improves the generated
     code in some places

   - Improvements for the in-kernel fpu code, in particular the overhead
     for multiple consecutive in-kernel fpu users is reduced

   - Add a SIMD implementation for the RAID6 gen and xor operations

   - Add RAID6 recovery based on the XC instruction (sketched below)

   - The PCI DMA flush logic has been improved to increase the speed of
     the map/unmap operations

   - The time synchronization code has seen some updates

  And bug fixes all over the place"
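
The XC-based RAID6 recovery noted above offloads the bulk XOR work to the
s390 XC (exclusive or character) storage-to-storage instruction. As a rough
illustration of the underlying arithmetic only, not of the kernel's
implementation (which also covers the Q-syndrome cases via Galois-field
multiplication tables), a single failed data block can be rebuilt from the
P parity as follows; all names in this sketch are assumptions:

	#include <stddef.h>
	#include <string.h>

	/* XOR src into dst; on s390 the XC instruction performs this
	 * storage-to-storage XOR up to 256 bytes per execution. */
	static void xor_block(unsigned char *dst, const unsigned char *src,
			      size_t len)
	{
		size_t i;

		for (i = 0; i < len; i++)
			dst[i] ^= src[i];
	}

	/* P = D_0 ^ D_1 ^ ... ^ D_{n-1}, so a single failed block D_k is
	 * P XORed with all surviving data blocks. */
	static void recover_datap(unsigned char *dk, const unsigned char *p,
				  unsigned char **disks, int ndisks,
				  int failed, size_t len)
	{
		int d;

		memcpy(dk, p, len);
		for (d = 0; d < ndisks; d++)
			if (d != failed)
				xor_block(dk, disks[d], len);
	}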

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (48 commits)
  s390/con3270: fix insufficient space padding
  s390/con3270: fix use of uninitialised data
  MAINTAINERS: update DASD maintainer
  s390/cio: fix accidental interrupt enabling during resume
  s390/dasd: add missing \n to end of dev_err messages
  s390/config: Enable config options for Docker
  s390/dasd: make query host access interruptible
  s390/dasd: fix panic during offline processing
  s390/dasd: fix hanging offline processing
  s390/pci_dma: improve lazy flush for unmap
  s390/pci_dma: split dma_update_trans
  s390/pci_dma: improve map_sg
  s390/pci_dma: simplify dma address calculation
  s390/pci_dma: remove dma address range check
  iommu/s390: simplify registration of I/O address translation parameters
  s390: migrate exception table users off module.h and onto extable.h
  s390: export header for CLP ioctl
  s390/vmur: fix irq pointer dereference in int handler
  s390/dasd: add missing KOBJ_CHANGE event for unformatted devices
  s390: enable UBSAN
  ...
diff --git a/MAINTAINERS b/MAINTAINERS
index a4961b0..3bb6640 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10135,8 +10135,8 @@
 F:	drivers/s390/cio/
 
 S390 DASD DRIVER
-M:	Stefan Weinhuber <wein@de.ibm.com>
-M:	Stefan Haberland <stefan.haberland@de.ibm.com>
+M:	Stefan Haberland <sth@linux.vnet.ibm.com>
+M:	Jan Hoeppner <hoeppner@linux.vnet.ibm.com>
 L:	linux-s390@vger.kernel.org
 W:	http://www.ibm.com/developerworks/linux/linux390/
 S:	Supported
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index c109f07..deeadfa 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -73,6 +73,7 @@
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV
 	select ARCH_HAS_SG_CHAIN
+	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_INLINE_READ_LOCK
 	select ARCH_INLINE_READ_LOCK_BH
@@ -109,6 +110,7 @@
 	select ARCH_USE_CMPXCHG_LOCKREF
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANTS_PROT_NUMA_PROT_NONE
+	select ARCH_WANTS_UBSAN_NO_NULL
 	select ARCH_WANT_IPC_PARSE_VERSION
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS2
@@ -136,6 +138,7 @@
 	select HAVE_DMA_API_DEBUG
 	select HAVE_DYNAMIC_FTRACE
 	select HAVE_DYNAMIC_FTRACE_WITH_REGS
+	select HAVE_EFFICIENT_UNALIGNED_ACCESS
 	select HAVE_EXIT_THREAD
 	select HAVE_FTRACE_MCOUNT_RECORD
 	select HAVE_FUNCTION_GRAPH_TRACER
diff --git a/arch/s390/Makefile b/arch/s390/Makefile
index 224b427..54e0052 100644
--- a/arch/s390/Makefile
+++ b/arch/s390/Makefile
@@ -46,6 +46,8 @@
 cflags-$(CONFIG_MARCH_ZEC12_TUNE)	+= -mtune=zEC12
 cflags-$(CONFIG_MARCH_Z13_TUNE)	+= -mtune=z13
 
+cflags-y += -Wa,-I$(srctree)/arch/$(ARCH)/include
+
 #KBUILD_IMAGE is necessary for make rpm
 KBUILD_IMAGE	:=arch/s390/boot/image
 
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index 15c9424..f587c48 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -542,7 +542,7 @@
 		rc = PTR_ERR(appldata_pdev);
 		goto out_driver;
 	}
-	appldata_wq = create_singlethread_workqueue("appldata");
+	appldata_wq = alloc_ordered_workqueue("appldata", 0);
 	if (!appldata_wq) {
 		rc = -ENOMEM;
 		goto out_device;
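
The appldata change above follows the kernel-wide move away from
create_singlethread_workqueue(); alloc_ordered_workqueue() with no extra
flags gives the same one-work-item-at-a-time guarantee. A minimal usage
sketch (error handling condensed, names assumed):

	#include <linux/workqueue.h>

	static struct workqueue_struct *appldata_wq;

	static int __init example_init(void)
	{
		/* Ordered: at most one work item executes at any time,
		 * matching the old single-threaded semantics. */
		appldata_wq = alloc_ordered_workqueue("appldata", 0);
		if (!appldata_wq)
			return -ENOMEM;
		return 0;
	}
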
diff --git a/arch/s390/boot/compressed/Makefile b/arch/s390/boot/compressed/Makefile
index 33ba697c..0daa070 100644
--- a/arch/s390/boot/compressed/Makefile
+++ b/arch/s390/boot/compressed/Makefile
@@ -17,6 +17,7 @@
 KBUILD_CFLAGS += $(call cc-option,-ffreestanding)
 
 GCOV_PROFILE := n
+UBSAN_SANITIZE := n
 
 OBJECTS := $(addprefix $(objtree)/arch/s390/kernel/, head.o sclp.o ebcdic.o als.o)
 OBJECTS += $(obj)/head.o $(obj)/misc.o $(obj)/piggy.o
diff --git a/arch/s390/configs/default_defconfig b/arch/s390/configs/default_defconfig
index 412b1bd..4596868 100644
--- a/arch/s390/configs/default_defconfig
+++ b/arch/s390/configs/default_defconfig
@@ -260,7 +260,6 @@
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NF_TABLES_ARP=m
-CONFIG_NF_NAT_IPV4=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -269,6 +268,8 @@
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
@@ -281,7 +282,6 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NF_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
@@ -299,6 +299,8 @@
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NET_SCTPPROBE=m
 CONFIG_RDS=m
@@ -359,6 +361,7 @@
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
 CONFIG_NET_TCPPROBE=m
@@ -409,6 +412,7 @@
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_RAID=m
@@ -428,6 +432,7 @@
 CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
+CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
 CONFIG_TUN=m
 CONFIG_VETH=m
@@ -453,7 +458,6 @@
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
@@ -495,6 +499,7 @@
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
diff --git a/arch/s390/configs/gcov_defconfig b/arch/s390/configs/gcov_defconfig
index bec279e..1dd05e3 100644
--- a/arch/s390/configs/gcov_defconfig
+++ b/arch/s390/configs/gcov_defconfig
@@ -15,6 +15,8 @@
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_HUGETLB=y
@@ -255,7 +257,6 @@
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NF_TABLES_ARP=m
-CONFIG_NF_NAT_IPV4=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -264,6 +265,8 @@
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
@@ -276,7 +279,6 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NF_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
@@ -294,6 +296,8 @@
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NET_SCTPPROBE=m
 CONFIG_RDS=m
@@ -353,6 +357,7 @@
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
 CONFIG_NET_TCPPROBE=m
@@ -403,6 +408,7 @@
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_RAID=m
@@ -422,6 +428,7 @@
 CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
+CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
 CONFIG_TUN=m
 CONFIG_VETH=m
@@ -447,7 +454,6 @@
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
@@ -487,6 +493,7 @@
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig
index 1751446..29d1178 100644
--- a/arch/s390/configs/performance_defconfig
+++ b/arch/s390/configs/performance_defconfig
@@ -16,6 +16,8 @@
 CONFIG_MEMCG=y
 CONFIG_MEMCG_SWAP=y
 CONFIG_BLK_CGROUP=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_RT_GROUP_SCHED=y
 CONFIG_CGROUP_PIDS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_HUGETLB=y
@@ -255,7 +257,6 @@
 CONFIG_NF_TABLES_IPV4=m
 CONFIG_NFT_CHAIN_ROUTE_IPV4=m
 CONFIG_NF_TABLES_ARP=m
-CONFIG_NF_NAT_IPV4=m
 CONFIG_NFT_CHAIN_NAT_IPV4=m
 CONFIG_IP_NF_IPTABLES=m
 CONFIG_IP_NF_MATCH_AH=m
@@ -264,6 +265,8 @@
 CONFIG_IP_NF_MATCH_TTL=m
 CONFIG_IP_NF_FILTER=m
 CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_TARGET_MASQUERADE=m
 CONFIG_IP_NF_MANGLE=m
 CONFIG_IP_NF_TARGET_CLUSTERIP=m
 CONFIG_IP_NF_TARGET_ECN=m
@@ -276,7 +279,6 @@
 CONFIG_NF_CONNTRACK_IPV6=m
 CONFIG_NF_TABLES_IPV6=m
 CONFIG_NFT_CHAIN_ROUTE_IPV6=m
-CONFIG_NF_NAT_IPV6=m
 CONFIG_NFT_CHAIN_NAT_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
@@ -294,6 +296,8 @@
 CONFIG_IP6_NF_MANGLE=m
 CONFIG_IP6_NF_RAW=m
 CONFIG_IP6_NF_SECURITY=m
+CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_TARGET_MASQUERADE=m
 CONFIG_NF_TABLES_BRIDGE=m
 CONFIG_NET_SCTPPROBE=m
 CONFIG_RDS=m
@@ -353,6 +357,7 @@
 CONFIG_NET_ACT_SKBEDIT=m
 CONFIG_NET_ACT_CSUM=m
 CONFIG_DNS_RESOLVER=y
+CONFIG_CGROUP_NET_PRIO=y
 CONFIG_BPF_JIT=y
 CONFIG_NET_PKTGEN=m
 CONFIG_NET_TCPPROBE=m
@@ -403,6 +408,7 @@
 CONFIG_BLK_DEV_DM=m
 CONFIG_DM_CRYPT=m
 CONFIG_DM_SNAPSHOT=m
+CONFIG_DM_THIN_PROVISIONING=m
 CONFIG_DM_MIRROR=m
 CONFIG_DM_LOG_USERSPACE=m
 CONFIG_DM_RAID=m
@@ -422,6 +428,7 @@
 CONFIG_IFB=m
 CONFIG_MACVLAN=m
 CONFIG_MACVTAP=m
+CONFIG_IPVLAN=m
 CONFIG_VXLAN=m
 CONFIG_TUN=m
 CONFIG_VETH=m
@@ -447,7 +454,6 @@
 # CONFIG_INPUT_KEYBOARD is not set
 # CONFIG_INPUT_MOUSE is not set
 # CONFIG_SERIO is not set
-CONFIG_DEVPTS_MULTIPLE_INSTANCES=y
 CONFIG_LEGACY_PTY_COUNT=0
 CONFIG_HW_RANDOM_VIRTIO=m
 CONFIG_RAW_DRIVER=m
@@ -488,6 +494,7 @@
 CONFIG_AUTOFS4_FS=m
 CONFIG_FUSE_FS=y
 CONFIG_CUSE=m
+CONFIG_OVERLAY_FS=m
 CONFIG_FSCACHE=m
 CONFIG_CACHEFILES=m
 CONFIG_ISO9660_FS=y
diff --git a/arch/s390/crypto/aes_s390.c b/arch/s390/crypto/aes_s390.c
index 2ea18b0..303d28e 100644
--- a/arch/s390/crypto/aes_s390.c
+++ b/arch/s390/crypto/aes_s390.c
@@ -31,69 +31,29 @@
 #include <crypto/xts.h>
 #include <asm/cpacf.h>
 
-#define AES_KEYLEN_128		1
-#define AES_KEYLEN_192		2
-#define AES_KEYLEN_256		4
-
 static u8 *ctrblk;
 static DEFINE_SPINLOCK(ctrblk_lock);
-static char keylen_flag;
+
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
 
 struct s390_aes_ctx {
 	u8 key[AES_MAX_KEY_SIZE];
-	long enc;
-	long dec;
 	int key_len;
+	unsigned long fc;
 	union {
 		struct crypto_skcipher *blk;
 		struct crypto_cipher *cip;
 	} fallback;
 };
 
-struct pcc_param {
-	u8 key[32];
-	u8 tweak[16];
-	u8 block[16];
-	u8 bit[16];
-	u8 xts[16];
-};
-
 struct s390_xts_ctx {
 	u8 key[32];
 	u8 pcc_key[32];
-	long enc;
-	long dec;
 	int key_len;
+	unsigned long fc;
 	struct crypto_skcipher *fallback;
 };
 
-/*
- * Check if the key_len is supported by the HW.
- * Returns 0 if it is, a positive number if it is not and software fallback is
- * required or a negative number in case the key size is not valid
- */
-static int need_fallback(unsigned int key_len)
-{
-	switch (key_len) {
-	case 16:
-		if (!(keylen_flag & AES_KEYLEN_128))
-			return 1;
-		break;
-	case 24:
-		if (!(keylen_flag & AES_KEYLEN_192))
-			return 1;
-		break;
-	case 32:
-		if (!(keylen_flag & AES_KEYLEN_256))
-			return 1;
-		break;
-	default:
-		return -1;
-		break;
-	}
-	return 0;
-}
-
 static int setkey_fallback_cip(struct crypto_tfm *tfm, const u8 *in_key,
 		unsigned int key_len)
 {
@@ -117,72 +77,44 @@
 		       unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
-	int ret;
+	unsigned long fc;
 
-	ret = need_fallback(key_len);
-	if (ret < 0) {
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KM_AES_128 :
+	     (key_len == 24) ? CPACF_KM_AES_192 :
+	     (key_len == 32) ? CPACF_KM_AES_256 : 0;
+
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_cip(tfm, in_key, key_len);
 
 	sctx->key_len = key_len;
-	if (!ret) {
-		memcpy(sctx->key, in_key, key_len);
-		return 0;
-	}
-
-	return setkey_fallback_cip(tfm, in_key, key_len);
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
 static void aes_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
-	if (unlikely(need_fallback(sctx->key_len))) {
+	if (unlikely(!sctx->fc)) {
 		crypto_cipher_encrypt_one(sctx->fallback.cip, out, in);
 		return;
 	}
-
-	switch (sctx->key_len) {
-	case 16:
-		cpacf_km(CPACF_KM_AES_128_ENC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	case 24:
-		cpacf_km(CPACF_KM_AES_192_ENC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	case 32:
-		cpacf_km(CPACF_KM_AES_256_ENC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	}
+	cpacf_km(sctx->fc, &sctx->key, out, in, AES_BLOCK_SIZE);
 }
 
 static void aes_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
 
-	if (unlikely(need_fallback(sctx->key_len))) {
+	if (unlikely(!sctx->fc)) {
 		crypto_cipher_decrypt_one(sctx->fallback.cip, out, in);
 		return;
 	}
-
-	switch (sctx->key_len) {
-	case 16:
-		cpacf_km(CPACF_KM_AES_128_DEC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	case 24:
-		cpacf_km(CPACF_KM_AES_192_DEC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	case 32:
-		cpacf_km(CPACF_KM_AES_256_DEC, &sctx->key, out, in,
-			 AES_BLOCK_SIZE);
-		break;
-	}
+	cpacf_km(sctx->fc | CPACF_DECRYPT,
+		 &sctx->key, out, in, AES_BLOCK_SIZE);
 }
 
 static int fallback_init_cip(struct crypto_tfm *tfm)
@@ -291,50 +223,37 @@
 			   unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	int ret;
+	unsigned long fc;
 
-	ret = need_fallback(key_len);
-	if (ret > 0) {
-		sctx->key_len = key_len;
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KM_AES_128 :
+	     (key_len == 24) ? CPACF_KM_AES_192 :
+	     (key_len == 32) ? CPACF_KM_AES_256 : 0;
+
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
 		return setkey_fallback_blk(tfm, in_key, key_len);
-	}
 
-	switch (key_len) {
-	case 16:
-		sctx->enc = CPACF_KM_AES_128_ENC;
-		sctx->dec = CPACF_KM_AES_128_DEC;
-		break;
-	case 24:
-		sctx->enc = CPACF_KM_AES_192_ENC;
-		sctx->dec = CPACF_KM_AES_192_DEC;
-		break;
-	case 32:
-		sctx->enc = CPACF_KM_AES_256_ENC;
-		sctx->dec = CPACF_KM_AES_256_DEC;
-		break;
-	}
-
-	return aes_set_key(tfm, in_key, key_len);
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
-static int ecb_aes_crypt(struct blkcipher_desc *desc, long func, void *param,
+static int ecb_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 			 struct blkcipher_walk *walk)
 {
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes;
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int nbytes, n;
+	int ret;
 
-	while ((nbytes = walk->nbytes)) {
+	ret = blkcipher_walk_virt(desc, walk);
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
 		/* only use complete blocks */
-		unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		ret = cpacf_km(func, param, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
-
-		nbytes &= AES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		cpacf_km(sctx->fc | modifier, sctx->key,
+			 walk->dst.virt.addr, walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
 	}
 
 	return ret;
@@ -347,11 +266,11 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_enc(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, sctx->enc, sctx->key, &walk);
+	return ecb_aes_crypt(desc, 0, &walk);
 }
 
 static int ecb_aes_decrypt(struct blkcipher_desc *desc,
@@ -361,11 +280,11 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_dec(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_aes_crypt(desc, sctx->dec, sctx->key, &walk);
+	return ecb_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static int fallback_init_blk(struct crypto_tfm *tfm)
@@ -420,64 +339,45 @@
 			   unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
-	int ret;
+	unsigned long fc;
 
-	ret = need_fallback(key_len);
-	if (ret > 0) {
-		sctx->key_len = key_len;
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KMC_AES_128 :
+	     (key_len == 24) ? CPACF_KMC_AES_192 :
+	     (key_len == 32) ? CPACF_KMC_AES_256 : 0;
+
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&kmc_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
 		return setkey_fallback_blk(tfm, in_key, key_len);
-	}
 
-	switch (key_len) {
-	case 16:
-		sctx->enc = CPACF_KMC_AES_128_ENC;
-		sctx->dec = CPACF_KMC_AES_128_DEC;
-		break;
-	case 24:
-		sctx->enc = CPACF_KMC_AES_192_ENC;
-		sctx->dec = CPACF_KMC_AES_192_DEC;
-		break;
-	case 32:
-		sctx->enc = CPACF_KMC_AES_256_ENC;
-		sctx->dec = CPACF_KMC_AES_256_DEC;
-		break;
-	}
-
-	return aes_set_key(tfm, in_key, key_len);
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
-static int cbc_aes_crypt(struct blkcipher_desc *desc, long func,
+static int cbc_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 			 struct blkcipher_walk *walk)
 {
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes = walk->nbytes;
+	unsigned int nbytes, n;
+	int ret;
 	struct {
 		u8 iv[AES_BLOCK_SIZE];
 		u8 key[AES_MAX_KEY_SIZE];
 	} param;
 
-	if (!nbytes)
-		goto out;
-
+	ret = blkcipher_walk_virt(desc, walk);
 	memcpy(param.iv, walk->iv, AES_BLOCK_SIZE);
 	memcpy(param.key, sctx->key, sctx->key_len);
-	do {
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
 		/* only use complete blocks */
-		unsigned int n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		ret = cpacf_kmc(func, &param, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
-
-		nbytes &= AES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
-	} while ((nbytes = walk->nbytes));
+		n = nbytes & ~(AES_BLOCK_SIZE - 1);
+		cpacf_kmc(sctx->fc | modifier, &param,
+			  walk->dst.virt.addr, walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+	}
 	memcpy(walk->iv, param.iv, AES_BLOCK_SIZE);
-
-out:
 	return ret;
 }
 
@@ -488,11 +388,11 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_enc(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, sctx->enc, &walk);
+	return cbc_aes_crypt(desc, 0, &walk);
 }
 
 static int cbc_aes_decrypt(struct blkcipher_desc *desc,
@@ -502,11 +402,11 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(need_fallback(sctx->key_len)))
+	if (unlikely(!sctx->fc))
 		return fallback_blk_dec(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_aes_crypt(desc, sctx->dec, &walk);
+	return cbc_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg cbc_aes_alg = {
@@ -594,83 +494,67 @@
 			   unsigned int key_len)
 {
 	struct s390_xts_ctx *xts_ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
+	unsigned long fc;
 	int err;
 
 	err = xts_check_key(tfm, in_key, key_len);
 	if (err)
 		return err;
 
-	switch (key_len) {
-	case 32:
-		xts_ctx->enc = CPACF_KM_XTS_128_ENC;
-		xts_ctx->dec = CPACF_KM_XTS_128_DEC;
-		memcpy(xts_ctx->key + 16, in_key, 16);
-		memcpy(xts_ctx->pcc_key + 16, in_key + 16, 16);
-		break;
-	case 48:
-		xts_ctx->enc = 0;
-		xts_ctx->dec = 0;
-		xts_fallback_setkey(tfm, in_key, key_len);
-		break;
-	case 64:
-		xts_ctx->enc = CPACF_KM_XTS_256_ENC;
-		xts_ctx->dec = CPACF_KM_XTS_256_DEC;
-		memcpy(xts_ctx->key, in_key, 32);
-		memcpy(xts_ctx->pcc_key, in_key + 32, 32);
-		break;
-	default:
-		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
-		return -EINVAL;
-	}
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 32) ? CPACF_KM_XTS_128 :
+	     (key_len == 64) ? CPACF_KM_XTS_256 : 0;
+
+	/* Check if the function code is available */
+	xts_ctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
+	if (!xts_ctx->fc)
+		return xts_fallback_setkey(tfm, in_key, key_len);
+
+	/* Split the XTS key into the two subkeys */
+	key_len = key_len / 2;
 	xts_ctx->key_len = key_len;
+	memcpy(xts_ctx->key, in_key, key_len);
+	memcpy(xts_ctx->pcc_key, in_key + key_len, key_len);
 	return 0;
 }
 
-static int xts_aes_crypt(struct blkcipher_desc *desc, long func,
-			 struct s390_xts_ctx *xts_ctx,
+static int xts_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
 			 struct blkcipher_walk *walk)
 {
-	unsigned int offset = (xts_ctx->key_len >> 1) & 0x10;
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes = walk->nbytes;
-	unsigned int n;
-	u8 *in, *out;
-	struct pcc_param pcc_param;
+	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int offset, nbytes, n;
+	int ret;
+	struct {
+		u8 key[32];
+		u8 tweak[16];
+		u8 block[16];
+		u8 bit[16];
+		u8 xts[16];
+	} pcc_param;
 	struct {
 		u8 key[32];
 		u8 init[16];
 	} xts_param;
 
-	if (!nbytes)
-		goto out;
-
+	ret = blkcipher_walk_virt(desc, walk);
+	offset = xts_ctx->key_len & 0x10;
 	memset(pcc_param.block, 0, sizeof(pcc_param.block));
 	memset(pcc_param.bit, 0, sizeof(pcc_param.bit));
 	memset(pcc_param.xts, 0, sizeof(pcc_param.xts));
 	memcpy(pcc_param.tweak, walk->iv, sizeof(pcc_param.tweak));
-	memcpy(pcc_param.key, xts_ctx->pcc_key, 32);
-	/* remove decipher modifier bit from 'func' and call PCC */
-	ret = cpacf_pcc(func & 0x7f, &pcc_param.key[offset]);
-	if (ret < 0)
-		return -EIO;
+	memcpy(pcc_param.key + offset, xts_ctx->pcc_key, xts_ctx->key_len);
+	cpacf_pcc(xts_ctx->fc, pcc_param.key + offset);
 
-	memcpy(xts_param.key, xts_ctx->key, 32);
+	memcpy(xts_param.key + offset, xts_ctx->key, xts_ctx->key_len);
 	memcpy(xts_param.init, pcc_param.xts, 16);
-	do {
+
+	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
 		/* only use complete blocks */
 		n = nbytes & ~(AES_BLOCK_SIZE - 1);
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-
-		ret = cpacf_km(func, &xts_param.key[offset], out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
-
-		nbytes &= AES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
-	} while ((nbytes = walk->nbytes));
-out:
+		cpacf_km(xts_ctx->fc | modifier, xts_param.key + offset,
+			 walk->dst.virt.addr, walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+	}
 	return ret;
 }
 
@@ -681,11 +565,11 @@
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(xts_ctx->key_len == 48))
+	if (unlikely(!xts_ctx->fc))
 		return xts_fallback_encrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, xts_ctx->enc, xts_ctx, &walk);
+	return xts_aes_crypt(desc, 0, &walk);
 }
 
 static int xts_aes_decrypt(struct blkcipher_desc *desc,
@@ -695,11 +579,11 @@
 	struct s390_xts_ctx *xts_ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
-	if (unlikely(xts_ctx->key_len == 48))
+	if (unlikely(!xts_ctx->fc))
 		return xts_fallback_decrypt(desc, dst, src, nbytes);
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return xts_aes_crypt(desc, xts_ctx->dec, xts_ctx, &walk);
+	return xts_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static int xts_fallback_init(struct crypto_tfm *tfm)
@@ -750,108 +634,79 @@
 	}
 };
 
-static int xts_aes_alg_reg;
-
 static int ctr_aes_set_key(struct crypto_tfm *tfm, const u8 *in_key,
 			   unsigned int key_len)
 {
 	struct s390_aes_ctx *sctx = crypto_tfm_ctx(tfm);
+	unsigned long fc;
 
-	switch (key_len) {
-	case 16:
-		sctx->enc = CPACF_KMCTR_AES_128_ENC;
-		sctx->dec = CPACF_KMCTR_AES_128_DEC;
-		break;
-	case 24:
-		sctx->enc = CPACF_KMCTR_AES_192_ENC;
-		sctx->dec = CPACF_KMCTR_AES_192_DEC;
-		break;
-	case 32:
-		sctx->enc = CPACF_KMCTR_AES_256_ENC;
-		sctx->dec = CPACF_KMCTR_AES_256_DEC;
-		break;
-	}
+	/* Pick the correct function code based on the key length */
+	fc = (key_len == 16) ? CPACF_KMCTR_AES_128 :
+	     (key_len == 24) ? CPACF_KMCTR_AES_192 :
+	     (key_len == 32) ? CPACF_KMCTR_AES_256 : 0;
 
-	return aes_set_key(tfm, in_key, key_len);
+	/* Check if the function code is available */
+	sctx->fc = (fc && cpacf_test_func(&kmctr_functions, fc)) ? fc : 0;
+	if (!sctx->fc)
+		return setkey_fallback_blk(tfm, in_key, key_len);
+
+	sctx->key_len = key_len;
+	memcpy(sctx->key, in_key, key_len);
+	return 0;
 }
 
-static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
 {
 	unsigned int i, n;
 
 	/* only use complete blocks, max. PAGE_SIZE */
+	memcpy(ctrptr, iv, AES_BLOCK_SIZE);
 	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(AES_BLOCK_SIZE - 1);
-	for (i = AES_BLOCK_SIZE; i < n; i += AES_BLOCK_SIZE) {
-		memcpy(ctrptr + i, ctrptr + i - AES_BLOCK_SIZE,
-		       AES_BLOCK_SIZE);
-		crypto_inc(ctrptr + i, AES_BLOCK_SIZE);
+	for (i = (n / AES_BLOCK_SIZE) - 1; i > 0; i--) {
+		memcpy(ctrptr + AES_BLOCK_SIZE, ctrptr, AES_BLOCK_SIZE);
+		crypto_inc(ctrptr + AES_BLOCK_SIZE, AES_BLOCK_SIZE);
+		ctrptr += AES_BLOCK_SIZE;
 	}
 	return n;
 }
 
-static int ctr_aes_crypt(struct blkcipher_desc *desc, long func,
-			 struct s390_aes_ctx *sctx, struct blkcipher_walk *walk)
+static int ctr_aes_crypt(struct blkcipher_desc *desc, unsigned long modifier,
+			 struct blkcipher_walk *walk)
 {
-	int ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
+	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 buf[AES_BLOCK_SIZE], *ctrptr;
 	unsigned int n, nbytes;
-	u8 buf[AES_BLOCK_SIZE], ctrbuf[AES_BLOCK_SIZE];
-	u8 *out, *in, *ctrptr = ctrbuf;
+	int ret, locked;
 
-	if (!walk->nbytes)
-		return ret;
+	locked = spin_trylock(&ctrblk_lock);
 
-	if (spin_trylock(&ctrblk_lock))
-		ctrptr = ctrblk;
-
-	memcpy(ctrptr, walk->iv, AES_BLOCK_SIZE);
+	ret = blkcipher_walk_virt_block(desc, walk, AES_BLOCK_SIZE);
 	while ((nbytes = walk->nbytes) >= AES_BLOCK_SIZE) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		while (nbytes >= AES_BLOCK_SIZE) {
-			if (ctrptr == ctrblk)
-				n = __ctrblk_init(ctrptr, nbytes);
-			else
-				n = AES_BLOCK_SIZE;
-			ret = cpacf_kmctr(func, sctx->key, out, in, n, ctrptr);
-			if (ret < 0 || ret != n) {
-				if (ctrptr == ctrblk)
-					spin_unlock(&ctrblk_lock);
-				return -EIO;
-			}
-			if (n > AES_BLOCK_SIZE)
-				memcpy(ctrptr, ctrptr + n - AES_BLOCK_SIZE,
-				       AES_BLOCK_SIZE);
-			crypto_inc(ctrptr, AES_BLOCK_SIZE);
-			out += n;
-			in += n;
-			nbytes -= n;
-		}
-		ret = blkcipher_walk_done(desc, walk, nbytes);
+		n = AES_BLOCK_SIZE;
+		if (nbytes >= 2*AES_BLOCK_SIZE && locked)
+			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+		ctrptr = (n > AES_BLOCK_SIZE) ? ctrblk : walk->iv;
+		cpacf_kmctr(sctx->fc | modifier, sctx->key,
+			    walk->dst.virt.addr, walk->src.virt.addr,
+			    n, ctrptr);
+		if (ctrptr == ctrblk)
+			memcpy(walk->iv, ctrptr + n - AES_BLOCK_SIZE,
+			       AES_BLOCK_SIZE);
+		crypto_inc(walk->iv, AES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
 	}
-	if (ctrptr == ctrblk) {
-		if (nbytes)
-			memcpy(ctrbuf, ctrptr, AES_BLOCK_SIZE);
-		else
-			memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
+	if (locked)
 		spin_unlock(&ctrblk_lock);
-	} else {
-		if (!nbytes)
-			memcpy(walk->iv, ctrptr, AES_BLOCK_SIZE);
-	}
 	/*
 	 * final block may be < AES_BLOCK_SIZE, copy only nbytes
 	 */
 	if (nbytes) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		ret = cpacf_kmctr(func, sctx->key, buf, in,
-				  AES_BLOCK_SIZE, ctrbuf);
-		if (ret < 0 || ret != AES_BLOCK_SIZE)
-			return -EIO;
-		memcpy(out, buf, nbytes);
-		crypto_inc(ctrbuf, AES_BLOCK_SIZE);
+		cpacf_kmctr(sctx->fc | modifier, sctx->key,
+			    buf, walk->src.virt.addr,
+			    AES_BLOCK_SIZE, walk->iv);
+		memcpy(walk->dst.virt.addr, buf, nbytes);
+		crypto_inc(walk->iv, AES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
-		memcpy(walk->iv, ctrbuf, AES_BLOCK_SIZE);
 	}
 
 	return ret;
@@ -864,8 +719,11 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
+	if (unlikely(!sctx->fc))
+		return fallback_blk_enc(desc, dst, src, nbytes);
+
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, sctx->enc, sctx, &walk);
+	return ctr_aes_crypt(desc, 0, &walk);
 }
 
 static int ctr_aes_decrypt(struct blkcipher_desc *desc,
@@ -875,19 +733,25 @@
 	struct s390_aes_ctx *sctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
+	if (unlikely(!sctx->fc))
+		return fallback_blk_dec(desc, dst, src, nbytes);
+
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_aes_crypt(desc, sctx->dec, sctx, &walk);
+	return ctr_aes_crypt(desc, CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg ctr_aes_alg = {
 	.cra_name		=	"ctr(aes)",
 	.cra_driver_name	=	"ctr-aes-s390",
 	.cra_priority		=	400,	/* combo: aes + ctr */
-	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER,
+	.cra_flags		=	CRYPTO_ALG_TYPE_BLKCIPHER |
+					CRYPTO_ALG_NEED_FALLBACK,
 	.cra_blocksize		=	1,
 	.cra_ctxsize		=	sizeof(struct s390_aes_ctx),
 	.cra_type		=	&crypto_blkcipher_type,
 	.cra_module		=	THIS_MODULE,
+	.cra_init		=	fallback_init_blk,
+	.cra_exit		=	fallback_exit_blk,
 	.cra_u			=	{
 		.blkcipher = {
 			.min_keysize		=	AES_MIN_KEY_SIZE,
@@ -900,89 +764,79 @@
 	}
 };
 
-static int ctr_aes_alg_reg;
+static struct crypto_alg *aes_s390_algs_ptr[5];
+static int aes_s390_algs_num;
+
+static int aes_s390_register_alg(struct crypto_alg *alg)
+{
+	int ret;
+
+	ret = crypto_register_alg(alg);
+	if (!ret)
+		aes_s390_algs_ptr[aes_s390_algs_num++] = alg;
+	return ret;
+}
+
+static void aes_s390_fini(void)
+{
+	while (aes_s390_algs_num--)
+		crypto_unregister_alg(aes_s390_algs_ptr[aes_s390_algs_num]);
+	if (ctrblk)
+		free_page((unsigned long) ctrblk);
+}
 
 static int __init aes_s390_init(void)
 {
 	int ret;
 
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_128_ENC))
-		keylen_flag |= AES_KEYLEN_128;
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_192_ENC))
-		keylen_flag |= AES_KEYLEN_192;
-	if (cpacf_query(CPACF_KM, CPACF_KM_AES_256_ENC))
-		keylen_flag |= AES_KEYLEN_256;
+	/* Query available functions for KM, KMC and KMCTR */
+	cpacf_query(CPACF_KM, &km_functions);
+	cpacf_query(CPACF_KMC, &kmc_functions);
+	cpacf_query(CPACF_KMCTR, &kmctr_functions);
 
-	if (!keylen_flag)
-		return -EOPNOTSUPP;
-
-	/* z9 109 and z9 BC/EC only support 128 bit key length */
-	if (keylen_flag == AES_KEYLEN_128)
-		pr_info("AES hardware acceleration is only available for"
-			" 128-bit keys\n");
-
-	ret = crypto_register_alg(&aes_alg);
-	if (ret)
-		goto aes_err;
-
-	ret = crypto_register_alg(&ecb_aes_alg);
-	if (ret)
-		goto ecb_aes_err;
-
-	ret = crypto_register_alg(&cbc_aes_alg);
-	if (ret)
-		goto cbc_aes_err;
-
-	if (cpacf_query(CPACF_KM, CPACF_KM_XTS_128_ENC) &&
-	    cpacf_query(CPACF_KM, CPACF_KM_XTS_256_ENC)) {
-		ret = crypto_register_alg(&xts_aes_alg);
+	if (cpacf_test_func(&km_functions, CPACF_KM_AES_128) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_AES_192) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_AES_256)) {
+		ret = aes_s390_register_alg(&aes_alg);
 		if (ret)
-			goto xts_aes_err;
-		xts_aes_alg_reg = 1;
+			goto out_err;
+		ret = aes_s390_register_alg(&ecb_aes_alg);
+		if (ret)
+			goto out_err;
 	}
 
-	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_128_ENC) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_192_ENC) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_AES_256_ENC)) {
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_AES_128) ||
+	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_192) ||
+	    cpacf_test_func(&kmc_functions, CPACF_KMC_AES_256)) {
+		ret = aes_s390_register_alg(&cbc_aes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&km_functions, CPACF_KM_XTS_128) ||
+	    cpacf_test_func(&km_functions, CPACF_KM_XTS_256)) {
+		ret = aes_s390_register_alg(&xts_aes_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_128) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_192) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_AES_256)) {
 		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
 		if (!ctrblk) {
 			ret = -ENOMEM;
-			goto ctr_aes_err;
+			goto out_err;
 		}
-		ret = crypto_register_alg(&ctr_aes_alg);
-		if (ret) {
-			free_page((unsigned long) ctrblk);
-			goto ctr_aes_err;
-		}
-		ctr_aes_alg_reg = 1;
+		ret = aes_s390_register_alg(&ctr_aes_alg);
+		if (ret)
+			goto out_err;
 	}
 
-out:
+	return 0;
+out_err:
+	aes_s390_fini();
 	return ret;
-
-ctr_aes_err:
-	crypto_unregister_alg(&xts_aes_alg);
-xts_aes_err:
-	crypto_unregister_alg(&cbc_aes_alg);
-cbc_aes_err:
-	crypto_unregister_alg(&ecb_aes_alg);
-ecb_aes_err:
-	crypto_unregister_alg(&aes_alg);
-aes_err:
-	goto out;
-}
-
-static void __exit aes_s390_fini(void)
-{
-	if (ctr_aes_alg_reg) {
-		crypto_unregister_alg(&ctr_aes_alg);
-		free_page((unsigned long) ctrblk);
-	}
-	if (xts_aes_alg_reg)
-		crypto_unregister_alg(&xts_aes_alg);
-	crypto_unregister_alg(&cbc_aes_alg);
-	crypto_unregister_alg(&ecb_aes_alg);
-	crypto_unregister_alg(&aes_alg);
 }
 
 module_cpu_feature_match(MSA, aes_s390_init);
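
The aes_s390 rework above repeats one pattern for KM, KMC and KMCTR: query
each facility once at module init into a cpacf_mask_t, keep a single
function code per transform instead of separate enc/dec codes, and derive
decryption by OR-ing in the CPACF_DECRYPT modifier. Condensed from the
hunks above (a sketch, not a drop-in replacement):

	/* Filled once at init: cpacf_query(CPACF_KM, &km_functions); */
	static cpacf_mask_t km_functions;

	static int example_setkey(struct s390_aes_ctx *sctx,
				  const u8 *in_key, unsigned int key_len)
	{
		unsigned long fc;

		/* Map the key length to a CPACF function code, 0 if unknown. */
		fc = (key_len == 16) ? CPACF_KM_AES_128 :
		     (key_len == 24) ? CPACF_KM_AES_192 :
		     (key_len == 32) ? CPACF_KM_AES_256 : 0;

		/* Keep the code only if this machine implements it;
		 * otherwise the caller switches to the software fallback. */
		sctx->fc = (fc && cpacf_test_func(&km_functions, fc)) ? fc : 0;
		if (!sctx->fc)
			return -EINVAL;

		sctx->key_len = key_len;
		memcpy(sctx->key, in_key, key_len);
		return 0;
	}

	static void example_crypt(struct s390_aes_ctx *sctx, u8 *out,
				  const u8 *in, bool decrypt)
	{
		/* One function code serves both directions. */
		cpacf_km(sctx->fc | (decrypt ? CPACF_DECRYPT : 0),
			 &sctx->key, out, in, AES_BLOCK_SIZE);
	}
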
diff --git a/arch/s390/crypto/crc32-vx.c b/arch/s390/crypto/crc32-vx.c
index 2bad9d8..992e630 100644
--- a/arch/s390/crypto/crc32-vx.c
+++ b/arch/s390/crypto/crc32-vx.c
@@ -67,7 +67,7 @@
 									    \
 		kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);		    \
 		crc = ___crc32_vx(crc, data, aligned);			    \
-		kernel_fpu_end(&vxstate);				    \
+		kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);		    \
 									    \
 		if (remaining)						    \
 			crc = ___crc32_sw(crc, data + aligned, remaining);  \
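
The one-line crc32-vx change reflects the reworked in-kernel FPU API behind
the summary's fpu item: kernel_fpu_end() now takes the same register mask
as kernel_fpu_begin(), so the exit path restores only the vector registers
that were actually claimed, which is what cheapens back-to-back in-kernel
FPU sections. The resulting pattern, as used in the hunk:

	struct kernel_fpu vxstate;

	/* Claim only the low vector registers; begin and end must name
	 * the same mask so no more state is saved/restored than needed. */
	kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);
	crc = ___crc32_vx(crc, data, aligned);
	kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);
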
diff --git a/arch/s390/crypto/des_s390.c b/arch/s390/crypto/des_s390.c
index 697e71a..8b83144 100644
--- a/arch/s390/crypto/des_s390.c
+++ b/arch/s390/crypto/des_s390.c
@@ -27,6 +27,8 @@
 static u8 *ctrblk;
 static DEFINE_SPINLOCK(ctrblk_lock);
 
+static cpacf_mask_t km_functions, kmc_functions, kmctr_functions;
+
 struct s390_des_ctx {
 	u8 iv[DES_BLOCK_SIZE];
 	u8 key[DES3_KEY_SIZE];
@@ -36,12 +38,12 @@
 		      unsigned int key_len)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
 	u32 tmp[DES_EXPKEY_WORDS];
 
 	/* check for weak keys */
-	if (!des_ekey(tmp, key) && (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
-		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
+	if (!des_ekey(tmp, key) &&
+	    (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
 
@@ -53,14 +55,15 @@
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_DEA_ENC, ctx->key, out, in, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_DEA, ctx->key, out, in, DES_BLOCK_SIZE);
 }
 
 static void des_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_DEA_DEC, ctx->key, out, in, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_DEA | CPACF_DECRYPT,
+		 ctx->key, out, in, DES_BLOCK_SIZE);
 }
 
 static struct crypto_alg des_alg = {
@@ -82,61 +85,46 @@
 	}
 };
 
-static int ecb_desall_crypt(struct blkcipher_desc *desc, long func,
-			    u8 *key, struct blkcipher_walk *walk)
-{
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes;
-
-	while ((nbytes = walk->nbytes)) {
-		/* only use complete blocks */
-		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		ret = cpacf_km(func, key, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
-
-		nbytes &= DES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
-	}
-
-	return ret;
-}
-
-static int cbc_desall_crypt(struct blkcipher_desc *desc, long func,
+static int ecb_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
 			    struct blkcipher_walk *walk)
 {
 	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
-	int ret = blkcipher_walk_virt(desc, walk);
-	unsigned int nbytes = walk->nbytes;
+	unsigned int nbytes, n;
+	int ret;
+
+	ret = blkcipher_walk_virt(desc, walk);
+	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
+		/* only use complete blocks */
+		n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		cpacf_km(fc, ctx->key, walk->dst.virt.addr,
+			 walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+	}
+	return ret;
+}
+
+static int cbc_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
+			    struct blkcipher_walk *walk)
+{
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	unsigned int nbytes, n;
+	int ret;
 	struct {
 		u8 iv[DES_BLOCK_SIZE];
 		u8 key[DES3_KEY_SIZE];
 	} param;
 
-	if (!nbytes)
-		goto out;
-
+	ret = blkcipher_walk_virt(desc, walk);
 	memcpy(param.iv, walk->iv, DES_BLOCK_SIZE);
 	memcpy(param.key, ctx->key, DES3_KEY_SIZE);
-	do {
+	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
 		/* only use complete blocks */
-		unsigned int n = nbytes & ~(DES_BLOCK_SIZE - 1);
-		u8 *out = walk->dst.virt.addr;
-		u8 *in = walk->src.virt.addr;
-
-		ret = cpacf_kmc(func, &param, out, in, n);
-		if (ret < 0 || ret != n)
-			return -EIO;
-
-		nbytes &= DES_BLOCK_SIZE - 1;
-		ret = blkcipher_walk_done(desc, walk, nbytes);
-	} while ((nbytes = walk->nbytes));
+		n = nbytes & ~(DES_BLOCK_SIZE - 1);
+		cpacf_kmc(fc, &param, walk->dst.virt.addr,
+			  walk->src.virt.addr, n);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
+	}
 	memcpy(walk->iv, param.iv, DES_BLOCK_SIZE);
-
-out:
 	return ret;
 }
 
@@ -144,22 +132,20 @@
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA_ENC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_DEA, &walk);
 }
 
 static int ecb_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_DEA_DEC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_DEA | CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg ecb_des_alg = {
@@ -189,7 +175,7 @@
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_DEA_ENC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_DEA, &walk);
 }
 
 static int cbc_des_decrypt(struct blkcipher_desc *desc,
@@ -199,7 +185,7 @@
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_DEA_DEC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_DEA | CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg cbc_des_alg = {
@@ -240,13 +226,12 @@
 		       unsigned int key_len)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
-	u32 *flags = &tfm->crt_flags;
 
 	if (!(crypto_memneq(key, &key[DES_KEY_SIZE], DES_KEY_SIZE) &&
 	    crypto_memneq(&key[DES_KEY_SIZE], &key[DES_KEY_SIZE * 2],
 			  DES_KEY_SIZE)) &&
-	    (*flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
-		*flags |= CRYPTO_TFM_RES_WEAK_KEY;
+	    (tfm->crt_flags & CRYPTO_TFM_REQ_WEAK_KEY)) {
+		tfm->crt_flags |= CRYPTO_TFM_RES_WEAK_KEY;
 		return -EINVAL;
 	}
 	memcpy(ctx->key, key, key_len);
@@ -257,14 +242,15 @@
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_TDEA_192_ENC, ctx->key, dst, src, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_TDEA_192, ctx->key, dst, src, DES_BLOCK_SIZE);
 }
 
 static void des3_decrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
 {
 	struct s390_des_ctx *ctx = crypto_tfm_ctx(tfm);
 
-	cpacf_km(CPACF_KM_TDEA_192_DEC, ctx->key, dst, src, DES_BLOCK_SIZE);
+	cpacf_km(CPACF_KM_TDEA_192 | CPACF_DECRYPT,
+		 ctx->key, dst, src, DES_BLOCK_SIZE);
 }
 
 static struct crypto_alg des3_alg = {
@@ -290,22 +276,21 @@
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_ENC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192, &walk);
 }
 
 static int ecb_des3_decrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192_DEC, ctx->key, &walk);
+	return ecb_desall_crypt(desc, CPACF_KM_TDEA_192 | CPACF_DECRYPT,
+				&walk);
 }
 
 static struct crypto_alg ecb_des3_alg = {
@@ -335,7 +320,7 @@
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_ENC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192, &walk);
 }
 
 static int cbc_des3_decrypt(struct blkcipher_desc *desc,
@@ -345,7 +330,8 @@
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192_DEC, &walk);
+	return cbc_desall_crypt(desc, CPACF_KMC_TDEA_192 | CPACF_DECRYPT,
+				&walk);
 }
 
 static struct crypto_alg cbc_des3_alg = {
@@ -369,81 +355,54 @@
 	}
 };
 
-static unsigned int __ctrblk_init(u8 *ctrptr, unsigned int nbytes)
+static unsigned int __ctrblk_init(u8 *ctrptr, u8 *iv, unsigned int nbytes)
 {
 	unsigned int i, n;
 
 	/* align to block size, max. PAGE_SIZE */
 	n = (nbytes > PAGE_SIZE) ? PAGE_SIZE : nbytes & ~(DES_BLOCK_SIZE - 1);
-	for (i = DES_BLOCK_SIZE; i < n; i += DES_BLOCK_SIZE) {
-		memcpy(ctrptr + i, ctrptr + i - DES_BLOCK_SIZE, DES_BLOCK_SIZE);
-		crypto_inc(ctrptr + i, DES_BLOCK_SIZE);
+	memcpy(ctrptr, iv, DES_BLOCK_SIZE);
+	for (i = (n / DES_BLOCK_SIZE) - 1; i > 0; i--) {
+		memcpy(ctrptr + DES_BLOCK_SIZE, ctrptr, DES_BLOCK_SIZE);
+		crypto_inc(ctrptr + DES_BLOCK_SIZE, DES_BLOCK_SIZE);
+		ctrptr += DES_BLOCK_SIZE;
 	}
 	return n;
 }
 
-static int ctr_desall_crypt(struct blkcipher_desc *desc, long func,
-			    struct s390_des_ctx *ctx,
+static int ctr_desall_crypt(struct blkcipher_desc *desc, unsigned long fc,
 			    struct blkcipher_walk *walk)
 {
-	int ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
+	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
+	u8 buf[DES_BLOCK_SIZE], *ctrptr;
 	unsigned int n, nbytes;
-	u8 buf[DES_BLOCK_SIZE], ctrbuf[DES_BLOCK_SIZE];
-	u8 *out, *in, *ctrptr = ctrbuf;
+	int ret, locked;
 
-	if (!walk->nbytes)
-		return ret;
+	locked = spin_trylock(&ctrblk_lock);
 
-	if (spin_trylock(&ctrblk_lock))
-		ctrptr = ctrblk;
-
-	memcpy(ctrptr, walk->iv, DES_BLOCK_SIZE);
+	ret = blkcipher_walk_virt_block(desc, walk, DES_BLOCK_SIZE);
 	while ((nbytes = walk->nbytes) >= DES_BLOCK_SIZE) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		while (nbytes >= DES_BLOCK_SIZE) {
-			if (ctrptr == ctrblk)
-				n = __ctrblk_init(ctrptr, nbytes);
-			else
-				n = DES_BLOCK_SIZE;
-			ret = cpacf_kmctr(func, ctx->key, out, in, n, ctrptr);
-			if (ret < 0 || ret != n) {
-				if (ctrptr == ctrblk)
-					spin_unlock(&ctrblk_lock);
-				return -EIO;
-			}
-			if (n > DES_BLOCK_SIZE)
-				memcpy(ctrptr, ctrptr + n - DES_BLOCK_SIZE,
-				       DES_BLOCK_SIZE);
-			crypto_inc(ctrptr, DES_BLOCK_SIZE);
-			out += n;
-			in += n;
-			nbytes -= n;
-		}
-		ret = blkcipher_walk_done(desc, walk, nbytes);
+		n = DES_BLOCK_SIZE;
+		if (nbytes >= 2*DES_BLOCK_SIZE && locked)
+			n = __ctrblk_init(ctrblk, walk->iv, nbytes);
+		ctrptr = (n > DES_BLOCK_SIZE) ? ctrblk : walk->iv;
+		cpacf_kmctr(fc, ctx->key, walk->dst.virt.addr,
+			    walk->src.virt.addr, n, ctrptr);
+		if (ctrptr == ctrblk)
+			memcpy(walk->iv, ctrptr + n - DES_BLOCK_SIZE,
+				DES_BLOCK_SIZE);
+		crypto_inc(walk->iv, DES_BLOCK_SIZE);
+		ret = blkcipher_walk_done(desc, walk, nbytes - n);
 	}
-	if (ctrptr == ctrblk) {
-		if (nbytes)
-			memcpy(ctrbuf, ctrptr, DES_BLOCK_SIZE);
-		else
-			memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
+	if (locked)
 		spin_unlock(&ctrblk_lock);
-	} else {
-		if (!nbytes)
-			memcpy(walk->iv, ctrptr, DES_BLOCK_SIZE);
-	}
 	/* final block may be < DES_BLOCK_SIZE, copy only nbytes */
 	if (nbytes) {
-		out = walk->dst.virt.addr;
-		in = walk->src.virt.addr;
-		ret = cpacf_kmctr(func, ctx->key, buf, in,
-				  DES_BLOCK_SIZE, ctrbuf);
-		if (ret < 0 || ret != DES_BLOCK_SIZE)
-			return -EIO;
-		memcpy(out, buf, nbytes);
-		crypto_inc(ctrbuf, DES_BLOCK_SIZE);
+		cpacf_kmctr(fc, ctx->key, buf, walk->src.virt.addr,
+			    DES_BLOCK_SIZE, walk->iv);
+		memcpy(walk->dst.virt.addr, buf, nbytes);
+		crypto_inc(walk->iv, DES_BLOCK_SIZE);
 		ret = blkcipher_walk_done(desc, walk, 0);
-		memcpy(walk->iv, ctrbuf, DES_BLOCK_SIZE);
 	}
 	return ret;
 }
@@ -452,22 +411,20 @@
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_ENC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA, &walk);
 }
 
 static int ctr_des_decrypt(struct blkcipher_desc *desc,
 			   struct scatterlist *dst, struct scatterlist *src,
 			   unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA_DEC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_DEA | CPACF_DECRYPT, &walk);
 }
 
 static struct crypto_alg ctr_des_alg = {
@@ -495,22 +452,21 @@
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_ENC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192, &walk);
 }
 
 static int ctr_des3_decrypt(struct blkcipher_desc *desc,
 			    struct scatterlist *dst, struct scatterlist *src,
 			    unsigned int nbytes)
 {
-	struct s390_des_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
 	struct blkcipher_walk walk;
 
 	blkcipher_walk_init(&walk, dst, src, nbytes);
-	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192_DEC, ctx, &walk);
+	return ctr_desall_crypt(desc, CPACF_KMCTR_TDEA_192 | CPACF_DECRYPT,
+				&walk);
 }
 
 static struct crypto_alg ctr_des3_alg = {
@@ -534,83 +490,87 @@
 	}
 };
 
+static struct crypto_alg *des_s390_algs_ptr[8];
+static int des_s390_algs_num;
+
+static int des_s390_register_alg(struct crypto_alg *alg)
+{
+	int ret;
+
+	ret = crypto_register_alg(alg);
+	if (!ret)
+		des_s390_algs_ptr[des_s390_algs_num++] = alg;
+	return ret;
+}
+
+static void des_s390_exit(void)
+{
+	while (des_s390_algs_num--)
+		crypto_unregister_alg(des_s390_algs_ptr[des_s390_algs_num]);
+	if (ctrblk)
+		free_page((unsigned long) ctrblk);
+}
+
 static int __init des_s390_init(void)
 {
 	int ret;
 
-	if (!cpacf_query(CPACF_KM, CPACF_KM_DEA_ENC) ||
-	    !cpacf_query(CPACF_KM, CPACF_KM_TDEA_192_ENC))
-		return -EOPNOTSUPP;
+	/* Query available functions for KM, KMC and KMCTR */
+	cpacf_query(CPACF_KM, &km_functions);
+	cpacf_query(CPACF_KMC, &kmc_functions);
+	cpacf_query(CPACF_KMCTR, &kmctr_functions);
 
-	ret = crypto_register_alg(&des_alg);
-	if (ret)
-		goto des_err;
-	ret = crypto_register_alg(&ecb_des_alg);
-	if (ret)
-		goto ecb_des_err;
-	ret = crypto_register_alg(&cbc_des_alg);
-	if (ret)
-		goto cbc_des_err;
-	ret = crypto_register_alg(&des3_alg);
-	if (ret)
-		goto des3_err;
-	ret = crypto_register_alg(&ecb_des3_alg);
-	if (ret)
-		goto ecb_des3_err;
-	ret = crypto_register_alg(&cbc_des3_alg);
-	if (ret)
-		goto cbc_des3_err;
+	if (cpacf_test_func(&km_functions, CPACF_KM_DEA)) {
+		ret = des_s390_register_alg(&des_alg);
+		if (ret)
+			goto out_err;
+		ret = des_s390_register_alg(&ecb_des_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_DEA)) {
+		ret = des_s390_register_alg(&cbc_des_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&km_functions, CPACF_KM_TDEA_192)) {
+		ret = des_s390_register_alg(&des3_alg);
+		if (ret)
+			goto out_err;
+		ret = des_s390_register_alg(&ecb_des3_alg);
+		if (ret)
+			goto out_err;
+	}
+	if (cpacf_test_func(&kmc_functions, CPACF_KMC_TDEA_192)) {
+		ret = des_s390_register_alg(&cbc_des3_alg);
+		if (ret)
+			goto out_err;
+	}
 
-	if (cpacf_query(CPACF_KMCTR, CPACF_KMCTR_DEA_ENC) &&
-	    cpacf_query(CPACF_KMCTR, CPACF_KMCTR_TDEA_192_ENC)) {
-		ret = crypto_register_alg(&ctr_des_alg);
-		if (ret)
-			goto ctr_des_err;
-		ret = crypto_register_alg(&ctr_des3_alg);
-		if (ret)
-			goto ctr_des3_err;
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA) ||
+	    cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
 		ctrblk = (u8 *) __get_free_page(GFP_KERNEL);
 		if (!ctrblk) {
 			ret = -ENOMEM;
-			goto ctr_mem_err;
+			goto out_err;
 		}
 	}
-out:
-	return ret;
 
-ctr_mem_err:
-	crypto_unregister_alg(&ctr_des3_alg);
-ctr_des3_err:
-	crypto_unregister_alg(&ctr_des_alg);
-ctr_des_err:
-	crypto_unregister_alg(&cbc_des3_alg);
-cbc_des3_err:
-	crypto_unregister_alg(&ecb_des3_alg);
-ecb_des3_err:
-	crypto_unregister_alg(&des3_alg);
-des3_err:
-	crypto_unregister_alg(&cbc_des_alg);
-cbc_des_err:
-	crypto_unregister_alg(&ecb_des_alg);
-ecb_des_err:
-	crypto_unregister_alg(&des_alg);
-des_err:
-	goto out;
-}
-
-static void __exit des_s390_exit(void)
-{
-	if (ctrblk) {
-		crypto_unregister_alg(&ctr_des_alg);
-		crypto_unregister_alg(&ctr_des3_alg);
-		free_page((unsigned long) ctrblk);
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_DEA)) {
+		ret = des_s390_register_alg(&ctr_des_alg);
+		if (ret)
+			goto out_err;
 	}
-	crypto_unregister_alg(&cbc_des3_alg);
-	crypto_unregister_alg(&ecb_des3_alg);
-	crypto_unregister_alg(&des3_alg);
-	crypto_unregister_alg(&cbc_des_alg);
-	crypto_unregister_alg(&ecb_des_alg);
-	crypto_unregister_alg(&des_alg);
+	if (cpacf_test_func(&kmctr_functions, CPACF_KMCTR_TDEA_192)) {
+		ret = des_s390_register_alg(&ctr_des3_alg);
+		if (ret)
+			goto out_err;
+	}
+
+	return 0;
+out_err:
+	des_s390_exit();
+	return ret;
 }
 
 module_cpu_feature_match(MSA, des_s390_init);
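
Both the AES and DES CTR paths now share the same shape: when the trylock
on ctrblk_lock succeeds, __ctrblk_init() pre-fills the shared page-sized
counter buffer with consecutive counter values so a single KMCTR call can
cover many blocks, and walk->iv is advanced afterwards. A standalone
illustration of the expansion step (plain C; crypto_inc() in the kernel is
a big-endian increment like the helper here):

	#include <string.h>

	#define BLOCK_SIZE 16	/* AES; DES uses 8 */

	/* Big-endian increment of one counter block, as crypto_inc() does. */
	static void ctr_inc(unsigned char *ctr)
	{
		int i;

		for (i = BLOCK_SIZE - 1; i >= 0; i--)
			if (++ctr[i])
				break;
	}

	/* Fill buf with consecutive counter blocks starting at iv, i.e.
	 * block k holds iv + k; returns the number of bytes filled. */
	static unsigned int ctrblk_fill(unsigned char *buf,
					const unsigned char *iv,
					unsigned int nblocks)
	{
		unsigned int i;

		memcpy(buf, iv, BLOCK_SIZE);
		for (i = 1; i < nblocks; i++) {
			memcpy(buf + i * BLOCK_SIZE,
			       buf + (i - 1) * BLOCK_SIZE, BLOCK_SIZE);
			ctr_inc(buf + i * BLOCK_SIZE);
		}
		return nblocks * BLOCK_SIZE;
	}
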
diff --git a/arch/s390/crypto/ghash_s390.c b/arch/s390/crypto/ghash_s390.c
index ab68de7..564616d 100644
--- a/arch/s390/crypto/ghash_s390.c
+++ b/arch/s390/crypto/ghash_s390.c
@@ -58,7 +58,6 @@
 	struct ghash_desc_ctx *dctx = shash_desc_ctx(desc);
 	unsigned int n;
 	u8 *buf = dctx->buffer;
-	int ret;
 
 	if (dctx->bytes) {
 		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
@@ -71,18 +70,14 @@
 		src += n;
 
 		if (!dctx->bytes) {
-			ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
-					 GHASH_BLOCK_SIZE);
-			if (ret != GHASH_BLOCK_SIZE)
-				return -EIO;
+			cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf,
+				   GHASH_BLOCK_SIZE);
 		}
 	}
 
 	n = srclen & ~(GHASH_BLOCK_SIZE - 1);
 	if (n) {
-		ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
-		if (ret != n)
-			return -EIO;
+		cpacf_kimd(CPACF_KIMD_GHASH, dctx, src, n);
 		src += n;
 		srclen -= n;
 	}
@@ -98,17 +93,12 @@
 static int ghash_flush(struct ghash_desc_ctx *dctx)
 {
 	u8 *buf = dctx->buffer;
-	int ret;
 
 	if (dctx->bytes) {
 		u8 *pos = buf + (GHASH_BLOCK_SIZE - dctx->bytes);
 
 		memset(pos, 0, dctx->bytes);
-
-		ret = cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
-		if (ret != GHASH_BLOCK_SIZE)
-			return -EIO;
-
+		cpacf_kimd(CPACF_KIMD_GHASH, dctx, buf, GHASH_BLOCK_SIZE);
 		dctx->bytes = 0;
 	}
 
@@ -146,7 +136,7 @@
 
 static int __init ghash_mod_init(void)
 {
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_GHASH))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH))
 		return -EOPNOTSUPP;
 
 	return crypto_register_shash(&ghash_alg);
diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c
index 41527b1..9cc050f 100644
--- a/arch/s390/crypto/prng.c
+++ b/arch/s390/crypto/prng.c
@@ -135,12 +135,7 @@
 		else
 			h = ebuf;
 		/* generate sha256 from this page */
-		if (cpacf_kimd(CPACF_KIMD_SHA_256, h,
-			       pg, PAGE_SIZE) != PAGE_SIZE) {
-			prng_errorflag = PRNG_GEN_ENTROPY_FAILED;
-			ret = -EIO;
-			goto out;
-		}
+		cpacf_kimd(CPACF_KIMD_SHA_256, h, pg, PAGE_SIZE);
 		if (n < sizeof(hash))
 			memcpy(ebuf, hash, n);
 		ret += n;
@@ -148,7 +143,6 @@
 		nbytes -= n;
 	}
 
-out:
 	free_page((unsigned long)pg);
 	return ret;
 }
@@ -160,13 +154,11 @@
 {
 	__u64 entropy[4];
 	unsigned int i;
-	int ret;
 
 	for (i = 0; i < 16; i++) {
-		ret = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
-				(char *)entropy, (char *)entropy,
-				sizeof(entropy));
-		BUG_ON(ret < 0 || ret != sizeof(entropy));
+		cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+			  (char *) entropy, (char *) entropy,
+			  sizeof(entropy));
 		memcpy(prng_data->prngws.parm_block, entropy, sizeof(entropy));
 	}
 }
@@ -303,21 +295,14 @@
 		0x21, 0xe4, 0xb0, 0x86, 0x44, 0xf6, 0x72, 0x7c,
 		0x36, 0x8c, 0x5a, 0x9f, 0x7a, 0x4b, 0x3e, 0xe2 };
 
-	int ret = 0;
 	u8 buf[sizeof(random)];
 	struct ppno_ws_s ws;
 
 	memset(&ws, 0, sizeof(ws));
 
 	/* initial seed */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED, &ws, NULL, 0,
-			 seed, sizeof(seed));
-	if (ret < 0) {
-		pr_err("The prng self test seed operation for the "
-		       "SHA-512 mode failed with rc=%d\n", ret);
-		prng_errorflag = PRNG_SELFTEST_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+		   &ws, NULL, 0, seed, sizeof(seed));
 
 	/* check working states V and C */
 	if (memcmp(ws.V, V0, sizeof(V0)) != 0
@@ -329,22 +314,10 @@
 	}
 
 	/* generate random bytes */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-			 &ws, buf, sizeof(buf), NULL, 0);
-	if (ret < 0) {
-		pr_err("The prng self test generate operation for "
-		       "the SHA-512 mode failed with rc=%d\n", ret);
-		prng_errorflag = PRNG_SELFTEST_FAILED;
-		return -EIO;
-	}
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-			 &ws, buf, sizeof(buf), NULL, 0);
-	if (ret < 0) {
-		pr_err("The prng self test generate operation for "
-		       "the SHA-512 mode failed with rc=%d\n", ret);
-		prng_errorflag = PRNG_SELFTEST_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+		   &ws, buf, sizeof(buf), NULL, 0);
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+		   &ws, buf, sizeof(buf), NULL, 0);
 
 	/* check against expected data */
 	if (memcmp(buf, random, sizeof(random)) != 0) {
@@ -392,26 +365,16 @@
 	get_tod_clock_ext(seed + 48);
 
 	/* initial seed of the ppno drng */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
-			 &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
-	if (ret < 0) {
-		prng_errorflag = PRNG_SEED_FAILED;
-		ret = -EIO;
-		goto outfree;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+		   &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
 
 	/* if fips mode is enabled, generate a first block of random
 	   bytes for the FIPS 140-2 Conditional Self Test */
 	if (fips_enabled) {
 		prng_data->prev = prng_data->buf + prng_chunk_size;
-		ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-				 &prng_data->ppnows,
-				 prng_data->prev, prng_chunk_size, NULL, 0);
-		if (ret < 0 || ret != prng_chunk_size) {
-			prng_errorflag = PRNG_GEN_FAILED;
-			ret = -EIO;
-			goto outfree;
-		}
+		cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+			   &prng_data->ppnows,
+			   prng_data->prev, prng_chunk_size, NULL, 0);
 	}
 
 	return 0;
@@ -440,12 +403,8 @@
 		return ret;
 
 	/* do a reseed of the ppno drng with this bytestring */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
-			 &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
-	if (ret) {
-		prng_errorflag = PRNG_RESEED_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_SEED,
+		   &prng_data->ppnows, NULL, 0, seed, sizeof(seed));
 
 	return 0;
 }
@@ -463,12 +422,8 @@
 	}
 
 	/* PPNO generate */
-	ret = cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
-			 &prng_data->ppnows, buf, nbytes, NULL, 0);
-	if (ret < 0 || ret != nbytes) {
-		prng_errorflag = PRNG_GEN_FAILED;
-		return -EIO;
-	}
+	cpacf_ppno(CPACF_PPNO_SHA512_DRNG_GEN,
+		   &prng_data->ppnows, buf, nbytes, NULL, 0);
 
 	/* FIPS 140-2 Conditional Self Test */
 	if (fips_enabled) {
@@ -479,7 +434,7 @@
 		memcpy(prng_data->prev, buf, nbytes);
 	}
 
-	return ret;
+	return nbytes;
 }
 
 
@@ -494,7 +449,7 @@
 static ssize_t prng_tdes_read(struct file *file, char __user *ubuf,
 			      size_t nbytes, loff_t *ppos)
 {
-	int chunk, n, tmp, ret = 0;
+	int chunk, n, ret = 0;
 
 	/* lock prng_data struct */
 	if (mutex_lock_interruptible(&prng_data->mutex))
@@ -545,13 +500,9 @@
 		 *
 		 * Note: you can still get strict X9.17 conformity by setting
 		 * prng_chunk_size to 8 bytes.
-		*/
-		tmp = cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
-				prng_data->buf, prng_data->buf, n);
-		if (tmp < 0 || tmp != n) {
-			ret = -EIO;
-			break;
-		}
+		 */
+		cpacf_kmc(CPACF_KMC_PRNG, prng_data->prngws.parm_block,
+			  prng_data->buf, prng_data->buf, n);
 
 		prng_data->prngws.byte_counter += n;
 		prng_data->prngws.reseed_counter += n;
@@ -806,13 +757,13 @@
 	int ret;
 
 	/* check if the CPU has a PRNG */
-	if (!cpacf_query(CPACF_KMC, CPACF_KMC_PRNG))
+	if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG))
 		return -EOPNOTSUPP;
 
 	/* choose prng mode */
 	if (prng_mode != PRNG_MODE_TDES) {
 		/* check for MSA5 support for PPNO operations */
-		if (!cpacf_query(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) {
+		if (!cpacf_query_func(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN)) {
 			if (prng_mode == PRNG_MODE_SHA512) {
 				pr_err("The prng module cannot "
 				       "start in SHA-512 mode\n");
diff --git a/arch/s390/crypto/sha1_s390.c b/arch/s390/crypto/sha1_s390.c
index 5fbf91b..c7de53d 100644
--- a/arch/s390/crypto/sha1_s390.c
+++ b/arch/s390/crypto/sha1_s390.c
@@ -91,7 +91,7 @@
 
 static int __init sha1_s390_init(void)
 {
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_1))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
 		return -EOPNOTSUPP;
 	return crypto_register_shash(&alg);
 }
diff --git a/arch/s390/crypto/sha256_s390.c b/arch/s390/crypto/sha256_s390.c
index 10aac0b..53c2779 100644
--- a/arch/s390/crypto/sha256_s390.c
+++ b/arch/s390/crypto/sha256_s390.c
@@ -123,7 +123,7 @@
 {
 	int ret;
 
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_256))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
 		return -EOPNOTSUPP;
 	ret = crypto_register_shash(&sha256_alg);
 	if (ret < 0)
diff --git a/arch/s390/crypto/sha512_s390.c b/arch/s390/crypto/sha512_s390.c
index ea85757..2f4caa1 100644
--- a/arch/s390/crypto/sha512_s390.c
+++ b/arch/s390/crypto/sha512_s390.c
@@ -133,7 +133,7 @@
 {
 	int ret;
 
-	if (!cpacf_query(CPACF_KIMD, CPACF_KIMD_SHA_512))
+	if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
 		return -EOPNOTSUPP;
 	if ((ret = crypto_register_shash(&sha512_alg)) < 0)
 		goto out;
diff --git a/arch/s390/crypto/sha_common.c b/arch/s390/crypto/sha_common.c
index 8e90816..c740f77 100644
--- a/arch/s390/crypto/sha_common.c
+++ b/arch/s390/crypto/sha_common.c
@@ -22,8 +22,7 @@
 {
 	struct s390_sha_ctx *ctx = shash_desc_ctx(desc);
 	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
-	unsigned int index;
-	int ret;
+	unsigned int index, n;
 
 	/* how much is already in the buffer? */
 	index = ctx->count & (bsize - 1);
@@ -35,9 +34,7 @@
 	/* process one stored block */
 	if (index) {
 		memcpy(ctx->buf + index, data, bsize - index);
-		ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize);
-		if (ret != bsize)
-			return -EIO;
+		cpacf_kimd(ctx->func, ctx->state, ctx->buf, bsize);
 		data += bsize - index;
 		len -= bsize - index;
 		index = 0;
@@ -45,12 +42,10 @@
 
 	/* process as many blocks as possible */
 	if (len >= bsize) {
-		ret = cpacf_kimd(ctx->func, ctx->state, data,
-				 len & ~(bsize - 1));
-		if (ret != (len & ~(bsize - 1)))
-			return -EIO;
-		data += ret;
-		len -= ret;
+		n = len & ~(bsize - 1);
+		cpacf_kimd(ctx->func, ctx->state, data, n);
+		data += n;
+		len -= n;
 	}
 store:
 	if (len)
@@ -66,7 +61,6 @@
 	unsigned int bsize = crypto_shash_blocksize(desc->tfm);
 	u64 bits;
 	unsigned int index, end, plen;
-	int ret;
 
 	/* SHA-512 uses 128 bit padding length */
 	plen = (bsize > SHA256_BLOCK_SIZE) ? 16 : 8;
@@ -88,10 +82,7 @@
 	 */
 	bits = ctx->count * 8;
 	memcpy(ctx->buf + end - 8, &bits, sizeof(bits));
-
-	ret = cpacf_kimd(ctx->func, ctx->state, ctx->buf, end);
-	if (ret != end)
-		return -EIO;
+	cpacf_kimd(ctx->func, ctx->state, ctx->buf, end);
 
 	/* copy digest to out */
 	memcpy(out, ctx->state, crypto_shash_digestsize(desc->tfm));
diff --git a/arch/s390/include/asm/cpacf.h b/arch/s390/include/asm/cpacf.h
index d28621d..2c680db 100644
--- a/arch/s390/include/asm/cpacf.h
+++ b/arch/s390/include/asm/cpacf.h
@@ -28,67 +28,51 @@
 #define CPACF_PPNO		0xb93c		/* MSA5 */
 
 /*
- * Function codes for the KM (CIPHER MESSAGE)
- * instruction (0x80 is the decipher modifier bit)
+ * Decryption modifier bit
+ */
+#define CPACF_DECRYPT		0x80
+
+/*
+ * Function codes for the KM (CIPHER MESSAGE) instruction
  */
 #define CPACF_KM_QUERY		0x00
-#define CPACF_KM_DEA_ENC	0x01
-#define CPACF_KM_DEA_DEC	0x81
-#define CPACF_KM_TDEA_128_ENC	0x02
-#define CPACF_KM_TDEA_128_DEC	0x82
-#define CPACF_KM_TDEA_192_ENC	0x03
-#define CPACF_KM_TDEA_192_DEC	0x83
-#define CPACF_KM_AES_128_ENC	0x12
-#define CPACF_KM_AES_128_DEC	0x92
-#define CPACF_KM_AES_192_ENC	0x13
-#define CPACF_KM_AES_192_DEC	0x93
-#define CPACF_KM_AES_256_ENC	0x14
-#define CPACF_KM_AES_256_DEC	0x94
-#define CPACF_KM_XTS_128_ENC	0x32
-#define CPACF_KM_XTS_128_DEC	0xb2
-#define CPACF_KM_XTS_256_ENC	0x34
-#define CPACF_KM_XTS_256_DEC	0xb4
+#define CPACF_KM_DEA		0x01
+#define CPACF_KM_TDEA_128	0x02
+#define CPACF_KM_TDEA_192	0x03
+#define CPACF_KM_AES_128	0x12
+#define CPACF_KM_AES_192	0x13
+#define CPACF_KM_AES_256	0x14
+#define CPACF_KM_XTS_128	0x32
+#define CPACF_KM_XTS_256	0x34
 
 /*
  * Function codes for the KMC (CIPHER MESSAGE WITH CHAINING)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KMC_QUERY		0x00
-#define CPACF_KMC_DEA_ENC	0x01
-#define CPACF_KMC_DEA_DEC	0x81
-#define CPACF_KMC_TDEA_128_ENC	0x02
-#define CPACF_KMC_TDEA_128_DEC	0x82
-#define CPACF_KMC_TDEA_192_ENC	0x03
-#define CPACF_KMC_TDEA_192_DEC	0x83
-#define CPACF_KMC_AES_128_ENC	0x12
-#define CPACF_KMC_AES_128_DEC	0x92
-#define CPACF_KMC_AES_192_ENC	0x13
-#define CPACF_KMC_AES_192_DEC	0x93
-#define CPACF_KMC_AES_256_ENC	0x14
-#define CPACF_KMC_AES_256_DEC	0x94
+#define CPACF_KMC_DEA		0x01
+#define CPACF_KMC_TDEA_128	0x02
+#define CPACF_KMC_TDEA_192	0x03
+#define CPACF_KMC_AES_128	0x12
+#define CPACF_KMC_AES_192	0x13
+#define CPACF_KMC_AES_256	0x14
 #define CPACF_KMC_PRNG		0x43
 
 /*
  * Function codes for the KMCTR (CIPHER MESSAGE WITH COUNTER)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
-#define CPACF_KMCTR_QUERY	 0x00
-#define CPACF_KMCTR_DEA_ENC	 0x01
-#define CPACF_KMCTR_DEA_DEC	 0x81
-#define CPACF_KMCTR_TDEA_128_ENC 0x02
-#define CPACF_KMCTR_TDEA_128_DEC 0x82
-#define CPACF_KMCTR_TDEA_192_ENC 0x03
-#define CPACF_KMCTR_TDEA_192_DEC 0x83
-#define CPACF_KMCTR_AES_128_ENC	 0x12
-#define CPACF_KMCTR_AES_128_DEC	 0x92
-#define CPACF_KMCTR_AES_192_ENC	 0x13
-#define CPACF_KMCTR_AES_192_DEC	 0x93
-#define CPACF_KMCTR_AES_256_ENC	 0x14
-#define CPACF_KMCTR_AES_256_DEC	 0x94
+#define CPACF_KMCTR_QUERY	0x00
+#define CPACF_KMCTR_DEA		0x01
+#define CPACF_KMCTR_TDEA_128	0x02
+#define CPACF_KMCTR_TDEA_192	0x03
+#define CPACF_KMCTR_AES_128	0x12
+#define CPACF_KMCTR_AES_192	0x13
+#define CPACF_KMCTR_AES_256	0x14
 
 /*
  * Function codes for the KIMD (COMPUTE INTERMEDIATE MESSAGE DIGEST)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KIMD_QUERY	0x00
 #define CPACF_KIMD_SHA_1	0x01
@@ -98,7 +82,7 @@
 
 /*
  * Function codes for the KLMD (COMPUTE LAST MESSAGE DIGEST)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KLMD_QUERY	0x00
 #define CPACF_KLMD_SHA_1	0x01
@@ -107,7 +91,7 @@
 
 /*
  * function codes for the KMAC (COMPUTE MESSAGE AUTHENTICATION CODE)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_KMAC_QUERY	0x00
 #define CPACF_KMAC_DEA		0x01
@@ -116,12 +100,14 @@
 
 /*
  * Function codes for the PPNO (PERFORM PSEUDORANDOM NUMBER OPERATION)
- * instruction (0x80 is the decipher modifier bit)
+ * instruction
  */
 #define CPACF_PPNO_QUERY		0x00
 #define CPACF_PPNO_SHA512_DRNG_GEN	0x03
 #define CPACF_PPNO_SHA512_DRNG_SEED	0x83
 
+typedef struct { unsigned char bytes[16]; } cpacf_mask_t;
+
 /**
  * cpacf_query() - check if a specific CPACF function is available
  * @opcode: the opcode of the crypto instruction
@@ -132,55 +118,66 @@
  *
  * Returns 1 if @func is available for @opcode, 0 otherwise
  */
-static inline void __cpacf_query(unsigned int opcode, unsigned char *status)
+static inline void __cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
 {
-	typedef struct { unsigned char _[16]; } status_type;
 	register unsigned long r0 asm("0") = 0;	/* query function */
-	register unsigned long r1 asm("1") = (unsigned long) status;
+	register unsigned long r1 asm("1") = (unsigned long) mask;
 
 	asm volatile(
 		"	spm 0\n" /* pckmo doesn't change the cc */
 		/* Parameter registers are ignored, but may not be 0 */
 		"0:	.insn	rrf,%[opc] << 16,2,2,2,0\n"
 		"	brc	1,0b\n"	/* handle partial completion */
-		: "=m" (*(status_type *) status)
+		: "=m" (*mask)
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (opcode)
 		: "cc");
 }
 
-static inline int cpacf_query(unsigned int opcode, unsigned int func)
+static inline int __cpacf_check_opcode(unsigned int opcode)
 {
-	unsigned char status[16];
-
 	switch (opcode) {
 	case CPACF_KMAC:
 	case CPACF_KM:
 	case CPACF_KMC:
 	case CPACF_KIMD:
 	case CPACF_KLMD:
-		if (!test_facility(17))	/* check for MSA */
-			return 0;
-		break;
+		return test_facility(17);	/* check for MSA */
 	case CPACF_PCKMO:
-		if (!test_facility(76))	/* check for MSA3 */
-			return 0;
-		break;
+		return test_facility(76);	/* check for MSA3 */
 	case CPACF_KMF:
 	case CPACF_KMO:
 	case CPACF_PCC:
 	case CPACF_KMCTR:
-		if (!test_facility(77))	/* check for MSA4 */
-			return 0;
-		break;
+		return test_facility(77);	/* check for MSA4 */
 	case CPACF_PPNO:
-		if (!test_facility(57))	/* check for MSA5 */
-			return 0;
-		break;
+		return test_facility(57);	/* check for MSA5 */
 	default:
 		BUG();
 	}
-	__cpacf_query(opcode, status);
-	return (status[func >> 3] & (0x80 >> (func & 7))) != 0;
+}
+
+static inline int cpacf_query(unsigned int opcode, cpacf_mask_t *mask)
+{
+	if (__cpacf_check_opcode(opcode)) {
+		__cpacf_query(opcode, mask);
+		return 1;
+	}
+	memset(mask, 0, sizeof(*mask));
+	return 0;
+}
+
+static inline int cpacf_test_func(cpacf_mask_t *mask, unsigned int func)
+{
+	return (mask->bytes[func >> 3] & (0x80 >> (func & 7))) != 0;
+}
+
+static inline int cpacf_query_func(unsigned int opcode, unsigned int func)
+{
+	cpacf_mask_t mask;
+
+	if (cpacf_query(opcode, &mask))
+		return cpacf_test_func(&mask, func);
+	return 0;
 }
 
 /**
@@ -194,7 +191,7 @@
  * Returns 0 for the query func, number of processed bytes for
  * encryption/decryption funcs
  */
-static inline int cpacf_km(long func, void *param,
+static inline int cpacf_km(unsigned long func, void *param,
 			   u8 *dest, const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -224,7 +221,7 @@
  * Returns 0 for the query func, number of processed bytes for
  * encryption/decryption funcs
  */
-static inline int cpacf_kmc(long func, void *param,
+static inline int cpacf_kmc(unsigned long func, void *param,
 			    u8 *dest, const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -250,11 +247,9 @@
  * @param: address of parameter block; see POP for details on each func
  * @src: address of source memory area
  * @src_len: length of src operand in bytes
- *
- * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_kimd(long func, void *param,
-			     const u8 *src, long src_len)
+static inline void cpacf_kimd(unsigned long func, void *param,
+			      const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -267,8 +262,6 @@
 		: [src] "+a" (r2), [len] "+d" (r3)
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KIMD)
 		: "cc", "memory");
-
-	return src_len - r3;
 }
 
 /**
@@ -277,11 +270,9 @@
  * @param: address of parameter block; see POP for details on each func
  * @src: address of source memory area
  * @src_len: length of src operand in bytes
- *
- * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_klmd(long func, void *param,
-			     const u8 *src, long src_len)
+static inline void cpacf_klmd(unsigned long func, void *param,
+			      const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -294,8 +285,6 @@
 		: [src] "+a" (r2), [len] "+d" (r3)
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_KLMD)
 		: "cc", "memory");
-
-	return src_len - r3;
 }
 
 /**
@@ -308,7 +297,7 @@
  *
  * Returns 0 for the query func, number of processed bytes for digest funcs
  */
-static inline int cpacf_kmac(long func, void *param,
+static inline int cpacf_kmac(unsigned long func, void *param,
 			     const u8 *src, long src_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -338,7 +327,7 @@
  * Returns 0 for the query func, number of processed bytes for
  * encryption/decryption funcs
  */
-static inline int cpacf_kmctr(long func, void *param, u8 *dest,
+static inline int cpacf_kmctr(unsigned long func, void *param, u8 *dest,
 			      const u8 *src, long src_len, u8 *counter)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
@@ -368,13 +357,10 @@
  * @dest_len: size of destination memory area in bytes
  * @seed: address of seed data
  * @seed_len: size of seed data in bytes
- *
- * Returns 0 for the query func, number of random bytes stored in
- * dest buffer for generate function
  */
-static inline int cpacf_ppno(long func, void *param,
-			     u8 *dest, long dest_len,
-			     const u8 *seed, long seed_len)
+static inline void cpacf_ppno(unsigned long func, void *param,
+			      u8 *dest, long dest_len,
+			      const u8 *seed, long seed_len)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -390,8 +376,6 @@
 		: [fc] "d" (r0), [pba] "a" (r1),
 		  [seed] "a" (r4), [slen] "d" (r5), [opc] "i" (CPACF_PPNO)
 		: "cc", "memory");
-
-	return dest_len - r3;
 }
 
 /**
@@ -399,10 +383,8 @@
  *		 instruction
  * @func: the function code passed to PCC; see CPACF_KM_xxx defines
  * @param: address of parameter block; see POP for details on each func
- *
- * Returns 0.
  */
-static inline int cpacf_pcc(long func, void *param)
+static inline void cpacf_pcc(unsigned long func, void *param)
 {
 	register unsigned long r0 asm("0") = (unsigned long) func;
 	register unsigned long r1 asm("1") = (unsigned long) param;
@@ -413,8 +395,6 @@
 		:
 		: [fc] "d" (r0), [pba] "a" (r1), [opc] "i" (CPACF_PCC)
 		: "cc", "memory");
-
-	return 0;
 }
 
 #endif	/* _ASM_S390_CPACF_H */
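
The query rework above splits detection into three helpers: cpacf_query()
fills a cpacf_mask_t with the instruction's function-code mask, cpacf_test_func()
tests a single function code against a cached mask, and cpacf_query_func()
keeps the old one-shot behaviour. A minimal caller sketch, assuming the usual
kernel includes; the function name and messages are illustrative only:

static int __init example_cpacf_probe(void)
{
	cpacf_mask_t km_mask;

	/* issue the KM query instruction once, cache the function mask */
	if (!cpacf_query(CPACF_KM, &km_mask))
		return -EOPNOTSUPP;	/* MSA facility not installed */

	/* test individual function codes against the cached mask */
	if (cpacf_test_func(&km_mask, CPACF_KM_AES_128))
		pr_info("KM AES-128 available\n");

	/* one-shot form, equivalent to query + test for a single code */
	if (cpacf_query_func(CPACF_PPNO, CPACF_PPNO_SHA512_DRNG_GEN))
		pr_info("PPNO SHA-512 DRNG available\n");

	/*
	 * The separate _ENC/_DEC codes are gone; decryption is selected
	 * at call time by or-ing in the modifier bit, e.g.:
	 * cpacf_km(CPACF_KM_AES_128 | CPACF_DECRYPT, &param, out, in, len);
	 */
	return 0;
}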
diff --git a/arch/s390/include/asm/facilities_src.h b/arch/s390/include/asm/facilities_src.h
index 4917728..3b758f6 100644
--- a/arch/s390/include/asm/facilities_src.h
+++ b/arch/s390/include/asm/facilities_src.h
@@ -55,4 +55,28 @@
 			-1 /* END */
 		}
 	},
+	{
+		.name = "FACILITIES_KVM",
+		.bits = (int[]){
+			0,  /* N3 instructions */
+			1,  /* z/Arch mode installed */
+			2,  /* z/Arch mode active */
+			3,  /* DAT-enhancement */
+			4,  /* idte segment table */
+			5,  /* idte region table */
+			6,  /* ASN-and-LX reuse */
+			7,  /* stfle */
+			8,  /* enhanced-DAT 1 */
+			9,  /* sense-running-status */
+			10, /* conditional sske */
+			13, /* ipte-range */
+			14, /* nonquiescing key-setting */
+			73, /* transactional execution */
+			75, /* access-exception-fetch/store indication */
+			76, /* msa extension 3 */
+			77, /* msa extension 4 */
+			78, /* enhanced-DAT 2 */
+			-1  /* END */
+		}
+	},
 };
diff --git a/arch/s390/include/asm/fpu/api.h b/arch/s390/include/asm/fpu/api.h
index 6aba6fc..02124d6 100644
--- a/arch/s390/include/asm/fpu/api.h
+++ b/arch/s390/include/asm/fpu/api.h
@@ -64,18 +64,18 @@
 	return rc;
 }
 
-#define KERNEL_VXR_V0V7		1
-#define KERNEL_VXR_V8V15	2
-#define KERNEL_VXR_V16V23	4
-#define KERNEL_VXR_V24V31	8
-#define KERNEL_FPR		16
-#define KERNEL_FPC		256
+#define KERNEL_FPC		1
+#define KERNEL_VXR_V0V7		2
+#define KERNEL_VXR_V8V15	4
+#define KERNEL_VXR_V16V23	8
+#define KERNEL_VXR_V24V31	16
 
 #define KERNEL_VXR_LOW		(KERNEL_VXR_V0V7|KERNEL_VXR_V8V15)
 #define KERNEL_VXR_MID		(KERNEL_VXR_V8V15|KERNEL_VXR_V16V23)
 #define KERNEL_VXR_HIGH		(KERNEL_VXR_V16V23|KERNEL_VXR_V24V31)
 
-#define KERNEL_FPU_MASK		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH|KERNEL_FPR)
+#define KERNEL_VXR		(KERNEL_VXR_LOW|KERNEL_VXR_HIGH)
+#define KERNEL_FPR		(KERNEL_FPC|KERNEL_VXR_V0V7)
 
 struct kernel_fpu;
 
@@ -87,18 +87,28 @@
  * Prefer using the kernel_fpu_begin()/kernel_fpu_end() pair of functions.
  */
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags);
-void __kernel_fpu_end(struct kernel_fpu *state);
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags);
 
 
 static inline void kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
 	preempt_disable();
-	__kernel_fpu_begin(state, flags);
+	state->mask = S390_lowcore.fpu_flags;
+	if (!test_cpu_flag(CIF_FPU))
+		/* Save user space FPU state and register contents */
+		save_fpu_regs();
+	else if (state->mask & flags)
+		/* Save FPU/vector register in-use by the kernel */
+		__kernel_fpu_begin(state, flags);
+	S390_lowcore.fpu_flags |= flags;
 }
 
-static inline void kernel_fpu_end(struct kernel_fpu *state)
+static inline void kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
-	__kernel_fpu_end(state);
+	S390_lowcore.fpu_flags = state->mask;
+	if (state->mask & flags)
+		/* Restore FPU/vector register in-use by the kernel */
+		__kernel_fpu_end(state, flags);
 	preempt_enable();
 }
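
With this change kernel_fpu_end() takes the same flag mask as
kernel_fpu_begin(), and nested in-kernel FPU sections only save and restore
the register ranges both contexts actually touch, tracked in
S390_lowcore.fpu_flags. A hedged usage sketch; the function name is
illustrative:

static void example_vector_section(void)
{
	struct kernel_fpu vxstate;

	kernel_fpu_begin(&vxstate, KERNEL_VXR_LOW);
	/* ... inline assembly clobbering %v0-%v15 only ... */
	kernel_fpu_end(&vxstate, KERNEL_VXR_LOW);
}

Callers must pass the same mask to begin and end; plain floating-point users
pass KERNEL_FPR, as the sysinfo.c hunk further down does.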
 
diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h
index d79ba7c..7b93b78 100644
--- a/arch/s390/include/asm/lowcore.h
+++ b/arch/s390/include/asm/lowcore.h
@@ -129,7 +129,8 @@
 	__u8	pad_0x0390[0x0398-0x0390];	/* 0x0390 */
 	__u64	gmap;				/* 0x0398 */
 	__u32	spinlock_lockval;		/* 0x03a0 */
-	__u8	pad_0x03a0[0x0400-0x03a4];	/* 0x03a4 */
+	__u32	fpu_flags;			/* 0x03a4 */
+	__u8	pad_0x03a8[0x0400-0x03a8];	/* 0x03a8 */
 
 	/* Per cpu primary space access list */
 	__u32	paste[16];			/* 0x0400 */
diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
index 6d39329..bea785d 100644
--- a/arch/s390/include/asm/mmu.h
+++ b/arch/s390/include/asm/mmu.h
@@ -12,6 +12,7 @@
 	struct list_head pgtable_list;
 	spinlock_t gmap_lock;
 	struct list_head gmap_list;
+	unsigned long gmap_asce;
 	unsigned long asce;
 	unsigned long asce_limit;
 	unsigned long vdso_base;
diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
index c6a088c..515fea5a 100644
--- a/arch/s390/include/asm/mmu_context.h
+++ b/arch/s390/include/asm/mmu_context.h
@@ -21,6 +21,7 @@
 	INIT_LIST_HEAD(&mm->context.gmap_list);
 	cpumask_clear(&mm->context.cpu_attach_mask);
 	atomic_set(&mm->context.flush_count, 0);
+	mm->context.gmap_asce = 0;
 	mm->context.flush_mm = 0;
 #ifdef CONFIG_PGSTE
 	mm->context.alloc_pgste = page_table_allocate_pgste;
diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h
index 0da91c4..6611f79 100644
--- a/arch/s390/include/asm/pci.h
+++ b/arch/s390/include/asm/pci.h
@@ -11,6 +11,7 @@
 #include <asm-generic/pci.h>
 #include <asm/pci_clp.h>
 #include <asm/pci_debug.h>
+#include <asm/sclp.h>
 
 #define PCIBIOS_MIN_IO		0x1000
 #define PCIBIOS_MIN_MEM		0x10000000
@@ -117,6 +118,7 @@
 
 	spinlock_t	iommu_bitmap_lock;
 	unsigned long	*iommu_bitmap;
+	unsigned long	*lazy_bitmap;
 	unsigned long	iommu_size;
 	unsigned long	iommu_pages;
 	unsigned int	next_bit;
@@ -216,6 +218,9 @@
 void zpci_debug_exit_device(struct zpci_dev *);
 void zpci_debug_info(struct zpci_dev *, struct seq_file *);
 
+/* Error reporting */
+int zpci_report_error(struct pci_dev *, struct zpci_report_error_header *);
+
 #ifdef CONFIG_NUMA
 
 /* Returns the node based on PCI bus */
diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h
index 72c7f60..0362cd5 100644
--- a/arch/s390/include/asm/pgtable.h
+++ b/arch/s390/include/asm/pgtable.h
@@ -874,35 +874,31 @@
 }
 #endif
 
-static inline void __ptep_ipte(unsigned long address, pte_t *ptep)
+#define IPTE_GLOBAL	0
+#define	IPTE_LOCAL	1
+
+static inline void __ptep_ipte(unsigned long address, pte_t *ptep, int local)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-	/* Invalidation + global TLB flush for the pte */
+	/* Invalidation + TLB flush for the pte */
 	asm volatile(
-		"	ipte	%2,%3"
-		: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
+		"       .insn rrf,0xb2210000,%[r1],%[r2],0,%[m4]"
+		: "+m" (*ptep) : [r1] "a" (pto), [r2] "a" (address),
+		  [m4] "i" (local));
 }
 
-static inline void __ptep_ipte_local(unsigned long address, pte_t *ptep)
+static inline void __ptep_ipte_range(unsigned long address, int nr,
+				     pte_t *ptep, int local)
 {
 	unsigned long pto = (unsigned long) ptep;
 
-	/* Invalidation + local TLB flush for the pte */
-	asm volatile(
-		"	.insn rrf,0xb2210000,%2,%3,0,1"
-		: "=m" (*ptep) : "m" (*ptep), "a" (pto), "a" (address));
-}
-
-static inline void __ptep_ipte_range(unsigned long address, int nr, pte_t *ptep)
-{
-	unsigned long pto = (unsigned long) ptep;
-
-	/* Invalidate a range of ptes + global TLB flush of the ptes */
+	/* Invalidate a range of ptes + TLB flush of the ptes */
 	do {
 		asm volatile(
-			"	.insn rrf,0xb2210000,%2,%0,%1,0"
-			: "+a" (address), "+a" (nr) : "a" (pto) : "memory");
+			"       .insn rrf,0xb2210000,%[r1],%[r2],%[r3],%[m4]"
+			: [r2] "+a" (address), [r3] "+a" (nr)
+			: [r1] "a" (pto), [m4] "i" (local) : "memory");
 	} while (nr != 255);
 }
 
@@ -1239,53 +1235,33 @@
 	    pmd_val(*pmdp) | _SEGMENT_ENTRY_INVALID);
 }
 
-static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp)
+#define IDTE_GLOBAL	0
+#define IDTE_LOCAL	1
+
+static inline void __pmdp_idte(unsigned long address, pmd_t *pmdp, int local)
 {
 	unsigned long sto;
 
 	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
 	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,0"
-		: "=m" (*pmdp)
-		: "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
+		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+		: "+m" (*pmdp)
+		: [r1] "a" (sto), [r2] "a" ((address & HPAGE_MASK)),
+		  [m4] "i" (local)
 		: "cc" );
 }
 
-static inline void __pudp_idte(unsigned long address, pud_t *pudp)
+static inline void __pudp_idte(unsigned long address, pud_t *pudp, int local)
 {
 	unsigned long r3o;
 
 	r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
 	r3o |= _ASCE_TYPE_REGION3;
 	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,0"
-		: "=m" (*pudp)
-		: "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK))
-		: "cc");
-}
-
-static inline void __pmdp_idte_local(unsigned long address, pmd_t *pmdp)
-{
-	unsigned long sto;
-
-	sto = (unsigned long) pmdp - pmd_index(address) * sizeof(pmd_t);
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,1"
-		: "=m" (*pmdp)
-		: "m" (*pmdp), "a" (sto), "a" ((address & HPAGE_MASK))
-		: "cc" );
-}
-
-static inline void __pudp_idte_local(unsigned long address, pud_t *pudp)
-{
-	unsigned long r3o;
-
-	r3o = (unsigned long) pudp - pud_index(address) * sizeof(pud_t);
-	r3o |= _ASCE_TYPE_REGION3;
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,%2,%3,0,1"
-		: "=m" (*pudp)
-		: "m" (*pudp), "a" (r3o), "a" ((address & PUD_MASK))
+		"	.insn	rrf,0xb98e0000,%[r1],%[r2],0,%[m4]"
+		: "+m" (*pudp)
+		: [r1] "a" (r3o), [r2] "a" ((address & PUD_MASK)),
+		  [m4] "i" (local)
 		: "cc");
 }
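
The folded functions take IPTE_LOCAL/IPTE_GLOBAL (and IDTE_LOCAL/IDTE_GLOBAL)
as the m4 operand of the instruction instead of providing separate _local
variants. A caller-side sketch of the selection, assuming the usual
"did this mm only run here" test; the real callers in arch/s390/mm/pgtable.c
also consult mm->context.flush_count:

static void example_ptep_flush(struct mm_struct *mm, unsigned long addr,
			       pte_t *ptep)
{
	if (MACHINE_HAS_TLB_LC &&
	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
		__ptep_ipte(addr, ptep, IPTE_LOCAL);	/* this CPU only */
	else
		__ptep_ipte(addr, ptep, IPTE_GLOBAL);	/* all CPUs */
}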
 
diff --git a/arch/s390/include/asm/tlbflush.h b/arch/s390/include/asm/tlbflush.h
index 1a691ef..3984610 100644
--- a/arch/s390/include/asm/tlbflush.h
+++ b/arch/s390/include/asm/tlbflush.h
@@ -26,17 +26,6 @@
 		: : "a" (2048), "a" (asce) : "cc");
 }
 
-/*
- * Flush TLB entries for a specific ASCE on the local CPU
- */
-static inline void __tlb_flush_idte_local(unsigned long asce)
-{
-	/* Local TLB flush for the mm */
-	asm volatile(
-		"	.insn	rrf,0xb98e0000,0,%0,%1,1"
-		: : "a" (2048), "a" (asce) : "cc");
-}
-
 #ifdef CONFIG_SMP
 void smp_ptlb_all(void);
 
@@ -65,35 +54,33 @@
 		/* Global TLB flush */
 		__tlb_flush_global();
 		/* Reset TLB flush mask */
-		if (MACHINE_HAS_TLB_LC)
-			cpumask_copy(mm_cpumask(mm),
-				     &mm->context.cpu_attach_mask);
+		cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
 	}
 	atomic_dec(&mm->context.flush_count);
 	preempt_enable();
 }
 
-/*
- * Flush TLB entries for a specific ASCE on all CPUs. Should never be used
- * when more than one asce (e.g. gmap) ran on this mm.
- */
-static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+static inline void __tlb_flush_mm(struct mm_struct *mm)
 {
+	unsigned long gmap_asce;
+
+	/*
+	 * If the machine has IDTE, prefer a per-mm flush on all CPUs
+	 * over a local flush, even when the mm only ran on the
+	 * local CPU.
+	 */
 	preempt_disable();
 	atomic_inc(&mm->context.flush_count);
-	if (MACHINE_HAS_TLB_LC &&
-	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) {
-		__tlb_flush_idte_local(asce);
+	gmap_asce = READ_ONCE(mm->context.gmap_asce);
+	if (MACHINE_HAS_IDTE && gmap_asce != -1UL) {
+		if (gmap_asce)
+			__tlb_flush_idte(gmap_asce);
+		__tlb_flush_idte(mm->context.asce);
 	} else {
-		if (MACHINE_HAS_IDTE)
-			__tlb_flush_idte(asce);
-		else
-			__tlb_flush_global();
-		/* Reset TLB flush mask */
-		if (MACHINE_HAS_TLB_LC)
-			cpumask_copy(mm_cpumask(mm),
-				     &mm->context.cpu_attach_mask);
+		__tlb_flush_full(mm);
 	}
+	/* Reset TLB flush mask */
+	cpumask_copy(mm_cpumask(mm), &mm->context.cpu_attach_mask);
 	atomic_dec(&mm->context.flush_count);
 	preempt_enable();
 }
@@ -112,36 +99,17 @@
 /*
  * Flush TLB entries for a specific ASCE on all CPUs.
  */
-static inline void __tlb_flush_asce(struct mm_struct *mm, unsigned long asce)
+static inline void __tlb_flush_mm(struct mm_struct *mm)
 {
-	if (MACHINE_HAS_TLB_LC)
-		__tlb_flush_idte_local(asce);
-	else
-		__tlb_flush_local();
+	__tlb_flush_local();
 }
 
 static inline void __tlb_flush_kernel(void)
 {
-	if (MACHINE_HAS_TLB_LC)
-		__tlb_flush_idte_local(init_mm.context.asce);
-	else
-		__tlb_flush_local();
+	__tlb_flush_local();
 }
 #endif
 
-static inline void __tlb_flush_mm(struct mm_struct * mm)
-{
-	/*
-	 * If the machine has IDTE we prefer to do a per mm flush
-	 * on all cpus instead of doing a local flush if the mm
-	 * only ran on the local cpu.
-	 */
-	if (MACHINE_HAS_IDTE && list_empty(&mm->context.gmap_list))
-		__tlb_flush_asce(mm, mm->context.asce);
-	else
-		__tlb_flush_full(mm);
-}
-
 static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
 {
 	if (mm->context.flush_mm) {
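
The new __tlb_flush_mm() keys off mm->context.gmap_asce: 0 means no guest
ASCE exists and only mm->context.asce is flushed via IDTE, a real ASCE value
means exactly one gmap is attached and both ASCEs are flushed, and -1UL means
the situation is ambiguous and __tlb_flush_full() is used. A sketch of how
the gmap side might maintain the field (the struct gmap members and locking
shown are assumptions; the actual code lives in arch/s390/mm/gmap.c):

static void example_gmap_attach(struct mm_struct *mm, struct gmap *gmap)
{
	unsigned long gmap_asce;

	spin_lock(&mm->context.gmap_lock);
	list_add_rcu(&gmap->list, &mm->context.gmap_list);
	if (list_is_singular(&mm->context.gmap_list))
		gmap_asce = gmap->asce;	/* single guest: IDTE flush works */
	else
		gmap_asce = -1UL;	/* multiple guests: force full flush */
	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
	spin_unlock(&mm->context.gmap_lock);
}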
diff --git a/arch/s390/include/asm/vx-insn.h b/arch/s390/include/asm/vx-insn.h
index 4a31356..49c24a2 100644
--- a/arch/s390/include/asm/vx-insn.h
+++ b/arch/s390/include/asm/vx-insn.h
@@ -16,15 +16,13 @@
 
 /* Macros to generate vector instruction byte code */
 
-#define REG_NUM_INVALID	       255
-
 /* GR_NUM - Retrieve general-purpose register number
  *
  * @opd:	Operand to store register number
  * @r64:	String designation register in the format "%rN"
  */
 .macro	GR_NUM	opd gr
-	\opd = REG_NUM_INVALID
+	\opd = 255
 	.ifc \gr,%r0
 		\opd = 0
 	.endif
@@ -73,14 +71,11 @@
 	.ifc \gr,%r15
 		\opd = 15
 	.endif
-	.if \opd == REG_NUM_INVALID
-		.error "Invalid general-purpose register designation: \gr"
+	.if \opd == 255
+		\opd = \gr
 	.endif
 .endm
 
-/* VX_R() - Macro to encode the VX_NUM into the instruction */
-#define VX_R(v)		(v & 0x0F)
-
 /* VX_NUM - Retrieve vector register number
  *
  * @opd:	Operand to store register number
@@ -88,11 +83,10 @@
  *
  * The vector register number is used for as input number to the
  * instruction and, as well as, to compute the RXB field of the
- * instruction.  To encode the particular vector register number,
- * use the VX_R(v) macro to extract the instruction opcode.
+ * instruction.
  */
 .macro	VX_NUM	opd vxr
-	\opd = REG_NUM_INVALID
+	\opd = 255
 	.ifc \vxr,%v0
 		\opd = 0
 	.endif
@@ -189,8 +183,8 @@
 	.ifc \vxr,%v31
 		\opd = 31
 	.endif
-	.if \opd == REG_NUM_INVALID
-		.error "Invalid vector register designation: \vxr"
+	.if \opd == 255
+		\opd = \vxr
 	.endif
 .endm
 
@@ -251,7 +245,7 @@
 /* VECTOR GENERATE BYTE MASK */
 .macro	VGBM	vr imm2
 	VX_NUM	v1, \vr
-	.word	(0xE700 | (VX_R(v1) << 4))
+	.word	(0xE700 | ((v1&15) << 4))
 	.word	\imm2
 	MRXBOPC	0, 0x44, v1
 .endm
@@ -267,7 +261,7 @@
 	VX_NUM	v1, \v
 	GR_NUM	b2, "%r0"
 	GR_NUM	r3, \gr
-	.word	0xE700 | (VX_R(v1) << 4) | r3
+	.word	0xE700 | ((v1&15) << 4) | r3
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	\m, 0x22, v1
 .endm
@@ -284,12 +278,21 @@
 	VLVG	\v, \gr, \index, 3
 .endm
 
+/* VECTOR LOAD REGISTER */
+.macro	VLR	v1, v2
+	VX_NUM	v1, \v1
+	VX_NUM	v2, \v2
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	0
+	MRXBOPC	0, 0x56, v1, v2
+.endm
+
 /* VECTOR LOAD */
 .macro	VL	v, disp, index="%r0", base
 	VX_NUM	v1, \v
 	GR_NUM	x2, \index
 	GR_NUM	b2, \base
-	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	0xE700 | ((v1&15) << 4) | x2
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC 0, 0x06, v1
 .endm
@@ -299,7 +302,7 @@
 	VX_NUM	v1, \vr1
 	GR_NUM	x2, \index
 	GR_NUM	b2, \base
-	.word	0xE700 | (VX_R(v1) << 4) | x2
+	.word	0xE700 | ((v1&15) << 4) | x2
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	\m3, \opc, v1
 .endm
@@ -319,7 +322,7 @@
 /* VECTOR LOAD ELEMENT IMMEDIATE */
 .macro	VLEIx	vr1, imm2, m3, opc
 	VX_NUM	v1, \vr1
-	.word	0xE700 | (VX_R(v1) << 4)
+	.word	0xE700 | ((v1&15) << 4)
 	.word	\imm2
 	MRXBOPC	\m3, \opc, v1
 .endm
@@ -341,7 +344,7 @@
 	GR_NUM	r1, \gr
 	GR_NUM	b2, \base
 	VX_NUM	v3, \vr
-	.word	0xE700 | (r1 << 4) | VX_R(v3)
+	.word	0xE700 | (r1 << 4) | (v3&15)
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	\m, 0x21, v3
 .endm
@@ -363,7 +366,7 @@
 	VX_NUM	v1, \vfrom
 	VX_NUM	v3, \vto
 	GR_NUM	b2, \base	    /* Base register */
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	0, 0x36, v1, v3
 .endm
@@ -373,7 +376,7 @@
 	VX_NUM	v1, \vfrom
 	VX_NUM	v3, \vto
 	GR_NUM	b2, \base	    /* Base register */
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v3)
+	.word	0xE700 | ((v1&15) << 4) | (v3&15)
 	.word	(b2 << 12) | (\disp)
 	MRXBOPC	0, 0x3E, v1, v3
 .endm
@@ -384,16 +387,16 @@
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
 	VX_NUM	v4, \vr4
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
-	MRXBOPC	VX_R(v4), 0x8C, v1, v2, v3, v4
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	(v4&15), 0x8C, v1, v2, v3, v4
 .endm
 
 /* VECTOR UNPACK LOGICAL LOW */
 .macro	VUPLL	vr1, vr2, m3
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
 	.word	0x0000
 	MRXBOPC	\m3, 0xD4, v1, v2
 .endm
@@ -410,13 +413,23 @@
 
 /* Vector integer instructions */
 
+/* VECTOR AND */
+.macro	VN	vr1, vr2, vr3
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	0, 0x68, v1, v2, v3
+.endm
+
 /* VECTOR EXCLUSIVE OR */
 .macro	VX	vr1, vr2, vr3
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
 	MRXBOPC	0, 0x6D, v1, v2, v3
 .endm
 
@@ -425,8 +438,8 @@
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
 	MRXBOPC	\m4, 0xB4, v1, v2, v3
 .endm
 .macro	VGFMB	vr1, vr2, vr3
@@ -448,9 +461,9 @@
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
 	VX_NUM	v4, \vr4
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12) | (\m5 << 8)
-	MRXBOPC	VX_R(v4), 0xBC, v1, v2, v3, v4
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12) | (\m5 << 8)
+	MRXBOPC	(v4&15), 0xBC, v1, v2, v3, v4
 .endm
 .macro	VGFMAB	vr1, vr2, vr3, vr4
 	VGFMA	\vr1, \vr2, \vr3, \vr4, 0
@@ -470,11 +483,78 @@
 	VX_NUM	v1, \vr1
 	VX_NUM	v2, \vr2
 	VX_NUM	v3, \vr3
-	.word	0xE700 | (VX_R(v1) << 4) | VX_R(v2)
-	.word	(VX_R(v3) << 12)
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
 	MRXBOPC	0, 0x7D, v1, v2, v3
 .endm
 
+/* VECTOR REPLICATE IMMEDIATE */
+.macro	VREPI	vr1, imm2, m3
+	VX_NUM	v1, \vr1
+	.word	0xE700 | ((v1&15) << 4)
+	.word	\imm2
+	MRXBOPC	\m3, 0x45, v1
+.endm
+.macro	VREPIB	vr1, imm2
+	VREPI	\vr1, \imm2, 0
+.endm
+.macro	VREPIH	vr1, imm2
+	VREPI	\vr1, \imm2, 1
+.endm
+.macro	VREPIF	vr1, imm2
+	VREPI	\vr1, \imm2, 2
+.endm
+.macro	VREPIG	vr1, imm2
+	VREPI	\vr1, \imm2, 3
+.endm
+
+/* VECTOR ADD */
+.macro	VA	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC	\m4, 0xF3, v1, v2, v3
+.endm
+.macro	VAB	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VAH	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VAF	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VAG	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 3
+.endm
+.macro	VAQ	vr1, vr2, vr3
+	VA	\vr1, \vr2, \vr3, 4
+.endm
+
+/* VECTOR ELEMENT SHIFT RIGHT ARITHMETIC */
+.macro	VESRAV	vr1, vr2, vr3, m4
+	VX_NUM	v1, \vr1
+	VX_NUM	v2, \vr2
+	VX_NUM	v3, \vr3
+	.word	0xE700 | ((v1&15) << 4) | (v2&15)
+	.word	((v3&15) << 12)
+	MRXBOPC \m4, 0x7A, v1, v2, v3
+.endm
+
+.macro	VESRAVB	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 0
+.endm
+.macro	VESRAVH	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 1
+.endm
+.macro	VESRAVF	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 2
+.endm
+.macro	VESRAVG	vr1, vr2, vr3
+	VESRAV	\vr1, \vr2, \vr3, 3
+.endm
 
 #endif	/* __ASSEMBLY__ */
 #endif	/* __ASM_S390_VX_INSN_H */
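
Replacing the .error directives with a pass-through of the raw operand lets
the macros accept plain register numbers in addition to %vN/%rN names, which
is what makes them usable from inline assembly: a C file includes the header
into the assembler pass and emits the macros by name, as arch/s390/kernel/fpu.c
does below. A minimal sketch (function name illustrative; the vector context
must be secured first):

asm(".include \"asm/vx-insn.h\"\n");

static void example_zero_v0(void)
{
	struct kernel_fpu state;

	kernel_fpu_begin(&state, KERNEL_VXR_V0V7);
	asm volatile("VGBM	0,0\n");	/* vgbm %v0,0 - clear %v0 */
	kernel_fpu_end(&state, KERNEL_VXR_V0V7);
}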
diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild
index 08fe6da..cc44b09 100644
--- a/arch/s390/include/uapi/asm/Kbuild
+++ b/arch/s390/include/uapi/asm/Kbuild
@@ -6,6 +6,7 @@
 header-y += byteorder.h
 header-y += chpid.h
 header-y += chsc.h
+header-y += clp.h
 header-y += cmb.h
 header-y += dasd.h
 header-y += debug.h
diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile
index 3234817..72ccc41 100644
--- a/arch/s390/kernel/Makefile
+++ b/arch/s390/kernel/Makefile
@@ -48,6 +48,9 @@
 endif
 GCOV_PROFILE_sclp.o := n
 GCOV_PROFILE_als.o := n
+UBSAN_SANITIZE_als.o := n
+UBSAN_SANITIZE_early.o := n
+UBSAN_SANITIZE_sclp.o := n
 
 obj-y	:= traps.o time.o process.o base.o early.o setup.o idle.o vtime.o
 obj-y	+= processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index 29df8484..f9293bf 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -71,9 +71,7 @@
  */
 struct save_area * __init save_area_boot_cpu(void)
 {
-	if (list_empty(&dump_save_areas))
-		return NULL;
-	return list_first_entry(&dump_save_areas, struct save_area, list);
+	return list_first_entry_or_null(&dump_save_areas, struct save_area, list);
 }
 
 /*
diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c
index 717b03a..2374c5b 100644
--- a/arch/s390/kernel/early.c
+++ b/arch/s390/kernel/early.c
@@ -13,7 +13,7 @@
 #include <linux/string.h>
 #include <linux/ctype.h>
 #include <linux/lockdep.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/pfn.h>
 #include <linux/uaccess.h>
 #include <linux/kernel.h>
diff --git a/arch/s390/kernel/fpu.c b/arch/s390/kernel/fpu.c
index 81d1d18..1235b94 100644
--- a/arch/s390/kernel/fpu.c
+++ b/arch/s390/kernel/fpu.c
@@ -10,240 +10,167 @@
 #include <asm/fpu/types.h>
 #include <asm/fpu/api.h>
 
-/*
- * Per-CPU variable to maintain FPU register ranges that are in use
- * by the kernel.
- */
-static DEFINE_PER_CPU(u32, kernel_fpu_state);
-
-#define KERNEL_FPU_STATE_MASK	(KERNEL_FPU_MASK|KERNEL_FPC)
-
+asm(".include \"asm/vx-insn.h\"\n");
 
 void __kernel_fpu_begin(struct kernel_fpu *state, u32 flags)
 {
-	if (!__this_cpu_read(kernel_fpu_state)) {
-		/*
-		 * Save user space FPU state and register contents.  Multiple
-		 * calls because of interruptions do not matter and return
-		 * immediately.  This also sets CIF_FPU to lazy restore FP/VX
-		 * register contents when returning to user space.
-		 */
-		save_fpu_regs();
-	}
-
-	/* Update flags to use the vector facility for KERNEL_FPR */
-	if (MACHINE_HAS_VX && (state->mask & KERNEL_FPR)) {
-		flags |= KERNEL_VXR_LOW | KERNEL_FPC;
-		flags &= ~KERNEL_FPR;
-	}
-
-	/* Save and update current kernel VX state */
-	state->mask = __this_cpu_read(kernel_fpu_state);
-	__this_cpu_or(kernel_fpu_state, flags & KERNEL_FPU_STATE_MASK);
-
 	/*
-	 * If this is the first call to __kernel_fpu_begin(), no additional
-	 * work is required.
+	 * Limit the save to the FPU/vector registers already
+	 * in use by the previous context
 	 */
-	if (!(state->mask & KERNEL_FPU_STATE_MASK))
-		return;
+	flags &= state->mask;
 
-	/*
-	 * If KERNEL_FPR is still set, the vector facility is not available
-	 * and, thus, save floating-point control and registers only.
-	 */
-	if (state->mask & KERNEL_FPR) {
-		asm volatile("stfpc %0" : "=Q" (state->fpc));
-		asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
-		asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
-		asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
-		asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
-		asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
-		asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
-		asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
-		asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
-		asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
-		asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
-		asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
-		asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
-		asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
-		asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
-		asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
-		asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
-		return;
-	}
-
-	/*
-	 * If this is a nested call to __kernel_fpu_begin(), check the saved
-	 * state mask to save and later restore the vector registers that
-	 * are already in use.	Let's start with checking floating-point
-	 * controls.
-	 */
-	if (state->mask & KERNEL_FPC)
+	if (flags & KERNEL_FPC)
+		/* Save floating point control */
 		asm volatile("stfpc %0" : "=m" (state->fpc));
 
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Save floating-point registers */
+			asm volatile("std 0,%0" : "=Q" (state->fprs[0]));
+			asm volatile("std 1,%0" : "=Q" (state->fprs[1]));
+			asm volatile("std 2,%0" : "=Q" (state->fprs[2]));
+			asm volatile("std 3,%0" : "=Q" (state->fprs[3]));
+			asm volatile("std 4,%0" : "=Q" (state->fprs[4]));
+			asm volatile("std 5,%0" : "=Q" (state->fprs[5]));
+			asm volatile("std 6,%0" : "=Q" (state->fprs[6]));
+			asm volatile("std 7,%0" : "=Q" (state->fprs[7]));
+			asm volatile("std 8,%0" : "=Q" (state->fprs[8]));
+			asm volatile("std 9,%0" : "=Q" (state->fprs[9]));
+			asm volatile("std 10,%0" : "=Q" (state->fprs[10]));
+			asm volatile("std 11,%0" : "=Q" (state->fprs[11]));
+			asm volatile("std 12,%0" : "=Q" (state->fprs[12]));
+			asm volatile("std 13,%0" : "=Q" (state->fprs[13]));
+			asm volatile("std 14,%0" : "=Q" (state->fprs[14]));
+			asm volatile("std 15,%0" : "=Q" (state->fprs[15]));
+		}
+		return;
+	}
+
 	/* Test and save vector registers */
 	asm volatile (
 		/*
 		 * Test if any vector register must be saved and, if so,
 		 * test if all register can be saved.
 		 */
-		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
-		"	jz	20f\n"		/* no work -> done */
 		"	la	1,%[vxrs]\n"	/* load save area */
-		"	jo	18f\n"		/* -> save V0..V31 */
-
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> save V0..V31 */
 		/*
-		 * Test if V8..V23 can be saved at once... this speeds up
-		 * for KERNEL_fpu_MID only. Otherwise continue to split the
-		 * range of vector registers into two halves and test them
-		 * separately.
+		 * Test for special case KERNEL_VXR_MID only. In this
+		 * case a vstm V8..V23 is the best instruction
 		 */
-		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
-		"	jo	17f\n"		/* -> save V8..V23 */
-
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> save V8..V23 */
+		"	VSTM	8,23,128,1\n"	/* vstm %v8,%v23,128(%r1) */
+		"	j	7f\n"
 		/* Test and save the first half of 16 vector registers */
-		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
-		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
 		"	jo	2f\n"		/* 11 -> save V0..V15 */
-		"	brc	4,3f\n"		/* 01 -> save V0..V7  */
-		"	brc	2,4f\n"		/* 10 -> save V8..V15 */
-
+		"	brc	2,1f\n"		/* 10 -> save V8..V15 */
+		"	VSTM	0,7,0,1\n"	/* vstm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VSTM	8,15,128,1\n"	/* vstm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
 		/* Test and save the second half of 16 vector registers */
-		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
-		"	jo	19f\n"		/* 11 -> save V16..V31 */
-		"	brc	4,11f\n"	/* 01 -> save V16..V23	*/
-		"	brc	2,12f\n"	/* 10 -> save V24..V31 */
-		"	j	20f\n"		/* 00 -> done */
-
-		/*
-		 * Below are the vstm combinations to save multiple vector
-		 * registers at once.
-		 */
-		"2:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"3:	.word	0xe707,0x1000,0x003e\n" /* vstm 0,7,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"4:	.word	0xe78f,0x1080,0x003e\n" /* vstm 8,15,128(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"\n"
-		"11:	.word	0xe707,0x1100,0x0c3e\n"	/* vstm 16,23,256(1) */
-		"	j	20f\n"			/* -> done */
-		"12:	.word	0xe78f,0x1180,0x0c3e\n" /* vstm 24,31,384(1) */
-		"	j	20f\n"			/* -> done */
-		"\n"
-		"17:	.word	0xe787,0x1080,0x043e\n"	/* vstm 8,23,128(1) */
-		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
-		"	j	1b\n"			/* -> VXR_LOW */
-		"\n"
-		"18:	.word	0xe70f,0x1000,0x003e\n"	/* vstm 0,15,0(1) */
-		"19:	.word	0xe70f,0x1100,0x0c3e\n"	/* vstm 16,31,256(1) */
-		"20:"
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> save V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> save V24..V31 */
+		"	VSTM	16,23,256,1\n"	/* vstm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VSTM	24,31,384,1\n"	/* vstm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VSTM	0,15,0,1\n"	/* vstm %v0,%v15,0(%r1) */
+		"6:	VSTM	16,31,256,1\n"	/* vstm %v16,%v31,256(%r1) */
+		"7:"
 		: [vxrs] "=Q" (*(struct vx_array *) &state->vxrs)
-		: [m] "d" (state->mask)
+		: [m] "d" (flags)
 		: "1", "cc");
 }
 EXPORT_SYMBOL(__kernel_fpu_begin);
 
-void __kernel_fpu_end(struct kernel_fpu *state)
+void __kernel_fpu_end(struct kernel_fpu *state, u32 flags)
 {
-	/* Just update the per-CPU state if there is nothing to restore */
-	if (!(state->mask & KERNEL_FPU_STATE_MASK))
-		goto update_fpu_state;
-
 	/*
-	 * If KERNEL_FPR is specified, the vector facility is not available
-	 * and, thus, restore floating-point control and registers only.
+	 * Limit the restore to the FPU/vector registers of the
+	 * previous context that have been overwritten by the
+	 * current context
 	 */
-	if (state->mask & KERNEL_FPR) {
-		asm volatile("lfpc %0" : : "Q" (state->fpc));
-		asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
-		asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
-		asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
-		asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
-		asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
-		asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
-		asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
-		asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
-		asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
-		asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
-		asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
-		asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
-		asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
-		asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
-		asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
-		asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
-		goto update_fpu_state;
-	}
+	flags &= state->mask;
 
-	/* Test and restore floating-point controls */
-	if (state->mask & KERNEL_FPC)
+	if (flags & KERNEL_FPC)
+		/* Restore floating-point controls */
 		asm volatile("lfpc %0" : : "Q" (state->fpc));
 
+	if (!MACHINE_HAS_VX) {
+		if (flags & KERNEL_VXR_V0V7) {
+			/* Restore floating-point registers */
+			asm volatile("ld 0,%0" : : "Q" (state->fprs[0]));
+			asm volatile("ld 1,%0" : : "Q" (state->fprs[1]));
+			asm volatile("ld 2,%0" : : "Q" (state->fprs[2]));
+			asm volatile("ld 3,%0" : : "Q" (state->fprs[3]));
+			asm volatile("ld 4,%0" : : "Q" (state->fprs[4]));
+			asm volatile("ld 5,%0" : : "Q" (state->fprs[5]));
+			asm volatile("ld 6,%0" : : "Q" (state->fprs[6]));
+			asm volatile("ld 7,%0" : : "Q" (state->fprs[7]));
+			asm volatile("ld 8,%0" : : "Q" (state->fprs[8]));
+			asm volatile("ld 9,%0" : : "Q" (state->fprs[9]));
+			asm volatile("ld 10,%0" : : "Q" (state->fprs[10]));
+			asm volatile("ld 11,%0" : : "Q" (state->fprs[11]));
+			asm volatile("ld 12,%0" : : "Q" (state->fprs[12]));
+			asm volatile("ld 13,%0" : : "Q" (state->fprs[13]));
+			asm volatile("ld 14,%0" : : "Q" (state->fprs[14]));
+			asm volatile("ld 15,%0" : : "Q" (state->fprs[15]));
+		}
+		return;
+	}
+
 	/* Test and restore (load) vector registers */
 	asm volatile (
 		/*
-		 * Test if any vector registers must be loaded and, if so,
+		 * Test if any vector register must be loaded and, if so,
 		 * test if all registers can be loaded at once.
 		 */
-		"	tmll	%[m],15\n"	/* KERNEL_VXR_MASK */
-		"	jz	20f\n"		/* no work -> done */
-		"	la	1,%[vxrs]\n"	/* load load area */
-		"	jo	18f\n"		/* -> load V0..V31 */
-
+		"	la	1,%[vxrs]\n"	/* load restore area */
+		"	tmll	%[m],30\n"	/* KERNEL_VXR */
+		"	jz	7f\n"		/* no work -> done */
+		"	jo	5f\n"		/* -> restore V0..V31 */
 		/*
-		 * Test if V8..V23 can be restored at once... this speeds up
-		 * for KERNEL_VXR_MID only. Otherwise continue to split the
-		 * range of vector registers into two halves and test them
-		 * separately.
+		 * Test for special case KERNEL_VXR_MID only. In this
+		 * case a vlm V8..V23 is the best instruction
 		 */
-		"	tmll	%[m],6\n"	/* KERNEL_VXR_MID */
-		"	jo	17f\n"		/* -> load V8..V23 */
-
-		/* Test and load the first half of 16 vector registers */
-		"1:	tmll	%[m],3\n"	/* KERNEL_VXR_LOW */
-		"	jz	10f\n"		/* -> KERNEL_VXR_HIGH */
-		"	jo	2f\n"		/* 11 -> load V0..V15 */
-		"	brc	4,3f\n"		/* 01 -> load V0..V7  */
-		"	brc	2,4f\n"		/* 10 -> load V8..V15 */
-
-		/* Test and load the second half of 16 vector registers */
-		"10:	tmll	%[m],12\n"	/* KERNEL_VXR_HIGH */
-		"	jo	19f\n"		/* 11 -> load V16..V31 */
-		"	brc	4,11f\n"	/* 01 -> load V16..V23	*/
-		"	brc	2,12f\n"	/* 10 -> load V24..V31 */
-		"	j	20f\n"		/* 00 -> done */
-
-		/*
-		 * Below are the vstm combinations to load multiple vector
-		 * registers at once.
-		 */
-		"2:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"3:	.word	0xe707,0x1000,0x0036\n" /* vlm 0,7,0(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"4:	.word	0xe78f,0x1080,0x0036\n" /* vlm 8,15,128(1) */
-		"	j	10b\n"			/* -> VXR_HIGH */
-		"\n"
-		"11:	.word	0xe707,0x1100,0x0c36\n"	/* vlm 16,23,256(1) */
-		"	j	20f\n"			/* -> done */
-		"12:	.word	0xe78f,0x1180,0x0c36\n" /* vlm 24,31,384(1) */
-		"	j	20f\n"			/* -> done */
-		"\n"
-		"17:	.word	0xe787,0x1080,0x0436\n"	/* vlm 8,23,128(1) */
-		"	nill	%[m],249\n"		/* m &= ~VXR_MID    */
-		"	j	1b\n"			/* -> VXR_LOW */
-		"\n"
-		"18:	.word	0xe70f,0x1000,0x0036\n"	/* vlm 0,15,0(1) */
-		"19:	.word	0xe70f,0x1100,0x0c36\n"	/* vlm 16,31,256(1) */
-		"20:"
-		:
-		: [vxrs] "Q" (*(struct vx_array *) &state->vxrs),
-		  [m] "d" (state->mask)
+		"	chi	%[m],12\n"	/* KERNEL_VXR_MID */
+		"	jne	0f\n"		/* -> restore V8..V23 */
+		"	VLM	8,23,128,1\n"	/* vlm %v8,%v23,128(%r1) */
+		"	j	7f\n"
+		/* Test and restore the first half of 16 vector registers */
+		"0:	tmll	%[m],6\n"	/* KERNEL_VXR_LOW */
+		"	jz	3f\n"		/* -> KERNEL_VXR_HIGH */
+		"	jo	2f\n"		/* 11 -> restore V0..V15 */
+		"	brc	2,1f\n"		/* 10 -> restore V8..V15 */
+		"	VLM	0,7,0,1\n"	/* vlm %v0,%v7,0(%r1) */
+		"	j	3f\n"
+		"1:	VLM	8,15,128,1\n"	/* vlm %v8,%v15,128(%r1) */
+		"	j	3f\n"
+		"2:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		/* Test and restore the second half of 16 vector registers */
+		"3:	tmll	%[m],24\n"	/* KERNEL_VXR_HIGH */
+		"	jz	7f\n"
+		"	jo	6f\n"		/* 11 -> restore V16..V31 */
+		"	brc	2,4f\n"		/* 10 -> restore V24..V31 */
+		"	VLM	16,23,256,1\n"	/* vlm %v16,%v23,256(%r1) */
+		"	j	7f\n"
+		"4:	VLM	24,31,384,1\n"	/* vlm %v24,%v31,384(%r1) */
+		"	j	7f\n"
+		"5:	VLM	0,15,0,1\n"	/* vlm %v0,%v15,0(%r1) */
+		"6:	VLM	16,31,256,1\n"	/* vlm %v16,%v31,256(%r1) */
+		"7:"
+		:
+		: [vxrs] "Q" (*(struct vx_array *) &state->vxrs),
+		  [m] "d" (flags)
 		: "1", "cc");
-
-update_fpu_state:
-	/* Update current kernel VX state */
-	__this_cpu_write(kernel_fpu_state, state->mask);
 }
 EXPORT_SYMBOL(__kernel_fpu_end);
diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c
index dd6306c..fdb4042 100644
--- a/arch/s390/kernel/kprobes.c
+++ b/arch/s390/kernel/kprobes.c
@@ -26,12 +26,14 @@
 #include <linux/stop_machine.h>
 #include <linux/kdebug.h>
 #include <linux/uaccess.h>
+#include <linux/extable.h>
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/hardirq.h>
 #include <linux/ftrace.h>
 #include <asm/cacheflush.h>
 #include <asm/sections.h>
+#include <asm/uaccess.h>
 #include <asm/dis.h>
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe);
diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c
index 29376f0..9a32f74 100644
--- a/arch/s390/kernel/nmi.c
+++ b/arch/s390/kernel/nmi.c
@@ -98,7 +98,7 @@
  * returns 0 if all registers could be validated
  * returns 1 otherwise
  */
-static int notrace s390_validate_registers(union mci mci)
+static int notrace s390_validate_registers(union mci mci, int umode)
 {
 	int kill_task;
 	u64 zero;
@@ -110,26 +110,41 @@
 	if (!mci.gr) {
 		/*
 		 * General purpose registers couldn't be restored and have
-		 * unknown contents. Process needs to be terminated.
+		 * unknown contents. Stop system or terminate process.
 		 */
+		if (!umode)
+			s390_handle_damage();
 		kill_task = 1;
 	}
 	if (!mci.fp) {
 		/*
-		 * Floating point registers can't be restored and
-		 * therefore the process needs to be terminated.
+		 * Floating point registers can't be restored. If the
+		 * kernel currently uses floating point registers the
+		 * system is stopped. If the process has its floating
+		 * point registers loaded it is terminated.
+		 * Otherwise just revalidate the registers.
 		 */
-		kill_task = 1;
+		if (S390_lowcore.fpu_flags & KERNEL_VXR_V0V7)
+			s390_handle_damage();
+		if (!test_cpu_flag(CIF_FPU))
+			kill_task = 1;
 	}
 	fpt_save_area = &S390_lowcore.floating_pt_save_area;
 	fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
 	if (!mci.fc) {
 		/*
 		 * Floating point control register can't be restored.
-		 * Task will be terminated.
+		 * If the kernel currently uses the floating point
+		 * registers and needs the FPC register the system is
+		 * stopped. If the process has its floating point
+		 * registers loaded it is terminated. Otherwise the
+		 * FPC is just revalidated.
 		 */
+		if (S390_lowcore.fpu_flags & KERNEL_FPC)
+			s390_handle_damage();
 		asm volatile("lfpc 0(%0)" : : "a" (&zero), "m" (zero));
-		kill_task = 1;
+		if (!test_cpu_flag(CIF_FPU))
+			kill_task = 1;
 	} else
 		asm volatile("lfpc 0(%0)" : : "a" (fpt_creg_save_area));
 
@@ -159,10 +174,16 @@
 
 		if (!mci.vr) {
 			/*
-			 * Vector registers can't be restored and therefore
-			 * the process needs to be terminated.
+			 * Vector registers can't be restored. If the kernel
+			 * currently uses vector registers the system is
+			 * stopped. If the process has its vector registers
+			 * loaded it is terminated. Otherwise just revalidate
+			 * the registers.
 			 */
-			kill_task = 1;
+			if (S390_lowcore.fpu_flags & KERNEL_VXR)
+				s390_handle_damage();
+			if (!test_cpu_flag(CIF_FPU))
+				kill_task = 1;
 		}
 		cr0.val = S390_lowcore.cregs_save_area[0];
 		cr0.afp = cr0.vx = 1;
@@ -250,13 +271,11 @@
 	struct mcck_struct *mcck;
 	unsigned long long tmp;
 	union mci mci;
-	int umode;
 
 	nmi_enter();
 	inc_irq_stat(NMI_NMI);
 	mci.val = S390_lowcore.mcck_interruption_code;
 	mcck = this_cpu_ptr(&cpu_mcck);
-	umode = user_mode(regs);
 
 	if (mci.sd) {
 		/* System damage -> stopping machine */
@@ -297,22 +316,14 @@
 			s390_handle_damage();
 		}
 	}
-	if (s390_validate_registers(mci)) {
-		if (umode) {
-			/*
-			 * Couldn't restore all register contents while in
-			 * user mode -> mark task for termination.
-			 */
-			mcck->kill_task = 1;
-			mcck->mcck_code = mci.val;
-			set_cpu_flag(CIF_MCCK_PENDING);
-		} else {
-			/*
-			 * Couldn't restore all register contents while in
-			 * kernel mode -> stopping machine.
-			 */
-			s390_handle_damage();
-		}
+	if (s390_validate_registers(mci, user_mode(regs))) {
+		/*
+		 * Couldn't restore all register contents for the
+		 * user space process -> mark task for termination.
+		 */
+		mcck->kill_task = 1;
+		mcck->mcck_code = mci.val;
+		set_cpu_flag(CIF_MCCK_PENDING);
 	}
 	if (mci.cd) {
 		/* Timing facility damage */
diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c
index 050b8d0..bfda6aa 100644
--- a/arch/s390/kernel/sysinfo.c
+++ b/arch/s390/kernel/sysinfo.c
@@ -454,7 +454,7 @@
 			: "Q" (info->capability), "d" (10000000), "d" (0)
 			: "cc"
 			);
-		kernel_fpu_end(&fpu);
+		kernel_fpu_end(&fpu, KERNEL_FPR);
 	} else
 		/*
 		 * Really old machine without stsi block for basic
diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
index 4e99498..0bfcc49 100644
--- a/arch/s390/kernel/time.c
+++ b/arch/s390/kernel/time.c
@@ -50,10 +50,6 @@
 #include <asm/cio.h>
 #include "entry.h"
 
-/* change this if you have some constant time drift */
-#define USECS_PER_JIFFY     ((unsigned long) 1000000/HZ)
-#define CLK_TICKS_PER_JIFFY ((unsigned long) USECS_PER_JIFFY << 12)
-
 u64 sched_clock_base_cc = -1;	/* Force to data section. */
 EXPORT_SYMBOL_GPL(sched_clock_base_cc);
 
@@ -282,13 +278,8 @@
 
 void update_vsyscall_tz(void)
 {
-	/* Make userspace gettimeofday spin until we're done. */
-	++vdso_data->tb_update_count;
-	smp_wmb();
 	vdso_data->tz_minuteswest = sys_tz.tz_minuteswest;
 	vdso_data->tz_dsttime = sys_tz.tz_dsttime;
-	smp_wmb();
-	++vdso_data->tb_update_count;
 }
 
 /*
@@ -318,51 +309,12 @@
 	vtime_init();
 }
 
-/*
- * The time is "clock". old is what we think the time is.
- * Adjust the value by a multiple of jiffies and add the delta to ntp.
- * "delay" is an approximation how long the synchronization took. If
- * the time correction is positive, then "delay" is subtracted from
- * the time difference and only the remaining part is passed to ntp.
- */
-static unsigned long long adjust_time(unsigned long long old,
-				      unsigned long long clock,
-				      unsigned long long delay)
-{
-	unsigned long long delta, ticks;
-	struct timex adjust;
-
-	if (clock > old) {
-		/* It is later than we thought. */
-		delta = ticks = clock - old;
-		delta = ticks = (delta < delay) ? 0 : delta - delay;
-		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		adjust.offset = ticks * (1000000 / HZ);
-	} else {
-		/* It is earlier than we thought. */
-		delta = ticks = old - clock;
-		delta -= do_div(ticks, CLK_TICKS_PER_JIFFY);
-		delta = -delta;
-		adjust.offset = -ticks * (1000000 / HZ);
-	}
-	sched_clock_base_cc += delta;
-	if (adjust.offset != 0) {
-		pr_notice("The ETR interface has adjusted the clock "
-			  "by %li microseconds\n", adjust.offset);
-		adjust.modes = ADJ_OFFSET_SINGLESHOT;
-		do_adjtimex(&adjust);
-	}
-	return delta;
-}
-
 static DEFINE_PER_CPU(atomic_t, clock_sync_word);
 static DEFINE_MUTEX(clock_sync_mutex);
 static unsigned long clock_sync_flags;
 
-#define CLOCK_SYNC_HAS_ETR	0
-#define CLOCK_SYNC_HAS_STP	1
-#define CLOCK_SYNC_ETR		2
-#define CLOCK_SYNC_STP		3
+#define CLOCK_SYNC_HAS_STP	0
+#define CLOCK_SYNC_STP		1
 
 /*
  * The get_clock function for the physical clock. It will get the current
@@ -384,34 +336,32 @@
 	if (sw0 == sw1 && (sw0 & 0x80000000U))
 		/* Success: time is in sync. */
 		return 0;
-	if (!test_bit(CLOCK_SYNC_HAS_ETR, &clock_sync_flags) &&
-	    !test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
+	if (!test_bit(CLOCK_SYNC_HAS_STP, &clock_sync_flags))
 		return -EOPNOTSUPP;
-	if (!test_bit(CLOCK_SYNC_ETR, &clock_sync_flags) &&
-	    !test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
+	if (!test_bit(CLOCK_SYNC_STP, &clock_sync_flags))
 		return -EACCES;
 	return -EAGAIN;
 }
 EXPORT_SYMBOL(get_phys_clock);
 
 /*
- * Make get_sync_clock return -EAGAIN.
+ * Make get_phys_clock() return -EAGAIN.
  */
 static void disable_sync_clock(void *dummy)
 {
 	atomic_t *sw_ptr = this_cpu_ptr(&clock_sync_word);
 	/*
-	 * Clear the in-sync bit 2^31. All get_sync_clock calls will
+	 * Clear the in-sync bit 2^31. All get_phys_clock calls will
 	 * fail until the sync bit is turned back on. In addition
 	 * increase the "sequence" counter to avoid the race of an
-	 * etr event and the complete recovery against get_sync_clock.
+	 * stp event and the complete recovery against get_phys_clock.
 	 */
 	atomic_andnot(0x80000000, sw_ptr);
 	atomic_inc(sw_ptr);
 }
 
 /*
- * Make get_sync_clock return 0 again.
+ * Make get_phys_clock() return 0 again.
  * Needs to be called from a context disabled for preemption.
  */
 static void enable_sync_clock(void)
@@ -434,7 +384,7 @@
 	return rc;
 }
 
-/* Single threaded workqueue used for etr and stp sync events */
+/* Single threaded workqueue used for stp sync events */
 static struct workqueue_struct *time_sync_wq;
 
 static void __init time_init_wq(void)
@@ -448,20 +398,12 @@
 	atomic_t cpus;
 	int in_sync;
 	unsigned long long fixup_cc;
-	int etr_port;
-	struct etr_aib *etr_aib;
 };
 
 static void clock_sync_cpu(struct clock_sync_data *sync)
 {
 	atomic_dec(&sync->cpus);
 	enable_sync_clock();
-	/*
-	 * This looks like a busy wait loop but it isn't. etr_sync_cpus
-	 * is called on all other cpus while the TOD clocks is stopped.
-	 * __udelay will stop the cpu on an enabled wait psw until the
-	 * TOD is running again.
-	 */
 	while (sync->in_sync == 0) {
 		__udelay(1);
 		/*
@@ -582,7 +524,7 @@
 static int stp_sync_clock(void *data)
 {
 	static int first;
-	unsigned long long old_clock, delta, new_clock, clock_delta;
+	unsigned long long clock_delta;
 	struct clock_sync_data *stp_sync;
 	struct ptff_qto qto;
 	int rc;
@@ -605,18 +547,18 @@
 	if (stp_info.todoff[0] || stp_info.todoff[1] ||
 	    stp_info.todoff[2] || stp_info.todoff[3] ||
 	    stp_info.tmd != 2) {
-		old_clock = get_tod_clock();
 		rc = chsc_sstpc(stp_page, STP_OP_SYNC, 0, &clock_delta);
 		if (rc == 0) {
-			new_clock = old_clock + clock_delta;
-			delta = adjust_time(old_clock, new_clock, 0);
+			/* fixup the monotonic sched clock */
+			sched_clock_base_cc += clock_delta;
 			if (ptff_query(PTFF_QTO) &&
 			    ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
 				/* Update LPAR offset */
 				lpar_offset = qto.tod_epoch_difference;
 			atomic_notifier_call_chain(&s390_epoch_delta_notifier,
 						   0, &clock_delta);
-			fixup_clock_comparator(delta);
+			stp_sync->fixup_cc = clock_delta;
+			fixup_clock_comparator(clock_delta);
 			rc = chsc_sstpi(stp_page, &stp_info,
 					sizeof(struct stp_sstpi));
 			if (rc == 0 && stp_info.tmd != 2)
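
The old adjust_time() steering in jiffy multiples is gone; the STP sync
delta is now applied directly to every TOD-derived time base. A small
standalone sketch of the arithmetic with made-up values (the real
conversion splits the multiply to avoid 64-bit overflow): since TOD
bit 51 is one microsecond, delta_ns = (delta * 125) >> 9.

#include <stdint.h>
#include <stdio.h>

static uint64_t tod_to_ns(uint64_t tod)
{
	return (tod * 125) >> 9;	/* fine for small deltas */
}

int main(void)
{
	uint64_t sched_clock_base = 0;	/* stand-in for sched_clock_base_cc */
	uint64_t clock_delta = 0x2000;	/* made-up STP sync delta */

	sched_clock_base += clock_delta; /* "fixup the monotonic sched clock" */
	printf("shifted sched clock by %llu ns\n",
	       (unsigned long long)tod_to_ns(clock_delta));
	return 0;
}
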
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index dd97a3e..d0539f7 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -14,11 +14,12 @@
  */
 #include <linux/kprobes.h>
 #include <linux/kdebug.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/ptrace.h>
 #include <linux/sched.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <asm/uaccess.h>
 #include <asm/fpu/api.h>
 #include "entry.h"
 
diff --git a/arch/s390/kernel/vdso32/Makefile b/arch/s390/kernel/vdso32/Makefile
index 6814545..6cc9478 100644
--- a/arch/s390/kernel/vdso32/Makefile
+++ b/arch/s390/kernel/vdso32/Makefile
@@ -24,8 +24,9 @@
 extra-y += vdso32.lds
 CPPFLAGS_vdso32.lds += -P -C -U$(ARCH)
 
-# Disable gcov profiling for VDSO code
+# Disable gcov profiling and ubsan for VDSO code
 GCOV_PROFILE := n
+UBSAN_SANITIZE := n
 
 # Force dependency (incbin is bad)
 $(obj)/vdso32_wrapper.o : $(obj)/vdso32.so
diff --git a/arch/s390/kernel/vdso64/Makefile b/arch/s390/kernel/vdso64/Makefile
index 0b0fd22..2d54c18 100644
--- a/arch/s390/kernel/vdso64/Makefile
+++ b/arch/s390/kernel/vdso64/Makefile
@@ -24,8 +24,9 @@
 extra-y += vdso64.lds
 CPPFLAGS_vdso64.lds += -P -C -U$(ARCH)
 
-# Disable gcov profiling for VDSO code
+# Disable gcov profiling and ubsan for VDSO code
 GCOV_PROFILE := n
+UBSAN_SANITIZE := n
 
 # Force dependency (incbin is bad)
 $(obj)/vdso64_wrapper.o : $(obj)/vdso64.so
diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
index 607ec91..7e8cb6a 100644
--- a/arch/s390/kvm/kvm-s390.c
+++ b/arch/s390/kvm/kvm-s390.c
@@ -132,10 +132,7 @@
 MODULE_PARM_DESC(nested, "Nested virtualization support");
 
 /* upper facilities limit for kvm */
-unsigned long kvm_s390_fac_list_mask[16] = {
-	0xffe6000000000000UL,
-	0x005e000000000000UL,
-};
+unsigned long kvm_s390_fac_list_mask[16] = { FACILITIES_KVM };
 
 unsigned long kvm_s390_fac_list_mask_size(void)
 {
@@ -248,22 +245,33 @@
 		     PTFF_QAF);
 
 	if (test_facility(17)) { /* MSA */
-		__cpacf_query(CPACF_KMAC, kvm_s390_available_subfunc.kmac);
-		__cpacf_query(CPACF_KMC, kvm_s390_available_subfunc.kmc);
-		__cpacf_query(CPACF_KM, kvm_s390_available_subfunc.km);
-		__cpacf_query(CPACF_KIMD, kvm_s390_available_subfunc.kimd);
-		__cpacf_query(CPACF_KLMD, kvm_s390_available_subfunc.klmd);
+		__cpacf_query(CPACF_KMAC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmac);
+		__cpacf_query(CPACF_KMC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmc);
+		__cpacf_query(CPACF_KM, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.km);
+		__cpacf_query(CPACF_KIMD, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kimd);
+		__cpacf_query(CPACF_KLMD, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.klmd);
 	}
 	if (test_facility(76)) /* MSA3 */
-		__cpacf_query(CPACF_PCKMO, kvm_s390_available_subfunc.pckmo);
+		__cpacf_query(CPACF_PCKMO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.pckmo);
 	if (test_facility(77)) { /* MSA4 */
-		__cpacf_query(CPACF_KMCTR, kvm_s390_available_subfunc.kmctr);
-		__cpacf_query(CPACF_KMF, kvm_s390_available_subfunc.kmf);
-		__cpacf_query(CPACF_KMO, kvm_s390_available_subfunc.kmo);
-		__cpacf_query(CPACF_PCC, kvm_s390_available_subfunc.pcc);
+		__cpacf_query(CPACF_KMCTR, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmctr);
+		__cpacf_query(CPACF_KMF, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmf);
+		__cpacf_query(CPACF_KMO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.kmo);
+		__cpacf_query(CPACF_PCC, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.pcc);
 	}
 	if (test_facility(57)) /* MSA5 */
-		__cpacf_query(CPACF_PPNO, kvm_s390_available_subfunc.ppno);
+		__cpacf_query(CPACF_PPNO, (cpacf_mask_t *)
+			      kvm_s390_available_subfunc.ppno);
 
 	if (MACHINE_HAS_ESOP)
 		allow_cpu_feat(KVM_S390_VM_CPU_FEAT_ESOP);
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index cbb73fa..661d9fe 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -24,7 +24,7 @@
 #include <linux/kdebug.h>
 #include <linux/init.h>
 #include <linux/console.h>
-#include <linux/module.h>
+#include <linux/extable.h>
 #include <linux/hardirq.h>
 #include <linux/kprobes.h>
 #include <linux/uaccess.h>
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 2ce6bb3..3ba6227 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -94,6 +94,7 @@
 struct gmap *gmap_create(struct mm_struct *mm, unsigned long limit)
 {
 	struct gmap *gmap;
+	unsigned long gmap_asce;
 
 	gmap = gmap_alloc(limit);
 	if (!gmap)
@@ -101,6 +102,11 @@
 	gmap->mm = mm;
 	spin_lock(&mm->context.gmap_lock);
 	list_add_rcu(&gmap->list, &mm->context.gmap_list);
+	if (list_is_singular(&mm->context.gmap_list))
+		gmap_asce = gmap->asce;
+	else
+		gmap_asce = -1UL;
+	WRITE_ONCE(mm->context.gmap_asce, gmap_asce);
 	spin_unlock(&mm->context.gmap_lock);
 	return gmap;
 }
@@ -230,6 +236,7 @@
 void gmap_remove(struct gmap *gmap)
 {
 	struct gmap *sg, *next;
+	unsigned long gmap_asce;
 
 	/* Remove all shadow gmaps linked to this gmap */
 	if (!list_empty(&gmap->children)) {
@@ -243,6 +250,14 @@
 	/* Remove gmap from the pre-mm list */
 	spin_lock(&gmap->mm->context.gmap_lock);
 	list_del_rcu(&gmap->list);
+	if (list_empty(&gmap->mm->context.gmap_list))
+		gmap_asce = 0;
+	else if (list_is_singular(&gmap->mm->context.gmap_list))
+		gmap_asce = list_first_entry(&gmap->mm->context.gmap_list,
+					     struct gmap, list)->asce;
+	else
+		gmap_asce = -1UL;
+	WRITE_ONCE(gmap->mm->context.gmap_asce, gmap_asce);
 	spin_unlock(&gmap->mm->context.gmap_lock);
 	synchronize_rcu();
 	/* Put reference */
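
The new mm->context.gmap_asce cache is a three-valued summary of the gmap
list, presumably so later TLB-flush code can pick the cheapest safe scope.
A toy model of the update rule (simplified types, hypothetical helper):

#define AMBIGUOUS_ASCE (~0UL)

struct mm_ctx {
	unsigned long gmap_asce;
	unsigned long asces[8];	/* toy storage for attached guest ASCEs */
	int ngmaps;
};

static void update_gmap_asce(struct mm_ctx *ctx)
{
	if (ctx->ngmaps == 0)
		ctx->gmap_asce = 0;		/* no guest address space */
	else if (ctx->ngmaps == 1)
		ctx->gmap_asce = ctx->asces[0];	/* unique: flush by ASCE */
	else
		ctx->gmap_asce = AMBIGUOUS_ASCE; /* several gmaps share the mm */
}
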
diff --git a/arch/s390/mm/pageattr.c b/arch/s390/mm/pageattr.c
index af7cf28c..44f1503 100644
--- a/arch/s390/mm/pageattr.c
+++ b/arch/s390/mm/pageattr.c
@@ -309,11 +309,11 @@
 	int i;
 
 	if (test_facility(13)) {
-		__ptep_ipte_range(address, nr - 1, pte);
+		__ptep_ipte_range(address, nr - 1, pte, IPTE_GLOBAL);
 		return;
 	}
 	for (i = 0; i < nr; i++) {
-		__ptep_ipte(address, pte);
+		__ptep_ipte(address, pte, IPTE_GLOBAL);
 		address += PAGE_SIZE;
 		pte++;
 	}
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 5f09201..7a1897c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -35,9 +35,9 @@
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__ptep_ipte_local(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_LOCAL);
 	else
-		__ptep_ipte(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -56,7 +56,7 @@
 		pte_val(*ptep) |= _PAGE_INVALID;
 		mm->context.flush_mm = 1;
 	} else
-		__ptep_ipte(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -301,9 +301,9 @@
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__pmdp_idte_local(addr, pmdp);
+		__pmdp_idte(addr, pmdp, IDTE_LOCAL);
 	else
-		__pmdp_idte(addr, pmdp);
+		__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -322,7 +322,7 @@
 		pmd_val(*pmdp) |= _SEGMENT_ENTRY_INVALID;
 		mm->context.flush_mm = 1;
 	} else if (MACHINE_HAS_IDTE)
-		__pmdp_idte(addr, pmdp);
+		__pmdp_idte(addr, pmdp, IDTE_GLOBAL);
 	else
 		__pmdp_csp(pmdp);
 	atomic_dec(&mm->context.flush_count);
@@ -374,9 +374,9 @@
 	atomic_inc(&mm->context.flush_count);
 	if (MACHINE_HAS_TLB_LC &&
 	    cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id())))
-		__pudp_idte_local(addr, pudp);
+		__pudp_idte(addr, pudp, IDTE_LOCAL);
 	else
-		__pudp_idte(addr, pudp);
+		__pudp_idte(addr, pudp, IDTE_GLOBAL);
 	atomic_dec(&mm->context.flush_count);
 	return old;
 }
@@ -620,7 +620,7 @@
 	pte = *ptep;
 	if (dirty && (pte_val(pte) & _PAGE_PRESENT)) {
 		pgste = pgste_pte_notify(mm, addr, ptep, pgste);
-		__ptep_ipte(addr, ptep);
+		__ptep_ipte(addr, ptep, IPTE_GLOBAL);
 		if (MACHINE_HAS_ESOP || !(pte_val(pte) & _PAGE_WRITE))
 			pte_val(pte) |= _PAGE_PROTECT;
 		else
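
The IPTE_LOCAL/IPTE_GLOBAL argument replaces the separate
__ptep_ipte_local()/__ptep_ipte() entry points, making the flush scope an
explicit parameter. A sketch of the selection rule, with stand-in
predicates instead of the kernel helpers:

enum ipte_scope { SCOPE_LOCAL, SCOPE_GLOBAL };

static enum ipte_scope pick_scope(int machine_has_tlb_lc,
				  int mm_local_to_this_cpu)
{
	/* a CPU-local IPTE is only safe if no other CPU can hold
	 * this translation in its TLB */
	if (machine_has_tlb_lc && mm_local_to_this_cpu)
		return SCOPE_LOCAL;	/* flush this CPU's TLB only */
	return SCOPE_GLOBAL;		/* broadcast invalidation */
}
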
diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c
index 871af75..15ffc19 100644
--- a/arch/s390/pci/pci.c
+++ b/arch/s390/pci/pci.c
@@ -854,6 +854,15 @@
 }
 EXPORT_SYMBOL_GPL(zpci_stop_device);
 
+int zpci_report_error(struct pci_dev *pdev,
+		      struct zpci_report_error_header *report)
+{
+	struct zpci_dev *zdev = to_zpci(pdev);
+
+	return sclp_pci_report(report, zdev->fh, zdev->fid);
+}
+EXPORT_SYMBOL(zpci_report_error);
+
 static inline int barsize(u8 size)
 {
 	return (size) ? (1 << size) >> 10 : 0;
diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c
index 7297fce..7350c8b 100644
--- a/arch/s390/pci/pci_dma.c
+++ b/arch/s390/pci/pci_dma.c
@@ -129,12 +129,11 @@
 		entry_clr_protected(entry);
 }
 
-static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
-			    dma_addr_t dma_addr, size_t size, int flags)
+static int __dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+			      dma_addr_t dma_addr, size_t size, int flags)
 {
 	unsigned int nr_pages = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	u8 *page_addr = (u8 *) (pa & PAGE_MASK);
-	dma_addr_t start_dma_addr = dma_addr;
 	unsigned long irq_flags;
 	unsigned long *entry;
 	int i, rc = 0;
@@ -145,7 +144,7 @@
 	spin_lock_irqsave(&zdev->dma_table_lock, irq_flags);
 	if (!zdev->dma_table) {
 		rc = -EINVAL;
-		goto no_refresh;
+		goto out_unlock;
 	}
 
 	for (i = 0; i < nr_pages; i++) {
@@ -159,20 +158,6 @@
 		dma_addr += PAGE_SIZE;
 	}
 
-	/*
-	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
-	 * translations when previously invalid translation-table entries are
-	 * validated. With lazy unmap, it also is skipped for previously valid
-	 * entries, but a global rpcit is then required before any address can
-	 * be re-used, i.e. after each iommu bitmap wrap-around.
-	 */
-	if (!zdev->tlb_refresh &&
-			(!s390_iommu_strict ||
-			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
-		goto no_refresh;
-
-	rc = zpci_refresh_trans((u64) zdev->fh << 32, start_dma_addr,
-				nr_pages * PAGE_SIZE);
 undo_cpu_trans:
 	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)) {
 		flags = ZPCI_PTE_INVALID;
@@ -185,12 +170,46 @@
 			dma_update_cpu_trans(entry, page_addr, flags);
 		}
 	}
-
-no_refresh:
+out_unlock:
 	spin_unlock_irqrestore(&zdev->dma_table_lock, irq_flags);
 	return rc;
 }
 
+static int __dma_purge_tlb(struct zpci_dev *zdev, dma_addr_t dma_addr,
+			   size_t size, int flags)
+{
+	/*
+	 * With zdev->tlb_refresh == 0, rpcit is not required to establish new
+	 * translations when previously invalid translation-table entries are
+	 * validated. With lazy unmap, it is also skipped for previously valid
+	 * entries, but a global rpcit is then required before any address can
+	 * be re-used, i.e. after each iommu bitmap wrap-around.
+	 */
+	if (!zdev->tlb_refresh &&
+			(!s390_iommu_strict ||
+			((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID)))
+		return 0;
+
+	return zpci_refresh_trans((u64) zdev->fh << 32, dma_addr,
+				  PAGE_ALIGN(size));
+}
+
+static int dma_update_trans(struct zpci_dev *zdev, unsigned long pa,
+			    dma_addr_t dma_addr, size_t size, int flags)
+{
+	int rc;
+
+	rc = __dma_update_trans(zdev, pa, dma_addr, size, flags);
+	if (rc)
+		return rc;
+
+	rc = __dma_purge_tlb(zdev, dma_addr, size, flags);
+	if (rc && ((flags & ZPCI_PTE_VALID_MASK) == ZPCI_PTE_VALID))
+		__dma_update_trans(zdev, pa, dma_addr, size, ZPCI_PTE_INVALID);
+
+	return rc;
+}
+
 void dma_free_seg_table(unsigned long entry)
 {
 	unsigned long *sto = get_rt_sto(entry);
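
Splitting dma_update_trans() into a table-update step and a TLB-purge step
lets the scatter-gather path batch many updates under a single purge,
while the wrapper keeps the old single-range semantics, including rollback
of freshly validated entries. A standalone sketch of that shape, with
stand-in functions:

#include <stdio.h>

static int update_cpu_trans(int valid)
{
	printf("table entries -> %s\n", valid ? "valid" : "invalid");
	return 0;
}

static int purge_tlb(void)
{
	printf("rpcit\n");	/* may be skipped for lazy unmap */
	return 0;
}

static int dma_update_trans(int valid)
{
	int rc = update_cpu_trans(valid);	/* step 1: CPU tables */

	if (rc)
		return rc;
	rc = purge_tlb();			/* step 2: TLB refresh */
	if (rc && valid)
		update_cpu_trans(0);	/* undo freshly validated entries */
	return rc;
}

int main(void)
{
	return dma_update_trans(1);
}
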
@@ -230,45 +249,54 @@
 				boundary_size, 0);
 }
 
-static unsigned long dma_alloc_iommu(struct device *dev, int size)
+static dma_addr_t dma_alloc_address(struct device *dev, int size)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
 	unsigned long offset, flags;
-	int wrap = 0;
 
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	offset = __dma_alloc_iommu(dev, zdev->next_bit, size);
 	if (offset == -1) {
+		if (!zdev->tlb_refresh && !s390_iommu_strict) {
+			/* global flush before DMA addresses are reused */
+			if (zpci_refresh_global(zdev))
+				goto out_error;
+
+			bitmap_andnot(zdev->iommu_bitmap, zdev->iommu_bitmap,
+				      zdev->lazy_bitmap, zdev->iommu_pages);
+			bitmap_zero(zdev->lazy_bitmap, zdev->iommu_pages);
+		}
 		/* wrap-around */
 		offset = __dma_alloc_iommu(dev, 0, size);
-		wrap = 1;
+		if (offset == -1)
+			goto out_error;
 	}
-
-	if (offset != -1) {
-		zdev->next_bit = offset + size;
-		if (!zdev->tlb_refresh && !s390_iommu_strict && wrap)
-			/* global flush after wrap-around with lazy unmap */
-			zpci_refresh_global(zdev);
-	}
+	zdev->next_bit = offset + size;
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
-	return offset;
+
+	return zdev->start_dma + offset * PAGE_SIZE;
+
+out_error:
+	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
+	return DMA_ERROR_CODE;
 }
 
-static void dma_free_iommu(struct device *dev, unsigned long offset, int size)
+static void dma_free_address(struct device *dev, dma_addr_t dma_addr, int size)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long flags;
+	unsigned long flags, offset;
+
+	offset = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
 
 	spin_lock_irqsave(&zdev->iommu_bitmap_lock, flags);
 	if (!zdev->iommu_bitmap)
 		goto out;
-	bitmap_clear(zdev->iommu_bitmap, offset, size);
-	/*
-	 * Lazy flush for unmap: need to move next_bit to avoid address re-use
-	 * until wrap-around.
-	 */
-	if (!s390_iommu_strict && offset >= zdev->next_bit)
-		zdev->next_bit = offset + size;
+
+	if (zdev->tlb_refresh || s390_iommu_strict)
+		bitmap_clear(zdev->iommu_bitmap, offset, size);
+	else
+		bitmap_set(zdev->lazy_bitmap, offset, size);
+
 out:
 	spin_unlock_irqrestore(&zdev->iommu_bitmap_lock, flags);
 }
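
With the new lazy_bitmap, unmapped DMA addresses are no longer returned to
the allocator immediately; they are parked until the allocator wraps, at
which point one global TLB flush makes the whole batch reusable. A toy
userspace model of the bookkeeping (fixed-size byte-per-page bitmaps for
clarity):

#include <stdio.h>
#include <string.h>

#define PAGES 64
static unsigned char iommu_bitmap[PAGES];	/* 1 = DMA address in use */
static unsigned char lazy_bitmap[PAGES];	/* 1 = freed, flush pending */

static void lazy_free(int page)
{
	lazy_bitmap[page] = 1;	/* defer re-use until the global flush */
}

static void wraparound_flush(void)
{
	int i;

	/* zpci_refresh_global() would run here */
	for (i = 0; i < PAGES; i++)
		if (lazy_bitmap[i])
			iommu_bitmap[i] = 0;	/* safe to hand out again */
	memset(lazy_bitmap, 0, sizeof(lazy_bitmap));
}

int main(void)
{
	iommu_bitmap[5] = 1;
	lazy_free(5);
	wraparound_flush();
	printf("page 5 reusable: %d\n", !iommu_bitmap[5]);
	return 0;
}
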
@@ -289,16 +317,16 @@
 				     unsigned long attrs)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long nr_pages, iommu_page_index;
 	unsigned long pa = page_to_phys(page) + offset;
 	int flags = ZPCI_PTE_VALID;
+	unsigned long nr_pages;
 	dma_addr_t dma_addr;
 	int ret;
 
 	/* This rounds up number of pages based on size and offset */
 	nr_pages = iommu_num_pages(pa, size, PAGE_SIZE);
-	iommu_page_index = dma_alloc_iommu(dev, nr_pages);
-	if (iommu_page_index == -1) {
+	dma_addr = dma_alloc_address(dev, nr_pages);
+	if (dma_addr == DMA_ERROR_CODE) {
 		ret = -ENOSPC;
 		goto out_err;
 	}
@@ -306,12 +334,6 @@
 	/* Use rounded up size */
 	size = nr_pages * PAGE_SIZE;
 
-	dma_addr = zdev->start_dma + iommu_page_index * PAGE_SIZE;
-	if (dma_addr + size > zdev->end_dma) {
-		ret = -ERANGE;
-		goto out_free;
-	}
-
 	if (direction == DMA_NONE || direction == DMA_TO_DEVICE)
 		flags |= ZPCI_TABLE_PROTECTED;
 
@@ -323,7 +345,7 @@
 	return dma_addr + (offset & ~PAGE_MASK);
 
 out_free:
-	dma_free_iommu(dev, iommu_page_index, nr_pages);
+	dma_free_address(dev, dma_addr, nr_pages);
 out_err:
 	zpci_err("map error:\n");
 	zpci_err_dma(ret, pa);
@@ -335,7 +357,6 @@
 				 unsigned long attrs)
 {
 	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
-	unsigned long iommu_page_index;
 	int npages, ret;
 
 	npages = iommu_num_pages(dma_addr, size, PAGE_SIZE);
@@ -349,8 +370,7 @@
 	}
 
 	atomic64_add(npages, &zdev->unmapped_pages);
-	iommu_page_index = (dma_addr - zdev->start_dma) >> PAGE_SHIFT;
-	dma_free_iommu(dev, iommu_page_index, npages);
+	dma_free_address(dev, dma_addr, npages);
 }
 
 static void *s390_dma_alloc(struct device *dev, size_t size,
@@ -394,37 +414,98 @@
 	free_pages((unsigned long) pa, get_order(size));
 }
 
+/* Map a segment into a contiguous dma address area */
+static int __s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
+			     size_t size, dma_addr_t *handle,
+			     enum dma_data_direction dir)
+{
+	struct zpci_dev *zdev = to_zpci(to_pci_dev(dev));
+	dma_addr_t dma_addr_base, dma_addr;
+	int flags = ZPCI_PTE_VALID;
+	struct scatterlist *s;
+	unsigned long pa;
+	int ret;
+
+	size = PAGE_ALIGN(size);
+	dma_addr_base = dma_alloc_address(dev, size >> PAGE_SHIFT);
+	if (dma_addr_base == DMA_ERROR_CODE)
+		return -ENOMEM;
+
+	dma_addr = dma_addr_base;
+	if (dir == DMA_NONE || dir == DMA_TO_DEVICE)
+		flags |= ZPCI_TABLE_PROTECTED;
+
+	for (s = sg; dma_addr < dma_addr_base + size; s = sg_next(s)) {
+		pa = page_to_phys(sg_page(s)) + s->offset;
+		ret = __dma_update_trans(zdev, pa, dma_addr, s->length, flags);
+		if (ret)
+			goto unmap;
+
+		dma_addr += s->length;
+	}
+	ret = __dma_purge_tlb(zdev, dma_addr_base, size, flags);
+	if (ret)
+		goto unmap;
+
+	*handle = dma_addr_base;
+	atomic64_add(size >> PAGE_SHIFT, &zdev->mapped_pages);
+
+	return ret;
+
+unmap:
+	dma_update_trans(zdev, 0, dma_addr_base, dma_addr - dma_addr_base,
+			 ZPCI_PTE_INVALID);
+	dma_free_address(dev, dma_addr_base, size >> PAGE_SHIFT);
+	zpci_err("map error:\n");
+	zpci_err_dma(ret, pa);
+	return ret;
+}
+
 static int s390_dma_map_sg(struct device *dev, struct scatterlist *sg,
 			   int nr_elements, enum dma_data_direction dir,
 			   unsigned long attrs)
 {
-	int mapped_elements = 0;
-	struct scatterlist *s;
-	int i;
+	struct scatterlist *s = sg, *start = sg, *dma = sg;
+	unsigned int max = dma_get_max_seg_size(dev);
+	unsigned int size = s->offset + s->length;
+	unsigned int offset = s->offset;
+	int count = 0, i;
 
-	for_each_sg(sg, s, nr_elements, i) {
-		struct page *page = sg_page(s);
-		s->dma_address = s390_dma_map_pages(dev, page, s->offset,
-						    s->length, dir, 0);
-		if (!dma_mapping_error(dev, s->dma_address)) {
-			s->dma_length = s->length;
-			mapped_elements++;
-		} else
-			goto unmap;
-	}
-out:
-	return mapped_elements;
+	for (i = 1; i < nr_elements; i++) {
+		s = sg_next(s);
 
-unmap:
-	for_each_sg(sg, s, mapped_elements, i) {
-		if (s->dma_address)
-			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
-					     dir, 0);
-		s->dma_address = 0;
+		s->dma_address = DMA_ERROR_CODE;
 		s->dma_length = 0;
+
+		if (s->offset || (size & ~PAGE_MASK) ||
+		    size + s->length > max) {
+			if (__s390_dma_map_sg(dev, start, size,
+					      &dma->dma_address, dir))
+				goto unmap;
+
+			dma->dma_address += offset;
+			dma->dma_length = size - offset;
+
+			size = offset = s->offset;
+			start = s;
+			dma = sg_next(dma);
+			count++;
+		}
+		size += s->length;
 	}
-	mapped_elements = 0;
-	goto out;
+	if (__s390_dma_map_sg(dev, start, size, &dma->dma_address, dir))
+		goto unmap;
+
+	dma->dma_address += offset;
+	dma->dma_length = size - offset;
+
+	return count + 1;
+unmap:
+	for_each_sg(sg, s, count, i)
+		s390_dma_unmap_pages(dev, sg_dma_address(s), sg_dma_len(s),
+				     dir, attrs);
+
+	return 0;
 }
 
 static void s390_dma_unmap_sg(struct device *dev, struct scatterlist *sg,
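
The rewritten map_sg glues consecutive elements into one contiguous DMA
area and only cuts a new segment when an element cannot be appended
seamlessly. The cut condition, restated as a standalone predicate
(parameter names are mine):

static int must_start_new_segment(unsigned long running_size,
				  unsigned long elem_offset,
				  unsigned long elem_len,
				  unsigned long max_seg,
				  unsigned long page_size)
{
	return elem_offset ||			  /* element starts mid-page */
	       (running_size % page_size) ||	  /* area not page-aligned yet */
	       running_size + elem_len > max_seg; /* device segment limit hit */
}

This mirrors the "s->offset || (size & ~PAGE_MASK) || size + s->length > max"
test above; the first element's offset is tolerated because it is added
back onto dma_address after mapping.
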
@@ -435,8 +516,9 @@
 	int i;
 
 	for_each_sg(sg, s, nr_elements, i) {
-		s390_dma_unmap_pages(dev, s->dma_address, s->dma_length, dir,
-				     0);
+		if (s->dma_length)
+			s390_dma_unmap_pages(dev, s->dma_address, s->dma_length,
+					     dir, attrs);
 		s->dma_address = 0;
 		s->dma_length = 0;
 	}
@@ -482,7 +564,14 @@
 		rc = -ENOMEM;
 		goto free_dma_table;
 	}
+	if (!zdev->tlb_refresh && !s390_iommu_strict) {
+		zdev->lazy_bitmap = vzalloc(zdev->iommu_pages / 8);
+		if (!zdev->lazy_bitmap) {
+			rc = -ENOMEM;
+			goto free_bitmap;
+		}
 
+	}
 	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
 				(u64) zdev->dma_table);
 	if (rc)
@@ -492,6 +581,8 @@
 free_bitmap:
 	vfree(zdev->iommu_bitmap);
 	zdev->iommu_bitmap = NULL;
+	vfree(zdev->lazy_bitmap);
+	zdev->lazy_bitmap = NULL;
 free_dma_table:
 	dma_free_cpu_table(zdev->dma_table);
 	zdev->dma_table = NULL;
@@ -513,6 +604,9 @@
 	zdev->dma_table = NULL;
 	vfree(zdev->iommu_bitmap);
 	zdev->iommu_bitmap = NULL;
+	vfree(zdev->lazy_bitmap);
+	zdev->lazy_bitmap = NULL;
+
 	zdev->next_bit = 0;
 }
 
diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c
index a04d491..3b44b1d 100644
--- a/drivers/iommu/s390-iommu.c
+++ b/drivers/iommu/s390-iommu.c
@@ -101,8 +101,7 @@
 		zpci_dma_exit_device(zdev);
 
 	zdev->dma_table = s390_domain->dma_table;
-	rc = zpci_register_ioat(zdev, 0, zdev->start_dma + PAGE_OFFSET,
-				zdev->start_dma + zdev->iommu_size - 1,
+	rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma,
 				(u64) zdev->dma_table);
 	if (rc)
 		goto out_restore;
diff --git a/drivers/s390/block/dasd.c b/drivers/s390/block/dasd.c
index fb1b56a..1de0890 100644
--- a/drivers/s390/block/dasd.c
+++ b/drivers/s390/block/dasd.c
@@ -212,16 +212,6 @@
 {
 	/* Disable extended error reporting for this device. */
 	dasd_eer_disable(device);
-	/* Forget the discipline information. */
-	if (device->discipline) {
-		if (device->discipline->uncheck_device)
-			device->discipline->uncheck_device(device);
-		module_put(device->discipline->owner);
-	}
-	device->discipline = NULL;
-	if (device->base_discipline)
-		module_put(device->base_discipline->owner);
-	device->base_discipline = NULL;
 	device->state = DASD_STATE_NEW;
 
 	if (device->block)
@@ -336,6 +326,7 @@
 {
 	int rc;
 	struct dasd_block *block;
+	struct gendisk *disk;
 
 	rc = 0;
 	block = device->block;
@@ -346,6 +337,9 @@
 		if (rc) {
 			if (rc != -EAGAIN) {
 				device->state = DASD_STATE_UNFMT;
+				disk = device->block->gdp;
+				kobject_uevent(&disk_to_dev(disk)->kobj,
+					       KOBJ_CHANGE);
 				goto out;
 			}
 			return rc;
@@ -2274,6 +2268,15 @@
 			continue;
 		}
 		/*
+		 * Don't try to start requests if device is in
+		 * offline processing; it might wait forever
+		 */
+		if (test_bit(DASD_FLAG_OFFLINE, &device->flags)) {
+			cqr->status = DASD_CQR_FAILED;
+			cqr->intrc = -ENODEV;
+			continue;
+		}
+		/*
 		 * Don't try to start requests if device is stopped
 		 * except path verification requests
 		 */
@@ -3364,6 +3367,22 @@
 }
 EXPORT_SYMBOL_GPL(dasd_generic_probe);
 
+void dasd_generic_free_discipline(struct dasd_device *device)
+{
+	/* Forget the discipline information. */
+	if (device->discipline) {
+		if (device->discipline->uncheck_device)
+			device->discipline->uncheck_device(device);
+		module_put(device->discipline->owner);
+		device->discipline = NULL;
+	}
+	if (device->base_discipline) {
+		module_put(device->base_discipline->owner);
+		device->base_discipline = NULL;
+	}
+}
+EXPORT_SYMBOL_GPL(dasd_generic_free_discipline);
+
 /*
  * This will one day be called from a global not_oper handler.
  * It is also used by driver_unregister during module unload.
diff --git a/drivers/s390/block/dasd_devmap.c b/drivers/s390/block/dasd_devmap.c
index 3cdbce4..15a1a70 100644
--- a/drivers/s390/block/dasd_devmap.c
+++ b/drivers/s390/block/dasd_devmap.c
@@ -617,6 +617,7 @@
 	/* Wait for reference counter to drop to zero. */
 	wait_event(dasd_delete_wq, atomic_read(&device->ref_count) == 0);
 
+	dasd_generic_free_discipline(device);
 	/* Disconnect dasd_device structure from ccw_device structure. */
 	cdev = device->cdev;
 	device->cdev = NULL;
diff --git a/drivers/s390/block/dasd_eckd.c b/drivers/s390/block/dasd_eckd.c
index 98bbec4..831935a 100644
--- a/drivers/s390/block/dasd_eckd.c
+++ b/drivers/s390/block/dasd_eckd.c
@@ -5201,7 +5201,7 @@
 
 	cqr->buildclk = get_tod_clock();
 	cqr->status = DASD_CQR_FILLED;
-	rc = dasd_sleep_on(cqr);
+	rc = dasd_sleep_on_interruptible(cqr);
 	if (rc == 0) {
 		*data = *host_access;
 	} else {
diff --git a/drivers/s390/block/dasd_erp.c b/drivers/s390/block/dasd_erp.c
index e1e8848..d138d01 100644
--- a/drivers/s390/block/dasd_erp.c
+++ b/drivers/s390/block/dasd_erp.c
@@ -169,12 +169,12 @@
 	device = cqr->startdev;
 	if (cqr->intrc == -ETIMEDOUT) {
 		dev_err(&device->cdev->dev,
-			"A timeout error occurred for cqr %p", cqr);
+			"A timeout error occurred for cqr %p\n", cqr);
 		return;
 	}
 	if (cqr->intrc == -ENOLINK) {
 		dev_err(&device->cdev->dev,
-			"A transport error occurred for cqr %p", cqr);
+			"A transport error occurred for cqr %p\n", cqr);
 		return;
 	}
 	/* dump sense data */
diff --git a/drivers/s390/block/dasd_int.h b/drivers/s390/block/dasd_int.h
index ac7027e..87ff6ce 100644
--- a/drivers/s390/block/dasd_int.h
+++ b/drivers/s390/block/dasd_int.h
@@ -725,6 +725,7 @@
 int  dasd_cancel_req(struct dasd_ccw_req *);
 int dasd_flush_device_queue(struct dasd_device *);
 int dasd_generic_probe (struct ccw_device *, struct dasd_discipline *);
+void dasd_generic_free_discipline(struct dasd_device *);
 void dasd_generic_remove (struct ccw_device *cdev);
 int dasd_generic_set_online(struct ccw_device *, struct dasd_discipline *);
 int dasd_generic_set_offline (struct ccw_device *cdev);
diff --git a/drivers/s390/char/con3270.c b/drivers/s390/char/con3270.c
index 6b1577c..285b400 100644
--- a/drivers/s390/char/con3270.c
+++ b/drivers/s390/char/con3270.c
@@ -124,7 +124,12 @@
 static void
 con3270_update_string(struct con3270 *cp, struct string *s, int nr)
 {
-	if (s->len >= cp->view.cols - 5)
+	if (s->len < 4) {
+		/* This indicates a bug, but printing a warning would
+		 * cause a deadlock. */
+		return;
+	}
+	if (s->string[s->len - 4] != TO_RA)
 		return;
 	raw3270_buffer_address(cp->view.dev, s->string + s->len - 3,
 			       cp->view.cols * (nr + 1));
@@ -460,11 +465,11 @@
 		cp->cline->len + 4 : cp->view.cols;
 	s = con3270_alloc_string(cp, size);
 	memcpy(s->string, cp->cline->string, cp->cline->len);
-	if (s->len < cp->view.cols - 5) {
+	if (cp->cline->len < cp->view.cols - 5) {
 		s->string[s->len - 4] = TO_RA;
 		s->string[s->len - 1] = 0;
 	} else {
-		while (--size > cp->cline->len)
+		while (--size >= cp->cline->len)
 			s->string[size] = cp->view.ascebc[' '];
 	}
 	/* Replace cline with allocated line s and reset cline. */
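
The ">=" change matters because only cp->cline->len bytes were copied into
the allocated line: the old "--size > cp->cline->len" loop stopped one
index early and left s->string[cline->len] uninitialised. A tiny
standalone demo of the off-by-one:

#include <stdio.h>
#include <string.h>

int main(void)
{
	char buf[8];
	int len = 3, size = sizeof(buf);

	memset(buf, '?', sizeof(buf));	/* '?' marks "never written" */
	/* old loop: while (--size > len) left buf[len] == '?' */
	while (--size >= len)		/* fixed loop pads buf[len] too */
		buf[size] = ' ';
	printf("buf[len] = '%c'\n", buf[len]);	/* now a blank, not '?' */
	return 0;
}
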
diff --git a/drivers/s390/char/tape_3590.c b/drivers/s390/char/tape_3590.c
index d3d1936..e352047 100644
--- a/drivers/s390/char/tape_3590.c
+++ b/drivers/s390/char/tape_3590.c
@@ -312,15 +312,10 @@
 		return -ENOSYS;
 	if (!crypt_enabled(device))
 		return -EUNATCH;
-	ext_kekls = kmalloc(sizeof(*ext_kekls), GFP_KERNEL);
-	if (!ext_kekls)
-		return -ENOMEM;
-	if (copy_from_user(ext_kekls, (char __user *)arg, sizeof(*ext_kekls))) {
-		rc = -EFAULT;
-		goto out;
-	}
+	ext_kekls = memdup_user((char __user *)arg, sizeof(*ext_kekls));
+	if (IS_ERR(ext_kekls))
+		return PTR_ERR(ext_kekls);
 	rc = tape_3592_kekl_set(device, ext_kekls);
-out:
 	kfree(ext_kekls);
 	return rc;
 }
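
This is the stock memdup_user() conversion: one call replaces the
kmalloc/copy_from_user/kfree-on-fault sequence and reports failure as an
ERR_PTR. A schematic kernel-style example with made-up names:

#include <linux/err.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/uaccess.h>

struct demo_args { int a, b; };	/* stands in for the real ioctl payload */

static long demo_ioctl(unsigned long arg)
{
	struct demo_args *p;
	long rc;

	/* memdup_user() bundles kmalloc + copy_from_user + cleanup */
	p = memdup_user((void __user *)arg, sizeof(*p));
	if (IS_ERR(p))
		return PTR_ERR(p);
	rc = p->a + p->b;	/* use the copied data */
	kfree(p);
	return rc;
}
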
diff --git a/drivers/s390/char/vmur.c b/drivers/s390/char/vmur.c
index 6c30e93a..ff18f37 100644
--- a/drivers/s390/char/vmur.c
+++ b/drivers/s390/char/vmur.c
@@ -306,10 +306,11 @@
 {
 	struct urdev *urd;
 
-	TRACE("ur_int_handler: intparm=0x%lx cstat=%02x dstat=%02x res=%u\n",
-	      intparm, irb->scsw.cmd.cstat, irb->scsw.cmd.dstat,
-	      irb->scsw.cmd.count);
-
+	if (!IS_ERR(irb)) {
+		TRACE("ur_int_handler: intparm=0x%lx cstat=%02x dstat=%02x res=%u\n",
+		      intparm, irb->scsw.cmd.cstat, irb->scsw.cmd.dstat,
+		      irb->scsw.cmd.count);
+	}
 	if (!intparm) {
 		TRACE("ur_int_handler: unsolicited interrupt\n");
 		return;
diff --git a/drivers/s390/cio/chsc.c b/drivers/s390/cio/chsc.c
index 940e725..1167469 100644
--- a/drivers/s390/cio/chsc.c
+++ b/drivers/s390/cio/chsc.c
@@ -95,12 +95,13 @@
 int chsc_get_ssd_info(struct subchannel_id schid, struct chsc_ssd_info *ssd)
 {
 	struct chsc_ssd_area *ssd_area;
+	unsigned long flags;
 	int ccode;
 	int ret;
 	int i;
 	int mask;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	ssd_area = chsc_page;
 	ssd_area->request.length = 0x0010;
@@ -144,7 +145,7 @@
 			ssd->fla[i] = ssd_area->fla[i];
 	}
 out:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
 }
 
@@ -832,9 +833,10 @@
 		u32 fmt : 4;
 		u32 : 16;
 	} __attribute__ ((packed)) *secm_area;
+	unsigned long flags;
 	int ret, ccode;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	secm_area = chsc_page;
 	secm_area->request.length = 0x0050;
@@ -864,7 +866,7 @@
 		CIO_CRW_EVENT(2, "chsc: secm failed (rc=%04x)\n",
 			      secm_area->response.code);
 out:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
 }
 
@@ -992,6 +994,7 @@
 
 int chsc_get_channel_measurement_chars(struct channel_path *chp)
 {
+	unsigned long flags;
 	int ccode, ret;
 
 	struct {
@@ -1021,7 +1024,7 @@
 	if (!css_chsc_characteristics.scmc || !css_chsc_characteristics.secm)
 		return -EINVAL;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	scmc_area = chsc_page;
 	scmc_area->request.length = 0x0010;
@@ -1053,7 +1056,7 @@
 	chsc_initialize_cmg_chars(chp, scmc_area->cmcv,
 				  (struct cmg_chars *) &scmc_area->data);
 out:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return ret;
 }
 
@@ -1134,6 +1137,7 @@
 int __init
 chsc_determine_css_characteristics(void)
 {
+	unsigned long flags;
 	int result;
 	struct {
 		struct chsc_header request;
@@ -1146,7 +1150,7 @@
 		u32 chsc_char[508];
 	} __attribute__ ((packed)) *scsc_area;
 
-	spin_lock_irq(&chsc_page_lock);
+	spin_lock_irqsave(&chsc_page_lock, flags);
 	memset(chsc_page, 0, PAGE_SIZE);
 	scsc_area = chsc_page;
 	scsc_area->request.length = 0x0010;
@@ -1168,7 +1172,7 @@
 		CIO_CRW_EVENT(2, "chsc: scsc failed (rc=%04x)\n",
 			      scsc_area->response.code);
 exit:
-	spin_unlock_irq(&chsc_page_lock);
+	spin_unlock_irqrestore(&chsc_page_lock, flags);
 	return result;
 }
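
All four hunks switch chsc_page_lock from spin_lock_irq() to
spin_lock_irqsave(), presumably because callers such as the resume path
may already run with interrupts disabled, and a blind spin_unlock_irq()
would re-enable them. The canonical pattern, as a self-contained
kernel-style example:

#include <linux/spinlock.h>

static DEFINE_SPINLOCK(demo_lock);

/* Safe from any context: remembers the caller's interrupt state and
 * restores it instead of unconditionally enabling interrupts. */
static void demo_critical_section(void)
{
	unsigned long flags;

	spin_lock_irqsave(&demo_lock, flags);
	/* ... touch the shared chsc_page-like resource ... */
	spin_unlock_irqrestore(&demo_lock, flags);
}
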
 
diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
index 93de0b4..f0e57ae 100644
--- a/drivers/s390/cio/cio.h
+++ b/drivers/s390/cio/cio.h
@@ -127,7 +127,6 @@
 extern int cio_halt (struct subchannel *);
 extern int cio_start (struct subchannel *, struct ccw1 *, __u8);
 extern int cio_start_key (struct subchannel *, struct ccw1 *, __u8, __u8);
-extern int cio_cancel (struct subchannel *);
 extern int cio_set_options (struct subchannel *, int);
 extern int cio_update_schib(struct subchannel *sch);
 extern int cio_commit_config(struct subchannel *sch);
diff --git a/drivers/s390/scsi/zfcp_aux.c b/drivers/s390/scsi/zfcp_aux.c
index c00ac46..bcc8f3d 100644
--- a/drivers/s390/scsi/zfcp_aux.c
+++ b/drivers/s390/scsi/zfcp_aux.c
@@ -310,7 +310,7 @@
 
 	snprintf(name, sizeof(name), "zfcp_q_%s",
 		 dev_name(&adapter->ccw_device->dev));
-	adapter->work_queue = create_singlethread_workqueue(name);
+	adapter->work_queue = alloc_ordered_workqueue(name, WQ_MEM_RECLAIM);
 
 	if (adapter->work_queue)
 		return 0;
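
alloc_ordered_workqueue() with WQ_MEM_RECLAIM is the modern replacement
for create_singlethread_workqueue(): still one work item in flight at a
time, but with a rescuer thread so the queue keeps making progress under
memory pressure. Minimal kernel-style usage with made-up names:

#include <linux/errno.h>
#include <linux/workqueue.h>

static struct workqueue_struct *demo_wq;

static int demo_init(void)
{
	demo_wq = alloc_ordered_workqueue("demo_q_%s", WQ_MEM_RECLAIM, "0");
	return demo_wq ? 0 : -ENOMEM;
}
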
diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h
index a0118d5..395a4c6 100644
--- a/include/linux/raid/pq.h
+++ b/include/linux/raid/pq.h
@@ -103,6 +103,7 @@
 extern const struct raid6_calls raid6_avx2x2;
 extern const struct raid6_calls raid6_avx2x4;
 extern const struct raid6_calls raid6_tilegx8;
+extern const struct raid6_calls raid6_s390vx8;
 
 struct raid6_recov_calls {
 	void (*data2)(int, size_t, int, int, void **);
@@ -115,6 +116,7 @@
 extern const struct raid6_recov_calls raid6_recov_intx1;
 extern const struct raid6_recov_calls raid6_recov_ssse3;
 extern const struct raid6_recov_calls raid6_recov_avx2;
+extern const struct raid6_recov_calls raid6_recov_s390xc;
 
 extern const struct raid6_calls raid6_neonx1;
 extern const struct raid6_calls raid6_neonx2;
diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 39494af..bc6e651 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -1,6 +1,9 @@
 config ARCH_HAS_UBSAN_SANITIZE_ALL
 	bool
 
+config ARCH_WANTS_UBSAN_NO_NULL
+	def_bool n
+
 config UBSAN
 	bool "Undefined behaviour sanity checker"
 	help
@@ -34,3 +37,11 @@
 	  This option enables detection of unaligned memory accesses.
 	  Enabling this option on architectures that support unaligned
 	  accesses may produce a lot of false positives.
+
+config UBSAN_NULL
+	bool "Enable checking of null pointers"
+	depends on UBSAN
+	default y if !ARCH_WANTS_UBSAN_NO_NULL
+	help
+	  This option enables detection of memory accesses via a
+	  null pointer.
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index 0a7e494..f01b1cb 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -3,3 +3,4 @@
 int*.c
 tables.c
 neon?.c
+s390vx?.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 3b10a48..29f503e 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -7,6 +7,7 @@
 raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
 raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
+raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
 
 hostprogs-y	+= mktables
 
@@ -116,6 +117,11 @@
 $(obj)/tilegx8.c:   $(src)/tilegx.uc $(src)/unroll.awk FORCE
 	$(call if_changed,unroll)
 
+targets += s390vx8.c
+$(obj)/s390vx8.c:   UNROLL := 8
+$(obj)/s390vx8.c:   $(src)/s390vx.uc $(src)/unroll.awk FORCE
+	$(call if_changed,unroll)
+
 quiet_cmd_mktable = TABLE   $@
       cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
 
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 975c6e0..592ff49 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -69,6 +69,9 @@
 #if defined(CONFIG_TILEGX)
 	&raid6_tilegx8,
 #endif
+#if defined(CONFIG_S390)
+	&raid6_s390vx8,
+#endif
 	&raid6_intx1,
 	&raid6_intx2,
 	&raid6_intx4,
@@ -95,6 +98,9 @@
 #ifdef CONFIG_AS_SSSE3
 	&raid6_recov_ssse3,
 #endif
+#ifdef CONFIG_S390
+	&raid6_recov_s390xc,
+#endif
 	&raid6_recov_intx1,
 	NULL
 };
diff --git a/lib/raid6/recov_s390xc.c b/lib/raid6/recov_s390xc.c
new file mode 100644
index 0000000..b042dac
--- /dev/null
+++ b/lib/raid6/recov_s390xc.c
@@ -0,0 +1,116 @@
+/*
+ * RAID-6 data recovery in dual failure mode based on the XC instruction.
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ */
+
+#include <linux/export.h>
+#include <linux/raid/pq.h>
+
+static inline void xor_block(u8 *p1, u8 *p2)
+{
+	typedef struct { u8 _[256]; } addrtype;
+
+	asm volatile(
+		"	xc	0(256,%[p1]),0(%[p2])\n"
+		: "+m" (*(addrtype *) p1) : "m" (*(addrtype *) p2),
+		  [p1] "a" (p1), [p2] "a" (p2) : "cc");
+}
+
+/* Recover two failed data blocks. */
+static void raid6_2data_recov_s390xc(int disks, size_t bytes, int faila,
+		int failb, void **ptrs)
+{
+	u8 *p, *q, *dp, *dq;
+	const u8 *pbmul;	/* P multiplier table for B data */
+	const u8 *qmul;		/* Q multiplier table (for both) */
+	int i;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data pages
+	   Use the dead data pages as temporary storage for
+	   delta p and delta q */
+	dp = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-2] = dp;
+	dq = (u8 *)ptrs[failb];
+	ptrs[failb] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dp;
+	ptrs[failb]   = dq;
+	ptrs[disks-2] = p;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	pbmul = raid6_gfmul[raid6_gfexi[failb-faila]];
+	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]]];
+
+	/* Now do it... */
+	while (bytes) {
+		xor_block(dp, p);
+		xor_block(dq, q);
+		for (i = 0; i < 256; i++)
+			dq[i] = pbmul[dp[i]] ^ qmul[dq[i]];
+		xor_block(dp, dq);
+		p += 256;
+		q += 256;
+		dp += 256;
+		dq += 256;
+		bytes -= 256;
+	}
+}
+
+/* Recover failure of one data block plus the P block */
+static void raid6_datap_recov_s390xc(int disks, size_t bytes, int faila,
+		void **ptrs)
+{
+	u8 *p, *q, *dq;
+	const u8 *qmul;		/* Q multiplier table */
+	int i;
+
+	p = (u8 *)ptrs[disks-2];
+	q = (u8 *)ptrs[disks-1];
+
+	/* Compute syndrome with zero for the missing data page
+	   Use the dead data page as temporary storage for delta q */
+	dq = (u8 *)ptrs[faila];
+	ptrs[faila] = (void *)raid6_empty_zero_page;
+	ptrs[disks-1] = dq;
+
+	raid6_call.gen_syndrome(disks, bytes, ptrs);
+
+	/* Restore pointer table */
+	ptrs[faila]   = dq;
+	ptrs[disks-1] = q;
+
+	/* Now, pick the proper data tables */
+	qmul  = raid6_gfmul[raid6_gfinv[raid6_gfexp[faila]]];
+
+	/* Now do it... */
+	while (bytes) {
+		xor_block(dq, q);
+		for (i = 0; i < 256; i++)
+			dq[i] = qmul[dq[i]];
+		xor_block(p, dq);
+		p += 256;
+		q += 256;
+		dq += 256;
+		bytes -= 256;
+	}
+}
+
+
+const struct raid6_recov_calls raid6_recov_s390xc = {
+	.data2 = raid6_2data_recov_s390xc,
+	.datap = raid6_datap_recov_s390xc,
+	.valid = NULL,
+	.name = "s390xc",
+	.priority = 1,
+};
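
For reference, the classic two-failure identities behind the loop: with
Dp = P xor P' and Dq = Q xor Q' produced by the zeroed-page syndrome pass,
one lost block is pbmul[Dp] xor qmul[Dq] (stored into dq) and the other is
that result xor Dp (stored into dp), evaluated byte-wise over GF(2^8). The
256-byte stride exists because a single XC instruction XORs up to 256
bytes; a portable model of the helper:

#include <stddef.h>
#include <stdint.h>

/* What "xc 0(256,%[p1]),0(%[p2])" does: p1[0..255] ^= p2[0..255]. */
static void xor_block(uint8_t *p1, const uint8_t *p2)
{
	size_t i;

	for (i = 0; i < 256; i++)
		p1[i] ^= p2[i];
}
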
diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc
new file mode 100644
index 0000000..7b45191
--- /dev/null
+++ b/lib/raid6/s390vx.uc
@@ -0,0 +1,168 @@
+/*
+ * raid6_vx$#.c
+ *
+ * $#-way unrolled RAID6 gen/xor functions for s390
+ * based on the vector facility
+ *
+ * Copyright IBM Corp. 2016
+ * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
+ *
+ * This file is postprocessed using unroll.awk.
+ */
+
+#include <linux/raid/pq.h>
+#include <asm/fpu/api.h>
+
+asm(".include \"asm/vx-insn.h\"\n");
+
+#define NSIZE 16
+
+static inline void LOAD_CONST(void)
+{
+	asm volatile("VREPIB %v24,7");
+	asm volatile("VREPIB %v25,0x1d");
+}
+
+/*
+ * The SHLBYTE() operation shifts each of the 16 bytes in
+ * vector register y left by 1 bit and stores the result in
+ * vector register x.
+ */
+static inline void SHLBYTE(int x, int y)
+{
+	asm volatile ("VAB %0,%1,%1" : : "i" (x), "i" (y));
+}
+
+/*
+ * For each of the 16 bytes in the vector register y the MASK()
+ * operation returns 0xFF if the high bit of the byte is 1,
+ * or 0x00 if the high bit is 0. The result is stored in vector
+ * register x.
+ */
+static inline void MASK(int x, int y)
+{
+	asm volatile ("VESRAVB	%0,%1,24" : : "i" (x), "i" (y));
+}
+
+static inline void AND(int x, int y, int z)
+{
+	asm volatile ("VN %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
+}
+
+static inline void XOR(int x, int y, int z)
+{
+	asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z));
+}
+
+static inline void LOAD_DATA(int x, int n, u8 *ptr)
+{
+	typedef struct { u8 _[16*n]; } addrtype;
+	register addrtype *__ptr asm("1") = (addrtype *) ptr;
+
+	asm volatile ("VLM %2,%3,0,%r1"
+		      : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1));
+}
+
+static inline void STORE_DATA(int x, int n, u8 *ptr)
+{
+	typedef struct { u8 _[16*n]; } addrtype;
+	register addrtype *__ptr asm("1") = (addrtype *) ptr;
+
+	asm volatile ("VSTM %2,%3,0,1"
+		      : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1));
+}
+
+static inline void COPY_VEC(int x, int y)
+{
+	asm volatile ("VLR %0,%1" : : "i" (x), "i" (y));
+}
+
+static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
+{
+	struct kernel_fpu vxstate;
+	u8 **dptr, *p, *q;
+	int d, z, z0;
+
+	kernel_fpu_begin(&vxstate, KERNEL_VXR);
+	LOAD_CONST();
+
+	dptr = (u8 **) ptrs;
+	z0 = disks - 3;		/* Highest data disk */
+	p = dptr[z0 + 1];	/* XOR parity */
+	q = dptr[z0 + 2];	/* RS syndrome */
+
+	for (d = 0; d < bytes; d += $#*NSIZE) {
+		LOAD_DATA(0,$#,&dptr[z0][d]);
+		COPY_VEC(8+$$,0+$$);
+		for (z = z0 - 1; z >= 0; z--) {
+			MASK(16+$$,8+$$);
+			AND(16+$$,16+$$,25);
+			SHLBYTE(8+$$,8+$$);
+			XOR(8+$$,8+$$,16+$$);
+			LOAD_DATA(16,$#,&dptr[z][d]);
+			XOR(0+$$,0+$$,16+$$);
+			XOR(8+$$,8+$$,16+$$);
+		}
+		STORE_DATA(0,$#,&p[d]);
+		STORE_DATA(8,$#,&q[d]);
+	}
+	kernel_fpu_end(&vxstate, KERNEL_VXR);
+}
+
+static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop,
+					size_t bytes, void **ptrs)
+{
+	struct kernel_fpu vxstate;
+	u8 **dptr, *p, *q;
+	int d, z, z0;
+
+	dptr = (u8 **) ptrs;
+	z0 = stop;		/* P/Q right side optimization */
+	p = dptr[disks - 2];	/* XOR parity */
+	q = dptr[disks - 1];	/* RS syndrome */
+
+	kernel_fpu_begin(&vxstate, KERNEL_VXR);
+	LOAD_CONST();
+
+	for (d = 0; d < bytes; d += $#*NSIZE) {
+		/* P/Q data pages */
+		LOAD_DATA(0,$#,&dptr[z0][d]);
+		COPY_VEC(8+$$,0+$$);
+		for (z = z0 - 1; z >= start; z--) {
+			MASK(16+$$,8+$$);
+			AND(16+$$,16+$$,25);
+			SHLBYTE(8+$$,8+$$);
+			XOR(8+$$,8+$$,16+$$);
+			LOAD_DATA(16,$#,&dptr[z][d]);
+			XOR(0+$$,0+$$,16+$$);
+			XOR(8+$$,8+$$,16+$$);
+		}
+		/* P/Q left side optimization */
+		for (z = start - 1; z >= 0; z--) {
+			MASK(16+$$,8+$$);
+			AND(16+$$,16+$$,25);
+			SHLBYTE(8+$$,8+$$);
+			XOR(8+$$,8+$$,16+$$);
+		}
+		LOAD_DATA(16,$#,&p[d]);
+		XOR(16+$$,16+$$,0+$$);
+		STORE_DATA(16,$#,&p[d]);
+		LOAD_DATA(16,$#,&q[d]);
+		XOR(16+$$,16+$$,8+$$);
+		STORE_DATA(16,$#,&q[d]);
+	}
+	kernel_fpu_end(&vxstate, KERNEL_VXR);
+}
+
+static int raid6_s390vx$#_valid(void)
+{
+	return MACHINE_HAS_VX;
+}
+
+const struct raid6_calls raid6_s390vx$# = {
+	raid6_s390vx$#_gen_syndrome,
+	raid6_s390vx$#_xor_syndrome,
+	raid6_s390vx$#_valid,
+	"vx128x$#",
+	1
+};
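
The MASK/AND/SHLBYTE/XOR sequence in both inner loops is a byte-wise
multiply-by-2 in GF(2^8) with the RAID-6 polynomial 0x11d, using the
constants loaded by LOAD_CONST() (v24 = 7 for the per-byte arithmetic
shift, v25 = 0x1d for the reduction). A scalar model of what it does to
each of the 16 bytes of a vector register:

#include <stdint.h>
#include <stdio.h>

static uint8_t gf2_mul2(uint8_t v)
{
	/* MASK: arithmetic shift right by 7 sign-extends the high bit */
	uint8_t mask = (v & 0x80) ? 0xff : 0x00;

	/* SHLBYTE doubles, AND+XOR apply the 0x1d reduction if needed */
	return (uint8_t)(v << 1) ^ (mask & 0x1d);
}

int main(void)
{
	printf("0x80 * 2 = 0x%02x (expect 0x1d)\n", gf2_mul2(0x80));
	printf("0x02 * 2 = 0x%02x (expect 0x04)\n", gf2_mul2(0x02));
	return 0;
}
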
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 8ab6867..dd779c4 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -3,7 +3,6 @@
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=integer-divide-by-zero)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=unreachable)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=vla-bound)
-      CFLAGS_UBSAN += $(call cc-option, -fsanitize=null)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=signed-integer-overflow)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds)
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=object-size)
@@ -14,4 +13,8 @@
 ifdef CONFIG_UBSAN_ALIGNMENT
       CFLAGS_UBSAN += $(call cc-option, -fsanitize=alignment)
 endif
+
+ifdef CONFIG_UBSAN_NULL
+      CFLAGS_UBSAN += $(call cc-option, -fsanitize=null)
+endif
 endif