Upgrade rust/crates/quiche to 0.9.0

Test: make
Change-Id: I438d6a167e6e0bbfe38785ba13c33a285d1c510b
diff --git a/.cargo_vcs_info.json b/.cargo_vcs_info.json
index 44886a3..3186f7e 100644
--- a/.cargo_vcs_info.json
+++ b/.cargo_vcs_info.json
@@ -1,5 +1,5 @@
 {
   "git": {
-    "sha1": "fd5e028e1ec29c964d79ce8fb5bf623669cebfc8"
+    "sha1": "ad9d9330cf77b212915c2b175e90063c2cc3ac78"
   }
 }
diff --git a/Android.bp b/Android.bp
index 578cc68..03b4318 100644
--- a/Android.bp
+++ b/Android.bp
@@ -152,14 +152,14 @@
 }
 
 // dependent_library ["feature_list"]
-//   cc-1.0.67
+//   cc-1.0.68
 //   cfg-if-0.1.10
 //   cfg-if-1.0.0
 //   cmake-0.1.45
 //   idna-0.1.5
 //   iovec-0.1.4
 //   lazy_static-1.4.0
-//   libc-0.2.94 "default,std"
+//   libc-0.2.95 "default,std"
 //   libm-0.2.1 "default"
 //   log-0.4.14 "std"
 //   matches-0.1.8
@@ -173,6 +173,6 @@
 //   tinyvec-1.2.0 "alloc,default,tinyvec_macros"
 //   tinyvec_macros-0.1.0
 //   unicode-bidi-0.3.5 "default"
-//   unicode-normalization-0.1.17 "default,std"
+//   unicode-normalization-0.1.19 "default,std"
 //   untrusted-0.7.1
 //   url-1.7.2
diff --git a/Cargo.lock b/Cargo.lock
index 8cc9226..de30126 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1,22 +1,99 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
 [[package]]
+name = "aho-corasick"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "ansi_term"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee49baf6cb617b853aa8d93bf420db2383fab46d314482ca2803b40d5fde979b"
+dependencies = [
+ "winapi 0.3.9",
+]
+
+[[package]]
+name = "atty"
+version = "0.2.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
+dependencies = [
+ "hermit-abi",
+ "libc",
+ "winapi 0.3.9",
+]
+
+[[package]]
+name = "autocfg"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
+
+[[package]]
+name = "bindgen"
+version = "0.57.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fd4865004a46a0aafb2a0a5eb19d3c9fc46ee5f063a6cfc605c69ac9ecf5263d"
+dependencies = [
+ "bitflags",
+ "cexpr",
+ "clang-sys",
+ "clap",
+ "env_logger",
+ "lazy_static",
+ "lazycell",
+ "log",
+ "peeking_take_while",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+ "which",
+]
+
+[[package]]
 name = "bitflags"
 version = "1.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693"
 
 [[package]]
-name = "bumpalo"
-version = "3.4.0"
+name = "boring-sys"
+version = "1.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2e8c087f005730276d1096a652e92a8bacee2e2472bcc9715a74d2bec38b5820"
+checksum = "d2416bce1bcabf0d7995ce0338ec2425b8766a4d5a39d758a3638008911642fc"
+dependencies = [
+ "bindgen",
+ "cmake",
+]
+
+[[package]]
+name = "bumpalo"
+version = "3.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c59e7af012c713f529e7a3ee57ce9b31ddd858d4b512923602f74608b009631"
 
 [[package]]
 name = "cc"
-version = "1.0.61"
+version = "1.0.68"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed67cbde08356238e75fc4656be4749481eeffb09e19f320a25237d5221c985d"
+checksum = "4a72c244c1ff497a746a7e1fb3d14bd08420ecda70c8f25c7112f2781652d787"
+
+[[package]]
+name = "cexpr"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f4aedb84272dbe89af497cf81375129abda4fc0a9e7c5d317498c15cc30c0d27"
+dependencies = [
+ "nom",
+]
 
 [[package]]
 name = "cfg-if"
@@ -25,19 +102,51 @@
 checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
 
 [[package]]
-name = "cmake"
-version = "0.1.44"
+name = "cfg-if"
+version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0e56268c17a6248366d66d4a47a3381369d068cce8409bb1716ed77ea32163bb"
+checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+
+[[package]]
+name = "clang-sys"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "853eda514c284c2287f4bf20ae614f8781f40a81d32ecda6e91449304dfe077c"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
+[[package]]
+name = "clap"
+version = "2.33.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "37e58ac78573c40708d45522f0d80fa2f01cc4f9b4e2bf749807255454312002"
+dependencies = [
+ "ansi_term",
+ "atty",
+ "bitflags",
+ "strsim 0.8.0",
+ "textwrap",
+ "unicode-width",
+ "vec_map",
+]
+
+[[package]]
+name = "cmake"
+version = "0.1.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb6210b637171dfba4cda12e579ac6dc73f5165ad56133e5d72ef3131f320855"
 dependencies = [
  "cc",
 ]
 
 [[package]]
 name = "darling"
-version = "0.10.2"
+version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0d706e75d87e35569db781a9b5e2416cff1236a47ed380831f959382ccd5f858"
+checksum = "757c0ded2af11d8e739c4daea1ac623dd1624b06c844cf3f5a39f1bdbd99bb12"
 dependencies = [
  "darling_core",
  "darling_macro",
@@ -45,23 +154,23 @@
 
 [[package]]
 name = "darling_core"
-version = "0.10.2"
+version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0c960ae2da4de88a91b2d920c2a7233b400bc33cb28453a2987822d8392519b"
+checksum = "2c34d8efb62d0c2d7f60ece80f75e5c63c1588ba68032740494b0b9a996466e3"
 dependencies = [
  "fnv",
  "ident_case",
  "proc-macro2",
  "quote",
- "strsim",
+ "strsim 0.10.0",
  "syn",
 ]
 
 [[package]]
 name = "darling_macro"
-version = "0.10.2"
+version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d9b5a2f4ac4969822c62224815d069952656cadc7084fdca9751e6d959189b72"
+checksum = "ade7bff147130fe5e6d39f089c6bd49ec0250f35d70b2eebf72afdfc919f15cc"
 dependencies = [
  "darling_core",
  "quote",
@@ -69,6 +178,19 @@
 ]
 
 [[package]]
+name = "env_logger"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "17392a012ea30ef05a610aa97dfb49496e71c9f676b27879922ea5bdf60d9d3f"
+dependencies = [
+ "atty",
+ "humantime",
+ "log",
+ "regex",
+ "termcolor",
+]
+
+[[package]]
 name = "fnv"
 version = "1.0.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -91,6 +213,33 @@
 checksum = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7"
 
 [[package]]
+name = "glob"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b919933a397b79c37e33b77bb2aa3dc8eb6e165ad809e58ff75bc7db2e34574"
+
+[[package]]
+name = "hashbrown"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7afe4a420e3fe79967a00898cc1f4db7c8a49a9333a29f8a4bd76a253d5cd04"
+
+[[package]]
+name = "hermit-abi"
+version = "0.1.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "322f4de77956e22ed0e5032c359a0f1273f1f7f0d79bfa3b8ffbc730d7fbcc5c"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "humantime"
+version = "2.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
+
+[[package]]
 name = "ident_case"
 version = "1.0.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -108,6 +257,16 @@
 ]
 
 [[package]]
+name = "indexmap"
+version = "1.6.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "824845a0bf897a9042383849b02c1bc219c2383772efcd5c6f9766fa4b81aef3"
+dependencies = [
+ "autocfg",
+ "hashbrown",
+]
+
+[[package]]
 name = "iovec"
 version = "0.1.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -118,15 +277,15 @@
 
 [[package]]
 name = "itoa"
-version = "0.4.6"
+version = "0.4.7"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dc6f3ad7b9d11a0c00842ff8de1b60ee58661048eb8049ed33c73594f359d7e6"
+checksum = "dd25036021b0de88a0aff6b850051563c6516d0bf53f8638938edbb9de732736"
 
 [[package]]
 name = "js-sys"
-version = "0.3.45"
+version = "0.3.51"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ca059e81d9486668f12d455a4ea6daa600bd408134cd17e3d3fb5a32d1f016f8"
+checksum = "83bdfbace3a0e81a4253f73b49e960b053e396a11012cbd49b9b74d6a2b67062"
 dependencies = [
  "wasm-bindgen",
 ]
@@ -148,10 +307,26 @@
 checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
 
 [[package]]
-name = "libc"
-version = "0.2.80"
+name = "lazycell"
+version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614"
+checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
+
+[[package]]
+name = "libc"
+version = "0.2.95"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "789da6d93f1b866ffe175afc5322a4d76c038605a1c3319bb57b06967ca98a36"
+
+[[package]]
+name = "libloading"
+version = "0.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6f84d96438c15fcd6c3f244c8fce01d1e2b9c6b5623e9c711dc9286d8fc92d6a"
+dependencies = [
+ "cfg-if 1.0.0",
+ "winapi 0.3.9",
+]
 
 [[package]]
 name = "libm"
@@ -161,11 +336,11 @@
 
 [[package]]
 name = "log"
-version = "0.4.11"
+version = "0.4.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4fabed175da42fed1fa0746b0ea71f412aa9d35e76e95e59b192c64b9dc2bf8b"
+checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
 dependencies = [
- "cfg-if",
+ "cfg-if 1.0.0",
 ]
 
 [[package]]
@@ -175,12 +350,18 @@
 checksum = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08"
 
 [[package]]
-name = "mio"
-version = "0.6.22"
+name = "memchr"
+version = "2.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fce347092656428bc8eaf6201042cb551b8d67855af7374542a92a0fbfcac430"
+checksum = "b16bd47d9e329435e309c58469fe0791c2d0d1ba96ec0954152a5ae2b04387dc"
+
+[[package]]
+name = "mio"
+version = "0.6.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4afd66f5b91bf2a3bc13fad0e21caedac168ca4c707504e75585648ae80e4cc4"
 dependencies = [
- "cfg-if",
+ "cfg-if 0.1.10",
  "fuchsia-zircon",
  "fuchsia-zircon-sys",
  "iovec",
@@ -195,9 +376,9 @@
 
 [[package]]
 name = "miow"
-version = "0.2.1"
+version = "0.2.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8c1f2f3b1cf331de6896aabf6e9d55dca90356cc9960cca7eaaf408a355ae919"
+checksum = "ebd808424166322d4a38da87083bfddd3ac4c131334ed55856112eb06d46944d"
 dependencies = [
  "kernel32-sys",
  "net2",
@@ -207,20 +388,36 @@
 
 [[package]]
 name = "net2"
-version = "0.2.35"
+version = "0.2.37"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3ebc3ec692ed7c9a255596c67808dee269f64655d8baf7b4f0638e51ba1d6853"
+checksum = "391630d12b68002ae1e25e8f974306474966550ad82dac6886fb8910c19568ae"
 dependencies = [
- "cfg-if",
+ "cfg-if 0.1.10",
  "libc",
  "winapi 0.3.9",
 ]
 
 [[package]]
-name = "once_cell"
-version = "1.4.1"
+name = "nom"
+version = "5.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "260e51e7efe62b592207e9e13a68e43692a7a279171d6ba57abd208bf23645ad"
+checksum = "ffb4262d26ed83a1c0a33a38fe2bb15797329c85770da05e6b828ddb782627af"
+dependencies = [
+ "memchr",
+ "version_check",
+]
+
+[[package]]
+name = "once_cell"
+version = "1.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af8b08b04175473088b46763e51ee54da5f9a164bc162f615b91bc179dbf15a3"
+
+[[package]]
+name = "peeking_take_while"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
 
 [[package]]
 name = "percent-encoding"
@@ -230,18 +427,18 @@
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.24"
+version = "1.0.27"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
+checksum = "f0d8caf72986c1a598726adc988bb5984792ef84f5ee5aa50209145ee8077038"
 dependencies = [
  "unicode-xid",
 ]
 
 [[package]]
 name = "qlog"
-version = "0.3.0"
+version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4767943b701b674b8d5e8535d3242b6cbd8249bf5d72eee0d45f376a2b680ad4"
+checksum = "8777d5490145d6907198d48b3a907447689ce80e071b3d8a16a9d9fb3df02bc1"
 dependencies = [
  "serde",
  "serde_derive",
@@ -251,8 +448,9 @@
 
 [[package]]
 name = "quiche"
-version = "0.6.0"
+version = "0.9.0"
 dependencies = [
+ "boring-sys",
  "cmake",
  "lazy_static",
  "libc",
@@ -267,18 +465,35 @@
 
 [[package]]
 name = "quote"
-version = "1.0.7"
+version = "1.0.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "aa563d17ecb180e500da1cfd2b028310ac758de548efdd203e18f283af693f37"
+checksum = "c3d0b9745dc2debf507c8422de05d7226cc1f0644216dfdfead988f9b1ab32a7"
 dependencies = [
  "proc-macro2",
 ]
 
 [[package]]
-name = "ring"
-version = "0.16.15"
+name = "regex"
+version = "1.5.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "952cd6b98c85bbc30efa1ba5783b8abf12fec8b3287ffa52605b9432313e34e4"
+checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.6.25"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
+
+[[package]]
+name = "ring"
+version = "0.16.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc"
 dependencies = [
  "cc",
  "libc",
@@ -290,6 +505,18 @@
 ]
 
 [[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
+[[package]]
+name = "rustversion"
+version = "1.0.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61b3909d758bb75c79f23d4736fac9433868679d3ad2ea7a61e3c25cfda9a088"
+
+[[package]]
 name = "ryu"
 version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -297,18 +524,18 @@
 
 [[package]]
 name = "serde"
-version = "1.0.117"
+version = "1.0.126"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "b88fa983de7720629c9387e9f517353ed404164b1e482c970a90c1a4aaf7dc1a"
+checksum = "ec7505abeacaec74ae4778d9d9328fe5a5d04253220a85c4ee022239fc996d03"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.117"
+version = "1.0.126"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cbd1ae72adb44aab48f325a02444a5fc079349a8d804c1fc922aed3f7454c74e"
+checksum = "963a7dbc9895aeac7ac90e74f34a5d5261828f79df35cbed41e10189d3804d43"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -317,10 +544,11 @@
 
 [[package]]
 name = "serde_json"
-version = "1.0.59"
+version = "1.0.64"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "dcac07dbffa1c65e7f816ab9eba78eb142c6d44410f4eeba1e26e4f5dfa56b95"
+checksum = "799e97dc9fdae36a5c8b8f2cae9ce2ee9fdce2058c57a93e6099d919fd982f79"
 dependencies = [
+ "indexmap",
  "itoa",
  "ryu",
  "serde",
@@ -328,19 +556,20 @@
 
 [[package]]
 name = "serde_with"
-version = "1.5.1"
+version = "1.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8bac272128fb3b1e98872dca27a05c18d8b78b9bd089d3edb7b5871501b50bce"
+checksum = "3e3132bd01cfb74aac8b1b10083ad1f38dbf756df3176d5e63dd91e3f62a87f5"
 dependencies = [
+ "rustversion",
  "serde",
  "serde_with_macros",
 ]
 
 [[package]]
 name = "serde_with_macros"
-version = "1.2.2"
+version = "1.4.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3c747a9ab2e833b807f74f6b6141530655010bfa9c9c06d5508bce75c8f8072f"
+checksum = "e1569374bd54623ec8bd592cf22ba6e03c0f177ff55fbc8c29a49e296e7adecf"
 dependencies = [
  "darling",
  "proc-macro2",
@@ -349,10 +578,16 @@
 ]
 
 [[package]]
-name = "slab"
-version = "0.4.2"
+name = "shlex"
+version = "0.1.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c111b5bd5695e56cffe5129854aa230b39c93a305372fdbb2668ca2394eea9f8"
+checksum = "7fdf1b9db47230893d76faad238fd6097fd6d6a9245cd7a4d90dbd639536bbd2"
+
+[[package]]
+name = "slab"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f173ac3d1a7e3b28003f40de0b5ce7fe2710f9b9dc3fc38664cebee46b3b6527"
 
 [[package]]
 name = "spin"
@@ -362,15 +597,21 @@
 
 [[package]]
 name = "strsim"
-version = "0.9.3"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6446ced80d6c486436db5c078dde11a9f73d42b57fb273121e160b84f63d894c"
+checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a"
+
+[[package]]
+name = "strsim"
+version = "0.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
 
 [[package]]
 name = "syn"
-version = "1.0.48"
+version = "1.0.72"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "cc371affeffc477f42a221a1e4297aedcea33d47d19b61455588bd9d8f6b19ac"
+checksum = "a1e8cdbefb79a9a5a65e0db8b47b723ee907b7c7f8496c76a1770b5c310bab82"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -378,34 +619,67 @@
 ]
 
 [[package]]
-name = "tinyvec"
-version = "0.3.4"
+name = "termcolor"
+version = "1.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "238ce071d267c5710f9d31451efec16c5ee22de34df17cc05e56cbc92e967117"
+checksum = "2dfed899f0eb03f32ee8c6a0aabdb8a7949659e3466561fc0adf54e26d88c5f4"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "textwrap"
+version = "0.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060"
+dependencies = [
+ "unicode-width",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b5220f05bb7de7f3f53c7c065e1199b3172696fe2db9f9c4d8ad9b4ee74c342"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
 
 [[package]]
 name = "unicode-bidi"
-version = "0.3.4"
+version = "0.3.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5"
+checksum = "eeb8be209bb1c96b7c177c7420d26e04eccacb0eeae6b980e35fcb74678107e0"
 dependencies = [
  "matches",
 ]
 
 [[package]]
 name = "unicode-normalization"
-version = "0.1.13"
+version = "0.1.19"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6fb19cf769fa8c6a80a162df694621ebeb4dafb606470b2b2fce0be40a98a977"
+checksum = "d54590932941a9e9266f0832deed84ebe1bf2e4c9e4a3554d393d18f5e854bf9"
 dependencies = [
  "tinyvec",
 ]
 
 [[package]]
-name = "unicode-xid"
-version = "0.2.1"
+name = "unicode-width"
+version = "0.1.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
+checksum = "9337591893a19b88d8d87f2cec1e73fad5cdfd10e5a6f349f498ad6ea2ffb1e3"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
 
 [[package]]
 name = "untrusted"
@@ -425,20 +699,32 @@
 ]
 
 [[package]]
-name = "wasm-bindgen"
-version = "0.2.68"
+name = "vec_map"
+version = "0.8.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1ac64ead5ea5f05873d7c12b545865ca2b8d28adfc50a49b84770a3a97265d42"
+checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191"
+
+[[package]]
+name = "version_check"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.74"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d54ee1d4ed486f78874278e63e4069fc1ab9f6a18ca492076ffb90c5eb2997fd"
 dependencies = [
- "cfg-if",
+ "cfg-if 1.0.0",
  "wasm-bindgen-macro",
 ]
 
 [[package]]
 name = "wasm-bindgen-backend"
-version = "0.2.68"
+version = "0.2.74"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f22b422e2a757c35a73774860af8e112bff612ce6cb604224e8e47641a9e4f68"
+checksum = "3b33f6a0694ccfea53d94db8b2ed1c3a8a4c86dd936b13b9f0a15ec4a451b900"
 dependencies = [
  "bumpalo",
  "lazy_static",
@@ -451,9 +737,9 @@
 
 [[package]]
 name = "wasm-bindgen-macro"
-version = "0.2.68"
+version = "0.2.74"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6b13312a745c08c469f0b292dd2fcd6411dba5f7160f593da6ef69b64e407038"
+checksum = "088169ca61430fe1e58b8096c24975251700e7b1f6fd91cc9d59b04fb9b18bd4"
 dependencies = [
  "quote",
  "wasm-bindgen-macro-support",
@@ -461,9 +747,9 @@
 
 [[package]]
 name = "wasm-bindgen-macro-support"
-version = "0.2.68"
+version = "0.2.74"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f249f06ef7ee334cc3b8ff031bfc11ec99d00f34d86da7498396dc1e3b1498fe"
+checksum = "be2241542ff3d9f241f5e2cb6dd09b37efe786df8851c54957683a49f0987a97"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -474,21 +760,30 @@
 
 [[package]]
 name = "wasm-bindgen-shared"
-version = "0.2.68"
+version = "0.2.74"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "1d649a3145108d7d3fbcde896a468d1bd636791823c9921135218ad89be08307"
+checksum = "d7cff876b8f18eed75a66cf49b65e7f967cb354a7aa16003fb55dbfd25b44b4f"
 
 [[package]]
 name = "web-sys"
-version = "0.3.45"
+version = "0.3.51"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "4bf6ef87ad7ae8008e15a355ce696bed26012b7caa21605188cfd8214ab51e2d"
+checksum = "e828417b379f3df7111d3a2a9e5753706cae29c41f7c4029ee9fd77f3e09e582"
 dependencies = [
  "js-sys",
  "wasm-bindgen",
 ]
 
 [[package]]
+name = "which"
+version = "3.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d011071ae14a2f6671d0b74080ae0cd8ebf3a6f8c9589a2cd45f23126fe29724"
+dependencies = [
+ "libc",
+]
+
+[[package]]
 name = "winapi"
 version = "0.2.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
@@ -517,6 +812,15 @@
 checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
 
 [[package]]
+name = "winapi-util"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
+dependencies = [
+ "winapi 0.3.9",
+]
+
+[[package]]
 name = "winapi-x86_64-pc-windows-gnu"
 version = "0.4.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
diff --git a/Cargo.toml b/Cargo.toml
index 572fb05..62b856a 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,10 +13,10 @@
 [package]
 edition = "2018"
 name = "quiche"
-version = "0.6.0"
+version = "0.9.0"
 authors = ["Alessandro Ghedini <alessandro@ghedini.me>"]
 build = "src/build.rs"
-include = ["/*.md", "/*.toml", "/CODEOWNERS", "/COPYING", "/benches", "/deps/boringssl/**/*.[chS]", "/deps/boringssl/src/**/*.cc", "/deps/boringssl/**/CMakeLists.txt", "/deps/boringssl/**/sources.cmake", "/deps/boringssl/LICENSE", "/examples", "/include", "/quiche.svg", "/src"]
+include = ["/*.md", "/*.toml", "/CODEOWNERS", "/COPYING", "/benches", "/deps/boringssl/**/*.[chS]", "/deps/boringssl/**/*.asm", "/deps/boringssl/src/**/*.cc", "/deps/boringssl/**/CMakeLists.txt", "/deps/boringssl/**/sources.cmake", "/deps/boringssl/LICENSE", "/examples", "/include", "/quiche.svg", "/src"]
 description = "🥧 Savoury implementation of the QUIC transport protocol and HTTP/3"
 readme = "README.md"
 keywords = ["quic", "http3"]
@@ -33,6 +33,10 @@
 
 [lib]
 crate-type = ["lib", "staticlib", "cdylib"]
+[dependencies.boring-sys]
+version = "1.0.2"
+optional = true
+
 [dependencies.lazy_static]
 version = "1"
 
@@ -47,7 +51,7 @@
 features = ["std"]
 
 [dependencies.qlog]
-version = "0.3"
+version = "0.4"
 optional = true
 
 [dependencies.ring]
@@ -63,6 +67,7 @@
 [features]
 boringssl-vendored = []
 default = ["boringssl-vendored"]
+ffi = []
 fuzzing = []
 ndk-old-gcc = []
 pkg-config-meta = []
diff --git a/Cargo.toml.orig b/Cargo.toml.orig
index 416f466..12bddf8 100644
--- a/Cargo.toml.orig
+++ b/Cargo.toml.orig
@@ -1,6 +1,6 @@
 [package]
 name = "quiche"
-version = "0.6.0"
+version = "0.9.0"
 authors = ["Alessandro Ghedini <alessandro@ghedini.me>"]
 edition = "2018"
 build = "src/build.rs"
@@ -17,6 +17,7 @@
     "/COPYING",
     "/benches",
     "/deps/boringssl/**/*.[chS]",
+    "/deps/boringssl/**/*.asm",
     "/deps/boringssl/src/**/*.cc",
     "/deps/boringssl/**/CMakeLists.txt",
     "/deps/boringssl/**/sources.cmake",
@@ -42,6 +43,9 @@
 # For building with Android NDK < 18 and GCC.
 ndk-old-gcc = []
 
+# Expose the FFI API.
+ffi = []
+
 [package.metadata.docs.rs]
 no-default-features = true
 
@@ -54,7 +58,8 @@
 libm = "0.2"
 ring = "0.16"
 lazy_static = "1"
-qlog = { version = "0.3", path = "tools/qlog", optional = true }
+boring-sys = { version = "1.0.2", optional = true }
+qlog = { version = "0.4", path = "tools/qlog", optional = true }
 
 [target."cfg(windows)".dependencies]
 winapi = { version = "0.3", features = ["wincrypt"] }
diff --git a/METADATA b/METADATA
index 3122681..218f6f3 100644
--- a/METADATA
+++ b/METADATA
@@ -1,5 +1,5 @@
 name: "quiche"
-description: "\ud83e\udd67 Savoury implementation of the QUIC transport protocol and HTTP/3"
+description: "\360\237\245\247 Savoury implementation of the QUIC transport protocol and HTTP/3"
 third_party {
   url {
     type: HOMEPAGE
@@ -7,13 +7,13 @@
   }
   url {
     type: ARCHIVE
-    value: "https://static.crates.io/crates/quiche/quiche-0.6.0.crate"
+    value: "https://static.crates.io/crates/quiche/quiche-0.9.0.crate"
   }
-  version: "0.6.0"
+  version: "0.9.0"
   license_type: NOTICE
   last_upgrade_date {
-    year: 2020
-    month: 11
-    day: 26
+    year: 2021
+    month: 6
+    day: 8
   }
 }
diff --git a/README.md b/README.md
index 99e2908..2be56f2 100644
--- a/README.md
+++ b/README.md
@@ -3,17 +3,13 @@
 [![crates.io](https://img.shields.io/crates/v/quiche.svg)](https://crates.io/crates/quiche)
 [![docs.rs](https://docs.rs/quiche/badge.svg)](https://docs.rs/quiche)
 [![license](https://img.shields.io/github/license/cloudflare/quiche.svg)](https://opensource.org/licenses/BSD-2-Clause)
-[![build](https://travis-ci.com/cloudflare/quiche.svg?branch=master)](https://travis-ci.com/cloudflare/quiche)
+![build](https://img.shields.io/github/workflow/status/cloudflare/quiche/Stable)
 
 [quiche] is an implementation of the QUIC transport protocol and HTTP/3 as
 specified by the [IETF]. It provides a low level API for processing QUIC packets
 and handling connection state. The application is responsible for providing I/O
 (e.g. sockets handling) as well as an event loop with support for timers.
 
-A live QUIC server based on quiche is available at ``https://quic.tech:4433/``,
-and an HTTP/3 one at ``https://quic.tech:8443/``, that can be used for
-experimentation.
-
 For more information on how quiche came about and some insights into its design
 you can read a [post] on Cloudflare's blog that goes into some more detail.
 
@@ -26,7 +22,9 @@
 
 ### Cloudflare
 
-quiche powers Cloudflare edge network's [HTTP/3 support][cloudflare-http3].
+quiche powers Cloudflare edge network's [HTTP/3 support][cloudflare-http3]. The
+[cloudflare-quic.com](https://cloudflare-quic.com) website can be used for
+testing and experimentation.
 
 ### curl
 
@@ -49,10 +47,10 @@
 Before diving into the quiche API, here are a few examples on how to use the
 quiche tools provided as part of the [quiche-apps](tools/apps/) crate.
 
-The client can be run as follows:
+After cloning the project as described in the [building](#building) section, the client can be run as follows:
 
 ```bash
- $ cargo run --manifest-path=tools/apps/Cargo.toml --bin quiche-client -- https://quic.tech:8443/
+ $ cargo run --manifest-path=tools/apps/Cargo.toml --bin quiche-client -- https://cloudflare-quic.com/
 ```
 
 while the server can be run as follows:
@@ -241,12 +239,15 @@
 built automatically alongside the Rust one. This is fully stand-alone and can
 be linked directly into C/C++ applications.
 
+Note that in order to enable the FFI API, the ``ffi`` feature must be enabled (it
+is disabled by default), by passing ``--features ffi`` to ``cargo``.
+
 [thin C API]: https://github.com/cloudflare/quiche/blob/master/include/quiche.h
 
 Building
 --------
 
-quiche requires Rust 1.39 or later to build. The latest stable Rust release can
+quiche requires Rust 1.50 or later to build. The latest stable Rust release can
 be installed using [rustup](https://rustup.rs/).
 
 Once the Rust build environment is setup, the quiche source code can be fetched
@@ -311,17 +312,17 @@
 #### NDK version >= 19
 
 For NDK version 19 or higher (21 recommended), you can build in a simpler
-way using [cargo-ndk]. You need to install [cargo-ndk] first.
+way using [cargo-ndk]. You need to install [cargo-ndk] (v2.0 or later) first.
 
 ```bash
  $ cargo install cargo-ndk
 ```
 
 You can build the quiche library using the following procedure. Note that
-`--target` and `--android-platform` are mandatory.
+`-t <architecture>` and `-p <Android API level>` are mandatory.
 
 ```bash
- $ cargo ndk --target aarch64-linux-android --android-platform 21 -- build
+ $ cargo ndk -t arm64-v8a -p 21 -- build --features ffi
 ```
 
 See [build_android_ndk19.sh] for more information.
@@ -385,13 +386,13 @@
 To build libquiche, run the following command:
 
 ```bash
- $ cargo lipo
+ $ cargo lipo --features ffi
 ```
 
 or
 
 ```bash
- $ cargo lipo --release
+ $ cargo lipo --features ffi --release
 ```
 
 iOS build is tested in Xcode 10.1 and Xcode 11.2.
diff --git a/deps/boringssl/CMakeLists.txt b/deps/boringssl/CMakeLists.txt
index 5f92f7a..1645a26 100644
--- a/deps/boringssl/CMakeLists.txt
+++ b/deps/boringssl/CMakeLists.txt
@@ -288,6 +288,21 @@
 )
 
 set(
+  CRYPTO_win_aarch64_SOURCES
+
+  win-aarch64/crypto/chacha/chacha-armv8.S
+  win-aarch64/crypto/fipsmodule/aesv8-armx64.S
+  win-aarch64/crypto/fipsmodule/armv8-mont.S
+  win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
+  win-aarch64/crypto/fipsmodule/ghashv8-armx64.S
+  win-aarch64/crypto/fipsmodule/sha1-armv8.S
+  win-aarch64/crypto/fipsmodule/sha256-armv8.S
+  win-aarch64/crypto/fipsmodule/sha512-armv8.S
+  win-aarch64/crypto/fipsmodule/vpaes-armv8.S
+  win-aarch64/crypto/test/trampoline-armv8.S
+)
+
+set(
   CRYPTO_win_x86_SOURCES
 
   win-x86/crypto/chacha/chacha-x86.asm
@@ -387,6 +402,7 @@
   src/crypto/bio/printf.c
   src/crypto/bio/socket.c
   src/crypto/bio/socket_helper.c
+  src/crypto/blake2/blake2.c
   src/crypto/bn_extra/bn_asn1.c
   src/crypto/bn_extra/convert.c
   src/crypto/buf/buf.c
@@ -411,6 +427,7 @@
   src/crypto/conf/conf.c
   src/crypto/cpu-aarch64-fuchsia.c
   src/crypto/cpu-aarch64-linux.c
+  src/crypto/cpu-aarch64-win.c
   src/crypto/cpu-arm-linux.c
   src/crypto/cpu-arm.c
   src/crypto/cpu-intel.c
@@ -418,10 +435,8 @@
   src/crypto/crypto.c
   src/crypto/curve25519/curve25519.c
   src/crypto/curve25519/spake25519.c
-  src/crypto/dh/check.c
-  src/crypto/dh/dh.c
-  src/crypto/dh/dh_asn1.c
-  src/crypto/dh/params.c
+  src/crypto/dh_extra/dh_asn1.c
+  src/crypto/dh_extra/params.c
   src/crypto/digest_extra/digest_extra.c
   src/crypto/dsa/dsa.c
   src/crypto/dsa/dsa_asn1.c
@@ -454,6 +469,7 @@
   src/crypto/fipsmodule/fips_shared_support.c
   src/crypto/fipsmodule/is_fips.c
   src/crypto/hkdf/hkdf.c
+  src/crypto/hpke/hpke.c
   src/crypto/hrss/hrss.c
   src/crypto/lhash/lhash.c
   src/crypto/mem.c
@@ -479,6 +495,7 @@
   src/crypto/rand_extra/deterministic.c
   src/crypto/rand_extra/forkunsafe.c
   src/crypto/rand_extra/fuchsia.c
+  src/crypto/rand_extra/passive.c
   src/crypto/rand_extra/rand_extra.c
   src/crypto/rand_extra/windows.c
   src/crypto/rc4/rc4.c
@@ -494,6 +511,7 @@
   src/crypto/thread_win.c
   src/crypto/trust_token/pmbtoken.c
   src/crypto/trust_token/trust_token.c
+  src/crypto/trust_token/voprf.c
   src/crypto/x509/a_digest.c
   src/crypto/x509/a_sign.c
   src/crypto/x509/a_strex.c
@@ -569,12 +587,10 @@
   src/crypto/x509v3/v3_pci.c
   src/crypto/x509v3/v3_pcia.c
   src/crypto/x509v3/v3_pcons.c
-  src/crypto/x509v3/v3_pku.c
   src/crypto/x509v3/v3_pmaps.c
   src/crypto/x509v3/v3_prn.c
   src/crypto/x509v3/v3_purp.c
   src/crypto/x509v3/v3_skey.c
-  src/crypto/x509v3/v3_sxnet.c
   src/crypto/x509v3/v3_utl.c
 )
 
@@ -642,7 +658,11 @@
 
 target_link_libraries(bssl ssl crypto)
 
-if(NOT MSVC AND NOT ANDROID)
+if(NOT WIN32 AND NOT ANDROID)
   target_link_libraries(crypto pthread)
 endif()
 
+if(WIN32)
+  target_link_libraries(bssl ws2_32)
+endif()
+
diff --git a/deps/boringssl/err_data.c b/deps/boringssl/err_data.c
index aa8cb53..7103cb1 100644
--- a/deps/boringssl/err_data.c
+++ b/deps/boringssl/err_data.c
@@ -55,727 +55,734 @@
 OPENSSL_STATIC_ASSERT(ERR_NUM_LIBS == 34, "number of libraries changed");
 
 const uint32_t kOpenSSLReasonValues[] = {
-    0xc32083a,
-    0xc328854,
-    0xc330863,
-    0xc338873,
-    0xc340882,
-    0xc34889b,
-    0xc3508a7,
-    0xc3588c4,
-    0xc3608e4,
-    0xc3688f2,
-    0xc370902,
-    0xc37890f,
-    0xc38091f,
-    0xc38892a,
-    0xc390940,
-    0xc39894f,
-    0xc3a0963,
-    0xc3a8847,
-    0xc3b00ea,
-    0xc3b88d6,
-    0x10320847,
-    0x1032959f,
-    0x103315ab,
-    0x103395c4,
-    0x103415d7,
-    0x10348f27,
-    0x10350c60,
-    0x103595ea,
-    0x10361614,
-    0x10369627,
-    0x10371646,
-    0x1037965f,
-    0x10381674,
-    0x10389692,
-    0x103916a1,
-    0x103996bd,
-    0x103a16d8,
-    0x103a96e7,
-    0x103b1703,
-    0x103b971e,
-    0x103c1744,
-    0x103c80ea,
-    0x103d1755,
-    0x103d9769,
-    0x103e1788,
-    0x103e9797,
-    0x103f17ae,
-    0x103f97c1,
-    0x10400c24,
-    0x104097d4,
-    0x104117f2,
-    0x10419805,
-    0x1042181f,
-    0x1042982f,
-    0x10431843,
-    0x10439859,
-    0x10441871,
-    0x10449886,
-    0x1045189a,
-    0x104598ac,
-    0x104605fd,
-    0x1046894f,
-    0x104718c1,
-    0x104798d8,
-    0x104818ed,
-    0x104898fb,
-    0x10490e73,
-    0x10499735,
-    0x104a15ff,
-    0x14320c07,
-    0x14328c15,
-    0x14330c24,
-    0x14338c36,
-    0x143400ac,
-    0x143480ea,
-    0x18320083,
-    0x18328f7d,
-    0x183300ac,
-    0x18338f93,
-    0x18340fa7,
-    0x183480ea,
-    0x18350fbc,
-    0x18358fd4,
-    0x18360fe9,
-    0x18368ffd,
-    0x18371021,
-    0x18379037,
-    0x1838104b,
-    0x1838905b,
-    0x18390a75,
-    0x1839906b,
-    0x183a1091,
-    0x183a90b7,
-    0x183b0c7f,
-    0x183b9106,
-    0x183c1118,
-    0x183c9123,
-    0x183d1133,
-    0x183d9144,
-    0x183e1155,
-    0x183e9167,
-    0x183f1190,
-    0x183f91a9,
-    0x184011c1,
-    0x184086d5,
-    0x184110da,
-    0x184190a5,
-    0x184210c4,
-    0x18428c6c,
-    0x18431080,
-    0x184390ec,
-    0x203211fb,
-    0x203291e8,
-    0x24321207,
-    0x24328995,
-    0x24331219,
-    0x24339226,
-    0x24341233,
-    0x24349245,
-    0x24351254,
-    0x24359271,
-    0x2436127e,
-    0x2436928c,
-    0x2437129a,
-    0x243792a8,
-    0x243812b1,
-    0x243892be,
-    0x243912d1,
-    0x28320c54,
-    0x28328c7f,
-    0x28330c24,
-    0x28338c92,
-    0x28340c60,
-    0x283480ac,
-    0x283500ea,
-    0x28358c6c,
-    0x2c323012,
-    0x2c3292e8,
-    0x2c333020,
-    0x2c33b032,
-    0x2c343046,
-    0x2c34b058,
-    0x2c353073,
-    0x2c35b085,
-    0x2c3630b5,
-    0x2c36832d,
-    0x2c3730c2,
-    0x2c37b0ee,
-    0x2c383113,
-    0x2c38b12a,
-    0x2c393148,
-    0x2c39b158,
-    0x2c3a316a,
-    0x2c3ab17e,
-    0x2c3b318f,
-    0x2c3bb1ae,
-    0x2c3c12fa,
-    0x2c3c9310,
-    0x2c3d31c2,
-    0x2c3d9329,
-    0x2c3e31df,
-    0x2c3eb1ed,
-    0x2c3f3205,
-    0x2c3fb21d,
-    0x2c403247,
-    0x2c4091fb,
-    0x2c413258,
-    0x2c41b26b,
-    0x2c4211c1,
-    0x2c42b27c,
-    0x2c430722,
-    0x2c43b1a0,
-    0x2c443101,
-    0x2c44b22a,
-    0x2c453098,
-    0x2c45b0d4,
-    0x2c463138,
+    0xc320847,
+    0xc328861,
+    0xc330870,
+    0xc338880,
+    0xc34088f,
+    0xc3488a8,
+    0xc3508b4,
+    0xc3588d1,
+    0xc3608f1,
+    0xc3688ff,
+    0xc37090f,
+    0xc37891c,
+    0xc38092c,
+    0xc388937,
+    0xc39094d,
+    0xc39895c,
+    0xc3a0970,
+    0xc3a8854,
+    0xc3b00f7,
+    0xc3b88e3,
+    0x10320854,
+    0x103295b6,
+    0x103315c2,
+    0x103395db,
+    0x103415ee,
+    0x10348f34,
+    0x10350c6d,
+    0x10359601,
+    0x1036162b,
+    0x1036963e,
+    0x1037165d,
+    0x10379676,
+    0x1038168b,
+    0x103896a9,
+    0x103916b8,
+    0x103996d4,
+    0x103a16ef,
+    0x103a96fe,
+    0x103b171a,
+    0x103b9735,
+    0x103c175b,
+    0x103c80f7,
+    0x103d176c,
+    0x103d9780,
+    0x103e179f,
+    0x103e97ae,
+    0x103f17c5,
+    0x103f97d8,
+    0x10400c31,
+    0x104097eb,
+    0x10411809,
+    0x1041981c,
+    0x10421836,
+    0x10429846,
+    0x1043185a,
+    0x10439870,
+    0x10441888,
+    0x1044989d,
+    0x104518b1,
+    0x104598c3,
+    0x1046060a,
+    0x1046895c,
+    0x104718d8,
+    0x104798ef,
+    0x10481904,
+    0x10489912,
+    0x10490e80,
+    0x1049974c,
+    0x104a1616,
+    0x14320c14,
+    0x14328c22,
+    0x14330c31,
+    0x14338c43,
+    0x143400b9,
+    0x143480f7,
+    0x18320090,
+    0x18328f8a,
+    0x183300b9,
+    0x18338fa0,
+    0x18340fb4,
+    0x183480f7,
+    0x18350fd3,
+    0x18358feb,
+    0x18361000,
+    0x18369014,
+    0x18371038,
+    0x1837904e,
+    0x18381062,
+    0x18389072,
+    0x18390a82,
+    0x18399082,
+    0x183a10a8,
+    0x183a90ce,
+    0x183b0c8c,
+    0x183b911d,
+    0x183c112f,
+    0x183c913a,
+    0x183d114a,
+    0x183d915b,
+    0x183e116c,
+    0x183e917e,
+    0x183f11a7,
+    0x183f91c0,
+    0x184011d8,
+    0x184086e2,
+    0x184110f1,
+    0x184190bc,
+    0x184210db,
+    0x18428c79,
+    0x18431097,
+    0x18439103,
+    0x18440fc9,
+    0x20321212,
+    0x203291ff,
+    0x2432121e,
+    0x243289a2,
+    0x24331230,
+    0x2433923d,
+    0x2434124a,
+    0x2434925c,
+    0x2435126b,
+    0x24359288,
+    0x24361295,
+    0x243692a3,
+    0x243712b1,
+    0x243792bf,
+    0x243812c8,
+    0x243892d5,
+    0x243912e8,
+    0x28320c61,
+    0x28328c8c,
+    0x28330c31,
+    0x28338c9f,
+    0x28340c6d,
+    0x283480b9,
+    0x283500f7,
+    0x28358c79,
+    0x2c3230db,
+    0x2c3292ff,
+    0x2c3330e9,
+    0x2c33b0fb,
+    0x2c34310f,
+    0x2c34b121,
+    0x2c35313c,
+    0x2c35b14e,
+    0x2c36317e,
+    0x2c36833a,
+    0x2c37318b,
+    0x2c37b1b7,
+    0x2c3831dc,
+    0x2c38b1f3,
+    0x2c393211,
+    0x2c39b221,
+    0x2c3a3233,
+    0x2c3ab247,
+    0x2c3b3258,
+    0x2c3bb277,
+    0x2c3c1311,
+    0x2c3c9327,
+    0x2c3d328b,
+    0x2c3d9340,
+    0x2c3e32a8,
+    0x2c3eb2b6,
+    0x2c3f32ce,
+    0x2c3fb2e6,
+    0x2c403310,
+    0x2c409212,
+    0x2c413321,
+    0x2c41b334,
+    0x2c4211d8,
+    0x2c42b345,
+    0x2c43072f,
+    0x2c43b269,
+    0x2c4431ca,
+    0x2c44b2f3,
+    0x2c453161,
+    0x2c45b19d,
+    0x2c463201,
     0x30320000,
     0x30328015,
     0x3033001f,
     0x30338038,
-    0x3034004a,
-    0x30348064,
-    0x3035006b,
-    0x30358083,
-    0x30360094,
-    0x303680ac,
-    0x303700b9,
-    0x303780c8,
-    0x303800ea,
-    0x303880f7,
-    0x3039010a,
-    0x30398125,
-    0x303a013a,
-    0x303a814e,
-    0x303b0162,
-    0x303b8173,
-    0x303c018c,
-    0x303c81a9,
-    0x303d01b7,
-    0x303d81cb,
-    0x303e01db,
-    0x303e81f4,
-    0x303f0204,
-    0x303f8217,
-    0x30400226,
-    0x30408232,
-    0x30410247,
-    0x30418257,
-    0x3042026e,
-    0x3042827b,
-    0x3043028e,
-    0x3043829d,
-    0x304402b2,
-    0x304482d3,
-    0x304502e6,
-    0x304582f9,
-    0x30460312,
-    0x3046832d,
-    0x3047034a,
-    0x3047835c,
-    0x3048036a,
-    0x3048837b,
-    0x3049038a,
-    0x304983a2,
-    0x304a03b4,
-    0x304a83c8,
-    0x304b03e0,
-    0x304b83f3,
-    0x304c03fe,
-    0x304c840f,
-    0x304d041b,
-    0x304d8431,
-    0x304e043f,
-    0x304e8455,
-    0x304f0467,
-    0x304f8479,
-    0x3050049c,
-    0x305084af,
-    0x305104c0,
-    0x305184d0,
-    0x305204e8,
-    0x305284fd,
-    0x30530515,
-    0x30538529,
-    0x30540541,
-    0x3054855a,
-    0x30550573,
-    0x30558590,
-    0x3056059b,
-    0x305685b3,
-    0x305705c3,
-    0x305785d4,
-    0x305805e7,
-    0x305885fd,
-    0x30590606,
-    0x3059861b,
-    0x305a062e,
-    0x305a863d,
-    0x305b065d,
-    0x305b866c,
-    0x305c068d,
-    0x305c86a9,
-    0x305d06b5,
-    0x305d86d5,
-    0x305e06f1,
-    0x305e8702,
-    0x305f0718,
-    0x305f8722,
-    0x3060048c,
-    0x34320b65,
-    0x34328b79,
-    0x34330b96,
-    0x34338ba9,
-    0x34340bb8,
-    0x34348bf1,
-    0x34350bd5,
-    0x3c320083,
-    0x3c328cbc,
-    0x3c330cd5,
-    0x3c338cf0,
-    0x3c340d0d,
-    0x3c348d37,
-    0x3c350d52,
-    0x3c358d78,
-    0x3c360d91,
-    0x3c368da9,
-    0x3c370dba,
-    0x3c378dc8,
-    0x3c380dd5,
-    0x3c388de9,
-    0x3c390c7f,
-    0x3c398e0c,
-    0x3c3a0e20,
-    0x3c3a890f,
-    0x3c3b0e30,
-    0x3c3b8e4b,
-    0x3c3c0e5d,
-    0x3c3c8e90,
-    0x3c3d0e9a,
-    0x3c3d8eae,
-    0x3c3e0ebc,
-    0x3c3e8ee1,
-    0x3c3f0ca8,
-    0x3c3f8eca,
-    0x3c4000ac,
-    0x3c4080ea,
-    0x3c410d28,
-    0x3c418d67,
-    0x3c420e73,
-    0x3c428dfd,
-    0x40321971,
-    0x40329987,
-    0x403319b5,
-    0x403399bf,
-    0x403419d6,
-    0x403499f4,
-    0x40351a04,
-    0x40359a16,
-    0x40361a23,
-    0x40369a2f,
-    0x40371a44,
-    0x40379a56,
-    0x40381a61,
-    0x40389a73,
-    0x40390f27,
-    0x40399a83,
-    0x403a1a96,
-    0x403a9ab7,
-    0x403b1ac8,
-    0x403b9ad8,
-    0x403c0064,
-    0x403c8083,
-    0x403d1b39,
-    0x403d9b4f,
-    0x403e1b5e,
-    0x403e9b96,
-    0x403f1bb0,
-    0x403f9bd8,
-    0x40401bed,
-    0x40409c01,
-    0x40411c3c,
-    0x40419c57,
-    0x40421c70,
-    0x40429c83,
-    0x40431c97,
-    0x40439caf,
-    0x40441cc6,
-    0x404480ac,
-    0x40451cdb,
-    0x40459ced,
-    0x40461d11,
-    0x40469d31,
-    0x40471d3f,
-    0x40479d66,
-    0x40481dd7,
-    0x40489e0a,
-    0x40491e21,
-    0x40499e3b,
-    0x404a1e52,
-    0x404a9e70,
-    0x404b1e88,
-    0x404b9eb5,
-    0x404c1ecb,
-    0x404c9edd,
-    0x404d1efe,
-    0x404d9f37,
-    0x404e1f4b,
-    0x404e9f58,
-    0x404f1f9f,
-    0x404f9fe5,
-    0x4050203c,
-    0x4050a050,
-    0x40512083,
-    0x40522093,
-    0x4052a0b7,
-    0x405320cf,
-    0x4053a0e2,
-    0x405420f7,
-    0x4054a11a,
-    0x40552128,
-    0x4055a165,
-    0x40562172,
-    0x4056a18b,
-    0x405721a3,
-    0x4057a1b6,
-    0x405821cb,
-    0x4058a1f2,
-    0x40592221,
-    0x4059a24e,
-    0x405a2262,
-    0x405aa272,
-    0x405b228a,
-    0x405ba29b,
-    0x405c22ae,
-    0x405ca2ed,
-    0x405d22fa,
-    0x405da31f,
-    0x405e235d,
-    0x405e8ab3,
-    0x405f237e,
-    0x405fa38b,
-    0x40602399,
-    0x4060a3bb,
-    0x4061241c,
-    0x4061a454,
-    0x4062246b,
-    0x4062a47c,
-    0x406324c9,
-    0x4063a4de,
-    0x406424f5,
-    0x4064a521,
-    0x4065253c,
-    0x4065a553,
-    0x4066256b,
-    0x4066a595,
-    0x406725c0,
-    0x4067a605,
-    0x4068264d,
-    0x4068a66e,
-    0x406926a0,
-    0x4069a6ce,
-    0x406a26ef,
-    0x406aa70f,
-    0x406b2897,
-    0x406ba8ba,
-    0x406c28d0,
-    0x406cab73,
-    0x406d2ba2,
-    0x406dabca,
-    0x406e2bf8,
-    0x406eac45,
-    0x406f2c80,
-    0x406facb8,
-    0x40702ccb,
-    0x4070ace8,
-    0x40710802,
-    0x4071acfa,
-    0x40722d0d,
-    0x4072ad43,
-    0x40732d5b,
-    0x407394fa,
-    0x40742d6f,
-    0x4074ad89,
-    0x40752d9a,
-    0x4075adae,
-    0x40762dbc,
-    0x407692be,
-    0x40772de1,
-    0x4077ae03,
-    0x40782e1e,
-    0x4078ae57,
-    0x40792e6e,
-    0x4079ae84,
-    0x407a2eb0,
-    0x407aaec3,
-    0x407b2ed8,
-    0x407baeea,
-    0x407c2f1b,
-    0x407caf24,
-    0x407d2689,
-    0x407d9ff5,
-    0x407e2e33,
-    0x407ea202,
-    0x407f1d53,
-    0x407f9e9f,
-    0x40801faf,
-    0x40809d7b,
-    0x408120a5,
-    0x40819f89,
-    0x40822be3,
-    0x40829ae4,
-    0x408321dd,
-    0x4083a506,
-    0x40841d8f,
-    0x4084a23a,
-    0x408522bf,
-    0x4085a3e3,
-    0x4086233f,
-    0x4086a00f,
-    0x40872c29,
-    0x4087a431,
-    0x40881b22,
-    0x4088a618,
-    0x40891b71,
-    0x40899afe,
-    0x408a2908,
-    0x408a9912,
-    0x408b2eff,
-    0x408bac95,
-    0x408c22cf,
-    0x408c992e,
-    0x408d1df0,
-    0x408d9dc1,
-    0x408e1f20,
-    0x408ea145,
-    0x408f262c,
-    0x408fa3ff,
-    0x409025e1,
-    0x4090a311,
-    0x409128f0,
-    0x40919954,
-    0x40921bbe,
-    0x4092ac64,
-    0x40932d26,
-    0x4093a020,
-    0x40941da3,
-    0x4094a921,
-    0x4095248d,
-    0x4095ae90,
-    0x40962c10,
-    0x40969fc8,
-    0x4097206b,
-    0x40979f6f,
-    0x40981c1e,
-    0x4098a4a1,
-    0x41f427c2,
-    0x41f92854,
-    0x41fe2747,
-    0x41fea964,
-    0x41ff2a55,
-    0x420327db,
-    0x420827fd,
-    0x4208a839,
-    0x4209272b,
-    0x4209a873,
-    0x420a2782,
-    0x420aa762,
-    0x420b27a2,
-    0x420ba81b,
-    0x420c2a71,
-    0x420ca931,
-    0x420d294b,
-    0x420da982,
-    0x4212299c,
-    0x42172a38,
-    0x4217a9de,
-    0x421c2a00,
-    0x421f29bb,
-    0x42212a88,
-    0x42262a1b,
-    0x422b2b57,
-    0x422bab05,
-    0x422c2b3f,
-    0x422caac4,
-    0x422d2aa3,
-    0x422dab24,
-    0x422e2aea,
-    0x4432072d,
-    0x4432873c,
-    0x44330748,
-    0x44338756,
-    0x44340769,
-    0x4434877a,
-    0x44350781,
-    0x4435878b,
-    0x4436079e,
-    0x443687b4,
-    0x443707c6,
-    0x443787d3,
-    0x443807e2,
-    0x443887ea,
-    0x44390802,
-    0x44398810,
-    0x443a0823,
-    0x483212e8,
-    0x483292fa,
-    0x48331310,
-    0x48339329,
-    0x4c32134e,
-    0x4c32935e,
-    0x4c331371,
-    0x4c339391,
-    0x4c3400ac,
-    0x4c3480ea,
-    0x4c35139d,
-    0x4c3593ab,
-    0x4c3613c7,
-    0x4c3693ed,
-    0x4c3713fc,
-    0x4c37940a,
-    0x4c38141f,
-    0x4c38942b,
-    0x4c39144b,
-    0x4c399475,
-    0x4c3a148e,
-    0x4c3a94a7,
-    0x4c3b05fd,
-    0x4c3b94c0,
-    0x4c3c14d2,
-    0x4c3c94e1,
-    0x4c3d14fa,
-    0x4c3d8c47,
-    0x4c3e1567,
-    0x4c3e9509,
-    0x4c3f1589,
-    0x4c3f92be,
-    0x4c40151f,
-    0x4c40933a,
-    0x4c411557,
-    0x4c4193da,
-    0x4c421543,
-    0x5032328e,
-    0x5032b29d,
-    0x503332a8,
-    0x5033b2b8,
-    0x503432d1,
-    0x5034b2eb,
-    0x503532f9,
-    0x5035b30f,
-    0x50363321,
-    0x5036b337,
-    0x50373350,
-    0x5037b363,
-    0x5038337b,
-    0x5038b38c,
-    0x503933a1,
-    0x5039b3b5,
-    0x503a33d5,
-    0x503ab3eb,
-    0x503b3403,
-    0x503bb415,
-    0x503c3431,
-    0x503cb448,
-    0x503d3461,
-    0x503db477,
-    0x503e3484,
-    0x503eb49a,
-    0x503f34ac,
-    0x503f837b,
-    0x504034bf,
-    0x5040b4cf,
-    0x504134e9,
-    0x5041b4f8,
-    0x50423512,
-    0x5042b52f,
-    0x5043353f,
-    0x5043b54f,
-    0x5044355e,
-    0x50448431,
-    0x50453572,
-    0x5045b590,
-    0x504635a3,
-    0x5046b5b9,
-    0x504735cb,
-    0x5047b5e0,
-    0x50483606,
-    0x5048b614,
-    0x50493627,
-    0x5049b63c,
-    0x504a3652,
-    0x504ab662,
-    0x504b3682,
-    0x504bb695,
-    0x504c36b8,
-    0x504cb6e6,
-    0x504d36f8,
-    0x504db715,
-    0x504e3730,
-    0x504eb74c,
-    0x504f375e,
-    0x504fb775,
-    0x50503784,
-    0x505086f1,
-    0x50513797,
-    0x58320f65,
-    0x68320f27,
-    0x68328c7f,
-    0x68330c92,
-    0x68338f35,
-    0x68340f45,
-    0x683480ea,
-    0x6c320eed,
-    0x6c328c36,
-    0x6c330ef8,
-    0x6c338f11,
-    0x74320a1b,
-    0x743280ac,
-    0x74330c47,
-    0x78320980,
-    0x78328995,
-    0x783309a1,
-    0x78338083,
-    0x783409b0,
-    0x783489c5,
-    0x783509e4,
-    0x78358a06,
-    0x78360a1b,
-    0x78368a31,
-    0x78370a41,
-    0x78378a62,
-    0x78380a75,
-    0x78388a87,
-    0x78390a94,
-    0x78398ab3,
-    0x783a0ac8,
-    0x783a8ad6,
-    0x783b0ae0,
-    0x783b8af4,
-    0x783c0b0b,
-    0x783c8b20,
-    0x783d0b37,
-    0x783d8b4c,
-    0x783e0aa2,
-    0x783e8a54,
-    0x7c3211d7,
-    0x803213ed,
-    0x80328083,
-    0x80332fe1,
-    0x803380ac,
-    0x80342ff0,
-    0x8034af58,
-    0x80352f76,
-    0x8035b004,
-    0x80362fb8,
-    0x8036af67,
-    0x80372faa,
-    0x8037af45,
-    0x80382fcb,
-    0x8038af87,
-    0x80392f9c,
+    0x30340057,
+    0x30348071,
+    0x30350078,
+    0x30358090,
+    0x303600a1,
+    0x303680b9,
+    0x303700c6,
+    0x303780d5,
+    0x303800f7,
+    0x30388104,
+    0x30390117,
+    0x30398132,
+    0x303a0147,
+    0x303a815b,
+    0x303b016f,
+    0x303b8180,
+    0x303c0199,
+    0x303c81b6,
+    0x303d01c4,
+    0x303d81d8,
+    0x303e01e8,
+    0x303e8201,
+    0x303f0211,
+    0x303f8224,
+    0x30400233,
+    0x3040823f,
+    0x30410254,
+    0x30418264,
+    0x3042027b,
+    0x30428288,
+    0x3043029b,
+    0x304382aa,
+    0x304402bf,
+    0x304482e0,
+    0x304502f3,
+    0x30458306,
+    0x3046031f,
+    0x3046833a,
+    0x30470357,
+    0x30478369,
+    0x30480377,
+    0x30488388,
+    0x30490397,
+    0x304983af,
+    0x304a03c1,
+    0x304a83d5,
+    0x304b03ed,
+    0x304b8400,
+    0x304c040b,
+    0x304c841c,
+    0x304d0428,
+    0x304d843e,
+    0x304e044c,
+    0x304e8462,
+    0x304f0474,
+    0x304f8486,
+    0x305004a9,
+    0x305084bc,
+    0x305104cd,
+    0x305184dd,
+    0x305204f5,
+    0x3052850a,
+    0x30530522,
+    0x30538536,
+    0x3054054e,
+    0x30548567,
+    0x30550580,
+    0x3055859d,
+    0x305605a8,
+    0x305685c0,
+    0x305705d0,
+    0x305785e1,
+    0x305805f4,
+    0x3058860a,
+    0x30590613,
+    0x30598628,
+    0x305a063b,
+    0x305a864a,
+    0x305b066a,
+    0x305b8679,
+    0x305c069a,
+    0x305c86b6,
+    0x305d06c2,
+    0x305d86e2,
+    0x305e06fe,
+    0x305e870f,
+    0x305f0725,
+    0x305f872f,
+    0x30600499,
+    0x3060804a,
+    0x34320b72,
+    0x34328b86,
+    0x34330ba3,
+    0x34338bb6,
+    0x34340bc5,
+    0x34348bfe,
+    0x34350be2,
+    0x3c320090,
+    0x3c328cc9,
+    0x3c330ce2,
+    0x3c338cfd,
+    0x3c340d1a,
+    0x3c348d44,
+    0x3c350d5f,
+    0x3c358d85,
+    0x3c360d9e,
+    0x3c368db6,
+    0x3c370dc7,
+    0x3c378dd5,
+    0x3c380de2,
+    0x3c388df6,
+    0x3c390c8c,
+    0x3c398e19,
+    0x3c3a0e2d,
+    0x3c3a891c,
+    0x3c3b0e3d,
+    0x3c3b8e58,
+    0x3c3c0e6a,
+    0x3c3c8e9d,
+    0x3c3d0ea7,
+    0x3c3d8ebb,
+    0x3c3e0ec9,
+    0x3c3e8eee,
+    0x3c3f0cb5,
+    0x3c3f8ed7,
+    0x3c4000b9,
+    0x3c4080f7,
+    0x3c410d35,
+    0x3c418d74,
+    0x3c420e80,
+    0x3c428e0a,
+    0x403219a4,
+    0x403299ba,
+    0x403319e8,
+    0x403399f2,
+    0x40341a09,
+    0x40349a27,
+    0x40351a37,
+    0x40359a49,
+    0x40361a56,
+    0x40369a62,
+    0x40371a77,
+    0x40379a89,
+    0x40381a94,
+    0x40389aa6,
+    0x40390f34,
+    0x40399ab6,
+    0x403a1ac9,
+    0x403a9aea,
+    0x403b1afb,
+    0x403b9b0b,
+    0x403c0071,
+    0x403c8090,
+    0x403d1b6c,
+    0x403d9b82,
+    0x403e1b91,
+    0x403e9bc9,
+    0x403f1be3,
+    0x403f9c0b,
+    0x40401c20,
+    0x40409c34,
+    0x40411c6f,
+    0x40419c8a,
+    0x40421ca3,
+    0x40429cb6,
+    0x40431cca,
+    0x40439ce2,
+    0x40441cf9,
+    0x404480b9,
+    0x40451d0e,
+    0x40459d20,
+    0x40461d44,
+    0x40469d64,
+    0x40471d72,
+    0x40479d99,
+    0x40481e0a,
+    0x40489e3d,
+    0x40491e54,
+    0x40499e6e,
+    0x404a1e85,
+    0x404a9ea3,
+    0x404b1ebb,
+    0x404b9ee8,
+    0x404c1efe,
+    0x404c9f10,
+    0x404d1f31,
+    0x404d9f6a,
+    0x404e1f7e,
+    0x404e9f8b,
+    0x404f1fd2,
+    0x404fa018,
+    0x4050206f,
+    0x4050a083,
+    0x405120b6,
+    0x405220d3,
+    0x4052a0f7,
+    0x4053210f,
+    0x4053a122,
+    0x40542137,
+    0x4054a15a,
+    0x40552185,
+    0x4055a1c2,
+    0x405621cf,
+    0x4056a1e8,
+    0x40572200,
+    0x4057a213,
+    0x40582228,
+    0x4058a24f,
+    0x4059227e,
+    0x4059a2ab,
+    0x405a22bf,
+    0x405aa2cf,
+    0x405b22e7,
+    0x405ba2f8,
+    0x405c230b,
+    0x405ca34a,
+    0x405d2357,
+    0x405da37c,
+    0x405e23ba,
+    0x405e8ac0,
+    0x405f23db,
+    0x405fa3e8,
+    0x406023f6,
+    0x4060a418,
+    0x40612479,
+    0x4061a4b1,
+    0x406224c8,
+    0x4062a4d9,
+    0x40632526,
+    0x4063a53b,
+    0x40642552,
+    0x4064a57e,
+    0x40652599,
+    0x4065a5b0,
+    0x406625c8,
+    0x4066a5f2,
+    0x4067261d,
+    0x4067a662,
+    0x406826aa,
+    0x4068a6cb,
+    0x406926fd,
+    0x4069a72b,
+    0x406a274c,
+    0x406aa76c,
+    0x406b28f4,
+    0x406ba917,
+    0x406c292d,
+    0x406cac1e,
+    0x406d2c4d,
+    0x406dac75,
+    0x406e2ca3,
+    0x406eacf0,
+    0x406f2d49,
+    0x406fad81,
+    0x40702d94,
+    0x4070adb1,
+    0x4071080f,
+    0x4071adc3,
+    0x40722dd6,
+    0x4072ae0c,
+    0x40732e24,
+    0x40739511,
+    0x40742e38,
+    0x4074ae52,
+    0x40752e63,
+    0x4075ae77,
+    0x40762e85,
+    0x407692d5,
+    0x40772eaa,
+    0x4077aecc,
+    0x40782ee7,
+    0x4078af20,
+    0x40792f37,
+    0x4079af4d,
+    0x407a2f79,
+    0x407aaf8c,
+    0x407b2fa1,
+    0x407bafb3,
+    0x407c2fe4,
+    0x407cafed,
+    0x407d26e6,
+    0x407da028,
+    0x407e2efc,
+    0x407ea25f,
+    0x407f1d86,
+    0x407f9ed2,
+    0x40801fe2,
+    0x40809dae,
+    0x408120e5,
+    0x40819fbc,
+    0x40822c8e,
+    0x40829b17,
+    0x4083223a,
+    0x4083a563,
+    0x40841dc2,
+    0x4084a297,
+    0x4085231c,
+    0x4085a440,
+    0x4086239c,
+    0x4086a042,
+    0x40872cd4,
+    0x4087a48e,
+    0x40881b55,
+    0x4088a675,
+    0x40891ba4,
+    0x40899b31,
+    0x408a2965,
+    0x408a9929,
+    0x408b2fc8,
+    0x408bad5e,
+    0x408c232c,
+    0x408c9961,
+    0x408d1e23,
+    0x408d9df4,
+    0x408e1f53,
+    0x408ea1a2,
+    0x408f2689,
+    0x408fa45c,
+    0x4090263e,
+    0x4090a36e,
+    0x4091294d,
+    0x40919987,
+    0x40921bf1,
+    0x4092ad0f,
+    0x40932def,
+    0x4093a053,
+    0x40941dd6,
+    0x4094a97e,
+    0x409524ea,
+    0x4095af59,
+    0x40962cbb,
+    0x40969ffb,
+    0x4097209e,
+    0x40979fa2,
+    0x40981c51,
+    0x4098a4fe,
+    0x40992d2b,
+    0x4099a0c6,
+    0x409a2168,
+    0x409a9945,
+    0x41f4281f,
+    0x41f928b1,
+    0x41fe27a4,
+    0x41feaa5a,
+    0x41ff2b6f,
+    0x42032838,
+    0x4208285a,
+    0x4208a896,
+    0x42092788,
+    0x4209a8d0,
+    0x420a27df,
+    0x420aa7bf,
+    0x420b27ff,
+    0x420ba878,
+    0x420c2b8b,
+    0x420ca98e,
+    0x420d2a41,
+    0x420daa78,
+    0x42122a92,
+    0x42172b52,
+    0x4217aad4,
+    0x421c2af6,
+    0x421f2ab1,
+    0x42212c03,
+    0x42262b35,
+    0x422b2be1,
+    0x422baa1c,
+    0x422c2bc3,
+    0x422ca9cf,
+    0x422d29a8,
+    0x422daba2,
+    0x422e29fb,
+    0x42302b11,
+    0x4432073a,
+    0x44328749,
+    0x44330755,
+    0x44338763,
+    0x44340776,
+    0x44348787,
+    0x4435078e,
+    0x44358798,
+    0x443607ab,
+    0x443687c1,
+    0x443707d3,
+    0x443787e0,
+    0x443807ef,
+    0x443887f7,
+    0x4439080f,
+    0x4439881d,
+    0x443a0830,
+    0x483212ff,
+    0x48329311,
+    0x48331327,
+    0x48339340,
+    0x4c321365,
+    0x4c329375,
+    0x4c331388,
+    0x4c3393a8,
+    0x4c3400b9,
+    0x4c3480f7,
+    0x4c3513b4,
+    0x4c3593c2,
+    0x4c3613de,
+    0x4c369404,
+    0x4c371413,
+    0x4c379421,
+    0x4c381436,
+    0x4c389442,
+    0x4c391462,
+    0x4c39948c,
+    0x4c3a14a5,
+    0x4c3a94be,
+    0x4c3b060a,
+    0x4c3b94d7,
+    0x4c3c14e9,
+    0x4c3c94f8,
+    0x4c3d1511,
+    0x4c3d8c54,
+    0x4c3e157e,
+    0x4c3e9520,
+    0x4c3f15a0,
+    0x4c3f92d5,
+    0x4c401536,
+    0x4c409351,
+    0x4c41156e,
+    0x4c4193f1,
+    0x4c42155a,
+    0x50323357,
+    0x5032b366,
+    0x50333371,
+    0x5033b381,
+    0x5034339a,
+    0x5034b3b4,
+    0x503533c2,
+    0x5035b3d8,
+    0x503633ea,
+    0x5036b400,
+    0x50373419,
+    0x5037b42c,
+    0x50383444,
+    0x5038b455,
+    0x5039346a,
+    0x5039b47e,
+    0x503a349e,
+    0x503ab4b4,
+    0x503b34cc,
+    0x503bb4de,
+    0x503c34fa,
+    0x503cb511,
+    0x503d352a,
+    0x503db540,
+    0x503e354d,
+    0x503eb563,
+    0x503f3575,
+    0x503f8388,
+    0x50403588,
+    0x5040b598,
+    0x504135b2,
+    0x5041b5c1,
+    0x504235db,
+    0x5042b5f8,
+    0x50433608,
+    0x5043b618,
+    0x50443627,
+    0x5044843e,
+    0x5045363b,
+    0x5045b659,
+    0x5046366c,
+    0x5046b682,
+    0x50473694,
+    0x5047b6a9,
+    0x504836cf,
+    0x5048b6dd,
+    0x504936f0,
+    0x5049b705,
+    0x504a371b,
+    0x504ab72b,
+    0x504b374b,
+    0x504bb75e,
+    0x504c3781,
+    0x504cb7af,
+    0x504d37c1,
+    0x504db7de,
+    0x504e37f9,
+    0x504eb815,
+    0x504f3827,
+    0x504fb83e,
+    0x5050384d,
+    0x505086fe,
+    0x50513860,
+    0x58320f72,
+    0x68320f34,
+    0x68328c8c,
+    0x68330c9f,
+    0x68338f42,
+    0x68340f52,
+    0x683480f7,
+    0x6c320efa,
+    0x6c328c43,
+    0x6c330f05,
+    0x6c338f1e,
+    0x74320a28,
+    0x743280b9,
+    0x74330c54,
+    0x7832098d,
+    0x783289a2,
+    0x783309ae,
+    0x78338090,
+    0x783409bd,
+    0x783489d2,
+    0x783509f1,
+    0x78358a13,
+    0x78360a28,
+    0x78368a3e,
+    0x78370a4e,
+    0x78378a6f,
+    0x78380a82,
+    0x78388a94,
+    0x78390aa1,
+    0x78398ac0,
+    0x783a0ad5,
+    0x783a8ae3,
+    0x783b0aed,
+    0x783b8b01,
+    0x783c0b18,
+    0x783c8b2d,
+    0x783d0b44,
+    0x783d8b59,
+    0x783e0aaf,
+    0x783e8a61,
+    0x7c3211ee,
+    0x80321404,
+    0x80328090,
+    0x803330aa,
+    0x803380b9,
+    0x803430b9,
+    0x8034b021,
+    0x8035303f,
+    0x8035b0cd,
+    0x80363081,
+    0x8036b030,
+    0x80373073,
+    0x8037b00e,
+    0x80383094,
+    0x8038b050,
+    0x80393065,
 };
 
 const size_t kOpenSSLReasonValuesLen = sizeof(kOpenSSLReasonValues) / sizeof(kOpenSSLReasonValues[0]);
@@ -785,6 +792,7 @@
     "AUX_ERROR\0"
     "BAD_GET_ASN1_OBJECT_CALL\0"
     "BAD_OBJECT_HEADER\0"
+    "BAD_TEMPLATE\0"
     "BMPSTRING_IS_WRONG_LENGTH\0"
     "BN_LIB\0"
     "BOOLEAN_IS_WRONG_LENGTH\0"
@@ -991,6 +999,7 @@
     "COMMAND_NOT_SUPPORTED\0"
     "DIFFERENT_KEY_TYPES\0"
     "DIFFERENT_PARAMETERS\0"
+    "EMPTY_PSK\0"
     "EXPECTING_AN_EC_KEY_KEY\0"
     "EXPECTING_AN_RSA_KEY\0"
     "EXPECTING_A_DSA_KEY\0"
@@ -1108,6 +1117,7 @@
     "VALUE_MISSING\0"
     "WRONG_SIGNATURE_LENGTH\0"
     "ALPN_MISMATCH_ON_EARLY_DATA\0"
+    "ALPS_MISMATCH_ON_EARLY_DATA\0"
     "APPLICATION_DATA_INSTEAD_OF_HANDSHAKE\0"
     "APPLICATION_DATA_ON_SHUTDOWN\0"
     "APP_DATA_IN_HANDSHAKE\0"
@@ -1191,6 +1201,7 @@
     "INVALID_TICKET_KEYS_LENGTH\0"
     "KEY_USAGE_BIT_INCORRECT\0"
     "LENGTH_MISMATCH\0"
+    "MISSING_ALPN\0"
     "MISSING_EXTENSION\0"
     "MISSING_KEY_SHARE\0"
     "MISSING_RSA_CERTIFICATE\0"
@@ -1198,6 +1209,7 @@
     "MISSING_TMP_ECDH_KEY\0"
     "MIXED_SPECIAL_OPERATOR_WITH_GROUPS\0"
     "MTU_TOO_SMALL\0"
+    "NEGOTIATED_ALPS_WITHOUT_ALPN\0"
     "NEGOTIATED_BOTH_NPN_AND_ALPN\0"
     "NEGOTIATED_TB_WITHOUT_EMS_OR_RI\0"
     "NESTED_GROUP\0"
@@ -1278,6 +1290,10 @@
     "TICKET_ENCRYPTION_FAILED\0"
     "TLS13_DOWNGRADE\0"
     "TLSV1_ALERT_ACCESS_DENIED\0"
+    "TLSV1_ALERT_BAD_CERTIFICATE_HASH_VALUE\0"
+    "TLSV1_ALERT_BAD_CERTIFICATE_STATUS_RESPONSE\0"
+    "TLSV1_ALERT_CERTIFICATE_REQUIRED\0"
+    "TLSV1_ALERT_CERTIFICATE_UNOBTAINABLE\0"
     "TLSV1_ALERT_DECODE_ERROR\0"
     "TLSV1_ALERT_DECRYPTION_FAILED\0"
     "TLSV1_ALERT_DECRYPT_ERROR\0"
@@ -1285,18 +1301,15 @@
     "TLSV1_ALERT_INAPPROPRIATE_FALLBACK\0"
     "TLSV1_ALERT_INSUFFICIENT_SECURITY\0"
     "TLSV1_ALERT_INTERNAL_ERROR\0"
+    "TLSV1_ALERT_NO_APPLICATION_PROTOCOL\0"
     "TLSV1_ALERT_NO_RENEGOTIATION\0"
     "TLSV1_ALERT_PROTOCOL_VERSION\0"
     "TLSV1_ALERT_RECORD_OVERFLOW\0"
     "TLSV1_ALERT_UNKNOWN_CA\0"
+    "TLSV1_ALERT_UNKNOWN_PSK_IDENTITY\0"
+    "TLSV1_ALERT_UNRECOGNIZED_NAME\0"
+    "TLSV1_ALERT_UNSUPPORTED_EXTENSION\0"
     "TLSV1_ALERT_USER_CANCELLED\0"
-    "TLSV1_BAD_CERTIFICATE_HASH_VALUE\0"
-    "TLSV1_BAD_CERTIFICATE_STATUS_RESPONSE\0"
-    "TLSV1_CERTIFICATE_REQUIRED\0"
-    "TLSV1_CERTIFICATE_UNOBTAINABLE\0"
-    "TLSV1_UNKNOWN_PSK_IDENTITY\0"
-    "TLSV1_UNRECOGNIZED_NAME\0"
-    "TLSV1_UNSUPPORTED_EXTENSION\0"
     "TLS_PEER_DID_NOT_RESPOND_WITH_CERTIFICATE_LIST\0"
     "TLS_RSA_ENCRYPTED_VALUE_LENGTH_IS_WRONG\0"
     "TOO_MANY_EMPTY_FRAGMENTS\0"
@@ -1306,6 +1319,7 @@
     "TOO_MUCH_SKIPPED_EARLY_DATA\0"
     "UNABLE_TO_FIND_ECDH_PARAMETERS\0"
     "UNCOMPRESSED_CERT_TOO_LARGE\0"
+    "UNEXPECTED_COMPATIBILITY_MODE\0"
     "UNEXPECTED_EXTENSION\0"
     "UNEXPECTED_EXTENSION_ON_EARLY_DATA\0"
     "UNEXPECTED_MESSAGE\0"
diff --git a/deps/boringssl/ios-aarch64/crypto/chacha/chacha-armv8.S b/deps/boringssl/ios-aarch64/crypto/chacha/chacha-armv8.S
index b14466d..31db825 100644
--- a/deps/boringssl/ios-aarch64/crypto/chacha/chacha-armv8.S
+++ b/deps/boringssl/ios-aarch64/crypto/chacha/chacha-armv8.S
@@ -15,6 +15,7 @@
 #include <openssl/arm_arch.h>
 
 
+.private_extern	_OPENSSL_armcap_P
 
 .section	__TEXT,__const
 
@@ -33,6 +34,7 @@
 
 .align	5
 _ChaCha20_ctr32:
+	AARCH64_VALID_CALL_TARGET
 	cbz	x2,Labort
 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
 	adrp	x5,:pg_hi21_nc:_OPENSSL_armcap_P
@@ -46,6 +48,7 @@
 	b.ne	ChaCha20_neon
 
 Lshort:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -258,6 +261,7 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
 Labort:
 	ret
 
@@ -314,12 +318,14 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 
 
 .align	5
 ChaCha20_neon:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -700,6 +706,7 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 Ltail_neon:
@@ -809,11 +816,13 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 
 .align	5
 ChaCha20_512_neon:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -1977,6 +1986,7 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 #endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/ios-aarch64/crypto/fipsmodule/aesv8-armx64.S b/deps/boringssl/ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
index dc2d6e4..13950f1 100644
--- a/deps/boringssl/ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
+++ b/deps/boringssl/ios-aarch64/crypto/fipsmodule/aesv8-armx64.S
@@ -32,6 +32,8 @@
 .align	5
 _aes_hw_set_encrypt_key:
 Lenc_key:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	mov	x3,#-1
@@ -200,6 +202,7 @@
 
 .align	5
 _aes_hw_set_decrypt_key:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	bl	Lenc_key
@@ -233,6 +236,7 @@
 	eor	x0,x0,x0		// return value
 Ldec_key_abort:
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 .globl	_aes_hw_encrypt
@@ -240,6 +244,7 @@
 
 .align	5
 _aes_hw_encrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	w3,[x2,#240]
 	ld1	{v0.4s},[x2],#16
 	ld1	{v2.16b},[x0]
@@ -270,6 +275,7 @@
 
 .align	5
 _aes_hw_decrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	w3,[x2,#240]
 	ld1	{v0.4s},[x2],#16
 	ld1	{v2.16b},[x0]
@@ -300,6 +306,8 @@
 
 .align	5
 _aes_hw_cbc_encrypt:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	subs	x2,x2,#16
@@ -591,6 +599,8 @@
 
 .align	5
 _aes_hw_ctr32_encrypt_blocks:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	ldr	w5,[x3,#240]
@@ -610,20 +620,34 @@
 	add	x7,x3,#32
 	mov	w6,w5
 	csel	x12,xzr,x12,lo
+
+	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
+	// affected by silicon errata #1742098 [0] and #1655431 [1],
+	// respectively, where the second instruction of an aese/aesmc
+	// instruction pair may execute twice if an interrupt is taken right
+	// after the first instruction consumes an input register of which a
+	// single 32-bit lane has been updated the last time it was modified.
+	//
+	// This function uses a counter in one 32-bit lane. The vmov lines
+	// could write to v1.16b and v18.16b directly, but that trips these bugs.
+	// We write to v6.16b and copy to the final register as a workaround.
+	//
+	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
+	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
 #ifndef __ARMEB__
 	rev	w8, w8
 #endif
-	orr	v1.16b,v0.16b,v0.16b
 	add	w10, w8, #1
-	orr	v18.16b,v0.16b,v0.16b
-	add	w8, w8, #2
 	orr	v6.16b,v0.16b,v0.16b
 	rev	w10, w10
-	mov	v1.s[3],w10
+	mov	v6.s[3],w10
+	add	w8, w8, #2
+	orr	v1.16b,v6.16b,v6.16b
 	b.ls	Lctr32_tail
 	rev	w12, w8
+	mov	v6.s[3],w12
 	sub	x2,x2,#3		// bias
-	mov	v18.s[3],w12
+	orr	v18.16b,v6.16b,v6.16b
 	b	Loop3x_ctr32
 
 .align	4
@@ -650,11 +674,11 @@
 	aese	v1.16b,v16.16b
 	aesmc	v5.16b,v1.16b
 	ld1	{v2.16b},[x0],#16
-	orr	v0.16b,v6.16b,v6.16b
+	add	w9,w8,#1
 	aese	v18.16b,v16.16b
 	aesmc	v18.16b,v18.16b
 	ld1	{v3.16b},[x0],#16
-	orr	v1.16b,v6.16b,v6.16b
+	rev	w9,w9
 	aese	v4.16b,v17.16b
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v17.16b
@@ -663,8 +687,6 @@
 	mov	x7,x3
 	aese	v18.16b,v17.16b
 	aesmc	v17.16b,v18.16b
-	orr	v18.16b,v6.16b,v6.16b
-	add	w9,w8,#1
 	aese	v4.16b,v20.16b
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v20.16b
@@ -679,21 +701,26 @@
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v21.16b
 	aesmc	v5.16b,v5.16b
+	 // Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
+	 // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
+	 // 32-bit mode. See the comment above.
 	eor	v19.16b,v19.16b,v7.16b
-	rev	w9,w9
+	mov	v6.s[3], w9
 	aese	v17.16b,v21.16b
 	aesmc	v17.16b,v17.16b
-	mov	v0.s[3], w9
+	orr	v0.16b,v6.16b,v6.16b
 	rev	w10,w10
 	aese	v4.16b,v22.16b
 	aesmc	v4.16b,v4.16b
+	mov	v6.s[3], w10
+	rev	w12,w8
 	aese	v5.16b,v22.16b
 	aesmc	v5.16b,v5.16b
-	mov	v1.s[3], w10
-	rev	w12,w8
+	orr	v1.16b,v6.16b,v6.16b
+	mov	v6.s[3], w12
 	aese	v17.16b,v22.16b
 	aesmc	v17.16b,v17.16b
-	mov	v18.s[3], w12
+	orr	v18.16b,v6.16b,v6.16b
 	subs	x2,x2,#3
 	aese	v4.16b,v23.16b
 	aese	v5.16b,v23.16b
diff --git a/deps/boringssl/ios-aarch64/crypto/fipsmodule/armv8-mont.S b/deps/boringssl/ios-aarch64/crypto/fipsmodule/armv8-mont.S
index 3d83f4d..2493ae0 100644
--- a/deps/boringssl/ios-aarch64/crypto/fipsmodule/armv8-mont.S
+++ b/deps/boringssl/ios-aarch64/crypto/fipsmodule/armv8-mont.S
@@ -12,6 +12,8 @@
 #if defined(BORINGSSL_PREFIX)
 #include <boringssl_prefix_symbols_asm.h>
 #endif
+#include <openssl/arm_arch.h>
+
 .text
 
 .globl	_bn_mul_mont
@@ -19,6 +21,7 @@
 
 .align	5
 _bn_mul_mont:
+	AARCH64_SIGN_LINK_REGISTER
 	tst	x5,#7
 	b.eq	__bn_sqr8x_mont
 	tst	x5,#3
@@ -216,11 +219,14 @@
 	mov	x0,#1
 	ldp	x23,x24,[x29,#48]
 	ldr	x29,[sp],#64
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 
 .align	5
 __bn_sqr8x_mont:
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
+	// only from bn_mul_mont which has already signed the return address.
 	cmp	x1,x2
 	b.ne	__bn_mul4x_mont
 Lsqr8x_mont:
@@ -974,11 +980,16 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldr	x29,[sp],#128
+	// x30 is popped earlier
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 
 .align	5
 __bn_mul4x_mont:
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
+	// only from bn_mul_mont or __bn_sqr8x_mont which have already signed the
+	// return address.
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -1412,6 +1423,8 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldr	x29,[sp],#128
+	// x30 is popped earlier
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 .byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/deps/boringssl/ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S b/deps/boringssl/ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
index 60bff31..5441afc 100644
--- a/deps/boringssl/ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
+++ b/deps/boringssl/ios-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
@@ -12,6 +12,8 @@
 #if defined(BORINGSSL_PREFIX)
 #include <boringssl_prefix_symbols_asm.h>
 #endif
+#include <openssl/arm_arch.h>
+
 .text
 
 .globl	_gcm_init_neon
@@ -19,6 +21,7 @@
 
 .align	4
 _gcm_init_neon:
+	AARCH64_VALID_CALL_TARGET
 	// This function is adapted from gcm_init_v8. xC2 is t3.
 	ld1	{v17.2d}, [x1]			// load H
 	movi	v19.16b, #0xe1
@@ -44,6 +47,7 @@
 
 .align	4
 _gcm_gmult_neon:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v3.16b}, [x0]		// load Xi
 	ld1	{v5.1d}, [x1], #8		// load twisted H
 	ld1	{v6.1d}, [x1]
@@ -63,6 +67,7 @@
 
 .align	4
 _gcm_ghash_neon:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v0.16b}, [x0]		// load Xi
 	ld1	{v5.1d}, [x1], #8		// load twisted H
 	ld1	{v6.1d}, [x1]
diff --git a/deps/boringssl/ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S b/deps/boringssl/ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
index be0e283..566330f 100644
--- a/deps/boringssl/ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
+++ b/deps/boringssl/ios-aarch64/crypto/fipsmodule/ghashv8-armx64.S
@@ -21,6 +21,7 @@
 
 .align	4
 _gcm_init_v8:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v17.2d},[x1]		//load input H
 	movi	v19.16b,#0xe1
 	shl	v19.2d,v19.2d,#57		//0xc2.0
@@ -72,6 +73,7 @@
 
 .align	4
 _gcm_gmult_v8:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v17.2d},[x0]		//load Xi
 	movi	v19.16b,#0xe1
 	ld1	{v20.2d,v21.2d},[x1]	//load twisted H, ...
@@ -114,6 +116,7 @@
 
 .align	4
 _gcm_ghash_v8:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v0.2d},[x0]		//load [rotated] Xi
 						//"[rotated]" means that
 						//loaded value would have
diff --git a/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha1-armv8.S b/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha1-armv8.S
index 379107e..05eb920 100644
--- a/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha1-armv8.S
+++ b/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha1-armv8.S
@@ -17,11 +17,14 @@
 .text
 
 
+.private_extern	_OPENSSL_armcap_P
 .globl	_sha1_block_data_order
 .private_extern	_sha1_block_data_order
 
 .align	6
 _sha1_block_data_order:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
 	adrp	x16,:pg_hi21_nc:_OPENSSL_armcap_P
 #else
@@ -1089,6 +1092,8 @@
 
 .align	6
 sha1_block_armv8:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
 Lv8_entry:
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
@@ -1227,6 +1232,4 @@
 .byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
 .align	2
 .align	2
-.comm	_OPENSSL_armcap_P,4,4
-.private_extern	_OPENSSL_armcap_P
 #endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha256-armv8.S b/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
index d6fa5a9..c9b7991 100644
--- a/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
+++ b/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha256-armv8.S
@@ -58,11 +58,13 @@
 .text
 
 
+.private_extern	_OPENSSL_armcap_P
 .globl	_sha256_block_data_order
 .private_extern	_sha256_block_data_order
 
 .align	6
 _sha256_block_data_order:
+	AARCH64_VALID_CALL_TARGET
 #ifndef	__KERNEL__
 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
 	adrp	x16,:pg_hi21_nc:_OPENSSL_armcap_P
@@ -73,6 +75,7 @@
 	tst	w16,#ARMV8_SHA256
 	b.ne	Lv8_entry
 #endif
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 
@@ -1033,6 +1036,7 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#128
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 
@@ -1067,6 +1071,7 @@
 .align	6
 sha256_block_armv8:
 Lv8_entry:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -1203,8 +1208,4 @@
 	ret
 
 #endif
-#ifndef	__KERNEL__
-.comm	_OPENSSL_armcap_P,4,4
-.private_extern	_OPENSSL_armcap_P
-#endif
 #endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha512-armv8.S b/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
index 29e122b..97b3230 100644
--- a/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
+++ b/deps/boringssl/ios-aarch64/crypto/fipsmodule/sha512-armv8.S
@@ -58,11 +58,13 @@
 .text
 
 
+.private_extern	_OPENSSL_armcap_P
 .globl	_sha512_block_data_order
 .private_extern	_sha512_block_data_order
 
 .align	6
 _sha512_block_data_order:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 
@@ -1023,6 +1025,7 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#128
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 
@@ -1075,8 +1078,4 @@
 .byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
 .align	2
 .align	2
-#ifndef	__KERNEL__
-.comm	_OPENSSL_armcap_P,4,4
-.private_extern	_OPENSSL_armcap_P
-#endif
 #endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S b/deps/boringssl/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
index 0f5cbea..5985c55 100644
--- a/deps/boringssl/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
+++ b/deps/boringssl/ios-aarch64/crypto/fipsmodule/vpaes-armv8.S
@@ -12,6 +12,8 @@
 #if defined(BORINGSSL_PREFIX)
 #include <boringssl_prefix_symbols_asm.h>
 #endif
+#include <openssl/arm_arch.h>
+
 .section	__TEXT,__const
 
 
@@ -214,6 +216,7 @@
 
 .align	4
 _vpaes_encrypt:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -223,6 +226,7 @@
 	st1	{v0.16b}, [x1]
 
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 
@@ -451,6 +455,7 @@
 
 .align	4
 _vpaes_decrypt:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -460,6 +465,7 @@
 	st1	{v0.16b}, [x1]
 
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 
@@ -629,6 +635,7 @@
 
 .align	4
 _vpaes_schedule_core:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29, x30, [sp,#-16]!
 	add	x29,sp,#0
 
@@ -798,6 +805,7 @@
 	eor	v6.16b, v6.16b, v6.16b		// vpxor	%xmm6,	%xmm6,	%xmm6
 	eor	v7.16b, v7.16b, v7.16b		// vpxor	%xmm7,	%xmm7,	%xmm7
 	ldp	x29, x30, [sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 
@@ -1000,7 +1008,7 @@
 
 Lschedule_mangle_both:
 	tbl	v3.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
-	add	x8, x8, #64-16			// add	$-16,	%r8
+	add	x8, x8, #48			// add	$-16,	%r8
 	and	x8, x8, #~(1<<6)		// and	$0x30,	%r8
 	st1	{v3.2d}, [x2]			// vmovdqu	%xmm3,	(%rdx)
 	ret
@@ -1011,6 +1019,7 @@
 
 .align	4
 _vpaes_set_encrypt_key:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1026,6 +1035,7 @@
 
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 
@@ -1034,6 +1044,7 @@
 
 .align	4
 _vpaes_set_decrypt_key:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1053,6 +1064,7 @@
 
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 .globl	_vpaes_cbc_encrypt
@@ -1060,6 +1072,7 @@
 
 .align	4
 _vpaes_cbc_encrypt:
+	AARCH64_SIGN_LINK_REGISTER
 	cbz	x2, Lcbc_abort
 	cmp	w5, #0			// check direction
 	b.eq	vpaes_cbc_decrypt
@@ -1086,6 +1099,7 @@
 	st1	{v0.16b}, [x4]	// write ivec
 
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 Lcbc_abort:
 	ret
 
@@ -1093,6 +1107,8 @@
 
 .align	4
 vpaes_cbc_decrypt:
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
+	// only from vpaes_cbc_encrypt which has already signed the return address.
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1134,6 +1150,7 @@
 	ldp	d10,d11,[sp],#16
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 .globl	_vpaes_ctr32_encrypt_blocks
@@ -1141,6 +1158,7 @@
 
 .align	4
 _vpaes_ctr32_encrypt_blocks:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1208,6 +1226,7 @@
 	ldp	d10,d11,[sp],#16
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 #endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/ios-aarch64/crypto/test/trampoline-armv8.S b/deps/boringssl/ios-aarch64/crypto/test/trampoline-armv8.S
index 593ed9b..325da9b 100644
--- a/deps/boringssl/ios-aarch64/crypto/test/trampoline-armv8.S
+++ b/deps/boringssl/ios-aarch64/crypto/test/trampoline-armv8.S
@@ -12,6 +12,8 @@
 #if defined(BORINGSSL_PREFIX)
 #include <boringssl_prefix_symbols_asm.h>
 #endif
+#include <openssl/arm_arch.h>
+
 .text
 
 // abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@@ -25,6 +27,8 @@
 .private_extern	_abi_test_trampoline
 .align	4
 _abi_test_trampoline:
+Labi_test_trampoline_begin:
+	AARCH64_SIGN_LINK_REGISTER
 	// Stack layout (low to high addresses)
 	//   x29,x30 (16 bytes)
 	//    d8-d15 (64 bytes)
@@ -127,6 +131,7 @@
 	ldp	x27, x28, [sp, #144]
 
 	ldp	x29, x30, [sp], #176
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 
@@ -134,6 +139,7 @@
 .private_extern	_abi_test_clobber_x0
 .align	4
 _abi_test_clobber_x0:
+	AARCH64_VALID_CALL_TARGET
 	mov	x0, xzr
 	ret
 
@@ -142,6 +148,7 @@
 .private_extern	_abi_test_clobber_x1
 .align	4
 _abi_test_clobber_x1:
+	AARCH64_VALID_CALL_TARGET
 	mov	x1, xzr
 	ret
 
@@ -150,6 +157,7 @@
 .private_extern	_abi_test_clobber_x2
 .align	4
 _abi_test_clobber_x2:
+	AARCH64_VALID_CALL_TARGET
 	mov	x2, xzr
 	ret
 
@@ -158,6 +166,7 @@
 .private_extern	_abi_test_clobber_x3
 .align	4
 _abi_test_clobber_x3:
+	AARCH64_VALID_CALL_TARGET
 	mov	x3, xzr
 	ret
 
@@ -166,6 +175,7 @@
 .private_extern	_abi_test_clobber_x4
 .align	4
 _abi_test_clobber_x4:
+	AARCH64_VALID_CALL_TARGET
 	mov	x4, xzr
 	ret
 
@@ -174,6 +184,7 @@
 .private_extern	_abi_test_clobber_x5
 .align	4
 _abi_test_clobber_x5:
+	AARCH64_VALID_CALL_TARGET
 	mov	x5, xzr
 	ret
 
@@ -182,6 +193,7 @@
 .private_extern	_abi_test_clobber_x6
 .align	4
 _abi_test_clobber_x6:
+	AARCH64_VALID_CALL_TARGET
 	mov	x6, xzr
 	ret
 
@@ -190,6 +202,7 @@
 .private_extern	_abi_test_clobber_x7
 .align	4
 _abi_test_clobber_x7:
+	AARCH64_VALID_CALL_TARGET
 	mov	x7, xzr
 	ret
 
@@ -198,6 +211,7 @@
 .private_extern	_abi_test_clobber_x8
 .align	4
 _abi_test_clobber_x8:
+	AARCH64_VALID_CALL_TARGET
 	mov	x8, xzr
 	ret
 
@@ -206,6 +220,7 @@
 .private_extern	_abi_test_clobber_x9
 .align	4
 _abi_test_clobber_x9:
+	AARCH64_VALID_CALL_TARGET
 	mov	x9, xzr
 	ret
 
@@ -214,6 +229,7 @@
 .private_extern	_abi_test_clobber_x10
 .align	4
 _abi_test_clobber_x10:
+	AARCH64_VALID_CALL_TARGET
 	mov	x10, xzr
 	ret
 
@@ -222,6 +238,7 @@
 .private_extern	_abi_test_clobber_x11
 .align	4
 _abi_test_clobber_x11:
+	AARCH64_VALID_CALL_TARGET
 	mov	x11, xzr
 	ret
 
@@ -230,6 +247,7 @@
 .private_extern	_abi_test_clobber_x12
 .align	4
 _abi_test_clobber_x12:
+	AARCH64_VALID_CALL_TARGET
 	mov	x12, xzr
 	ret
 
@@ -238,6 +256,7 @@
 .private_extern	_abi_test_clobber_x13
 .align	4
 _abi_test_clobber_x13:
+	AARCH64_VALID_CALL_TARGET
 	mov	x13, xzr
 	ret
 
@@ -246,6 +265,7 @@
 .private_extern	_abi_test_clobber_x14
 .align	4
 _abi_test_clobber_x14:
+	AARCH64_VALID_CALL_TARGET
 	mov	x14, xzr
 	ret
 
@@ -254,6 +274,7 @@
 .private_extern	_abi_test_clobber_x15
 .align	4
 _abi_test_clobber_x15:
+	AARCH64_VALID_CALL_TARGET
 	mov	x15, xzr
 	ret
 
@@ -262,6 +283,7 @@
 .private_extern	_abi_test_clobber_x16
 .align	4
 _abi_test_clobber_x16:
+	AARCH64_VALID_CALL_TARGET
 	mov	x16, xzr
 	ret
 
@@ -270,6 +292,7 @@
 .private_extern	_abi_test_clobber_x17
 .align	4
 _abi_test_clobber_x17:
+	AARCH64_VALID_CALL_TARGET
 	mov	x17, xzr
 	ret
 
@@ -278,6 +301,7 @@
 .private_extern	_abi_test_clobber_x19
 .align	4
 _abi_test_clobber_x19:
+	AARCH64_VALID_CALL_TARGET
 	mov	x19, xzr
 	ret
 
@@ -286,6 +310,7 @@
 .private_extern	_abi_test_clobber_x20
 .align	4
 _abi_test_clobber_x20:
+	AARCH64_VALID_CALL_TARGET
 	mov	x20, xzr
 	ret
 
@@ -294,6 +319,7 @@
 .private_extern	_abi_test_clobber_x21
 .align	4
 _abi_test_clobber_x21:
+	AARCH64_VALID_CALL_TARGET
 	mov	x21, xzr
 	ret
 
@@ -302,6 +328,7 @@
 .private_extern	_abi_test_clobber_x22
 .align	4
 _abi_test_clobber_x22:
+	AARCH64_VALID_CALL_TARGET
 	mov	x22, xzr
 	ret
 
@@ -310,6 +337,7 @@
 .private_extern	_abi_test_clobber_x23
 .align	4
 _abi_test_clobber_x23:
+	AARCH64_VALID_CALL_TARGET
 	mov	x23, xzr
 	ret
 
@@ -318,6 +346,7 @@
 .private_extern	_abi_test_clobber_x24
 .align	4
 _abi_test_clobber_x24:
+	AARCH64_VALID_CALL_TARGET
 	mov	x24, xzr
 	ret
 
@@ -326,6 +355,7 @@
 .private_extern	_abi_test_clobber_x25
 .align	4
 _abi_test_clobber_x25:
+	AARCH64_VALID_CALL_TARGET
 	mov	x25, xzr
 	ret
 
@@ -334,6 +364,7 @@
 .private_extern	_abi_test_clobber_x26
 .align	4
 _abi_test_clobber_x26:
+	AARCH64_VALID_CALL_TARGET
 	mov	x26, xzr
 	ret
 
@@ -342,6 +373,7 @@
 .private_extern	_abi_test_clobber_x27
 .align	4
 _abi_test_clobber_x27:
+	AARCH64_VALID_CALL_TARGET
 	mov	x27, xzr
 	ret
 
@@ -350,6 +382,7 @@
 .private_extern	_abi_test_clobber_x28
 .align	4
 _abi_test_clobber_x28:
+	AARCH64_VALID_CALL_TARGET
 	mov	x28, xzr
 	ret
 
@@ -358,6 +391,7 @@
 .private_extern	_abi_test_clobber_x29
 .align	4
 _abi_test_clobber_x29:
+	AARCH64_VALID_CALL_TARGET
 	mov	x29, xzr
 	ret
 
@@ -366,6 +400,7 @@
 .private_extern	_abi_test_clobber_d0
 .align	4
 _abi_test_clobber_d0:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d0, xzr
 	ret
 
@@ -374,6 +409,7 @@
 .private_extern	_abi_test_clobber_d1
 .align	4
 _abi_test_clobber_d1:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d1, xzr
 	ret
 
@@ -382,6 +418,7 @@
 .private_extern	_abi_test_clobber_d2
 .align	4
 _abi_test_clobber_d2:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d2, xzr
 	ret
 
@@ -390,6 +427,7 @@
 .private_extern	_abi_test_clobber_d3
 .align	4
 _abi_test_clobber_d3:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d3, xzr
 	ret
 
@@ -398,6 +436,7 @@
 .private_extern	_abi_test_clobber_d4
 .align	4
 _abi_test_clobber_d4:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d4, xzr
 	ret
 
@@ -406,6 +445,7 @@
 .private_extern	_abi_test_clobber_d5
 .align	4
 _abi_test_clobber_d5:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d5, xzr
 	ret
 
@@ -414,6 +454,7 @@
 .private_extern	_abi_test_clobber_d6
 .align	4
 _abi_test_clobber_d6:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d6, xzr
 	ret
 
@@ -422,6 +463,7 @@
 .private_extern	_abi_test_clobber_d7
 .align	4
 _abi_test_clobber_d7:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d7, xzr
 	ret
 
@@ -430,6 +472,7 @@
 .private_extern	_abi_test_clobber_d8
 .align	4
 _abi_test_clobber_d8:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d8, xzr
 	ret
 
@@ -438,6 +481,7 @@
 .private_extern	_abi_test_clobber_d9
 .align	4
 _abi_test_clobber_d9:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d9, xzr
 	ret
 
@@ -446,6 +490,7 @@
 .private_extern	_abi_test_clobber_d10
 .align	4
 _abi_test_clobber_d10:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d10, xzr
 	ret
 
@@ -454,6 +499,7 @@
 .private_extern	_abi_test_clobber_d11
 .align	4
 _abi_test_clobber_d11:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d11, xzr
 	ret
 
@@ -462,6 +508,7 @@
 .private_extern	_abi_test_clobber_d12
 .align	4
 _abi_test_clobber_d12:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d12, xzr
 	ret
 
@@ -470,6 +517,7 @@
 .private_extern	_abi_test_clobber_d13
 .align	4
 _abi_test_clobber_d13:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d13, xzr
 	ret
 
@@ -478,6 +526,7 @@
 .private_extern	_abi_test_clobber_d14
 .align	4
 _abi_test_clobber_d14:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d14, xzr
 	ret
 
@@ -486,6 +535,7 @@
 .private_extern	_abi_test_clobber_d15
 .align	4
 _abi_test_clobber_d15:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d15, xzr
 	ret
 
@@ -494,6 +544,7 @@
 .private_extern	_abi_test_clobber_d16
 .align	4
 _abi_test_clobber_d16:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d16, xzr
 	ret
 
@@ -502,6 +553,7 @@
 .private_extern	_abi_test_clobber_d17
 .align	4
 _abi_test_clobber_d17:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d17, xzr
 	ret
 
@@ -510,6 +562,7 @@
 .private_extern	_abi_test_clobber_d18
 .align	4
 _abi_test_clobber_d18:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d18, xzr
 	ret
 
@@ -518,6 +571,7 @@
 .private_extern	_abi_test_clobber_d19
 .align	4
 _abi_test_clobber_d19:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d19, xzr
 	ret
 
@@ -526,6 +580,7 @@
 .private_extern	_abi_test_clobber_d20
 .align	4
 _abi_test_clobber_d20:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d20, xzr
 	ret
 
@@ -534,6 +589,7 @@
 .private_extern	_abi_test_clobber_d21
 .align	4
 _abi_test_clobber_d21:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d21, xzr
 	ret
 
@@ -542,6 +598,7 @@
 .private_extern	_abi_test_clobber_d22
 .align	4
 _abi_test_clobber_d22:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d22, xzr
 	ret
 
@@ -550,6 +607,7 @@
 .private_extern	_abi_test_clobber_d23
 .align	4
 _abi_test_clobber_d23:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d23, xzr
 	ret
 
@@ -558,6 +616,7 @@
 .private_extern	_abi_test_clobber_d24
 .align	4
 _abi_test_clobber_d24:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d24, xzr
 	ret
 
@@ -566,6 +625,7 @@
 .private_extern	_abi_test_clobber_d25
 .align	4
 _abi_test_clobber_d25:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d25, xzr
 	ret
 
@@ -574,6 +634,7 @@
 .private_extern	_abi_test_clobber_d26
 .align	4
 _abi_test_clobber_d26:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d26, xzr
 	ret
 
@@ -582,6 +643,7 @@
 .private_extern	_abi_test_clobber_d27
 .align	4
 _abi_test_clobber_d27:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d27, xzr
 	ret
 
@@ -590,6 +652,7 @@
 .private_extern	_abi_test_clobber_d28
 .align	4
 _abi_test_clobber_d28:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d28, xzr
 	ret
 
@@ -598,6 +661,7 @@
 .private_extern	_abi_test_clobber_d29
 .align	4
 _abi_test_clobber_d29:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d29, xzr
 	ret
 
@@ -606,6 +670,7 @@
 .private_extern	_abi_test_clobber_d30
 .align	4
 _abi_test_clobber_d30:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d30, xzr
 	ret
 
@@ -614,6 +679,7 @@
 .private_extern	_abi_test_clobber_d31
 .align	4
 _abi_test_clobber_d31:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d31, xzr
 	ret
 
@@ -622,6 +688,7 @@
 .private_extern	_abi_test_clobber_v8_upper
 .align	4
 _abi_test_clobber_v8_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v8.d[1], xzr
 	ret
 
@@ -630,6 +697,7 @@
 .private_extern	_abi_test_clobber_v9_upper
 .align	4
 _abi_test_clobber_v9_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v9.d[1], xzr
 	ret
 
@@ -638,6 +706,7 @@
 .private_extern	_abi_test_clobber_v10_upper
 .align	4
 _abi_test_clobber_v10_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v10.d[1], xzr
 	ret
 
@@ -646,6 +715,7 @@
 .private_extern	_abi_test_clobber_v11_upper
 .align	4
 _abi_test_clobber_v11_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v11.d[1], xzr
 	ret
 
@@ -654,6 +724,7 @@
 .private_extern	_abi_test_clobber_v12_upper
 .align	4
 _abi_test_clobber_v12_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v12.d[1], xzr
 	ret
 
@@ -662,6 +733,7 @@
 .private_extern	_abi_test_clobber_v13_upper
 .align	4
 _abi_test_clobber_v13_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v13.d[1], xzr
 	ret
 
@@ -670,6 +742,7 @@
 .private_extern	_abi_test_clobber_v14_upper
 .align	4
 _abi_test_clobber_v14_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v14.d[1], xzr
 	ret
 
@@ -678,6 +751,7 @@
 .private_extern	_abi_test_clobber_v15_upper
 .align	4
 _abi_test_clobber_v15_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v15.d[1], xzr
 	ret
 
diff --git a/deps/boringssl/ios-arm/crypto/fipsmodule/aesv8-armx32.S b/deps/boringssl/ios-arm/crypto/fipsmodule/aesv8-armx32.S
index 7392231..87b4b0a 100644
--- a/deps/boringssl/ios-arm/crypto/fipsmodule/aesv8-armx32.S
+++ b/deps/boringssl/ios-arm/crypto/fipsmodule/aesv8-armx32.S
@@ -248,6 +248,7 @@
 #endif
 .align	5
 _aes_hw_encrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	r3,[r2,#240]
 	vld1.32	{q0},[r2]!
 	vld1.8	{q2},[r0]
@@ -280,6 +281,7 @@
 #endif
 .align	5
 _aes_hw_decrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	r3,[r2,#240]
 	vld1.32	{q0},[r2]!
 	vld1.8	{q2},[r0]
@@ -628,20 +630,34 @@
 	add	r7,r3,#32
 	mov	r6,r5
 	movlo	r12,#0
+
+	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
+	@ affected by silicon errata #1742098 [0] and #1655431 [1],
+	@ respectively, where the second instruction of an aese/aesmc
+	@ instruction pair may execute twice if an interrupt is taken right
+	@ after the first instruction consumes an input register of which a
+	@ single 32-bit lane has been updated the last time it was modified.
+	@ 
+	@ This function uses a counter in one 32-bit lane. The vmov.32 lines
+	@ could write to q1 and q10 directly, but that trips these bugs.
+	@ We write to q6 and copy to the final register as a workaround.
+	@ 
+	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
+	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
 #ifndef __ARMEB__
 	rev	r8, r8
 #endif
-	vorr	q1,q0,q0
 	add	r10, r8, #1
-	vorr	q10,q0,q0
-	add	r8, r8, #2
 	vorr	q6,q0,q0
 	rev	r10, r10
-	vmov.32	d3[1],r10
+	vmov.32	d13[1],r10
+	add	r8, r8, #2
+	vorr	q1,q6,q6
 	bls	Lctr32_tail
 	rev	r12, r8
+	vmov.32	d13[1],r12
 	sub	r2,r2,#3		@ bias
-	vmov.32	d21[1],r12
+	vorr	q10,q6,q6
 	b	Loop3x_ctr32
 
 .align	4
@@ -668,11 +684,11 @@
 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
 .byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
 	vld1.8	{q2},[r0]!
-	vorr	q0,q6,q6
+	add	r9,r8,#1
 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
 	vld1.8	{q3},[r0]!
-	vorr	q1,q6,q6
+	rev	r9,r9
 .byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
@@ -681,8 +697,6 @@
 	mov	r7,r3
 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
 .byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
-	vorr	q10,q6,q6
-	add	r9,r8,#1
 .byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
@@ -697,21 +711,26 @@
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
+	 @ Note the logic to update q0, q1, and q10 is written to work
+	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
+	 @ 32-bit mode. See the comment above.
 	veor	q11,q11,q7
-	rev	r9,r9
+	vmov.32	d13[1], r9
 .byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d1[1], r9
+	vorr	q0,q6,q6
 	rev	r10,r10
 .byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
+	vmov.32	d13[1], r10
+	rev	r12,r8
 .byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vmov.32	d3[1], r10
-	rev	r12,r8
+	vorr	q1,q6,q6
+	vmov.32	d13[1], r12
 .byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d21[1], r12
+	vorr	q10,q6,q6
 	subs	r2,r2,#3
 .byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
 .byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
diff --git a/deps/boringssl/ios-arm/crypto/fipsmodule/ghashv8-armx32.S b/deps/boringssl/ios-arm/crypto/fipsmodule/ghashv8-armx32.S
index f5de67f..4a7497f 100644
--- a/deps/boringssl/ios-arm/crypto/fipsmodule/ghashv8-armx32.S
+++ b/deps/boringssl/ios-arm/crypto/fipsmodule/ghashv8-armx32.S
@@ -25,6 +25,7 @@
 #endif
 .align	4
 _gcm_init_v8:
+	AARCH64_VALID_CALL_TARGET
 	vld1.64	{q9},[r1]		@ load input H
 	vmov.i8	q11,#0xe1
 	vshl.i64	q11,q11,#57		@ 0xc2.0
@@ -78,6 +79,7 @@
 #endif
 .align	4
 _gcm_gmult_v8:
+	AARCH64_VALID_CALL_TARGET
 	vld1.64	{q9},[r0]		@ load Xi
 	vmov.i8	q11,#0xe1
 	vld1.64	{q12,q13},[r1]	@ load twisted H, ...
@@ -122,6 +124,7 @@
 #endif
 .align	4
 _gcm_ghash_v8:
+	AARCH64_VALID_CALL_TARGET
 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ 32-bit ABI says so
 	vld1.64	{q0},[r0]		@ load [rotated] Xi
 						@ "[rotated]" means that
diff --git a/deps/boringssl/linux-aarch64/crypto/chacha/chacha-armv8.S b/deps/boringssl/linux-aarch64/crypto/chacha/chacha-armv8.S
index 49449bf..e7f019c 100644
--- a/deps/boringssl/linux-aarch64/crypto/chacha/chacha-armv8.S
+++ b/deps/boringssl/linux-aarch64/crypto/chacha/chacha-armv8.S
@@ -16,6 +16,7 @@
 #include <openssl/arm_arch.h>
 
 
+.hidden	OPENSSL_armcap_P
 
 .section	.rodata
 
@@ -34,6 +35,7 @@
 .type	ChaCha20_ctr32,%function
 .align	5
 ChaCha20_ctr32:
+	AARCH64_VALID_CALL_TARGET
 	cbz	x2,.Labort
 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
 	adrp	x5,:pg_hi21_nc:OPENSSL_armcap_P
@@ -47,6 +49,7 @@
 	b.ne	ChaCha20_neon
 
 .Lshort:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -259,6 +262,7 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
 .Labort:
 	ret
 
@@ -315,12 +319,14 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ChaCha20_ctr32,.-ChaCha20_ctr32
 
 .type	ChaCha20_neon,%function
 .align	5
 ChaCha20_neon:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -701,6 +707,7 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 
 .Ltail_neon:
@@ -810,11 +817,13 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ChaCha20_neon,.-ChaCha20_neon
 .type	ChaCha20_512_neon,%function
 .align	5
 ChaCha20_512_neon:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-96]!
 	add	x29,sp,#0
 
@@ -1978,6 +1987,7 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	ChaCha20_512_neon,.-ChaCha20_512_neon
 #endif
diff --git a/deps/boringssl/linux-aarch64/crypto/fipsmodule/aesv8-armx64.S b/deps/boringssl/linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
index 60c70a2..f8cd03d 100644
--- a/deps/boringssl/linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
+++ b/deps/boringssl/linux-aarch64/crypto/fipsmodule/aesv8-armx64.S
@@ -33,6 +33,8 @@
 .align	5
 aes_hw_set_encrypt_key:
 .Lenc_key:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	mov	x3,#-1
@@ -201,6 +203,7 @@
 .type	aes_hw_set_decrypt_key,%function
 .align	5
 aes_hw_set_decrypt_key:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	bl	.Lenc_key
@@ -234,6 +237,7 @@
 	eor	x0,x0,x0		// return value
 .Ldec_key_abort:
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	aes_hw_set_decrypt_key,.-aes_hw_set_decrypt_key
 .globl	aes_hw_encrypt
@@ -241,6 +245,7 @@
 .type	aes_hw_encrypt,%function
 .align	5
 aes_hw_encrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	w3,[x2,#240]
 	ld1	{v0.4s},[x2],#16
 	ld1	{v2.16b},[x0]
@@ -271,6 +276,7 @@
 .type	aes_hw_decrypt,%function
 .align	5
 aes_hw_decrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	w3,[x2,#240]
 	ld1	{v0.4s},[x2],#16
 	ld1	{v2.16b},[x0]
@@ -301,6 +307,8 @@
 .type	aes_hw_cbc_encrypt,%function
 .align	5
 aes_hw_cbc_encrypt:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	subs	x2,x2,#16
@@ -592,6 +600,8 @@
 .type	aes_hw_ctr32_encrypt_blocks,%function
 .align	5
 aes_hw_ctr32_encrypt_blocks:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	ldr	w5,[x3,#240]
@@ -611,20 +621,34 @@
 	add	x7,x3,#32
 	mov	w6,w5
 	csel	x12,xzr,x12,lo
+
+	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
+	// affected by silicon errata #1742098 [0] and #1655431 [1],
+	// respectively, where the second instruction of an aese/aesmc
+	// instruction pair may execute twice if an interrupt is taken right
+	// after the first instruction consumes an input register of which a
+	// single 32-bit lane has been updated the last time it was modified.
+	//
+	// This function uses a counter in one 32-bit lane. The vmov lines
+	// could write to v1.16b and v18.16b directly, but that trips these bugs.
+	// We write to v6.16b and copy to the final register as a workaround.
+	//
+	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
+	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
 #ifndef __ARMEB__
 	rev	w8, w8
 #endif
-	orr	v1.16b,v0.16b,v0.16b
 	add	w10, w8, #1
-	orr	v18.16b,v0.16b,v0.16b
-	add	w8, w8, #2
 	orr	v6.16b,v0.16b,v0.16b
 	rev	w10, w10
-	mov	v1.s[3],w10
+	mov	v6.s[3],w10
+	add	w8, w8, #2
+	orr	v1.16b,v6.16b,v6.16b
 	b.ls	.Lctr32_tail
 	rev	w12, w8
+	mov	v6.s[3],w12
 	sub	x2,x2,#3		// bias
-	mov	v18.s[3],w12
+	orr	v18.16b,v6.16b,v6.16b
 	b	.Loop3x_ctr32
 
 .align	4
@@ -651,11 +675,11 @@
 	aese	v1.16b,v16.16b
 	aesmc	v5.16b,v1.16b
 	ld1	{v2.16b},[x0],#16
-	orr	v0.16b,v6.16b,v6.16b
+	add	w9,w8,#1
 	aese	v18.16b,v16.16b
 	aesmc	v18.16b,v18.16b
 	ld1	{v3.16b},[x0],#16
-	orr	v1.16b,v6.16b,v6.16b
+	rev	w9,w9
 	aese	v4.16b,v17.16b
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v17.16b
@@ -664,8 +688,6 @@
 	mov	x7,x3
 	aese	v18.16b,v17.16b
 	aesmc	v17.16b,v18.16b
-	orr	v18.16b,v6.16b,v6.16b
-	add	w9,w8,#1
 	aese	v4.16b,v20.16b
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v20.16b
@@ -680,21 +702,26 @@
 	aesmc	v4.16b,v4.16b
 	aese	v5.16b,v21.16b
 	aesmc	v5.16b,v5.16b
+	 // Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
+	 // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
+	 // 32-bit mode. See the comment above.
 	eor	v19.16b,v19.16b,v7.16b
-	rev	w9,w9
+	mov	v6.s[3], w9
 	aese	v17.16b,v21.16b
 	aesmc	v17.16b,v17.16b
-	mov	v0.s[3], w9
+	orr	v0.16b,v6.16b,v6.16b
 	rev	w10,w10
 	aese	v4.16b,v22.16b
 	aesmc	v4.16b,v4.16b
+	mov	v6.s[3], w10
+	rev	w12,w8
 	aese	v5.16b,v22.16b
 	aesmc	v5.16b,v5.16b
-	mov	v1.s[3], w10
-	rev	w12,w8
+	orr	v1.16b,v6.16b,v6.16b
+	mov	v6.s[3], w12
 	aese	v17.16b,v22.16b
 	aesmc	v17.16b,v17.16b
-	mov	v18.s[3], w12
+	orr	v18.16b,v6.16b,v6.16b
 	subs	x2,x2,#3
 	aese	v4.16b,v23.16b
 	aese	v5.16b,v23.16b
diff --git a/deps/boringssl/linux-aarch64/crypto/fipsmodule/armv8-mont.S b/deps/boringssl/linux-aarch64/crypto/fipsmodule/armv8-mont.S
index 360bf4c..db89859 100644
--- a/deps/boringssl/linux-aarch64/crypto/fipsmodule/armv8-mont.S
+++ b/deps/boringssl/linux-aarch64/crypto/fipsmodule/armv8-mont.S
@@ -13,6 +13,8 @@
 #if defined(BORINGSSL_PREFIX)
 #include <boringssl_prefix_symbols_asm.h>
 #endif
+#include <openssl/arm_arch.h>
+
 .text
 
 .globl	bn_mul_mont
@@ -20,6 +22,7 @@
 .type	bn_mul_mont,%function
 .align	5
 bn_mul_mont:
+	AARCH64_SIGN_LINK_REGISTER
 	tst	x5,#7
 	b.eq	__bn_sqr8x_mont
 	tst	x5,#3
@@ -217,11 +220,14 @@
 	mov	x0,#1
 	ldp	x23,x24,[x29,#48]
 	ldr	x29,[sp],#64
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	bn_mul_mont,.-bn_mul_mont
 .type	__bn_sqr8x_mont,%function
 .align	5
 __bn_sqr8x_mont:
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
+	// only from bn_mul_mont which has already signed the return address.
 	cmp	x1,x2
 	b.ne	__bn_mul4x_mont
 .Lsqr8x_mont:
@@ -975,11 +981,16 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldr	x29,[sp],#128
+	// x30 is popped earlier
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	__bn_sqr8x_mont,.-__bn_sqr8x_mont
 .type	__bn_mul4x_mont,%function
 .align	5
 __bn_mul4x_mont:
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
+	// only from bn_mul_mont or __bn_sqr8x_mont which have already signed the
+	// return address.
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 	stp	x19,x20,[sp,#16]
@@ -1413,6 +1424,8 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldr	x29,[sp],#128
+	// x30 is popped earlier
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	__bn_mul4x_mont,.-__bn_mul4x_mont
 .byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
diff --git a/deps/boringssl/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S b/deps/boringssl/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
index f876db3..098967b 100644
--- a/deps/boringssl/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
+++ b/deps/boringssl/linux-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
@@ -13,6 +13,8 @@
 #if defined(BORINGSSL_PREFIX)
 #include <boringssl_prefix_symbols_asm.h>
 #endif
+#include <openssl/arm_arch.h>
+
 .text
 
 .globl	gcm_init_neon
@@ -20,6 +22,7 @@
 .type	gcm_init_neon,%function
 .align	4
 gcm_init_neon:
+	AARCH64_VALID_CALL_TARGET
 	// This function is adapted from gcm_init_v8. xC2 is t3.
 	ld1	{v17.2d}, [x1]			// load H
 	movi	v19.16b, #0xe1
@@ -45,6 +48,7 @@
 .type	gcm_gmult_neon,%function
 .align	4
 gcm_gmult_neon:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v3.16b}, [x0]		// load Xi
 	ld1	{v5.1d}, [x1], #8		// load twisted H
 	ld1	{v6.1d}, [x1]
@@ -64,6 +68,7 @@
 .type	gcm_ghash_neon,%function
 .align	4
 gcm_ghash_neon:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v0.16b}, [x0]		// load Xi
 	ld1	{v5.1d}, [x1], #8		// load twisted H
 	ld1	{v6.1d}, [x1]
diff --git a/deps/boringssl/linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S b/deps/boringssl/linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
index 37d9731..62e5884 100644
--- a/deps/boringssl/linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
+++ b/deps/boringssl/linux-aarch64/crypto/fipsmodule/ghashv8-armx64.S
@@ -22,6 +22,7 @@
 .type	gcm_init_v8,%function
 .align	4
 gcm_init_v8:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v17.2d},[x1]		//load input H
 	movi	v19.16b,#0xe1
 	shl	v19.2d,v19.2d,#57		//0xc2.0
@@ -73,6 +74,7 @@
 .type	gcm_gmult_v8,%function
 .align	4
 gcm_gmult_v8:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v17.2d},[x0]		//load Xi
 	movi	v19.16b,#0xe1
 	ld1	{v20.2d,v21.2d},[x1]	//load twisted H, ...
@@ -115,6 +117,7 @@
 .type	gcm_ghash_v8,%function
 .align	4
 gcm_ghash_v8:
+	AARCH64_VALID_CALL_TARGET
 	ld1	{v0.2d},[x0]		//load [rotated] Xi
 						//"[rotated]" means that
 						//loaded value would have
diff --git a/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha1-armv8.S b/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha1-armv8.S
index f681b99..838ec32 100644
--- a/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha1-armv8.S
+++ b/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha1-armv8.S
@@ -18,11 +18,14 @@
 .text
 
 
+.hidden	OPENSSL_armcap_P
 .globl	sha1_block_data_order
 .hidden	sha1_block_data_order
 .type	sha1_block_data_order,%function
 .align	6
 sha1_block_data_order:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
 	adrp	x16,:pg_hi21_nc:OPENSSL_armcap_P
 #else
@@ -1090,6 +1093,8 @@
 .type	sha1_block_armv8,%function
 .align	6
 sha1_block_armv8:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
 .Lv8_entry:
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
@@ -1228,8 +1233,6 @@
 .byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
 .align	2
 .align	2
-.comm	OPENSSL_armcap_P,4,4
-.hidden	OPENSSL_armcap_P
 #endif
 #endif  // !OPENSSL_NO_ASM
 .section	.note.GNU-stack,"",%progbits
diff --git a/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha256-armv8.S b/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha256-armv8.S
index 6e09f69..a4f170e 100644
--- a/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha256-armv8.S
+++ b/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha256-armv8.S
@@ -59,11 +59,13 @@
 .text
 
 
+.hidden	OPENSSL_armcap_P
 .globl	sha256_block_data_order
 .hidden	sha256_block_data_order
 .type	sha256_block_data_order,%function
 .align	6
 sha256_block_data_order:
+	AARCH64_VALID_CALL_TARGET
 #ifndef	__KERNEL__
 #if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
 	adrp	x16,:pg_hi21_nc:OPENSSL_armcap_P
@@ -74,6 +76,7 @@
 	tst	w16,#ARMV8_SHA256
 	b.ne	.Lv8_entry
 #endif
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 
@@ -1034,6 +1037,7 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#128
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	sha256_block_data_order,.-sha256_block_data_order
 
@@ -1068,6 +1072,7 @@
 .align	6
 sha256_block_armv8:
 .Lv8_entry:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -1204,10 +1209,6 @@
 	ret
 .size	sha256_block_armv8,.-sha256_block_armv8
 #endif
-#ifndef	__KERNEL__
-.comm	OPENSSL_armcap_P,4,4
-.hidden	OPENSSL_armcap_P
-#endif
 #endif
 #endif  // !OPENSSL_NO_ASM
 .section	.note.GNU-stack,"",%progbits
diff --git a/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha512-armv8.S b/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha512-armv8.S
index 7b9b22a..98b7a7e 100644
--- a/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha512-armv8.S
+++ b/deps/boringssl/linux-aarch64/crypto/fipsmodule/sha512-armv8.S
@@ -59,11 +59,13 @@
 .text
 
 
+.hidden	OPENSSL_armcap_P
 .globl	sha512_block_data_order
 .hidden	sha512_block_data_order
 .type	sha512_block_data_order,%function
 .align	6
 sha512_block_data_order:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 
@@ -1024,6 +1026,7 @@
 	ldp	x25,x26,[x29,#64]
 	ldp	x27,x28,[x29,#80]
 	ldp	x29,x30,[sp],#128
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	sha512_block_data_order,.-sha512_block_data_order
 
@@ -1076,10 +1079,6 @@
 .byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
 .align	2
 .align	2
-#ifndef	__KERNEL__
-.comm	OPENSSL_armcap_P,4,4
-.hidden	OPENSSL_armcap_P
-#endif
 #endif
 #endif  // !OPENSSL_NO_ASM
 .section	.note.GNU-stack,"",%progbits
diff --git a/deps/boringssl/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S b/deps/boringssl/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
index f57b7b5..c02be40 100644
--- a/deps/boringssl/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
+++ b/deps/boringssl/linux-aarch64/crypto/fipsmodule/vpaes-armv8.S
@@ -13,6 +13,8 @@
 #if defined(BORINGSSL_PREFIX)
 #include <boringssl_prefix_symbols_asm.h>
 #endif
+#include <openssl/arm_arch.h>
+
 .section	.rodata
 
 .type	_vpaes_consts,%object
@@ -215,6 +217,7 @@
 .type	vpaes_encrypt,%function
 .align	4
 vpaes_encrypt:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -224,6 +227,7 @@
 	st1	{v0.16b}, [x1]
 
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	vpaes_encrypt,.-vpaes_encrypt
 
@@ -452,6 +456,7 @@
 .type	vpaes_decrypt,%function
 .align	4
 vpaes_decrypt:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 
@@ -461,6 +466,7 @@
 	st1	{v0.16b}, [x1]
 
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	vpaes_decrypt,.-vpaes_decrypt
 
@@ -630,6 +636,7 @@
 .type	_vpaes_schedule_core,%function
 .align	4
 _vpaes_schedule_core:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29, x30, [sp,#-16]!
 	add	x29,sp,#0
 
@@ -799,6 +806,7 @@
 	eor	v6.16b, v6.16b, v6.16b		// vpxor	%xmm6,	%xmm6,	%xmm6
 	eor	v7.16b, v7.16b, v7.16b		// vpxor	%xmm7,	%xmm7,	%xmm7
 	ldp	x29, x30, [sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	_vpaes_schedule_core,.-_vpaes_schedule_core
 
@@ -1001,7 +1009,7 @@
 
 .Lschedule_mangle_both:
 	tbl	v3.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
-	add	x8, x8, #64-16			// add	$-16,	%r8
+	add	x8, x8, #48			// add	$-16,	%r8
 	and	x8, x8, #~(1<<6)		// and	$0x30,	%r8
 	st1	{v3.2d}, [x2]			// vmovdqu	%xmm3,	(%rdx)
 	ret
@@ -1012,6 +1020,7 @@
 .type	vpaes_set_encrypt_key,%function
 .align	4
 vpaes_set_encrypt_key:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1027,6 +1036,7 @@
 
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	vpaes_set_encrypt_key,.-vpaes_set_encrypt_key
 
@@ -1035,6 +1045,7 @@
 .type	vpaes_set_decrypt_key,%function
 .align	4
 vpaes_set_decrypt_key:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1054,6 +1065,7 @@
 
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	vpaes_set_decrypt_key,.-vpaes_set_decrypt_key
 .globl	vpaes_cbc_encrypt
@@ -1061,6 +1073,7 @@
 .type	vpaes_cbc_encrypt,%function
 .align	4
 vpaes_cbc_encrypt:
+	AARCH64_SIGN_LINK_REGISTER
 	cbz	x2, .Lcbc_abort
 	cmp	w5, #0			// check direction
 	b.eq	vpaes_cbc_decrypt
@@ -1087,6 +1100,7 @@
 	st1	{v0.16b}, [x4]	// write ivec
 
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 .Lcbc_abort:
 	ret
 .size	vpaes_cbc_encrypt,.-vpaes_cbc_encrypt
@@ -1094,6 +1108,8 @@
 .type	vpaes_cbc_decrypt,%function
 .align	4
 vpaes_cbc_decrypt:
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
+	// only from vpaes_cbc_encrypt which has already signed the return address.
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1135,6 +1151,7 @@
 	ldp	d10,d11,[sp],#16
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	vpaes_cbc_decrypt,.-vpaes_cbc_decrypt
 .globl	vpaes_ctr32_encrypt_blocks
@@ -1142,6 +1159,7 @@
 .type	vpaes_ctr32_encrypt_blocks,%function
 .align	4
 vpaes_ctr32_encrypt_blocks:
+	AARCH64_SIGN_LINK_REGISTER
 	stp	x29,x30,[sp,#-16]!
 	add	x29,sp,#0
 	stp	d8,d9,[sp,#-16]!	// ABI spec says so
@@ -1209,6 +1227,7 @@
 	ldp	d10,d11,[sp],#16
 	ldp	d8,d9,[sp],#16
 	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	vpaes_ctr32_encrypt_blocks,.-vpaes_ctr32_encrypt_blocks
 #endif
diff --git a/deps/boringssl/linux-aarch64/crypto/test/trampoline-armv8.S b/deps/boringssl/linux-aarch64/crypto/test/trampoline-armv8.S
index f7bbfc2..8928d7f 100644
--- a/deps/boringssl/linux-aarch64/crypto/test/trampoline-armv8.S
+++ b/deps/boringssl/linux-aarch64/crypto/test/trampoline-armv8.S
@@ -13,6 +13,8 @@
 #if defined(BORINGSSL_PREFIX)
 #include <boringssl_prefix_symbols_asm.h>
 #endif
+#include <openssl/arm_arch.h>
+
 .text
 
 // abi_test_trampoline loads callee-saved registers from |state|, calls |func|
@@ -26,6 +28,8 @@
 .hidden	abi_test_trampoline
 .align	4
 abi_test_trampoline:
+.Labi_test_trampoline_begin:
+	AARCH64_SIGN_LINK_REGISTER
 	// Stack layout (low to high addresses)
 	//   x29,x30 (16 bytes)
 	//    d8-d15 (64 bytes)
@@ -128,6 +132,7 @@
 	ldp	x27, x28, [sp, #144]
 
 	ldp	x29, x30, [sp], #176
+	AARCH64_VALIDATE_LINK_REGISTER
 	ret
 .size	abi_test_trampoline,.-abi_test_trampoline
 .type	abi_test_clobber_x0, %function
@@ -135,6 +140,7 @@
 .hidden	abi_test_clobber_x0
 .align	4
 abi_test_clobber_x0:
+	AARCH64_VALID_CALL_TARGET
 	mov	x0, xzr
 	ret
 .size	abi_test_clobber_x0,.-abi_test_clobber_x0
@@ -143,6 +149,7 @@
 .hidden	abi_test_clobber_x1
 .align	4
 abi_test_clobber_x1:
+	AARCH64_VALID_CALL_TARGET
 	mov	x1, xzr
 	ret
 .size	abi_test_clobber_x1,.-abi_test_clobber_x1
@@ -151,6 +158,7 @@
 .hidden	abi_test_clobber_x2
 .align	4
 abi_test_clobber_x2:
+	AARCH64_VALID_CALL_TARGET
 	mov	x2, xzr
 	ret
 .size	abi_test_clobber_x2,.-abi_test_clobber_x2
@@ -159,6 +167,7 @@
 .hidden	abi_test_clobber_x3
 .align	4
 abi_test_clobber_x3:
+	AARCH64_VALID_CALL_TARGET
 	mov	x3, xzr
 	ret
 .size	abi_test_clobber_x3,.-abi_test_clobber_x3
@@ -167,6 +176,7 @@
 .hidden	abi_test_clobber_x4
 .align	4
 abi_test_clobber_x4:
+	AARCH64_VALID_CALL_TARGET
 	mov	x4, xzr
 	ret
 .size	abi_test_clobber_x4,.-abi_test_clobber_x4
@@ -175,6 +185,7 @@
 .hidden	abi_test_clobber_x5
 .align	4
 abi_test_clobber_x5:
+	AARCH64_VALID_CALL_TARGET
 	mov	x5, xzr
 	ret
 .size	abi_test_clobber_x5,.-abi_test_clobber_x5
@@ -183,6 +194,7 @@
 .hidden	abi_test_clobber_x6
 .align	4
 abi_test_clobber_x6:
+	AARCH64_VALID_CALL_TARGET
 	mov	x6, xzr
 	ret
 .size	abi_test_clobber_x6,.-abi_test_clobber_x6
@@ -191,6 +203,7 @@
 .hidden	abi_test_clobber_x7
 .align	4
 abi_test_clobber_x7:
+	AARCH64_VALID_CALL_TARGET
 	mov	x7, xzr
 	ret
 .size	abi_test_clobber_x7,.-abi_test_clobber_x7
@@ -199,6 +212,7 @@
 .hidden	abi_test_clobber_x8
 .align	4
 abi_test_clobber_x8:
+	AARCH64_VALID_CALL_TARGET
 	mov	x8, xzr
 	ret
 .size	abi_test_clobber_x8,.-abi_test_clobber_x8
@@ -207,6 +221,7 @@
 .hidden	abi_test_clobber_x9
 .align	4
 abi_test_clobber_x9:
+	AARCH64_VALID_CALL_TARGET
 	mov	x9, xzr
 	ret
 .size	abi_test_clobber_x9,.-abi_test_clobber_x9
@@ -215,6 +230,7 @@
 .hidden	abi_test_clobber_x10
 .align	4
 abi_test_clobber_x10:
+	AARCH64_VALID_CALL_TARGET
 	mov	x10, xzr
 	ret
 .size	abi_test_clobber_x10,.-abi_test_clobber_x10
@@ -223,6 +239,7 @@
 .hidden	abi_test_clobber_x11
 .align	4
 abi_test_clobber_x11:
+	AARCH64_VALID_CALL_TARGET
 	mov	x11, xzr
 	ret
 .size	abi_test_clobber_x11,.-abi_test_clobber_x11
@@ -231,6 +248,7 @@
 .hidden	abi_test_clobber_x12
 .align	4
 abi_test_clobber_x12:
+	AARCH64_VALID_CALL_TARGET
 	mov	x12, xzr
 	ret
 .size	abi_test_clobber_x12,.-abi_test_clobber_x12
@@ -239,6 +257,7 @@
 .hidden	abi_test_clobber_x13
 .align	4
 abi_test_clobber_x13:
+	AARCH64_VALID_CALL_TARGET
 	mov	x13, xzr
 	ret
 .size	abi_test_clobber_x13,.-abi_test_clobber_x13
@@ -247,6 +266,7 @@
 .hidden	abi_test_clobber_x14
 .align	4
 abi_test_clobber_x14:
+	AARCH64_VALID_CALL_TARGET
 	mov	x14, xzr
 	ret
 .size	abi_test_clobber_x14,.-abi_test_clobber_x14
@@ -255,6 +275,7 @@
 .hidden	abi_test_clobber_x15
 .align	4
 abi_test_clobber_x15:
+	AARCH64_VALID_CALL_TARGET
 	mov	x15, xzr
 	ret
 .size	abi_test_clobber_x15,.-abi_test_clobber_x15
@@ -263,6 +284,7 @@
 .hidden	abi_test_clobber_x16
 .align	4
 abi_test_clobber_x16:
+	AARCH64_VALID_CALL_TARGET
 	mov	x16, xzr
 	ret
 .size	abi_test_clobber_x16,.-abi_test_clobber_x16
@@ -271,6 +293,7 @@
 .hidden	abi_test_clobber_x17
 .align	4
 abi_test_clobber_x17:
+	AARCH64_VALID_CALL_TARGET
 	mov	x17, xzr
 	ret
 .size	abi_test_clobber_x17,.-abi_test_clobber_x17
@@ -279,6 +302,7 @@
 .hidden	abi_test_clobber_x19
 .align	4
 abi_test_clobber_x19:
+	AARCH64_VALID_CALL_TARGET
 	mov	x19, xzr
 	ret
 .size	abi_test_clobber_x19,.-abi_test_clobber_x19
@@ -287,6 +311,7 @@
 .hidden	abi_test_clobber_x20
 .align	4
 abi_test_clobber_x20:
+	AARCH64_VALID_CALL_TARGET
 	mov	x20, xzr
 	ret
 .size	abi_test_clobber_x20,.-abi_test_clobber_x20
@@ -295,6 +320,7 @@
 .hidden	abi_test_clobber_x21
 .align	4
 abi_test_clobber_x21:
+	AARCH64_VALID_CALL_TARGET
 	mov	x21, xzr
 	ret
 .size	abi_test_clobber_x21,.-abi_test_clobber_x21
@@ -303,6 +329,7 @@
 .hidden	abi_test_clobber_x22
 .align	4
 abi_test_clobber_x22:
+	AARCH64_VALID_CALL_TARGET
 	mov	x22, xzr
 	ret
 .size	abi_test_clobber_x22,.-abi_test_clobber_x22
@@ -311,6 +338,7 @@
 .hidden	abi_test_clobber_x23
 .align	4
 abi_test_clobber_x23:
+	AARCH64_VALID_CALL_TARGET
 	mov	x23, xzr
 	ret
 .size	abi_test_clobber_x23,.-abi_test_clobber_x23
@@ -319,6 +347,7 @@
 .hidden	abi_test_clobber_x24
 .align	4
 abi_test_clobber_x24:
+	AARCH64_VALID_CALL_TARGET
 	mov	x24, xzr
 	ret
 .size	abi_test_clobber_x24,.-abi_test_clobber_x24
@@ -327,6 +356,7 @@
 .hidden	abi_test_clobber_x25
 .align	4
 abi_test_clobber_x25:
+	AARCH64_VALID_CALL_TARGET
 	mov	x25, xzr
 	ret
 .size	abi_test_clobber_x25,.-abi_test_clobber_x25
@@ -335,6 +365,7 @@
 .hidden	abi_test_clobber_x26
 .align	4
 abi_test_clobber_x26:
+	AARCH64_VALID_CALL_TARGET
 	mov	x26, xzr
 	ret
 .size	abi_test_clobber_x26,.-abi_test_clobber_x26
@@ -343,6 +374,7 @@
 .hidden	abi_test_clobber_x27
 .align	4
 abi_test_clobber_x27:
+	AARCH64_VALID_CALL_TARGET
 	mov	x27, xzr
 	ret
 .size	abi_test_clobber_x27,.-abi_test_clobber_x27
@@ -351,6 +383,7 @@
 .hidden	abi_test_clobber_x28
 .align	4
 abi_test_clobber_x28:
+	AARCH64_VALID_CALL_TARGET
 	mov	x28, xzr
 	ret
 .size	abi_test_clobber_x28,.-abi_test_clobber_x28
@@ -359,6 +392,7 @@
 .hidden	abi_test_clobber_x29
 .align	4
 abi_test_clobber_x29:
+	AARCH64_VALID_CALL_TARGET
 	mov	x29, xzr
 	ret
 .size	abi_test_clobber_x29,.-abi_test_clobber_x29
@@ -367,6 +401,7 @@
 .hidden	abi_test_clobber_d0
 .align	4
 abi_test_clobber_d0:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d0, xzr
 	ret
 .size	abi_test_clobber_d0,.-abi_test_clobber_d0
@@ -375,6 +410,7 @@
 .hidden	abi_test_clobber_d1
 .align	4
 abi_test_clobber_d1:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d1, xzr
 	ret
 .size	abi_test_clobber_d1,.-abi_test_clobber_d1
@@ -383,6 +419,7 @@
 .hidden	abi_test_clobber_d2
 .align	4
 abi_test_clobber_d2:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d2, xzr
 	ret
 .size	abi_test_clobber_d2,.-abi_test_clobber_d2
@@ -391,6 +428,7 @@
 .hidden	abi_test_clobber_d3
 .align	4
 abi_test_clobber_d3:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d3, xzr
 	ret
 .size	abi_test_clobber_d3,.-abi_test_clobber_d3
@@ -399,6 +437,7 @@
 .hidden	abi_test_clobber_d4
 .align	4
 abi_test_clobber_d4:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d4, xzr
 	ret
 .size	abi_test_clobber_d4,.-abi_test_clobber_d4
@@ -407,6 +446,7 @@
 .hidden	abi_test_clobber_d5
 .align	4
 abi_test_clobber_d5:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d5, xzr
 	ret
 .size	abi_test_clobber_d5,.-abi_test_clobber_d5
@@ -415,6 +455,7 @@
 .hidden	abi_test_clobber_d6
 .align	4
 abi_test_clobber_d6:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d6, xzr
 	ret
 .size	abi_test_clobber_d6,.-abi_test_clobber_d6
@@ -423,6 +464,7 @@
 .hidden	abi_test_clobber_d7
 .align	4
 abi_test_clobber_d7:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d7, xzr
 	ret
 .size	abi_test_clobber_d7,.-abi_test_clobber_d7
@@ -431,6 +473,7 @@
 .hidden	abi_test_clobber_d8
 .align	4
 abi_test_clobber_d8:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d8, xzr
 	ret
 .size	abi_test_clobber_d8,.-abi_test_clobber_d8
@@ -439,6 +482,7 @@
 .hidden	abi_test_clobber_d9
 .align	4
 abi_test_clobber_d9:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d9, xzr
 	ret
 .size	abi_test_clobber_d9,.-abi_test_clobber_d9
@@ -447,6 +491,7 @@
 .hidden	abi_test_clobber_d10
 .align	4
 abi_test_clobber_d10:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d10, xzr
 	ret
 .size	abi_test_clobber_d10,.-abi_test_clobber_d10
@@ -455,6 +500,7 @@
 .hidden	abi_test_clobber_d11
 .align	4
 abi_test_clobber_d11:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d11, xzr
 	ret
 .size	abi_test_clobber_d11,.-abi_test_clobber_d11
@@ -463,6 +509,7 @@
 .hidden	abi_test_clobber_d12
 .align	4
 abi_test_clobber_d12:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d12, xzr
 	ret
 .size	abi_test_clobber_d12,.-abi_test_clobber_d12
@@ -471,6 +518,7 @@
 .hidden	abi_test_clobber_d13
 .align	4
 abi_test_clobber_d13:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d13, xzr
 	ret
 .size	abi_test_clobber_d13,.-abi_test_clobber_d13
@@ -479,6 +527,7 @@
 .hidden	abi_test_clobber_d14
 .align	4
 abi_test_clobber_d14:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d14, xzr
 	ret
 .size	abi_test_clobber_d14,.-abi_test_clobber_d14
@@ -487,6 +536,7 @@
 .hidden	abi_test_clobber_d15
 .align	4
 abi_test_clobber_d15:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d15, xzr
 	ret
 .size	abi_test_clobber_d15,.-abi_test_clobber_d15
@@ -495,6 +545,7 @@
 .hidden	abi_test_clobber_d16
 .align	4
 abi_test_clobber_d16:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d16, xzr
 	ret
 .size	abi_test_clobber_d16,.-abi_test_clobber_d16
@@ -503,6 +554,7 @@
 .hidden	abi_test_clobber_d17
 .align	4
 abi_test_clobber_d17:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d17, xzr
 	ret
 .size	abi_test_clobber_d17,.-abi_test_clobber_d17
@@ -511,6 +563,7 @@
 .hidden	abi_test_clobber_d18
 .align	4
 abi_test_clobber_d18:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d18, xzr
 	ret
 .size	abi_test_clobber_d18,.-abi_test_clobber_d18
@@ -519,6 +572,7 @@
 .hidden	abi_test_clobber_d19
 .align	4
 abi_test_clobber_d19:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d19, xzr
 	ret
 .size	abi_test_clobber_d19,.-abi_test_clobber_d19
@@ -527,6 +581,7 @@
 .hidden	abi_test_clobber_d20
 .align	4
 abi_test_clobber_d20:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d20, xzr
 	ret
 .size	abi_test_clobber_d20,.-abi_test_clobber_d20
@@ -535,6 +590,7 @@
 .hidden	abi_test_clobber_d21
 .align	4
 abi_test_clobber_d21:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d21, xzr
 	ret
 .size	abi_test_clobber_d21,.-abi_test_clobber_d21
@@ -543,6 +599,7 @@
 .hidden	abi_test_clobber_d22
 .align	4
 abi_test_clobber_d22:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d22, xzr
 	ret
 .size	abi_test_clobber_d22,.-abi_test_clobber_d22
@@ -551,6 +608,7 @@
 .hidden	abi_test_clobber_d23
 .align	4
 abi_test_clobber_d23:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d23, xzr
 	ret
 .size	abi_test_clobber_d23,.-abi_test_clobber_d23
@@ -559,6 +617,7 @@
 .hidden	abi_test_clobber_d24
 .align	4
 abi_test_clobber_d24:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d24, xzr
 	ret
 .size	abi_test_clobber_d24,.-abi_test_clobber_d24
@@ -567,6 +626,7 @@
 .hidden	abi_test_clobber_d25
 .align	4
 abi_test_clobber_d25:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d25, xzr
 	ret
 .size	abi_test_clobber_d25,.-abi_test_clobber_d25
@@ -575,6 +635,7 @@
 .hidden	abi_test_clobber_d26
 .align	4
 abi_test_clobber_d26:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d26, xzr
 	ret
 .size	abi_test_clobber_d26,.-abi_test_clobber_d26
@@ -583,6 +644,7 @@
 .hidden	abi_test_clobber_d27
 .align	4
 abi_test_clobber_d27:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d27, xzr
 	ret
 .size	abi_test_clobber_d27,.-abi_test_clobber_d27
@@ -591,6 +653,7 @@
 .hidden	abi_test_clobber_d28
 .align	4
 abi_test_clobber_d28:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d28, xzr
 	ret
 .size	abi_test_clobber_d28,.-abi_test_clobber_d28
@@ -599,6 +662,7 @@
 .hidden	abi_test_clobber_d29
 .align	4
 abi_test_clobber_d29:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d29, xzr
 	ret
 .size	abi_test_clobber_d29,.-abi_test_clobber_d29
@@ -607,6 +671,7 @@
 .hidden	abi_test_clobber_d30
 .align	4
 abi_test_clobber_d30:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d30, xzr
 	ret
 .size	abi_test_clobber_d30,.-abi_test_clobber_d30
@@ -615,6 +680,7 @@
 .hidden	abi_test_clobber_d31
 .align	4
 abi_test_clobber_d31:
+	AARCH64_VALID_CALL_TARGET
 	fmov	d31, xzr
 	ret
 .size	abi_test_clobber_d31,.-abi_test_clobber_d31
@@ -623,6 +689,7 @@
 .hidden	abi_test_clobber_v8_upper
 .align	4
 abi_test_clobber_v8_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v8.d[1], xzr
 	ret
 .size	abi_test_clobber_v8_upper,.-abi_test_clobber_v8_upper
@@ -631,6 +698,7 @@
 .hidden	abi_test_clobber_v9_upper
 .align	4
 abi_test_clobber_v9_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v9.d[1], xzr
 	ret
 .size	abi_test_clobber_v9_upper,.-abi_test_clobber_v9_upper
@@ -639,6 +707,7 @@
 .hidden	abi_test_clobber_v10_upper
 .align	4
 abi_test_clobber_v10_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v10.d[1], xzr
 	ret
 .size	abi_test_clobber_v10_upper,.-abi_test_clobber_v10_upper
@@ -647,6 +716,7 @@
 .hidden	abi_test_clobber_v11_upper
 .align	4
 abi_test_clobber_v11_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v11.d[1], xzr
 	ret
 .size	abi_test_clobber_v11_upper,.-abi_test_clobber_v11_upper
@@ -655,6 +725,7 @@
 .hidden	abi_test_clobber_v12_upper
 .align	4
 abi_test_clobber_v12_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v12.d[1], xzr
 	ret
 .size	abi_test_clobber_v12_upper,.-abi_test_clobber_v12_upper
@@ -663,6 +734,7 @@
 .hidden	abi_test_clobber_v13_upper
 .align	4
 abi_test_clobber_v13_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v13.d[1], xzr
 	ret
 .size	abi_test_clobber_v13_upper,.-abi_test_clobber_v13_upper
@@ -671,6 +743,7 @@
 .hidden	abi_test_clobber_v14_upper
 .align	4
 abi_test_clobber_v14_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v14.d[1], xzr
 	ret
 .size	abi_test_clobber_v14_upper,.-abi_test_clobber_v14_upper
@@ -679,6 +752,7 @@
 .hidden	abi_test_clobber_v15_upper
 .align	4
 abi_test_clobber_v15_upper:
+	AARCH64_VALID_CALL_TARGET
 	fmov	v15.d[1], xzr
 	ret
 .size	abi_test_clobber_v15_upper,.-abi_test_clobber_v15_upper
diff --git a/deps/boringssl/linux-arm/crypto/fipsmodule/aesv8-armx32.S b/deps/boringssl/linux-arm/crypto/fipsmodule/aesv8-armx32.S
index 5d6e22d..30c6525 100644
--- a/deps/boringssl/linux-arm/crypto/fipsmodule/aesv8-armx32.S
+++ b/deps/boringssl/linux-arm/crypto/fipsmodule/aesv8-armx32.S
@@ -243,6 +243,7 @@
 .type	aes_hw_encrypt,%function
 .align	5
 aes_hw_encrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	r3,[r2,#240]
 	vld1.32	{q0},[r2]!
 	vld1.8	{q2},[r0]
@@ -273,6 +274,7 @@
 .type	aes_hw_decrypt,%function
 .align	5
 aes_hw_decrypt:
+	AARCH64_VALID_CALL_TARGET
 	ldr	r3,[r2,#240]
 	vld1.32	{q0},[r2]!
 	vld1.8	{q2},[r0]
@@ -617,20 +619,34 @@
 	add	r7,r3,#32
 	mov	r6,r5
 	movlo	r12,#0
+
+	@ ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
+	@ affected by silicon errata #1742098 [0] and #1655431 [1],
+	@ respectively, where the second instruction of an aese/aesmc
+	@ instruction pair may execute twice if an interrupt is taken right
+	@ after the first instruction consumes an input register of which a
+	@ single 32-bit lane has been updated the last time it was modified.
+	@ 
+	@ This function uses a counter in one 32-bit lane. The vmov lines
+	@ could write to q1 and q10 directly, but that trips these bugs.
+	@ We write to q6 and copy to the final register as a workaround.
+	@ 
+	@ [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
+	@ [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
 #ifndef __ARMEB__
 	rev	r8, r8
 #endif
-	vorr	q1,q0,q0
 	add	r10, r8, #1
-	vorr	q10,q0,q0
-	add	r8, r8, #2
 	vorr	q6,q0,q0
 	rev	r10, r10
-	vmov.32	d3[1],r10
+	vmov.32	d13[1],r10
+	add	r8, r8, #2
+	vorr	q1,q6,q6
 	bls	.Lctr32_tail
 	rev	r12, r8
+	vmov.32	d13[1],r12
 	sub	r2,r2,#3		@ bias
-	vmov.32	d21[1],r12
+	vorr	q10,q6,q6
 	b	.Loop3x_ctr32
 
 .align	4
@@ -657,11 +673,11 @@
 .byte	0x20,0x23,0xb0,0xf3	@ aese q1,q8
 .byte	0x82,0xa3,0xb0,0xf3	@ aesmc q5,q1
 	vld1.8	{q2},[r0]!
-	vorr	q0,q6,q6
+	add	r9,r8,#1
 .byte	0x20,0x43,0xf0,0xf3	@ aese q10,q8
 .byte	0xa4,0x43,0xf0,0xf3	@ aesmc q10,q10
 	vld1.8	{q3},[r0]!
-	vorr	q1,q6,q6
+	rev	r9,r9
 .byte	0x22,0x83,0xb0,0xf3	@ aese q4,q9
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x22,0xa3,0xb0,0xf3	@ aese q5,q9
@@ -670,8 +686,6 @@
 	mov	r7,r3
 .byte	0x22,0x43,0xf0,0xf3	@ aese q10,q9
 .byte	0xa4,0x23,0xf0,0xf3	@ aesmc q9,q10
-	vorr	q10,q6,q6
-	add	r9,r8,#1
 .byte	0x28,0x83,0xb0,0xf3	@ aese q4,q12
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x28,0xa3,0xb0,0xf3	@ aese q5,q12
@@ -686,21 +700,26 @@
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
 .byte	0x2a,0xa3,0xb0,0xf3	@ aese q5,q13
 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
+	 @ Note the logic to update q0, q1, and q10 is written to work
+	 @ around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
+	 @ 32-bit mode. See the comment above.
 	veor	q11,q11,q7
-	rev	r9,r9
+	vmov.32	d13[1], r9
 .byte	0x2a,0x23,0xf0,0xf3	@ aese q9,q13
 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d1[1], r9
+	vorr	q0,q6,q6
 	rev	r10,r10
 .byte	0x2c,0x83,0xb0,0xf3	@ aese q4,q14
 .byte	0x88,0x83,0xb0,0xf3	@ aesmc q4,q4
+	vmov.32	d13[1], r10
+	rev	r12,r8
 .byte	0x2c,0xa3,0xb0,0xf3	@ aese q5,q14
 .byte	0x8a,0xa3,0xb0,0xf3	@ aesmc q5,q5
-	vmov.32	d3[1], r10
-	rev	r12,r8
+	vorr	q1,q6,q6
+	vmov.32	d13[1], r12
 .byte	0x2c,0x23,0xf0,0xf3	@ aese q9,q14
 .byte	0xa2,0x23,0xf0,0xf3	@ aesmc q9,q9
-	vmov.32	d21[1], r12
+	vorr	q10,q6,q6
 	subs	r2,r2,#3
 .byte	0x2e,0x83,0xb0,0xf3	@ aese q4,q15
 .byte	0x2e,0xa3,0xb0,0xf3	@ aese q5,q15
diff --git a/deps/boringssl/linux-arm/crypto/fipsmodule/ghashv8-armx32.S b/deps/boringssl/linux-arm/crypto/fipsmodule/ghashv8-armx32.S
index d684294..b97457b 100644
--- a/deps/boringssl/linux-arm/crypto/fipsmodule/ghashv8-armx32.S
+++ b/deps/boringssl/linux-arm/crypto/fipsmodule/ghashv8-armx32.S
@@ -24,6 +24,7 @@
 .type	gcm_init_v8,%function
 .align	4
 gcm_init_v8:
+	AARCH64_VALID_CALL_TARGET
 	vld1.64	{q9},[r1]		@ load input H
 	vmov.i8	q11,#0xe1
 	vshl.i64	q11,q11,#57		@ 0xc2.0
@@ -75,6 +76,7 @@
 .type	gcm_gmult_v8,%function
 .align	4
 gcm_gmult_v8:
+	AARCH64_VALID_CALL_TARGET
 	vld1.64	{q9},[r0]		@ load Xi
 	vmov.i8	q11,#0xe1
 	vld1.64	{q12,q13},[r1]	@ load twisted H, ...
@@ -117,6 +119,7 @@
 .type	gcm_ghash_v8,%function
 .align	4
 gcm_ghash_v8:
+	AARCH64_VALID_CALL_TARGET
 	vstmdb	sp!,{d8,d9,d10,d11,d12,d13,d14,d15}		@ 32-bit ABI says so
 	vld1.64	{q0},[r0]		@ load [rotated] Xi
 						@ "[rotated]" means that
diff --git a/deps/boringssl/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S b/deps/boringssl/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
index aefa543..12368e6 100644
--- a/deps/boringssl/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
+++ b/deps/boringssl/linux-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
@@ -18,26 +18,26 @@
 chacha20_poly1305_constants:
 
 .align	64
-.chacha20_consts:
+.Lchacha20_consts:
 .byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
 .byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
-.rol8:
+.Lrol8:
 .byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
 .byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
-.rol16:
+.Lrol16:
 .byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
 .byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
-.avx2_init:
+.Lavx2_init:
 .long	0,0,0,0
-.sse_inc:
+.Lsse_inc:
 .long	1,0,0,0
-.avx2_inc:
+.Lavx2_inc:
 .long	2,0,0,0,2,0,0,0
-.clamp:
+.Lclamp:
 .quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
 .quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
 .align	16
-.and_masks:
+.Land_masks:
 .byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
 .byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
 .byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
@@ -59,34 +59,35 @@
 .align	64
 poly_hash_ad_internal:
 .cfi_startproc	
+.cfi_def_cfa	rsp, 8
 	xorq	%r10,%r10
 	xorq	%r11,%r11
 	xorq	%r12,%r12
 	cmpq	$13,%r8
-	jne	hash_ad_loop
-poly_fast_tls_ad:
+	jne	.Lhash_ad_loop
+.Lpoly_fast_tls_ad:
 
 	movq	(%rcx),%r10
 	movq	5(%rcx),%r11
 	shrq	$24,%r11
 	movq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -102,38 +103,37 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	.byte	0xf3,0xc3
-hash_ad_loop:
+.Lhash_ad_loop:
 
 	cmpq	$16,%r8
-	jb	hash_ad_tail
-	addq	0(%rcx),%r10
+	jb	.Lhash_ad_tail
+	addq	0+0(%rcx),%r10
 	adcq	8+0(%rcx),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -149,53 +149,52 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rcx),%rcx
 	subq	$16,%r8
-	jmp	hash_ad_loop
-hash_ad_tail:
+	jmp	.Lhash_ad_loop
+.Lhash_ad_tail:
 	cmpq	$0,%r8
-	je	1f
+	je	.Lhash_ad_done
 
 	xorq	%r13,%r13
 	xorq	%r14,%r14
 	xorq	%r15,%r15
 	addq	%r8,%rcx
-hash_ad_tail_loop:
+.Lhash_ad_tail_loop:
 	shldq	$8,%r13,%r14
 	shlq	$8,%r13
 	movzbq	-1(%rcx),%r15
 	xorq	%r15,%r13
 	decq	%rcx
 	decq	%r8
-	jne	hash_ad_tail_loop
+	jne	.Lhash_ad_tail_loop
 
 	addq	%r13,%r10
 	adcq	%r14,%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -211,15 +210,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 
-1:
+.Lhash_ad_done:
 	.byte	0xf3,0xc3
 .cfi_endproc	
 .size	poly_hash_ad_internal, .-poly_hash_ad_internal
@@ -232,57 +230,60 @@
 .cfi_startproc	
 	pushq	%rbp
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%rbp,-16
 	pushq	%rbx
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%rbx,-24
 	pushq	%r12
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%r12,-32
 	pushq	%r13
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%r13,-40
 	pushq	%r14
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%r14,-48
 	pushq	%r15
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%r15,-56
 
 
 	pushq	%r9
 .cfi_adjust_cfa_offset	8
-	subq	$288 + 32,%rsp
+.cfi_offset	%r9,-64
+	subq	$288 + 0 + 32,%rsp
 .cfi_adjust_cfa_offset	288 + 32
-.cfi_offset	rbp, -16
-.cfi_offset	rbx, -24
-.cfi_offset	r12, -32
-.cfi_offset	r13, -40
-.cfi_offset	r14, -48
-.cfi_offset	r15, -56
+
 	leaq	32(%rsp),%rbp
 	andq	$-32,%rbp
-	movq	%rdx,8+32(%rbp)
-	movq	%r8,0+32(%rbp)
+
 	movq	%rdx,%rbx
+	movq	%r8,0+0+32(%rbp)
+	movq	%rbx,8+0+32(%rbp)
 
 	movl	OPENSSL_ia32cap_P+8(%rip),%eax
 	andl	$288,%eax
 	xorl	$288,%eax
 	jz	chacha20_poly1305_open_avx2
 
-1:
 	cmpq	$128,%rbx
-	jbe	open_sse_128
+	jbe	.Lopen_sse_128
 
-	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	.Lchacha20_consts(%rip),%xmm0
 	movdqu	0(%r9),%xmm4
 	movdqu	16(%r9),%xmm8
 	movdqu	32(%r9),%xmm12
+
 	movdqa	%xmm12,%xmm7
 
-	movdqa	%xmm4,48(%rbp)
-	movdqa	%xmm8,64(%rbp)
-	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm4,0+48(%rbp)
+	movdqa	%xmm8,0+64(%rbp)
+	movdqa	%xmm12,0+96(%rbp)
 	movq	$10,%r10
-1:
+.Lopen_sse_init_rounds:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -291,7 +292,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -303,7 +304,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -312,7 +313,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -324,24 +325,24 @@
 .byte	102,69,15,58,15,228,4
 
 	decq	%r10
-	jne	1b
+	jne	.Lopen_sse_init_rounds
 
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
 
-	pand	.clamp(%rip),%xmm0
-	movdqa	%xmm0,0(%rbp)
-	movdqa	%xmm4,16(%rbp)
+	pand	.Lclamp(%rip),%xmm0
+	movdqa	%xmm0,0+0(%rbp)
+	movdqa	%xmm4,0+16(%rbp)
 
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
-open_sse_main_loop:
+.Lopen_sse_main_loop:
 	cmpq	$256,%rbx
-	jb	2f
+	jb	.Lopen_sse_tail
 
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+	movdqa	.Lchacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
@@ -351,26 +352,26 @@
 	movdqa	%xmm0,%xmm3
 	movdqa	%xmm4,%xmm7
 	movdqa	%xmm8,%xmm11
-	movdqa	96(%rbp),%xmm15
-	paddd	.sse_inc(%rip),%xmm15
+	movdqa	0+96(%rbp),%xmm15
+	paddd	.Lsse_inc(%rip),%xmm15
 	movdqa	%xmm15,%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	paddd	.Lsse_inc(%rip),%xmm14
 	movdqa	%xmm14,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	.Lsse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
-	movdqa	%xmm15,144(%rbp)
+	paddd	.Lsse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
+	movdqa	%xmm15,0+144(%rbp)
 
 
 
 	movq	$4,%rcx
 	movq	%rsi,%r8
-1:
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+.Lopen_sse_main_loop_rounds:
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	.Lrol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -383,13 +384,13 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
 	paddd	%xmm12,%xmm8
 	pxor	%xmm11,%xmm7
-	addq	0(%r8),%r10
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
 
@@ -397,7 +398,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -414,17 +415,17 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	.Lrol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -437,26 +438,26 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
 	paddd	%xmm12,%xmm8
 	pxor	%xmm11,%xmm7
 	pxor	%xmm10,%xmm6
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -473,7 +474,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	imulq	%r12,%r9
 	addq	%r10,%r15
 	adcq	%rdx,%r9
@@ -489,8 +490,8 @@
 .byte	102,15,58,15,228,4
 .byte	102,69,15,58,15,192,8
 .byte	102,69,15,58,15,228,12
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	.Lrol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -506,9 +507,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -518,7 +518,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -527,7 +527,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -544,7 +544,7 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	.Lrol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -557,7 +557,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -566,7 +566,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -583,7 +583,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 .byte	102,15,58,15,255,12
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,4
@@ -598,27 +598,27 @@
 .byte	102,69,15,58,15,228,4
 
 	decq	%rcx
-	jge	1b
-	addq	0(%r8),%r10
+	jge	.Lopen_sse_main_loop_rounds
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -634,33 +634,32 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%r8),%r8
 	cmpq	$-6,%rcx
-	jg	1b
-	paddd	.chacha20_consts(%rip),%xmm3
-	paddd	48(%rbp),%xmm7
-	paddd	64(%rbp),%xmm11
-	paddd	144(%rbp),%xmm15
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
-	movdqa	%xmm12,80(%rbp)
+	jg	.Lopen_sse_main_loop_rounds
+	paddd	.Lchacha20_consts(%rip),%xmm3
+	paddd	0+48(%rbp),%xmm7
+	paddd	0+64(%rbp),%xmm11
+	paddd	0+144(%rbp),%xmm15
+	paddd	.Lchacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	.Lchacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
+	movdqa	%xmm12,0+80(%rbp)
 	movdqu	0 + 0(%rsi),%xmm12
 	pxor	%xmm3,%xmm12
 	movdqu	%xmm12,0 + 0(%rdi)
@@ -704,7 +703,7 @@
 	pxor	%xmm3,%xmm0
 	pxor	%xmm7,%xmm4
 	pxor	%xmm11,%xmm8
-	pxor	80(%rbp),%xmm15
+	pxor	0+80(%rbp),%xmm15
 	movdqu	%xmm0,0 + 192(%rdi)
 	movdqu	%xmm4,16 + 192(%rdi)
 	movdqu	%xmm8,32 + 192(%rdi)
@@ -713,45 +712,49 @@
 	leaq	256(%rsi),%rsi
 	leaq	256(%rdi),%rdi
 	subq	$256,%rbx
-	jmp	open_sse_main_loop
-2:
+	jmp	.Lopen_sse_main_loop
+.Lopen_sse_tail:
 
 	testq	%rbx,%rbx
-	jz	open_sse_finalize
+	jz	.Lopen_sse_finalize
+	cmpq	$192,%rbx
+	ja	.Lopen_sse_tail_256
+	cmpq	$128,%rbx
+	ja	.Lopen_sse_tail_192
 	cmpq	$64,%rbx
-	ja	3f
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
-	movdqa	96(%rbp),%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
+	ja	.Lopen_sse_tail_128
+	movdqa	.Lchacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
+	movdqa	0+96(%rbp),%xmm12
+	paddd	.Lsse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
 
 	xorq	%r8,%r8
 	movq	%rbx,%rcx
 	cmpq	$16,%rcx
-	jb	2f
-1:
-	addq	0(%rsi,%r8), %r10
-	adcq	8+0(%rsi,%r8), %r11
+	jb	.Lopen_sse_tail_64_rounds
+.Lopen_sse_tail_64_rounds_and_x1hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -767,19 +770,18 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	subq	$16,%rcx
-2:
+.Lopen_sse_tail_64_rounds:
 	addq	$16,%r8
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -788,7 +790,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -800,7 +802,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -809,7 +811,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -821,55 +823,54 @@
 .byte	102,69,15,58,15,228,4
 
 	cmpq	$16,%rcx
-	jae	1b
+	jae	.Lopen_sse_tail_64_rounds_and_x1hash
 	cmpq	$160,%r8
-	jne	2b
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jne	.Lopen_sse_tail_64_rounds
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 
-	jmp	open_sse_tail_64_dec_loop
-3:
-	cmpq	$128,%rbx
-	ja	3f
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+	jmp	.Lopen_sse_tail_64_dec_loop
+
+.Lopen_sse_tail_128:
+	movdqa	.Lchacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
-	movdqa	96(%rbp),%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	movdqa	0+96(%rbp),%xmm13
+	paddd	.Lsse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
+	paddd	.Lsse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
 
 	movq	%rbx,%rcx
 	andq	$-16,%rcx
 	xorq	%r8,%r8
-1:
-	addq	0(%rsi,%r8), %r10
-	adcq	8+0(%rsi,%r8), %r11
+.Lopen_sse_tail_128_rounds_and_x1hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -885,18 +886,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
-2:
+.Lopen_sse_tail_128_rounds:
 	addq	$16,%r8
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -905,7 +905,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -917,7 +917,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -926,7 +926,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -938,7 +938,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -947,7 +947,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -959,7 +959,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -968,7 +968,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -980,17 +980,17 @@
 .byte	102,69,15,58,15,237,4
 
 	cmpq	%rcx,%r8
-	jb	1b
+	jb	.Lopen_sse_tail_128_rounds_and_x1hash
 	cmpq	$160,%r8
-	jne	2b
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jne	.Lopen_sse_tail_128_rounds
+	paddd	.Lchacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 	movdqu	0 + 0(%rsi),%xmm3
 	movdqu	16 + 0(%rsi),%xmm7
 	movdqu	32 + 0(%rsi),%xmm11
@@ -1007,28 +1007,27 @@
 	subq	$64,%rbx
 	leaq	64(%rsi),%rsi
 	leaq	64(%rdi),%rdi
-	jmp	open_sse_tail_64_dec_loop
-3:
-	cmpq	$192,%rbx
-	ja	3f
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+	jmp	.Lopen_sse_tail_64_dec_loop
+
+.Lopen_sse_tail_192:
+	movdqa	.Lchacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
 	movdqa	%xmm0,%xmm2
 	movdqa	%xmm4,%xmm6
 	movdqa	%xmm8,%xmm10
-	movdqa	96(%rbp),%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	movdqa	0+96(%rbp),%xmm14
+	paddd	.Lsse_inc(%rip),%xmm14
 	movdqa	%xmm14,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	.Lsse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
+	paddd	.Lsse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
 
 	movq	%rbx,%rcx
 	movq	$160,%r8
@@ -1036,27 +1035,27 @@
 	cmovgq	%r8,%rcx
 	andq	$-16,%rcx
 	xorq	%r8,%r8
-1:
-	addq	0(%rsi,%r8), %r10
-	adcq	8+0(%rsi,%r8), %r11
+.Lopen_sse_tail_192_rounds_and_x1hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1072,18 +1071,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
-2:
+.Lopen_sse_tail_192_rounds:
 	addq	$16,%r8
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1092,7 +1090,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1104,7 +1102,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1113,7 +1111,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1125,7 +1123,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	.Lrol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1134,7 +1132,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	.Lrol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1146,7 +1144,7 @@
 .byte	102,69,15,58,15,246,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1155,7 +1153,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1167,7 +1165,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1176,7 +1174,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1188,7 +1186,7 @@
 .byte	102,69,15,58,15,237,4
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	.Lrol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1197,7 +1195,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	.Lrol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1209,31 +1207,31 @@
 .byte	102,69,15,58,15,246,4
 
 	cmpq	%rcx,%r8
-	jb	1b
+	jb	.Lopen_sse_tail_192_rounds_and_x1hash
 	cmpq	$160,%r8
-	jne	2b
+	jne	.Lopen_sse_tail_192_rounds
 	cmpq	$176,%rbx
-	jb	1f
-	addq	160(%rsi),%r10
+	jb	.Lopen_sse_tail_192_finish
+	addq	0+160(%rsi),%r10
 	adcq	8+160(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1249,35 +1247,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	cmpq	$192,%rbx
-	jb	1f
-	addq	176(%rsi),%r10
+	jb	.Lopen_sse_tail_192_finish
+	addq	0+176(%rsi),%r10
 	adcq	8+176(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1293,26 +1290,25 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
-1:
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+.Lopen_sse_tail_192_finish:
+	paddd	.Lchacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	.Lchacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 	movdqu	0 + 0(%rsi),%xmm3
 	movdqu	16 + 0(%rsi),%xmm7
 	movdqu	32 + 0(%rsi),%xmm11
@@ -1341,12 +1337,12 @@
 	subq	$128,%rbx
 	leaq	128(%rsi),%rsi
 	leaq	128(%rdi),%rdi
-	jmp	open_sse_tail_64_dec_loop
-3:
+	jmp	.Lopen_sse_tail_64_dec_loop
 
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+.Lopen_sse_tail_256:
+	movdqa	.Lchacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
@@ -1356,28 +1352,28 @@
 	movdqa	%xmm0,%xmm3
 	movdqa	%xmm4,%xmm7
 	movdqa	%xmm8,%xmm11
-	movdqa	96(%rbp),%xmm15
-	paddd	.sse_inc(%rip),%xmm15
+	movdqa	0+96(%rbp),%xmm15
+	paddd	.Lsse_inc(%rip),%xmm15
 	movdqa	%xmm15,%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	paddd	.Lsse_inc(%rip),%xmm14
 	movdqa	%xmm14,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	.Lsse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
-	movdqa	%xmm15,144(%rbp)
+	paddd	.Lsse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
+	movdqa	%xmm15,0+144(%rbp)
 
 	xorq	%r8,%r8
-1:
-	addq	0(%rsi,%r8), %r10
-	adcq	8+0(%rsi,%r8), %r11
+.Lopen_sse_tail_256_rounds_and_x1hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movdqa	%xmm11,80(%rbp)
+	movdqa	%xmm11,0+80(%rbp)
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm11
@@ -1386,7 +1382,7 @@
 	pxor	%xmm11,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm11
@@ -1398,7 +1394,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm11
@@ -1407,7 +1403,7 @@
 	pxor	%xmm11,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm11
@@ -1419,7 +1415,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	.Lrol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm11
@@ -1428,7 +1424,7 @@
 	pxor	%xmm11,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	.Lrol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm11
@@ -1438,21 +1434,21 @@
 .byte	102,15,58,15,246,4
 .byte	102,69,15,58,15,210,8
 .byte	102,69,15,58,15,246,12
-	movdqa	80(%rbp),%xmm11
-	movq	0+0(%rbp),%rax
+	movdqa	0+80(%rbp),%xmm11
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movdqa	%xmm9,80(%rbp)
+	movdqa	%xmm9,0+80(%rbp)
 	paddd	%xmm7,%xmm3
 	pxor	%xmm3,%xmm15
-	pshufb	.rol16(%rip),%xmm15
+	pshufb	.Lrol16(%rip),%xmm15
 	paddd	%xmm15,%xmm11
 	pxor	%xmm11,%xmm7
 	movdqa	%xmm7,%xmm9
@@ -1461,7 +1457,7 @@
 	pxor	%xmm9,%xmm7
 	paddd	%xmm7,%xmm3
 	pxor	%xmm3,%xmm15
-	pshufb	.rol8(%rip),%xmm15
+	pshufb	.Lrol8(%rip),%xmm15
 	paddd	%xmm15,%xmm11
 	pxor	%xmm11,%xmm7
 	movdqa	%xmm7,%xmm9
@@ -1471,21 +1467,21 @@
 .byte	102,15,58,15,255,4
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,12
-	movdqa	80(%rbp),%xmm9
-	movq	8+0(%rbp),%rax
+	movdqa	0+80(%rbp),%xmm9
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
-	movdqa	%xmm11,80(%rbp)
+	movdqa	%xmm11,0+80(%rbp)
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm11
@@ -1494,7 +1490,7 @@
 	pxor	%xmm11,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm11
@@ -1506,7 +1502,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm11
@@ -1515,7 +1511,7 @@
 	pxor	%xmm11,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm11
@@ -1530,7 +1526,7 @@
 	adcq	%rdx,%r9
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	.Lrol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm11
@@ -1539,7 +1535,7 @@
 	pxor	%xmm11,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	.Lrol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm11
@@ -1549,7 +1545,7 @@
 .byte	102,15,58,15,246,12
 .byte	102,69,15,58,15,210,8
 .byte	102,69,15,58,15,246,4
-	movdqa	80(%rbp),%xmm11
+	movdqa	0+80(%rbp),%xmm11
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -1559,16 +1555,15 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	movdqa	%xmm9,80(%rbp)
+	movdqa	%xmm9,0+80(%rbp)
 	paddd	%xmm7,%xmm3
 	pxor	%xmm3,%xmm15
-	pshufb	.rol16(%rip),%xmm15
+	pshufb	.Lrol16(%rip),%xmm15
 	paddd	%xmm15,%xmm11
 	pxor	%xmm11,%xmm7
 	movdqa	%xmm7,%xmm9
@@ -1577,7 +1572,7 @@
 	pxor	%xmm9,%xmm7
 	paddd	%xmm7,%xmm3
 	pxor	%xmm3,%xmm15
-	pshufb	.rol8(%rip),%xmm15
+	pshufb	.Lrol8(%rip),%xmm15
 	paddd	%xmm15,%xmm11
 	pxor	%xmm11,%xmm7
 	movdqa	%xmm7,%xmm9
@@ -1587,34 +1582,35 @@
 .byte	102,15,58,15,255,12
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,4
-	movdqa	80(%rbp),%xmm9
+	movdqa	0+80(%rbp),%xmm9
 
 	addq	$16,%r8
 	cmpq	$160,%r8
-	jb	1b
+	jb	.Lopen_sse_tail_256_rounds_and_x1hash
+
 	movq	%rbx,%rcx
 	andq	$-16,%rcx
-1:
-	addq	0(%rsi,%r8), %r10
-	adcq	8+0(%rsi,%r8), %r11
+.Lopen_sse_tail_256_hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1630,33 +1626,32 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	addq	$16,%r8
 	cmpq	%rcx,%r8
-	jb	1b
-	paddd	.chacha20_consts(%rip),%xmm3
-	paddd	48(%rbp),%xmm7
-	paddd	64(%rbp),%xmm11
-	paddd	144(%rbp),%xmm15
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
-	movdqa	%xmm12,80(%rbp)
+	jb	.Lopen_sse_tail_256_hash
+	paddd	.Lchacha20_consts(%rip),%xmm3
+	paddd	0+48(%rbp),%xmm7
+	paddd	0+64(%rbp),%xmm11
+	paddd	0+144(%rbp),%xmm15
+	paddd	.Lchacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	.Lchacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
+	movdqa	%xmm12,0+80(%rbp)
 	movdqu	0 + 0(%rsi),%xmm12
 	pxor	%xmm3,%xmm12
 	movdqu	%xmm12,0 + 0(%rdi)
@@ -1694,15 +1689,15 @@
 	movdqu	%xmm9,32 + 128(%rdi)
 	movdqu	%xmm15,48 + 128(%rdi)
 
-	movdqa	80(%rbp),%xmm12
+	movdqa	0+80(%rbp),%xmm12
 	subq	$192,%rbx
 	leaq	192(%rsi),%rsi
 	leaq	192(%rdi),%rdi
 
 
-open_sse_tail_64_dec_loop:
+.Lopen_sse_tail_64_dec_loop:
 	cmpq	$16,%rbx
-	jb	1f
+	jb	.Lopen_sse_tail_16_init
 	subq	$16,%rbx
 	movdqu	(%rsi),%xmm3
 	pxor	%xmm3,%xmm0
@@ -1712,61 +1707,60 @@
 	movdqa	%xmm4,%xmm0
 	movdqa	%xmm8,%xmm4
 	movdqa	%xmm12,%xmm8
-	jmp	open_sse_tail_64_dec_loop
-1:
+	jmp	.Lopen_sse_tail_64_dec_loop
+.Lopen_sse_tail_16_init:
 	movdqa	%xmm0,%xmm1
 
 
-open_sse_tail_16:
+.Lopen_sse_tail_16:
 	testq	%rbx,%rbx
-	jz	open_sse_finalize
+	jz	.Lopen_sse_finalize
 
 
 
 	pxor	%xmm3,%xmm3
-	leaq	-1(%rsi,%rbx), %rsi
+	leaq	-1(%rsi,%rbx,1),%rsi
 	movq	%rbx,%r8
-2:
+.Lopen_sse_tail_16_compose:
 	pslldq	$1,%xmm3
 	pinsrb	$0,(%rsi),%xmm3
 	subq	$1,%rsi
 	subq	$1,%r8
-	jnz	2b
+	jnz	.Lopen_sse_tail_16_compose
 
-3:
 .byte	102,73,15,126,221
 	pextrq	$1,%xmm3,%r14
 
 	pxor	%xmm1,%xmm3
 
 
-2:
+.Lopen_sse_tail_16_extract:
 	pextrb	$0,%xmm3,(%rdi)
 	psrldq	$1,%xmm3
 	addq	$1,%rdi
 	subq	$1,%rbx
-	jne	2b
+	jne	.Lopen_sse_tail_16_extract
 
 	addq	%r13,%r10
 	adcq	%r14,%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1782,35 +1776,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 
-open_sse_finalize:
-	addq	32(%rbp),%r10
-	adcq	8+32(%rbp),%r11
+.Lopen_sse_finalize:
+	addq	0+0+32(%rbp),%r10
+	adcq	8+0+32(%rbp),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1826,9 +1819,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -1844,33 +1836,41 @@
 	cmovcq	%r14,%r11
 	cmovcq	%r15,%r12
 
-	addq	0+16(%rbp),%r10
-	adcq	8+16(%rbp),%r11
+	addq	0+0+16(%rbp),%r10
+	adcq	8+0+16(%rbp),%r11
 
-	addq	$288 + 32,%rsp
+.cfi_remember_state	
+	addq	$288 + 0 + 32,%rsp
 .cfi_adjust_cfa_offset	-(288 + 32)
+
 	popq	%r9
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%r9
 	movq	%r10,(%r9)
 	movq	%r11,8(%r9)
-
 	popq	%r15
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%r15
 	popq	%r14
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%r14
 	popq	%r13
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%r13
 	popq	%r12
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%r12
 	popq	%rbx
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%rbx
 	popq	%rbp
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%rbp
 	.byte	0xf3,0xc3
-.cfi_adjust_cfa_offset	(8 * 6) + 288 + 32
 
-open_sse_128:
-	movdqu	.chacha20_consts(%rip),%xmm0
+.Lopen_sse_128:
+.cfi_restore_state	
+	movdqu	.Lchacha20_consts(%rip),%xmm0
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm0,%xmm2
 	movdqu	0(%r9),%xmm4
@@ -1881,17 +1881,18 @@
 	movdqa	%xmm8,%xmm10
 	movdqu	32(%r9),%xmm12
 	movdqa	%xmm12,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	.Lsse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	paddd	.Lsse_inc(%rip),%xmm14
 	movdqa	%xmm4,%xmm7
 	movdqa	%xmm8,%xmm11
 	movdqa	%xmm13,%xmm15
 	movq	$10,%r10
-1:
+
+.Lopen_sse_128_rounds:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1900,7 +1901,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1912,7 +1913,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1921,7 +1922,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1933,7 +1934,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	.Lrol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1942,7 +1943,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	.Lrol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1954,7 +1955,7 @@
 .byte	102,69,15,58,15,246,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1963,7 +1964,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1975,7 +1976,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1984,7 +1985,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1996,7 +1997,7 @@
 .byte	102,69,15,58,15,237,4
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	.Lrol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -2005,7 +2006,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	.Lrol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -2017,30 +2018,30 @@
 .byte	102,69,15,58,15,246,4
 
 	decq	%r10
-	jnz	1b
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	.chacha20_consts(%rip),%xmm2
+	jnz	.Lopen_sse_128_rounds
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	.Lchacha20_consts(%rip),%xmm1
+	paddd	.Lchacha20_consts(%rip),%xmm2
 	paddd	%xmm7,%xmm4
 	paddd	%xmm7,%xmm5
 	paddd	%xmm7,%xmm6
 	paddd	%xmm11,%xmm9
 	paddd	%xmm11,%xmm10
 	paddd	%xmm15,%xmm13
-	paddd	.sse_inc(%rip),%xmm15
+	paddd	.Lsse_inc(%rip),%xmm15
 	paddd	%xmm15,%xmm14
 
-	pand	.clamp(%rip),%xmm0
-	movdqa	%xmm0,0(%rbp)
-	movdqa	%xmm4,16(%rbp)
+	pand	.Lclamp(%rip),%xmm0
+	movdqa	%xmm0,0+0(%rbp)
+	movdqa	%xmm4,0+16(%rbp)
 
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
-1:
+.Lopen_sse_128_xor_hash:
 	cmpq	$16,%rbx
-	jb	open_sse_tail_16
+	jb	.Lopen_sse_tail_16
 	subq	$16,%rbx
-	addq	0(%rsi),%r10
+	addq	0+0(%rsi),%r10
 	adcq	8+0(%rsi),%r11
 	adcq	$1,%r12
 
@@ -2050,23 +2051,23 @@
 	movdqu	%xmm1,0(%rdi)
 	leaq	16(%rsi),%rsi
 	leaq	16(%rdi),%rdi
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -2082,9 +2083,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -2097,14 +2097,16 @@
 	movdqa	%xmm6,%xmm2
 	movdqa	%xmm10,%xmm6
 	movdqa	%xmm14,%xmm10
-	jmp	1b
-	jmp	open_sse_tail_16
+	jmp	.Lopen_sse_128_xor_hash
 .size	chacha20_poly1305_open, .-chacha20_poly1305_open
 .cfi_endproc	
 
 
 
 
+
+
+
 .globl	chacha20_poly1305_seal
 .hidden chacha20_poly1305_seal
 .type	chacha20_poly1305_seal,@function
@@ -2113,34 +2115,36 @@
 .cfi_startproc	
 	pushq	%rbp
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%rbp,-16
 	pushq	%rbx
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%rbx,-24
 	pushq	%r12
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%r12,-32
 	pushq	%r13
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%r13,-40
 	pushq	%r14
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%r14,-48
 	pushq	%r15
 .cfi_adjust_cfa_offset	8
+.cfi_offset	%r15,-56
 
 
 	pushq	%r9
 .cfi_adjust_cfa_offset	8
-	subq	$288 + 32,%rsp
+.cfi_offset	%r9,-64
+	subq	$288 + 0 + 32,%rsp
 .cfi_adjust_cfa_offset	288 + 32
-.cfi_offset	rbp, -16
-.cfi_offset	rbx, -24
-.cfi_offset	r12, -32
-.cfi_offset	r13, -40
-.cfi_offset	r14, -48
-.cfi_offset	r15, -56
 	leaq	32(%rsp),%rbp
 	andq	$-32,%rbp
+
 	movq	56(%r9),%rbx
 	addq	%rdx,%rbx
-	movq	%rbx,8+32(%rbp)
-	movq	%r8,0+32(%rbp)
+	movq	%r8,0+0+32(%rbp)
+	movq	%rbx,8+0+32(%rbp)
 	movq	%rdx,%rbx
 
 	movl	OPENSSL_ia32cap_P+8(%rip),%eax
@@ -2149,12 +2153,13 @@
 	jz	chacha20_poly1305_seal_avx2
 
 	cmpq	$128,%rbx
-	jbe	seal_sse_128
+	jbe	.Lseal_sse_128
 
-	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	.Lchacha20_consts(%rip),%xmm0
 	movdqu	0(%r9),%xmm4
 	movdqu	16(%r9),%xmm8
 	movdqu	32(%r9),%xmm12
+
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm0,%xmm2
 	movdqa	%xmm0,%xmm3
@@ -2165,22 +2170,22 @@
 	movdqa	%xmm8,%xmm10
 	movdqa	%xmm8,%xmm11
 	movdqa	%xmm12,%xmm15
-	paddd	.sse_inc(%rip),%xmm12
+	paddd	.Lsse_inc(%rip),%xmm12
 	movdqa	%xmm12,%xmm14
-	paddd	.sse_inc(%rip),%xmm12
+	paddd	.Lsse_inc(%rip),%xmm12
 	movdqa	%xmm12,%xmm13
-	paddd	.sse_inc(%rip),%xmm12
+	paddd	.Lsse_inc(%rip),%xmm12
 
-	movdqa	%xmm4,48(%rbp)
-	movdqa	%xmm8,64(%rbp)
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
-	movdqa	%xmm15,144(%rbp)
+	movdqa	%xmm4,0+48(%rbp)
+	movdqa	%xmm8,0+64(%rbp)
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
+	movdqa	%xmm15,0+144(%rbp)
 	movq	$10,%r10
-1:
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+.Lseal_sse_init_rounds:
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	.Lrol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2193,7 +2198,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2202,7 +2207,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -2219,7 +2224,7 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	.Lrol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2232,7 +2237,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2241,7 +2246,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -2258,7 +2263,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 .byte	102,15,58,15,255,4
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,12
@@ -2271,8 +2276,8 @@
 .byte	102,15,58,15,228,4
 .byte	102,69,15,58,15,192,8
 .byte	102,69,15,58,15,228,12
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	.Lrol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2285,7 +2290,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2294,7 +2299,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -2311,7 +2316,7 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	.Lrol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2324,7 +2329,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2333,7 +2338,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -2350,7 +2355,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 .byte	102,15,58,15,255,12
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,4
@@ -2365,28 +2370,28 @@
 .byte	102,69,15,58,15,228,4
 
 	decq	%r10
-	jnz	1b
-	paddd	.chacha20_consts(%rip),%xmm3
-	paddd	48(%rbp),%xmm7
-	paddd	64(%rbp),%xmm11
-	paddd	144(%rbp),%xmm15
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jnz	.Lseal_sse_init_rounds
+	paddd	.Lchacha20_consts(%rip),%xmm3
+	paddd	0+48(%rbp),%xmm7
+	paddd	0+64(%rbp),%xmm11
+	paddd	0+144(%rbp),%xmm15
+	paddd	.Lchacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	.Lchacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 
 
-	pand	.clamp(%rip),%xmm3
-	movdqa	%xmm3,0(%rbp)
-	movdqa	%xmm7,16(%rbp)
+	pand	.Lclamp(%rip),%xmm3
+	movdqa	%xmm3,0+0(%rbp)
+	movdqa	%xmm7,0+16(%rbp)
 
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
@@ -2416,12 +2421,12 @@
 	movdqu	%xmm15,48 + 64(%rdi)
 
 	cmpq	$192,%rbx
-	ja	1f
+	ja	.Lseal_sse_main_init
 	movq	$128,%rcx
 	subq	$128,%rbx
 	leaq	128(%rsi),%rsi
-	jmp	seal_sse_128_seal_hash
-1:
+	jmp	.Lseal_sse_128_tail_hash
+.Lseal_sse_main_init:
 	movdqu	0 + 128(%rsi),%xmm3
 	movdqu	16 + 128(%rsi),%xmm7
 	movdqu	32 + 128(%rsi),%xmm11
@@ -2441,16 +2446,16 @@
 	movq	$2,%rcx
 	movq	$8,%r8
 	cmpq	$64,%rbx
-	jbe	seal_sse_tail_64
+	jbe	.Lseal_sse_tail_64
 	cmpq	$128,%rbx
-	jbe	seal_sse_tail_128
+	jbe	.Lseal_sse_tail_128
 	cmpq	$192,%rbx
-	jbe	seal_sse_tail_192
+	jbe	.Lseal_sse_tail_192
 
-1:
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+.Lseal_sse_main_loop:
+	movdqa	.Lchacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
@@ -2460,22 +2465,23 @@
 	movdqa	%xmm0,%xmm3
 	movdqa	%xmm4,%xmm7
 	movdqa	%xmm8,%xmm11
-	movdqa	96(%rbp),%xmm15
-	paddd	.sse_inc(%rip),%xmm15
+	movdqa	0+96(%rbp),%xmm15
+	paddd	.Lsse_inc(%rip),%xmm15
 	movdqa	%xmm15,%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	paddd	.Lsse_inc(%rip),%xmm14
 	movdqa	%xmm14,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	.Lsse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
-	movdqa	%xmm15,144(%rbp)
+	paddd	.Lsse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
+	movdqa	%xmm15,0+144(%rbp)
 
-2:
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+.align	32
+.Lseal_sse_main_rounds:
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	.Lrol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2488,19 +2494,19 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
 	paddd	%xmm12,%xmm8
 	pxor	%xmm11,%xmm7
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -2517,17 +2523,17 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	.Lrol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2540,26 +2546,26 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
 	paddd	%xmm12,%xmm8
 	pxor	%xmm11,%xmm7
 	pxor	%xmm10,%xmm6
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -2576,7 +2582,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	imulq	%r12,%r9
 	addq	%r10,%r15
 	adcq	%rdx,%r9
@@ -2592,8 +2598,8 @@
 .byte	102,15,58,15,228,4
 .byte	102,69,15,58,15,192,8
 .byte	102,69,15,58,15,228,12
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	.Lrol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2609,9 +2615,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -2621,7 +2626,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2630,7 +2635,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -2647,7 +2652,7 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	.Lrol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2660,7 +2665,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2669,7 +2674,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -2686,7 +2691,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 .byte	102,15,58,15,255,12
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,4
@@ -2702,27 +2707,27 @@
 
 	leaq	16(%rdi),%rdi
 	decq	%r8
-	jge	2b
-	addq	0(%rdi),%r10
+	jge	.Lseal_sse_main_rounds
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -2738,35 +2743,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
 	decq	%rcx
-	jg	2b
-	paddd	.chacha20_consts(%rip),%xmm3
-	paddd	48(%rbp),%xmm7
-	paddd	64(%rbp),%xmm11
-	paddd	144(%rbp),%xmm15
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jg	.Lseal_sse_main_rounds
+	paddd	.Lchacha20_consts(%rip),%xmm3
+	paddd	0+48(%rbp),%xmm7
+	paddd	0+64(%rbp),%xmm11
+	paddd	0+144(%rbp),%xmm15
+	paddd	.Lchacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	.Lchacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 
-	movdqa	%xmm14,80(%rbp)
-	movdqa	%xmm14,80(%rbp)
+	movdqa	%xmm14,0+80(%rbp)
+	movdqa	%xmm14,0+80(%rbp)
 	movdqu	0 + 0(%rsi),%xmm14
 	pxor	%xmm3,%xmm14
 	movdqu	%xmm14,0 + 0(%rdi)
@@ -2780,7 +2784,7 @@
 	pxor	%xmm15,%xmm14
 	movdqu	%xmm14,48 + 0(%rdi)
 
-	movdqa	80(%rbp),%xmm14
+	movdqa	0+80(%rbp),%xmm14
 	movdqu	0 + 64(%rsi),%xmm3
 	movdqu	16 + 64(%rsi),%xmm7
 	movdqu	32 + 64(%rsi),%xmm11
@@ -2807,13 +2811,13 @@
 	movdqu	%xmm15,48 + 128(%rdi)
 
 	cmpq	$256,%rbx
-	ja	3f
+	ja	.Lseal_sse_main_loop_xor
 
 	movq	$192,%rcx
 	subq	$192,%rbx
 	leaq	192(%rsi),%rsi
-	jmp	seal_sse_128_seal_hash
-3:
+	jmp	.Lseal_sse_128_tail_hash
+.Lseal_sse_main_loop_xor:
 	movdqu	0 + 192(%rsi),%xmm3
 	movdqu	16 + 192(%rsi),%xmm7
 	movdqu	32 + 192(%rsi),%xmm11
@@ -2832,43 +2836,45 @@
 	movq	$6,%rcx
 	movq	$4,%r8
 	cmpq	$192,%rbx
-	jg	1b
+	jg	.Lseal_sse_main_loop
 	movq	%rbx,%rcx
 	testq	%rbx,%rbx
-	je	seal_sse_128_seal_hash
+	je	.Lseal_sse_128_tail_hash
 	movq	$6,%rcx
+	cmpq	$128,%rbx
+	ja	.Lseal_sse_tail_192
 	cmpq	$64,%rbx
-	jg	3f
+	ja	.Lseal_sse_tail_128
 
-seal_sse_tail_64:
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
-	movdqa	96(%rbp),%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
+.Lseal_sse_tail_64:
+	movdqa	.Lchacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
+	movdqa	0+96(%rbp),%xmm12
+	paddd	.Lsse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+.Lseal_sse_tail_64_rounds_and_x2hash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -2884,18 +2890,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+.Lseal_sse_tail_64_rounds_and_x1hash:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -2904,7 +2909,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -2916,7 +2921,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -2925,7 +2930,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -2935,26 +2940,26 @@
 .byte	102,15,58,15,228,12
 .byte	102,69,15,58,15,192,8
 .byte	102,69,15,58,15,228,4
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -2970,63 +2975,59 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	.Lseal_sse_tail_64_rounds_and_x2hash
 	decq	%r8
-	jge	2b
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jge	.Lseal_sse_tail_64_rounds_and_x1hash
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 
-	jmp	seal_sse_128_seal
-3:
-	cmpq	$128,%rbx
-	jg	3f
+	jmp	.Lseal_sse_128_tail_xor
 
-seal_sse_tail_128:
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+.Lseal_sse_tail_128:
+	movdqa	.Lchacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
-	movdqa	96(%rbp),%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	movdqa	0+96(%rbp),%xmm13
+	paddd	.Lsse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
+	paddd	.Lsse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+.Lseal_sse_tail_128_rounds_and_x2hash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3042,18 +3043,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+.Lseal_sse_tail_128_rounds_and_x1hash:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3062,7 +3062,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3074,7 +3074,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3083,7 +3083,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3093,26 +3093,26 @@
 .byte	102,15,58,15,237,4
 .byte	102,69,15,58,15,201,8
 .byte	102,69,15,58,15,237,12
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3128,15 +3128,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3145,7 +3144,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3157,7 +3156,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3166,7 +3165,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3179,17 +3178,17 @@
 
 	leaq	16(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	.Lseal_sse_tail_128_rounds_and_x2hash
 	decq	%r8
-	jge	2b
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jge	.Lseal_sse_tail_128_rounds_and_x1hash
+	paddd	.Lchacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 	movdqu	0 + 0(%rsi),%xmm3
 	movdqu	16 + 0(%rsi),%xmm7
 	movdqu	32 + 0(%rsi),%xmm11
@@ -3206,50 +3205,49 @@
 	movq	$64,%rcx
 	subq	$64,%rbx
 	leaq	64(%rsi),%rsi
-	jmp	seal_sse_128_seal_hash
-3:
+	jmp	.Lseal_sse_128_tail_hash
 
-seal_sse_tail_192:
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+.Lseal_sse_tail_192:
+	movdqa	.Lchacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
 	movdqa	%xmm0,%xmm2
 	movdqa	%xmm4,%xmm6
 	movdqa	%xmm8,%xmm10
-	movdqa	96(%rbp),%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	movdqa	0+96(%rbp),%xmm14
+	paddd	.Lsse_inc(%rip),%xmm14
 	movdqa	%xmm14,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	.Lsse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
+	paddd	.Lsse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+.Lseal_sse_tail_192_rounds_and_x2hash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3265,18 +3263,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+.Lseal_sse_tail_192_rounds_and_x1hash:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3285,7 +3282,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3297,7 +3294,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3306,7 +3303,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3318,7 +3315,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	.Lrol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -3327,7 +3324,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	.Lrol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -3337,26 +3334,26 @@
 .byte	102,15,58,15,246,4
 .byte	102,69,15,58,15,210,8
 .byte	102,69,15,58,15,246,12
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3372,15 +3369,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3389,7 +3385,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3401,7 +3397,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3410,7 +3406,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3422,7 +3418,7 @@
 .byte	102,69,15,58,15,237,4
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	.Lrol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -3431,7 +3427,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	.Lrol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -3444,21 +3440,21 @@
 
 	leaq	16(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	.Lseal_sse_tail_192_rounds_and_x2hash
 	decq	%r8
-	jge	2b
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jge	.Lseal_sse_tail_192_rounds_and_x1hash
+	paddd	.Lchacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	.Lchacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 	movdqu	0 + 0(%rsi),%xmm3
 	movdqu	16 + 0(%rsi),%xmm7
 	movdqu	32 + 0(%rsi),%xmm11
@@ -3488,29 +3484,29 @@
 	subq	$128,%rbx
 	leaq	128(%rsi),%rsi
 
-seal_sse_128_seal_hash:
+.Lseal_sse_128_tail_hash:
 	cmpq	$16,%rcx
-	jb	seal_sse_128_seal
-	addq	0(%rdi),%r10
+	jb	.Lseal_sse_128_tail_xor
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3526,20 +3522,19 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	subq	$16,%rcx
 	leaq	16(%rdi),%rdi
-	jmp	seal_sse_128_seal_hash
+	jmp	.Lseal_sse_128_tail_hash
 
-seal_sse_128_seal:
+.Lseal_sse_128_tail_xor:
 	cmpq	$16,%rbx
-	jb	seal_sse_tail_16
+	jb	.Lseal_sse_tail_16
 	subq	$16,%rbx
 
 	movdqu	0(%rsi),%xmm3
@@ -3551,23 +3546,23 @@
 	adcq	$1,%r12
 	leaq	16(%rsi),%rsi
 	leaq	16(%rdi),%rdi
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3583,9 +3578,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -3598,22 +3592,22 @@
 	movdqa	%xmm5,%xmm1
 	movdqa	%xmm9,%xmm5
 	movdqa	%xmm13,%xmm9
-	jmp	seal_sse_128_seal
+	jmp	.Lseal_sse_128_tail_xor
 
-seal_sse_tail_16:
+.Lseal_sse_tail_16:
 	testq	%rbx,%rbx
-	jz	process_blocks_of_extra_in
+	jz	.Lprocess_blocks_of_extra_in
 
 	movq	%rbx,%r8
 	movq	%rbx,%rcx
-	leaq	-1(%rsi,%rbx), %rsi
+	leaq	-1(%rsi,%rbx,1),%rsi
 	pxor	%xmm15,%xmm15
-1:
+.Lseal_sse_tail_16_compose:
 	pslldq	$1,%xmm15
 	pinsrb	$0,(%rsi),%xmm15
 	leaq	-1(%rsi),%rsi
 	decq	%rcx
-	jne	1b
+	jne	.Lseal_sse_tail_16_compose
 
 
 	pxor	%xmm0,%xmm15
@@ -3621,12 +3615,12 @@
 
 	movq	%rbx,%rcx
 	movdqu	%xmm15,%xmm0
-2:
+.Lseal_sse_tail_16_extract:
 	pextrb	$0,%xmm0,(%rdi)
 	psrldq	$1,%xmm0
 	addq	$1,%rdi
 	subq	$1,%rcx
-	jnz	2b
+	jnz	.Lseal_sse_tail_16_extract
 
 
 
@@ -3635,23 +3629,23 @@
 
 
 
-	movq	288+32(%rsp),%r9
+	movq	288 + 0 + 32(%rsp),%r9
 	movq	56(%r9),%r14
 	movq	48(%r9),%r13
 	testq	%r14,%r14
-	jz	process_partial_block
+	jz	.Lprocess_partial_block
 
 	movq	$16,%r15
 	subq	%rbx,%r15
 	cmpq	%r15,%r14
 
-	jge	load_extra_in
+	jge	.Lload_extra_in
 	movq	%r14,%r15
 
-load_extra_in:
+.Lload_extra_in:
 
 
-	leaq	-1(%r13,%r15), %rsi
+	leaq	-1(%r13,%r15,1),%rsi
 
 
 	addq	%r15,%r13
@@ -3665,29 +3659,29 @@
 
 
 	pxor	%xmm11,%xmm11
-3:
+.Lload_extra_load_loop:
 	pslldq	$1,%xmm11
 	pinsrb	$0,(%rsi),%xmm11
 	leaq	-1(%rsi),%rsi
 	subq	$1,%r15
-	jnz	3b
+	jnz	.Lload_extra_load_loop
 
 
 
 
 	movq	%rbx,%r15
 
-4:
+.Lload_extra_shift_loop:
 	pslldq	$1,%xmm11
 	subq	$1,%r15
-	jnz	4b
+	jnz	.Lload_extra_shift_loop
 
 
 
 
-	leaq	.and_masks(%rip),%r15
+	leaq	.Land_masks(%rip),%r15
 	shlq	$4,%rbx
-	pand	-16(%r15,%rbx), %xmm15
+	pand	-16(%r15,%rbx,1),%xmm15
 
 
 	por	%xmm11,%xmm15
@@ -3699,23 +3693,23 @@
 	addq	%r13,%r10
 	adcq	%r14,%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3731,44 +3725,43 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 
-process_blocks_of_extra_in:
+.Lprocess_blocks_of_extra_in:
 
-	movq	288+32(%rsp),%r9
+	movq	288+32+0 (%rsp),%r9
 	movq	48(%r9),%rsi
 	movq	56(%r9),%r8
 	movq	%r8,%rcx
 	shrq	$4,%r8
 
-5:
+.Lprocess_extra_hash_loop:
 	jz	process_extra_in_trailer
-	addq	0(%rsi),%r10
+	addq	0+0(%rsi),%r10
 	adcq	8+0(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3784,57 +3777,55 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rsi),%rsi
 	subq	$1,%r8
-	jmp	5b
-
+	jmp	.Lprocess_extra_hash_loop
 process_extra_in_trailer:
 	andq	$15,%rcx
 	movq	%rcx,%rbx
-	jz	do_length_block
-	leaq	-1(%rsi,%rcx), %rsi
+	jz	.Ldo_length_block
+	leaq	-1(%rsi,%rcx,1),%rsi
 
-6:
+.Lprocess_extra_in_trailer_load:
 	pslldq	$1,%xmm15
 	pinsrb	$0,(%rsi),%xmm15
 	leaq	-1(%rsi),%rsi
 	subq	$1,%rcx
-	jnz	6b
+	jnz	.Lprocess_extra_in_trailer_load
 
-process_partial_block:
+.Lprocess_partial_block:
 
-	leaq	.and_masks(%rip),%r15
+	leaq	.Land_masks(%rip),%r15
 	shlq	$4,%rbx
-	pand	-16(%r15,%rbx), %xmm15
+	pand	-16(%r15,%rbx,1),%xmm15
 .byte	102,77,15,126,253
 	pextrq	$1,%xmm15,%r14
 	addq	%r13,%r10
 	adcq	%r14,%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3850,35 +3841,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 
-do_length_block:
-	addq	32(%rbp),%r10
-	adcq	8+32(%rbp),%r11
+.Ldo_length_block:
+	addq	0+0+32(%rbp),%r10
+	adcq	8+0+32(%rbp),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3894,9 +3884,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -3912,33 +3901,41 @@
 	cmovcq	%r14,%r11
 	cmovcq	%r15,%r12
 
-	addq	0+16(%rbp),%r10
-	adcq	8+16(%rbp),%r11
+	addq	0+0+16(%rbp),%r10
+	adcq	8+0+16(%rbp),%r11
 
-	addq	$288 + 32,%rsp
+.cfi_remember_state	
+	addq	$288 + 0 + 32,%rsp
 .cfi_adjust_cfa_offset	-(288 + 32)
+
 	popq	%r9
 .cfi_adjust_cfa_offset	-8
-	movq	%r10,0(%r9)
+.cfi_restore	%r9
+	movq	%r10,(%r9)
 	movq	%r11,8(%r9)
-
 	popq	%r15
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%r15
 	popq	%r14
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%r14
 	popq	%r13
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%r13
 	popq	%r12
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%r12
 	popq	%rbx
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%rbx
 	popq	%rbp
 .cfi_adjust_cfa_offset	-8
+.cfi_restore	%rbp
 	.byte	0xf3,0xc3
-.cfi_adjust_cfa_offset	(8 * 7) + 288 + 32
 
-seal_sse_128:
-	movdqu	.chacha20_consts(%rip),%xmm0
+.Lseal_sse_128:
+.cfi_restore_state	
+	movdqu	.Lchacha20_consts(%rip),%xmm0
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm0,%xmm2
 	movdqu	0(%r9),%xmm4
@@ -3949,17 +3946,18 @@
 	movdqa	%xmm8,%xmm10
 	movdqu	32(%r9),%xmm14
 	movdqa	%xmm14,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
+	paddd	.Lsse_inc(%rip),%xmm12
 	movdqa	%xmm12,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	.Lsse_inc(%rip),%xmm13
 	movdqa	%xmm4,%xmm7
 	movdqa	%xmm8,%xmm11
 	movdqa	%xmm12,%xmm15
 	movq	$10,%r10
-1:
+
+.Lseal_sse_128_rounds:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3968,7 +3966,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3980,7 +3978,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3989,7 +3987,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -4001,7 +3999,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	.Lrol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -4010,7 +4008,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	.Lrol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -4022,7 +4020,7 @@
 .byte	102,69,15,58,15,246,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	.Lrol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -4031,7 +4029,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	.Lrol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -4043,7 +4041,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	.Lrol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -4052,7 +4050,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	.Lrol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -4064,7 +4062,7 @@
 .byte	102,69,15,58,15,237,4
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	.Lrol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -4073,7 +4071,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	.Lrol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -4085,51 +4083,71 @@
 .byte	102,69,15,58,15,246,4
 
 	decq	%r10
-	jnz	1b
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	.chacha20_consts(%rip),%xmm2
+	jnz	.Lseal_sse_128_rounds
+	paddd	.Lchacha20_consts(%rip),%xmm0
+	paddd	.Lchacha20_consts(%rip),%xmm1
+	paddd	.Lchacha20_consts(%rip),%xmm2
 	paddd	%xmm7,%xmm4
 	paddd	%xmm7,%xmm5
 	paddd	%xmm7,%xmm6
 	paddd	%xmm11,%xmm8
 	paddd	%xmm11,%xmm9
 	paddd	%xmm15,%xmm12
-	paddd	.sse_inc(%rip),%xmm15
+	paddd	.Lsse_inc(%rip),%xmm15
 	paddd	%xmm15,%xmm13
 
-	pand	.clamp(%rip),%xmm2
-	movdqa	%xmm2,0(%rbp)
-	movdqa	%xmm6,16(%rbp)
+	pand	.Lclamp(%rip),%xmm2
+	movdqa	%xmm2,0+0(%rbp)
+	movdqa	%xmm6,0+16(%rbp)
 
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
-	jmp	seal_sse_128_seal
+	jmp	.Lseal_sse_128_tail_xor
 .size	chacha20_poly1305_seal, .-chacha20_poly1305_seal
+.cfi_endproc	
 
 
 .type	chacha20_poly1305_open_avx2,@function
 .align	64
 chacha20_poly1305_open_avx2:
+.cfi_startproc	
+
+
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%rbp,-16
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%rbx,-24
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r12,-32
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r13,-40
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r14,-48
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r15,-56
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r9,-64
+.cfi_adjust_cfa_offset	288 + 32
+
 	vzeroupper
-	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
 	vbroadcasti128	0(%r9),%ymm4
 	vbroadcasti128	16(%r9),%ymm8
 	vbroadcasti128	32(%r9),%ymm12
-	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
+	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12
 	cmpq	$192,%rbx
-	jbe	open_avx2_192
+	jbe	.Lopen_avx2_192
 	cmpq	$320,%rbx
-	jbe	open_avx2_320
+	jbe	.Lopen_avx2_320
 
-	vmovdqa	%ymm4,64(%rbp)
-	vmovdqa	%ymm8,96(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm4,0+64(%rbp)
+	vmovdqa	%ymm8,0+96(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
 	movq	$10,%r10
-1:
+.Lopen_avx2_init_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4137,7 +4155,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4148,7 +4166,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4156,7 +4174,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4167,45 +4185,45 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 
 	decq	%r10
-	jne	1b
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jne	.Lopen_avx2_init_rounds
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 
-	vpand	.clamp(%rip),%ymm3,%ymm3
-	vmovdqa	%ymm3,0(%rbp)
+	vpand	.Lclamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0+0(%rbp)
 
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
 
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
-	xorq	%rcx,%rcx
 
-1:
-	addq	0(%rsi,%rcx), %r10
-	adcq	8+0(%rsi,%rcx), %r11
+	xorq	%rcx,%rcx
+.Lopen_avx2_init_hash:
+	addq	0+0(%rsi,%rcx,1),%r10
+	adcq	8+0(%rsi,%rcx,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -4221,31 +4239,31 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	addq	$16,%rcx
 	cmpq	$64,%rcx
-	jne	1b
+	jne	.Lopen_avx2_init_hash
 
 	vpxor	0(%rsi),%ymm0,%ymm0
 	vpxor	32(%rsi),%ymm4,%ymm4
+
 	vmovdqu	%ymm0,0(%rdi)
 	vmovdqu	%ymm4,32(%rdi)
 	leaq	64(%rsi),%rsi
 	leaq	64(%rdi),%rdi
 	subq	$64,%rbx
-1:
+.Lopen_avx2_main_loop:
 
 	cmpq	$512,%rbx
-	jb	3f
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jb	.Lopen_avx2_main_loop_done
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
@@ -4255,23 +4273,23 @@
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm15
+	vmovdqa	.Lavx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm15
 	vpaddd	%ymm15,%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm15,256(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm15,0+256(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
 
 	xorq	%rcx,%rcx
-2:
-	addq	0*8(%rsi,%rcx), %r10
-	adcq	8+0*8(%rsi,%rcx), %r11
+.Lopen_avx2_main_loop_rounds:
+	addq	0+0(%rsi,%rcx,1),%r10
+	adcq	8+0(%rsi,%rcx,1),%r11
 	adcq	$1,%r12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -4280,7 +4298,7 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
@@ -4291,23 +4309,22 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
-	movq	8+0(%rbp),%rdx
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -4315,18 +4332,19 @@
 	vpslld	$32-20,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
 	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
 	addq	%rax,%r15
 	adcq	%rdx,%r9
-	vpslld	$32-20,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -4336,13 +4354,11 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpxor	%ymm3,%ymm15,%ymm15
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
@@ -4350,27 +4366,26 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
-	addq	2*8(%rsi,%rcx), %r10
-	adcq	8+2*8(%rsi,%rcx), %r11
-	adcq	$1,%r12
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
+	addq	0+16(%rsi,%rcx,1),%r10
+	adcq	8+16(%rsi,%rcx,1),%r11
+	adcq	$1,%r12
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	movq	0+0(%rbp),%rdx
+	vmovdqa	%ymm8,0+128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vmovdqa	%ymm8,128(%rbp)
-	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
@@ -4382,28 +4397,28 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
-	movq	8+0(%rbp),%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpalignr	$8,%ymm10,%ymm10,%ymm10
-	vpalignr	$12,%ymm14,%ymm14,%ymm14
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$12,%ymm13,%ymm13,%ymm13
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -4412,17 +4427,19 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	addq	%rax,%r15
-	adcq	%rdx,%r9
 	vpshufb	%ymm8,%ymm15,%ymm15
 	vpshufb	%ymm8,%ymm14,%ymm14
+	addq	%rax,%r15
+	adcq	%rdx,%r9
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -4432,35 +4449,31 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpxor	%ymm11,%ymm7,%ymm7
-	vpxor	%ymm10,%ymm6,%ymm6
-	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
-	addq	4*8(%rsi,%rcx), %r10
-	adcq	8+4*8(%rsi,%rcx), %r11
-	adcq	$1,%r12
-
-	leaq	48(%rcx),%rcx
 	vpsrld	$20,%ymm6,%ymm8
 	vpslld	$32-20,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
+	addq	0+32(%rsi,%rcx,1),%r10
+	adcq	8+32(%rsi,%rcx,1),%r11
+	adcq	$1,%r12
+
+	leaq	48(%rcx),%rcx
 	vpsrld	$20,%ymm5,%ymm8
 	vpslld	$32-20,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -4469,49 +4482,48 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	movq	0+0(%rbp),%rdx
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vpshufb	%ymm8,%ymm15,%ymm15
-	vpshufb	%ymm8,%ymm14,%ymm14
-	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	movq	8+0(%rbp),%rdx
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpaddd	%ymm12,%ymm8,%ymm8
-	vpxor	%ymm11,%ymm7,%ymm7
-	vpxor	%ymm10,%ymm6,%ymm6
-	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
-	addq	%rax,%r15
-	adcq	%rdx,%r9
 	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
 	vpslld	$32-25,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
+	addq	%rax,%r15
+	adcq	%rdx,%r9
 	vpsrld	$25,%ymm5,%ymm8
 	vpslld	$32-25,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
@@ -4519,6 +4531,10 @@
 	vpalignr	$8,%ymm10,%ymm10,%ymm10
 	vpalignr	$4,%ymm14,%ymm14,%ymm14
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -4528,39 +4544,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpalignr	$8,%ymm9,%ymm9,%ymm9
-	vpalignr	$4,%ymm13,%ymm13,%ymm13
-	vpalignr	$12,%ymm4,%ymm4,%ymm4
-	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
 
 	cmpq	$60*8,%rcx
-	jne	2b
-	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
-	vpaddd	64(%rbp),%ymm7,%ymm7
-	vpaddd	96(%rbp),%ymm11,%ymm11
-	vpaddd	256(%rbp),%ymm15,%ymm15
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jne	.Lopen_avx2_main_loop_rounds
+	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	0+64(%rbp),%ymm7,%ymm7
+	vpaddd	0+96(%rbp),%ymm11,%ymm11
+	vpaddd	0+256(%rbp),%ymm15,%ymm15
+	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 
-	vmovdqa	%ymm0,128(%rbp)
-	addq	60*8(%rsi),%r10
+	vmovdqa	%ymm0,0+128(%rbp)
+	addq	0+60*8(%rsi),%r10
 	adcq	8+60*8(%rsi),%r11
 	adcq	$1,%r12
 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
@@ -4576,24 +4587,24 @@
 	vmovdqu	%ymm7,64+0(%rdi)
 	vmovdqu	%ymm11,96+0(%rdi)
 
-	vmovdqa	128(%rbp),%ymm0
-	movq	0+0(%rbp),%rax
+	vmovdqa	0+128(%rbp),%ymm0
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -4609,9 +4620,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -4627,7 +4637,7 @@
 	vmovdqu	%ymm2,32+128(%rdi)
 	vmovdqu	%ymm6,64+128(%rdi)
 	vmovdqu	%ymm10,96+128(%rdi)
-	addq	60*8+16(%rsi),%r10
+	addq	0+60*8+16(%rsi),%r10
 	adcq	8+60*8+16(%rsi),%r11
 	adcq	$1,%r12
 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
@@ -4642,23 +4652,23 @@
 	vmovdqu	%ymm1,32+256(%rdi)
 	vmovdqu	%ymm5,64+256(%rdi)
 	vmovdqu	%ymm9,96+256(%rdi)
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -4674,9 +4684,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -4696,47 +4705,51 @@
 	leaq	512(%rsi),%rsi
 	leaq	512(%rdi),%rdi
 	subq	$512,%rbx
-	jmp	1b
-3:
+	jmp	.Lopen_avx2_main_loop
+.Lopen_avx2_main_loop_done:
 	testq	%rbx,%rbx
 	vzeroupper
-	je	open_sse_finalize
-3:
+	je	.Lopen_sse_finalize
+
+	cmpq	$384,%rbx
+	ja	.Lopen_avx2_tail_512
+	cmpq	$256,%rbx
+	ja	.Lopen_avx2_tail_384
 	cmpq	$128,%rbx
-	ja	3f
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
+	ja	.Lopen_avx2_tail_256
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
+	vmovdqa	.Lavx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
+	vmovdqa	%ymm12,0+160(%rbp)
 
 	xorq	%r8,%r8
 	movq	%rbx,%rcx
 	andq	$-16,%rcx
 	testq	%rcx,%rcx
-	je	2f
-1:
-	addq	0*8(%rsi,%r8), %r10
-	adcq	8+0*8(%rsi,%r8), %r11
+	je	.Lopen_avx2_tail_128_rounds
+.Lopen_avx2_tail_128_rounds_and_x1hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -4752,18 +4765,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
-2:
+.Lopen_avx2_tail_128_rounds:
 	addq	$16,%r8
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4771,7 +4783,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4782,7 +4794,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4790,7 +4802,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4801,36 +4813,35 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 
 	cmpq	%rcx,%r8
-	jb	1b
+	jb	.Lopen_avx2_tail_128_rounds_and_x1hash
 	cmpq	$160,%r8
-	jne	2b
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jne	.Lopen_avx2_tail_128_rounds
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
 	vmovdqa	%ymm3,%ymm8
 
-	jmp	open_avx2_tail_loop
-3:
-	cmpq	$256,%rbx
-	ja	3f
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jmp	.Lopen_avx2_tail_128_xor
+
+.Lopen_avx2_tail_256:
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm13
+	vmovdqa	.Lavx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
 
-	movq	%rbx,128(%rbp)
+	movq	%rbx,0+128(%rbp)
 	movq	%rbx,%rcx
 	subq	$128,%rcx
 	shrq	$4,%rcx
@@ -4839,18 +4850,18 @@
 	cmovgq	%r8,%rcx
 	movq	%rsi,%rbx
 	xorq	%r8,%r8
-1:
-	addq	0(%rbx),%r10
+.Lopen_avx2_tail_256_rounds_and_x1hash:
+	addq	0+0(%rbx),%r10
 	adcq	8+0(%rbx),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -4868,18 +4879,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rbx),%rbx
-2:
+.Lopen_avx2_tail_256_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4887,7 +4897,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4898,7 +4908,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -4906,7 +4916,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -4919,7 +4929,7 @@
 	incq	%r8
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4927,7 +4937,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4938,7 +4948,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -4946,7 +4956,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -4957,7 +4967,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -4965,7 +4975,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -4976,28 +4986,28 @@
 	vpalignr	$12,%ymm6,%ymm6,%ymm6
 
 	cmpq	%rcx,%r8
-	jb	1b
+	jb	.Lopen_avx2_tail_256_rounds_and_x1hash
 	cmpq	$10,%r8
-	jne	2b
+	jne	.Lopen_avx2_tail_256_rounds
 	movq	%rbx,%r8
 	subq	%rsi,%rbx
 	movq	%rbx,%rcx
-	movq	128(%rbp),%rbx
-1:
+	movq	0+128(%rbp),%rbx
+.Lopen_avx2_tail_256_hash:
 	addq	$16,%rcx
 	cmpq	%rbx,%rcx
-	jg	1f
-	addq	0(%r8),%r10
+	jg	.Lopen_avx2_tail_256_done
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5015,24 +5025,23 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%r8),%r8
-	jmp	1b
-1:
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jmp	.Lopen_avx2_tail_256_hash
+.Lopen_avx2_tail_256_done:
+	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
@@ -5054,28 +5063,27 @@
 	leaq	128(%rsi),%rsi
 	leaq	128(%rdi),%rdi
 	subq	$128,%rbx
-	jmp	open_avx2_tail_loop
-3:
-	cmpq	$384,%rbx
-	ja	3f
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jmp	.Lopen_avx2_tail_128_xor
+
+.Lopen_avx2_tail_384:
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm10
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm14
+	vmovdqa	.Lavx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
 
-	movq	%rbx,128(%rbp)
+	movq	%rbx,0+128(%rbp)
 	movq	%rbx,%rcx
 	subq	$256,%rcx
 	shrq	$4,%rcx
@@ -5085,18 +5093,18 @@
 	cmovgq	%r8,%rcx
 	movq	%rsi,%rbx
 	xorq	%r8,%r8
-1:
-	addq	0(%rbx),%r10
+.Lopen_avx2_tail_384_rounds_and_x2hash:
+	addq	0+0(%rbx),%r10
 	adcq	8+0(%rbx),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5114,18 +5122,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rbx),%rbx
-2:
+.Lopen_avx2_tail_384_rounds_and_x1hash:
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -5133,7 +5140,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -5144,7 +5151,7 @@
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -5152,7 +5159,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -5163,7 +5170,7 @@
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -5171,7 +5178,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -5180,26 +5187,26 @@
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
-	addq	0(%rbx),%r10
+	addq	0+0(%rbx),%r10
 	adcq	8+0(%rbx),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -5215,9 +5222,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -5226,7 +5232,7 @@
 	incq	%r8
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -5234,7 +5240,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -5245,7 +5251,7 @@
 	vpalignr	$12,%ymm6,%ymm6,%ymm6
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -5253,7 +5259,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -5264,7 +5270,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -5272,7 +5278,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -5283,28 +5289,28 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 
 	cmpq	%rcx,%r8
-	jb	1b
+	jb	.Lopen_avx2_tail_384_rounds_and_x2hash
 	cmpq	$10,%r8
-	jne	2b
+	jne	.Lopen_avx2_tail_384_rounds_and_x1hash
 	movq	%rbx,%r8
 	subq	%rsi,%rbx
 	movq	%rbx,%rcx
-	movq	128(%rbp),%rbx
-1:
+	movq	0+128(%rbp),%rbx
+.Lopen_avx2_384_tail_hash:
 	addq	$16,%rcx
 	cmpq	%rbx,%rcx
-	jg	1f
-	addq	0(%r8),%r10
+	jg	.Lopen_avx2_384_tail_done
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5322,28 +5328,27 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%r8),%r8
-	jmp	1b
-1:
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jmp	.Lopen_avx2_384_tail_hash
+.Lopen_avx2_384_tail_done:
+	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
@@ -5377,11 +5382,12 @@
 	leaq	256(%rsi),%rsi
 	leaq	256(%rdi),%rdi
 	subq	$256,%rbx
-	jmp	open_avx2_tail_loop
-3:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jmp	.Lopen_avx2_tail_128_xor
+
+.Lopen_avx2_tail_512:
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
@@ -5391,39 +5397,39 @@
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm15
+	vmovdqa	.Lavx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm15
 	vpaddd	%ymm15,%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm15,256(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm15,0+256(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
 
 	xorq	%rcx,%rcx
 	movq	%rsi,%r8
-1:
-	addq	0(%r8),%r10
+.Lopen_avx2_tail_512_rounds_and_x2hash:
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -5439,17 +5445,16 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%r8),%r8
-2:
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+.Lopen_avx2_tail_512_rounds_and_x1hash:
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -5462,16 +5467,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -5484,18 +5488,19 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
-	addq	0(%r8),%r10
+	vmovdqa	.Lrol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5513,13 +5518,11 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
@@ -5531,16 +5534,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -5553,7 +5555,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
@@ -5566,18 +5568,20 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	addq	16(%r8),%r10
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	addq	0+16(%r8),%r10
 	adcq	8+16(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5595,16 +5599,13 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	32(%r8),%r8
-	vmovdqa	.rol16(%rip),%ymm8
-	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
@@ -5616,16 +5617,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -5638,7 +5638,7 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -5651,16 +5651,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -5673,7 +5672,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
@@ -5689,26 +5688,26 @@
 
 	incq	%rcx
 	cmpq	$4,%rcx
-	jl	1b
+	jl	.Lopen_avx2_tail_512_rounds_and_x2hash
 	cmpq	$10,%rcx
-	jne	2b
+	jne	.Lopen_avx2_tail_512_rounds_and_x1hash
 	movq	%rbx,%rcx
 	subq	$384,%rcx
 	andq	$-16,%rcx
-1:
+.Lopen_avx2_tail_512_hash:
 	testq	%rcx,%rcx
-	je	1f
-	addq	0(%r8),%r10
+	je	.Lopen_avx2_tail_512_done
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5726,35 +5725,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%r8),%r8
 	subq	$16,%rcx
-	jmp	1b
-1:
-	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
-	vpaddd	64(%rbp),%ymm7,%ymm7
-	vpaddd	96(%rbp),%ymm11,%ymm11
-	vpaddd	256(%rbp),%ymm15,%ymm15
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jmp	.Lopen_avx2_tail_512_hash
+.Lopen_avx2_tail_512_done:
+	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	0+64(%rbp),%ymm7,%ymm7
+	vpaddd	0+96(%rbp),%ymm11,%ymm11
+	vpaddd	0+256(%rbp),%ymm15,%ymm15
+	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 
-	vmovdqa	%ymm0,128(%rbp)
+	vmovdqa	%ymm0,0+128(%rbp)
 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
@@ -5768,7 +5766,7 @@
 	vmovdqu	%ymm7,64+0(%rdi)
 	vmovdqu	%ymm11,96+0(%rdi)
 
-	vmovdqa	128(%rbp),%ymm0
+	vmovdqa	0+128(%rbp),%ymm0
 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
@@ -5802,9 +5800,9 @@
 	leaq	384(%rsi),%rsi
 	leaq	384(%rdi),%rdi
 	subq	$384,%rbx
-open_avx2_tail_loop:
+.Lopen_avx2_tail_128_xor:
 	cmpq	$32,%rbx
-	jb	open_avx2_tail
+	jb	.Lopen_avx2_tail_32_xor
 	subq	$32,%rbx
 	vpxor	(%rsi),%ymm0,%ymm0
 	vmovdqu	%ymm0,(%rdi)
@@ -5813,11 +5811,11 @@
 	vmovdqa	%ymm4,%ymm0
 	vmovdqa	%ymm8,%ymm4
 	vmovdqa	%ymm12,%ymm8
-	jmp	open_avx2_tail_loop
-open_avx2_tail:
+	jmp	.Lopen_avx2_tail_128_xor
+.Lopen_avx2_tail_32_xor:
 	cmpq	$16,%rbx
 	vmovdqa	%xmm0,%xmm1
-	jb	1f
+	jb	.Lopen_avx2_exit
 	subq	$16,%rbx
 
 	vpxor	(%rsi),%xmm0,%xmm1
@@ -5826,25 +5824,25 @@
 	leaq	16(%rdi),%rdi
 	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
 	vmovdqa	%xmm0,%xmm1
-1:
+.Lopen_avx2_exit:
 	vzeroupper
-	jmp	open_sse_tail_16
+	jmp	.Lopen_sse_tail_16
 
-open_avx2_192:
+.Lopen_avx2_192:
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm8,%ymm10
-	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
+	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
 	vmovdqa	%ymm12,%ymm11
 	vmovdqa	%ymm13,%ymm15
 	movq	$10,%r10
-1:
+.Lopen_avx2_192_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -5852,7 +5850,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -5863,7 +5861,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -5871,7 +5869,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -5882,7 +5880,7 @@
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -5890,7 +5888,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -5901,7 +5899,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -5909,7 +5907,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -5920,7 +5918,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 
 	decq	%r10
-	jne	1b
+	jne	.Lopen_avx2_192_rounds
 	vpaddd	%ymm2,%ymm0,%ymm0
 	vpaddd	%ymm2,%ymm1,%ymm1
 	vpaddd	%ymm6,%ymm4,%ymm4
@@ -5931,8 +5929,8 @@
 	vpaddd	%ymm15,%ymm13,%ymm13
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 
-	vpand	.clamp(%rip),%ymm3,%ymm3
-	vmovdqa	%ymm3,0(%rbp)
+	vpand	.Lclamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0+0(%rbp)
 
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
@@ -5940,33 +5938,33 @@
 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
-open_avx2_short:
+.Lopen_avx2_short:
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
-open_avx2_hash_and_xor_loop:
+.Lopen_avx2_short_hash_and_xor_loop:
 	cmpq	$32,%rbx
-	jb	open_avx2_short_tail_32
+	jb	.Lopen_avx2_short_tail_32
 	subq	$32,%rbx
-	addq	0(%rsi),%r10
+	addq	0+0(%rsi),%r10
 	adcq	8+0(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -5982,32 +5980,31 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	addq	16(%rsi),%r10
+	addq	0+16(%rsi),%r10
 	adcq	8+16(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -6023,9 +6020,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -6045,32 +6041,32 @@
 	vmovdqa	%ymm13,%ymm9
 	vmovdqa	%ymm2,%ymm13
 	vmovdqa	%ymm6,%ymm2
-	jmp	open_avx2_hash_and_xor_loop
-open_avx2_short_tail_32:
+	jmp	.Lopen_avx2_short_hash_and_xor_loop
+.Lopen_avx2_short_tail_32:
 	cmpq	$16,%rbx
 	vmovdqa	%xmm0,%xmm1
-	jb	1f
+	jb	.Lopen_avx2_short_tail_32_exit
 	subq	$16,%rbx
-	addq	0(%rsi),%r10
+	addq	0+0(%rsi),%r10
 	adcq	8+0(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -6086,9 +6082,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -6098,29 +6093,29 @@
 	leaq	16(%rsi),%rsi
 	leaq	16(%rdi),%rdi
 	vextracti128	$1,%ymm0,%xmm1
-1:
+.Lopen_avx2_short_tail_32_exit:
 	vzeroupper
-	jmp	open_sse_tail_16
+	jmp	.Lopen_sse_tail_16
 
-open_avx2_320:
+.Lopen_avx2_320:
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm8,%ymm10
-	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
-	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14
+	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
+	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
 	movq	$10,%r10
-1:
+.Lopen_avx2_320_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -6128,7 +6123,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -6139,7 +6134,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -6147,7 +6142,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -6158,7 +6153,7 @@
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -6166,7 +6161,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -6177,7 +6172,7 @@
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -6185,7 +6180,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -6196,7 +6191,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -6204,7 +6199,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -6215,7 +6210,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -6223,7 +6218,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -6234,23 +6229,23 @@
 	vpalignr	$12,%ymm6,%ymm6,%ymm6
 
 	decq	%r10
-	jne	1b
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	jne	.Lopen_avx2_320_rounds
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
 	vpaddd	%ymm7,%ymm4,%ymm4
 	vpaddd	%ymm7,%ymm5,%ymm5
 	vpaddd	%ymm7,%ymm6,%ymm6
 	vpaddd	%ymm11,%ymm8,%ymm8
 	vpaddd	%ymm11,%ymm9,%ymm9
 	vpaddd	%ymm11,%ymm10,%ymm10
-	vpaddd	160(%rbp),%ymm12,%ymm12
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 
-	vpand	.clamp(%rip),%ymm3,%ymm3
-	vmovdqa	%ymm3,0(%rbp)
+	vpand	.Lclamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0+0(%rbp)
 
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
@@ -6262,46 +6257,66 @@
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
-	jmp	open_avx2_short
+	jmp	.Lopen_avx2_short
 .size	chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2
+.cfi_endproc	
 
 
 .type	chacha20_poly1305_seal_avx2,@function
 .align	64
 chacha20_poly1305_seal_avx2:
+.cfi_startproc	
+
+
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%rbp,-16
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%rbx,-24
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r12,-32
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r13,-40
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r14,-48
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r15,-56
+.cfi_adjust_cfa_offset	8
+.cfi_offset	%r9,-64
+.cfi_adjust_cfa_offset	288 + 32
+
 	vzeroupper
-	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
 	vbroadcasti128	0(%r9),%ymm4
 	vbroadcasti128	16(%r9),%ymm8
 	vbroadcasti128	32(%r9),%ymm12
-	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
+	vpaddd	.Lavx2_init(%rip),%ymm12,%ymm12
 	cmpq	$192,%rbx
-	jbe	seal_avx2_192
+	jbe	.Lseal_avx2_192
 	cmpq	$320,%rbx
-	jbe	seal_avx2_320
+	jbe	.Lseal_avx2_320
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm4,%ymm7
-	vmovdqa	%ymm4,64(%rbp)
+	vmovdqa	%ymm4,0+64(%rbp)
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm8,%ymm10
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	%ymm8,96(%rbp)
+	vmovdqa	%ymm8,0+96(%rbp)
 	vmovdqa	%ymm12,%ymm15
-	vpaddd	.avx2_inc(%rip),%ymm15,%ymm14
-	vpaddd	.avx2_inc(%rip),%ymm14,%ymm13
-	vpaddd	.avx2_inc(%rip),%ymm13,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm15,256(%rbp)
+	vpaddd	.Lavx2_inc(%rip),%ymm15,%ymm14
+	vpaddd	.Lavx2_inc(%rip),%ymm14,%ymm13
+	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm12
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm15,0+256(%rbp)
 	movq	$10,%r10
-1:
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+.Lseal_avx2_init_rounds:
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6314,16 +6329,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6336,7 +6350,7 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6349,16 +6363,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6371,7 +6384,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
@@ -6384,8 +6397,8 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6398,16 +6411,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6420,7 +6432,7 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6433,16 +6445,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6455,7 +6466,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
@@ -6470,29 +6481,29 @@
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
 
 	decq	%r10
-	jnz	1b
-	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
-	vpaddd	64(%rbp),%ymm7,%ymm7
-	vpaddd	96(%rbp),%ymm11,%ymm11
-	vpaddd	256(%rbp),%ymm15,%ymm15
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jnz	.Lseal_avx2_init_rounds
+	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	0+64(%rbp),%ymm7,%ymm7
+	vpaddd	0+96(%rbp),%ymm11,%ymm11
+	vpaddd	0+256(%rbp),%ymm15,%ymm15
+	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 
 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
-	vpand	.clamp(%rip),%ymm15,%ymm15
-	vmovdqa	%ymm15,0(%rbp)
+	vpand	.Lclamp(%rip),%ymm15,%ymm15
+	vmovdqa	%ymm15,0+0(%rbp)
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
 
@@ -6534,7 +6545,7 @@
 	subq	$320,%rbx
 	movq	$320,%rcx
 	cmpq	$128,%rbx
-	jbe	seal_avx2_hash
+	jbe	.Lseal_avx2_short_hash_remainder
 	vpxor	0(%rsi),%ymm0,%ymm0
 	vpxor	32(%rsi),%ymm4,%ymm4
 	vpxor	64(%rsi),%ymm8,%ymm8
@@ -6548,16 +6559,16 @@
 	movq	$8,%rcx
 	movq	$2,%r8
 	cmpq	$128,%rbx
-	jbe	seal_avx2_tail_128
+	jbe	.Lseal_avx2_tail_128
 	cmpq	$256,%rbx
-	jbe	seal_avx2_tail_256
+	jbe	.Lseal_avx2_tail_256
 	cmpq	$384,%rbx
-	jbe	seal_avx2_tail_384
+	jbe	.Lseal_avx2_tail_384
 	cmpq	$512,%rbx
-	jbe	seal_avx2_tail_512
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jbe	.Lseal_avx2_tail_512
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
@@ -6567,17 +6578,17 @@
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm15
+	vmovdqa	.Lavx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm15
 	vpaddd	%ymm15,%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm15,256(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm15,0+256(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6590,16 +6601,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6612,7 +6622,7 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6625,16 +6635,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6647,7 +6656,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
@@ -6660,8 +6669,8 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6674,16 +6683,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6696,7 +6704,7 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6709,16 +6717,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6731,7 +6738,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
@@ -6744,8 +6751,8 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6758,16 +6765,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6780,19 +6786,21 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
 
 	subq	$16,%rdi
 	movq	$9,%rcx
-	jmp	4f
-1:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jmp	.Lseal_avx2_main_loop_rounds_entry
+.align	32
+.Lseal_avx2_main_loop:
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
@@ -6802,23 +6810,24 @@
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm15
+	vmovdqa	.Lavx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm15
 	vpaddd	%ymm15,%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm15,256(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm15,0+256(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
 
 	movq	$10,%rcx
-2:
-	addq	0(%rdi),%r10
+.align	32
+.Lseal_avx2_main_loop_rounds:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6827,7 +6836,7 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
@@ -6838,23 +6847,22 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
-	movq	8+0(%rbp),%rdx
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6862,18 +6870,19 @@
 	vpslld	$32-20,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
 	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
 	addq	%rax,%r15
 	adcq	%rdx,%r9
-	vpslld	$32-20,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -6883,15 +6892,13 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
-4:
-	vpxor	%ymm3,%ymm15,%ymm15
+.Lseal_avx2_main_loop_rounds_entry:
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
@@ -6899,27 +6906,26 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
-	addq	16(%rdi),%r10
-	adcq	8+16(%rdi),%r11
-	adcq	$1,%r12
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
+	addq	0+16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	movq	0+0(%rbp),%rdx
+	vmovdqa	%ymm8,0+128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vmovdqa	%ymm8,128(%rbp)
-	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
@@ -6931,28 +6937,28 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
-	movq	8+0(%rbp),%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpalignr	$8,%ymm10,%ymm10,%ymm10
-	vpalignr	$12,%ymm14,%ymm14,%ymm14
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$12,%ymm13,%ymm13,%ymm13
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6961,17 +6967,19 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	addq	%rax,%r15
-	adcq	%rdx,%r9
 	vpshufb	%ymm8,%ymm15,%ymm15
 	vpshufb	%ymm8,%ymm14,%ymm14
+	addq	%rax,%r15
+	adcq	%rdx,%r9
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -6981,35 +6989,31 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpxor	%ymm11,%ymm7,%ymm7
-	vpxor	%ymm10,%ymm6,%ymm6
-	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
-	addq	32(%rdi),%r10
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	addq	0+32(%rdi),%r10
 	adcq	8+32(%rdi),%r11
 	adcq	$1,%r12
 
 	leaq	48(%rdi),%rdi
-	vpsrld	$20,%ymm6,%ymm8
-	vpslld	$32-20,%ymm6,%ymm6
-	vpxor	%ymm8,%ymm6,%ymm6
 	vpsrld	$20,%ymm5,%ymm8
 	vpslld	$32-20,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -7018,49 +7022,48 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	movq	0+0(%rbp),%rdx
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vpshufb	%ymm8,%ymm15,%ymm15
-	vpshufb	%ymm8,%ymm14,%ymm14
-	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	movq	8+0(%rbp),%rdx
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpaddd	%ymm12,%ymm8,%ymm8
-	vpxor	%ymm11,%ymm7,%ymm7
-	vpxor	%ymm10,%ymm6,%ymm6
-	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
-	addq	%rax,%r15
-	adcq	%rdx,%r9
 	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
 	vpslld	$32-25,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
+	addq	%rax,%r15
+	adcq	%rdx,%r9
 	vpsrld	$25,%ymm5,%ymm8
 	vpslld	$32-25,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
@@ -7068,6 +7071,10 @@
 	vpalignr	$8,%ymm10,%ymm10,%ymm10
 	vpalignr	$4,%ymm14,%ymm14,%ymm14
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -7077,42 +7084,101 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpalignr	$8,%ymm9,%ymm9,%ymm9
-	vpalignr	$4,%ymm13,%ymm13,%ymm13
-	vpalignr	$12,%ymm4,%ymm4,%ymm4
-	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
 
 	decq	%rcx
-	jne	2b
-	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
-	vpaddd	64(%rbp),%ymm7,%ymm7
-	vpaddd	96(%rbp),%ymm11,%ymm11
-	vpaddd	256(%rbp),%ymm15,%ymm15
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jne	.Lseal_avx2_main_loop_rounds
+	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	0+64(%rbp),%ymm7,%ymm7
+	vpaddd	0+96(%rbp),%ymm11,%ymm11
+	vpaddd	0+256(%rbp),%ymm15,%ymm15
+	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
+
+	vmovdqa	%ymm0,0+128(%rbp)
+	addq	0+0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r15
+	adcq	%r14,%r9
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	addq	0+16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r15
+	adcq	%r14,%r9
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
 
 	leaq	32(%rdi),%rdi
-	vmovdqa	%ymm0,128(%rbp)
-	addq	-32(%rdi),%r10
-	adcq	8+-32(%rdi),%r11
-	adcq	$1,%r12
 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
@@ -7126,45 +7192,7 @@
 	vmovdqu	%ymm7,64+0(%rdi)
 	vmovdqu	%ymm11,96+0(%rdi)
 
-	vmovdqa	128(%rbp),%ymm0
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
-	imulq	%r12,%r15
-	addq	%rax,%r14
-	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
-	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
-	adcq	%rdx,%r9
-	movq	%r13,%r10
-	movq	%r14,%r11
-	movq	%r15,%r12
-	andq	$3,%r12
-	movq	%r15,%r13
-	andq	$-4,%r13
-	movq	%r9,%r14
-	shrdq	$2,%r9,%r15
-	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
-	addq	%r15,%r10
-	adcq	%r9,%r11
-	adcq	$0,%r12
+	vmovdqa	0+128(%rbp),%ymm0
 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
@@ -7177,9 +7205,6 @@
 	vmovdqu	%ymm2,32+128(%rdi)
 	vmovdqu	%ymm6,64+128(%rdi)
 	vmovdqu	%ymm10,96+128(%rdi)
-	addq	-16(%rdi),%r10
-	adcq	8+-16(%rdi),%r11
-	adcq	$1,%r12
 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
@@ -7192,44 +7217,6 @@
 	vmovdqu	%ymm1,32+256(%rdi)
 	vmovdqu	%ymm5,64+256(%rdi)
 	vmovdqu	%ymm9,96+256(%rdi)
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
-	imulq	%r12,%r15
-	addq	%rax,%r14
-	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
-	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
-	adcq	%rdx,%r9
-	movq	%r13,%r10
-	movq	%r14,%r11
-	movq	%r15,%r12
-	andq	$3,%r12
-	movq	%r15,%r13
-	andq	$-4,%r13
-	movq	%r9,%r14
-	shrdq	$2,%r9,%r15
-	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
-	addq	%r15,%r10
-	adcq	%r9,%r11
-	adcq	$0,%r12
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
@@ -7246,32 +7233,26 @@
 	leaq	512(%rsi),%rsi
 	subq	$512,%rbx
 	cmpq	$512,%rbx
-	jg	1b
-	addq	0(%rdi),%r10
+	jg	.Lseal_avx2_main_loop
+
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
 	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
 	adcq	%rdx,%r9
 	movq	%r13,%r10
 	movq	%r14,%r11
@@ -7282,37 +7263,29 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	addq	16(%rdi),%r10
+	addq	0+16(%rdi),%r10
 	adcq	8+16(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
 	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
 	adcq	%rdx,%r9
 	movq	%r13,%r10
 	movq	%r14,%r11
@@ -7323,9 +7296,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -7333,43 +7305,41 @@
 	leaq	32(%rdi),%rdi
 	movq	$10,%rcx
 	xorq	%r8,%r8
+
+	cmpq	$384,%rbx
+	ja	.Lseal_avx2_tail_512
+	cmpq	$256,%rbx
+	ja	.Lseal_avx2_tail_384
 	cmpq	$128,%rbx
-	ja	3f
+	ja	.Lseal_avx2_tail_256
 
-seal_avx2_tail_128:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
+.Lseal_avx2_tail_128:
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
+	vmovdqa	.Lavx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
+	vmovdqa	%ymm12,0+160(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+.Lseal_avx2_tail_128_rounds_and_3xhash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
 	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
 	adcq	%rdx,%r9
 	movq	%r13,%r10
 	movq	%r14,%r11
@@ -7380,18 +7350,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+.Lseal_avx2_tail_128_rounds_and_2xhash:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7399,7 +7368,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7408,31 +7377,24 @@
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
 	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
 	adcq	%rdx,%r9
 	movq	%r13,%r10
 	movq	%r14,%r11
@@ -7443,15 +7405,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7459,7 +7420,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7468,31 +7429,24 @@
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
-	addq	16(%rdi),%r10
+	addq	0+16(%rdi),%r10
 	adcq	8+16(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
 	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
 	adcq	%rdx,%r9
 	movq	%r13,%r10
 	movq	%r14,%r11
@@ -7503,67 +7457,63 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	32(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	.Lseal_avx2_tail_128_rounds_and_3xhash
 	decq	%r8
-	jge	2b
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jge	.Lseal_avx2_tail_128_rounds_and_2xhash
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
 	vmovdqa	%ymm3,%ymm8
 
-	jmp	seal_avx2_short_loop
-3:
-	cmpq	$256,%rbx
-	ja	3f
+	jmp	.Lseal_avx2_short_loop
 
-seal_avx2_tail_256:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+.Lseal_avx2_tail_256:
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm13
+	vmovdqa	.Lavx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+.Lseal_avx2_tail_256_rounds_and_3xhash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -7579,18 +7529,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+.Lseal_avx2_tail_256_rounds_and_2xhash:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7598,7 +7547,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7609,7 +7558,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -7617,7 +7566,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -7626,26 +7575,26 @@
 	vpalignr	$12,%ymm13,%ymm13,%ymm13
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -7661,15 +7610,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7677,7 +7625,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7688,7 +7636,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -7696,7 +7644,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -7705,26 +7653,26 @@
 	vpalignr	$4,%ymm13,%ymm13,%ymm13
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
-	addq	16(%rdi),%r10
+	addq	0+16(%rdi),%r10
 	adcq	8+16(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -7740,26 +7688,25 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	32(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	.Lseal_avx2_tail_256_rounds_and_3xhash
 	decq	%r8
-	jge	2b
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jge	.Lseal_avx2_tail_256_rounds_and_2xhash
+	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
@@ -7781,50 +7728,47 @@
 	movq	$128,%rcx
 	leaq	128(%rsi),%rsi
 	subq	$128,%rbx
-	jmp	seal_avx2_hash
-3:
-	cmpq	$384,%rbx
-	ja	seal_avx2_tail_512
+	jmp	.Lseal_avx2_short_hash_remainder
 
-seal_avx2_tail_384:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+.Lseal_avx2_tail_384:
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm10
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm14
+	vmovdqa	.Lavx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+.Lseal_avx2_tail_384_rounds_and_3xhash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -7840,18 +7784,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+.Lseal_avx2_tail_384_rounds_and_2xhash:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7859,7 +7802,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7870,7 +7813,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -7878,7 +7821,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -7887,26 +7830,26 @@
 	vpalignr	$12,%ymm13,%ymm13,%ymm13
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -7922,15 +7865,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -7938,7 +7880,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -7949,7 +7891,7 @@
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7957,7 +7899,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7966,26 +7908,26 @@
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
-	addq	16(%rdi),%r10
+	addq	0+16(%rdi),%r10
 	adcq	8+16(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -8001,15 +7943,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -8017,7 +7958,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -8028,7 +7969,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -8036,7 +7977,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -8048,21 +7989,21 @@
 
 	leaq	32(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	.Lseal_avx2_tail_384_rounds_and_3xhash
 	decq	%r8
-	jge	2b
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jge	.Lseal_avx2_tail_384_rounds_and_2xhash
+	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
@@ -8096,12 +8037,12 @@
 	movq	$256,%rcx
 	leaq	256(%rsi),%rsi
 	subq	$256,%rbx
-	jmp	seal_avx2_hash
+	jmp	.Lseal_avx2_short_hash_remainder
 
-seal_avx2_tail_512:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+.Lseal_avx2_tail_512:
+	vmovdqa	.Lchacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
@@ -8111,28 +8052,28 @@
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm15
+	vmovdqa	.Lavx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm15
 	vpaddd	%ymm15,%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm15,256(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm15,0+256(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+.Lseal_avx2_tail_512_rounds_and_3xhash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -8150,17 +8091,16 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+.Lseal_avx2_tail_512_rounds_and_2xhash:
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -8173,19 +8113,18 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
-	addq	0(%rdi),%r10
+	vpxor	%ymm10,%ymm6,%ymm6
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -8198,18 +8137,18 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
-	movq	0+0(%rbp),%rdx
+	vpaddd	%ymm4,%ymm0,%ymm0
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm3,%ymm15,%ymm15
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
@@ -8218,26 +8157,25 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
-	movq	8+0(%rbp),%rdx
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpslld	$32-25,%ymm7,%ymm7
-	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
 	vpslld	$32-25,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
@@ -8247,7 +8185,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
@@ -8256,14 +8194,14 @@
 	vpalignr	$12,%ymm14,%ymm14,%ymm14
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
-	addq	%rax,%r15
-	adcq	%rdx,%r9
 	vpalignr	$12,%ymm13,%ymm13,%ymm13
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	addq	%rax,%r15
+	adcq	%rdx,%r9
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	.Lrol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -8276,8 +8214,10 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -8287,20 +8227,16 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpaddd	%ymm14,%ymm10,%ymm10
-	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -8313,12 +8249,12 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	addq	16(%rdi),%r10
-	adcq	8+16(%rdi),%r11
-	adcq	$1,%r12
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	.Lrol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
+	addq	0+16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm3,%ymm15,%ymm15
@@ -8329,24 +8265,23 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
-	movq	0+0(%rbp),%rdx
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,0+128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vpxor	%ymm9,%ymm5,%ymm5
-	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
-	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
@@ -8358,22 +8293,22 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
 	vpalignr	$12,%ymm6,%ymm6,%ymm6
-	movq	8+0(%rbp),%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpalignr	$8,%ymm10,%ymm10,%ymm10
-	vpalignr	$4,%ymm14,%ymm14,%ymm14
-	vpalignr	$12,%ymm5,%ymm5,%ymm5
-	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$4,%ymm13,%ymm13,%ymm13
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
@@ -8390,6 +8325,10 @@
 
 
 
+
+
+
+
 	addq	%rax,%r15
 	adcq	%rdx,%r9
 
@@ -8421,36 +8360,35 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	32(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	.Lseal_avx2_tail_512_rounds_and_3xhash
 	decq	%r8
-	jge	2b
-	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
-	vpaddd	64(%rbp),%ymm7,%ymm7
-	vpaddd	96(%rbp),%ymm11,%ymm11
-	vpaddd	256(%rbp),%ymm15,%ymm15
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jge	.Lseal_avx2_tail_512_rounds_and_2xhash
+	vpaddd	.Lchacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	0+64(%rbp),%ymm7,%ymm7
+	vpaddd	0+96(%rbp),%ymm11,%ymm11
+	vpaddd	0+256(%rbp),%ymm15,%ymm15
+	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 
-	vmovdqa	%ymm0,128(%rbp)
+	vmovdqa	%ymm0,0+128(%rbp)
 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
@@ -8464,7 +8402,7 @@
 	vmovdqu	%ymm7,64+0(%rdi)
 	vmovdqu	%ymm11,96+0(%rdi)
 
-	vmovdqa	128(%rbp),%ymm0
+	vmovdqa	0+128(%rbp),%ymm0
 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
@@ -8498,27 +8436,27 @@
 	movq	$384,%rcx
 	leaq	384(%rsi),%rsi
 	subq	$384,%rbx
-	jmp	seal_avx2_hash
+	jmp	.Lseal_avx2_short_hash_remainder
 
-seal_avx2_320:
+.Lseal_avx2_320:
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm8,%ymm10
-	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
-	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14
+	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
+	vpaddd	.Lavx2_inc(%rip),%ymm13,%ymm14
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
 	movq	$10,%r10
-1:
+.Lseal_avx2_320_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -8526,7 +8464,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -8537,7 +8475,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -8545,7 +8483,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -8556,7 +8494,7 @@
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -8564,7 +8502,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -8575,7 +8513,7 @@
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -8583,7 +8521,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -8594,7 +8532,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -8602,7 +8540,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -8613,7 +8551,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -8621,7 +8559,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	.Lrol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -8632,23 +8570,23 @@
 	vpalignr	$12,%ymm6,%ymm6,%ymm6
 
 	decq	%r10
-	jne	1b
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	jne	.Lseal_avx2_320_rounds
+	vpaddd	.Lchacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	.Lchacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	.Lchacha20_consts(%rip),%ymm2,%ymm2
 	vpaddd	%ymm7,%ymm4,%ymm4
 	vpaddd	%ymm7,%ymm5,%ymm5
 	vpaddd	%ymm7,%ymm6,%ymm6
 	vpaddd	%ymm11,%ymm8,%ymm8
 	vpaddd	%ymm11,%ymm9,%ymm9
 	vpaddd	%ymm11,%ymm10,%ymm10
-	vpaddd	160(%rbp),%ymm12,%ymm12
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 
-	vpand	.clamp(%rip),%ymm3,%ymm3
-	vmovdqa	%ymm3,0(%rbp)
+	vpand	.Lclamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0+0(%rbp)
 
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
@@ -8660,23 +8598,23 @@
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
-	jmp	seal_avx2_short
+	jmp	.Lseal_avx2_short
 
-seal_avx2_192:
+.Lseal_avx2_192:
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm8,%ymm10
-	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
+	vpaddd	.Lavx2_inc(%rip),%ymm12,%ymm13
 	vmovdqa	%ymm12,%ymm11
 	vmovdqa	%ymm13,%ymm15
 	movq	$10,%r10
-1:
+.Lseal_avx2_192_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -8684,7 +8622,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -8695,7 +8633,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -8703,7 +8641,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -8714,7 +8652,7 @@
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -8722,7 +8660,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	.Lrol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -8733,7 +8671,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -8741,7 +8679,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	.Lrol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -8752,7 +8690,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 
 	decq	%r10
-	jne	1b
+	jne	.Lseal_avx2_192_rounds
 	vpaddd	%ymm2,%ymm0,%ymm0
 	vpaddd	%ymm2,%ymm1,%ymm1
 	vpaddd	%ymm6,%ymm4,%ymm4
@@ -8763,8 +8701,8 @@
 	vpaddd	%ymm15,%ymm13,%ymm13
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 
-	vpand	.clamp(%rip),%ymm3,%ymm3
-	vmovdqa	%ymm3,0(%rbp)
+	vpand	.Lclamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0+0(%rbp)
 
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
@@ -8772,33 +8710,33 @@
 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
-seal_avx2_short:
+.Lseal_avx2_short:
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
 	xorq	%rcx,%rcx
-seal_avx2_hash:
+.Lseal_avx2_short_hash_remainder:
 	cmpq	$16,%rcx
-	jb	seal_avx2_short_loop
-	addq	0(%rdi),%r10
+	jb	.Lseal_avx2_short_loop
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -8814,45 +8752,44 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	subq	$16,%rcx
 	addq	$16,%rdi
-	jmp	seal_avx2_hash
-seal_avx2_short_loop:
+	jmp	.Lseal_avx2_short_hash_remainder
+.Lseal_avx2_short_loop:
 	cmpq	$32,%rbx
-	jb	seal_avx2_short_tail
+	jb	.Lseal_avx2_short_tail
 	subq	$32,%rbx
 
 	vpxor	(%rsi),%ymm0,%ymm0
 	vmovdqu	%ymm0,(%rdi)
 	leaq	32(%rsi),%rsi
 
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -8868,32 +8805,31 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	addq	16(%rdi),%r10
+	addq	0+16(%rdi),%r10
 	adcq	8+16(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -8909,9 +8845,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -8927,34 +8862,34 @@
 	vmovdqa	%ymm13,%ymm9
 	vmovdqa	%ymm2,%ymm13
 	vmovdqa	%ymm6,%ymm2
-	jmp	seal_avx2_short_loop
-seal_avx2_short_tail:
+	jmp	.Lseal_avx2_short_loop
+.Lseal_avx2_short_tail:
 	cmpq	$16,%rbx
-	jb	1f
+	jb	.Lseal_avx2_exit
 	subq	$16,%rbx
 	vpxor	(%rsi),%xmm0,%xmm3
 	vmovdqu	%xmm3,(%rdi)
 	leaq	16(%rsi),%rsi
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -8970,18 +8905,18 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
 	vextracti128	$1,%ymm0,%xmm0
-1:
+.Lseal_avx2_exit:
 	vzeroupper
-	jmp	seal_sse_tail_16
+	jmp	.Lseal_sse_tail_16
 .cfi_endproc	
+.size	chacha20_poly1305_seal_avx2, .-chacha20_poly1305_seal_avx2
 #endif
 .section	.note.GNU-stack,"",@progbits
diff --git a/deps/boringssl/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S b/deps/boringssl/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S
index 84ae1a7..964687d 100644
--- a/deps/boringssl/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S
+++ b/deps/boringssl/linux-x86_64/crypto/fipsmodule/sha1-x86_64.S
@@ -27,6 +27,8 @@
 	movl	8(%r10),%r10d
 	testl	$512,%r8d
 	jz	.Lialu
+	testl	$536870912,%r10d
+	jnz	_shaext_shortcut
 	andl	$296,%r10d
 	cmpl	$296,%r10d
 	je	_avx2_shortcut
@@ -1269,6 +1271,175 @@
 	.byte	0xf3,0xc3
 .cfi_endproc	
 .size	sha1_block_data_order,.-sha1_block_data_order
+.type	sha1_block_data_order_shaext,@function
+.align	32
+sha1_block_data_order_shaext:
+_shaext_shortcut:
+.cfi_startproc	
+	movdqu	(%rdi),%xmm0
+	movd	16(%rdi),%xmm1
+	movdqa	K_XX_XX+160(%rip),%xmm3
+
+	movdqu	(%rsi),%xmm4
+	pshufd	$27,%xmm0,%xmm0
+	movdqu	16(%rsi),%xmm5
+	pshufd	$27,%xmm1,%xmm1
+	movdqu	32(%rsi),%xmm6
+.byte	102,15,56,0,227
+	movdqu	48(%rsi),%xmm7
+.byte	102,15,56,0,235
+.byte	102,15,56,0,243
+	movdqa	%xmm1,%xmm9
+.byte	102,15,56,0,251
+	jmp	.Loop_shaext
+
+.align	16
+.Loop_shaext:
+	decq	%rdx
+	leaq	64(%rsi),%r8
+	paddd	%xmm4,%xmm1
+	cmovneq	%r8,%rsi
+	movdqa	%xmm0,%xmm8
+.byte	15,56,201,229
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,0
+.byte	15,56,200,213
+	pxor	%xmm6,%xmm4
+.byte	15,56,201,238
+.byte	15,56,202,231
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,0
+.byte	15,56,200,206
+	pxor	%xmm7,%xmm5
+.byte	15,56,202,236
+.byte	15,56,201,247
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,0
+.byte	15,56,200,215
+	pxor	%xmm4,%xmm6
+.byte	15,56,201,252
+.byte	15,56,202,245
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,0
+.byte	15,56,200,204
+	pxor	%xmm5,%xmm7
+.byte	15,56,202,254
+.byte	15,56,201,229
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,0
+.byte	15,56,200,213
+	pxor	%xmm6,%xmm4
+.byte	15,56,201,238
+.byte	15,56,202,231
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,1
+.byte	15,56,200,206
+	pxor	%xmm7,%xmm5
+.byte	15,56,202,236
+.byte	15,56,201,247
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,1
+.byte	15,56,200,215
+	pxor	%xmm4,%xmm6
+.byte	15,56,201,252
+.byte	15,56,202,245
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,1
+.byte	15,56,200,204
+	pxor	%xmm5,%xmm7
+.byte	15,56,202,254
+.byte	15,56,201,229
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,1
+.byte	15,56,200,213
+	pxor	%xmm6,%xmm4
+.byte	15,56,201,238
+.byte	15,56,202,231
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,1
+.byte	15,56,200,206
+	pxor	%xmm7,%xmm5
+.byte	15,56,202,236
+.byte	15,56,201,247
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,2
+.byte	15,56,200,215
+	pxor	%xmm4,%xmm6
+.byte	15,56,201,252
+.byte	15,56,202,245
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,2
+.byte	15,56,200,204
+	pxor	%xmm5,%xmm7
+.byte	15,56,202,254
+.byte	15,56,201,229
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,2
+.byte	15,56,200,213
+	pxor	%xmm6,%xmm4
+.byte	15,56,201,238
+.byte	15,56,202,231
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,2
+.byte	15,56,200,206
+	pxor	%xmm7,%xmm5
+.byte	15,56,202,236
+.byte	15,56,201,247
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,2
+.byte	15,56,200,215
+	pxor	%xmm4,%xmm6
+.byte	15,56,201,252
+.byte	15,56,202,245
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,3
+.byte	15,56,200,204
+	pxor	%xmm5,%xmm7
+.byte	15,56,202,254
+	movdqu	(%rsi),%xmm4
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,3
+.byte	15,56,200,213
+	movdqu	16(%rsi),%xmm5
+.byte	102,15,56,0,227
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,3
+.byte	15,56,200,206
+	movdqu	32(%rsi),%xmm6
+.byte	102,15,56,0,235
+
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,3
+.byte	15,56,200,215
+	movdqu	48(%rsi),%xmm7
+.byte	102,15,56,0,243
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,3
+.byte	65,15,56,200,201
+.byte	102,15,56,0,251
+
+	paddd	%xmm8,%xmm0
+	movdqa	%xmm1,%xmm9
+
+	jnz	.Loop_shaext
+
+	pshufd	$27,%xmm0,%xmm0
+	pshufd	$27,%xmm1,%xmm1
+	movdqu	%xmm0,(%rdi)
+	movd	%xmm1,16(%rdi)
+	.byte	0xf3,0xc3
+.cfi_endproc	
+.size	sha1_block_data_order_shaext,.-sha1_block_data_order_shaext
 .type	sha1_block_data_order_ssse3,@function
 .align	16
 sha1_block_data_order_ssse3:
diff --git a/deps/boringssl/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S b/deps/boringssl/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
index e50227a..0f5cb55 100644
--- a/deps/boringssl/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
+++ b/deps/boringssl/mac-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.S
@@ -17,26 +17,26 @@
 chacha20_poly1305_constants:
 
 .p2align	6
-.chacha20_consts:
+L$chacha20_consts:
 .byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
 .byte	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
-.rol8:
+L$rol8:
 .byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
 .byte	3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
-.rol16:
+L$rol16:
 .byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
 .byte	2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
-.avx2_init:
+L$avx2_init:
 .long	0,0,0,0
-.sse_inc:
+L$sse_inc:
 .long	1,0,0,0
-.avx2_inc:
+L$avx2_inc:
 .long	2,0,0,0,2,0,0,0
-.clamp:
+L$clamp:
 .quad	0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
 .quad	0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
 .p2align	4
-.and_masks:
+L$and_masks:
 .byte	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
 .byte	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
 .byte	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
@@ -58,34 +58,35 @@
 .p2align	6
 poly_hash_ad_internal:
 
+
 	xorq	%r10,%r10
 	xorq	%r11,%r11
 	xorq	%r12,%r12
 	cmpq	$13,%r8
-	jne	hash_ad_loop
-poly_fast_tls_ad:
+	jne	L$hash_ad_loop
+L$poly_fast_tls_ad:
 
 	movq	(%rcx),%r10
 	movq	5(%rcx),%r11
 	shrq	$24,%r11
 	movq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -101,38 +102,37 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	.byte	0xf3,0xc3
-hash_ad_loop:
+L$hash_ad_loop:
 
 	cmpq	$16,%r8
-	jb	hash_ad_tail
-	addq	0(%rcx),%r10
+	jb	L$hash_ad_tail
+	addq	0+0(%rcx),%r10
 	adcq	8+0(%rcx),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -148,53 +148,52 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rcx),%rcx
 	subq	$16,%r8
-	jmp	hash_ad_loop
-hash_ad_tail:
+	jmp	L$hash_ad_loop
+L$hash_ad_tail:
 	cmpq	$0,%r8
-	je	1f
+	je	L$hash_ad_done
 
 	xorq	%r13,%r13
 	xorq	%r14,%r14
 	xorq	%r15,%r15
 	addq	%r8,%rcx
-hash_ad_tail_loop:
+L$hash_ad_tail_loop:
 	shldq	$8,%r13,%r14
 	shlq	$8,%r13
 	movzbq	-1(%rcx),%r15
 	xorq	%r15,%r13
 	decq	%rcx
 	decq	%r8
-	jne	hash_ad_tail_loop
+	jne	L$hash_ad_tail_loop
 
 	addq	%r13,%r10
 	adcq	%r14,%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -210,15 +209,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 
-1:
+L$hash_ad_done:
 	.byte	0xf3,0xc3
 
 
@@ -245,43 +243,39 @@
 
 	pushq	%r9
 
-	subq	$288 + 32,%rsp
-
-
-
-
-
+	subq	$288 + 0 + 32,%rsp
 
 
 	leaq	32(%rsp),%rbp
 	andq	$-32,%rbp
-	movq	%rdx,8+32(%rbp)
-	movq	%r8,0+32(%rbp)
+
 	movq	%rdx,%rbx
+	movq	%r8,0+0+32(%rbp)
+	movq	%rbx,8+0+32(%rbp)
 
 	movl	_OPENSSL_ia32cap_P+8(%rip),%eax
 	andl	$288,%eax
 	xorl	$288,%eax
 	jz	chacha20_poly1305_open_avx2
 
-1:
 	cmpq	$128,%rbx
-	jbe	open_sse_128
+	jbe	L$open_sse_128
 
-	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	L$chacha20_consts(%rip),%xmm0
 	movdqu	0(%r9),%xmm4
 	movdqu	16(%r9),%xmm8
 	movdqu	32(%r9),%xmm12
+
 	movdqa	%xmm12,%xmm7
 
-	movdqa	%xmm4,48(%rbp)
-	movdqa	%xmm8,64(%rbp)
-	movdqa	%xmm12,96(%rbp)
+	movdqa	%xmm4,0+48(%rbp)
+	movdqa	%xmm8,0+64(%rbp)
+	movdqa	%xmm12,0+96(%rbp)
 	movq	$10,%r10
-1:
+L$open_sse_init_rounds:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -290,7 +284,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -302,7 +296,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -311,7 +305,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -323,24 +317,24 @@
 .byte	102,69,15,58,15,228,4
 
 	decq	%r10
-	jne	1b
+	jne	L$open_sse_init_rounds
 
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
 
-	pand	.clamp(%rip),%xmm0
-	movdqa	%xmm0,0(%rbp)
-	movdqa	%xmm4,16(%rbp)
+	pand	L$clamp(%rip),%xmm0
+	movdqa	%xmm0,0+0(%rbp)
+	movdqa	%xmm4,0+16(%rbp)
 
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
-open_sse_main_loop:
+L$open_sse_main_loop:
 	cmpq	$256,%rbx
-	jb	2f
+	jb	L$open_sse_tail
 
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+	movdqa	L$chacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
@@ -350,26 +344,26 @@
 	movdqa	%xmm0,%xmm3
 	movdqa	%xmm4,%xmm7
 	movdqa	%xmm8,%xmm11
-	movdqa	96(%rbp),%xmm15
-	paddd	.sse_inc(%rip),%xmm15
+	movdqa	0+96(%rbp),%xmm15
+	paddd	L$sse_inc(%rip),%xmm15
 	movdqa	%xmm15,%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	paddd	L$sse_inc(%rip),%xmm14
 	movdqa	%xmm14,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	L$sse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
-	movdqa	%xmm15,144(%rbp)
+	paddd	L$sse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
+	movdqa	%xmm15,0+144(%rbp)
 
 
 
 	movq	$4,%rcx
 	movq	%rsi,%r8
-1:
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+L$open_sse_main_loop_rounds:
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	L$rol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -382,13 +376,13 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
 	paddd	%xmm12,%xmm8
 	pxor	%xmm11,%xmm7
-	addq	0(%r8),%r10
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
 
@@ -396,7 +390,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -413,17 +407,17 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	L$rol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -436,26 +430,26 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
 	paddd	%xmm12,%xmm8
 	pxor	%xmm11,%xmm7
 	pxor	%xmm10,%xmm6
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -472,7 +466,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	imulq	%r12,%r9
 	addq	%r10,%r15
 	adcq	%rdx,%r9
@@ -488,8 +482,8 @@
 .byte	102,15,58,15,228,4
 .byte	102,69,15,58,15,192,8
 .byte	102,69,15,58,15,228,12
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	L$rol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -505,9 +499,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -517,7 +510,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -526,7 +519,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -543,7 +536,7 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	L$rol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -556,7 +549,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -565,7 +558,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -582,7 +575,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 .byte	102,15,58,15,255,12
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,4
@@ -597,27 +590,27 @@
 .byte	102,69,15,58,15,228,4
 
 	decq	%rcx
-	jge	1b
-	addq	0(%r8),%r10
+	jge	L$open_sse_main_loop_rounds
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -633,33 +626,32 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%r8),%r8
 	cmpq	$-6,%rcx
-	jg	1b
-	paddd	.chacha20_consts(%rip),%xmm3
-	paddd	48(%rbp),%xmm7
-	paddd	64(%rbp),%xmm11
-	paddd	144(%rbp),%xmm15
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
-	movdqa	%xmm12,80(%rbp)
+	jg	L$open_sse_main_loop_rounds
+	paddd	L$chacha20_consts(%rip),%xmm3
+	paddd	0+48(%rbp),%xmm7
+	paddd	0+64(%rbp),%xmm11
+	paddd	0+144(%rbp),%xmm15
+	paddd	L$chacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	L$chacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
+	movdqa	%xmm12,0+80(%rbp)
 	movdqu	0 + 0(%rsi),%xmm12
 	pxor	%xmm3,%xmm12
 	movdqu	%xmm12,0 + 0(%rdi)
@@ -703,7 +695,7 @@
 	pxor	%xmm3,%xmm0
 	pxor	%xmm7,%xmm4
 	pxor	%xmm11,%xmm8
-	pxor	80(%rbp),%xmm15
+	pxor	0+80(%rbp),%xmm15
 	movdqu	%xmm0,0 + 192(%rdi)
 	movdqu	%xmm4,16 + 192(%rdi)
 	movdqu	%xmm8,32 + 192(%rdi)
@@ -712,45 +704,49 @@
 	leaq	256(%rsi),%rsi
 	leaq	256(%rdi),%rdi
 	subq	$256,%rbx
-	jmp	open_sse_main_loop
-2:
+	jmp	L$open_sse_main_loop
+L$open_sse_tail:
 
 	testq	%rbx,%rbx
-	jz	open_sse_finalize
+	jz	L$open_sse_finalize
+	cmpq	$192,%rbx
+	ja	L$open_sse_tail_256
+	cmpq	$128,%rbx
+	ja	L$open_sse_tail_192
 	cmpq	$64,%rbx
-	ja	3f
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
-	movdqa	96(%rbp),%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
+	ja	L$open_sse_tail_128
+	movdqa	L$chacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
+	movdqa	0+96(%rbp),%xmm12
+	paddd	L$sse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
 
 	xorq	%r8,%r8
 	movq	%rbx,%rcx
 	cmpq	$16,%rcx
-	jb	2f
-1:
-	addq	0(%rsi,%r8), %r10
-	adcq	8+0(%rsi,%r8), %r11
+	jb	L$open_sse_tail_64_rounds
+L$open_sse_tail_64_rounds_and_x1hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -766,19 +762,18 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	subq	$16,%rcx
-2:
+L$open_sse_tail_64_rounds:
 	addq	$16,%r8
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -787,7 +782,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -799,7 +794,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -808,7 +803,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -820,55 +815,54 @@
 .byte	102,69,15,58,15,228,4
 
 	cmpq	$16,%rcx
-	jae	1b
+	jae	L$open_sse_tail_64_rounds_and_x1hash
 	cmpq	$160,%r8
-	jne	2b
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jne	L$open_sse_tail_64_rounds
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 
-	jmp	open_sse_tail_64_dec_loop
-3:
-	cmpq	$128,%rbx
-	ja	3f
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+	jmp	L$open_sse_tail_64_dec_loop
+
+L$open_sse_tail_128:
+	movdqa	L$chacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
-	movdqa	96(%rbp),%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	movdqa	0+96(%rbp),%xmm13
+	paddd	L$sse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
+	paddd	L$sse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
 
 	movq	%rbx,%rcx
 	andq	$-16,%rcx
 	xorq	%r8,%r8
-1:
-	addq	0(%rsi,%r8), %r10
-	adcq	8+0(%rsi,%r8), %r11
+L$open_sse_tail_128_rounds_and_x1hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -884,18 +878,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
-2:
+L$open_sse_tail_128_rounds:
 	addq	$16,%r8
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -904,7 +897,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -916,7 +909,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -925,7 +918,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -937,7 +930,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -946,7 +939,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -958,7 +951,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -967,7 +960,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -979,17 +972,17 @@
 .byte	102,69,15,58,15,237,4
 
 	cmpq	%rcx,%r8
-	jb	1b
+	jb	L$open_sse_tail_128_rounds_and_x1hash
 	cmpq	$160,%r8
-	jne	2b
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jne	L$open_sse_tail_128_rounds
+	paddd	L$chacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 	movdqu	0 + 0(%rsi),%xmm3
 	movdqu	16 + 0(%rsi),%xmm7
 	movdqu	32 + 0(%rsi),%xmm11
@@ -1006,28 +999,27 @@
 	subq	$64,%rbx
 	leaq	64(%rsi),%rsi
 	leaq	64(%rdi),%rdi
-	jmp	open_sse_tail_64_dec_loop
-3:
-	cmpq	$192,%rbx
-	ja	3f
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+	jmp	L$open_sse_tail_64_dec_loop
+
+L$open_sse_tail_192:
+	movdqa	L$chacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
 	movdqa	%xmm0,%xmm2
 	movdqa	%xmm4,%xmm6
 	movdqa	%xmm8,%xmm10
-	movdqa	96(%rbp),%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	movdqa	0+96(%rbp),%xmm14
+	paddd	L$sse_inc(%rip),%xmm14
 	movdqa	%xmm14,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	L$sse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
+	paddd	L$sse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
 
 	movq	%rbx,%rcx
 	movq	$160,%r8
@@ -1035,27 +1027,27 @@
 	cmovgq	%r8,%rcx
 	andq	$-16,%rcx
 	xorq	%r8,%r8
-1:
-	addq	0(%rsi,%r8), %r10
-	adcq	8+0(%rsi,%r8), %r11
+L$open_sse_tail_192_rounds_and_x1hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1071,18 +1063,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
-2:
+L$open_sse_tail_192_rounds:
 	addq	$16,%r8
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1091,7 +1082,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1103,7 +1094,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1112,7 +1103,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1124,7 +1115,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	L$rol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1133,7 +1124,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	L$rol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1145,7 +1136,7 @@
 .byte	102,69,15,58,15,246,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1154,7 +1145,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1166,7 +1157,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1175,7 +1166,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1187,7 +1178,7 @@
 .byte	102,69,15,58,15,237,4
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	L$rol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1196,7 +1187,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	L$rol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1208,31 +1199,31 @@
 .byte	102,69,15,58,15,246,4
 
 	cmpq	%rcx,%r8
-	jb	1b
+	jb	L$open_sse_tail_192_rounds_and_x1hash
 	cmpq	$160,%r8
-	jne	2b
+	jne	L$open_sse_tail_192_rounds
 	cmpq	$176,%rbx
-	jb	1f
-	addq	160(%rsi),%r10
+	jb	L$open_sse_tail_192_finish
+	addq	0+160(%rsi),%r10
 	adcq	8+160(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1248,35 +1239,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	cmpq	$192,%rbx
-	jb	1f
-	addq	176(%rsi),%r10
+	jb	L$open_sse_tail_192_finish
+	addq	0+176(%rsi),%r10
 	adcq	8+176(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1292,26 +1282,25 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
-1:
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+L$open_sse_tail_192_finish:
+	paddd	L$chacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	L$chacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 	movdqu	0 + 0(%rsi),%xmm3
 	movdqu	16 + 0(%rsi),%xmm7
 	movdqu	32 + 0(%rsi),%xmm11
@@ -1340,12 +1329,12 @@
 	subq	$128,%rbx
 	leaq	128(%rsi),%rsi
 	leaq	128(%rdi),%rdi
-	jmp	open_sse_tail_64_dec_loop
-3:
+	jmp	L$open_sse_tail_64_dec_loop
 
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+L$open_sse_tail_256:
+	movdqa	L$chacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
@@ -1355,28 +1344,28 @@
 	movdqa	%xmm0,%xmm3
 	movdqa	%xmm4,%xmm7
 	movdqa	%xmm8,%xmm11
-	movdqa	96(%rbp),%xmm15
-	paddd	.sse_inc(%rip),%xmm15
+	movdqa	0+96(%rbp),%xmm15
+	paddd	L$sse_inc(%rip),%xmm15
 	movdqa	%xmm15,%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	paddd	L$sse_inc(%rip),%xmm14
 	movdqa	%xmm14,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	L$sse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
-	movdqa	%xmm15,144(%rbp)
+	paddd	L$sse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
+	movdqa	%xmm15,0+144(%rbp)
 
 	xorq	%r8,%r8
-1:
-	addq	0(%rsi,%r8), %r10
-	adcq	8+0(%rsi,%r8), %r11
+L$open_sse_tail_256_rounds_and_x1hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movdqa	%xmm11,80(%rbp)
+	movdqa	%xmm11,0+80(%rbp)
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm11
@@ -1385,7 +1374,7 @@
 	pxor	%xmm11,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm11
@@ -1397,7 +1386,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm11
@@ -1406,7 +1395,7 @@
 	pxor	%xmm11,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm11
@@ -1418,7 +1407,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	L$rol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm11
@@ -1427,7 +1416,7 @@
 	pxor	%xmm11,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	L$rol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm11
@@ -1437,21 +1426,21 @@
 .byte	102,15,58,15,246,4
 .byte	102,69,15,58,15,210,8
 .byte	102,69,15,58,15,246,12
-	movdqa	80(%rbp),%xmm11
-	movq	0+0(%rbp),%rax
+	movdqa	0+80(%rbp),%xmm11
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movdqa	%xmm9,80(%rbp)
+	movdqa	%xmm9,0+80(%rbp)
 	paddd	%xmm7,%xmm3
 	pxor	%xmm3,%xmm15
-	pshufb	.rol16(%rip),%xmm15
+	pshufb	L$rol16(%rip),%xmm15
 	paddd	%xmm15,%xmm11
 	pxor	%xmm11,%xmm7
 	movdqa	%xmm7,%xmm9
@@ -1460,7 +1449,7 @@
 	pxor	%xmm9,%xmm7
 	paddd	%xmm7,%xmm3
 	pxor	%xmm3,%xmm15
-	pshufb	.rol8(%rip),%xmm15
+	pshufb	L$rol8(%rip),%xmm15
 	paddd	%xmm15,%xmm11
 	pxor	%xmm11,%xmm7
 	movdqa	%xmm7,%xmm9
@@ -1470,21 +1459,21 @@
 .byte	102,15,58,15,255,4
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,12
-	movdqa	80(%rbp),%xmm9
-	movq	8+0(%rbp),%rax
+	movdqa	0+80(%rbp),%xmm9
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
-	movdqa	%xmm11,80(%rbp)
+	movdqa	%xmm11,0+80(%rbp)
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm11
@@ -1493,7 +1482,7 @@
 	pxor	%xmm11,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm11
@@ -1505,7 +1494,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm11
@@ -1514,7 +1503,7 @@
 	pxor	%xmm11,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm11
@@ -1529,7 +1518,7 @@
 	adcq	%rdx,%r9
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	L$rol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm11
@@ -1538,7 +1527,7 @@
 	pxor	%xmm11,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	L$rol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm11
@@ -1548,7 +1537,7 @@
 .byte	102,15,58,15,246,12
 .byte	102,69,15,58,15,210,8
 .byte	102,69,15,58,15,246,4
-	movdqa	80(%rbp),%xmm11
+	movdqa	0+80(%rbp),%xmm11
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -1558,16 +1547,15 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	movdqa	%xmm9,80(%rbp)
+	movdqa	%xmm9,0+80(%rbp)
 	paddd	%xmm7,%xmm3
 	pxor	%xmm3,%xmm15
-	pshufb	.rol16(%rip),%xmm15
+	pshufb	L$rol16(%rip),%xmm15
 	paddd	%xmm15,%xmm11
 	pxor	%xmm11,%xmm7
 	movdqa	%xmm7,%xmm9
@@ -1576,7 +1564,7 @@
 	pxor	%xmm9,%xmm7
 	paddd	%xmm7,%xmm3
 	pxor	%xmm3,%xmm15
-	pshufb	.rol8(%rip),%xmm15
+	pshufb	L$rol8(%rip),%xmm15
 	paddd	%xmm15,%xmm11
 	pxor	%xmm11,%xmm7
 	movdqa	%xmm7,%xmm9
@@ -1586,34 +1574,35 @@
 .byte	102,15,58,15,255,12
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,4
-	movdqa	80(%rbp),%xmm9
+	movdqa	0+80(%rbp),%xmm9
 
 	addq	$16,%r8
 	cmpq	$160,%r8
-	jb	1b
+	jb	L$open_sse_tail_256_rounds_and_x1hash
+
 	movq	%rbx,%rcx
 	andq	$-16,%rcx
-1:
-	addq	0(%rsi,%r8), %r10
-	adcq	8+0(%rsi,%r8), %r11
+L$open_sse_tail_256_hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1629,33 +1618,32 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	addq	$16,%r8
 	cmpq	%rcx,%r8
-	jb	1b
-	paddd	.chacha20_consts(%rip),%xmm3
-	paddd	48(%rbp),%xmm7
-	paddd	64(%rbp),%xmm11
-	paddd	144(%rbp),%xmm15
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
-	movdqa	%xmm12,80(%rbp)
+	jb	L$open_sse_tail_256_hash
+	paddd	L$chacha20_consts(%rip),%xmm3
+	paddd	0+48(%rbp),%xmm7
+	paddd	0+64(%rbp),%xmm11
+	paddd	0+144(%rbp),%xmm15
+	paddd	L$chacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	L$chacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
+	movdqa	%xmm12,0+80(%rbp)
 	movdqu	0 + 0(%rsi),%xmm12
 	pxor	%xmm3,%xmm12
 	movdqu	%xmm12,0 + 0(%rdi)
@@ -1693,15 +1681,15 @@
 	movdqu	%xmm9,32 + 128(%rdi)
 	movdqu	%xmm15,48 + 128(%rdi)
 
-	movdqa	80(%rbp),%xmm12
+	movdqa	0+80(%rbp),%xmm12
 	subq	$192,%rbx
 	leaq	192(%rsi),%rsi
 	leaq	192(%rdi),%rdi
 
 
-open_sse_tail_64_dec_loop:
+L$open_sse_tail_64_dec_loop:
 	cmpq	$16,%rbx
-	jb	1f
+	jb	L$open_sse_tail_16_init
 	subq	$16,%rbx
 	movdqu	(%rsi),%xmm3
 	pxor	%xmm3,%xmm0
@@ -1711,61 +1699,60 @@
 	movdqa	%xmm4,%xmm0
 	movdqa	%xmm8,%xmm4
 	movdqa	%xmm12,%xmm8
-	jmp	open_sse_tail_64_dec_loop
-1:
+	jmp	L$open_sse_tail_64_dec_loop
+L$open_sse_tail_16_init:
 	movdqa	%xmm0,%xmm1
 
 
-open_sse_tail_16:
+L$open_sse_tail_16:
 	testq	%rbx,%rbx
-	jz	open_sse_finalize
+	jz	L$open_sse_finalize
 
 
 
 	pxor	%xmm3,%xmm3
-	leaq	-1(%rsi,%rbx), %rsi
+	leaq	-1(%rsi,%rbx,1),%rsi
 	movq	%rbx,%r8
-2:
+L$open_sse_tail_16_compose:
 	pslldq	$1,%xmm3
 	pinsrb	$0,(%rsi),%xmm3
 	subq	$1,%rsi
 	subq	$1,%r8
-	jnz	2b
+	jnz	L$open_sse_tail_16_compose
 
-3:
 .byte	102,73,15,126,221
 	pextrq	$1,%xmm3,%r14
 
 	pxor	%xmm1,%xmm3
 
 
-2:
+L$open_sse_tail_16_extract:
 	pextrb	$0,%xmm3,(%rdi)
 	psrldq	$1,%xmm3
 	addq	$1,%rdi
 	subq	$1,%rbx
-	jne	2b
+	jne	L$open_sse_tail_16_extract
 
 	addq	%r13,%r10
 	adcq	%r14,%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1781,35 +1768,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 
-open_sse_finalize:
-	addq	32(%rbp),%r10
-	adcq	8+32(%rbp),%r11
+L$open_sse_finalize:
+	addq	0+0+32(%rbp),%r10
+	adcq	8+0+32(%rbp),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -1825,9 +1811,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -1843,16 +1828,17 @@
 	cmovcq	%r14,%r11
 	cmovcq	%r15,%r12
 
-	addq	0+16(%rbp),%r10
-	adcq	8+16(%rbp),%r11
+	addq	0+0+16(%rbp),%r10
+	adcq	8+0+16(%rbp),%r11
 
-	addq	$288 + 32,%rsp
+
+	addq	$288 + 0 + 32,%rsp
+
 
 	popq	%r9
 
 	movq	%r10,(%r9)
 	movq	%r11,8(%r9)
-
 	popq	%r15
 
 	popq	%r14
@@ -1867,9 +1853,9 @@
 
 	.byte	0xf3,0xc3
 
+L$open_sse_128:
 
-open_sse_128:
-	movdqu	.chacha20_consts(%rip),%xmm0
+	movdqu	L$chacha20_consts(%rip),%xmm0
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm0,%xmm2
 	movdqu	0(%r9),%xmm4
@@ -1880,17 +1866,18 @@
 	movdqa	%xmm8,%xmm10
 	movdqu	32(%r9),%xmm12
 	movdqa	%xmm12,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	L$sse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	paddd	L$sse_inc(%rip),%xmm14
 	movdqa	%xmm4,%xmm7
 	movdqa	%xmm8,%xmm11
 	movdqa	%xmm13,%xmm15
 	movq	$10,%r10
-1:
+
+L$open_sse_128_rounds:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1899,7 +1886,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1911,7 +1898,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1920,7 +1907,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1932,7 +1919,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	L$rol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1941,7 +1928,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	L$rol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -1953,7 +1940,7 @@
 .byte	102,69,15,58,15,246,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1962,7 +1949,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -1974,7 +1961,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1983,7 +1970,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -1995,7 +1982,7 @@
 .byte	102,69,15,58,15,237,4
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	L$rol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -2004,7 +1991,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	L$rol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -2016,30 +2003,30 @@
 .byte	102,69,15,58,15,246,4
 
 	decq	%r10
-	jnz	1b
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	.chacha20_consts(%rip),%xmm2
+	jnz	L$open_sse_128_rounds
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	L$chacha20_consts(%rip),%xmm1
+	paddd	L$chacha20_consts(%rip),%xmm2
 	paddd	%xmm7,%xmm4
 	paddd	%xmm7,%xmm5
 	paddd	%xmm7,%xmm6
 	paddd	%xmm11,%xmm9
 	paddd	%xmm11,%xmm10
 	paddd	%xmm15,%xmm13
-	paddd	.sse_inc(%rip),%xmm15
+	paddd	L$sse_inc(%rip),%xmm15
 	paddd	%xmm15,%xmm14
 
-	pand	.clamp(%rip),%xmm0
-	movdqa	%xmm0,0(%rbp)
-	movdqa	%xmm4,16(%rbp)
+	pand	L$clamp(%rip),%xmm0
+	movdqa	%xmm0,0+0(%rbp)
+	movdqa	%xmm4,0+16(%rbp)
 
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
-1:
+L$open_sse_128_xor_hash:
 	cmpq	$16,%rbx
-	jb	open_sse_tail_16
+	jb	L$open_sse_tail_16
 	subq	$16,%rbx
-	addq	0(%rsi),%r10
+	addq	0+0(%rsi),%r10
 	adcq	8+0(%rsi),%r11
 	adcq	$1,%r12
 
@@ -2049,23 +2036,23 @@
 	movdqu	%xmm1,0(%rdi)
 	leaq	16(%rsi),%rsi
 	leaq	16(%rdi),%rdi
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -2081,9 +2068,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -2096,8 +2082,10 @@
 	movdqa	%xmm6,%xmm2
 	movdqa	%xmm10,%xmm6
 	movdqa	%xmm14,%xmm10
-	jmp	1b
-	jmp	open_sse_tail_16
+	jmp	L$open_sse_128_xor_hash
+
+
+
 
 
 
@@ -2126,20 +2114,15 @@
 
 	pushq	%r9
 
-	subq	$288 + 32,%rsp
-
-
-
-
-
-
+	subq	$288 + 0 + 32,%rsp
 
 	leaq	32(%rsp),%rbp
 	andq	$-32,%rbp
+
 	movq	56(%r9),%rbx
 	addq	%rdx,%rbx
-	movq	%rbx,8+32(%rbp)
-	movq	%r8,0+32(%rbp)
+	movq	%r8,0+0+32(%rbp)
+	movq	%rbx,8+0+32(%rbp)
 	movq	%rdx,%rbx
 
 	movl	_OPENSSL_ia32cap_P+8(%rip),%eax
@@ -2148,12 +2131,13 @@
 	jz	chacha20_poly1305_seal_avx2
 
 	cmpq	$128,%rbx
-	jbe	seal_sse_128
+	jbe	L$seal_sse_128
 
-	movdqa	.chacha20_consts(%rip),%xmm0
+	movdqa	L$chacha20_consts(%rip),%xmm0
 	movdqu	0(%r9),%xmm4
 	movdqu	16(%r9),%xmm8
 	movdqu	32(%r9),%xmm12
+
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm0,%xmm2
 	movdqa	%xmm0,%xmm3
@@ -2164,22 +2148,22 @@
 	movdqa	%xmm8,%xmm10
 	movdqa	%xmm8,%xmm11
 	movdqa	%xmm12,%xmm15
-	paddd	.sse_inc(%rip),%xmm12
+	paddd	L$sse_inc(%rip),%xmm12
 	movdqa	%xmm12,%xmm14
-	paddd	.sse_inc(%rip),%xmm12
+	paddd	L$sse_inc(%rip),%xmm12
 	movdqa	%xmm12,%xmm13
-	paddd	.sse_inc(%rip),%xmm12
+	paddd	L$sse_inc(%rip),%xmm12
 
-	movdqa	%xmm4,48(%rbp)
-	movdqa	%xmm8,64(%rbp)
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
-	movdqa	%xmm15,144(%rbp)
+	movdqa	%xmm4,0+48(%rbp)
+	movdqa	%xmm8,0+64(%rbp)
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
+	movdqa	%xmm15,0+144(%rbp)
 	movq	$10,%r10
-1:
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+L$seal_sse_init_rounds:
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	L$rol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2192,7 +2176,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2201,7 +2185,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -2218,7 +2202,7 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	L$rol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2231,7 +2215,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2240,7 +2224,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -2257,7 +2241,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 .byte	102,15,58,15,255,4
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,12
@@ -2270,8 +2254,8 @@
 .byte	102,15,58,15,228,4
 .byte	102,69,15,58,15,192,8
 .byte	102,69,15,58,15,228,12
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	L$rol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2284,7 +2268,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2293,7 +2277,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -2310,7 +2294,7 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	L$rol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2323,7 +2307,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2332,7 +2316,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -2349,7 +2333,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 .byte	102,15,58,15,255,12
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,4
@@ -2364,28 +2348,28 @@
 .byte	102,69,15,58,15,228,4
 
 	decq	%r10
-	jnz	1b
-	paddd	.chacha20_consts(%rip),%xmm3
-	paddd	48(%rbp),%xmm7
-	paddd	64(%rbp),%xmm11
-	paddd	144(%rbp),%xmm15
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jnz	L$seal_sse_init_rounds
+	paddd	L$chacha20_consts(%rip),%xmm3
+	paddd	0+48(%rbp),%xmm7
+	paddd	0+64(%rbp),%xmm11
+	paddd	0+144(%rbp),%xmm15
+	paddd	L$chacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	L$chacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 
 
-	pand	.clamp(%rip),%xmm3
-	movdqa	%xmm3,0(%rbp)
-	movdqa	%xmm7,16(%rbp)
+	pand	L$clamp(%rip),%xmm3
+	movdqa	%xmm3,0+0(%rbp)
+	movdqa	%xmm7,0+16(%rbp)
 
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
@@ -2415,12 +2399,12 @@
 	movdqu	%xmm15,48 + 64(%rdi)
 
 	cmpq	$192,%rbx
-	ja	1f
+	ja	L$seal_sse_main_init
 	movq	$128,%rcx
 	subq	$128,%rbx
 	leaq	128(%rsi),%rsi
-	jmp	seal_sse_128_seal_hash
-1:
+	jmp	L$seal_sse_128_tail_hash
+L$seal_sse_main_init:
 	movdqu	0 + 128(%rsi),%xmm3
 	movdqu	16 + 128(%rsi),%xmm7
 	movdqu	32 + 128(%rsi),%xmm11
@@ -2440,16 +2424,16 @@
 	movq	$2,%rcx
 	movq	$8,%r8
 	cmpq	$64,%rbx
-	jbe	seal_sse_tail_64
+	jbe	L$seal_sse_tail_64
 	cmpq	$128,%rbx
-	jbe	seal_sse_tail_128
+	jbe	L$seal_sse_tail_128
 	cmpq	$192,%rbx
-	jbe	seal_sse_tail_192
+	jbe	L$seal_sse_tail_192
 
-1:
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+L$seal_sse_main_loop:
+	movdqa	L$chacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
@@ -2459,22 +2443,23 @@
 	movdqa	%xmm0,%xmm3
 	movdqa	%xmm4,%xmm7
 	movdqa	%xmm8,%xmm11
-	movdqa	96(%rbp),%xmm15
-	paddd	.sse_inc(%rip),%xmm15
+	movdqa	0+96(%rbp),%xmm15
+	paddd	L$sse_inc(%rip),%xmm15
 	movdqa	%xmm15,%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	paddd	L$sse_inc(%rip),%xmm14
 	movdqa	%xmm14,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	L$sse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
-	movdqa	%xmm15,144(%rbp)
+	paddd	L$sse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
+	movdqa	%xmm15,0+144(%rbp)
 
-2:
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+.p2align	5
+L$seal_sse_main_rounds:
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	L$rol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2487,19 +2472,19 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
 	paddd	%xmm12,%xmm8
 	pxor	%xmm11,%xmm7
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -2516,17 +2501,17 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	L$rol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2539,26 +2524,26 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
 	paddd	%xmm12,%xmm8
 	pxor	%xmm11,%xmm7
 	pxor	%xmm10,%xmm6
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -2575,7 +2560,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	imulq	%r12,%r9
 	addq	%r10,%r15
 	adcq	%rdx,%r9
@@ -2591,8 +2576,8 @@
 .byte	102,15,58,15,228,4
 .byte	102,69,15,58,15,192,8
 .byte	102,69,15,58,15,228,12
-	movdqa	%xmm8,80(%rbp)
-	movdqa	.rol16(%rip),%xmm8
+	movdqa	%xmm8,0+80(%rbp)
+	movdqa	L$rol16(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2608,9 +2593,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -2620,7 +2604,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2629,7 +2613,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm7
@@ -2646,7 +2630,7 @@
 	psrld	$20,%xmm8
 	pslld	$32-20,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	.rol8(%rip),%xmm8
+	movdqa	L$rol8(%rip),%xmm8
 	paddd	%xmm7,%xmm3
 	paddd	%xmm6,%xmm2
 	paddd	%xmm5,%xmm1
@@ -2659,7 +2643,7 @@
 .byte	102,69,15,56,0,240
 .byte	102,69,15,56,0,232
 .byte	102,69,15,56,0,224
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 	paddd	%xmm15,%xmm11
 	paddd	%xmm14,%xmm10
 	paddd	%xmm13,%xmm9
@@ -2668,7 +2652,7 @@
 	pxor	%xmm10,%xmm6
 	pxor	%xmm9,%xmm5
 	pxor	%xmm8,%xmm4
-	movdqa	%xmm8,80(%rbp)
+	movdqa	%xmm8,0+80(%rbp)
 	movdqa	%xmm7,%xmm8
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm7
@@ -2685,7 +2669,7 @@
 	psrld	$25,%xmm8
 	pslld	$32-25,%xmm4
 	pxor	%xmm8,%xmm4
-	movdqa	80(%rbp),%xmm8
+	movdqa	0+80(%rbp),%xmm8
 .byte	102,15,58,15,255,12
 .byte	102,69,15,58,15,219,8
 .byte	102,69,15,58,15,255,4
@@ -2701,27 +2685,27 @@
 
 	leaq	16(%rdi),%rdi
 	decq	%r8
-	jge	2b
-	addq	0(%rdi),%r10
+	jge	L$seal_sse_main_rounds
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -2737,35 +2721,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
 	decq	%rcx
-	jg	2b
-	paddd	.chacha20_consts(%rip),%xmm3
-	paddd	48(%rbp),%xmm7
-	paddd	64(%rbp),%xmm11
-	paddd	144(%rbp),%xmm15
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jg	L$seal_sse_main_rounds
+	paddd	L$chacha20_consts(%rip),%xmm3
+	paddd	0+48(%rbp),%xmm7
+	paddd	0+64(%rbp),%xmm11
+	paddd	0+144(%rbp),%xmm15
+	paddd	L$chacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	L$chacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 
-	movdqa	%xmm14,80(%rbp)
-	movdqa	%xmm14,80(%rbp)
+	movdqa	%xmm14,0+80(%rbp)
+	movdqa	%xmm14,0+80(%rbp)
 	movdqu	0 + 0(%rsi),%xmm14
 	pxor	%xmm3,%xmm14
 	movdqu	%xmm14,0 + 0(%rdi)
@@ -2779,7 +2762,7 @@
 	pxor	%xmm15,%xmm14
 	movdqu	%xmm14,48 + 0(%rdi)
 
-	movdqa	80(%rbp),%xmm14
+	movdqa	0+80(%rbp),%xmm14
 	movdqu	0 + 64(%rsi),%xmm3
 	movdqu	16 + 64(%rsi),%xmm7
 	movdqu	32 + 64(%rsi),%xmm11
@@ -2806,13 +2789,13 @@
 	movdqu	%xmm15,48 + 128(%rdi)
 
 	cmpq	$256,%rbx
-	ja	3f
+	ja	L$seal_sse_main_loop_xor
 
 	movq	$192,%rcx
 	subq	$192,%rbx
 	leaq	192(%rsi),%rsi
-	jmp	seal_sse_128_seal_hash
-3:
+	jmp	L$seal_sse_128_tail_hash
+L$seal_sse_main_loop_xor:
 	movdqu	0 + 192(%rsi),%xmm3
 	movdqu	16 + 192(%rsi),%xmm7
 	movdqu	32 + 192(%rsi),%xmm11
@@ -2831,43 +2814,45 @@
 	movq	$6,%rcx
 	movq	$4,%r8
 	cmpq	$192,%rbx
-	jg	1b
+	jg	L$seal_sse_main_loop
 	movq	%rbx,%rcx
 	testq	%rbx,%rbx
-	je	seal_sse_128_seal_hash
+	je	L$seal_sse_128_tail_hash
 	movq	$6,%rcx
+	cmpq	$128,%rbx
+	ja	L$seal_sse_tail_192
 	cmpq	$64,%rbx
-	jg	3f
+	ja	L$seal_sse_tail_128
 
-seal_sse_tail_64:
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
-	movdqa	96(%rbp),%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
+L$seal_sse_tail_64:
+	movdqa	L$chacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
+	movdqa	0+96(%rbp),%xmm12
+	paddd	L$sse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+L$seal_sse_tail_64_rounds_and_x2hash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -2883,18 +2868,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+L$seal_sse_tail_64_rounds_and_x1hash:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -2903,7 +2887,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -2915,7 +2899,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -2924,7 +2908,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -2934,26 +2918,26 @@
 .byte	102,15,58,15,228,12
 .byte	102,69,15,58,15,192,8
 .byte	102,69,15,58,15,228,4
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -2969,63 +2953,59 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	L$seal_sse_tail_64_rounds_and_x2hash
 	decq	%r8
-	jge	2b
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jge	L$seal_sse_tail_64_rounds_and_x1hash
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 
-	jmp	seal_sse_128_seal
-3:
-	cmpq	$128,%rbx
-	jg	3f
+	jmp	L$seal_sse_128_tail_xor
 
-seal_sse_tail_128:
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+L$seal_sse_tail_128:
+	movdqa	L$chacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
-	movdqa	96(%rbp),%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	movdqa	0+96(%rbp),%xmm13
+	paddd	L$sse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
+	paddd	L$sse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+L$seal_sse_tail_128_rounds_and_x2hash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3041,18 +3021,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+L$seal_sse_tail_128_rounds_and_x1hash:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3061,7 +3040,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3073,7 +3052,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3082,7 +3061,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3092,26 +3071,26 @@
 .byte	102,15,58,15,237,4
 .byte	102,69,15,58,15,201,8
 .byte	102,69,15,58,15,237,12
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3127,15 +3106,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3144,7 +3122,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3156,7 +3134,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3165,7 +3143,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3178,17 +3156,17 @@
 
 	leaq	16(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	L$seal_sse_tail_128_rounds_and_x2hash
 	decq	%r8
-	jge	2b
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jge	L$seal_sse_tail_128_rounds_and_x1hash
+	paddd	L$chacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 	movdqu	0 + 0(%rsi),%xmm3
 	movdqu	16 + 0(%rsi),%xmm7
 	movdqu	32 + 0(%rsi),%xmm11
@@ -3205,50 +3183,49 @@
 	movq	$64,%rcx
 	subq	$64,%rbx
 	leaq	64(%rsi),%rsi
-	jmp	seal_sse_128_seal_hash
-3:
+	jmp	L$seal_sse_128_tail_hash
 
-seal_sse_tail_192:
-	movdqa	.chacha20_consts(%rip),%xmm0
-	movdqa	48(%rbp),%xmm4
-	movdqa	64(%rbp),%xmm8
+L$seal_sse_tail_192:
+	movdqa	L$chacha20_consts(%rip),%xmm0
+	movdqa	0+48(%rbp),%xmm4
+	movdqa	0+64(%rbp),%xmm8
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm4,%xmm5
 	movdqa	%xmm8,%xmm9
 	movdqa	%xmm0,%xmm2
 	movdqa	%xmm4,%xmm6
 	movdqa	%xmm8,%xmm10
-	movdqa	96(%rbp),%xmm14
-	paddd	.sse_inc(%rip),%xmm14
+	movdqa	0+96(%rbp),%xmm14
+	paddd	L$sse_inc(%rip),%xmm14
 	movdqa	%xmm14,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	L$sse_inc(%rip),%xmm13
 	movdqa	%xmm13,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
-	movdqa	%xmm12,96(%rbp)
-	movdqa	%xmm13,112(%rbp)
-	movdqa	%xmm14,128(%rbp)
+	paddd	L$sse_inc(%rip),%xmm12
+	movdqa	%xmm12,0+96(%rbp)
+	movdqa	%xmm13,0+112(%rbp)
+	movdqa	%xmm14,0+128(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+L$seal_sse_tail_192_rounds_and_x2hash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3264,18 +3241,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+L$seal_sse_tail_192_rounds_and_x1hash:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3284,7 +3260,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3296,7 +3272,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3305,7 +3281,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3317,7 +3293,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	L$rol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -3326,7 +3302,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	L$rol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -3336,26 +3312,26 @@
 .byte	102,15,58,15,246,4
 .byte	102,69,15,58,15,210,8
 .byte	102,69,15,58,15,246,12
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3371,15 +3347,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3388,7 +3363,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3400,7 +3375,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3409,7 +3384,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3421,7 +3396,7 @@
 .byte	102,69,15,58,15,237,4
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	L$rol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -3430,7 +3405,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	L$rol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -3443,21 +3418,21 @@
 
 	leaq	16(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	L$seal_sse_tail_192_rounds_and_x2hash
 	decq	%r8
-	jge	2b
-	paddd	.chacha20_consts(%rip),%xmm2
-	paddd	48(%rbp),%xmm6
-	paddd	64(%rbp),%xmm10
-	paddd	128(%rbp),%xmm14
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	48(%rbp),%xmm5
-	paddd	64(%rbp),%xmm9
-	paddd	112(%rbp),%xmm13
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	48(%rbp),%xmm4
-	paddd	64(%rbp),%xmm8
-	paddd	96(%rbp),%xmm12
+	jge	L$seal_sse_tail_192_rounds_and_x1hash
+	paddd	L$chacha20_consts(%rip),%xmm2
+	paddd	0+48(%rbp),%xmm6
+	paddd	0+64(%rbp),%xmm10
+	paddd	0+128(%rbp),%xmm14
+	paddd	L$chacha20_consts(%rip),%xmm1
+	paddd	0+48(%rbp),%xmm5
+	paddd	0+64(%rbp),%xmm9
+	paddd	0+112(%rbp),%xmm13
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	0+48(%rbp),%xmm4
+	paddd	0+64(%rbp),%xmm8
+	paddd	0+96(%rbp),%xmm12
 	movdqu	0 + 0(%rsi),%xmm3
 	movdqu	16 + 0(%rsi),%xmm7
 	movdqu	32 + 0(%rsi),%xmm11
@@ -3487,29 +3462,29 @@
 	subq	$128,%rbx
 	leaq	128(%rsi),%rsi
 
-seal_sse_128_seal_hash:
+L$seal_sse_128_tail_hash:
 	cmpq	$16,%rcx
-	jb	seal_sse_128_seal
-	addq	0(%rdi),%r10
+	jb	L$seal_sse_128_tail_xor
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3525,20 +3500,19 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	subq	$16,%rcx
 	leaq	16(%rdi),%rdi
-	jmp	seal_sse_128_seal_hash
+	jmp	L$seal_sse_128_tail_hash
 
-seal_sse_128_seal:
+L$seal_sse_128_tail_xor:
 	cmpq	$16,%rbx
-	jb	seal_sse_tail_16
+	jb	L$seal_sse_tail_16
 	subq	$16,%rbx
 
 	movdqu	0(%rsi),%xmm3
@@ -3550,23 +3524,23 @@
 	adcq	$1,%r12
 	leaq	16(%rsi),%rsi
 	leaq	16(%rdi),%rdi
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3582,9 +3556,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -3597,22 +3570,22 @@
 	movdqa	%xmm5,%xmm1
 	movdqa	%xmm9,%xmm5
 	movdqa	%xmm13,%xmm9
-	jmp	seal_sse_128_seal
+	jmp	L$seal_sse_128_tail_xor
 
-seal_sse_tail_16:
+L$seal_sse_tail_16:
 	testq	%rbx,%rbx
-	jz	process_blocks_of_extra_in
+	jz	L$process_blocks_of_extra_in
 
 	movq	%rbx,%r8
 	movq	%rbx,%rcx
-	leaq	-1(%rsi,%rbx), %rsi
+	leaq	-1(%rsi,%rbx,1),%rsi
 	pxor	%xmm15,%xmm15
-1:
+L$seal_sse_tail_16_compose:
 	pslldq	$1,%xmm15
 	pinsrb	$0,(%rsi),%xmm15
 	leaq	-1(%rsi),%rsi
 	decq	%rcx
-	jne	1b
+	jne	L$seal_sse_tail_16_compose
 
 
 	pxor	%xmm0,%xmm15
@@ -3620,12 +3593,12 @@
 
 	movq	%rbx,%rcx
 	movdqu	%xmm15,%xmm0
-2:
+L$seal_sse_tail_16_extract:
 	pextrb	$0,%xmm0,(%rdi)
 	psrldq	$1,%xmm0
 	addq	$1,%rdi
 	subq	$1,%rcx
-	jnz	2b
+	jnz	L$seal_sse_tail_16_extract
 
 
 
@@ -3634,23 +3607,23 @@
 
 
 
-	movq	288+32(%rsp),%r9
+	movq	288 + 0 + 32(%rsp),%r9
 	movq	56(%r9),%r14
 	movq	48(%r9),%r13
 	testq	%r14,%r14
-	jz	process_partial_block
+	jz	L$process_partial_block
 
 	movq	$16,%r15
 	subq	%rbx,%r15
 	cmpq	%r15,%r14
 
-	jge	load_extra_in
+	jge	L$load_extra_in
 	movq	%r14,%r15
 
-load_extra_in:
+L$load_extra_in:
 
 
-	leaq	-1(%r13,%r15), %rsi
+	leaq	-1(%r13,%r15,1),%rsi
 
 
 	addq	%r15,%r13
@@ -3664,29 +3637,29 @@
 
 
 	pxor	%xmm11,%xmm11
-3:
+L$load_extra_load_loop:
 	pslldq	$1,%xmm11
 	pinsrb	$0,(%rsi),%xmm11
 	leaq	-1(%rsi),%rsi
 	subq	$1,%r15
-	jnz	3b
+	jnz	L$load_extra_load_loop
 
 
 
 
 	movq	%rbx,%r15
 
-4:
+L$load_extra_shift_loop:
 	pslldq	$1,%xmm11
 	subq	$1,%r15
-	jnz	4b
+	jnz	L$load_extra_shift_loop
 
 
 
 
-	leaq	.and_masks(%rip),%r15
+	leaq	L$and_masks(%rip),%r15
 	shlq	$4,%rbx
-	pand	-16(%r15,%rbx), %xmm15
+	pand	-16(%r15,%rbx,1),%xmm15
 
 
 	por	%xmm11,%xmm15
@@ -3698,23 +3671,23 @@
 	addq	%r13,%r10
 	adcq	%r14,%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3730,44 +3703,43 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 
-process_blocks_of_extra_in:
+L$process_blocks_of_extra_in:
 
-	movq	288+32(%rsp),%r9
+	movq	288+32+0 (%rsp),%r9
 	movq	48(%r9),%rsi
 	movq	56(%r9),%r8
 	movq	%r8,%rcx
 	shrq	$4,%r8
 
-5:
+L$process_extra_hash_loop:
 	jz	process_extra_in_trailer
-	addq	0(%rsi),%r10
+	addq	0+0(%rsi),%r10
 	adcq	8+0(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3783,57 +3755,55 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rsi),%rsi
 	subq	$1,%r8
-	jmp	5b
-
+	jmp	L$process_extra_hash_loop
 process_extra_in_trailer:
 	andq	$15,%rcx
 	movq	%rcx,%rbx
-	jz	do_length_block
-	leaq	-1(%rsi,%rcx), %rsi
+	jz	L$do_length_block
+	leaq	-1(%rsi,%rcx,1),%rsi
 
-6:
+L$process_extra_in_trailer_load:
 	pslldq	$1,%xmm15
 	pinsrb	$0,(%rsi),%xmm15
 	leaq	-1(%rsi),%rsi
 	subq	$1,%rcx
-	jnz	6b
+	jnz	L$process_extra_in_trailer_load
 
-process_partial_block:
+L$process_partial_block:
 
-	leaq	.and_masks(%rip),%r15
+	leaq	L$and_masks(%rip),%r15
 	shlq	$4,%rbx
-	pand	-16(%r15,%rbx), %xmm15
+	pand	-16(%r15,%rbx,1),%xmm15
 .byte	102,77,15,126,253
 	pextrq	$1,%xmm15,%r14
 	addq	%r13,%r10
 	adcq	%r14,%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3849,35 +3819,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 
-do_length_block:
-	addq	32(%rbp),%r10
-	adcq	8+32(%rbp),%r11
+L$do_length_block:
+	addq	0+0+32(%rbp),%r10
+	adcq	8+0+32(%rbp),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -3893,9 +3862,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -3911,16 +3879,17 @@
 	cmovcq	%r14,%r11
 	cmovcq	%r15,%r12
 
-	addq	0+16(%rbp),%r10
-	adcq	8+16(%rbp),%r11
+	addq	0+0+16(%rbp),%r10
+	adcq	8+0+16(%rbp),%r11
 
-	addq	$288 + 32,%rsp
+
+	addq	$288 + 0 + 32,%rsp
+
 
 	popq	%r9
 
-	movq	%r10,0(%r9)
+	movq	%r10,(%r9)
 	movq	%r11,8(%r9)
-
 	popq	%r15
 
 	popq	%r14
@@ -3935,9 +3904,9 @@
 
 	.byte	0xf3,0xc3
 
+L$seal_sse_128:
 
-seal_sse_128:
-	movdqu	.chacha20_consts(%rip),%xmm0
+	movdqu	L$chacha20_consts(%rip),%xmm0
 	movdqa	%xmm0,%xmm1
 	movdqa	%xmm0,%xmm2
 	movdqu	0(%r9),%xmm4
@@ -3948,17 +3917,18 @@
 	movdqa	%xmm8,%xmm10
 	movdqu	32(%r9),%xmm14
 	movdqa	%xmm14,%xmm12
-	paddd	.sse_inc(%rip),%xmm12
+	paddd	L$sse_inc(%rip),%xmm12
 	movdqa	%xmm12,%xmm13
-	paddd	.sse_inc(%rip),%xmm13
+	paddd	L$sse_inc(%rip),%xmm13
 	movdqa	%xmm4,%xmm7
 	movdqa	%xmm8,%xmm11
 	movdqa	%xmm12,%xmm15
 	movq	$10,%r10
-1:
+
+L$seal_sse_128_rounds:
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3967,7 +3937,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -3979,7 +3949,7 @@
 .byte	102,69,15,58,15,228,12
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -3988,7 +3958,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -4000,7 +3970,7 @@
 .byte	102,69,15,58,15,237,12
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	L$rol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -4009,7 +3979,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	L$rol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -4021,7 +3991,7 @@
 .byte	102,69,15,58,15,246,12
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol16(%rip),%xmm12
+	pshufb	L$rol16(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -4030,7 +4000,7 @@
 	pxor	%xmm3,%xmm4
 	paddd	%xmm4,%xmm0
 	pxor	%xmm0,%xmm12
-	pshufb	.rol8(%rip),%xmm12
+	pshufb	L$rol8(%rip),%xmm12
 	paddd	%xmm12,%xmm8
 	pxor	%xmm8,%xmm4
 	movdqa	%xmm4,%xmm3
@@ -4042,7 +4012,7 @@
 .byte	102,69,15,58,15,228,4
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol16(%rip),%xmm13
+	pshufb	L$rol16(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -4051,7 +4021,7 @@
 	pxor	%xmm3,%xmm5
 	paddd	%xmm5,%xmm1
 	pxor	%xmm1,%xmm13
-	pshufb	.rol8(%rip),%xmm13
+	pshufb	L$rol8(%rip),%xmm13
 	paddd	%xmm13,%xmm9
 	pxor	%xmm9,%xmm5
 	movdqa	%xmm5,%xmm3
@@ -4063,7 +4033,7 @@
 .byte	102,69,15,58,15,237,4
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol16(%rip),%xmm14
+	pshufb	L$rol16(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -4072,7 +4042,7 @@
 	pxor	%xmm3,%xmm6
 	paddd	%xmm6,%xmm2
 	pxor	%xmm2,%xmm14
-	pshufb	.rol8(%rip),%xmm14
+	pshufb	L$rol8(%rip),%xmm14
 	paddd	%xmm14,%xmm10
 	pxor	%xmm10,%xmm6
 	movdqa	%xmm6,%xmm3
@@ -4084,51 +4054,64 @@
 .byte	102,69,15,58,15,246,4
 
 	decq	%r10
-	jnz	1b
-	paddd	.chacha20_consts(%rip),%xmm0
-	paddd	.chacha20_consts(%rip),%xmm1
-	paddd	.chacha20_consts(%rip),%xmm2
+	jnz	L$seal_sse_128_rounds
+	paddd	L$chacha20_consts(%rip),%xmm0
+	paddd	L$chacha20_consts(%rip),%xmm1
+	paddd	L$chacha20_consts(%rip),%xmm2
 	paddd	%xmm7,%xmm4
 	paddd	%xmm7,%xmm5
 	paddd	%xmm7,%xmm6
 	paddd	%xmm11,%xmm8
 	paddd	%xmm11,%xmm9
 	paddd	%xmm15,%xmm12
-	paddd	.sse_inc(%rip),%xmm15
+	paddd	L$sse_inc(%rip),%xmm15
 	paddd	%xmm15,%xmm13
 
-	pand	.clamp(%rip),%xmm2
-	movdqa	%xmm2,0(%rbp)
-	movdqa	%xmm6,16(%rbp)
+	pand	L$clamp(%rip),%xmm2
+	movdqa	%xmm2,0+0(%rbp)
+	movdqa	%xmm6,0+16(%rbp)
 
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
-	jmp	seal_sse_128_seal
+	jmp	L$seal_sse_128_tail_xor
+
 
 
 
 
 .p2align	6
 chacha20_poly1305_open_avx2:
+
+
+
+
+
+
+
+
+
+
+
+
 	vzeroupper
-	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
 	vbroadcasti128	0(%r9),%ymm4
 	vbroadcasti128	16(%r9),%ymm8
 	vbroadcasti128	32(%r9),%ymm12
-	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
+	vpaddd	L$avx2_init(%rip),%ymm12,%ymm12
 	cmpq	$192,%rbx
-	jbe	open_avx2_192
+	jbe	L$open_avx2_192
 	cmpq	$320,%rbx
-	jbe	open_avx2_320
+	jbe	L$open_avx2_320
 
-	vmovdqa	%ymm4,64(%rbp)
-	vmovdqa	%ymm8,96(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm4,0+64(%rbp)
+	vmovdqa	%ymm8,0+96(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
 	movq	$10,%r10
-1:
+L$open_avx2_init_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4136,7 +4119,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4147,7 +4130,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4155,7 +4138,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4166,45 +4149,45 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 
 	decq	%r10
-	jne	1b
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jne	L$open_avx2_init_rounds
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 
-	vpand	.clamp(%rip),%ymm3,%ymm3
-	vmovdqa	%ymm3,0(%rbp)
+	vpand	L$clamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0+0(%rbp)
 
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
 
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
-	xorq	%rcx,%rcx
 
-1:
-	addq	0(%rsi,%rcx), %r10
-	adcq	8+0(%rsi,%rcx), %r11
+	xorq	%rcx,%rcx
+L$open_avx2_init_hash:
+	addq	0+0(%rsi,%rcx,1),%r10
+	adcq	8+0(%rsi,%rcx,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -4220,31 +4203,31 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	addq	$16,%rcx
 	cmpq	$64,%rcx
-	jne	1b
+	jne	L$open_avx2_init_hash
 
 	vpxor	0(%rsi),%ymm0,%ymm0
 	vpxor	32(%rsi),%ymm4,%ymm4
+
 	vmovdqu	%ymm0,0(%rdi)
 	vmovdqu	%ymm4,32(%rdi)
 	leaq	64(%rsi),%rsi
 	leaq	64(%rdi),%rdi
 	subq	$64,%rbx
-1:
+L$open_avx2_main_loop:
 
 	cmpq	$512,%rbx
-	jb	3f
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jb	L$open_avx2_main_loop_done
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
@@ -4254,23 +4237,23 @@
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm15
+	vmovdqa	L$avx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm15
 	vpaddd	%ymm15,%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm15,256(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm15,0+256(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
 
 	xorq	%rcx,%rcx
-2:
-	addq	0*8(%rsi,%rcx), %r10
-	adcq	8+0*8(%rsi,%rcx), %r11
+L$open_avx2_main_loop_rounds:
+	addq	0+0(%rsi,%rcx,1),%r10
+	adcq	8+0(%rsi,%rcx,1),%r11
 	adcq	$1,%r12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -4279,7 +4262,7 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
@@ -4290,23 +4273,22 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
-	movq	8+0(%rbp),%rdx
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -4314,18 +4296,19 @@
 	vpslld	$32-20,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
 	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
 	addq	%rax,%r15
 	adcq	%rdx,%r9
-	vpslld	$32-20,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -4335,13 +4318,11 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpxor	%ymm3,%ymm15,%ymm15
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
@@ -4349,27 +4330,26 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
-	addq	2*8(%rsi,%rcx), %r10
-	adcq	8+2*8(%rsi,%rcx), %r11
-	adcq	$1,%r12
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
+	addq	0+16(%rsi,%rcx,1),%r10
+	adcq	8+16(%rsi,%rcx,1),%r11
+	adcq	$1,%r12
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	movq	0+0(%rbp),%rdx
+	vmovdqa	%ymm8,0+128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vmovdqa	%ymm8,128(%rbp)
-	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
@@ -4381,28 +4361,28 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
-	movq	8+0(%rbp),%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpalignr	$8,%ymm10,%ymm10,%ymm10
-	vpalignr	$12,%ymm14,%ymm14,%ymm14
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$12,%ymm13,%ymm13,%ymm13
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -4411,17 +4391,19 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	addq	%rax,%r15
-	adcq	%rdx,%r9
 	vpshufb	%ymm8,%ymm15,%ymm15
 	vpshufb	%ymm8,%ymm14,%ymm14
+	addq	%rax,%r15
+	adcq	%rdx,%r9
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -4431,35 +4413,31 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpxor	%ymm11,%ymm7,%ymm7
-	vpxor	%ymm10,%ymm6,%ymm6
-	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
-	addq	4*8(%rsi,%rcx), %r10
-	adcq	8+4*8(%rsi,%rcx), %r11
-	adcq	$1,%r12
-
-	leaq	48(%rcx),%rcx
 	vpsrld	$20,%ymm6,%ymm8
 	vpslld	$32-20,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
+	addq	0+32(%rsi,%rcx,1),%r10
+	adcq	8+32(%rsi,%rcx,1),%r11
+	adcq	$1,%r12
+
+	leaq	48(%rcx),%rcx
 	vpsrld	$20,%ymm5,%ymm8
 	vpslld	$32-20,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -4468,49 +4446,48 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	movq	0+0(%rbp),%rdx
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vpshufb	%ymm8,%ymm15,%ymm15
-	vpshufb	%ymm8,%ymm14,%ymm14
-	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	movq	8+0(%rbp),%rdx
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpaddd	%ymm12,%ymm8,%ymm8
-	vpxor	%ymm11,%ymm7,%ymm7
-	vpxor	%ymm10,%ymm6,%ymm6
-	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
-	addq	%rax,%r15
-	adcq	%rdx,%r9
 	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
 	vpslld	$32-25,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
+	addq	%rax,%r15
+	adcq	%rdx,%r9
 	vpsrld	$25,%ymm5,%ymm8
 	vpslld	$32-25,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
@@ -4518,6 +4495,10 @@
 	vpalignr	$8,%ymm10,%ymm10,%ymm10
 	vpalignr	$4,%ymm14,%ymm14,%ymm14
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -4527,39 +4508,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpalignr	$8,%ymm9,%ymm9,%ymm9
-	vpalignr	$4,%ymm13,%ymm13,%ymm13
-	vpalignr	$12,%ymm4,%ymm4,%ymm4
-	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
 
 	cmpq	$60*8,%rcx
-	jne	2b
-	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
-	vpaddd	64(%rbp),%ymm7,%ymm7
-	vpaddd	96(%rbp),%ymm11,%ymm11
-	vpaddd	256(%rbp),%ymm15,%ymm15
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jne	L$open_avx2_main_loop_rounds
+	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	0+64(%rbp),%ymm7,%ymm7
+	vpaddd	0+96(%rbp),%ymm11,%ymm11
+	vpaddd	0+256(%rbp),%ymm15,%ymm15
+	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 
-	vmovdqa	%ymm0,128(%rbp)
-	addq	60*8(%rsi),%r10
+	vmovdqa	%ymm0,0+128(%rbp)
+	addq	0+60*8(%rsi),%r10
 	adcq	8+60*8(%rsi),%r11
 	adcq	$1,%r12
 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
@@ -4575,24 +4551,24 @@
 	vmovdqu	%ymm7,64+0(%rdi)
 	vmovdqu	%ymm11,96+0(%rdi)
 
-	vmovdqa	128(%rbp),%ymm0
-	movq	0+0(%rbp),%rax
+	vmovdqa	0+128(%rbp),%ymm0
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -4608,9 +4584,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -4626,7 +4601,7 @@
 	vmovdqu	%ymm2,32+128(%rdi)
 	vmovdqu	%ymm6,64+128(%rdi)
 	vmovdqu	%ymm10,96+128(%rdi)
-	addq	60*8+16(%rsi),%r10
+	addq	0+60*8+16(%rsi),%r10
 	adcq	8+60*8+16(%rsi),%r11
 	adcq	$1,%r12
 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
@@ -4641,23 +4616,23 @@
 	vmovdqu	%ymm1,32+256(%rdi)
 	vmovdqu	%ymm5,64+256(%rdi)
 	vmovdqu	%ymm9,96+256(%rdi)
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -4673,9 +4648,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -4695,47 +4669,51 @@
 	leaq	512(%rsi),%rsi
 	leaq	512(%rdi),%rdi
 	subq	$512,%rbx
-	jmp	1b
-3:
+	jmp	L$open_avx2_main_loop
+L$open_avx2_main_loop_done:
 	testq	%rbx,%rbx
 	vzeroupper
-	je	open_sse_finalize
-3:
+	je	L$open_sse_finalize
+
+	cmpq	$384,%rbx
+	ja	L$open_avx2_tail_512
+	cmpq	$256,%rbx
+	ja	L$open_avx2_tail_384
 	cmpq	$128,%rbx
-	ja	3f
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
+	ja	L$open_avx2_tail_256
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
+	vmovdqa	L$avx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
+	vmovdqa	%ymm12,0+160(%rbp)
 
 	xorq	%r8,%r8
 	movq	%rbx,%rcx
 	andq	$-16,%rcx
 	testq	%rcx,%rcx
-	je	2f
-1:
-	addq	0*8(%rsi,%r8), %r10
-	adcq	8+0*8(%rsi,%r8), %r11
+	je	L$open_avx2_tail_128_rounds
+L$open_avx2_tail_128_rounds_and_x1hash:
+	addq	0+0(%rsi,%r8,1),%r10
+	adcq	8+0(%rsi,%r8,1),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -4751,18 +4729,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
-2:
+L$open_avx2_tail_128_rounds:
 	addq	$16,%r8
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4770,7 +4747,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4781,7 +4758,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4789,7 +4766,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4800,36 +4777,35 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 
 	cmpq	%rcx,%r8
-	jb	1b
+	jb	L$open_avx2_tail_128_rounds_and_x1hash
 	cmpq	$160,%r8
-	jne	2b
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jne	L$open_avx2_tail_128_rounds
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
 	vmovdqa	%ymm3,%ymm8
 
-	jmp	open_avx2_tail_loop
-3:
-	cmpq	$256,%rbx
-	ja	3f
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jmp	L$open_avx2_tail_128_xor
+
+L$open_avx2_tail_256:
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm13
+	vmovdqa	L$avx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
 
-	movq	%rbx,128(%rbp)
+	movq	%rbx,0+128(%rbp)
 	movq	%rbx,%rcx
 	subq	$128,%rcx
 	shrq	$4,%rcx
@@ -4838,18 +4814,18 @@
 	cmovgq	%r8,%rcx
 	movq	%rsi,%rbx
 	xorq	%r8,%r8
-1:
-	addq	0(%rbx),%r10
+L$open_avx2_tail_256_rounds_and_x1hash:
+	addq	0+0(%rbx),%r10
 	adcq	8+0(%rbx),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -4867,18 +4843,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rbx),%rbx
-2:
+L$open_avx2_tail_256_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4886,7 +4861,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4897,7 +4872,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -4905,7 +4880,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -4918,7 +4893,7 @@
 	incq	%r8
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -4926,7 +4901,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -4937,7 +4912,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -4945,7 +4920,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -4956,7 +4931,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	L$rol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -4964,7 +4939,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	L$rol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -4975,28 +4950,28 @@
 	vpalignr	$12,%ymm6,%ymm6,%ymm6
 
 	cmpq	%rcx,%r8
-	jb	1b
+	jb	L$open_avx2_tail_256_rounds_and_x1hash
 	cmpq	$10,%r8
-	jne	2b
+	jne	L$open_avx2_tail_256_rounds
 	movq	%rbx,%r8
 	subq	%rsi,%rbx
 	movq	%rbx,%rcx
-	movq	128(%rbp),%rbx
-1:
+	movq	0+128(%rbp),%rbx
+L$open_avx2_tail_256_hash:
 	addq	$16,%rcx
 	cmpq	%rbx,%rcx
-	jg	1f
-	addq	0(%r8),%r10
+	jg	L$open_avx2_tail_256_done
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5014,24 +4989,23 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%r8),%r8
-	jmp	1b
-1:
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jmp	L$open_avx2_tail_256_hash
+L$open_avx2_tail_256_done:
+	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
@@ -5053,28 +5027,27 @@
 	leaq	128(%rsi),%rsi
 	leaq	128(%rdi),%rdi
 	subq	$128,%rbx
-	jmp	open_avx2_tail_loop
-3:
-	cmpq	$384,%rbx
-	ja	3f
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jmp	L$open_avx2_tail_128_xor
+
+L$open_avx2_tail_384:
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm10
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm14
+	vmovdqa	L$avx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
 
-	movq	%rbx,128(%rbp)
+	movq	%rbx,0+128(%rbp)
 	movq	%rbx,%rcx
 	subq	$256,%rcx
 	shrq	$4,%rcx
@@ -5084,18 +5057,18 @@
 	cmovgq	%r8,%rcx
 	movq	%rsi,%rbx
 	xorq	%r8,%r8
-1:
-	addq	0(%rbx),%r10
+L$open_avx2_tail_384_rounds_and_x2hash:
+	addq	0+0(%rbx),%r10
 	adcq	8+0(%rbx),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5113,18 +5086,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rbx),%rbx
-2:
+L$open_avx2_tail_384_rounds_and_x1hash:
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	L$rol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -5132,7 +5104,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	L$rol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -5143,7 +5115,7 @@
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -5151,7 +5123,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -5162,7 +5134,7 @@
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -5170,7 +5142,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -5179,26 +5151,26 @@
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
-	addq	0(%rbx),%r10
+	addq	0+0(%rbx),%r10
 	adcq	8+0(%rbx),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -5214,9 +5186,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -5225,7 +5196,7 @@
 	incq	%r8
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	L$rol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -5233,7 +5204,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	L$rol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -5244,7 +5215,7 @@
 	vpalignr	$12,%ymm6,%ymm6,%ymm6
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -5252,7 +5223,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -5263,7 +5234,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -5271,7 +5242,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -5282,28 +5253,28 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 
 	cmpq	%rcx,%r8
-	jb	1b
+	jb	L$open_avx2_tail_384_rounds_and_x2hash
 	cmpq	$10,%r8
-	jne	2b
+	jne	L$open_avx2_tail_384_rounds_and_x1hash
 	movq	%rbx,%r8
 	subq	%rsi,%rbx
 	movq	%rbx,%rcx
-	movq	128(%rbp),%rbx
-1:
+	movq	0+128(%rbp),%rbx
+L$open_avx2_384_tail_hash:
 	addq	$16,%rcx
 	cmpq	%rbx,%rcx
-	jg	1f
-	addq	0(%r8),%r10
+	jg	L$open_avx2_384_tail_done
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5321,28 +5292,27 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%r8),%r8
-	jmp	1b
-1:
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jmp	L$open_avx2_384_tail_hash
+L$open_avx2_384_tail_done:
+	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
@@ -5376,11 +5346,12 @@
 	leaq	256(%rsi),%rsi
 	leaq	256(%rdi),%rdi
 	subq	$256,%rbx
-	jmp	open_avx2_tail_loop
-3:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jmp	L$open_avx2_tail_128_xor
+
+L$open_avx2_tail_512:
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
@@ -5390,39 +5361,39 @@
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm15
+	vmovdqa	L$avx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm15
 	vpaddd	%ymm15,%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm15,256(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm15,0+256(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
 
 	xorq	%rcx,%rcx
 	movq	%rsi,%r8
-1:
-	addq	0(%r8),%r10
+L$open_avx2_tail_512_rounds_and_x2hash:
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -5438,17 +5409,16 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%r8),%r8
-2:
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+L$open_avx2_tail_512_rounds_and_x1hash:
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -5461,16 +5431,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -5483,18 +5452,19 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
-	addq	0(%r8),%r10
+	vmovdqa	L$rol8(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5512,13 +5482,11 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
@@ -5530,16 +5498,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -5552,7 +5519,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
@@ -5565,18 +5532,20 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	addq	16(%r8),%r10
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
+	vpaddd	%ymm7,%ymm3,%ymm3
+	addq	0+16(%r8),%r10
 	adcq	8+16(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5594,16 +5563,13 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	32(%r8),%r8
-	vmovdqa	.rol16(%rip),%ymm8
-	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
@@ -5615,16 +5581,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -5637,7 +5602,7 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -5650,16 +5615,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -5672,7 +5636,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
@@ -5688,26 +5652,26 @@
 
 	incq	%rcx
 	cmpq	$4,%rcx
-	jl	1b
+	jl	L$open_avx2_tail_512_rounds_and_x2hash
 	cmpq	$10,%rcx
-	jne	2b
+	jne	L$open_avx2_tail_512_rounds_and_x1hash
 	movq	%rbx,%rcx
 	subq	$384,%rcx
 	andq	$-16,%rcx
-1:
+L$open_avx2_tail_512_hash:
 	testq	%rcx,%rcx
-	je	1f
-	addq	0(%r8),%r10
+	je	L$open_avx2_tail_512_done
+	addq	0+0(%r8),%r10
 	adcq	8+0(%r8),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -5725,35 +5689,34 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%r8),%r8
 	subq	$16,%rcx
-	jmp	1b
-1:
-	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
-	vpaddd	64(%rbp),%ymm7,%ymm7
-	vpaddd	96(%rbp),%ymm11,%ymm11
-	vpaddd	256(%rbp),%ymm15,%ymm15
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jmp	L$open_avx2_tail_512_hash
+L$open_avx2_tail_512_done:
+	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	0+64(%rbp),%ymm7,%ymm7
+	vpaddd	0+96(%rbp),%ymm11,%ymm11
+	vpaddd	0+256(%rbp),%ymm15,%ymm15
+	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 
-	vmovdqa	%ymm0,128(%rbp)
+	vmovdqa	%ymm0,0+128(%rbp)
 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
@@ -5767,7 +5730,7 @@
 	vmovdqu	%ymm7,64+0(%rdi)
 	vmovdqu	%ymm11,96+0(%rdi)
 
-	vmovdqa	128(%rbp),%ymm0
+	vmovdqa	0+128(%rbp),%ymm0
 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
@@ -5801,9 +5764,9 @@
 	leaq	384(%rsi),%rsi
 	leaq	384(%rdi),%rdi
 	subq	$384,%rbx
-open_avx2_tail_loop:
+L$open_avx2_tail_128_xor:
 	cmpq	$32,%rbx
-	jb	open_avx2_tail
+	jb	L$open_avx2_tail_32_xor
 	subq	$32,%rbx
 	vpxor	(%rsi),%ymm0,%ymm0
 	vmovdqu	%ymm0,(%rdi)
@@ -5812,11 +5775,11 @@
 	vmovdqa	%ymm4,%ymm0
 	vmovdqa	%ymm8,%ymm4
 	vmovdqa	%ymm12,%ymm8
-	jmp	open_avx2_tail_loop
-open_avx2_tail:
+	jmp	L$open_avx2_tail_128_xor
+L$open_avx2_tail_32_xor:
 	cmpq	$16,%rbx
 	vmovdqa	%xmm0,%xmm1
-	jb	1f
+	jb	L$open_avx2_exit
 	subq	$16,%rbx
 
 	vpxor	(%rsi),%xmm0,%xmm1
@@ -5825,25 +5788,25 @@
 	leaq	16(%rdi),%rdi
 	vperm2i128	$0x11,%ymm0,%ymm0,%ymm0
 	vmovdqa	%xmm0,%xmm1
-1:
+L$open_avx2_exit:
 	vzeroupper
-	jmp	open_sse_tail_16
+	jmp	L$open_sse_tail_16
 
-open_avx2_192:
+L$open_avx2_192:
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm8,%ymm10
-	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
+	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
 	vmovdqa	%ymm12,%ymm11
 	vmovdqa	%ymm13,%ymm15
 	movq	$10,%r10
-1:
+L$open_avx2_192_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -5851,7 +5814,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -5862,7 +5825,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -5870,7 +5833,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -5881,7 +5844,7 @@
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -5889,7 +5852,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -5900,7 +5863,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -5908,7 +5871,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -5919,7 +5882,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 
 	decq	%r10
-	jne	1b
+	jne	L$open_avx2_192_rounds
 	vpaddd	%ymm2,%ymm0,%ymm0
 	vpaddd	%ymm2,%ymm1,%ymm1
 	vpaddd	%ymm6,%ymm4,%ymm4
@@ -5930,8 +5893,8 @@
 	vpaddd	%ymm15,%ymm13,%ymm13
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 
-	vpand	.clamp(%rip),%ymm3,%ymm3
-	vmovdqa	%ymm3,0(%rbp)
+	vpand	L$clamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0+0(%rbp)
 
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
@@ -5939,33 +5902,33 @@
 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
-open_avx2_short:
+L$open_avx2_short:
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
-open_avx2_hash_and_xor_loop:
+L$open_avx2_short_hash_and_xor_loop:
 	cmpq	$32,%rbx
-	jb	open_avx2_short_tail_32
+	jb	L$open_avx2_short_tail_32
 	subq	$32,%rbx
-	addq	0(%rsi),%r10
+	addq	0+0(%rsi),%r10
 	adcq	8+0(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -5981,32 +5944,31 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	addq	16(%rsi),%r10
+	addq	0+16(%rsi),%r10
 	adcq	8+16(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -6022,9 +5984,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -6044,32 +6005,32 @@
 	vmovdqa	%ymm13,%ymm9
 	vmovdqa	%ymm2,%ymm13
 	vmovdqa	%ymm6,%ymm2
-	jmp	open_avx2_hash_and_xor_loop
-open_avx2_short_tail_32:
+	jmp	L$open_avx2_short_hash_and_xor_loop
+L$open_avx2_short_tail_32:
 	cmpq	$16,%rbx
 	vmovdqa	%xmm0,%xmm1
-	jb	1f
+	jb	L$open_avx2_short_tail_32_exit
 	subq	$16,%rbx
-	addq	0(%rsi),%r10
+	addq	0+0(%rsi),%r10
 	adcq	8+0(%rsi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -6085,9 +6046,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -6097,29 +6057,29 @@
 	leaq	16(%rsi),%rsi
 	leaq	16(%rdi),%rdi
 	vextracti128	$1,%ymm0,%xmm1
-1:
+L$open_avx2_short_tail_32_exit:
 	vzeroupper
-	jmp	open_sse_tail_16
+	jmp	L$open_sse_tail_16
 
-open_avx2_320:
+L$open_avx2_320:
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm8,%ymm10
-	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
-	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14
+	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
+	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm14
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
 	movq	$10,%r10
-1:
+L$open_avx2_320_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -6127,7 +6087,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -6138,7 +6098,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -6146,7 +6106,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -6157,7 +6117,7 @@
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	L$rol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -6165,7 +6125,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	L$rol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -6176,7 +6136,7 @@
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -6184,7 +6144,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -6195,7 +6155,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -6203,7 +6163,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -6214,7 +6174,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	L$rol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -6222,7 +6182,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	L$rol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -6233,23 +6193,23 @@
 	vpalignr	$12,%ymm6,%ymm6,%ymm6
 
 	decq	%r10
-	jne	1b
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	jne	L$open_avx2_320_rounds
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
 	vpaddd	%ymm7,%ymm4,%ymm4
 	vpaddd	%ymm7,%ymm5,%ymm5
 	vpaddd	%ymm7,%ymm6,%ymm6
 	vpaddd	%ymm11,%ymm8,%ymm8
 	vpaddd	%ymm11,%ymm9,%ymm9
 	vpaddd	%ymm11,%ymm10,%ymm10
-	vpaddd	160(%rbp),%ymm12,%ymm12
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 
-	vpand	.clamp(%rip),%ymm3,%ymm3
-	vmovdqa	%ymm3,0(%rbp)
+	vpand	L$clamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0+0(%rbp)
 
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
@@ -6261,46 +6221,59 @@
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
-	jmp	open_avx2_short
+	jmp	L$open_avx2_short
+
 
 
 
 
 .p2align	6
 chacha20_poly1305_seal_avx2:
+
+
+
+
+
+
+
+
+
+
+
+
 	vzeroupper
-	vmovdqa	.chacha20_consts(%rip),%ymm0
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
 	vbroadcasti128	0(%r9),%ymm4
 	vbroadcasti128	16(%r9),%ymm8
 	vbroadcasti128	32(%r9),%ymm12
-	vpaddd	.avx2_init(%rip),%ymm12,%ymm12
+	vpaddd	L$avx2_init(%rip),%ymm12,%ymm12
 	cmpq	$192,%rbx
-	jbe	seal_avx2_192
+	jbe	L$seal_avx2_192
 	cmpq	$320,%rbx
-	jbe	seal_avx2_320
+	jbe	L$seal_avx2_320
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm4,%ymm7
-	vmovdqa	%ymm4,64(%rbp)
+	vmovdqa	%ymm4,0+64(%rbp)
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm8,%ymm10
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	%ymm8,96(%rbp)
+	vmovdqa	%ymm8,0+96(%rbp)
 	vmovdqa	%ymm12,%ymm15
-	vpaddd	.avx2_inc(%rip),%ymm15,%ymm14
-	vpaddd	.avx2_inc(%rip),%ymm14,%ymm13
-	vpaddd	.avx2_inc(%rip),%ymm13,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm15,256(%rbp)
+	vpaddd	L$avx2_inc(%rip),%ymm15,%ymm14
+	vpaddd	L$avx2_inc(%rip),%ymm14,%ymm13
+	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm12
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm15,0+256(%rbp)
 	movq	$10,%r10
-1:
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+L$seal_avx2_init_rounds:
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6313,16 +6286,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6335,7 +6307,7 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6348,16 +6320,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6370,7 +6341,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
@@ -6383,8 +6354,8 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6397,16 +6368,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6419,7 +6389,7 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6432,16 +6402,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6454,7 +6423,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
@@ -6469,29 +6438,29 @@
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
 
 	decq	%r10
-	jnz	1b
-	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
-	vpaddd	64(%rbp),%ymm7,%ymm7
-	vpaddd	96(%rbp),%ymm11,%ymm11
-	vpaddd	256(%rbp),%ymm15,%ymm15
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jnz	L$seal_avx2_init_rounds
+	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	0+64(%rbp),%ymm7,%ymm7
+	vpaddd	0+96(%rbp),%ymm11,%ymm11
+	vpaddd	0+256(%rbp),%ymm15,%ymm15
+	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 
 	vperm2i128	$0x13,%ymm11,%ymm15,%ymm11
 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm15
 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm3
-	vpand	.clamp(%rip),%ymm15,%ymm15
-	vmovdqa	%ymm15,0(%rbp)
+	vpand	L$clamp(%rip),%ymm15,%ymm15
+	vmovdqa	%ymm15,0+0(%rbp)
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
 
@@ -6533,7 +6502,7 @@
 	subq	$320,%rbx
 	movq	$320,%rcx
 	cmpq	$128,%rbx
-	jbe	seal_avx2_hash
+	jbe	L$seal_avx2_short_hash_remainder
 	vpxor	0(%rsi),%ymm0,%ymm0
 	vpxor	32(%rsi),%ymm4,%ymm4
 	vpxor	64(%rsi),%ymm8,%ymm8
@@ -6547,16 +6516,16 @@
 	movq	$8,%rcx
 	movq	$2,%r8
 	cmpq	$128,%rbx
-	jbe	seal_avx2_tail_128
+	jbe	L$seal_avx2_tail_128
 	cmpq	$256,%rbx
-	jbe	seal_avx2_tail_256
+	jbe	L$seal_avx2_tail_256
 	cmpq	$384,%rbx
-	jbe	seal_avx2_tail_384
+	jbe	L$seal_avx2_tail_384
 	cmpq	$512,%rbx
-	jbe	seal_avx2_tail_512
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jbe	L$seal_avx2_tail_512
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
@@ -6566,17 +6535,17 @@
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm15
+	vmovdqa	L$avx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm15
 	vpaddd	%ymm15,%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm15,256(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm15,0+256(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6589,16 +6558,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6611,7 +6579,7 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6624,16 +6592,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6646,7 +6613,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
@@ -6659,8 +6626,8 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6673,16 +6640,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6695,7 +6661,7 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6708,16 +6674,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6730,7 +6695,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
@@ -6743,8 +6708,8 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6757,16 +6722,15 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6779,19 +6743,21 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
 
 	subq	$16,%rdi
 	movq	$9,%rcx
-	jmp	4f
-1:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+	jmp	L$seal_avx2_main_loop_rounds_entry
+.p2align	5
+L$seal_avx2_main_loop:
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
@@ -6801,23 +6767,24 @@
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm15
+	vmovdqa	L$avx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm15
 	vpaddd	%ymm15,%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm15,256(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm15,0+256(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
 
 	movq	$10,%rcx
-2:
-	addq	0(%rdi),%r10
+.p2align	5
+L$seal_avx2_main_loop_rounds:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6826,7 +6793,7 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
@@ -6837,23 +6804,22 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
-	movq	8+0(%rbp),%rdx
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -6861,18 +6827,19 @@
 	vpslld	$32-20,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
 	vpsrld	$20,%ymm5,%ymm8
+	vpslld	$32-20,%ymm5,%ymm5
 	addq	%rax,%r15
 	adcq	%rdx,%r9
-	vpslld	$32-20,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
+	vpxor	%ymm3,%ymm15,%ymm15
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -6882,15 +6849,13 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
-4:
-	vpxor	%ymm3,%ymm15,%ymm15
+L$seal_avx2_main_loop_rounds_entry:
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
@@ -6898,27 +6863,26 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
-	addq	16(%rdi),%r10
-	adcq	8+16(%rdi),%r11
-	adcq	$1,%r12
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
+	addq	0+16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	movq	0+0(%rbp),%rdx
+	vmovdqa	%ymm8,0+128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vmovdqa	%ymm8,128(%rbp)
-	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
@@ -6930,28 +6894,28 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
-	movq	8+0(%rbp),%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$12,%ymm14,%ymm14,%ymm14
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpalignr	$8,%ymm10,%ymm10,%ymm10
-	vpalignr	$12,%ymm14,%ymm14,%ymm14
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$12,%ymm13,%ymm13,%ymm13
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -6960,17 +6924,19 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	addq	%rax,%r15
-	adcq	%rdx,%r9
 	vpshufb	%ymm8,%ymm15,%ymm15
 	vpshufb	%ymm8,%ymm14,%ymm14
+	addq	%rax,%r15
+	adcq	%rdx,%r9
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -6980,35 +6946,31 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpxor	%ymm11,%ymm7,%ymm7
-	vpxor	%ymm10,%ymm6,%ymm6
-	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
-	addq	32(%rdi),%r10
+	vpsrld	$20,%ymm6,%ymm8
+	vpslld	$32-20,%ymm6,%ymm6
+	vpxor	%ymm8,%ymm6,%ymm6
+	addq	0+32(%rdi),%r10
 	adcq	8+32(%rdi),%r11
 	adcq	$1,%r12
 
 	leaq	48(%rdi),%rdi
-	vpsrld	$20,%ymm6,%ymm8
-	vpslld	$32-20,%ymm6,%ymm6
-	vpxor	%ymm8,%ymm6,%ymm6
 	vpsrld	$20,%ymm5,%ymm8
 	vpslld	$32-20,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -7017,49 +6979,48 @@
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
 	vpxor	%ymm0,%ymm12,%ymm12
-	movq	0+0(%rbp),%rdx
+	vpshufb	%ymm8,%ymm15,%ymm15
+	vpshufb	%ymm8,%ymm14,%ymm14
+	vpshufb	%ymm8,%ymm13,%ymm13
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vpshufb	%ymm8,%ymm15,%ymm15
-	vpshufb	%ymm8,%ymm14,%ymm14
-	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	movq	8+0(%rbp),%rdx
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
+	vpxor	%ymm11,%ymm7,%ymm7
+	vpxor	%ymm10,%ymm6,%ymm6
+	vpxor	%ymm9,%ymm5,%ymm5
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpaddd	%ymm12,%ymm8,%ymm8
-	vpxor	%ymm11,%ymm7,%ymm7
-	vpxor	%ymm10,%ymm6,%ymm6
-	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
-	addq	%rax,%r15
-	adcq	%rdx,%r9
 	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
 	vpslld	$32-25,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
+	addq	%rax,%r15
+	adcq	%rdx,%r9
 	vpsrld	$25,%ymm5,%ymm8
 	vpslld	$32-25,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm5,%ymm5
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
@@ -7067,6 +7028,10 @@
 	vpalignr	$8,%ymm10,%ymm10,%ymm10
 	vpalignr	$4,%ymm14,%ymm14,%ymm14
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	vpalignr	$4,%ymm13,%ymm13,%ymm13
+	vpalignr	$12,%ymm4,%ymm4,%ymm4
+	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -7076,42 +7041,101 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpalignr	$8,%ymm9,%ymm9,%ymm9
-	vpalignr	$4,%ymm13,%ymm13,%ymm13
-	vpalignr	$12,%ymm4,%ymm4,%ymm4
-	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
 
 	decq	%rcx
-	jne	2b
-	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
-	vpaddd	64(%rbp),%ymm7,%ymm7
-	vpaddd	96(%rbp),%ymm11,%ymm11
-	vpaddd	256(%rbp),%ymm15,%ymm15
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jne	L$seal_avx2_main_loop_rounds
+	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	0+64(%rbp),%ymm7,%ymm7
+	vpaddd	0+96(%rbp),%ymm11,%ymm11
+	vpaddd	0+256(%rbp),%ymm15,%ymm15
+	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
+
+	vmovdqa	%ymm0,0+128(%rbp)
+	addq	0+0(%rdi),%r10
+	adcq	8+0(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r15
+	adcq	%r14,%r9
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
+	addq	0+16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
+	imulq	%r12,%r15
+	addq	%rax,%r14
+	adcq	%rdx,%r15
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
+	addq	%rax,%r15
+	adcq	%rdx,%r9
+	movq	%r13,%r10
+	movq	%r14,%r11
+	movq	%r15,%r12
+	andq	$3,%r12
+	movq	%r15,%r13
+	andq	$-4,%r13
+	movq	%r9,%r14
+	shrdq	$2,%r9,%r15
+	shrq	$2,%r9
+	addq	%r13,%r15
+	adcq	%r14,%r9
+	addq	%r15,%r10
+	adcq	%r9,%r11
+	adcq	$0,%r12
 
 	leaq	32(%rdi),%rdi
-	vmovdqa	%ymm0,128(%rbp)
-	addq	-32(%rdi),%r10
-	adcq	8+-32(%rdi),%r11
-	adcq	$1,%r12
 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
@@ -7125,45 +7149,7 @@
 	vmovdqu	%ymm7,64+0(%rdi)
 	vmovdqu	%ymm11,96+0(%rdi)
 
-	vmovdqa	128(%rbp),%ymm0
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
-	imulq	%r12,%r15
-	addq	%rax,%r14
-	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
-	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
-	adcq	%rdx,%r9
-	movq	%r13,%r10
-	movq	%r14,%r11
-	movq	%r15,%r12
-	andq	$3,%r12
-	movq	%r15,%r13
-	andq	$-4,%r13
-	movq	%r9,%r14
-	shrdq	$2,%r9,%r15
-	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
-	addq	%r15,%r10
-	adcq	%r9,%r11
-	adcq	$0,%r12
+	vmovdqa	0+128(%rbp),%ymm0
 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
@@ -7176,9 +7162,6 @@
 	vmovdqu	%ymm2,32+128(%rdi)
 	vmovdqu	%ymm6,64+128(%rdi)
 	vmovdqu	%ymm10,96+128(%rdi)
-	addq	-16(%rdi),%r10
-	adcq	8+-16(%rdi),%r11
-	adcq	$1,%r12
 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
@@ -7191,44 +7174,6 @@
 	vmovdqu	%ymm1,32+256(%rdi)
 	vmovdqu	%ymm5,64+256(%rdi)
 	vmovdqu	%ymm9,96+256(%rdi)
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
-	imulq	%r12,%r15
-	addq	%rax,%r14
-	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
-	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
-	adcq	%rdx,%r9
-	movq	%r13,%r10
-	movq	%r14,%r11
-	movq	%r15,%r12
-	andq	$3,%r12
-	movq	%r15,%r13
-	andq	$-4,%r13
-	movq	%r9,%r14
-	shrdq	$2,%r9,%r15
-	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
-	addq	%r15,%r10
-	adcq	%r9,%r11
-	adcq	$0,%r12
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm4
 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm0
@@ -7245,32 +7190,26 @@
 	leaq	512(%rsi),%rsi
 	subq	$512,%rbx
 	cmpq	$512,%rbx
-	jg	1b
-	addq	0(%rdi),%r10
+	jg	L$seal_avx2_main_loop
+
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
 	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
 	adcq	%rdx,%r9
 	movq	%r13,%r10
 	movq	%r14,%r11
@@ -7281,37 +7220,29 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	addq	16(%rdi),%r10
+	addq	0+16(%rdi),%r10
 	adcq	8+16(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
 	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
 	adcq	%rdx,%r9
 	movq	%r13,%r10
 	movq	%r14,%r11
@@ -7322,9 +7253,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -7332,43 +7262,41 @@
 	leaq	32(%rdi),%rdi
 	movq	$10,%rcx
 	xorq	%r8,%r8
+
+	cmpq	$384,%rbx
+	ja	L$seal_avx2_tail_512
+	cmpq	$256,%rbx
+	ja	L$seal_avx2_tail_384
 	cmpq	$128,%rbx
-	ja	3f
+	ja	L$seal_avx2_tail_256
 
-seal_avx2_tail_128:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
+L$seal_avx2_tail_128:
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
+	vmovdqa	L$avx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
+	vmovdqa	%ymm12,0+160(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+L$seal_avx2_tail_128_rounds_and_3xhash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
 	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
 	adcq	%rdx,%r9
 	movq	%r13,%r10
 	movq	%r14,%r11
@@ -7379,18 +7307,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+L$seal_avx2_tail_128_rounds_and_2xhash:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7398,7 +7325,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7407,31 +7334,24 @@
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
 	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
 	adcq	%rdx,%r9
 	movq	%r13,%r10
 	movq	%r14,%r11
@@ -7442,15 +7362,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7458,7 +7377,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7467,31 +7386,24 @@
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
-	addq	16(%rdi),%r10
+	addq	0+16(%rdi),%r10
 	adcq	8+16(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
-	movq	%rax,%r15
-	mulq	%r10
-	movq	%rax,%r13
-	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
-	mulq	%r11
+	movq	0+0+0(%rbp),%rdx
+	movq	%rdx,%r15
+	mulxq	%r10,%r13,%r14
+	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
-	movq	%rax,%r9
-	mulq	%r10
-	addq	%rax,%r14
-	adcq	$0,%rdx
-	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
-	mulq	%r11
+	movq	8+0+0(%rbp),%rdx
+	mulxq	%r10,%r10,%rax
+	addq	%r10,%r14
+	mulxq	%r11,%r11,%r9
+	adcq	%r11,%r15
+	adcq	$0,%r9
+	imulq	%r12,%rdx
 	addq	%rax,%r15
-	adcq	$0,%rdx
-	imulq	%r12,%r9
-	addq	%r10,%r15
 	adcq	%rdx,%r9
 	movq	%r13,%r10
 	movq	%r14,%r11
@@ -7502,67 +7414,63 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	32(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	L$seal_avx2_tail_128_rounds_and_3xhash
 	decq	%r8
-	jge	2b
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jge	L$seal_avx2_tail_128_rounds_and_2xhash
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm3
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x02,%ymm8,%ymm12,%ymm4
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm12
 	vmovdqa	%ymm3,%ymm8
 
-	jmp	seal_avx2_short_loop
-3:
-	cmpq	$256,%rbx
-	ja	3f
+	jmp	L$seal_avx2_short_loop
 
-seal_avx2_tail_256:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+L$seal_avx2_tail_256:
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm13
+	vmovdqa	L$avx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+L$seal_avx2_tail_256_rounds_and_3xhash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -7578,18 +7486,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+L$seal_avx2_tail_256_rounds_and_2xhash:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7597,7 +7504,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7608,7 +7515,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -7616,7 +7523,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -7625,26 +7532,26 @@
 	vpalignr	$12,%ymm13,%ymm13,%ymm13
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -7660,15 +7567,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7676,7 +7582,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7687,7 +7593,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -7695,7 +7601,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -7704,26 +7610,26 @@
 	vpalignr	$4,%ymm13,%ymm13,%ymm13
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
-	addq	16(%rdi),%r10
+	addq	0+16(%rdi),%r10
 	adcq	8+16(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -7739,26 +7645,25 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	32(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	L$seal_avx2_tail_256_rounds_and_3xhash
 	decq	%r8
-	jge	2b
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jge	L$seal_avx2_tail_256_rounds_and_2xhash
+	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x02,%ymm1,%ymm5,%ymm3
 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm5
 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm1
@@ -7780,50 +7685,47 @@
 	movq	$128,%rcx
 	leaq	128(%rsi),%rsi
 	subq	$128,%rbx
-	jmp	seal_avx2_hash
-3:
-	cmpq	$384,%rbx
-	ja	seal_avx2_tail_512
+	jmp	L$seal_avx2_short_hash_remainder
 
-seal_avx2_tail_384:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+L$seal_avx2_tail_384:
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm10
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm14
+	vmovdqa	L$avx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+L$seal_avx2_tail_384_rounds_and_3xhash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -7839,18 +7741,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
+L$seal_avx2_tail_384_rounds_and_2xhash:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7858,7 +7759,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7869,7 +7770,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -7877,7 +7778,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -7886,26 +7787,26 @@
 	vpalignr	$12,%ymm13,%ymm13,%ymm13
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -7921,15 +7822,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	L$rol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -7937,7 +7837,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	L$rol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -7948,7 +7848,7 @@
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -7956,7 +7856,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -7965,26 +7865,26 @@
 	vpalignr	$4,%ymm12,%ymm12,%ymm12
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
-	addq	16(%rdi),%r10
+	addq	0+16(%rdi),%r10
 	adcq	8+16(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -8000,15 +7900,14 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -8016,7 +7915,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -8027,7 +7926,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	L$rol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -8035,7 +7934,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	L$rol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -8047,21 +7946,21 @@
 
 	leaq	32(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	L$seal_avx2_tail_384_rounds_and_3xhash
 	decq	%r8
-	jge	2b
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jge	L$seal_avx2_tail_384_rounds_and_2xhash
+	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
@@ -8095,12 +7994,12 @@
 	movq	$256,%rcx
 	leaq	256(%rsi),%rsi
 	subq	$256,%rbx
-	jmp	seal_avx2_hash
+	jmp	L$seal_avx2_short_hash_remainder
 
-seal_avx2_tail_512:
-	vmovdqa	.chacha20_consts(%rip),%ymm0
-	vmovdqa	64(%rbp),%ymm4
-	vmovdqa	96(%rbp),%ymm8
+L$seal_avx2_tail_512:
+	vmovdqa	L$chacha20_consts(%rip),%ymm0
+	vmovdqa	0+64(%rbp),%ymm4
+	vmovdqa	0+96(%rbp),%ymm8
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm8,%ymm9
@@ -8110,28 +8009,28 @@
 	vmovdqa	%ymm0,%ymm3
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	.avx2_inc(%rip),%ymm12
-	vpaddd	160(%rbp),%ymm12,%ymm15
+	vmovdqa	L$avx2_inc(%rip),%ymm12
+	vpaddd	0+160(%rbp),%ymm12,%ymm15
 	vpaddd	%ymm15,%ymm12,%ymm14
 	vpaddd	%ymm14,%ymm12,%ymm13
 	vpaddd	%ymm13,%ymm12,%ymm12
-	vmovdqa	%ymm15,256(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm12,160(%rbp)
+	vmovdqa	%ymm15,0+256(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
 
-1:
-	addq	0(%rdi),%r10
+L$seal_avx2_tail_512_rounds_and_3xhash:
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rdx
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rdx
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
@@ -8149,17 +8048,16 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
-2:
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+L$seal_avx2_tail_512_rounds_and_2xhash:
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -8172,19 +8070,18 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
-	addq	0(%rdi),%r10
+	vpxor	%ymm10,%ymm6,%ymm6
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -8197,18 +8094,18 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
-	movq	0+0(%rbp),%rdx
+	vpaddd	%ymm4,%ymm0,%ymm0
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm3,%ymm15,%ymm15
 	vpxor	%ymm2,%ymm14,%ymm14
 	vpxor	%ymm1,%ymm13,%ymm13
@@ -8217,26 +8114,25 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$25,%ymm7,%ymm8
-	movq	8+0(%rbp),%rdx
+	vpslld	$32-25,%ymm7,%ymm7
+	vpxor	%ymm8,%ymm7,%ymm7
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpslld	$32-25,%ymm7,%ymm7
-	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
 	vpslld	$32-25,%ymm6,%ymm6
 	vpxor	%ymm8,%ymm6,%ymm6
@@ -8246,7 +8142,7 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$4,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$12,%ymm15,%ymm15,%ymm15
@@ -8255,14 +8151,14 @@
 	vpalignr	$12,%ymm14,%ymm14,%ymm14
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpalignr	$8,%ymm9,%ymm9,%ymm9
-	addq	%rax,%r15
-	adcq	%rdx,%r9
 	vpalignr	$12,%ymm13,%ymm13,%ymm13
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
+	addq	%rax,%r15
+	adcq	%rdx,%r9
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
 	vpalignr	$12,%ymm12,%ymm12,%ymm12
-	vmovdqa	%ymm8,128(%rbp)
-	vmovdqa	.rol16(%rip),%ymm8
+	vmovdqa	%ymm8,0+128(%rbp)
+	vmovdqa	L$rol16(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpaddd	%ymm5,%ymm1,%ymm1
@@ -8275,8 +8171,10 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
+	vpaddd	%ymm14,%ymm10,%ymm10
+	vpaddd	%ymm13,%ymm9,%ymm9
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	movq	%r13,%r10
 	movq	%r14,%r11
 	movq	%r15,%r12
@@ -8286,20 +8184,16 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	vpaddd	%ymm14,%ymm10,%ymm10
-	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
+	vmovdqa	%ymm8,0+128(%rbp)
 	vpsrld	$20,%ymm7,%ymm8
 	vpslld	$32-20,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
@@ -8312,12 +8206,12 @@
 	vpsrld	$20,%ymm4,%ymm8
 	vpslld	$32-20,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	addq	16(%rdi),%r10
-	adcq	8+16(%rdi),%r11
-	adcq	$1,%r12
-	vmovdqa	.rol8(%rip),%ymm8
+	vmovdqa	L$rol8(%rip),%ymm8
 	vpaddd	%ymm7,%ymm3,%ymm3
 	vpaddd	%ymm6,%ymm2,%ymm2
+	addq	0+16(%rdi),%r10
+	adcq	8+16(%rdi),%r11
+	adcq	$1,%r12
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm3,%ymm15,%ymm15
@@ -8328,24 +8222,23 @@
 	vpshufb	%ymm8,%ymm14,%ymm14
 	vpshufb	%ymm8,%ymm13,%ymm13
 	vpshufb	%ymm8,%ymm12,%ymm12
-	vmovdqa	128(%rbp),%ymm8
 	vpaddd	%ymm15,%ymm11,%ymm11
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpaddd	%ymm13,%ymm9,%ymm9
-	vpaddd	%ymm12,%ymm8,%ymm8
+	vpaddd	0+128(%rbp),%ymm12,%ymm8
 	vpxor	%ymm11,%ymm7,%ymm7
 	vpxor	%ymm10,%ymm6,%ymm6
-	movq	0+0(%rbp),%rdx
+	vpxor	%ymm9,%ymm5,%ymm5
+	vpxor	%ymm8,%ymm4,%ymm4
+	vmovdqa	%ymm8,0+128(%rbp)
+	vpsrld	$25,%ymm7,%ymm8
+	movq	0+0+0(%rbp),%rdx
 	movq	%rdx,%r15
 	mulxq	%r10,%r13,%r14
 	mulxq	%r11,%rax,%rdx
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	vpxor	%ymm9,%ymm5,%ymm5
-	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	%ymm8,128(%rbp)
-	vpsrld	$25,%ymm7,%ymm8
 	vpslld	$32-25,%ymm7,%ymm7
 	vpxor	%ymm8,%ymm7,%ymm7
 	vpsrld	$25,%ymm6,%ymm8
@@ -8357,22 +8250,22 @@
 	vpsrld	$25,%ymm4,%ymm8
 	vpslld	$32-25,%ymm4,%ymm4
 	vpxor	%ymm8,%ymm4,%ymm4
-	vmovdqa	128(%rbp),%ymm8
+	vmovdqa	0+128(%rbp),%ymm8
 	vpalignr	$12,%ymm7,%ymm7,%ymm7
 	vpalignr	$8,%ymm11,%ymm11,%ymm11
 	vpalignr	$4,%ymm15,%ymm15,%ymm15
 	vpalignr	$12,%ymm6,%ymm6,%ymm6
-	movq	8+0(%rbp),%rdx
+	vpalignr	$8,%ymm10,%ymm10,%ymm10
+	vpalignr	$4,%ymm14,%ymm14,%ymm14
+	vpalignr	$12,%ymm5,%ymm5,%ymm5
+	vpalignr	$8,%ymm9,%ymm9,%ymm9
+	movq	8+0+0(%rbp),%rdx
 	mulxq	%r10,%r10,%rax
 	addq	%r10,%r14
 	mulxq	%r11,%r11,%r9
 	adcq	%r11,%r15
 	adcq	$0,%r9
 	imulq	%r12,%rdx
-	vpalignr	$8,%ymm10,%ymm10,%ymm10
-	vpalignr	$4,%ymm14,%ymm14,%ymm14
-	vpalignr	$12,%ymm5,%ymm5,%ymm5
-	vpalignr	$8,%ymm9,%ymm9,%ymm9
 	vpalignr	$4,%ymm13,%ymm13,%ymm13
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpalignr	$8,%ymm8,%ymm8,%ymm8
@@ -8389,6 +8282,10 @@
 
 
 
+
+
+
+
 	addq	%rax,%r15
 	adcq	%rdx,%r9
 
@@ -8420,36 +8317,35 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	32(%rdi),%rdi
 	decq	%rcx
-	jg	1b
+	jg	L$seal_avx2_tail_512_rounds_and_3xhash
 	decq	%r8
-	jge	2b
-	vpaddd	.chacha20_consts(%rip),%ymm3,%ymm3
-	vpaddd	64(%rbp),%ymm7,%ymm7
-	vpaddd	96(%rbp),%ymm11,%ymm11
-	vpaddd	256(%rbp),%ymm15,%ymm15
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
-	vpaddd	64(%rbp),%ymm6,%ymm6
-	vpaddd	96(%rbp),%ymm10,%ymm10
-	vpaddd	224(%rbp),%ymm14,%ymm14
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	64(%rbp),%ymm5,%ymm5
-	vpaddd	96(%rbp),%ymm9,%ymm9
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	64(%rbp),%ymm4,%ymm4
-	vpaddd	96(%rbp),%ymm8,%ymm8
-	vpaddd	160(%rbp),%ymm12,%ymm12
+	jge	L$seal_avx2_tail_512_rounds_and_2xhash
+	vpaddd	L$chacha20_consts(%rip),%ymm3,%ymm3
+	vpaddd	0+64(%rbp),%ymm7,%ymm7
+	vpaddd	0+96(%rbp),%ymm11,%ymm11
+	vpaddd	0+256(%rbp),%ymm15,%ymm15
+	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
+	vpaddd	0+64(%rbp),%ymm6,%ymm6
+	vpaddd	0+96(%rbp),%ymm10,%ymm10
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
+	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	0+64(%rbp),%ymm5,%ymm5
+	vpaddd	0+96(%rbp),%ymm9,%ymm9
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	0+64(%rbp),%ymm4,%ymm4
+	vpaddd	0+96(%rbp),%ymm8,%ymm8
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
 
-	vmovdqa	%ymm0,128(%rbp)
+	vmovdqa	%ymm0,0+128(%rbp)
 	vperm2i128	$0x02,%ymm3,%ymm7,%ymm0
 	vperm2i128	$0x13,%ymm3,%ymm7,%ymm7
 	vperm2i128	$0x02,%ymm11,%ymm15,%ymm3
@@ -8463,7 +8359,7 @@
 	vmovdqu	%ymm7,64+0(%rdi)
 	vmovdqu	%ymm11,96+0(%rdi)
 
-	vmovdqa	128(%rbp),%ymm0
+	vmovdqa	0+128(%rbp),%ymm0
 	vperm2i128	$0x02,%ymm2,%ymm6,%ymm3
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm6
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm2
@@ -8497,27 +8393,27 @@
 	movq	$384,%rcx
 	leaq	384(%rsi),%rsi
 	subq	$384,%rbx
-	jmp	seal_avx2_hash
+	jmp	L$seal_avx2_short_hash_remainder
 
-seal_avx2_320:
+L$seal_avx2_320:
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm8,%ymm10
-	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
-	vpaddd	.avx2_inc(%rip),%ymm13,%ymm14
+	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
+	vpaddd	L$avx2_inc(%rip),%ymm13,%ymm14
 	vmovdqa	%ymm4,%ymm7
 	vmovdqa	%ymm8,%ymm11
-	vmovdqa	%ymm12,160(%rbp)
-	vmovdqa	%ymm13,192(%rbp)
-	vmovdqa	%ymm14,224(%rbp)
+	vmovdqa	%ymm12,0+160(%rbp)
+	vmovdqa	%ymm13,0+192(%rbp)
+	vmovdqa	%ymm14,0+224(%rbp)
 	movq	$10,%r10
-1:
+L$seal_avx2_320_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -8525,7 +8421,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -8536,7 +8432,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -8544,7 +8440,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -8555,7 +8451,7 @@
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	L$rol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -8563,7 +8459,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	L$rol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -8574,7 +8470,7 @@
 	vpalignr	$4,%ymm6,%ymm6,%ymm6
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -8582,7 +8478,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -8593,7 +8489,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -8601,7 +8497,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -8612,7 +8508,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol16(%rip),%ymm14,%ymm14
+	vpshufb	L$rol16(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpsrld	$20,%ymm6,%ymm3
@@ -8620,7 +8516,7 @@
 	vpxor	%ymm3,%ymm6,%ymm6
 	vpaddd	%ymm6,%ymm2,%ymm2
 	vpxor	%ymm2,%ymm14,%ymm14
-	vpshufb	.rol8(%rip),%ymm14,%ymm14
+	vpshufb	L$rol8(%rip),%ymm14,%ymm14
 	vpaddd	%ymm14,%ymm10,%ymm10
 	vpxor	%ymm10,%ymm6,%ymm6
 	vpslld	$7,%ymm6,%ymm3
@@ -8631,23 +8527,23 @@
 	vpalignr	$12,%ymm6,%ymm6,%ymm6
 
 	decq	%r10
-	jne	1b
-	vpaddd	.chacha20_consts(%rip),%ymm0,%ymm0
-	vpaddd	.chacha20_consts(%rip),%ymm1,%ymm1
-	vpaddd	.chacha20_consts(%rip),%ymm2,%ymm2
+	jne	L$seal_avx2_320_rounds
+	vpaddd	L$chacha20_consts(%rip),%ymm0,%ymm0
+	vpaddd	L$chacha20_consts(%rip),%ymm1,%ymm1
+	vpaddd	L$chacha20_consts(%rip),%ymm2,%ymm2
 	vpaddd	%ymm7,%ymm4,%ymm4
 	vpaddd	%ymm7,%ymm5,%ymm5
 	vpaddd	%ymm7,%ymm6,%ymm6
 	vpaddd	%ymm11,%ymm8,%ymm8
 	vpaddd	%ymm11,%ymm9,%ymm9
 	vpaddd	%ymm11,%ymm10,%ymm10
-	vpaddd	160(%rbp),%ymm12,%ymm12
-	vpaddd	192(%rbp),%ymm13,%ymm13
-	vpaddd	224(%rbp),%ymm14,%ymm14
+	vpaddd	0+160(%rbp),%ymm12,%ymm12
+	vpaddd	0+192(%rbp),%ymm13,%ymm13
+	vpaddd	0+224(%rbp),%ymm14,%ymm14
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 
-	vpand	.clamp(%rip),%ymm3,%ymm3
-	vmovdqa	%ymm3,0(%rbp)
+	vpand	L$clamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0+0(%rbp)
 
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
@@ -8659,23 +8555,23 @@
 	vperm2i128	$0x02,%ymm10,%ymm14,%ymm13
 	vperm2i128	$0x13,%ymm2,%ymm6,%ymm2
 	vperm2i128	$0x13,%ymm10,%ymm14,%ymm6
-	jmp	seal_avx2_short
+	jmp	L$seal_avx2_short
 
-seal_avx2_192:
+L$seal_avx2_192:
 	vmovdqa	%ymm0,%ymm1
 	vmovdqa	%ymm0,%ymm2
 	vmovdqa	%ymm4,%ymm5
 	vmovdqa	%ymm4,%ymm6
 	vmovdqa	%ymm8,%ymm9
 	vmovdqa	%ymm8,%ymm10
-	vpaddd	.avx2_inc(%rip),%ymm12,%ymm13
+	vpaddd	L$avx2_inc(%rip),%ymm12,%ymm13
 	vmovdqa	%ymm12,%ymm11
 	vmovdqa	%ymm13,%ymm15
 	movq	$10,%r10
-1:
+L$seal_avx2_192_rounds:
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -8683,7 +8579,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -8694,7 +8590,7 @@
 	vpalignr	$4,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -8702,7 +8598,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -8713,7 +8609,7 @@
 	vpalignr	$4,%ymm5,%ymm5,%ymm5
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol16(%rip),%ymm12,%ymm12
+	vpshufb	L$rol16(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpsrld	$20,%ymm4,%ymm3
@@ -8721,7 +8617,7 @@
 	vpxor	%ymm3,%ymm4,%ymm4
 	vpaddd	%ymm4,%ymm0,%ymm0
 	vpxor	%ymm0,%ymm12,%ymm12
-	vpshufb	.rol8(%rip),%ymm12,%ymm12
+	vpshufb	L$rol8(%rip),%ymm12,%ymm12
 	vpaddd	%ymm12,%ymm8,%ymm8
 	vpxor	%ymm8,%ymm4,%ymm4
 	vpslld	$7,%ymm4,%ymm3
@@ -8732,7 +8628,7 @@
 	vpalignr	$12,%ymm4,%ymm4,%ymm4
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol16(%rip),%ymm13,%ymm13
+	vpshufb	L$rol16(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpsrld	$20,%ymm5,%ymm3
@@ -8740,7 +8636,7 @@
 	vpxor	%ymm3,%ymm5,%ymm5
 	vpaddd	%ymm5,%ymm1,%ymm1
 	vpxor	%ymm1,%ymm13,%ymm13
-	vpshufb	.rol8(%rip),%ymm13,%ymm13
+	vpshufb	L$rol8(%rip),%ymm13,%ymm13
 	vpaddd	%ymm13,%ymm9,%ymm9
 	vpxor	%ymm9,%ymm5,%ymm5
 	vpslld	$7,%ymm5,%ymm3
@@ -8751,7 +8647,7 @@
 	vpalignr	$12,%ymm5,%ymm5,%ymm5
 
 	decq	%r10
-	jne	1b
+	jne	L$seal_avx2_192_rounds
 	vpaddd	%ymm2,%ymm0,%ymm0
 	vpaddd	%ymm2,%ymm1,%ymm1
 	vpaddd	%ymm6,%ymm4,%ymm4
@@ -8762,8 +8658,8 @@
 	vpaddd	%ymm15,%ymm13,%ymm13
 	vperm2i128	$0x02,%ymm0,%ymm4,%ymm3
 
-	vpand	.clamp(%rip),%ymm3,%ymm3
-	vmovdqa	%ymm3,0(%rbp)
+	vpand	L$clamp(%rip),%ymm3,%ymm3
+	vmovdqa	%ymm3,0+0(%rbp)
 
 	vperm2i128	$0x13,%ymm0,%ymm4,%ymm0
 	vperm2i128	$0x13,%ymm8,%ymm12,%ymm4
@@ -8771,33 +8667,33 @@
 	vperm2i128	$0x02,%ymm9,%ymm13,%ymm12
 	vperm2i128	$0x13,%ymm1,%ymm5,%ymm1
 	vperm2i128	$0x13,%ymm9,%ymm13,%ymm5
-seal_avx2_short:
+L$seal_avx2_short:
 	movq	%r8,%r8
 	call	poly_hash_ad_internal
 	xorq	%rcx,%rcx
-seal_avx2_hash:
+L$seal_avx2_short_hash_remainder:
 	cmpq	$16,%rcx
-	jb	seal_avx2_short_loop
-	addq	0(%rdi),%r10
+	jb	L$seal_avx2_short_loop
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -8813,45 +8709,44 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	subq	$16,%rcx
 	addq	$16,%rdi
-	jmp	seal_avx2_hash
-seal_avx2_short_loop:
+	jmp	L$seal_avx2_short_hash_remainder
+L$seal_avx2_short_loop:
 	cmpq	$32,%rbx
-	jb	seal_avx2_short_tail
+	jb	L$seal_avx2_short_tail
 	subq	$32,%rbx
 
 	vpxor	(%rsi),%ymm0,%ymm0
 	vmovdqu	%ymm0,(%rdi)
 	leaq	32(%rsi),%rsi
 
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -8867,32 +8762,31 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
-	addq	16(%rdi),%r10
+	addq	0+16(%rdi),%r10
 	adcq	8+16(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -8908,9 +8802,8 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
@@ -8926,34 +8819,34 @@
 	vmovdqa	%ymm13,%ymm9
 	vmovdqa	%ymm2,%ymm13
 	vmovdqa	%ymm6,%ymm2
-	jmp	seal_avx2_short_loop
-seal_avx2_short_tail:
+	jmp	L$seal_avx2_short_loop
+L$seal_avx2_short_tail:
 	cmpq	$16,%rbx
-	jb	1f
+	jb	L$seal_avx2_exit
 	subq	$16,%rbx
 	vpxor	(%rsi),%xmm0,%xmm3
 	vmovdqu	%xmm3,(%rdi)
 	leaq	16(%rsi),%rsi
-	addq	0(%rdi),%r10
+	addq	0+0(%rdi),%r10
 	adcq	8+0(%rdi),%r11
 	adcq	$1,%r12
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	movq	%rax,%r15
 	mulq	%r10
 	movq	%rax,%r13
 	movq	%rdx,%r14
-	movq	0+0(%rbp),%rax
+	movq	0+0+0(%rbp),%rax
 	mulq	%r11
 	imulq	%r12,%r15
 	addq	%rax,%r14
 	adcq	%rdx,%r15
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	movq	%rax,%r9
 	mulq	%r10
 	addq	%rax,%r14
 	adcq	$0,%rdx
 	movq	%rdx,%r10
-	movq	8+0(%rbp),%rax
+	movq	8+0+0(%rbp),%rax
 	mulq	%r11
 	addq	%rax,%r15
 	adcq	$0,%rdx
@@ -8969,17 +8862,17 @@
 	movq	%r9,%r14
 	shrdq	$2,%r9,%r15
 	shrq	$2,%r9
-	addq	%r13,%r10
-	adcq	%r14,%r11
-	adcq	$0,%r12
+	addq	%r13,%r15
+	adcq	%r14,%r9
 	addq	%r15,%r10
 	adcq	%r9,%r11
 	adcq	$0,%r12
 
 	leaq	16(%rdi),%rdi
 	vextracti128	$1,%ymm0,%xmm0
-1:
+L$seal_avx2_exit:
 	vzeroupper
-	jmp	seal_sse_tail_16
+	jmp	L$seal_sse_tail_16
+
 
 #endif
diff --git a/deps/boringssl/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S b/deps/boringssl/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S
index 672dce3..87ca5e9 100644
--- a/deps/boringssl/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S
+++ b/deps/boringssl/mac-x86_64/crypto/fipsmodule/sha1-x86_64.S
@@ -26,6 +26,8 @@
 	movl	8(%r10),%r10d
 	testl	$512,%r8d
 	jz	L$ialu
+	testl	$536870912,%r10d
+	jnz	_shaext_shortcut
 	andl	$296,%r10d
 	cmpl	$296,%r10d
 	je	_avx2_shortcut
@@ -1269,6 +1271,175 @@
 
 
 
+.p2align	5
+sha1_block_data_order_shaext:
+_shaext_shortcut:
+
+	movdqu	(%rdi),%xmm0
+	movd	16(%rdi),%xmm1
+	movdqa	K_XX_XX+160(%rip),%xmm3
+
+	movdqu	(%rsi),%xmm4
+	pshufd	$27,%xmm0,%xmm0
+	movdqu	16(%rsi),%xmm5
+	pshufd	$27,%xmm1,%xmm1
+	movdqu	32(%rsi),%xmm6
+.byte	102,15,56,0,227
+	movdqu	48(%rsi),%xmm7
+.byte	102,15,56,0,235
+.byte	102,15,56,0,243
+	movdqa	%xmm1,%xmm9
+.byte	102,15,56,0,251
+	jmp	L$oop_shaext
+
+.p2align	4
+L$oop_shaext:
+	decq	%rdx
+	leaq	64(%rsi),%r8
+	paddd	%xmm4,%xmm1
+	cmovneq	%r8,%rsi
+	movdqa	%xmm0,%xmm8
+.byte	15,56,201,229
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,0
+.byte	15,56,200,213
+	pxor	%xmm6,%xmm4
+.byte	15,56,201,238
+.byte	15,56,202,231
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,0
+.byte	15,56,200,206
+	pxor	%xmm7,%xmm5
+.byte	15,56,202,236
+.byte	15,56,201,247
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,0
+.byte	15,56,200,215
+	pxor	%xmm4,%xmm6
+.byte	15,56,201,252
+.byte	15,56,202,245
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,0
+.byte	15,56,200,204
+	pxor	%xmm5,%xmm7
+.byte	15,56,202,254
+.byte	15,56,201,229
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,0
+.byte	15,56,200,213
+	pxor	%xmm6,%xmm4
+.byte	15,56,201,238
+.byte	15,56,202,231
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,1
+.byte	15,56,200,206
+	pxor	%xmm7,%xmm5
+.byte	15,56,202,236
+.byte	15,56,201,247
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,1
+.byte	15,56,200,215
+	pxor	%xmm4,%xmm6
+.byte	15,56,201,252
+.byte	15,56,202,245
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,1
+.byte	15,56,200,204
+	pxor	%xmm5,%xmm7
+.byte	15,56,202,254
+.byte	15,56,201,229
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,1
+.byte	15,56,200,213
+	pxor	%xmm6,%xmm4
+.byte	15,56,201,238
+.byte	15,56,202,231
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,1
+.byte	15,56,200,206
+	pxor	%xmm7,%xmm5
+.byte	15,56,202,236
+.byte	15,56,201,247
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,2
+.byte	15,56,200,215
+	pxor	%xmm4,%xmm6
+.byte	15,56,201,252
+.byte	15,56,202,245
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,2
+.byte	15,56,200,204
+	pxor	%xmm5,%xmm7
+.byte	15,56,202,254
+.byte	15,56,201,229
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,2
+.byte	15,56,200,213
+	pxor	%xmm6,%xmm4
+.byte	15,56,201,238
+.byte	15,56,202,231
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,2
+.byte	15,56,200,206
+	pxor	%xmm7,%xmm5
+.byte	15,56,202,236
+.byte	15,56,201,247
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,2
+.byte	15,56,200,215
+	pxor	%xmm4,%xmm6
+.byte	15,56,201,252
+.byte	15,56,202,245
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,3
+.byte	15,56,200,204
+	pxor	%xmm5,%xmm7
+.byte	15,56,202,254
+	movdqu	(%rsi),%xmm4
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,3
+.byte	15,56,200,213
+	movdqu	16(%rsi),%xmm5
+.byte	102,15,56,0,227
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,3
+.byte	15,56,200,206
+	movdqu	32(%rsi),%xmm6
+.byte	102,15,56,0,235
+
+	movdqa	%xmm0,%xmm2
+.byte	15,58,204,193,3
+.byte	15,56,200,215
+	movdqu	48(%rsi),%xmm7
+.byte	102,15,56,0,243
+
+	movdqa	%xmm0,%xmm1
+.byte	15,58,204,194,3
+.byte	65,15,56,200,201
+.byte	102,15,56,0,251
+
+	paddd	%xmm8,%xmm0
+	movdqa	%xmm1,%xmm9
+
+	jnz	L$oop_shaext
+
+	pshufd	$27,%xmm0,%xmm0
+	pshufd	$27,%xmm1,%xmm1
+	movdqu	%xmm0,(%rdi)
+	movd	%xmm1,16(%rdi)
+	.byte	0xf3,0xc3
+
+
+
 .p2align	4
 sha1_block_data_order_ssse3:
 _ssse3_shortcut:
diff --git a/deps/boringssl/src/CMakeLists.txt b/deps/boringssl/src/CMakeLists.txt
index c266e12..f58e853 100644
--- a/deps/boringssl/src/CMakeLists.txt
+++ b/deps/boringssl/src/CMakeLists.txt
@@ -484,6 +484,8 @@
   set(ARCH "x86")
 elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "aarch64")
   set(ARCH "aarch64")
+elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "ARM64")
+  set(ARCH "aarch64")
 elseif(${CMAKE_SYSTEM_PROCESSOR} STREQUAL "arm64")
   set(ARCH "aarch64")
 # Apple A12 Bionic chipset which is added in iPhone XS/XS Max/XR uses arm64e architecture.
@@ -617,6 +619,30 @@
   set(HANDSHAKER_ARGS "-handshaker-path" $<TARGET_FILE:handshaker>)
 endif()
 
+if(FIPS)
+  add_custom_target(
+    acvp_tests
+    COMMAND ${GO_EXECUTABLE} build -o ${CMAKE_BINARY_DIR}/acvptool
+            boringssl.googlesource.com/boringssl/util/fipstools/acvp/acvptool
+    COMMAND ${GO_EXECUTABLE} build -o ${CMAKE_BINARY_DIR}/testmodulewrapper
+            boringssl.googlesource.com/boringssl/util/fipstools/acvp/acvptool/testmodulewrapper
+    COMMAND cd util/fipstools/acvp/acvptool/test &&
+            ${GO_EXECUTABLE} run check_expected.go
+            -tool ${CMAKE_BINARY_DIR}/acvptool
+            -module-wrappers modulewrapper:$<TARGET_FILE:modulewrapper>,testmodulewrapper:${CMAKE_BINARY_DIR}/testmodulewrapper
+            -tests tests.json
+    WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
+    DEPENDS modulewrapper
+    USES_TERMINAL)
+
+  add_custom_target(
+    fips_specific_tests_if_any
+    DEPENDS acvp_tests
+  )
+else()
+  add_custom_target(fips_specific_tests_if_any)
+endif()
+
 add_custom_target(
     run_tests
     COMMAND ${GO_EXECUTABLE} run util/all_tests.go -build-dir
@@ -625,5 +651,5 @@
             ${GO_EXECUTABLE} test -shim-path $<TARGET_FILE:bssl_shim>
               ${HANDSHAKER_ARGS} ${RUNNER_ARGS}
     WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
-    DEPENDS all_tests bssl_shim handshaker
+    DEPENDS all_tests bssl_shim handshaker fips_specific_tests_if_any
     USES_TERMINAL)
diff --git a/deps/boringssl/src/crypto/CMakeLists.txt b/deps/boringssl/src/crypto/CMakeLists.txt
index 0339e7d..cde92b5 100644
--- a/deps/boringssl/src/crypto/CMakeLists.txt
+++ b/deps/boringssl/src/crypto/CMakeLists.txt
@@ -47,17 +47,23 @@
       endforeach()
     endif()
   else()
-    if(${ARCH} STREQUAL "x86_64")
-      set(PERLASM_STYLE nasm)
+    if(${ARCH} STREQUAL "aarch64")
+      set(PERLASM_STYLE win64)
+      set(ASM_EXT S)
+      enable_language(ASM)
     else()
-      set(PERLASM_STYLE win32n)
-      set(PERLASM_FLAGS "-DOPENSSL_IA32_SSE2")
-    endif()
-    set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -gcv8")
+      if(${ARCH} STREQUAL "x86_64")
+        set(PERLASM_STYLE nasm)
+      else()
+        set(PERLASM_STYLE win32n)
+        set(PERLASM_FLAGS "-DOPENSSL_IA32_SSE2")
+      endif()
+      set(CMAKE_ASM_NASM_FLAGS "${CMAKE_ASM_NASM_FLAGS} -gcv8")
 
-    # On Windows, we use the NASM output, specifically built with Yasm.
-    set(ASM_EXT asm)
-    enable_language(ASM_NASM)
+      # On Windows, we use the NASM output.
+      set(ASM_EXT asm)
+      enable_language(ASM_NASM)
+    endif()
   endif()
 endif()
 
@@ -234,6 +240,7 @@
   bio/printf.c
   bio/socket.c
   bio/socket_helper.c
+  blake2/blake2.c
   bn_extra/bn_asn1.c
   bn_extra/convert.c
   buf/buf.c
@@ -258,6 +265,7 @@
   conf/conf.c
   cpu-aarch64-fuchsia.c
   cpu-aarch64-linux.c
+  cpu-aarch64-win.c
   cpu-arm-linux.c
   cpu-arm.c
   cpu-intel.c
@@ -265,10 +273,8 @@
   crypto.c
   curve25519/curve25519.c
   curve25519/spake25519.c
-  dh/dh.c
-  dh/params.c
-  dh/check.c
-  dh/dh_asn1.c
+  dh_extra/params.c
+  dh_extra/dh_asn1.c
   digest_extra/digest_extra.c
   dsa/dsa.c
   dsa/dsa_asn1.c
@@ -299,6 +305,7 @@
   evp/sign.c
   ex_data.c
   hkdf/hkdf.c
+  hpke/hpke.c
   hrss/hrss.c
   lhash/lhash.c
   mem.c
@@ -324,6 +331,7 @@
   rand_extra/deterministic.c
   rand_extra/forkunsafe.c
   rand_extra/fuchsia.c
+  rand_extra/passive.c
   rand_extra/rand_extra.c
   rand_extra/windows.c
   rc4/rc4.c
@@ -339,6 +347,7 @@
   thread_win.c
   trust_token/pmbtoken.c
   trust_token/trust_token.c
+  trust_token/voprf.c
   x509/a_digest.c
   x509/a_sign.c
   x509/a_strex.c
@@ -414,12 +423,10 @@
   x509v3/v3_pci.c
   x509v3/v3_pcia.c
   x509v3/v3_pcons.c
-  x509v3/v3_pku.c
   x509v3/v3_pmaps.c
   x509v3/v3_prn.c
   x509v3/v3_purp.c
   x509v3/v3_skey.c
-  x509v3/v3_sxnet.c
   x509v3/v3_utl.c
 
   $<TARGET_OBJECTS:fipsmodule>
@@ -456,7 +463,7 @@
 
 SET_TARGET_PROPERTIES(crypto PROPERTIES LINKER_LANGUAGE C)
 
-if(NOT MSVC AND NOT ANDROID)
+if(NOT WIN32 AND NOT ANDROID)
   target_link_libraries(crypto pthread)
 endif()
 
@@ -486,8 +493,9 @@
   abi_self_test.cc
   asn1/asn1_test.cc
   base64/base64_test.cc
-  buf/buf_test.cc
   bio/bio_test.cc
+  blake2/blake2_test.cc
+  buf/buf_test.cc
   bytestring/bytestring_test.cc
   chacha/chacha_test.cc
   cipher_extra/aead_test.cc
@@ -496,11 +504,12 @@
   compiler_test.cc
   constant_time_test.cc
   cpu-arm-linux_test.cc
+  crypto_test.cc
   curve25519/ed25519_test.cc
   curve25519/spake25519_test.cc
   curve25519/x25519_test.cc
   ecdh_extra/ecdh_test.cc
-  dh/dh_test.cc
+  dh_extra/dh_test.cc
   digest_extra/digest_test.cc
   dsa/dsa_test.cc
   err/err_test.cc
@@ -519,6 +528,7 @@
   fipsmodule/rand/fork_detect_test.cc
   fipsmodule/sha/sha_test.cc
   hkdf/hkdf_test.cc
+  hpke/hpke_test.cc
   hmac_extra/hmac_test.cc
   hrss/hrss_test.cc
   impl_dispatch_test.cc
diff --git a/deps/boringssl/src/crypto/asn1/a_bitstr.c b/deps/boringssl/src/crypto/asn1/a_bitstr.c
index 4024ed2..b945cb1 100644
--- a/deps/boringssl/src/crypto/asn1/a_bitstr.c
+++ b/deps/boringssl/src/crypto/asn1/a_bitstr.c
@@ -67,7 +67,7 @@
 
 int ASN1_BIT_STRING_set(ASN1_BIT_STRING *x, unsigned char *d, int len)
 {
-    return M_ASN1_BIT_STRING_set(x, d, len);
+    return ASN1_STRING_set(x, d, len);
 }
 
 int i2c_ASN1_BIT_STRING(const ASN1_BIT_STRING *a, unsigned char **pp)
@@ -146,7 +146,7 @@
     }
 
     if ((a == NULL) || ((*a) == NULL)) {
-        if ((ret = M_ASN1_BIT_STRING_new()) == NULL)
+        if ((ret = ASN1_BIT_STRING_new()) == NULL)
             return (NULL);
     } else
         ret = (*a);
@@ -188,7 +188,7 @@
     return (ret);
  err:
     if ((ret != NULL) && ((a == NULL) || (*a != ret)))
-        M_ASN1_BIT_STRING_free(ret);
+        ASN1_BIT_STRING_free(ret);
     return (NULL);
 }
 
diff --git a/deps/boringssl/src/crypto/asn1/a_enum.c b/deps/boringssl/src/crypto/asn1/a_enum.c
index b99663b..d7a7357 100644
--- a/deps/boringssl/src/crypto/asn1/a_enum.c
+++ b/deps/boringssl/src/crypto/asn1/a_enum.c
@@ -153,7 +153,7 @@
     int len, j;
 
     if (ai == NULL)
-        ret = M_ASN1_ENUMERATED_new();
+        ret = ASN1_ENUMERATED_new();
     else
         ret = ai;
     if (ret == NULL) {
@@ -179,7 +179,7 @@
     return (ret);
  err:
     if (ret != ai)
-        M_ASN1_ENUMERATED_free(ret);
+        ASN1_ENUMERATED_free(ret);
     return (NULL);
 }
 
diff --git a/deps/boringssl/src/crypto/asn1/a_int.c b/deps/boringssl/src/crypto/asn1/a_int.c
index 2eda6c0..1695fd0 100644
--- a/deps/boringssl/src/crypto/asn1/a_int.c
+++ b/deps/boringssl/src/crypto/asn1/a_int.c
@@ -67,7 +67,7 @@
 
 ASN1_INTEGER *ASN1_INTEGER_dup(const ASN1_INTEGER *x)
 {
-    return M_ASN1_INTEGER_dup(x);
+    return ASN1_STRING_dup(x);
 }
 
 int ASN1_INTEGER_cmp(const ASN1_INTEGER *x, const ASN1_INTEGER *y)
@@ -206,7 +206,7 @@
     }
 
     if ((a == NULL) || ((*a) == NULL)) {
-        if ((ret = M_ASN1_INTEGER_new()) == NULL)
+        if ((ret = ASN1_INTEGER_new()) == NULL)
             return (NULL);
         ret->type = V_ASN1_INTEGER;
     } else
@@ -282,7 +282,7 @@
  err:
     OPENSSL_PUT_ERROR(ASN1, i);
     if ((ret != NULL) && ((a == NULL) || (*a != ret)))
-        M_ASN1_INTEGER_free(ret);
+        ASN1_INTEGER_free(ret);
     return (NULL);
 }
 
@@ -374,7 +374,7 @@
     int len, j;
 
     if (ai == NULL)
-        ret = M_ASN1_INTEGER_new();
+        ret = ASN1_INTEGER_new();
     else
         ret = ai;
     if (ret == NULL) {
@@ -404,7 +404,7 @@
     return (ret);
  err:
     if (ret != ai)
-        M_ASN1_INTEGER_free(ret);
+        ASN1_INTEGER_free(ret);
     return (NULL);
 }
 
diff --git a/deps/boringssl/src/crypto/asn1/a_object.c b/deps/boringssl/src/crypto/asn1/a_object.c
index aa98453..bf386dd 100644
--- a/deps/boringssl/src/crypto/asn1/a_object.c
+++ b/deps/boringssl/src/crypto/asn1/a_object.c
@@ -250,19 +250,12 @@
     if (a == NULL)
         return;
     if (a->flags & ASN1_OBJECT_FLAG_DYNAMIC_STRINGS) {
-#ifndef CONST_STRICT            /* disable purely for compile-time strict
-                                 * const checking. Doing this on a "real"
-                                 * compile will cause memory leaks */
-        if (a->sn != NULL)
-            OPENSSL_free((void *)a->sn);
-        if (a->ln != NULL)
-            OPENSSL_free((void *)a->ln);
-#endif
+        OPENSSL_free((void *)a->sn);
+        OPENSSL_free((void *)a->ln);
         a->sn = a->ln = NULL;
     }
     if (a->flags & ASN1_OBJECT_FLAG_DYNAMIC_DATA) {
-        if (a->data != NULL)
-            OPENSSL_free((void *)a->data);
+        OPENSSL_free((void *)a->data);
         a->data = NULL;
         a->length = 0;
     }
diff --git a/deps/boringssl/src/crypto/asn1/a_octet.c b/deps/boringssl/src/crypto/asn1/a_octet.c
index 2e74d6b..312993b 100644
--- a/deps/boringssl/src/crypto/asn1/a_octet.c
+++ b/deps/boringssl/src/crypto/asn1/a_octet.c
@@ -61,17 +61,17 @@
 
 ASN1_OCTET_STRING *ASN1_OCTET_STRING_dup(const ASN1_OCTET_STRING *x)
 {
-    return M_ASN1_OCTET_STRING_dup(x);
+    return ASN1_STRING_dup(x);
 }
 
 int ASN1_OCTET_STRING_cmp(const ASN1_OCTET_STRING *a,
                           const ASN1_OCTET_STRING *b)
 {
-    return M_ASN1_OCTET_STRING_cmp(a, b);
+    return ASN1_STRING_cmp(a, b);
 }
 
 int ASN1_OCTET_STRING_set(ASN1_OCTET_STRING *x, const unsigned char *d,
                           int len)
 {
-    return M_ASN1_OCTET_STRING_set(x, d, len);
+    return ASN1_STRING_set(x, d, len);
 }
diff --git a/deps/boringssl/src/crypto/asn1/a_type.c b/deps/boringssl/src/crypto/asn1/a_type.c
index c12edfa..f320e49 100644
--- a/deps/boringssl/src/crypto/asn1/a_type.c
+++ b/deps/boringssl/src/crypto/asn1/a_type.c
@@ -61,6 +61,9 @@
 #include <openssl/mem.h>
 #include <openssl/obj.h>
 
+#include "asn1_locl.h"
+
+
 int ASN1_TYPE_get(const ASN1_TYPE *a)
 {
     if ((a->value.ptr != NULL) || (a->type == V_ASN1_NULL))
@@ -142,8 +145,7 @@
     case V_ASN1_UTF8STRING:
     case V_ASN1_OTHER:
     default:
-        result = ASN1_STRING_cmp((ASN1_STRING *)a->value.ptr,
-                                 (ASN1_STRING *)b->value.ptr);
+        result = ASN1_STRING_cmp(a->value.asn1_string, b->value.asn1_string);
         break;
     }
 
diff --git a/deps/boringssl/src/crypto/asn1/a_utctm.c b/deps/boringssl/src/crypto/asn1/a_utctm.c
index f7519df..d5bd0e4 100644
--- a/deps/boringssl/src/crypto/asn1/a_utctm.c
+++ b/deps/boringssl/src/crypto/asn1/a_utctm.c
@@ -197,7 +197,7 @@
 
     if (s == NULL) {
         free_s = 1;
-        s = M_ASN1_UTCTIME_new();
+        s = ASN1_UTCTIME_new();
     }
     if (s == NULL)
         goto err;
@@ -234,7 +234,7 @@
     return (s);
  err:
     if (free_s && s)
-        M_ASN1_UTCTIME_free(s);
+        ASN1_UTCTIME_free(s);
     return NULL;
 }
 
diff --git a/deps/boringssl/src/crypto/asn1/asn1_lib.c b/deps/boringssl/src/crypto/asn1/asn1_lib.c
index 1091009..db8afac 100644
--- a/deps/boringssl/src/crypto/asn1/asn1_lib.c
+++ b/deps/boringssl/src/crypto/asn1/asn1_lib.c
@@ -251,6 +251,8 @@
 
 int ASN1_put_eoc(unsigned char **pp)
 {
+    /* This function is no longer used in the library, but some external code
+     * uses it. */
     unsigned char *p = *pp;
     *p++ = 0;
     *p++ = 0;
@@ -311,9 +313,9 @@
 {
     if (str == NULL)
         return 0;
-    dst->type = str->type;
     if (!ASN1_STRING_set(dst, str->data, str->length))
         return 0;
+    dst->type = str->type;
     dst->flags = str->flags;
     return 1;
 }
@@ -395,13 +397,12 @@
     return (ret);
 }
 
-void ASN1_STRING_free(ASN1_STRING *a)
+void ASN1_STRING_free(ASN1_STRING *str)
 {
-    if (a == NULL)
+    if (str == NULL)
         return;
-    if (a->data && !(a->flags & ASN1_STRING_FLAG_NDEF))
-        OPENSSL_free(a->data);
-    OPENSSL_free(a);
+    OPENSSL_free(str->data);
+    OPENSSL_free(str);
 }
 
 int ASN1_STRING_cmp(const ASN1_STRING *a, const ASN1_STRING *b)
@@ -419,28 +420,22 @@
         return (i);
 }
 
-int ASN1_STRING_length(const ASN1_STRING *x)
+int ASN1_STRING_length(const ASN1_STRING *str)
 {
-    return M_ASN1_STRING_length(x);
+    return str->length;
 }
 
-void ASN1_STRING_length_set(ASN1_STRING *x, int len)
+int ASN1_STRING_type(const ASN1_STRING *str)
 {
-    M_ASN1_STRING_length_set(x, len);
-    return;
+    return str->type;
 }
 
-int ASN1_STRING_type(const ASN1_STRING *x)
+unsigned char *ASN1_STRING_data(ASN1_STRING *str)
 {
-    return M_ASN1_STRING_type(x);
+    return str->data;
 }
 
-unsigned char *ASN1_STRING_data(ASN1_STRING *x)
+const unsigned char *ASN1_STRING_get0_data(const ASN1_STRING *str)
 {
-    return M_ASN1_STRING_data(x);
-}
-
-const unsigned char *ASN1_STRING_get0_data(const ASN1_STRING *x)
-{
-    return x->data;
+    return str->data;
 }
diff --git a/deps/boringssl/src/crypto/asn1/asn1_locl.h b/deps/boringssl/src/crypto/asn1/asn1_locl.h
index 8cef246..bf90ea2 100644
--- a/deps/boringssl/src/crypto/asn1/asn1_locl.h
+++ b/deps/boringssl/src/crypto/asn1/asn1_locl.h
@@ -96,6 +96,36 @@
 int UTF8_getc(const unsigned char *str, int len, uint32_t *val);
 int UTF8_putc(unsigned char *str, int len, uint32_t value);
 
+int ASN1_item_ex_new(ASN1_VALUE **pval, const ASN1_ITEM *it);
+void ASN1_item_ex_free(ASN1_VALUE **pval, const ASN1_ITEM *it);
+
+void ASN1_template_free(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt);
+int ASN1_item_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len,
+                     const ASN1_ITEM *it, int tag, int aclass, char opt,
+                     ASN1_TLC *ctx);
+
+int ASN1_item_ex_i2d(ASN1_VALUE **pval, unsigned char **out,
+                     const ASN1_ITEM *it, int tag, int aclass);
+void ASN1_primitive_free(ASN1_VALUE **pval, const ASN1_ITEM *it);
+
+int asn1_get_choice_selector(ASN1_VALUE **pval, const ASN1_ITEM *it);
+int asn1_set_choice_selector(ASN1_VALUE **pval, int value, const ASN1_ITEM *it);
+
+ASN1_VALUE **asn1_get_field_ptr(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt);
+
+const ASN1_TEMPLATE *asn1_do_adb(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt,
+                                 int nullerr);
+
+void asn1_refcount_set_one(ASN1_VALUE **pval, const ASN1_ITEM *it);
+int asn1_refcount_dec_and_test_zero(ASN1_VALUE **pval, const ASN1_ITEM *it);
+
+void asn1_enc_init(ASN1_VALUE **pval, const ASN1_ITEM *it);
+void asn1_enc_free(ASN1_VALUE **pval, const ASN1_ITEM *it);
+int asn1_enc_restore(int *len, unsigned char **out, ASN1_VALUE **pval,
+                     const ASN1_ITEM *it);
+int asn1_enc_save(ASN1_VALUE **pval, const unsigned char *in, int inlen,
+                  const ASN1_ITEM *it);
+
 
 #if defined(__cplusplus)
 }  /* extern C */
diff --git a/deps/boringssl/src/crypto/asn1/asn1_test.cc b/deps/boringssl/src/crypto/asn1/asn1_test.cc
index 7f71c8c..7b09ba5 100644
--- a/deps/boringssl/src/crypto/asn1/asn1_test.cc
+++ b/deps/boringssl/src/crypto/asn1/asn1_test.cc
@@ -70,9 +70,9 @@
 }
 
 TEST(ASN1Test, IntegerSetting) {
-  bssl::UniquePtr<ASN1_INTEGER> by_bn(M_ASN1_INTEGER_new());
-  bssl::UniquePtr<ASN1_INTEGER> by_long(M_ASN1_INTEGER_new());
-  bssl::UniquePtr<ASN1_INTEGER> by_uint64(M_ASN1_INTEGER_new());
+  bssl::UniquePtr<ASN1_INTEGER> by_bn(ASN1_INTEGER_new());
+  bssl::UniquePtr<ASN1_INTEGER> by_long(ASN1_INTEGER_new());
+  bssl::UniquePtr<ASN1_INTEGER> by_uint64(ASN1_INTEGER_new());
   bssl::UniquePtr<BIGNUM> bn(BN_new());
 
   const std::vector<int64_t> kValues = {
@@ -96,6 +96,44 @@
   }
 }
 
+template <typename T>
+void TestSerialize(T obj, int (*i2d_func)(T a, uint8_t **pp),
+                   bssl::Span<const uint8_t> expected) {
+  int len = static_cast<int>(expected.size());
+  ASSERT_EQ(i2d_func(obj, nullptr), len);
+
+  std::vector<uint8_t> buf(expected.size());
+  uint8_t *ptr = buf.data();
+  ASSERT_EQ(i2d_func(obj, &ptr), len);
+  EXPECT_EQ(ptr, buf.data() + buf.size());
+  EXPECT_EQ(Bytes(expected), Bytes(buf));
+
+  // Test the allocating version.
+  ptr = nullptr;
+  ASSERT_EQ(i2d_func(obj, &ptr), len);
+  EXPECT_EQ(Bytes(expected), Bytes(ptr, expected.size()));
+  OPENSSL_free(ptr);
+}
+
+TEST(ASN1Test, SerializeObject) {
+  static const uint8_t kDER[] = {0x06, 0x09, 0x2a, 0x86, 0x48, 0x86,
+                                 0xf7, 0x0d, 0x01, 0x01, 0x01};
+  const ASN1_OBJECT *obj = OBJ_nid2obj(NID_rsaEncryption);
+  TestSerialize(obj, i2d_ASN1_OBJECT, kDER);
+}
+
+TEST(ASN1Test, SerializeBoolean) {
+  static const uint8_t kTrue[] = {0x01, 0x01, 0xff};
+  TestSerialize(0xff, i2d_ASN1_BOOLEAN, kTrue);
+
+  static const uint8_t kFalse[] = {0x01, 0x01, 0x00};
+  TestSerialize(0x00, i2d_ASN1_BOOLEAN, kFalse);
+}
+
+// The ASN.1 macros do not work on Windows shared library builds, where usage of
+// |OPENSSL_EXPORT| is a bit stricter.
+#if !defined(OPENSSL_WINDOWS) || !defined(BORINGSSL_SHARED_LIBRARY)
+
 typedef struct asn1_linked_list_st {
   struct asn1_linked_list_st *next;
 } ASN1_LINKED_LIST;
@@ -151,36 +189,41 @@
   ASN1_LINKED_LIST_free(list);
 }
 
-template <typename T>
-void TestSerialize(T obj, int (*i2d_func)(T a, uint8_t **pp),
-                   bssl::Span<const uint8_t> expected) {
-  int len = static_cast<int>(expected.size());
-  ASSERT_EQ(i2d_func(obj, nullptr), len);
+struct IMPLICIT_CHOICE {
+  ASN1_STRING *string;
+};
 
-  std::vector<uint8_t> buf(expected.size());
-  uint8_t *ptr = buf.data();
-  ASSERT_EQ(i2d_func(obj, &ptr), len);
-  EXPECT_EQ(ptr, buf.data() + buf.size());
-  EXPECT_EQ(Bytes(expected), Bytes(buf));
+// clang-format off
+DECLARE_ASN1_FUNCTIONS(IMPLICIT_CHOICE)
 
-  // Test the allocating version.
-  ptr = nullptr;
-  ASSERT_EQ(i2d_func(obj, &ptr), len);
-  EXPECT_EQ(Bytes(expected), Bytes(ptr, expected.size()));
-  OPENSSL_free(ptr);
+ASN1_SEQUENCE(IMPLICIT_CHOICE) = {
+  ASN1_IMP(IMPLICIT_CHOICE, string, DIRECTORYSTRING, 0)
+} ASN1_SEQUENCE_END(IMPLICIT_CHOICE)
+
+IMPLEMENT_ASN1_FUNCTIONS(IMPLICIT_CHOICE)
+// clang-format on
+
+// Test that the ASN.1 templates reject types with implicitly-tagged CHOICE
+// types.
+TEST(ASN1Test, ImplicitChoice) {
+  // Serializing a type with an implicitly tagged CHOICE should fail.
+  std::unique_ptr<IMPLICIT_CHOICE, decltype(&IMPLICIT_CHOICE_free)> obj(
+      IMPLICIT_CHOICE_new(), IMPLICIT_CHOICE_free);
+  EXPECT_EQ(-1, i2d_IMPLICIT_CHOICE(obj.get(), nullptr));
+
+  // An implicitly-tagged CHOICE is an error. Depending on the implementation,
+  // it may be misinterpreted as without the tag, or as clobbering the CHOICE
+  // tag. Test both inputs and ensure they fail.
+
+  // SEQUENCE { UTF8String {} }
+  static const uint8_t kInput1[] = {0x30, 0x02, 0x0c, 0x00};
+  const uint8_t *ptr = kInput1;
+  EXPECT_EQ(nullptr, d2i_IMPLICIT_CHOICE(nullptr, &ptr, sizeof(kInput1)));
+
+  // SEQUENCE { [0 PRIMITIVE] {} }
+  static const uint8_t kInput2[] = {0x30, 0x02, 0x80, 0x00};
+  ptr = kInput2;
+  EXPECT_EQ(nullptr, d2i_IMPLICIT_CHOICE(nullptr, &ptr, sizeof(kInput2)));
 }
 
-TEST(ASN1Test, SerializeObject) {
-  static const uint8_t kDER[] = {0x06, 0x09, 0x2a, 0x86, 0x48, 0x86,
-                                 0xf7, 0x0d, 0x01, 0x01, 0x01};
-  const ASN1_OBJECT *obj = OBJ_nid2obj(NID_rsaEncryption);
-  TestSerialize(obj, i2d_ASN1_OBJECT, kDER);
-}
-
-TEST(ASN1Test, SerializeBoolean) {
-  static const uint8_t kTrue[] = {0x01, 0x01, 0xff};
-  TestSerialize(0xff, i2d_ASN1_BOOLEAN, kTrue);
-
-  static const uint8_t kFalse[] = {0x01, 0x01, 0x00};
-  TestSerialize(0x00, i2d_ASN1_BOOLEAN, kFalse);
-}
+#endif  // !WINDOWS || !SHARED_LIBRARY
diff --git a/deps/boringssl/src/crypto/asn1/tasn_dec.c b/deps/boringssl/src/crypto/asn1/tasn_dec.c
index 32aba0b..99a9714 100644
--- a/deps/boringssl/src/crypto/asn1/tasn_dec.c
+++ b/deps/boringssl/src/crypto/asn1/tasn_dec.c
@@ -65,6 +65,7 @@
 #include <openssl/mem.h>
 
 #include "../internal.h"
+#include "asn1_locl.h"
 
 /*
  * Constructed types with a recursive definition (such as can be found in PKCS7)
@@ -95,6 +96,8 @@
                                    const unsigned char **in, long len,
                                    const ASN1_TEMPLATE *tt, char opt,
                                    ASN1_TLC *ctx, int depth);
+static int asn1_ex_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
+                       int utype, char *free_cont, const ASN1_ITEM *it);
 static int asn1_d2i_ex_primitive(ASN1_VALUE **pval,
                                  const unsigned char **in, long len,
                                  const ASN1_ITEM *it,
@@ -166,19 +169,16 @@
                             char opt, ASN1_TLC *ctx, int depth)
 {
     const ASN1_TEMPLATE *tt, *errtt = NULL;
-    const ASN1_COMPAT_FUNCS *cf;
     const ASN1_EXTERN_FUNCS *ef;
     const ASN1_AUX *aux = it->funcs;
     ASN1_aux_cb *asn1_cb;
     const unsigned char *p = NULL, *q;
-    unsigned char *wp = NULL;   /* BIG FAT WARNING! BREAKS CONST WHERE USED */
-    unsigned char imphack = 0, oclass;
+    unsigned char oclass;
     char seq_eoc, seq_nolen, cst, isopt;
-    long tmplen;
     int i;
     int otag;
     int ret = 0;
-    ASN1_VALUE **pchptr, *ptmpval;
+    ASN1_VALUE **pchptr;
     int combine = aclass & ASN1_TFLG_COMBINE;
     aclass &= ~ASN1_TFLG_COMBINE;
     if (!pval)
@@ -223,6 +223,15 @@
         break;
 
     case ASN1_ITYPE_MSTRING:
+        /*
+         * It never makes sense for multi-strings to have implicit tagging, so
+         * if tag != -1, then this looks like an error in the template.
+         */
+        if (tag != -1) {
+            OPENSSL_PUT_ERROR(ASN1, ASN1_R_BAD_TEMPLATE);
+            goto err;
+        }
+
         p = *in;
         /* Just read in tag and class */
         ret = asn1_check_tlen(NULL, &otag, &oclass, NULL, NULL,
@@ -255,67 +264,16 @@
         ef = it->funcs;
         return ef->asn1_ex_d2i(pval, in, len, it, tag, aclass, opt, ctx);
 
-    case ASN1_ITYPE_COMPAT:
-        /* we must resort to old style evil hackery */
-        cf = it->funcs;
-
-        /* If OPTIONAL see if it is there */
-        if (opt) {
-            int exptag;
-            p = *in;
-            if (tag == -1)
-                exptag = it->utype;
-            else
-                exptag = tag;
-            /*
-             * Don't care about anything other than presence of expected tag
-             */
-
-            ret = asn1_check_tlen(NULL, NULL, NULL, NULL, NULL,
-                                  &p, len, exptag, aclass, 1, ctx);
-            if (!ret) {
-                OPENSSL_PUT_ERROR(ASN1, ASN1_R_NESTED_ASN1_ERROR);
-                goto err;
-            }
-            if (ret == -1)
-                return -1;
-        }
-
-        /*
-         * This is the old style evil hack IMPLICIT handling: since the
-         * underlying code is expecting a tag and class other than the one
-         * present we change the buffer temporarily then change it back
-         * afterwards. This doesn't and never did work for tags > 30. Yes
-         * this is *horrible* but it is only needed for old style d2i which
-         * will hopefully not be around for much longer. FIXME: should copy
-         * the buffer then modify it so the input buffer can be const: we
-         * should *always* copy because the old style d2i might modify the
-         * buffer.
-         */
-
-        if (tag != -1) {
-            wp = *(unsigned char **)in;
-            imphack = *wp;
-            if (p == NULL) {
-                OPENSSL_PUT_ERROR(ASN1, ASN1_R_NESTED_ASN1_ERROR);
-                goto err;
-            }
-            *wp = (unsigned char)((*p & V_ASN1_CONSTRUCTED)
-                                  | it->utype);
-        }
-
-        ptmpval = cf->asn1_d2i(pval, in, len);
-
-        if (tag != -1)
-            *wp = imphack;
-
-        if (ptmpval)
-            return 1;
-
-        OPENSSL_PUT_ERROR(ASN1, ASN1_R_NESTED_ASN1_ERROR);
-        goto err;
-
     case ASN1_ITYPE_CHOICE:
+        /*
+         * It never makes sense for CHOICE types to have implicit tagging, so if
+         * tag != -1, then this looks like an error in the template.
+         */
+        if (tag != -1) {
+            OPENSSL_PUT_ERROR(ASN1, ASN1_R_BAD_TEMPLATE);
+            goto err;
+        }
+
         if (asn1_cb && !asn1_cb(ASN1_OP_D2I_PRE, pval, it, NULL))
             goto auxerr;
 
@@ -370,10 +328,8 @@
         *in = p;
         return 1;
 
-    case ASN1_ITYPE_NDEF_SEQUENCE:
     case ASN1_ITYPE_SEQUENCE:
         p = *in;
-        tmplen = len;
 
         /* If no IMPLICIT tagging set to SEQUENCE, UNIVERSAL */
         if (tag == -1) {
@@ -388,13 +344,8 @@
             goto err;
         } else if (ret == -1)
             return -1;
-        if (aux && (aux->flags & ASN1_AFLG_BROKEN)) {
-            len = tmplen - (p - *in);
-            seq_nolen = 1;
-        }
         /* If indefinite we don't do a length check */
-        else
-            seq_nolen = seq_eoc;
+        seq_nolen = seq_eoc;
         if (!cst) {
             OPENSSL_PUT_ERROR(ASN1, ASN1_R_SEQUENCE_NOT_CONSTRUCTED);
             goto err;
@@ -649,7 +600,7 @@
         } else if (ret == -1)
             return -1;
         if (!*val)
-            *val = (ASN1_VALUE *)sk_new_null();
+            *val = (ASN1_VALUE *)sk_ASN1_VALUE_new_null();
         else {
             /*
              * We've got a valid STACK: free up any items present
@@ -860,19 +811,19 @@
 
 /* Translate ASN1 content octets into a structure */
 
-int asn1_ex_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
-                int utype, char *free_cont, const ASN1_ITEM *it)
+static int asn1_ex_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len,
+                       int utype, char *free_cont, const ASN1_ITEM *it)
 {
     ASN1_VALUE **opval = NULL;
     ASN1_STRING *stmp;
     ASN1_TYPE *typ = NULL;
     int ret = 0;
-    const ASN1_PRIMITIVE_FUNCS *pf;
     ASN1_INTEGER **tint;
-    pf = it->funcs;
 
-    if (pf && pf->prim_c2i)
-        return pf->prim_c2i(pval, cont, len, utype, free_cont, it);
+    /* Historically, |it->funcs| for primitive types contained an
+     * |ASN1_PRIMITIVE_FUNCS| table of callbacks. */
+    assert(it->funcs == NULL);
+
     /* If ANY type clear type and set pointer to internal value */
     if (it->utype == V_ASN1_ANY) {
         if (!*pval) {
@@ -1055,15 +1006,13 @@
  * constructed type and 'inf' should be set if it is indefinite length.
  */
 
-#ifndef ASN1_MAX_STRING_NEST
 /*
  * This determines how many levels of recursion are permitted in ASN1 string
  * types. If it is not limited stack overflows can occur. If set to zero no
  * recursion is allowed at all. Although zero should be adequate examples
  * exist that require a value of 1. So 5 should be more than enough.
  */
-# define ASN1_MAX_STRING_NEST 5
-#endif
+#define ASN1_MAX_STRING_NEST 5
 
 static int asn1_collect(BUF_MEM *buf, const unsigned char **in, long len,
                         char inf, int tag, int aclass, int depth)
diff --git a/deps/boringssl/src/crypto/asn1/tasn_enc.c b/deps/boringssl/src/crypto/asn1/tasn_enc.c
index 3722a51..1323439 100644
--- a/deps/boringssl/src/crypto/asn1/tasn_enc.c
+++ b/deps/boringssl/src/crypto/asn1/tasn_enc.c
@@ -63,10 +63,13 @@
 #include <openssl/mem.h>
 
 #include "../internal.h"
+#include "asn1_locl.h"
 
 
 static int asn1_i2d_ex_primitive(ASN1_VALUE **pval, unsigned char **out,
                                  const ASN1_ITEM *it, int tag, int aclass);
+static int asn1_ex_i2c(ASN1_VALUE **pval, unsigned char *cont, int *putype,
+                       const ASN1_ITEM *it);
 static int asn1_set_seq_out(STACK_OF(ASN1_VALUE) *sk, unsigned char **out,
                             int skcontlen, const ASN1_ITEM *item,
                             int do_sort, int iclass);
@@ -76,16 +79,9 @@
                                const ASN1_ITEM *it, int flags);
 
 /*
- * Top level i2d equivalents: the 'ndef' variant instructs the encoder to use
- * indefinite length constructed encoding, where appropriate
+ * Top level i2d equivalents
  */
 
-int ASN1_item_ndef_i2d(ASN1_VALUE *val, unsigned char **out,
-                       const ASN1_ITEM *it)
-{
-    return asn1_item_flags_i2d(val, out, it, ASN1_TFLG_NDEF);
-}
-
 int ASN1_item_i2d(ASN1_VALUE *val, unsigned char **out, const ASN1_ITEM *it)
 {
     return asn1_item_flags_i2d(val, out, it, 0);
@@ -128,9 +124,7 @@
                      const ASN1_ITEM *it, int tag, int aclass)
 {
     const ASN1_TEMPLATE *tt = NULL;
-    unsigned char *p = NULL;
-    int i, seqcontlen, seqlen, ndef = 1;
-    const ASN1_COMPAT_FUNCS *cf;
+    int i, seqcontlen, seqlen;
     const ASN1_EXTERN_FUNCS *ef;
     const ASN1_AUX *aux = it->funcs;
     ASN1_aux_cb *asn1_cb = 0;
@@ -151,9 +145,25 @@
         break;
 
     case ASN1_ITYPE_MSTRING:
+        /*
+         * It never makes sense for multi-strings to have implicit tagging, so
+         * if tag != -1, then this looks like an error in the template.
+         */
+        if (tag != -1) {
+            OPENSSL_PUT_ERROR(ASN1, ASN1_R_BAD_TEMPLATE);
+            return -1;
+        }
         return asn1_i2d_ex_primitive(pval, out, it, -1, aclass);
 
     case ASN1_ITYPE_CHOICE:
+        /*
+         * It never makes sense for CHOICE types to have implicit tagging, so if
+         * tag != -1, then this looks like an error in the template.
+         */
+        if (tag != -1) {
+            OPENSSL_PUT_ERROR(ASN1, ASN1_R_BAD_TEMPLATE);
+            return -1;
+        }
         if (asn1_cb && !asn1_cb(ASN1_OP_I2D_PRE, pval, it, NULL))
             return 0;
         i = asn1_get_choice_selector(pval, it);
@@ -174,26 +184,6 @@
         ef = it->funcs;
         return ef->asn1_ex_i2d(pval, out, it, tag, aclass);
 
-    case ASN1_ITYPE_COMPAT:
-        /* old style hackery... */
-        cf = it->funcs;
-        if (out)
-            p = *out;
-        i = cf->asn1_i2d(*pval, out);
-        /*
-         * Fixup for IMPLICIT tag: note this messes up for tags > 30, but so
-         * did the old code. Tags > 30 are very rare anyway.
-         */
-        if (out && (tag != -1))
-            *p = aclass | tag | (*p & V_ASN1_CONSTRUCTED);
-        return i;
-
-    case ASN1_ITYPE_NDEF_SEQUENCE:
-        /* Use indefinite length constructed if requested */
-        if (aclass & ASN1_TFLG_NDEF)
-            ndef = 2;
-        OPENSSL_FALLTHROUGH;
-
     case ASN1_ITYPE_SEQUENCE:
         i = asn1_enc_restore(&seqcontlen, out, pval, it);
         /* An error occurred */
@@ -228,11 +218,11 @@
             seqcontlen += tmplen;
         }
 
-        seqlen = ASN1_object_size(ndef, seqcontlen, tag);
+        seqlen = ASN1_object_size(/*constructed=*/1, seqcontlen, tag);
         if (!out || seqlen == -1)
             return seqlen;
         /* Output SEQUENCE header */
-        ASN1_put_object(out, ndef, seqcontlen, tag, aclass);
+        ASN1_put_object(out, /*constructed=*/1, seqcontlen, tag, aclass);
         for (i = 0, tt = it->templates; i < it->tcount; tt++, i++) {
             const ASN1_TEMPLATE *seqtt;
             ASN1_VALUE **pseqval;
@@ -243,8 +233,6 @@
             /* FIXME: check for errors in enhanced version */
             asn1_template_ex_i2d(pseqval, out, seqtt, -1, aclass);
         }
-        if (ndef == 2)
-            ASN1_put_eoc(out);
         if (asn1_cb && !asn1_cb(ASN1_OP_I2D_POST, pval, it, NULL))
             return 0;
         return seqlen;
@@ -259,7 +247,7 @@
 static int asn1_template_ex_i2d(ASN1_VALUE **pval, unsigned char **out,
                                 const ASN1_TEMPLATE *tt, int tag, int iclass)
 {
-    int i, ret, flags, ttag, tclass, ndef;
+    int i, ret, flags, ttag, tclass;
     size_t j;
     flags = tt->flags;
     /*
@@ -295,12 +283,6 @@
      * class and iclass is any flags passed to this function.
      */
 
-    /* if template and arguments require ndef, use it */
-    if ((flags & ASN1_TFLG_NDEF) && (iclass & ASN1_TFLG_NDEF))
-        ndef = 2;
-    else
-        ndef = 1;
-
     if (flags & ASN1_TFLG_SK_MASK) {
         /* SET OF, SEQUENCE OF */
         STACK_OF(ASN1_VALUE) *sk = (STACK_OF(ASN1_VALUE) *)*pval;
@@ -345,12 +327,12 @@
                 return -1;
             skcontlen += tmplen;
         }
-        sklen = ASN1_object_size(ndef, skcontlen, sktag);
+        sklen = ASN1_object_size(/*constructed=*/1, skcontlen, sktag);
         if (sklen == -1)
             return -1;
         /* If EXPLICIT need length of surrounding tag */
         if (flags & ASN1_TFLG_EXPTAG)
-            ret = ASN1_object_size(ndef, sklen, ttag);
+            ret = ASN1_object_size(/*constructed=*/1, sklen, ttag);
         else
             ret = sklen;
 
@@ -360,18 +342,12 @@
         /* Now encode this lot... */
         /* EXPLICIT tag */
         if (flags & ASN1_TFLG_EXPTAG)
-            ASN1_put_object(out, ndef, sklen, ttag, tclass);
+            ASN1_put_object(out, /*constructed=*/1, sklen, ttag, tclass);
         /* SET or SEQUENCE and IMPLICIT tag */
-        ASN1_put_object(out, ndef, skcontlen, sktag, skaclass);
+        ASN1_put_object(out, /*constructed=*/1, skcontlen, sktag, skaclass);
         /* And the stuff itself */
         asn1_set_seq_out(sk, out, skcontlen, ASN1_ITEM_ptr(tt->item),
                          isset, iclass);
-        if (ndef == 2) {
-            ASN1_put_eoc(out);
-            if (flags & ASN1_TFLG_EXPTAG)
-                ASN1_put_eoc(out);
-        }
-
         return ret;
     }
 
@@ -382,13 +358,11 @@
         if (!i)
             return 0;
         /* Find length of EXPLICIT tag */
-        ret = ASN1_object_size(ndef, i, ttag);
+        ret = ASN1_object_size(/*constructed=*/1, i, ttag);
         if (out && ret != -1) {
             /* Output tag and item */
-            ASN1_put_object(out, ndef, i, ttag, tclass);
+            ASN1_put_object(out, /*constructed=*/1, i, ttag, tclass);
             ASN1_item_ex_i2d(pval, out, ASN1_ITEM_ptr(tt->item), -1, iclass);
-            if (ndef == 2)
-                ASN1_put_eoc(out);
         }
         return ret;
     }
@@ -487,7 +461,6 @@
     int len;
     int utype;
     int usetag;
-    int ndef = 0;
 
     utype = it->utype;
 
@@ -513,12 +486,6 @@
     if (len == -1)
         return 0;
 
-    /* -2 return is special meaning use ndef */
-    if (len == -2) {
-        ndef = 2;
-        len = 0;
-    }
-
     /* If not implicitly tagged get tag from underlying type */
     if (tag == -1)
         tag = utype;
@@ -526,23 +493,20 @@
     /* Output tag+length followed by content octets */
     if (out) {
         if (usetag)
-            ASN1_put_object(out, ndef, len, tag, aclass);
+            ASN1_put_object(out, /*constructed=*/0, len, tag, aclass);
         asn1_ex_i2c(pval, *out, &utype, it);
-        if (ndef)
-            ASN1_put_eoc(out);
-        else
-            *out += len;
+        *out += len;
     }
 
     if (usetag)
-        return ASN1_object_size(ndef, len, tag);
+        return ASN1_object_size(/*constructed=*/0, len, tag);
     return len;
 }
 
 /* Produce content octets from a structure */
 
-int asn1_ex_i2c(ASN1_VALUE **pval, unsigned char *cout, int *putype,
-                const ASN1_ITEM *it)
+static int asn1_ex_i2c(ASN1_VALUE **pval, unsigned char *cout, int *putype,
+                       const ASN1_ITEM *it)
 {
     ASN1_BOOLEAN *tbool = NULL;
     ASN1_STRING *strtmp;
@@ -551,10 +515,10 @@
     const unsigned char *cont;
     unsigned char c;
     int len;
-    const ASN1_PRIMITIVE_FUNCS *pf;
-    pf = it->funcs;
-    if (pf && pf->prim_i2c)
-        return pf->prim_i2c(pval, cout, putype, it);
+
+    /* Historically, |it->funcs| for primitive types contained an
+     * |ASN1_PRIMITIVE_FUNCS| table of callbacks. */
+    assert(it->funcs == NULL);
 
     /* Should type be omitted? */
     if ((it->itype != ASN1_ITYPE_PRIMITIVE)
@@ -642,16 +606,6 @@
     default:
         /* All based on ASN1_STRING and handled the same */
         strtmp = (ASN1_STRING *)*pval;
-        /* Special handling for NDEF */
-        if ((it->size == ASN1_TFLG_NDEF)
-            && (strtmp->flags & ASN1_STRING_FLAG_NDEF)) {
-            if (cout) {
-                strtmp->data = cout;
-                strtmp->length = 0;
-            }
-            /* Special return code */
-            return -2;
-        }
         cont = strtmp->data;
         len = strtmp->length;
 
diff --git a/deps/boringssl/src/crypto/asn1/tasn_fre.c b/deps/boringssl/src/crypto/asn1/tasn_fre.c
index eabc0fb..a1e7315 100644
--- a/deps/boringssl/src/crypto/asn1/tasn_fre.c
+++ b/deps/boringssl/src/crypto/asn1/tasn_fre.c
@@ -56,6 +56,8 @@
 
 #include <openssl/asn1.h>
 
+#include <assert.h>
+
 #include <openssl/asn1t.h>
 #include <openssl/mem.h>
 
@@ -77,7 +79,6 @@
 {
     const ASN1_TEMPLATE *tt = NULL, *seqtt;
     const ASN1_EXTERN_FUNCS *ef;
-    const ASN1_COMPAT_FUNCS *cf;
     const ASN1_AUX *aux = it->funcs;
     ASN1_aux_cb *asn1_cb;
     int i;
@@ -124,19 +125,12 @@
         }
         break;
 
-    case ASN1_ITYPE_COMPAT:
-        cf = it->funcs;
-        if (cf && cf->asn1_free)
-            cf->asn1_free(*pval);
-        break;
-
     case ASN1_ITYPE_EXTERN:
         ef = it->funcs;
         if (ef && ef->asn1_ex_free)
             ef->asn1_ex_free(pval, it);
         break;
 
-    case ASN1_ITYPE_NDEF_SEQUENCE:
     case ASN1_ITYPE_SEQUENCE:
         if (!asn1_refcount_dec_and_test_zero(pval, it))
             return;
@@ -190,14 +184,9 @@
 void ASN1_primitive_free(ASN1_VALUE **pval, const ASN1_ITEM *it)
 {
     int utype;
-    if (it) {
-        const ASN1_PRIMITIVE_FUNCS *pf;
-        pf = it->funcs;
-        if (pf && pf->prim_free) {
-            pf->prim_free(pval, it);
-            return;
-        }
-    }
+    /* Historically, |it->funcs| for primitive types contained an
+     * |ASN1_PRIMITIVE_FUNCS| table of callbacks. */
+    assert(it == NULL || it->funcs == NULL);
     /* Special case: if 'it' is NULL free contents of ASN1_TYPE */
     if (!it) {
         ASN1_TYPE *typ = (ASN1_TYPE *)*pval;
diff --git a/deps/boringssl/src/crypto/asn1/tasn_new.c b/deps/boringssl/src/crypto/asn1/tasn_new.c
index 5db38be..dc864da 100644
--- a/deps/boringssl/src/crypto/asn1/tasn_new.c
+++ b/deps/boringssl/src/crypto/asn1/tasn_new.c
@@ -70,7 +70,9 @@
 static int asn1_item_ex_combine_new(ASN1_VALUE **pval, const ASN1_ITEM *it,
                                     int combine);
 static void asn1_item_clear(ASN1_VALUE **pval, const ASN1_ITEM *it);
+static int ASN1_template_new(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt);
 static void asn1_template_clear(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt);
+static int ASN1_primitive_new(ASN1_VALUE **pval, const ASN1_ITEM *it);
 static void asn1_primitive_clear(ASN1_VALUE **pval, const ASN1_ITEM *it);
 
 ASN1_VALUE *ASN1_item_new(const ASN1_ITEM *it)
@@ -92,7 +94,6 @@
                                     int combine)
 {
     const ASN1_TEMPLATE *tt = NULL;
-    const ASN1_COMPAT_FUNCS *cf;
     const ASN1_EXTERN_FUNCS *ef;
     const ASN1_AUX *aux = it->funcs;
     ASN1_aux_cb *asn1_cb;
@@ -103,11 +104,6 @@
     else
         asn1_cb = 0;
 
-#ifdef CRYPTO_MDEBUG
-    if (it->sname)
-        CRYPTO_push_info(it->sname);
-#endif
-
     switch (it->itype) {
 
     case ASN1_ITYPE_EXTERN:
@@ -118,15 +114,6 @@
         }
         break;
 
-    case ASN1_ITYPE_COMPAT:
-        cf = it->funcs;
-        if (cf && cf->asn1_new) {
-            *pval = cf->asn1_new();
-            if (!*pval)
-                goto memerr;
-        }
-        break;
-
     case ASN1_ITYPE_PRIMITIVE:
         if (it->templates) {
             if (!ASN1_template_new(pval, it->templates))
@@ -146,10 +133,6 @@
             if (!i)
                 goto auxerr;
             if (i == 2) {
-#ifdef CRYPTO_MDEBUG
-                if (it->sname)
-                    CRYPTO_pop_info();
-#endif
                 return 1;
             }
         }
@@ -164,17 +147,12 @@
             goto auxerr2;
         break;
 
-    case ASN1_ITYPE_NDEF_SEQUENCE:
     case ASN1_ITYPE_SEQUENCE:
         if (asn1_cb) {
             i = asn1_cb(ASN1_OP_NEW_PRE, pval, it, NULL);
             if (!i)
                 goto auxerr;
             if (i == 2) {
-#ifdef CRYPTO_MDEBUG
-                if (it->sname)
-                    CRYPTO_pop_info();
-#endif
                 return 1;
             }
         }
@@ -195,30 +173,18 @@
             goto auxerr2;
         break;
     }
-#ifdef CRYPTO_MDEBUG
-    if (it->sname)
-        CRYPTO_pop_info();
-#endif
     return 1;
 
  memerr2:
     asn1_item_combine_free(pval, it, combine);
  memerr:
     OPENSSL_PUT_ERROR(ASN1, ERR_R_MALLOC_FAILURE);
-#ifdef CRYPTO_MDEBUG
-    if (it->sname)
-        CRYPTO_pop_info();
-#endif
     return 0;
 
  auxerr2:
     asn1_item_combine_free(pval, it, combine);
  auxerr:
     OPENSSL_PUT_ERROR(ASN1, ASN1_R_AUX_ERROR);
-#ifdef CRYPTO_MDEBUG
-    if (it->sname)
-        CRYPTO_pop_info();
-#endif
     return 0;
 
 }
@@ -248,16 +214,14 @@
         asn1_primitive_clear(pval, it);
         break;
 
-    case ASN1_ITYPE_COMPAT:
     case ASN1_ITYPE_CHOICE:
     case ASN1_ITYPE_SEQUENCE:
-    case ASN1_ITYPE_NDEF_SEQUENCE:
         *pval = NULL;
         break;
     }
 }
 
-int ASN1_template_new(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt)
+static int ASN1_template_new(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt)
 {
     const ASN1_ITEM *it = ASN1_ITEM_ptr(tt->item);
     int ret;
@@ -271,10 +235,6 @@
         *pval = NULL;
         return 1;
     }
-#ifdef CRYPTO_MDEBUG
-    if (tt->field_name)
-        CRYPTO_push_info(tt->field_name);
-#endif
     /* If SET OF or SEQUENCE OF, its a STACK */
     if (tt->flags & ASN1_TFLG_SK_MASK) {
         STACK_OF(ASN1_VALUE) *skval;
@@ -291,10 +251,6 @@
     /* Otherwise pass it back to the item routine */
     ret = asn1_item_ex_combine_new(pval, it, tt->flags & ASN1_TFLG_COMBINE);
  done:
-#ifdef CRYPTO_MDEBUG
-    if (it->sname)
-        CRYPTO_pop_info();
-#endif
     return ret;
 }
 
@@ -312,7 +268,7 @@
  * all the old functions.
  */
 
-int ASN1_primitive_new(ASN1_VALUE **pval, const ASN1_ITEM *it)
+static int ASN1_primitive_new(ASN1_VALUE **pval, const ASN1_ITEM *it)
 {
     ASN1_TYPE *typ;
     ASN1_STRING *str;
@@ -321,11 +277,9 @@
     if (!it)
         return 0;
 
-    if (it->funcs) {
-        const ASN1_PRIMITIVE_FUNCS *pf = it->funcs;
-        if (pf->prim_new)
-            return pf->prim_new(pval, it);
-    }
+    /* Historically, |it->funcs| for primitive types contained an
+     * |ASN1_PRIMITIVE_FUNCS| table of callbacks. */
+    assert(it->funcs == NULL);
 
     if (it->itype == ASN1_ITYPE_MSTRING)
         utype = -1;
@@ -368,14 +322,9 @@
 static void asn1_primitive_clear(ASN1_VALUE **pval, const ASN1_ITEM *it)
 {
     int utype;
-    if (it && it->funcs) {
-        const ASN1_PRIMITIVE_FUNCS *pf = it->funcs;
-        if (pf->prim_clear)
-            pf->prim_clear(pval, it);
-        else
-            *pval = NULL;
-        return;
-    }
+    /* Historically, |it->funcs| for primitive types contained an
+     * |ASN1_PRIMITIVE_FUNCS| table of callbacks. */
+    assert(it == NULL || it->funcs == NULL);
     if (!it || (it->itype == ASN1_ITYPE_MSTRING))
         utype = -1;
     else
diff --git a/deps/boringssl/src/crypto/asn1/tasn_typ.c b/deps/boringssl/src/crypto/asn1/tasn_typ.c
index 7c5bfd5..44399ea 100644
--- a/deps/boringssl/src/crypto/asn1/tasn_typ.c
+++ b/deps/boringssl/src/crypto/asn1/tasn_typ.c
@@ -117,8 +117,6 @@
 
 /* Special, OCTET STRING with indefinite length constructed support */
 
-IMPLEMENT_ASN1_TYPE_ex(ASN1_OCTET_STRING_NDEF, ASN1_OCTET_STRING, ASN1_TFLG_NDEF)
-
 ASN1_ITEM_TEMPLATE(ASN1_SEQUENCE_ANY) =
         ASN1_EX_TEMPLATE_TYPE(ASN1_TFLG_SEQUENCE_OF, 0, ASN1_SEQUENCE_ANY, ASN1_ANY)
 ASN1_ITEM_TEMPLATE_END(ASN1_SEQUENCE_ANY)
diff --git a/deps/boringssl/src/crypto/asn1/tasn_utl.c b/deps/boringssl/src/crypto/asn1/tasn_utl.c
index a7516f6..f0288b4 100644
--- a/deps/boringssl/src/crypto/asn1/tasn_utl.c
+++ b/deps/boringssl/src/crypto/asn1/tasn_utl.c
@@ -66,6 +66,7 @@
 #include <openssl/thread.h>
 
 #include "../internal.h"
+#include "asn1_locl.h"
 
 
 /* Utility functions for manipulating fields and offsets */
@@ -91,8 +92,7 @@
 
 static CRYPTO_refcount_t *asn1_get_references(ASN1_VALUE **pval,
                                               const ASN1_ITEM *it) {
-  if (it->itype != ASN1_ITYPE_SEQUENCE &&
-      it->itype != ASN1_ITYPE_NDEF_SEQUENCE) {
+  if (it->itype != ASN1_ITYPE_SEQUENCE) {
     return NULL;
   }
   const ASN1_AUX *aux = it->funcs;
diff --git a/deps/boringssl/src/crypto/bio/bio.c b/deps/boringssl/src/crypto/bio/bio.c
index 7d97c3e..3d36e28 100644
--- a/deps/boringssl/src/crypto/bio/bio.c
+++ b/deps/boringssl/src/crypto/bio/bio.c
@@ -262,6 +262,8 @@
 
 int BIO_get_retry_reason(const BIO *bio) { return bio->retry_reason; }
 
+void BIO_set_retry_reason(BIO *bio, int reason) { bio->retry_reason = reason; }
+
 void BIO_clear_flags(BIO *bio, int flags) {
   bio->flags &= ~flags;
 }
diff --git a/deps/boringssl/src/crypto/bio/socket_helper.c b/deps/boringssl/src/crypto/bio/socket_helper.c
index d4209d0..fc751fd 100644
--- a/deps/boringssl/src/crypto/bio/socket_helper.c
+++ b/deps/boringssl/src/crypto/bio/socket_helper.c
@@ -55,7 +55,11 @@
   ret = getaddrinfo(hostname, port_str, &hint, &result);
   if (ret != 0) {
     OPENSSL_PUT_ERROR(SYS, 0);
+#if defined(OPENSSL_WINDOWS)
+    ERR_add_error_data(1, gai_strerrorA(ret));
+#else
     ERR_add_error_data(1, gai_strerror(ret));
+#endif
     return 0;
   }
 
diff --git a/deps/boringssl/src/crypto/blake2/blake2.c b/deps/boringssl/src/crypto/blake2/blake2.c
new file mode 100644
index 0000000..e3c560f
--- /dev/null
+++ b/deps/boringssl/src/crypto/blake2/blake2.c
@@ -0,0 +1,158 @@
+/* Copyright (c) 2021, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/blake2.h>
+
+#include <openssl/type_check.h>
+
+#include "../internal.h"
+
+// https://tools.ietf.org/html/rfc7693#section-2.6
+static const uint64_t kIV[8] = {
+    UINT64_C(0x6a09e667f3bcc908), UINT64_C(0xbb67ae8584caa73b),
+    UINT64_C(0x3c6ef372fe94f82b), UINT64_C(0xa54ff53a5f1d36f1),
+    UINT64_C(0x510e527fade682d1), UINT64_C(0x9b05688c2b3e6c1f),
+    UINT64_C(0x1f83d9abfb41bd6b), UINT64_C(0x5be0cd19137e2179),
+};
+
+// https://tools.ietf.org/html/rfc7693#section-2.7
+static const uint8_t kSigma[10 * 16] = {
+    // clang-format off
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
+    14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3,
+    11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4,
+    7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8,
+    9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13,
+    2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9,
+    12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11,
+    13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10,
+    6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5,
+    10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0,
+    // clang-format on
+};
+
+#define RIGHT_ROTATE(v, n) (((v) >> (n)) | ((v) << (64 - (n))))
+
+// https://tools.ietf.org/html/rfc7693#section-3.1
+static void blake2b_mix(uint64_t v[16], int a, int b, int c, int d, uint64_t x,
+                        uint64_t y) {
+  v[a] = v[a] + v[b] + x;
+  v[d] = RIGHT_ROTATE(v[d] ^ v[a], 32);
+  v[c] = v[c] + v[d];
+  v[b] = RIGHT_ROTATE(v[b] ^ v[c], 24);
+  v[a] = v[a] + v[b] + y;
+  v[d] = RIGHT_ROTATE(v[d] ^ v[a], 16);
+  v[c] = v[c] + v[d];
+  v[b] = RIGHT_ROTATE(v[b] ^ v[c], 63);
+}
+
+static void blake2b_transform(
+    BLAKE2B_CTX *b2b,
+    const uint64_t block_words[BLAKE2B_CBLOCK / sizeof(uint64_t)],
+    size_t num_bytes, int is_final_block) {
+  // https://tools.ietf.org/html/rfc7693#section-3.2
+  uint64_t v[16];
+  OPENSSL_STATIC_ASSERT(sizeof(v) == sizeof(b2b->h) + sizeof(kIV), "");
+  OPENSSL_memcpy(v, b2b->h, sizeof(b2b->h));
+  OPENSSL_memcpy(&v[8], kIV, sizeof(kIV));
+
+  b2b->t_low += num_bytes;
+  if (b2b->t_low < num_bytes) {
+    b2b->t_high++;
+  }
+  v[12] ^= b2b->t_low;
+  v[13] ^= b2b->t_high;
+
+  if (is_final_block) {
+    v[14] = ~v[14];
+  }
+
+  for (int round = 0; round < 12; round++) {
+    const uint8_t *const s = &kSigma[16 * (round % 10)];
+    blake2b_mix(v, 0, 4, 8, 12, block_words[s[0]], block_words[s[1]]);
+    blake2b_mix(v, 1, 5, 9, 13, block_words[s[2]], block_words[s[3]]);
+    blake2b_mix(v, 2, 6, 10, 14, block_words[s[4]], block_words[s[5]]);
+    blake2b_mix(v, 3, 7, 11, 15, block_words[s[6]], block_words[s[7]]);
+    blake2b_mix(v, 0, 5, 10, 15, block_words[s[8]], block_words[s[9]]);
+    blake2b_mix(v, 1, 6, 11, 12, block_words[s[10]], block_words[s[11]]);
+    blake2b_mix(v, 2, 7, 8, 13, block_words[s[12]], block_words[s[13]]);
+    blake2b_mix(v, 3, 4, 9, 14, block_words[s[14]], block_words[s[15]]);
+  }
+
+  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(b2b->h); i++) {
+    b2b->h[i] ^= v[i];
+    b2b->h[i] ^= v[i + 8];
+  }
+}
+
+void BLAKE2B256_Init(BLAKE2B_CTX *b2b) {
+  OPENSSL_memset(b2b, 0, sizeof(BLAKE2B_CTX));
+
+  OPENSSL_STATIC_ASSERT(sizeof(kIV) == sizeof(b2b->h), "");
+  OPENSSL_memcpy(&b2b->h, kIV, sizeof(kIV));
+
+  // https://tools.ietf.org/html/rfc7693#section-2.5
+  b2b->h[0] ^= 0x01010000 | BLAKE2B256_DIGEST_LENGTH;
+}
+
+void BLAKE2B256_Update(BLAKE2B_CTX *b2b, const void *in_data, size_t len) {
+  const uint8_t *data = (const uint8_t *)in_data;
+
+  size_t todo = sizeof(b2b->block.bytes) - b2b->block_used;
+  if (todo > len) {
+    todo = len;
+  }
+  OPENSSL_memcpy(&b2b->block.bytes[b2b->block_used], data, todo);
+  b2b->block_used += todo;
+  data += todo;
+  len -= todo;
+
+  if (!len) {
+    return;
+  }
+
+  // More input remains therefore we must have filled |b2b->block|.
+  assert(b2b->block_used == BLAKE2B_CBLOCK);
+  blake2b_transform(b2b, b2b->block.words, BLAKE2B_CBLOCK,
+                    /*is_final_block=*/0);
+  b2b->block_used = 0;
+
+  while (len > BLAKE2B_CBLOCK) {
+    uint64_t block_words[BLAKE2B_CBLOCK / sizeof(uint64_t)];
+    OPENSSL_memcpy(block_words, data, sizeof(block_words));
+    blake2b_transform(b2b, block_words, BLAKE2B_CBLOCK, /*is_final_block=*/0);
+    data += BLAKE2B_CBLOCK;
+    len -= BLAKE2B_CBLOCK;
+  }
+
+  OPENSSL_memcpy(b2b->block.bytes, data, len);
+  b2b->block_used = len;
+}
+
+void BLAKE2B256_Final(uint8_t out[BLAKE2B256_DIGEST_LENGTH], BLAKE2B_CTX *b2b) {
+  OPENSSL_memset(&b2b->block.bytes[b2b->block_used], 0,
+                 sizeof(b2b->block.bytes) - b2b->block_used);
+  blake2b_transform(b2b, b2b->block.words, b2b->block_used,
+                    /*is_final_block=*/1);
+  OPENSSL_STATIC_ASSERT(BLAKE2B256_DIGEST_LENGTH <= sizeof(b2b->h), "");
+  memcpy(out, b2b->h, BLAKE2B256_DIGEST_LENGTH);
+}
+
+void BLAKE2B256(const uint8_t *data, size_t len,
+                uint8_t out[BLAKE2B256_DIGEST_LENGTH]) {
+  BLAKE2B_CTX ctx;
+  BLAKE2B256_Init(&ctx);
+  BLAKE2B256_Update(&ctx, data, len);
+  BLAKE2B256_Final(out, &ctx);
+}
diff --git a/deps/boringssl/src/crypto/blake2/blake2_test.cc b/deps/boringssl/src/crypto/blake2/blake2_test.cc
new file mode 100644
index 0000000..ff3152d
--- /dev/null
+++ b/deps/boringssl/src/crypto/blake2/blake2_test.cc
@@ -0,0 +1,55 @@
+/* Copyright (c) 2021, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/blake2.h>
+
+#include <gtest/gtest.h>
+
+#include "../test/file_test.h"
+#include "../test/test_util.h"
+
+TEST(BLAKE2B256Test, ABC) {
+  // https://tools.ietf.org/html/rfc7693#appendix-A, except updated for the
+  // 256-bit hash output.
+  const uint8_t kExpected[] = {
+      0xbd, 0xdd, 0x81, 0x3c, 0x63, 0x42, 0x39, 0x72, 0x31, 0x71, 0xef,
+      0x3f, 0xee, 0x98, 0x57, 0x9b, 0x94, 0x96, 0x4e, 0x3b, 0xb1, 0xcb,
+      0x3e, 0x42, 0x72, 0x62, 0xc8, 0xc0, 0x68, 0xd5, 0x23, 0x19,
+  };
+
+  uint8_t digest[BLAKE2B256_DIGEST_LENGTH];
+  BLAKE2B256((const uint8_t *)"abc", 3, digest);
+  EXPECT_EQ(Bytes(kExpected), Bytes(digest));
+}
+
+TEST(BLAKE2B256Test, TestVectors) {
+  FileTestGTest("crypto/blake2/blake2b256_tests.txt", [](FileTest *t) {
+    std::vector<uint8_t> msg, expected;
+    ASSERT_TRUE(t->GetBytes(&msg, "IN"));
+    ASSERT_TRUE(t->GetBytes(&expected, "HASH"));
+
+    uint8_t digest[BLAKE2B256_DIGEST_LENGTH];
+    BLAKE2B256(msg.data(), msg.size(), digest);
+    EXPECT_EQ(Bytes(digest), Bytes(expected)) << msg.size();
+
+    OPENSSL_memset(digest, 0, sizeof(digest));
+    BLAKE2B_CTX b2b;
+    BLAKE2B256_Init(&b2b);
+    for (uint8_t b : msg) {
+      BLAKE2B256_Update(&b2b, &b, 1);
+    }
+    BLAKE2B256_Final(digest, &b2b);
+    EXPECT_EQ(Bytes(digest), Bytes(expected)) << msg.size();
+  });
+}
diff --git a/deps/boringssl/src/crypto/bn_extra/bn_asn1.c b/deps/boringssl/src/crypto/bn_extra/bn_asn1.c
index 0d96573..a8333d4 100644
--- a/deps/boringssl/src/crypto/bn_extra/bn_asn1.c
+++ b/deps/boringssl/src/crypto/bn_extra/bn_asn1.c
@@ -20,25 +20,18 @@
 
 int BN_parse_asn1_unsigned(CBS *cbs, BIGNUM *ret) {
   CBS child;
+  int is_negative;
   if (!CBS_get_asn1(cbs, &child, CBS_ASN1_INTEGER) ||
-      CBS_len(&child) == 0) {
+      !CBS_is_valid_asn1_integer(&child, &is_negative)) {
     OPENSSL_PUT_ERROR(BN, BN_R_BAD_ENCODING);
     return 0;
   }
 
-  if (CBS_data(&child)[0] & 0x80) {
+  if (is_negative) {
     OPENSSL_PUT_ERROR(BN, BN_R_NEGATIVE_NUMBER);
     return 0;
   }
 
-  // INTEGERs must be minimal.
-  if (CBS_data(&child)[0] == 0x00 &&
-      CBS_len(&child) > 1 &&
-      !(CBS_data(&child)[1] & 0x80)) {
-    OPENSSL_PUT_ERROR(BN, BN_R_BAD_ENCODING);
-    return 0;
-  }
-
   return BN_bin2bn(CBS_data(&child), CBS_len(&child), ret) != NULL;
 }
 
diff --git a/deps/boringssl/src/crypto/bytestring/ber.c b/deps/boringssl/src/crypto/bytestring/ber.c
index 7437239..df88432 100644
--- a/deps/boringssl/src/crypto/bytestring/ber.c
+++ b/deps/boringssl/src/crypto/bytestring/ber.c
@@ -53,7 +53,7 @@
 // depending on whether an indefinite length element or constructed string was
 // found. The value of |orig_in| is not changed. It returns one on success (i.e.
 // |*ber_found| was set) and zero on error.
-static int cbs_find_ber(const CBS *orig_in, char *ber_found, unsigned depth) {
+static int cbs_find_ber(const CBS *orig_in, int *ber_found, unsigned depth) {
   CBS in;
 
   if (depth > kMaxDepth) {
@@ -68,14 +68,11 @@
     unsigned tag;
     size_t header_len;
 
-    if (!CBS_get_any_ber_asn1_element(&in, &contents, &tag, &header_len)) {
+    if (!CBS_get_any_ber_asn1_element(&in, &contents, &tag, &header_len,
+                                      ber_found)) {
       return 0;
     }
-    if (CBS_len(&contents) == header_len &&
-        header_len > 0 &&
-        CBS_data(&contents)[header_len-1] == 0x80) {
-      // Found an indefinite-length element.
-      *ber_found = 1;
+    if (*ber_found) {
       return 1;
     }
     if (tag & CBS_ASN1_CONSTRUCTED) {
@@ -120,9 +117,11 @@
     CBS contents;
     unsigned tag, child_string_tag = string_tag;
     size_t header_len;
+    int ber_found;
     CBB *out_contents, out_contents_storage;
 
-    if (!CBS_get_any_ber_asn1_element(in, &contents, &tag, &header_len)) {
+    if (!CBS_get_any_ber_asn1_element(in, &contents, &tag, &header_len,
+                                      &ber_found)) {
       return 0;
     }
 
@@ -194,7 +193,7 @@
 
   // First, do a quick walk to find any indefinite-length elements. Most of the
   // time we hope that there aren't any and thus we can quickly return.
-  char conversion_needed;
+  int conversion_needed;
   if (!cbs_find_ber(in, &conversion_needed, 0)) {
     return 0;
   }
diff --git a/deps/boringssl/src/crypto/bytestring/bytestring_test.cc b/deps/boringssl/src/crypto/bytestring/bytestring_test.cc
index 93593e9..eafb0de 100644
--- a/deps/boringssl/src/crypto/bytestring/bytestring_test.cc
+++ b/deps/boringssl/src/crypto/bytestring/bytestring_test.cc
@@ -592,6 +592,10 @@
 TEST(CBSTest, BerConvert) {
   static const uint8_t kSimpleBER[] = {0x01, 0x01, 0x00};
 
+  // kNonMinimalLengthBER has a non-minimally encoded length.
+  static const uint8_t kNonMinimalLengthBER[] = {0x02, 0x82, 0x00, 0x01, 0x01};
+  static const uint8_t kNonMinimalLengthDER[] = {0x02, 0x01, 0x01};
+
   // kIndefBER contains a SEQUENCE with an indefinite length.
   static const uint8_t kIndefBER[] = {0x30, 0x80, 0x01, 0x01, 0x02, 0x00, 0x00};
   static const uint8_t kIndefDER[] = {0x30, 0x03, 0x01, 0x01, 0x02};
@@ -644,6 +648,9 @@
 
   ExpectBerConvert("kSimpleBER", kSimpleBER, sizeof(kSimpleBER), kSimpleBER,
                    sizeof(kSimpleBER));
+  ExpectBerConvert("kNonMinimalLengthBER", kNonMinimalLengthDER,
+                   sizeof(kNonMinimalLengthDER), kNonMinimalLengthBER,
+                   sizeof(kNonMinimalLengthBER));
   ExpectBerConvert("kIndefBER", kIndefDER, sizeof(kIndefDER), kIndefBER,
                    sizeof(kIndefBER));
   ExpectBerConvert("kIndefBER2", kIndefDER2, sizeof(kIndefDER2), kIndefBER2,
@@ -657,6 +664,62 @@
                    sizeof(kConstructedStringBER));
 }
 
+struct BERTest {
+  const char *in_hex;
+  bool ok;
+  bool ber_found;
+  unsigned tag;
+};
+
+static const BERTest kBERTests[] = {
+  // Trivial cases, also valid DER.
+  {"0000", true, false, 0},
+  {"0100", true, false, 1},
+  {"020101", true, false, 2},
+
+  // Non-minimally encoded lengths.
+  {"02810101", true, true, 2},
+  {"0282000101", true, true, 2},
+  {"028300000101", true, true, 2},
+  {"02840000000101", true, true, 2},
+  // Technically valid BER, but not handled.
+  {"02850000000101", false, false, 0},
+
+  {"0280", false, false, 0},  // Indefinite length, but not constructed.
+  {"2280", true, true, CBS_ASN1_CONSTRUCTED | 2},  // Indefinite length.
+  {"3f0000", false, false, 0},  // Invalid extended tag zero (X.690 8.1.2.4.2.c)
+  {"1f0100", false, false, 0},  // Should be a low-number tag form, even in BER.
+  {"1f4000", true, false, 0x40},
+  {"1f804000", false, false, 0},  // Non-minimal tags are invalid, even in BER.
+};
+
+TEST(CBSTest, BERElementTest) {
+  for (const auto &test : kBERTests) {
+    SCOPED_TRACE(test.in_hex);
+
+    std::vector<uint8_t> in_bytes;
+    ASSERT_TRUE(DecodeHex(&in_bytes, test.in_hex));
+    CBS in(in_bytes);
+    CBS out;
+    unsigned tag;
+    size_t header_len;
+    int ber_found;
+    int ok =
+        CBS_get_any_ber_asn1_element(&in, &out, &tag, &header_len, &ber_found);
+    ASSERT_TRUE((ok == 1) == test.ok);
+    if (!test.ok) {
+      continue;
+    }
+
+    EXPECT_TRUE((ber_found == 1) == test.ber_found);
+    EXPECT_LE(header_len, in_bytes.size());
+    EXPECT_EQ(CBS_len(&out), in_bytes.size());
+    EXPECT_EQ(CBS_len(&in), 0u);
+    EXPECT_EQ(Bytes(out), Bytes(in_bytes));
+    EXPECT_EQ(tag, test.tag);
+  }
+}
+
 struct ImplicitStringTest {
   const char *in;
   size_t in_len;
@@ -720,50 +783,65 @@
 struct ASN1InvalidUint64Test {
   const char *encoding;
   size_t encoding_len;
+  bool overflow;
 };
 
 static const ASN1InvalidUint64Test kASN1InvalidUint64Tests[] = {
     // Bad tag.
-    {"\x03\x01\x00", 3},
+    {"\x03\x01\x00", 3, false},
     // Empty contents.
-    {"\x02\x00", 2},
+    {"\x02\x00", 2, false},
     // Negative number.
-    {"\x02\x01\x80", 3},
+    {"\x02\x01\x80", 3, false},
     // Overflow.
-    {"\x02\x09\x01\x00\x00\x00\x00\x00\x00\x00\x00", 11},
+    {"\x02\x09\x01\x00\x00\x00\x00\x00\x00\x00\x00", 11, true},
     // Leading zeros.
-    {"\x02\x02\x00\x01", 4},
+    {"\x02\x02\x00\x01", 4, false},
 };
 
 TEST(CBSTest, ASN1Uint64) {
-  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kASN1Uint64Tests); i++) {
-    SCOPED_TRACE(i);
-    const ASN1Uint64Test *test = &kASN1Uint64Tests[i];
+  for (const ASN1Uint64Test &test : kASN1Uint64Tests) {
+    SCOPED_TRACE(Bytes(test.encoding, test.encoding_len));
+    SCOPED_TRACE(test.value);
     CBS cbs;
     uint64_t value;
     uint8_t *out;
     size_t len;
 
-    CBS_init(&cbs, (const uint8_t *)test->encoding, test->encoding_len);
+    CBS_init(&cbs, (const uint8_t *)test.encoding, test.encoding_len);
     ASSERT_TRUE(CBS_get_asn1_uint64(&cbs, &value));
     EXPECT_EQ(0u, CBS_len(&cbs));
-    EXPECT_EQ(test->value, value);
+    EXPECT_EQ(test.value, value);
+
+    CBS child;
+    int is_negative;
+    CBS_init(&cbs, (const uint8_t *)test.encoding, test.encoding_len);
+    ASSERT_TRUE(CBS_get_asn1(&cbs, &child, CBS_ASN1_INTEGER));
+    EXPECT_TRUE(CBS_is_valid_asn1_integer(&child, &is_negative));
+    EXPECT_EQ(0, is_negative);
+    EXPECT_TRUE(CBS_is_unsigned_asn1_integer(&child));
 
     bssl::ScopedCBB cbb;
     ASSERT_TRUE(CBB_init(cbb.get(), 0));
-    ASSERT_TRUE(CBB_add_asn1_uint64(cbb.get(), test->value));
+    ASSERT_TRUE(CBB_add_asn1_uint64(cbb.get(), test.value));
     ASSERT_TRUE(CBB_finish(cbb.get(), &out, &len));
     bssl::UniquePtr<uint8_t> scoper(out);
-    EXPECT_EQ(Bytes(test->encoding, test->encoding_len), Bytes(out, len));
+    EXPECT_EQ(Bytes(test.encoding, test.encoding_len), Bytes(out, len));
   }
 
-  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kASN1InvalidUint64Tests); i++) {
-    const ASN1InvalidUint64Test *test = &kASN1InvalidUint64Tests[i];
+  for (const ASN1InvalidUint64Test &test : kASN1InvalidUint64Tests) {
+    SCOPED_TRACE(Bytes(test.encoding, test.encoding_len));
     CBS cbs;
     uint64_t value;
 
-    CBS_init(&cbs, (const uint8_t *)test->encoding, test->encoding_len);
+    CBS_init(&cbs, (const uint8_t *)test.encoding, test.encoding_len);
     EXPECT_FALSE(CBS_get_asn1_uint64(&cbs, &value));
+
+    CBS_init(&cbs, (const uint8_t *)test.encoding, test.encoding_len);
+    CBS child;
+    if (CBS_get_asn1(&cbs, &child, CBS_ASN1_INTEGER)) {
+      EXPECT_EQ(test.overflow, !!CBS_is_unsigned_asn1_integer(&child));
+    }
   }
 }
 
@@ -793,50 +871,67 @@
 struct ASN1InvalidInt64Test {
   const char *encoding;
   size_t encoding_len;
+  bool overflow;
 };
 
 static const ASN1InvalidInt64Test kASN1InvalidInt64Tests[] = {
     // Bad tag.
-    {"\x03\x01\x00", 3},
+    {"\x03\x01\x00", 3, false},
     // Empty contents.
-    {"\x02\x00", 2},
+    {"\x02\x00", 2, false},
     // Overflow.
-    {"\x02\x09\x01\x00\x00\x00\x00\x00\x00\x00\x00", 11},
+    {"\x02\x09\x01\x00\x00\x00\x00\x00\x00\x00\x00", 11, true},
+    // Underflow.
+    {"\x02\x09\x08\xff\xff\xff\xff\xff\xff\xff\xff", 11, true},
     // Leading zeros.
-    {"\x02\x02\x00\x01", 4},
+    {"\x02\x02\x00\x01", 4, false},
     // Leading 0xff.
-    {"\x02\x02\xff\xff", 4},
+    {"\x02\x02\xff\xff", 4, false},
 };
 
 TEST(CBSTest, ASN1Int64) {
-  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kASN1Int64Tests); i++) {
-    SCOPED_TRACE(i);
-    const ASN1Int64Test *test = &kASN1Int64Tests[i];
+  for (const ASN1Int64Test &test : kASN1Int64Tests) {
+    SCOPED_TRACE(Bytes(test.encoding, test.encoding_len));
+    SCOPED_TRACE(test.value);
     CBS cbs;
     int64_t value;
     uint8_t *out;
     size_t len;
 
-    CBS_init(&cbs, (const uint8_t *)test->encoding, test->encoding_len);
+    CBS_init(&cbs, (const uint8_t *)test.encoding, test.encoding_len);
     ASSERT_TRUE(CBS_get_asn1_int64(&cbs, &value));
     EXPECT_EQ(0u, CBS_len(&cbs));
-    EXPECT_EQ(test->value, value);
+    EXPECT_EQ(test.value, value);
+
+    CBS child;
+    int is_negative;
+    CBS_init(&cbs, (const uint8_t *)test.encoding, test.encoding_len);
+    ASSERT_TRUE(CBS_get_asn1(&cbs, &child, CBS_ASN1_INTEGER));
+    EXPECT_TRUE(CBS_is_valid_asn1_integer(&child, &is_negative));
+    EXPECT_EQ(test.value < 0, !!is_negative);
+    EXPECT_EQ(test.value >= 0, !!CBS_is_unsigned_asn1_integer(&child));
 
     bssl::ScopedCBB cbb;
     ASSERT_TRUE(CBB_init(cbb.get(), 0));
-    ASSERT_TRUE(CBB_add_asn1_int64(cbb.get(), test->value));
+    ASSERT_TRUE(CBB_add_asn1_int64(cbb.get(), test.value));
     ASSERT_TRUE(CBB_finish(cbb.get(), &out, &len));
     bssl::UniquePtr<uint8_t> scoper(out);
-    EXPECT_EQ(Bytes(test->encoding, test->encoding_len), Bytes(out, len));
+    EXPECT_EQ(Bytes(test.encoding, test.encoding_len), Bytes(out, len));
   }
 
-  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kASN1InvalidInt64Tests); i++) {
-    const ASN1InvalidInt64Test *test = &kASN1InvalidInt64Tests[i];
+  for (const ASN1InvalidInt64Test &test : kASN1InvalidInt64Tests) {
+    SCOPED_TRACE(Bytes(test.encoding, test.encoding_len));
     CBS cbs;
     int64_t value;
 
-    CBS_init(&cbs, (const uint8_t *)test->encoding, test->encoding_len);
+    CBS_init(&cbs, (const uint8_t *)test.encoding, test.encoding_len);
     EXPECT_FALSE(CBS_get_asn1_int64(&cbs, &value));
+
+    CBS_init(&cbs, (const uint8_t *)test.encoding, test.encoding_len);
+    CBS child;
+    if (CBS_get_asn1(&cbs, &child, CBS_ASN1_INTEGER)) {
+      EXPECT_EQ(test.overflow, !!CBS_is_valid_asn1_integer(&child, NULL));
+    }
   }
 }
 
diff --git a/deps/boringssl/src/crypto/bytestring/cbs.c b/deps/boringssl/src/crypto/bytestring/cbs.c
index 49d7003..5590ec8 100644
--- a/deps/boringssl/src/crypto/bytestring/cbs.c
+++ b/deps/boringssl/src/crypto/bytestring/cbs.c
@@ -254,8 +254,7 @@
   //
   // If the number portion is 31 (0x1f, the largest value that fits in the
   // allotted bits), then the tag is more than one byte long and the
-  // continuation bytes contain the tag number. This parser only supports tag
-  // numbers less than 31 (and thus single-byte tags).
+  // continuation bytes contain the tag number.
   unsigned tag = ((unsigned)tag_byte & 0xe0) << CBS_ASN1_TAG_SHIFT;
   unsigned tag_number = tag_byte & 0x1f;
   if (tag_number == 0x1f) {
@@ -263,7 +262,7 @@
     if (!parse_base128_integer(cbs, &v) ||
         // Check the tag number is within our supported bounds.
         v > CBS_ASN1_TAG_NUMBER_MASK ||
-        // Small tag numbers should have used low tag number form.
+        // Small tag numbers should have used low tag number form, even in BER.
         v < 0x1f) {
       return 0;
     }
@@ -277,13 +276,17 @@
 }
 
 static int cbs_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
-                                    size_t *out_header_len, int ber_ok) {
+                                    size_t *out_header_len, int *out_ber_found,
+                                    int ber_ok) {
   CBS header = *cbs;
   CBS throwaway;
 
   if (out == NULL) {
     out = &throwaway;
   }
+  if (ber_ok) {
+    *out_ber_found = 0;
+  }
 
   unsigned tag;
   if (!parse_asn1_tag(&header, &tag)) {
@@ -321,27 +324,38 @@
       if (out_header_len != NULL) {
         *out_header_len = header_len;
       }
+      *out_ber_found = 1;
       return CBS_get_bytes(cbs, out, header_len);
     }
 
     // ITU-T X.690 clause 8.1.3.5.c specifies that the value 0xff shall not be
     // used as the first byte of the length. If this parser encounters that
-    // value, num_bytes will be parsed as 127, which will fail the check below.
+    // value, num_bytes will be parsed as 127, which will fail this check.
     if (num_bytes == 0 || num_bytes > 4) {
       return 0;
     }
     if (!cbs_get_u(&header, &len64, num_bytes)) {
       return 0;
     }
-    // ITU-T X.690 section 10.1 (DER length forms) requires encoding the length
-    // with the minimum number of octets.
+    // ITU-T X.690 section 10.1 (DER length forms) requires encoding the
+    // length with the minimum number of octets. BER could, technically, have
+    // 125 superfluous zero bytes. We do not attempt to handle that and still
+    // require that the length fit in a |uint32_t| for BER.
     if (len64 < 128) {
       // Length should have used short-form encoding.
-      return 0;
+      if (ber_ok) {
+        *out_ber_found = 1;
+      } else {
+        return 0;
+      }
     }
-    if ((len64 >> ((num_bytes-1)*8)) == 0) {
+    if ((len64 >> ((num_bytes - 1) * 8)) == 0) {
       // Length should have been at least one byte shorter.
-      return 0;
+      if (ber_ok) {
+        *out_ber_found = 1;
+      } else {
+        return 0;
+      }
     }
     len = len64;
     if (len + header_len + num_bytes < len) {
@@ -374,13 +388,15 @@
 int CBS_get_any_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
                                     size_t *out_header_len) {
   return cbs_get_any_asn1_element(cbs, out, out_tag, out_header_len,
-                                  0 /* DER only */);
+                                  NULL, 0 /* DER only */);
 }
 
 int CBS_get_any_ber_asn1_element(CBS *cbs, CBS *out, unsigned *out_tag,
-                                 size_t *out_header_len) {
-  return cbs_get_any_asn1_element(cbs, out, out_tag, out_header_len,
-                                  1 /* BER allowed */);
+                                 size_t *out_header_len, int *out_ber_found) {
+  int ber_found_temp;
+  return cbs_get_any_asn1_element(
+      cbs, out, out_tag, out_header_len,
+      out_ber_found ? out_ber_found : &ber_found_temp, 1 /* BER allowed */);
 }
 
 static int cbs_get_asn1(CBS *cbs, CBS *out, unsigned tag_value,
@@ -426,29 +442,14 @@
 
 int CBS_get_asn1_uint64(CBS *cbs, uint64_t *out) {
   CBS bytes;
-  if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER)) {
+  if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER) ||
+      !CBS_is_unsigned_asn1_integer(&bytes)) {
     return 0;
   }
 
   *out = 0;
   const uint8_t *data = CBS_data(&bytes);
   size_t len = CBS_len(&bytes);
-
-  if (len == 0) {
-    // An INTEGER is encoded with at least one octet.
-    return 0;
-  }
-
-  if ((data[0] & 0x80) != 0) {
-    // Negative number.
-    return 0;
-  }
-
-  if (data[0] == 0 && len > 1 && (data[1] & 0x80) == 0) {
-    // Extra leading zeros.
-    return 0;
-  }
-
   for (size_t i = 0; i < len; i++) {
     if ((*out >> 56) != 0) {
       // Too large to represent as a uint64_t.
@@ -462,31 +463,21 @@
 }
 
 int CBS_get_asn1_int64(CBS *cbs, int64_t *out) {
+  int is_negative;
   CBS bytes;
-  if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER)) {
+  if (!CBS_get_asn1(cbs, &bytes, CBS_ASN1_INTEGER) ||
+      !CBS_is_valid_asn1_integer(&bytes, &is_negative)) {
     return 0;
   }
   const uint8_t *data = CBS_data(&bytes);
   const size_t len = CBS_len(&bytes);
-
-  if (len == 0 || len > sizeof(int64_t)) {
-    // An INTEGER is encoded with at least one octet.
+  if (len > sizeof(int64_t)) {
     return 0;
   }
-  if (len > 1) {
-    if (data[0] == 0 && (data[1] & 0x80) == 0) {
-      return 0;  // Extra leading zeros.
-    }
-    if (data[0] == 0xff && (data[1] & 0x80) != 0) {
-      return 0;  // Extra leading 0xff.
-    }
-  }
-
   union {
     int64_t i;
     uint8_t bytes[sizeof(int64_t)];
   } u;
-  const int is_negative = (data[0] & 0x80);
   memset(u.bytes, is_negative ? 0xff : 0, sizeof(u.bytes));  // Sign-extend.
   for (size_t i = 0; i < len; i++) {
     u.bytes[i] = data[len - i - 1];
@@ -635,6 +626,30 @@
          (CBS_data(cbs)[byte_num] & (1 << bit_num)) != 0;
 }
 
+int CBS_is_valid_asn1_integer(const CBS *cbs, int *out_is_negative) {
+  CBS copy = *cbs;
+  uint8_t first_byte, second_byte;
+  if (!CBS_get_u8(&copy, &first_byte)) {
+    return 0;  // INTEGERs may not be empty.
+  }
+  if (out_is_negative != NULL) {
+    *out_is_negative = (first_byte & 0x80) != 0;
+  }
+  if (!CBS_get_u8(&copy, &second_byte)) {
+    return 1;  // One byte INTEGERs are always minimal.
+  }
+  if ((first_byte == 0x00 && (second_byte & 0x80) == 0) ||
+      (first_byte == 0xff && (second_byte & 0x80) != 0)) {
+    return 0;  // The value is minimal iff the first 9 bits are not all equal.
+  }
+  return 1;
+}
+
+int CBS_is_unsigned_asn1_integer(const CBS *cbs) {
+  int is_negative;
+  return CBS_is_valid_asn1_integer(cbs, &is_negative) && !is_negative;
+}
+
 static int add_decimal(CBB *out, uint64_t v) {
   char buf[DECIMAL_SIZE(uint64_t) + 1];
   BIO_snprintf(buf, sizeof(buf), "%" PRIu64, v);
diff --git a/deps/boringssl/src/crypto/cipher_extra/aead_test.cc b/deps/boringssl/src/crypto/cipher_extra/aead_test.cc
index fdf71b0..bf02e78 100644
--- a/deps/boringssl/src/crypto/cipher_extra/aead_test.cc
+++ b/deps/boringssl/src/crypto/cipher_extra/aead_test.cc
@@ -24,79 +24,148 @@
 #include <openssl/err.h>
 
 #include "../fipsmodule/cipher/internal.h"
+#include "internal.h"
 #include "../internal.h"
 #include "../test/abi_test.h"
 #include "../test/file_test.h"
 #include "../test/test_util.h"
 #include "../test/wycheproof_util.h"
 
+// kLimitedImplementation indicates that tests that assume a generic AEAD
+// interface should not be performed. For example, the key-wrap AEADs only
+// handle inputs that are a multiple of eight bytes in length and the TLS CBC
+// AEADs have the concept of “direction”.
+constexpr uint32_t kLimitedImplementation = 1 << 0;
+// kCanTruncateTags indicates that the AEAD supports truncating tags to
+// arbitrary lengths.
+constexpr uint32_t kCanTruncateTags = 1 << 1;
+// kVariableNonce indicates that the AEAD supports a variable-length nonce.
+constexpr uint32_t kVariableNonce = 1 << 2;
+// kNondeterministic indicates that the AEAD performs randomised encryption thus
+// one cannot assume that encrypting the same data will result in the same
+// ciphertext.
+constexpr uint32_t kNondeterministic = 1 << 7;
+
+// RequiresADLength encodes an AD length requirement into flags.
+constexpr uint32_t RequiresADLength(size_t length) {
+  // If we had a more recent C++ version we could assert that the length is
+  // sufficiently small with:
+  //
+  // if (length >= 16) {
+  //  __builtin_unreachable();
+  // }
+  return (length & 0xf) << 3;
+}
+
+// RequiredADLength returns the AD length requirement encoded in |flags|, or
+// zero if there isn't one.
+constexpr size_t RequiredADLength(uint32_t flags) {
+  return (flags >> 3) & 0xf;
+}
+
+constexpr uint32_t RequiresMinimumTagLength(size_t length) {
+  // See above for statically checking the size at compile time with future C++
+  // versions.
+  return (length & 0xf) << 8;
+}
+
+constexpr size_t MinimumTagLength(uint32_t flags) {
+  return ((flags >> 8) & 0xf) == 0 ? 1 : ((flags >> 8) & 0xf);
+}
 
 struct KnownAEAD {
   const char name[40];
   const EVP_AEAD *(*func)(void);
   const char *test_vectors;
-  // limited_implementation indicates that tests that assume a generic AEAD
-  // interface should not be performed. For example, the key-wrap AEADs only
-  // handle inputs that are a multiple of eight bytes in length and the TLS CBC
-  // AEADs have the concept of “direction”.
-  bool limited_implementation;
-  // truncated_tags is true if the AEAD supports truncating tags to arbitrary
-  // lengths.
-  bool truncated_tags;
-  // variable_nonce is true if the AEAD supports a variable nonce length.
-  bool variable_nonce;
-  // ad_len, if non-zero, is the required length of the AD.
-  size_t ad_len;
+  uint32_t flags;
 };
 
 static const struct KnownAEAD kAEADs[] = {
-    {"AES_128_GCM", EVP_aead_aes_128_gcm, "aes_128_gcm_tests.txt", false, true,
-     true, 0},
+    {"AES_128_GCM", EVP_aead_aes_128_gcm, "aes_128_gcm_tests.txt",
+     kCanTruncateTags | kVariableNonce},
+
     {"AES_128_GCM_NIST", EVP_aead_aes_128_gcm, "nist_cavp/aes_128_gcm.txt",
-     false, true, true, 0},
-    {"AES_192_GCM", EVP_aead_aes_192_gcm, "aes_192_gcm_tests.txt", false, true,
-     true, 0},
-    {"AES_256_GCM", EVP_aead_aes_256_gcm, "aes_256_gcm_tests.txt", false, true,
-     true, 0},
+     kCanTruncateTags | kVariableNonce},
+
+    {"AES_192_GCM", EVP_aead_aes_192_gcm, "aes_192_gcm_tests.txt",
+     kCanTruncateTags | kVariableNonce},
+
+    {"AES_256_GCM", EVP_aead_aes_256_gcm, "aes_256_gcm_tests.txt",
+     kCanTruncateTags | kVariableNonce},
+
     {"AES_256_GCM_NIST", EVP_aead_aes_256_gcm, "nist_cavp/aes_256_gcm.txt",
-     false, true, true, 0},
+     kCanTruncateTags | kVariableNonce},
+
     {"AES_128_GCM_SIV", EVP_aead_aes_128_gcm_siv, "aes_128_gcm_siv_tests.txt",
-     false, false, false, 0},
+     0},
+
     {"AES_256_GCM_SIV", EVP_aead_aes_256_gcm_siv, "aes_256_gcm_siv_tests.txt",
-     false, false, false, 0},
+     0},
+
+    {"AES_128_GCM_RandomNonce", EVP_aead_aes_128_gcm_randnonce,
+     "aes_128_gcm_randnonce_tests.txt",
+     kNondeterministic | kCanTruncateTags | RequiresMinimumTagLength(13)},
+
+    {"AES_256_GCM_RandomNonce", EVP_aead_aes_256_gcm_randnonce,
+     "aes_256_gcm_randnonce_tests.txt",
+     kNondeterministic | kCanTruncateTags | RequiresMinimumTagLength(13)},
+
     {"ChaCha20Poly1305", EVP_aead_chacha20_poly1305,
-     "chacha20_poly1305_tests.txt", false, true, false, 0},
+     "chacha20_poly1305_tests.txt", kCanTruncateTags},
+
     {"XChaCha20Poly1305", EVP_aead_xchacha20_poly1305,
-     "xchacha20_poly1305_tests.txt", false, true, false, 0},
+     "xchacha20_poly1305_tests.txt", kCanTruncateTags},
+
     {"AES_128_CBC_SHA1_TLS", EVP_aead_aes_128_cbc_sha1_tls,
-     "aes_128_cbc_sha1_tls_tests.txt", true, false, false, 11},
+     "aes_128_cbc_sha1_tls_tests.txt",
+     kLimitedImplementation | RequiresADLength(11)},
+
     {"AES_128_CBC_SHA1_TLSImplicitIV",
      EVP_aead_aes_128_cbc_sha1_tls_implicit_iv,
-     "aes_128_cbc_sha1_tls_implicit_iv_tests.txt", true, false, false, 11},
+     "aes_128_cbc_sha1_tls_implicit_iv_tests.txt",
+     kLimitedImplementation | RequiresADLength(11)},
+
     {"AES_128_CBC_SHA256_TLS", EVP_aead_aes_128_cbc_sha256_tls,
-     "aes_128_cbc_sha256_tls_tests.txt", true, false, false, 11},
+     "aes_128_cbc_sha256_tls_tests.txt",
+     kLimitedImplementation | RequiresADLength(11)},
+
     {"AES_256_CBC_SHA1_TLS", EVP_aead_aes_256_cbc_sha1_tls,
-     "aes_256_cbc_sha1_tls_tests.txt", true, false, false, 11},
+     "aes_256_cbc_sha1_tls_tests.txt",
+     kLimitedImplementation | RequiresADLength(11)},
+
     {"AES_256_CBC_SHA1_TLSImplicitIV",
      EVP_aead_aes_256_cbc_sha1_tls_implicit_iv,
-     "aes_256_cbc_sha1_tls_implicit_iv_tests.txt", true, false, false, 11},
+     "aes_256_cbc_sha1_tls_implicit_iv_tests.txt",
+     kLimitedImplementation | RequiresADLength(11)},
+
     {"AES_256_CBC_SHA256_TLS", EVP_aead_aes_256_cbc_sha256_tls,
-     "aes_256_cbc_sha256_tls_tests.txt", true, false, false, 11},
+     "aes_256_cbc_sha256_tls_tests.txt",
+     kLimitedImplementation | RequiresADLength(11)},
+
     {"AES_256_CBC_SHA384_TLS", EVP_aead_aes_256_cbc_sha384_tls,
-     "aes_256_cbc_sha384_tls_tests.txt", true, false, false, 11},
+     "aes_256_cbc_sha384_tls_tests.txt",
+     kLimitedImplementation | RequiresADLength(11)},
+
     {"DES_EDE3_CBC_SHA1_TLS", EVP_aead_des_ede3_cbc_sha1_tls,
-     "des_ede3_cbc_sha1_tls_tests.txt", true, false, false, 11},
+     "des_ede3_cbc_sha1_tls_tests.txt",
+     kLimitedImplementation | RequiresADLength(11)},
+
     {"DES_EDE3_CBC_SHA1_TLSImplicitIV",
      EVP_aead_des_ede3_cbc_sha1_tls_implicit_iv,
-     "des_ede3_cbc_sha1_tls_implicit_iv_tests.txt", true, false, false, 11},
+     "des_ede3_cbc_sha1_tls_implicit_iv_tests.txt",
+     kLimitedImplementation | RequiresADLength(11)},
+
     {"AES_128_CTR_HMAC_SHA256", EVP_aead_aes_128_ctr_hmac_sha256,
-     "aes_128_ctr_hmac_sha256.txt", false, true, false, 0},
+     "aes_128_ctr_hmac_sha256.txt", kCanTruncateTags},
+
     {"AES_256_CTR_HMAC_SHA256", EVP_aead_aes_256_ctr_hmac_sha256,
-     "aes_256_ctr_hmac_sha256.txt", false, true, false, 0},
+     "aes_256_ctr_hmac_sha256.txt", kCanTruncateTags},
+
     {"AES_128_CCM_BLUETOOTH", EVP_aead_aes_128_ccm_bluetooth,
-     "aes_128_ccm_bluetooth_tests.txt", false, false, false, 0},
+     "aes_128_ccm_bluetooth_tests.txt", 0},
+
     {"AES_128_CCM_BLUETOOTH_8", EVP_aead_aes_128_ccm_bluetooth_8,
-     "aes_128_ccm_bluetooth_8_tests.txt", false, false, false, 0},
+     "aes_128_ccm_bluetooth_8_tests.txt", 0},
 };
 
 class PerAEADTest : public testing::TestWithParam<KnownAEAD> {
@@ -143,7 +212,8 @@
         ctx.get(), aead(), key.data(), key.size(), tag_len, evp_aead_seal));
 
     std::vector<uint8_t> out(in.size() + EVP_AEAD_max_overhead(aead()));
-    if (!t->HasAttribute("NO_SEAL")) {
+    if (!t->HasAttribute("NO_SEAL") &&
+        !(GetParam().flags & kNondeterministic)) {
       size_t out_len;
       ASSERT_TRUE(EVP_AEAD_CTX_seal(ctx.get(), out.data(), &out_len, out.size(),
                                     nonce.data(), nonce.size(), in.data(),
@@ -223,7 +293,8 @@
       "crypto/cipher_extra/test/" + std::string(aead_config.test_vectors);
   FileTestGTest(test_vectors.c_str(), [&](FileTest *t) {
     if (t->HasAttribute("NO_SEAL") ||
-        t->HasAttribute("FAILS")) {
+        t->HasAttribute("FAILS") ||
+        (aead_config.flags & kNondeterministic)) {
       t->SkipCurrent();
       return;
     }
@@ -291,7 +362,8 @@
 
     std::vector<uint8_t> out(in.size());
     std::vector<uint8_t> out_tag(EVP_AEAD_max_overhead(aead()));
-    if (!t->HasAttribute("NO_SEAL")) {
+    if (!t->HasAttribute("NO_SEAL") &&
+        !(aead_config.flags & kNondeterministic)) {
       size_t out_tag_len;
       ASSERT_TRUE(EVP_AEAD_CTX_seal_scatter(
           ctx.get(), out.data(), out_tag.data(), &out_tag_len, out_tag.size(),
@@ -409,7 +481,7 @@
 }
 
 TEST_P(PerAEADTest, TruncatedTags) {
-  if (!GetParam().truncated_tags) {
+  if (!(GetParam().flags & kCanTruncateTags)) {
     return;
   }
 
@@ -423,9 +495,10 @@
   const size_t nonce_len = EVP_AEAD_nonce_length(aead());
   ASSERT_GE(sizeof(nonce), nonce_len);
 
+  const size_t tag_len = MinimumTagLength(GetParam().flags);
   bssl::ScopedEVP_AEAD_CTX ctx;
   ASSERT_TRUE(EVP_AEAD_CTX_init(ctx.get(), aead(), key, key_len,
-                                1 /* one byte tag */, NULL /* ENGINE */));
+                                tag_len, NULL /* ENGINE */));
 
   const uint8_t plaintext[1] = {'A'};
 
@@ -446,7 +519,7 @@
 
   const size_t overhead_used = ciphertext_len - sizeof(plaintext);
   const size_t expected_overhead =
-      1 + EVP_AEAD_max_overhead(aead()) - EVP_AEAD_max_tag_len(aead());
+      tag_len + EVP_AEAD_max_overhead(aead()) - EVP_AEAD_max_tag_len(aead());
   EXPECT_EQ(overhead_used, expected_overhead)
       << "AEAD is probably ignoring request to truncate tags.";
 
@@ -469,7 +542,7 @@
 }
 
 TEST_P(PerAEADTest, AliasedBuffers) {
-  if (GetParam().limited_implementation) {
+  if (GetParam().flags & kLimitedImplementation) {
     return;
   }
 
@@ -532,8 +605,11 @@
   ASSERT_TRUE(EVP_AEAD_CTX_seal(ctx.get(), in, &out_len,
                                 sizeof(kPlaintext) + max_overhead, nonce.data(),
                                 nonce_len, in, sizeof(kPlaintext), nullptr, 0));
-  EXPECT_EQ(Bytes(valid_encryption.data(), valid_encryption_len),
-            Bytes(in, out_len));
+
+  if (!(GetParam().flags & kNondeterministic)) {
+    EXPECT_EQ(Bytes(valid_encryption.data(), valid_encryption_len),
+              Bytes(in, out_len));
+  }
 
   OPENSSL_memcpy(in, valid_encryption.data(), valid_encryption_len);
   ASSERT_TRUE(EVP_AEAD_CTX_open(ctx.get(), in, &out_len, valid_encryption_len,
@@ -555,8 +631,9 @@
   ASSERT_GE(sizeof(key) - 1, key_len);
   const size_t nonce_len = EVP_AEAD_nonce_length(aead());
   ASSERT_GE(sizeof(nonce) - 1, nonce_len);
-  const size_t ad_len =
-      GetParam().ad_len != 0 ? GetParam().ad_len : sizeof(ad) - 1;
+  const size_t ad_len = RequiredADLength(GetParam().flags) != 0
+                            ? RequiredADLength(GetParam().flags)
+                            : sizeof(ad) - 1;
   ASSERT_GE(sizeof(ad) - 1, ad_len);
 
   // Encrypt some input.
@@ -619,7 +696,7 @@
     // variable-length nonces, it does not allow the empty nonce.
     nonce_lens.push_back(0);
   }
-  if (!GetParam().variable_nonce) {
+  if (!(GetParam().flags & kVariableNonce)) {
     nonce_lens.push_back(valid_nonce_len + 1);
     if (valid_nonce_len != 0) {
       nonce_lens.push_back(valid_nonce_len - 1);
@@ -627,7 +704,9 @@
   }
 
   static const uint8_t kZeros[EVP_AEAD_MAX_KEY_LENGTH] = {0};
-  const size_t ad_len = GetParam().ad_len != 0 ? GetParam().ad_len : 16;
+  const size_t ad_len = RequiredADLength(GetParam().flags) != 0
+                            ? RequiredADLength(GetParam().flags)
+                            : 16;
   ASSERT_LE(ad_len, sizeof(kZeros));
 
   for (size_t nonce_len : nonce_lens) {
@@ -723,9 +802,10 @@
   alignas(2) uint8_t ad_buf[512];
   OPENSSL_memset(ad_buf, 'A', sizeof(ad_buf));
   const uint8_t *const ad = ad_buf + 1;
-  ASSERT_LE(GetParam().ad_len, sizeof(ad_buf) - 1);
-  const size_t ad_len =
-      GetParam().ad_len != 0 ? GetParam().ad_len : sizeof(ad_buf) - 1;
+  ASSERT_LE(RequiredADLength(GetParam().flags), sizeof(ad_buf) - 1);
+  const size_t ad_len = RequiredADLength(GetParam().flags) != 0
+                            ? RequiredADLength(GetParam().flags)
+                            : sizeof(ad_buf) - 1;
 
   uint8_t nonce[EVP_AEAD_MAX_NONCE_LENGTH];
   const size_t nonce_len = EVP_AEAD_nonce_length(aead());
@@ -748,6 +828,27 @@
   EXPECT_EQ(Bytes(plaintext + 1, sizeof(plaintext) - 1),
             Bytes(plaintext2 + 1, plaintext2_len));
 }
+
+TEST(ChaChaPoly1305Test, ABI) {
+  if (!chacha20_poly1305_asm_capable()) {
+    return;
+  }
+
+  std::unique_ptr<uint8_t[]> buf(new uint8_t[1024]);
+  for (size_t len = 0; len <= 1024; len += 5) {
+    SCOPED_TRACE(len);
+    union chacha20_poly1305_open_data open_ctx = {};
+    CHECK_ABI(chacha20_poly1305_open, buf.get(), buf.get(), len, buf.get(),
+              len % 128, &open_ctx);
+  }
+
+  for (size_t len = 0; len <= 1024; len += 5) {
+    SCOPED_TRACE(len);
+    union chacha20_poly1305_seal_data seal_ctx = {};
+    CHECK_ABI(chacha20_poly1305_seal, buf.get(), buf.get(), len, buf.get(),
+              len % 128, &seal_ctx);
+  }
+}
 #endif  // SUPPORTS_ABI_TEST
 
 TEST(AEADTest, AESCCMLargeAD) {
diff --git a/deps/boringssl/src/crypto/cipher_extra/e_chacha20poly1305.c b/deps/boringssl/src/crypto/cipher_extra/e_chacha20poly1305.c
index 1c175e9..1650188 100644
--- a/deps/boringssl/src/crypto/cipher_extra/e_chacha20poly1305.c
+++ b/deps/boringssl/src/crypto/cipher_extra/e_chacha20poly1305.c
@@ -18,18 +18,15 @@
 
 #include <openssl/chacha.h>
 #include <openssl/cipher.h>
-#include <openssl/cpu.h>
 #include <openssl/err.h>
 #include <openssl/mem.h>
 #include <openssl/poly1305.h>
 #include <openssl/type_check.h>
 
+#include "internal.h"
+#include "../chacha/internal.h"
 #include "../fipsmodule/cipher/internal.h"
 #include "../internal.h"
-#include "../chacha/internal.h"
-
-
-#define POLY1305_TAG_LEN 16
 
 struct aead_chacha20_poly1305_ctx {
   uint8_t key[32];
@@ -44,78 +41,6 @@
                       "AEAD state has insufficient alignment");
 #endif
 
-// For convenience (the x86_64 calling convention allows only six parameters in
-// registers), the final parameter for the assembly functions is both an input
-// and output parameter.
-union open_data {
-  struct {
-    alignas(16) uint8_t key[32];
-    uint32_t counter;
-    uint8_t nonce[12];
-  } in;
-  struct {
-    uint8_t tag[POLY1305_TAG_LEN];
-  } out;
-};
-
-union seal_data {
-  struct {
-    alignas(16) uint8_t key[32];
-    uint32_t counter;
-    uint8_t nonce[12];
-    const uint8_t *extra_ciphertext;
-    size_t extra_ciphertext_len;
-  } in;
-  struct {
-    uint8_t tag[POLY1305_TAG_LEN];
-  } out;
-};
-
-#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM) && \
-    !defined(OPENSSL_WINDOWS)
-static int asm_capable(void) {
-  const int sse41_capable = (OPENSSL_ia32cap_P[1] & (1 << 19)) != 0;
-  return sse41_capable;
-}
-
-OPENSSL_STATIC_ASSERT(sizeof(union open_data) == 48, "wrong open_data size");
-OPENSSL_STATIC_ASSERT(sizeof(union seal_data) == 48 + 8 + 8,
-                      "wrong seal_data size");
-
-// chacha20_poly1305_open is defined in chacha20_poly1305_x86_64.pl. It decrypts
-// |plaintext_len| bytes from |ciphertext| and writes them to |out_plaintext|.
-// Additional input parameters are passed in |aead_data->in|. On exit, it will
-// write calculated tag value to |aead_data->out.tag|, which the caller must
-// check.
-extern void chacha20_poly1305_open(uint8_t *out_plaintext,
-                                   const uint8_t *ciphertext,
-                                   size_t plaintext_len, const uint8_t *ad,
-                                   size_t ad_len, union open_data *aead_data);
-
-// chacha20_poly1305_open is defined in chacha20_poly1305_x86_64.pl. It encrypts
-// |plaintext_len| bytes from |plaintext| and writes them to |out_ciphertext|.
-// Additional input parameters are passed in |aead_data->in|. The calculated tag
-// value is over the computed ciphertext concatenated with |extra_ciphertext|
-// and written to |aead_data->out.tag|.
-extern void chacha20_poly1305_seal(uint8_t *out_ciphertext,
-                                   const uint8_t *plaintext,
-                                   size_t plaintext_len, const uint8_t *ad,
-                                   size_t ad_len, union seal_data *aead_data);
-#else
-static int asm_capable(void) { return 0; }
-
-
-static void chacha20_poly1305_open(uint8_t *out_plaintext,
-                                   const uint8_t *ciphertext,
-                                   size_t plaintext_len, const uint8_t *ad,
-                                   size_t ad_len, union open_data *aead_data) {}
-
-static void chacha20_poly1305_seal(uint8_t *out_ciphertext,
-                                   const uint8_t *plaintext,
-                                   size_t plaintext_len, const uint8_t *ad,
-                                   size_t ad_len, union seal_data *aead_data) {}
-#endif
-
 static int aead_chacha20_poly1305_init(EVP_AEAD_CTX *ctx, const uint8_t *key,
                                        size_t key_len, size_t tag_len) {
   struct aead_chacha20_poly1305_ctx *c20_ctx =
@@ -238,8 +163,8 @@
     }
   }
 
-  union seal_data data;
-  if (asm_capable()) {
+  union chacha20_poly1305_seal_data data;
+  if (chacha20_poly1305_asm_capable()) {
     OPENSSL_memcpy(data.in.key, key, 32);
     data.in.counter = 0;
     OPENSSL_memcpy(data.in.nonce, nonce, 12);
@@ -321,8 +246,8 @@
     return 0;
   }
 
-  union open_data data;
-  if (asm_capable()) {
+  union chacha20_poly1305_open_data data;
+  if (chacha20_poly1305_asm_capable()) {
     OPENSSL_memcpy(data.in.key, key, 32);
     data.in.counter = 0;
     OPENSSL_memcpy(data.in.nonce, nonce, 12);
diff --git a/deps/boringssl/src/crypto/cipher_extra/internal.h b/deps/boringssl/src/crypto/cipher_extra/internal.h
index 1d2c4e1..c2af48e 100644
--- a/deps/boringssl/src/crypto/cipher_extra/internal.h
+++ b/deps/boringssl/src/crypto/cipher_extra/internal.h
@@ -57,7 +57,11 @@
 #ifndef OPENSSL_HEADER_CIPHER_EXTRA_INTERNAL_H
 #define OPENSSL_HEADER_CIPHER_EXTRA_INTERNAL_H
 
+#include <stdlib.h>
+
 #include <openssl/base.h>
+#include <openssl/cpu.h>
+#include <openssl/type_check.h>
 
 #include "../internal.h"
 
@@ -120,6 +124,89 @@
                               const uint8_t *mac_secret,
                               unsigned mac_secret_length);
 
+#define POLY1305_TAG_LEN 16
+
+// For convenience (the x86_64 calling convention allows only six parameters in
+// registers), the final parameter for the assembly functions is both an input
+// and output parameter.
+union chacha20_poly1305_open_data {
+  struct {
+    alignas(16) uint8_t key[32];
+    uint32_t counter;
+    uint8_t nonce[12];
+  } in;
+  struct {
+    uint8_t tag[POLY1305_TAG_LEN];
+  } out;
+};
+
+union chacha20_poly1305_seal_data {
+  struct {
+    alignas(16) uint8_t key[32];
+    uint32_t counter;
+    uint8_t nonce[12];
+    const uint8_t *extra_ciphertext;
+    size_t extra_ciphertext_len;
+  } in;
+  struct {
+    uint8_t tag[POLY1305_TAG_LEN];
+  } out;
+};
+
+#if defined(OPENSSL_X86_64) && !defined(OPENSSL_NO_ASM)
+
+OPENSSL_STATIC_ASSERT(sizeof(union chacha20_poly1305_open_data) == 48,
+                      "wrong chacha20_poly1305_open_data size");
+OPENSSL_STATIC_ASSERT(sizeof(union chacha20_poly1305_seal_data) == 48 + 8 + 8,
+                      "wrong chacha20_poly1305_seal_data size");
+
+OPENSSL_INLINE int chacha20_poly1305_asm_capable(void) {
+  const int sse41_capable = (OPENSSL_ia32cap_P[1] & (1 << 19)) != 0;
+  return sse41_capable;
+}
+
+// chacha20_poly1305_open is defined in chacha20_poly1305_x86_64.pl. It decrypts
+// |plaintext_len| bytes from |ciphertext| and writes them to |out_plaintext|.
+// Additional input parameters are passed in |data->in|. On exit, it will
+// write the calculated tag value to |data->out.tag|, which the caller must
+// check.
+extern void chacha20_poly1305_open(uint8_t *out_plaintext,
+                                   const uint8_t *ciphertext,
+                                   size_t plaintext_len, const uint8_t *ad,
+                                   size_t ad_len,
+                                   union chacha20_poly1305_open_data *data);
+
+// chacha20_poly1305_seal is defined in chacha20_poly1305_x86_64.pl. It encrypts
+// |plaintext_len| bytes from |plaintext| and writes them to |out_ciphertext|.
+// Additional input parameters are passed in |data->in|. The calculated tag
+// value is over the computed ciphertext concatenated with |extra_ciphertext|
+// and written to |data->out.tag|.
+extern void chacha20_poly1305_seal(uint8_t *out_ciphertext,
+                                   const uint8_t *plaintext,
+                                   size_t plaintext_len, const uint8_t *ad,
+                                   size_t ad_len,
+                                   union chacha20_poly1305_seal_data *data);
+#else
+
+OPENSSL_INLINE int chacha20_poly1305_asm_capable(void) { return 0; }
+
+OPENSSL_INLINE void chacha20_poly1305_open(uint8_t *out_plaintext,
+                                   const uint8_t *ciphertext,
+                                   size_t plaintext_len, const uint8_t *ad,
+                                   size_t ad_len,
+                                   union chacha20_poly1305_open_data *data) {
+  abort();
+}
+
+OPENSSL_INLINE void chacha20_poly1305_seal(uint8_t *out_ciphertext,
+                                   const uint8_t *plaintext,
+                                   size_t plaintext_len, const uint8_t *ad,
+                                   size_t ad_len,
+                                   union chacha20_poly1305_seal_data *data) {
+  abort();
+}
+#endif
+
 
 #if defined(__cplusplus)
 }  // extern C
diff --git a/deps/boringssl/src/crypto/cpu-aarch64-win.c b/deps/boringssl/src/crypto/cpu-aarch64-win.c
new file mode 100644
index 0000000..ee7f8e0
--- /dev/null
+++ b/deps/boringssl/src/crypto/cpu-aarch64-win.c
@@ -0,0 +1,41 @@
+/* Copyright (c) 2018, Google Inc.
+ * Copyright (c) 2020, Arm Ltd.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/cpu.h>
+
+#if defined(OPENSSL_AARCH64) && defined(OPENSSL_WINDOWS) && \
+    !defined(OPENSSL_STATIC_ARMCAP)
+
+#include <windows.h>
+
+#include <openssl/arm_arch.h>
+
+#include "internal.h"
+
+extern uint32_t OPENSSL_armcap_P;
+void OPENSSL_cpuid_setup(void) {
+  // We do not need to check for the presence of NEON, as Armv8-A always has it
+  OPENSSL_armcap_P |= ARMV7_NEON;
+
+  if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) {
+    // These are all covered by one call in Windows
+    OPENSSL_armcap_P |= ARMV8_AES;
+    OPENSSL_armcap_P |= ARMV8_PMULL;
+    OPENSSL_armcap_P |= ARMV8_SHA1;
+    OPENSSL_armcap_P |= ARMV8_SHA256;
+  }
+}
+
+#endif
diff --git a/deps/boringssl/src/crypto/crypto_test.cc b/deps/boringssl/src/crypto/crypto_test.cc
new file mode 100644
index 0000000..f6c2374
--- /dev/null
+++ b/deps/boringssl/src/crypto/crypto_test.cc
@@ -0,0 +1,35 @@
+/* Copyright (c) 2020, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <stdio.h>
+#include <string.h>
+
+#include <string>
+
+#include <openssl/base.h>
+#include <openssl/crypto.h>
+
+#include <gtest/gtest.h>
+
+// Test that OPENSSL_VERSION_NUMBER and OPENSSL_VERSION_TEXT are consistent.
+// Node.js parses the version out of OPENSSL_VERSION_TEXT instead of using
+// OPENSSL_VERSION_NUMBER.
+TEST(CryptoTest, Version) {
+  char expected[512];
+  snprintf(expected, sizeof(expected), "OpenSSL %d.%d.%d ",
+           OPENSSL_VERSION_NUMBER >> 28, (OPENSSL_VERSION_NUMBER >> 20) & 0xff,
+           (OPENSSL_VERSION_NUMBER >> 12) & 0xff);
+  EXPECT_EQ(expected,
+            std::string(OPENSSL_VERSION_TEXT).substr(0, strlen(expected)));
+}
diff --git a/deps/boringssl/src/crypto/curve25519/ed25519_test.cc b/deps/boringssl/src/crypto/curve25519/ed25519_test.cc
index 4f34675..d56abe6 100644
--- a/deps/boringssl/src/crypto/curve25519/ed25519_test.cc
+++ b/deps/boringssl/src/crypto/curve25519/ed25519_test.cc
@@ -64,6 +64,48 @@
   };
 
   EXPECT_FALSE(ED25519_verify(kMsg, sizeof(kMsg), kSig, kPub));
+
+  // The following inputs try to exercise the boundaries of the order check,
+  // where s is near the order above and below. EdDSA hashes the public key with
+  // the message, which frustrates constructing actual boundary cases. Instead,
+  // these inputs were found by randomly generating signatures. kSigValid had
+  // the highest s value. kSigInvalid had the lowest s value, and then the order
+  // was added.
+  //
+  // This isn't ideal, but it is sensitive to the most significant 32 bits.
+  //
+  // The private key seed for kPub2 is
+  // a59a4130fcfd293c9737db8f14177ce034305cf34bdc4346f24b4d262e07b5c2.
+  static const uint8_t kPub2[] = {
+      0x10, 0x0f, 0xdf, 0x47, 0xfb, 0x94, 0xf1, 0x53, 0x6a, 0x4f, 0x7c,
+      0x3f, 0xda, 0x27, 0x38, 0x3f, 0xa0, 0x33, 0x75, 0xa8, 0xf5, 0x27,
+      0xc5, 0x37, 0xe6, 0xf1, 0x70, 0x3c, 0x47, 0xf9, 0x4f, 0x86};
+  static const uint8_t kMsgValid[] = {
+      0x12, 0x4e, 0x58, 0x3f, 0x8b, 0x8e, 0xca, 0x58, 0xbb, 0x29, 0xc2,
+      0x71, 0xb4, 0x1d, 0x36, 0x98, 0x6b, 0xbc, 0x45, 0x54, 0x1f, 0x8e,
+      0x51, 0xf9, 0xcb, 0x01, 0x33, 0xec, 0xa4, 0x47, 0x60, 0x1e};
+  static const uint8_t kSigValid[] = {
+      0xda, 0xc1, 0x19, 0xd6, 0xca, 0x87, 0xfc, 0x59, 0xae, 0x61, 0x1c,
+      0x15, 0x70, 0x48, 0xf4, 0xd4, 0xfc, 0x93, 0x2a, 0x14, 0x9d, 0xbe,
+      0x20, 0xec, 0x6e, 0xff, 0xd1, 0x43, 0x6a, 0xbf, 0x83, 0xea, 0x05,
+      0xc7, 0xdf, 0x0f, 0xef, 0x06, 0x14, 0x72, 0x41, 0x25, 0x91, 0x13,
+      0x90, 0x9b, 0xc7, 0x1b, 0xd3, 0xc5, 0x3b, 0xa4, 0x46, 0x4f, 0xfc,
+      0xad, 0x3c, 0x09, 0x68, 0xf2, 0xff, 0xff, 0xff, 0x0f};
+  static const uint8_t kMsgInvalid[] = {
+      0x6a, 0x0b, 0xc2, 0xb0, 0x05, 0x7c, 0xed, 0xfc, 0x0f, 0xa2, 0xe3,
+      0xf7, 0xf7, 0xd3, 0x92, 0x79, 0xb3, 0x0f, 0x45, 0x4a, 0x69, 0xdf,
+      0xd1, 0x11, 0x7c, 0x75, 0x8d, 0x86, 0xb1, 0x9d, 0x85, 0xe0};
+  static const uint8_t kSigInvalid[] = {
+      0x09, 0x71, 0xf8, 0x6d, 0x2c, 0x9c, 0x78, 0x58, 0x25, 0x24, 0xa1,
+      0x03, 0xcb, 0x9c, 0xf9, 0x49, 0x52, 0x2a, 0xe5, 0x28, 0xf8, 0x05,
+      0x4d, 0xc2, 0x01, 0x07, 0xd9, 0x99, 0xbe, 0x67, 0x3f, 0xf4, 0xe2,
+      0x5e, 0xbf, 0x2f, 0x29, 0x28, 0x76, 0x6b, 0x12, 0x48, 0xbe, 0xc6,
+      0xe9, 0x16, 0x97, 0x77, 0x5f, 0x84, 0x46, 0x63, 0x9e, 0xde, 0x46,
+      0xad, 0x4d, 0xf4, 0x05, 0x30, 0x00, 0x00, 0x00, 0x10};
+
+  EXPECT_TRUE(ED25519_verify(kMsgValid, sizeof(kMsgValid), kSigValid, kPub2));
+  EXPECT_FALSE(
+      ED25519_verify(kMsgInvalid, sizeof(kMsgInvalid), kSigInvalid, kPub2));
 }
 
 TEST(Ed25519Test, KeypairFromSeed) {
diff --git a/deps/boringssl/src/crypto/dh/params.c b/deps/boringssl/src/crypto/dh/params.c
deleted file mode 100644
index 3336029..0000000
--- a/deps/boringssl/src/crypto/dh/params.c
+++ /dev/null
@@ -1,93 +0,0 @@
-/* ====================================================================
- * Copyright (c) 2011 The OpenSSL Project.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- *    software must display the following acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- *    endorse or promote products derived from this software without
- *    prior written permission. For written permission, please contact
- *    licensing@OpenSSL.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- *    nor may "OpenSSL" appear in their names without prior written
- *    permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- *    acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
- *
- * This product includes cryptographic software written by Eric Young
- * (eay@cryptsoft.com).  This product includes software written by Tim
- * Hudson (tjh@cryptsoft.com). */
-
-#include <openssl/dh.h>
-
-#include <openssl/bn.h>
-
-#include "../fipsmodule/bn/internal.h"
-
-
-BIGNUM *BN_get_rfc3526_prime_1536(BIGNUM *ret) {
-  static const BN_ULONG kPrime1536Data[] = {
-      TOBN(0xffffffff, 0xffffffff), TOBN(0xf1746c08, 0xca237327),
-      TOBN(0x670c354e, 0x4abc9804), TOBN(0x9ed52907, 0x7096966d),
-      TOBN(0x1c62f356, 0x208552bb), TOBN(0x83655d23, 0xdca3ad96),
-      TOBN(0x69163fa8, 0xfd24cf5f), TOBN(0x98da4836, 0x1c55d39a),
-      TOBN(0xc2007cb8, 0xa163bf05), TOBN(0x49286651, 0xece45b3d),
-      TOBN(0xae9f2411, 0x7c4b1fe6), TOBN(0xee386bfb, 0x5a899fa5),
-      TOBN(0x0bff5cb6, 0xf406b7ed), TOBN(0xf44c42e9, 0xa637ed6b),
-      TOBN(0xe485b576, 0x625e7ec6), TOBN(0x4fe1356d, 0x6d51c245),
-      TOBN(0x302b0a6d, 0xf25f1437), TOBN(0xef9519b3, 0xcd3a431b),
-      TOBN(0x514a0879, 0x8e3404dd), TOBN(0x020bbea6, 0x3b139b22),
-      TOBN(0x29024e08, 0x8a67cc74), TOBN(0xc4c6628b, 0x80dc1cd1),
-      TOBN(0xc90fdaa2, 0x2168c234), TOBN(0xffffffff, 0xffffffff),
-  };
-
-  static const BIGNUM kPrime1536BN = STATIC_BIGNUM(kPrime1536Data);
-
-  BIGNUM *alloc = NULL;
-  if (ret == NULL) {
-    alloc = BN_new();
-    if (alloc == NULL) {
-      return NULL;
-    }
-    ret = alloc;
-  }
-
-  if (!BN_copy(ret, &kPrime1536BN)) {
-    BN_free(alloc);
-    return NULL;
-  }
-
-  return ret;
-}
diff --git a/deps/boringssl/src/crypto/dh/dh_asn1.c b/deps/boringssl/src/crypto/dh_extra/dh_asn1.c
similarity index 100%
rename from deps/boringssl/src/crypto/dh/dh_asn1.c
rename to deps/boringssl/src/crypto/dh_extra/dh_asn1.c
diff --git a/deps/boringssl/src/crypto/dh/dh_test.cc b/deps/boringssl/src/crypto/dh_extra/dh_test.cc
similarity index 92%
rename from deps/boringssl/src/crypto/dh/dh_test.cc
rename to deps/boringssl/src/crypto/dh_extra/dh_test.cc
index c77e7e4..7933a8c 100644
--- a/deps/boringssl/src/crypto/dh/dh_test.cc
+++ b/deps/boringssl/src/crypto/dh_extra/dh_test.cc
@@ -71,6 +71,7 @@
 #include <openssl/mem.h>
 
 #include "../internal.h"
+#include "../test/test_util.h"
 
 
 static bool RunBasicTests();
@@ -443,3 +444,41 @@
 
   return true;
 }
+
+TEST(DHTest, LeadingZeros) {
+  bssl::UniquePtr<BIGNUM> p(BN_get_rfc3526_prime_1536(nullptr));
+  ASSERT_TRUE(p);
+  bssl::UniquePtr<BIGNUM> g(BN_new());
+  ASSERT_TRUE(g);
+  ASSERT_TRUE(BN_set_word(g.get(), 2));
+
+  bssl::UniquePtr<DH> dh(DH_new());
+  ASSERT_TRUE(dh);
+  ASSERT_TRUE(DH_set0_pqg(dh.get(), p.get(), /*q=*/nullptr, g.get()));
+  p.release();
+  g.release();
+
+  // These values are far too small to be reasonable Diffie-Hellman keys, but
+  // they are an easy way to get a shared secret with leading zeros.
+  bssl::UniquePtr<BIGNUM> priv_key(BN_new()), peer_key(BN_new());
+  ASSERT_TRUE(priv_key);
+  ASSERT_TRUE(BN_set_word(priv_key.get(), 2));
+  ASSERT_TRUE(peer_key);
+  ASSERT_TRUE(BN_set_word(peer_key.get(), 3));
+  ASSERT_TRUE(DH_set0_key(dh.get(), /*pub_key=*/nullptr, priv_key.get()));
+  priv_key.release();
+
+  uint8_t padded[192] = {0};
+  padded[191] = 9;
+  static const uint8_t kTruncated[] = {9};
+  EXPECT_EQ(int(sizeof(padded)), DH_size(dh.get()));
+
+  std::vector<uint8_t> buf(DH_size(dh.get()));
+  int len = DH_compute_key(buf.data(), peer_key.get(), dh.get());
+  ASSERT_GT(len, 0);
+  EXPECT_EQ(Bytes(buf.data(), len), Bytes(kTruncated));
+
+  len = DH_compute_key_padded(buf.data(), peer_key.get(), dh.get());
+  ASSERT_GT(len, 0);
+  EXPECT_EQ(Bytes(buf.data(), len), Bytes(padded));
+}
diff --git a/deps/boringssl/src/crypto/dh_extra/params.c b/deps/boringssl/src/crypto/dh_extra/params.c
new file mode 100644
index 0000000..6023ab1
--- /dev/null
+++ b/deps/boringssl/src/crypto/dh_extra/params.c
@@ -0,0 +1,272 @@
+/* ====================================================================
+ * Copyright (c) 2011 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    licensing@OpenSSL.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com).  This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com). */
+
+#include <openssl/dh.h>
+
+#include <openssl/bn.h>
+#include <openssl/err.h>
+#include <openssl/mem.h>
+
+#include "../fipsmodule/bn/internal.h"
+
+
+BIGNUM *BN_get_rfc3526_prime_1536(BIGNUM *ret) {
+  static const BN_ULONG kPrime1536Data[] = {
+      TOBN(0xffffffff, 0xffffffff), TOBN(0xf1746c08, 0xca237327),
+      TOBN(0x670c354e, 0x4abc9804), TOBN(0x9ed52907, 0x7096966d),
+      TOBN(0x1c62f356, 0x208552bb), TOBN(0x83655d23, 0xdca3ad96),
+      TOBN(0x69163fa8, 0xfd24cf5f), TOBN(0x98da4836, 0x1c55d39a),
+      TOBN(0xc2007cb8, 0xa163bf05), TOBN(0x49286651, 0xece45b3d),
+      TOBN(0xae9f2411, 0x7c4b1fe6), TOBN(0xee386bfb, 0x5a899fa5),
+      TOBN(0x0bff5cb6, 0xf406b7ed), TOBN(0xf44c42e9, 0xa637ed6b),
+      TOBN(0xe485b576, 0x625e7ec6), TOBN(0x4fe1356d, 0x6d51c245),
+      TOBN(0x302b0a6d, 0xf25f1437), TOBN(0xef9519b3, 0xcd3a431b),
+      TOBN(0x514a0879, 0x8e3404dd), TOBN(0x020bbea6, 0x3b139b22),
+      TOBN(0x29024e08, 0x8a67cc74), TOBN(0xc4c6628b, 0x80dc1cd1),
+      TOBN(0xc90fdaa2, 0x2168c234), TOBN(0xffffffff, 0xffffffff),
+  };
+
+  static const BIGNUM kPrime1536BN = STATIC_BIGNUM(kPrime1536Data);
+
+  BIGNUM *alloc = NULL;
+  if (ret == NULL) {
+    alloc = BN_new();
+    if (alloc == NULL) {
+      return NULL;
+    }
+    ret = alloc;
+  }
+
+  if (!BN_copy(ret, &kPrime1536BN)) {
+    BN_free(alloc);
+    return NULL;
+  }
+
+  return ret;
+}
+
+int DH_generate_parameters_ex(DH *dh, int prime_bits, int generator,
+                              BN_GENCB *cb) {
+  // We generate DH parameters as follows
+  // find a prime q which is prime_bits/2 bits long.
+  // p=(2*q)+1 or (p-1)/2 = q
+  // For this case, g is a generator if
+  // g^((p-1)/q) mod p != 1 for values of q which are the factors of p-1.
+  // Since the factors of p-1 are q and 2, we just need to check
+  // g^2 mod p != 1 and g^q mod p != 1.
+  //
+  // Having said all that,
+  // there is another special case method for the generators 2, 3 and 5.
+  // for 2, p mod 24 == 11
+  // for 3, p mod 12 == 5  <<<<< does not work for safe primes.
+  // for 5, p mod 10 == 3 or 7
+  //
+  // Thanks to Phil Karn <karn@qualcomm.com> for the pointers about the
+  // special generators and for answering some of my questions.
+  //
+  // I've implemented the second simple method :-).
+  // Since DH should be using a safe prime (both p and q are prime),
+  // this generator function can take a very very long time to run.
+
+  // Actually there is no reason to insist that 'generator' be a generator.
+  // It's just as OK (and in some sense better) to use a generator of the
+  // order-q subgroup.
+
+  BIGNUM *t1, *t2;
+  int g, ok = 0;
+  BN_CTX *ctx = NULL;
+
+  ctx = BN_CTX_new();
+  if (ctx == NULL) {
+    goto err;
+  }
+  BN_CTX_start(ctx);
+  t1 = BN_CTX_get(ctx);
+  t2 = BN_CTX_get(ctx);
+  if (t1 == NULL || t2 == NULL) {
+    goto err;
+  }
+
+  // Make sure |dh| has the necessary elements
+  if (dh->p == NULL) {
+    dh->p = BN_new();
+    if (dh->p == NULL) {
+      goto err;
+    }
+  }
+  if (dh->g == NULL) {
+    dh->g = BN_new();
+    if (dh->g == NULL) {
+      goto err;
+    }
+  }
+
+  if (generator <= 1) {
+    OPENSSL_PUT_ERROR(DH, DH_R_BAD_GENERATOR);
+    goto err;
+  }
+  if (generator == DH_GENERATOR_2) {
+    if (!BN_set_word(t1, 24)) {
+      goto err;
+    }
+    if (!BN_set_word(t2, 11)) {
+      goto err;
+    }
+    g = 2;
+  } else if (generator == DH_GENERATOR_5) {
+    if (!BN_set_word(t1, 10)) {
+      goto err;
+    }
+    if (!BN_set_word(t2, 3)) {
+      goto err;
+    }
+    // BN_set_word(t3,7); just have to miss
+    // out on these ones :-(
+    g = 5;
+  } else {
+    // in the general case, don't worry if 'generator' is a
+    // generator or not: since we are using safe primes,
+    // it will generate either an order-q or an order-2q group,
+    // both of which are OK
+    if (!BN_set_word(t1, 2)) {
+      goto err;
+    }
+    if (!BN_set_word(t2, 1)) {
+      goto err;
+    }
+    g = generator;
+  }
+
+  if (!BN_generate_prime_ex(dh->p, prime_bits, 1, t1, t2, cb)) {
+    goto err;
+  }
+  if (!BN_GENCB_call(cb, 3, 0)) {
+    goto err;
+  }
+  if (!BN_set_word(dh->g, g)) {
+    goto err;
+  }
+  ok = 1;
+
+err:
+  if (!ok) {
+    OPENSSL_PUT_ERROR(DH, ERR_R_BN_LIB);
+  }
+
+  if (ctx != NULL) {
+    BN_CTX_end(ctx);
+    BN_CTX_free(ctx);
+  }
+  return ok;
+}
+
+static int int_dh_bn_cpy(BIGNUM **dst, const BIGNUM *src) {
+  BIGNUM *a = NULL;
+
+  if (src) {
+    a = BN_dup(src);
+    if (!a) {
+      return 0;
+    }
+  }
+
+  BN_free(*dst);
+  *dst = a;
+  return 1;
+}
+
+static int int_dh_param_copy(DH *to, const DH *from, int is_x942) {
+  if (is_x942 == -1) {
+    is_x942 = !!from->q;
+  }
+  if (!int_dh_bn_cpy(&to->p, from->p) ||
+      !int_dh_bn_cpy(&to->g, from->g)) {
+    return 0;
+  }
+
+  if (!is_x942) {
+    return 1;
+  }
+
+  if (!int_dh_bn_cpy(&to->q, from->q) ||
+      !int_dh_bn_cpy(&to->j, from->j)) {
+    return 0;
+  }
+
+  OPENSSL_free(to->seed);
+  to->seed = NULL;
+  to->seedlen = 0;
+
+  if (from->seed) {
+    to->seed = OPENSSL_memdup(from->seed, from->seedlen);
+    if (!to->seed) {
+      return 0;
+    }
+    to->seedlen = from->seedlen;
+  }
+
+  return 1;
+}
+
+DH *DHparams_dup(const DH *dh) {
+  DH *ret = DH_new();
+  if (!ret) {
+    return NULL;
+  }
+
+  if (!int_dh_param_copy(ret, dh, -1)) {
+    DH_free(ret);
+    return NULL;
+  }
+
+  return ret;
+}
diff --git a/deps/boringssl/src/crypto/digest_extra/digest_extra.c b/deps/boringssl/src/crypto/digest_extra/digest_extra.c
index 4b4bb38..311c5cb 100644
--- a/deps/boringssl/src/crypto/digest_extra/digest_extra.c
+++ b/deps/boringssl/src/crypto/digest_extra/digest_extra.c
@@ -59,10 +59,12 @@
 #include <string.h>
 
 #include <openssl/asn1.h>
+#include <openssl/blake2.h>
 #include <openssl/bytestring.h>
 #include <openssl/nid.h>
 
 #include "../internal.h"
+#include "../fipsmodule/digest/internal.h"
 
 
 struct nid_to_digest {
@@ -238,3 +240,26 @@
 
   return NULL;
 }
+
+static void blake2b256_init(EVP_MD_CTX *ctx) { BLAKE2B256_Init(ctx->md_data); }
+
+static void blake2b256_update(EVP_MD_CTX *ctx, const void *data, size_t len) {
+  BLAKE2B256_Update(ctx->md_data, data, len);
+}
+
+static void blake2b256_final(EVP_MD_CTX *ctx, uint8_t *md) {
+  BLAKE2B256_Final(md, ctx->md_data);
+}
+
+static const EVP_MD evp_md_blake2b256 = {
+  NID_undef,
+  BLAKE2B256_DIGEST_LENGTH,
+  0,
+  blake2b256_init,
+  blake2b256_update,
+  blake2b256_final,
+  BLAKE2B_CBLOCK,
+  sizeof(BLAKE2B_CTX),
+};
+
+const EVP_MD *EVP_blake2b256(void) { return &evp_md_blake2b256; }
diff --git a/deps/boringssl/src/crypto/digest_extra/digest_test.cc b/deps/boringssl/src/crypto/digest_extra/digest_test.cc
index ba0884a..80b5106 100644
--- a/deps/boringssl/src/crypto/digest_extra/digest_test.cc
+++ b/deps/boringssl/src/crypto/digest_extra/digest_test.cc
@@ -55,6 +55,7 @@
 static const MD sha512 = { "SHA512", &EVP_sha512, &SHA512 };
 static const MD sha512_256 = { "SHA512-256", &EVP_sha512_256, &SHA512_256 };
 static const MD md5_sha1 = { "MD5-SHA1", &EVP_md5_sha1, nullptr };
+static const MD blake2b256 = { "BLAKE2b-256", &EVP_blake2b256, nullptr };
 
 struct DigestTestVector {
   // md is the digest to test.
@@ -145,6 +146,10 @@
     // MD5-SHA1 tests.
     {md5_sha1, "abc", 1,
      "900150983cd24fb0d6963f7d28e17f72a9993e364706816aba3e25717850c26c9cd0d89d"},
+
+    // BLAKE2b-256 tests.
+    {blake2b256, "abc", 1,
+     "bddd813c634239723171ef3fee98579b94964e3bb1cb3e427262c8c068d52319"},
 };
 
 static void CompareDigest(const DigestTestVector *test,
diff --git a/deps/boringssl/src/crypto/dsa/dsa.c b/deps/boringssl/src/crypto/dsa/dsa.c
index 5cd98f8..c869568 100644
--- a/deps/boringssl/src/crypto/dsa/dsa.c
+++ b/deps/boringssl/src/crypto/dsa/dsa.c
@@ -72,12 +72,11 @@
 #include <openssl/sha.h>
 #include <openssl/thread.h>
 
+#include "internal.h"
 #include "../fipsmodule/bn/internal.h"
 #include "../internal.h"
 
 
-#define OPENSSL_DSA_MAX_MODULUS_BITS 10000
-
 // Primality test according to FIPS PUB 186[-1], Appendix 2.1: 50 rounds of
 // Miller-Rabin.
 #define DSS_prime_checks 50
@@ -568,23 +567,7 @@
 }
 
 DSA_SIG *DSA_do_sign(const uint8_t *digest, size_t digest_len, const DSA *dsa) {
-  if (!dsa->p || !dsa->q || !dsa->g) {
-    OPENSSL_PUT_ERROR(DSA, DSA_R_MISSING_PARAMETERS);
-    return NULL;
-  }
-
-  // Reject invalid parameters. In particular, the algorithm will infinite loop
-  // if |g| is zero.
-  if (BN_is_zero(dsa->p) || BN_is_zero(dsa->q) || BN_is_zero(dsa->g)) {
-    OPENSSL_PUT_ERROR(DSA, DSA_R_INVALID_PARAMETERS);
-    return NULL;
-  }
-
-  // We only support DSA keys that are a multiple of 8 bits. (This is a weaker
-  // check than the one in |DSA_do_check_signature|, which only allows 160-,
-  // 224-, and 256-bit keys.
-  if (BN_num_bits(dsa->q) % 8 != 0) {
-    OPENSSL_PUT_ERROR(DSA, DSA_R_BAD_Q_VALUE);
+  if (!dsa_check_parameters(dsa)) {
     return NULL;
   }
 
@@ -678,35 +661,17 @@
 
 int DSA_do_check_signature(int *out_valid, const uint8_t *digest,
                            size_t digest_len, DSA_SIG *sig, const DSA *dsa) {
-  BN_CTX *ctx;
-  BIGNUM u1, u2, t1;
-  int ret = 0;
-  unsigned i;
-
   *out_valid = 0;
-
-  if (!dsa->p || !dsa->q || !dsa->g) {
-    OPENSSL_PUT_ERROR(DSA, DSA_R_MISSING_PARAMETERS);
+  if (!dsa_check_parameters(dsa)) {
     return 0;
   }
 
-  i = BN_num_bits(dsa->q);
-  // FIPS 186-3 allows only different sizes for q.
-  if (i != 160 && i != 224 && i != 256) {
-    OPENSSL_PUT_ERROR(DSA, DSA_R_BAD_Q_VALUE);
-    return 0;
-  }
-
-  if (BN_num_bits(dsa->p) > OPENSSL_DSA_MAX_MODULUS_BITS) {
-    OPENSSL_PUT_ERROR(DSA, DSA_R_MODULUS_TOO_LARGE);
-    return 0;
-  }
-
+  int ret = 0;
+  BIGNUM u1, u2, t1;
   BN_init(&u1);
   BN_init(&u2);
   BN_init(&t1);
-
-  ctx = BN_CTX_new();
+  BN_CTX *ctx = BN_CTX_new();
   if (ctx == NULL) {
     goto err;
   }
@@ -729,11 +694,12 @@
   }
 
   // save M in u1
-  if (digest_len > (i >> 3)) {
+  unsigned q_bits = BN_num_bits(dsa->q);
+  if (digest_len > (q_bits >> 3)) {
     // if the digest length is greater than the size of q use the
     // BN_num_bits(dsa->q) leftmost bits of the digest, see
     // fips 186-3, 4.2
-    digest_len = (i >> 3);
+    digest_len = (q_bits >> 3);
   }
 
   if (BN_bin2bn(digest, digest_len, &u1) == NULL) {
diff --git a/deps/boringssl/src/crypto/dsa/dsa_asn1.c b/deps/boringssl/src/crypto/dsa/dsa_asn1.c
index 97fd07f..3f3bd48 100644
--- a/deps/boringssl/src/crypto/dsa/dsa_asn1.c
+++ b/deps/boringssl/src/crypto/dsa/dsa_asn1.c
@@ -61,9 +61,45 @@
 #include <openssl/err.h>
 #include <openssl/mem.h>
 
+#include "internal.h"
 #include "../bytestring/internal.h"
 
 
+#define OPENSSL_DSA_MAX_MODULUS_BITS 10000
+
+// This function is in dsa_asn1.c rather than dsa.c because it is reachable from
+// |EVP_PKEY| parsers. This makes it easier for the static linker to drop most
+// of the DSA implementation.
+int dsa_check_parameters(const DSA *dsa) {
+  if (!dsa->p || !dsa->q || !dsa->g) {
+    OPENSSL_PUT_ERROR(DSA, DSA_R_MISSING_PARAMETERS);
+    return 0;
+  }
+
+  // Reject invalid parameters. In particular, signing will infinite loop if |g|
+  // is zero.
+  if (BN_is_zero(dsa->p) || BN_is_zero(dsa->q) || BN_is_zero(dsa->g)) {
+    OPENSSL_PUT_ERROR(DSA, DSA_R_INVALID_PARAMETERS);
+    return 0;
+  }
+
+  // FIPS 186-4 allows only three different sizes for q.
+  unsigned q_bits = BN_num_bits(dsa->q);
+  if (q_bits != 160 && q_bits != 224 && q_bits != 256) {
+    OPENSSL_PUT_ERROR(DSA, DSA_R_BAD_Q_VALUE);
+    return 0;
+  }
+
+  // Bound |dsa->p| to avoid a DoS vector. Note this limit is much larger than
+  // the one in FIPS 186-4, which only allows L = 1024, 2048, and 3072.
+  if (BN_num_bits(dsa->p) > OPENSSL_DSA_MAX_MODULUS_BITS) {
+    OPENSSL_PUT_ERROR(DSA, DSA_R_MODULUS_TOO_LARGE);
+    return 0;
+  }
+
+  return 1;
+}
+
 static int parse_integer(CBS *cbs, BIGNUM **out) {
   assert(*out == NULL);
   *out = BN_new();
@@ -124,10 +160,16 @@
       !parse_integer(&child, &ret->g) ||
       CBS_len(&child) != 0) {
     OPENSSL_PUT_ERROR(DSA, DSA_R_DECODE_ERROR);
-    DSA_free(ret);
-    return NULL;
+    goto err;
+  }
+  if (!dsa_check_parameters(ret)) {
+    goto err;
   }
   return ret;
+
+err:
+  DSA_free(ret);
+  return NULL;
 }
 
 int DSA_marshal_public_key(CBB *cbb, const DSA *dsa) {
@@ -156,10 +198,16 @@
       !parse_integer(&child, &ret->g) ||
       CBS_len(&child) != 0) {
     OPENSSL_PUT_ERROR(DSA, DSA_R_DECODE_ERROR);
-    DSA_free(ret);
-    return NULL;
+    goto err;
+  }
+  if (!dsa_check_parameters(ret)) {
+    goto err;
   }
   return ret;
+
+err:
+  DSA_free(ret);
+  return NULL;
 }
 
 int DSA_marshal_parameters(CBB *cbb, const DSA *dsa) {
@@ -203,6 +251,9 @@
     OPENSSL_PUT_ERROR(DSA, DSA_R_DECODE_ERROR);
     goto err;
   }
+  if (!dsa_check_parameters(ret)) {
+    goto err;
+  }
   return ret;
 
 err:
diff --git a/deps/boringssl/src/crypto/dsa/internal.h b/deps/boringssl/src/crypto/dsa/internal.h
new file mode 100644
index 0000000..2d86edb
--- /dev/null
+++ b/deps/boringssl/src/crypto/dsa/internal.h
@@ -0,0 +1,34 @@
+/* Copyright (c) 2020, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#ifndef OPENSSL_HEADER_DSA_INTERNAL_H
+#define OPENSSL_HEADER_DSA_INTERNAL_H
+
+#include <openssl/dsa.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+// dsa_check_parameters checks that |dsa|'s group is within DoS bounds. It
+// returns one on success and zero on error.
+int dsa_check_parameters(const DSA *dsa);
+
+
+#if defined(__cplusplus)
+}  // extern C
+#endif
+
+#endif  // OPENSSL_HEADER_DSA_INTERNAL_H
diff --git a/deps/boringssl/src/crypto/ec_extra/ec_asn1.c b/deps/boringssl/src/crypto/ec_extra/ec_asn1.c
index 9769d01..56cbbed 100644
--- a/deps/boringssl/src/crypto/ec_extra/ec_asn1.c
+++ b/deps/boringssl/src/crypto/ec_extra/ec_asn1.c
@@ -241,21 +241,6 @@
   return 1;
 }
 
-// is_unsigned_integer returns one if |cbs| is a valid unsigned DER INTEGER and
-// zero otherwise.
-static int is_unsigned_integer(const CBS *cbs) {
-  if (CBS_len(cbs) == 0) {
-    return 0;
-  }
-  uint8_t byte = CBS_data(cbs)[0];
-  if ((byte & 0x80) ||
-      (byte == 0 && CBS_len(cbs) > 1 && (CBS_data(cbs)[1] & 0x80) == 0)) {
-    // Negative or not minimally-encoded.
-    return 0;
-  }
-  return 1;
-}
-
 // kPrimeFieldOID is the encoding of 1.2.840.10045.1.1.
 static const uint8_t kPrimeField[] = {0x2a, 0x86, 0x48, 0xce, 0x3d, 0x01, 0x01};
 
@@ -276,7 +261,7 @@
       OPENSSL_memcmp(CBS_data(&field_type), kPrimeField, sizeof(kPrimeField)) !=
           0 ||
       !CBS_get_asn1(&field_id, out_prime, CBS_ASN1_INTEGER) ||
-      !is_unsigned_integer(out_prime) ||
+      !CBS_is_unsigned_asn1_integer(out_prime) ||
       CBS_len(&field_id) != 0 ||
       !CBS_get_asn1(&params, &curve, CBS_ASN1_SEQUENCE) ||
       !CBS_get_asn1(&curve, out_a, CBS_ASN1_OCTETSTRING) ||
@@ -286,7 +271,7 @@
       CBS_len(&curve) != 0 ||
       !CBS_get_asn1(&params, &base, CBS_ASN1_OCTETSTRING) ||
       !CBS_get_asn1(&params, out_order, CBS_ASN1_INTEGER) ||
-      !is_unsigned_integer(out_order) ||
+      !CBS_is_unsigned_asn1_integer(out_order) ||
       !CBS_get_optional_asn1(&params, &cofactor, &has_cofactor,
                              CBS_ASN1_INTEGER) ||
       CBS_len(&params) != 0) {
diff --git a/deps/boringssl/src/crypto/ec_extra/hash_to_curve.c b/deps/boringssl/src/crypto/ec_extra/hash_to_curve.c
index 6f8b599..9c82454 100644
--- a/deps/boringssl/src/crypto/ec_extra/hash_to_curve.c
+++ b/deps/boringssl/src/crypto/ec_extra/hash_to_curve.c
@@ -50,12 +50,10 @@
 
 // expand_message_xmd implements the operation described in section 5.3.1 of
 // draft-irtf-cfrg-hash-to-curve-07. It returns one on success and zero on
-// allocation failure or if |out_len| was too large. If |is_draft06| is one, it
-// implements the operation from draft-irtf-cfrg-hash-to-curve-06 instead.
+// allocation failure or if |out_len| was too large.
 static int expand_message_xmd(const EVP_MD *md, uint8_t *out, size_t out_len,
                               const uint8_t *msg, size_t msg_len,
-                              const uint8_t *dst, size_t dst_len,
-                              int is_draft06) {
+                              const uint8_t *dst, size_t dst_len) {
   int ret = 0;
   const size_t block_size = EVP_MD_block_size(md);
   const size_t md_size = EVP_MD_size(md);
@@ -88,9 +86,8 @@
       !EVP_DigestUpdate(&ctx, kZeros, block_size) ||
       !EVP_DigestUpdate(&ctx, msg, msg_len) ||
       !EVP_DigestUpdate(&ctx, l_i_b_str_zero, sizeof(l_i_b_str_zero)) ||
-      (is_draft06 && !EVP_DigestUpdate(&ctx, &dst_len_u8, 1)) ||
       !EVP_DigestUpdate(&ctx, dst, dst_len) ||
-      (!is_draft06 && !EVP_DigestUpdate(&ctx, &dst_len_u8, 1)) ||
+      !EVP_DigestUpdate(&ctx, &dst_len_u8, 1) ||
       !EVP_DigestFinal_ex(&ctx, b_0, NULL)) {
     goto err;
   }
@@ -114,9 +111,8 @@
     if (!EVP_DigestInit_ex(&ctx, md, NULL) ||
         !EVP_DigestUpdate(&ctx, b_i, md_size) ||
         !EVP_DigestUpdate(&ctx, &i, 1) ||
-        (is_draft06 && !EVP_DigestUpdate(&ctx, &dst_len_u8, 1)) ||
         !EVP_DigestUpdate(&ctx, dst, dst_len) ||
-        (!is_draft06 && !EVP_DigestUpdate(&ctx, &dst_len_u8, 1)) ||
+        !EVP_DigestUpdate(&ctx, &dst_len_u8, 1) ||
         !EVP_DigestFinal_ex(&ctx, b_i, NULL)) {
       goto err;
     }
@@ -175,12 +171,11 @@
 static int hash_to_field2(const EC_GROUP *group, const EVP_MD *md,
                           EC_FELEM *out1, EC_FELEM *out2, const uint8_t *dst,
                           size_t dst_len, unsigned k, const uint8_t *msg,
-                          size_t msg_len, int is_draft06) {
+                          size_t msg_len) {
   size_t L;
   uint8_t buf[4 * EC_MAX_BYTES];
   if (!num_bytes_to_derive(&L, &group->field, k) ||
-      !expand_message_xmd(md, buf, 2 * L, msg, msg_len, dst, dst_len,
-                          is_draft06)) {
+      !expand_message_xmd(md, buf, 2 * L, msg, msg_len, dst, dst_len)) {
     return 0;
   }
   BN_ULONG words[2 * EC_MAX_WORDS];
@@ -196,12 +191,11 @@
 // group order rather than a field element. |k| is the security factor.
 static int hash_to_scalar(const EC_GROUP *group, const EVP_MD *md,
                           EC_SCALAR *out, const uint8_t *dst, size_t dst_len,
-                          unsigned k, const uint8_t *msg, size_t msg_len,
-                          int is_draft06) {
+                          unsigned k, const uint8_t *msg, size_t msg_len) {
   size_t L;
   uint8_t buf[EC_MAX_BYTES * 2];
   if (!num_bytes_to_derive(&L, &group->order, k) ||
-      !expand_message_xmd(md, buf, L, msg, msg_len, dst, dst_len, is_draft06)) {
+      !expand_message_xmd(md, buf, L, msg, msg_len, dst, dst_len)) {
     return 0;
   }
 
@@ -310,10 +304,9 @@
 static int hash_to_curve(const EC_GROUP *group, const EVP_MD *md,
                          const EC_FELEM *Z, const EC_FELEM *c2, unsigned k,
                          EC_RAW_POINT *out, const uint8_t *dst, size_t dst_len,
-                         const uint8_t *msg, size_t msg_len, int is_draft06) {
+                         const uint8_t *msg, size_t msg_len) {
   EC_FELEM u0, u1;
-  if (!hash_to_field2(group, md, &u0, &u1, dst, dst_len, k, msg, msg_len,
-                      is_draft06)) {
+  if (!hash_to_field2(group, md, &u0, &u1, dst, dst_len, k, msg, msg_len)) {
     return 0;
   }
 
@@ -376,7 +369,7 @@
   ec_felem_neg(group, &Z, &Z);
 
   return hash_to_curve(group, EVP_sha512(), &Z, &c2, /*k=*/192, out, dst,
-                       dst_len, msg, msg_len, /*is_draft06=*/0);
+                       dst_len, msg, msg_len);
 }
 
 int ec_hash_to_scalar_p384_xmd_sha512_draft07(
@@ -388,38 +381,5 @@
   }
 
   return hash_to_scalar(group, EVP_sha512(), out, dst, dst_len, /*k=*/192, msg,
-                        msg_len, /*is_draft06=*/0);
-}
-
-int ec_hash_to_curve_p521_xmd_sha512_sswu_draft06(
-    const EC_GROUP *group, EC_RAW_POINT *out, const uint8_t *dst,
-    size_t dst_len, const uint8_t *msg, size_t msg_len) {
-  // See section 8.3 of draft-irtf-cfrg-hash-to-curve-06.
-  if (EC_GROUP_get_curve_name(group) != NID_secp521r1) {
-    OPENSSL_PUT_ERROR(EC, EC_R_GROUP_MISMATCH);
-    return 0;
-  }
-
-  // Z = -4, c2 = 8.
-  EC_FELEM Z, c2;
-  if (!felem_from_u8(group, &Z, 4) ||
-      !felem_from_u8(group, &c2, 8)) {
-    return 0;
-  }
-  ec_felem_neg(group, &Z, &Z);
-
-  return hash_to_curve(group, EVP_sha512(), &Z, &c2, /*k=*/256, out, dst,
-                       dst_len, msg, msg_len, /*is_draft06=*/1);
-}
-
-int ec_hash_to_scalar_p521_xmd_sha512_draft06(
-    const EC_GROUP *group, EC_SCALAR *out, const uint8_t *dst, size_t dst_len,
-    const uint8_t *msg, size_t msg_len) {
-  if (EC_GROUP_get_curve_name(group) != NID_secp521r1) {
-    OPENSSL_PUT_ERROR(EC, EC_R_GROUP_MISMATCH);
-    return 0;
-  }
-
-  return hash_to_scalar(group, EVP_sha512(), out, dst, dst_len, /*k=*/256, msg,
-                        msg_len, /*is_draft06=*/1);
+                        msg_len);
 }
diff --git a/deps/boringssl/src/crypto/ec_extra/internal.h b/deps/boringssl/src/crypto/ec_extra/internal.h
index 940a414..55314ac 100644
--- a/deps/boringssl/src/crypto/ec_extra/internal.h
+++ b/deps/boringssl/src/crypto/ec_extra/internal.h
@@ -48,28 +48,6 @@
     const EC_GROUP *group, EC_SCALAR *out, const uint8_t *dst, size_t dst_len,
     const uint8_t *msg, size_t msg_len);
 
-// ec_hash_to_curve_p521_xmd_sha512_sswu_draft06 hashes |msg| to a point on
-// |group| and writes the result to |out|, implementing the
-// P521_XMD:SHA-512_SSWU_RO_ suite from draft-irtf-cfrg-hash-to-curve-06. It
-// returns one on success and zero on error.
-//
-// This function implements an older version of the draft and should not be used
-// in new code.
-OPENSSL_EXPORT int ec_hash_to_curve_p521_xmd_sha512_sswu_draft06(
-    const EC_GROUP *group, EC_RAW_POINT *out, const uint8_t *dst,
-    size_t dst_len, const uint8_t *msg, size_t msg_len);
-
-// ec_hash_to_scalar_p521_xmd_sha512_draft06 hashes |msg| to a scalar on |group|
-// and writes the result to |out|, using the hash_to_field operation from the
-// P521_XMD:SHA-512_SSWU_RO_ suite from draft-irtf-cfrg-hash-to-curve-06, but
-// generating a value modulo the group order rather than a field element.
-//
-// This function implements an older version of the draft and should not be used
-// in new code.
-OPENSSL_EXPORT int ec_hash_to_scalar_p521_xmd_sha512_draft06(
-    const EC_GROUP *group, EC_SCALAR *out, const uint8_t *dst, size_t dst_len,
-    const uint8_t *msg, size_t msg_len);
-
 
 #if defined(__cplusplus)
 }  // extern C
diff --git a/deps/boringssl/src/crypto/evp/evp.c b/deps/boringssl/src/crypto/evp/evp.c
index 60fdf64..653d657 100644
--- a/deps/boringssl/src/crypto/evp/evp.c
+++ b/deps/boringssl/src/crypto/evp/evp.c
@@ -76,6 +76,10 @@
 // TODO(davidben): Fix Node to not touch the error queue itself and remove this.
 OPENSSL_DECLARE_ERROR_REASON(EVP, NOT_XOF_OR_INVALID_LENGTH)
 
+// The HPKE module uses the EVP error namespace, but it lives in another
+// directory.
+OPENSSL_DECLARE_ERROR_REASON(EVP, EMPTY_PSK)
+
 EVP_PKEY *EVP_PKEY_new(void) {
   EVP_PKEY *ret;
 
diff --git a/deps/boringssl/src/crypto/evp/evp_asn1.c b/deps/boringssl/src/crypto/evp/evp_asn1.c
index fc1dce3..2f3e115 100644
--- a/deps/boringssl/src/crypto/evp/evp_asn1.c
+++ b/deps/boringssl/src/crypto/evp/evp_asn1.c
@@ -65,6 +65,7 @@
 #include <openssl/rsa.h>
 
 #include "internal.h"
+#include "../bytestring/internal.h"
 #include "../internal.h"
 
 
@@ -386,3 +387,161 @@
   EVP_PKEY_free(ret);
   return NULL;
 }
+
+EVP_PKEY *d2i_PUBKEY(EVP_PKEY **out, const uint8_t **inp, long len) {
+  if (len < 0) {
+    return NULL;
+  }
+  CBS cbs;
+  CBS_init(&cbs, *inp, (size_t)len);
+  EVP_PKEY *ret = EVP_parse_public_key(&cbs);
+  if (ret == NULL) {
+    return NULL;
+  }
+  if (out != NULL) {
+    EVP_PKEY_free(*out);
+    *out = ret;
+  }
+  *inp = CBS_data(&cbs);
+  return ret;
+}
+
+int i2d_PUBKEY(const EVP_PKEY *pkey, uint8_t **outp) {
+  if (pkey == NULL) {
+    return 0;
+  }
+
+  CBB cbb;
+  if (!CBB_init(&cbb, 128) ||
+      !EVP_marshal_public_key(&cbb, pkey)) {
+    CBB_cleanup(&cbb);
+    return -1;
+  }
+  return CBB_finish_i2d(&cbb, outp);
+}
+
+RSA *d2i_RSA_PUBKEY(RSA **out, const uint8_t **inp, long len) {
+  if (len < 0) {
+    return NULL;
+  }
+  CBS cbs;
+  CBS_init(&cbs, *inp, (size_t)len);
+  EVP_PKEY *pkey = EVP_parse_public_key(&cbs);
+  if (pkey == NULL) {
+    return NULL;
+  }
+  RSA *rsa = EVP_PKEY_get1_RSA(pkey);
+  EVP_PKEY_free(pkey);
+  if (rsa == NULL) {
+    return NULL;
+  }
+  if (out != NULL) {
+    RSA_free(*out);
+    *out = rsa;
+  }
+  *inp = CBS_data(&cbs);
+  return rsa;
+}
+
+int i2d_RSA_PUBKEY(const RSA *rsa, uint8_t **outp) {
+  if (rsa == NULL) {
+    return 0;
+  }
+
+  int ret = -1;
+  EVP_PKEY *pkey = EVP_PKEY_new();
+  if (pkey == NULL ||
+      !EVP_PKEY_set1_RSA(pkey, (RSA *)rsa)) {
+    goto err;
+  }
+
+  ret = i2d_PUBKEY(pkey, outp);
+
+err:
+  EVP_PKEY_free(pkey);
+  return ret;
+}
+
+DSA *d2i_DSA_PUBKEY(DSA **out, const uint8_t **inp, long len) {
+  if (len < 0) {
+    return NULL;
+  }
+  CBS cbs;
+  CBS_init(&cbs, *inp, (size_t)len);
+  EVP_PKEY *pkey = EVP_parse_public_key(&cbs);
+  if (pkey == NULL) {
+    return NULL;
+  }
+  DSA *dsa = EVP_PKEY_get1_DSA(pkey);
+  EVP_PKEY_free(pkey);
+  if (dsa == NULL) {
+    return NULL;
+  }
+  if (out != NULL) {
+    DSA_free(*out);
+    *out = dsa;
+  }
+  *inp = CBS_data(&cbs);
+  return dsa;
+}
+
+int i2d_DSA_PUBKEY(const DSA *dsa, uint8_t **outp) {
+  if (dsa == NULL) {
+    return 0;
+  }
+
+  int ret = -1;
+  EVP_PKEY *pkey = EVP_PKEY_new();
+  if (pkey == NULL ||
+      !EVP_PKEY_set1_DSA(pkey, (DSA *)dsa)) {
+    goto err;
+  }
+
+  ret = i2d_PUBKEY(pkey, outp);
+
+err:
+  EVP_PKEY_free(pkey);
+  return ret;
+}
+
+EC_KEY *d2i_EC_PUBKEY(EC_KEY **out, const uint8_t **inp, long len) {
+  if (len < 0) {
+    return NULL;
+  }
+  CBS cbs;
+  CBS_init(&cbs, *inp, (size_t)len);
+  EVP_PKEY *pkey = EVP_parse_public_key(&cbs);
+  if (pkey == NULL) {
+    return NULL;
+  }
+  EC_KEY *ec_key = EVP_PKEY_get1_EC_KEY(pkey);
+  EVP_PKEY_free(pkey);
+  if (ec_key == NULL) {
+    return NULL;
+  }
+  if (out != NULL) {
+    EC_KEY_free(*out);
+    *out = ec_key;
+  }
+  *inp = CBS_data(&cbs);
+  return ec_key;
+}
+
+int i2d_EC_PUBKEY(const EC_KEY *ec_key, uint8_t **outp) {
+  if (ec_key == NULL) {
+    return 0;
+  }
+
+  int ret = -1;
+  EVP_PKEY *pkey = EVP_PKEY_new();
+  if (pkey == NULL ||
+      !EVP_PKEY_set1_EC_KEY(pkey, (EC_KEY *)ec_key)) {
+    goto err;
+  }
+
+  ret = i2d_PUBKEY(pkey, outp);
+
+err:
+  EVP_PKEY_free(pkey);
+  return ret;
+}
diff --git a/deps/boringssl/src/crypto/evp/p_dsa_asn1.c b/deps/boringssl/src/crypto/evp/p_dsa_asn1.c
index d50e0fc..ac91127 100644
--- a/deps/boringssl/src/crypto/evp/p_dsa_asn1.c
+++ b/deps/boringssl/src/crypto/evp/p_dsa_asn1.c
@@ -141,9 +141,13 @@
     goto err;
   }
 
-  // Decode the key.
+  // Decode the key. To avoid DoS attacks when importing private keys, we bound
+  // |dsa->priv_key| against |dsa->q|, which is itself bounded by
+  // |DSA_parse_parameters|. (We cannot call |BN_num_bits| on |dsa->priv_key|.
+  // That would leak a secret bit width.)
   if (!BN_parse_asn1_unsigned(key, dsa->priv_key) ||
-      CBS_len(key) != 0) {
+      CBS_len(key) != 0 ||
+      BN_cmp(dsa->priv_key, dsa->q) >= 0) {
     OPENSSL_PUT_ERROR(EVP, EVP_R_DECODE_ERROR);
     goto err;
   }
diff --git a/deps/boringssl/src/crypto/fipsmodule/CMakeLists.txt b/deps/boringssl/src/crypto/fipsmodule/CMakeLists.txt
index a675fbd..83cf3f7 100644
--- a/deps/boringssl/src/crypto/fipsmodule/CMakeLists.txt
+++ b/deps/boringssl/src/crypto/fipsmodule/CMakeLists.txt
@@ -124,6 +124,22 @@
 perlasm(x86_64-mont.${ASM_EXT} bn/asm/x86_64-mont.pl)
 perlasm(x86-mont.${ASM_EXT} bn/asm/x86-mont.pl)
 
+function(cpreprocess dest src)
+  set(TARGET "")
+  if(CMAKE_ASM_COMPILER_TARGET)
+    set(TARGET "--target=${CMAKE_ASM_COMPILER_TARGET}")
+  endif()
+
+  add_custom_command(
+    OUTPUT ${dest}
+    COMMAND ${CMAKE_ASM_COMPILER} ${TARGET} $CMAKE_ASM_FLAGS -E ${src} -I${PROJECT_SOURCE_DIR}/include > ${dest}
+    DEPENDS
+    ${src}
+    ${PROJECT_SOURCE_DIR}/include/openssl/arm_arch.h
+    WORKING_DIRECTORY .
+  )
+endfunction()
+
 if(FIPS_DELOCATE)
   if(FIPS_SHARED)
     error("Can't set both delocate and shared mode for FIPS build")
@@ -144,6 +160,18 @@
     bcm.c
   )
 
+  if(${ARCH} STREQUAL "aarch64")
+    # Perlasm output on Aarch64 needs to pass through the C preprocessor before
+    # it can be parsed by delocate.
+    foreach(asm ${BCM_ASM_SOURCES})
+      cpreprocess(${asm}.s ${asm})
+      list(APPEND BCM_ASM_PROCESSED_SOURCES "${asm}.s")
+    endforeach()
+  else()
+    # No preprocessing is required on other platforms.
+    set(BCM_ASM_PROCESSED_SOURCES ${BCM_ASM_SOURCES})
+  endif()
+
   add_dependencies(bcm_c_generated_asm global_target)
 
   set_target_properties(bcm_c_generated_asm PROPERTIES COMPILE_OPTIONS "-S")
@@ -152,8 +180,8 @@
   go_executable(delocate boringssl.googlesource.com/boringssl/util/fipstools/delocate)
   add_custom_command(
     OUTPUT bcm-delocated.S
-    COMMAND ./delocate -a $<TARGET_FILE:bcm_c_generated_asm> -o bcm-delocated.S ${BCM_ASM_SOURCES}
-    DEPENDS bcm_c_generated_asm delocate ${BCM_ASM_SOURCES}
+    COMMAND ./delocate -a $<TARGET_FILE:bcm_c_generated_asm> -o bcm-delocated.S ${BCM_ASM_PROCESSED_SOURCES}
+    DEPENDS bcm_c_generated_asm delocate ${BCM_ASM_PROCESSED_SOURCES}
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
   )
 
@@ -170,11 +198,16 @@
   set_target_properties(bcm_hashunset PROPERTIES POSITION_INDEPENDENT_CODE ON)
   set_target_properties(bcm_hashunset PROPERTIES LINKER_LANGUAGE C)
 
+  set(MAYBE_INJECT_HASH_SHA256_FLAG "")
+  if (ARCH STREQUAL "aarch64")
+    set(MAYBE_INJECT_HASH_SHA256_FLAG "-sha256")
+  endif()
+
   go_executable(inject_hash
 	        boringssl.googlesource.com/boringssl/util/fipstools/inject_hash)
   add_custom_command(
     OUTPUT bcm.o
-    COMMAND ./inject_hash -o bcm.o -in-archive $<TARGET_FILE:bcm_hashunset>
+    COMMAND ./inject_hash -o bcm.o -in-archive $<TARGET_FILE:bcm_hashunset> ${MAYBE_INJECT_HASH_SHA256_FLAG}
     DEPENDS bcm_hashunset inject_hash
     WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
   )
diff --git a/deps/boringssl/src/crypto/fipsmodule/bcm.c b/deps/boringssl/src/crypto/fipsmodule/bcm.c
index 1d9a919..601c4a8 100644
--- a/deps/boringssl/src/crypto/fipsmodule/bcm.c
+++ b/deps/boringssl/src/crypto/fipsmodule/bcm.c
@@ -60,6 +60,8 @@
 #include "cipher/e_aes.c"
 #include "cipher/e_des.c"
 #include "des/des.c"
+#include "dh/check.c"
+#include "dh/dh.c"
 #include "digest/digest.c"
 #include "digest/digests.c"
 #include "ecdh/ecdh.c"
@@ -193,7 +195,7 @@
   assert_within(rodata_start, kP256Params, rodata_end);
   assert_within(rodata_start, kPKCS1SigPrefixes, rodata_end);
 
-#if defined(OPENSSL_ANDROID)
+#if defined(OPENSSL_AARCH64) || defined(OPENSSL_ANDROID)
   uint8_t result[SHA256_DIGEST_LENGTH];
   const EVP_MD *const kHashFunction = EVP_sha256();
 #else
diff --git a/deps/boringssl/src/crypto/fipsmodule/bn/bn.c b/deps/boringssl/src/crypto/fipsmodule/bn/bn.c
index e3f1c90..424d462 100644
--- a/deps/boringssl/src/crypto/fipsmodule/bn/bn.c
+++ b/deps/boringssl/src/crypto/fipsmodule/bn/bn.c
@@ -101,26 +101,7 @@
 }
 
 void BN_clear_free(BIGNUM *bn) {
-  char should_free;
-
-  if (bn == NULL) {
-    return;
-  }
-
-  if (bn->d != NULL) {
-    if ((bn->flags & BN_FLG_STATIC_DATA) == 0) {
-      OPENSSL_free(bn->d);
-    } else {
-      OPENSSL_cleanse(bn->d, bn->dmax * sizeof(bn->d[0]));
-    }
-  }
-
-  should_free = (bn->flags & BN_FLG_MALLOCED) != 0;
-  if (should_free) {
-    OPENSSL_free(bn);
-  } else {
-    OPENSSL_cleanse(bn, sizeof(BIGNUM));
-  }
+  BN_free(bn);
 }
 
 BIGNUM *BN_dup(const BIGNUM *src) {
@@ -302,6 +283,18 @@
   return 1;
 }
 
+void bn_set_static_words(BIGNUM *bn, const BN_ULONG *words, size_t num) {
+  if ((bn->flags & BN_FLG_STATIC_DATA) == 0) {
+    OPENSSL_free(bn->d);
+  }
+  bn->d = (BN_ULONG *)words;
+
+  bn->width = num;
+  bn->dmax = num;
+  bn->neg = 0;
+  bn->flags |= BN_FLG_STATIC_DATA;
+}
+
 int bn_fits_in_words(const BIGNUM *bn, size_t num) {
   // All words beyond |num| must be zero.
   BN_ULONG mask = 0;
diff --git a/deps/boringssl/src/crypto/fipsmodule/bn/div.c b/deps/boringssl/src/crypto/fipsmodule/bn/div.c
index 27b591c..333c770 100644
--- a/deps/boringssl/src/crypto/fipsmodule/bn/div.c
+++ b/deps/boringssl/src/crypto/fipsmodule/bn/div.c
@@ -64,10 +64,10 @@
 #include "internal.h"
 
 
-#if !defined(BN_CAN_DIVIDE_ULLONG) && !defined(BN_CAN_USE_INLINE_ASM)
 // bn_div_words divides a double-width |h|,|l| by |d| and returns the result,
 // which must fit in a |BN_ULONG|.
-static BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l, BN_ULONG d) {
+OPENSSL_UNUSED static BN_ULONG bn_div_words(BN_ULONG h, BN_ULONG l,
+                                            BN_ULONG d) {
   BN_ULONG dh, dl, q, ret = 0, th, tl, t;
   int i, count = 2;
 
@@ -135,7 +135,6 @@
   ret |= q;
   return ret;
 }
-#endif  // !defined(BN_CAN_DIVIDE_ULLONG) && !defined(BN_CAN_USE_INLINE_ASM)
 
 static inline void bn_div_rem_words(BN_ULONG *quotient_out, BN_ULONG *rem_out,
                                     BN_ULONG n0, BN_ULONG n1, BN_ULONG d0) {
diff --git a/deps/boringssl/src/crypto/fipsmodule/bn/internal.h b/deps/boringssl/src/crypto/fipsmodule/bn/internal.h
index af32b3d..623e0c6 100644
--- a/deps/boringssl/src/crypto/fipsmodule/bn/internal.h
+++ b/deps/boringssl/src/crypto/fipsmodule/bn/internal.h
@@ -123,7 +123,7 @@
 #ifndef OPENSSL_HEADER_BN_INTERNAL_H
 #define OPENSSL_HEADER_BN_INTERNAL_H
 
-#include <openssl/base.h>
+#include <openssl/bn.h>
 
 #if defined(OPENSSL_X86_64) && defined(_MSC_VER)
 OPENSSL_MSVC_PRAGMA(warning(push, 3))
@@ -241,6 +241,14 @@
 // least significant word first.
 int bn_set_words(BIGNUM *bn, const BN_ULONG *words, size_t num);
 
+// bn_set_static_words acts like |bn_set_words|, but doesn't copy the data. A
+// flag is set on |bn| so that |BN_free| won't attempt to free the data.
+//
+// The |STATIC_BIGNUM| macro is probably a better solution for this outside of
+// the FIPS module. Inside of the FIPS module that macro generates rel.ro data,
+// which doesn't work with FIPS requirements.
+void bn_set_static_words(BIGNUM *bn, const BN_ULONG *words, size_t num);
+
 // bn_fits_in_words returns one if |bn| may be represented in |num| words, plus
 // a sign bit, and zero otherwise.
 int bn_fits_in_words(const BIGNUM *bn, size_t num);
@@ -404,9 +412,19 @@
 int bn_mod_exp_base_2_consttime(BIGNUM *r, unsigned p, const BIGNUM *n,
                                 BN_CTX *ctx);
 
-#if defined(OPENSSL_X86_64) && defined(_MSC_VER)
+#if defined(_MSC_VER)
+#if defined(OPENSSL_X86_64)
 #define BN_UMULT_LOHI(low, high, a, b) ((low) = _umul128((a), (b), &(high)))
+#elif defined(OPENSSL_AARCH64)
+#define BN_UMULT_LOHI(low, high, a, b) \
+  do {                                 \
+    const BN_ULONG _a = (a);           \
+    const BN_ULONG _b = (b);           \
+    (low) = _a * _b;                   \
+    (high) = __umulh(_a, _b);          \
+  } while (0)
 #endif
+#endif  // _MSC_VER
 
 #if !defined(BN_ULLONG) && !defined(BN_UMULT_LOHI)
 #error "Either BN_ULLONG or BN_UMULT_LOHI must be defined on every platform."
diff --git a/deps/boringssl/src/crypto/fipsmodule/cipher/e_aes.c b/deps/boringssl/src/crypto/fipsmodule/cipher/e_aes.c
index 8f4907f..6df2b7b 100644
--- a/deps/boringssl/src/crypto/fipsmodule/cipher/e_aes.c
+++ b/deps/boringssl/src/crypto/fipsmodule/cipher/e_aes.c
@@ -68,6 +68,8 @@
 OPENSSL_MSVC_PRAGMA(warning(push))
 OPENSSL_MSVC_PRAGMA(warning(disable: 4702))  // Unreachable code.
 
+#define AES_GCM_NONCE_LENGTH 12
+
 #if defined(BSAES)
 static void vpaes_ctr32_encrypt_blocks_with_bsaes(const uint8_t *in,
                                                   uint8_t *out, size_t blocks,
@@ -630,7 +632,7 @@
   out->nid = NID_aes_128_gcm;
   out->block_size = 1;
   out->key_len = 16;
-  out->iv_len = 12;
+  out->iv_len = AES_GCM_NONCE_LENGTH;
   out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING;
   out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY |
                EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
@@ -698,7 +700,7 @@
   out->nid = NID_aes_192_gcm;
   out->block_size = 1;
   out->key_len = 24;
-  out->iv_len = 12;
+  out->iv_len = AES_GCM_NONCE_LENGTH;
   out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING;
   out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY |
                EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
@@ -766,7 +768,7 @@
   out->nid = NID_aes_256_gcm;
   out->block_size = 1;
   out->key_len = 32;
-  out->iv_len = 12;
+  out->iv_len = AES_GCM_NONCE_LENGTH;
   out->ctx_size = sizeof(EVP_AES_GCM_CTX) + EVP_AES_GCM_CTX_PADDING;
   out->flags = EVP_CIPH_GCM_MODE | EVP_CIPH_CUSTOM_IV | EVP_CIPH_CUSTOM_COPY |
                EVP_CIPH_FLAG_CUSTOM_CIPHER | EVP_CIPH_ALWAYS_CALL_INIT |
@@ -931,21 +933,19 @@
 
 static void aead_aes_gcm_cleanup(EVP_AEAD_CTX *ctx) {}
 
-static int aead_aes_gcm_seal_scatter(const EVP_AEAD_CTX *ctx, uint8_t *out,
-                                     uint8_t *out_tag, size_t *out_tag_len,
-                                     size_t max_out_tag_len,
-                                     const uint8_t *nonce, size_t nonce_len,
-                                     const uint8_t *in, size_t in_len,
-                                     const uint8_t *extra_in,
-                                     size_t extra_in_len,
-                                     const uint8_t *ad, size_t ad_len) {
-  struct aead_aes_gcm_ctx *gcm_ctx = (struct aead_aes_gcm_ctx *) &ctx->state;
-
-  if (extra_in_len + ctx->tag_len < ctx->tag_len) {
+static int aead_aes_gcm_seal_scatter_impl(
+    const struct aead_aes_gcm_ctx *gcm_ctx,
+    uint8_t *out, uint8_t *out_tag, size_t *out_tag_len, size_t max_out_tag_len,
+    const uint8_t *nonce, size_t nonce_len,
+    const uint8_t *in, size_t in_len,
+    const uint8_t *extra_in, size_t extra_in_len,
+    const uint8_t *ad, size_t ad_len,
+    size_t tag_len) {
+  if (extra_in_len + tag_len < tag_len) {
     OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_TOO_LARGE);
     return 0;
   }
-  if (max_out_tag_len < extra_in_len + ctx->tag_len) {
+  if (max_out_tag_len < extra_in_len + tag_len) {
     OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BUFFER_TOO_SMALL);
     return 0;
   }
@@ -989,18 +989,35 @@
     }
   }
 
-  CRYPTO_gcm128_tag(&gcm, out_tag + extra_in_len, ctx->tag_len);
-  *out_tag_len = ctx->tag_len + extra_in_len;
+  CRYPTO_gcm128_tag(&gcm, out_tag + extra_in_len, tag_len);
+  *out_tag_len = tag_len + extra_in_len;
 
   return 1;
 }
 
-static int aead_aes_gcm_open_gather(const EVP_AEAD_CTX *ctx, uint8_t *out,
-                                    const uint8_t *nonce, size_t nonce_len,
-                                    const uint8_t *in, size_t in_len,
-                                    const uint8_t *in_tag, size_t in_tag_len,
-                                    const uint8_t *ad, size_t ad_len) {
-  struct aead_aes_gcm_ctx *gcm_ctx = (struct aead_aes_gcm_ctx *) &ctx->state;
+static int aead_aes_gcm_seal_scatter(const EVP_AEAD_CTX *ctx, uint8_t *out,
+                                     uint8_t *out_tag, size_t *out_tag_len,
+                                     size_t max_out_tag_len,
+                                     const uint8_t *nonce, size_t nonce_len,
+                                     const uint8_t *in, size_t in_len,
+                                     const uint8_t *extra_in,
+                                     size_t extra_in_len,
+                                     const uint8_t *ad, size_t ad_len) {
+  const struct aead_aes_gcm_ctx *gcm_ctx =
+      (const struct aead_aes_gcm_ctx *)&ctx->state;
+  return aead_aes_gcm_seal_scatter_impl(
+      gcm_ctx, out, out_tag, out_tag_len, max_out_tag_len, nonce, nonce_len, in,
+      in_len, extra_in, extra_in_len, ad, ad_len, ctx->tag_len);
+}
+
+static int aead_aes_gcm_open_gather_impl(const struct aead_aes_gcm_ctx *gcm_ctx,
+                                         uint8_t *out,
+                                         const uint8_t *nonce, size_t nonce_len,
+                                         const uint8_t *in, size_t in_len,
+                                         const uint8_t *in_tag,
+                                         size_t in_tag_len,
+                                         const uint8_t *ad, size_t ad_len,
+                                         size_t tag_len) {
   uint8_t tag[EVP_AEAD_AES_GCM_TAG_LEN];
 
   if (nonce_len == 0) {
@@ -1008,7 +1025,7 @@
     return 0;
   }
 
-  if (in_tag_len != ctx->tag_len) {
+  if (in_tag_len != tag_len) {
     OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
     return 0;
   }
@@ -1035,8 +1052,8 @@
     }
   }
 
-  CRYPTO_gcm128_tag(&gcm, tag, ctx->tag_len);
-  if (CRYPTO_memcmp(tag, in_tag, ctx->tag_len) != 0) {
+  CRYPTO_gcm128_tag(&gcm, tag, tag_len);
+  if (CRYPTO_memcmp(tag, in_tag, tag_len) != 0) {
     OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
     return 0;
   }
@@ -1044,11 +1061,22 @@
   return 1;
 }
 
+static int aead_aes_gcm_open_gather(const EVP_AEAD_CTX *ctx, uint8_t *out,
+                                    const uint8_t *nonce, size_t nonce_len,
+                                    const uint8_t *in, size_t in_len,
+                                    const uint8_t *in_tag, size_t in_tag_len,
+                                    const uint8_t *ad, size_t ad_len) {
+  struct aead_aes_gcm_ctx *gcm_ctx = (struct aead_aes_gcm_ctx *)&ctx->state;
+  return aead_aes_gcm_open_gather_impl(gcm_ctx, out, nonce, nonce_len, in,
+                                       in_len, in_tag, in_tag_len, ad, ad_len,
+                                       ctx->tag_len);
+}
+
 DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_128_gcm) {
   memset(out, 0, sizeof(EVP_AEAD));
 
   out->key_len = 16;
-  out->nonce_len = 12;
+  out->nonce_len = AES_GCM_NONCE_LENGTH;
   out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
   out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
   out->seal_scatter_supports_extra_in = 1;
@@ -1063,7 +1091,7 @@
   memset(out, 0, sizeof(EVP_AEAD));
 
   out->key_len = 24;
-  out->nonce_len = 12;
+  out->nonce_len = AES_GCM_NONCE_LENGTH;
   out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
   out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
   out->seal_scatter_supports_extra_in = 1;
@@ -1078,7 +1106,7 @@
   memset(out, 0, sizeof(EVP_AEAD));
 
   out->key_len = 32;
-  out->nonce_len = 12;
+  out->nonce_len = AES_GCM_NONCE_LENGTH;
   out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
   out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
   out->seal_scatter_supports_extra_in = 1;
@@ -1089,6 +1117,116 @@
   out->open_gather = aead_aes_gcm_open_gather;
 }
 
+static int aead_aes_gcm_init_randnonce(EVP_AEAD_CTX *ctx, const uint8_t *key,
+                                       size_t key_len,
+                                       size_t requested_tag_len) {
+  if (requested_tag_len != EVP_AEAD_DEFAULT_TAG_LENGTH) {
+    if (requested_tag_len < AES_GCM_NONCE_LENGTH) {
+      OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BUFFER_TOO_SMALL);
+      return 0;
+    }
+    requested_tag_len -= AES_GCM_NONCE_LENGTH;
+  }
+
+  if (!aead_aes_gcm_init(ctx, key, key_len, requested_tag_len)) {
+    return 0;
+  }
+
+  ctx->tag_len += AES_GCM_NONCE_LENGTH;
+  return 1;
+}
+
+static int aead_aes_gcm_seal_scatter_randnonce(
+    const EVP_AEAD_CTX *ctx,
+    uint8_t *out, uint8_t *out_tag, size_t *out_tag_len, size_t max_out_tag_len,
+    const uint8_t *external_nonce, size_t external_nonce_len,
+    const uint8_t *in, size_t in_len,
+    const uint8_t *extra_in, size_t extra_in_len,
+    const uint8_t *ad, size_t ad_len) {
+  if (external_nonce_len != 0) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_INVALID_NONCE_SIZE);
+    return 0;
+  }
+
+  uint8_t nonce[AES_GCM_NONCE_LENGTH];
+  if (max_out_tag_len < sizeof(nonce)) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BUFFER_TOO_SMALL);
+    return 0;
+  }
+
+  RAND_bytes(nonce, sizeof(nonce));
+  const struct aead_aes_gcm_ctx *gcm_ctx =
+      (const struct aead_aes_gcm_ctx *)&ctx->state;
+  if (!aead_aes_gcm_seal_scatter_impl(gcm_ctx, out, out_tag, out_tag_len,
+                                      max_out_tag_len - AES_GCM_NONCE_LENGTH,
+                                      nonce, sizeof(nonce), in, in_len,
+                                      extra_in, extra_in_len, ad, ad_len,
+                                      ctx->tag_len - AES_GCM_NONCE_LENGTH)) {
+    return 0;
+  }
+
+  assert(*out_tag_len + sizeof(nonce) <= max_out_tag_len);
+  memcpy(out_tag + *out_tag_len, nonce, sizeof(nonce));
+  *out_tag_len += sizeof(nonce);
+
+  return 1;
+}
+
+static int aead_aes_gcm_open_gather_randnonce(
+    const EVP_AEAD_CTX *ctx, uint8_t *out,
+    const uint8_t *external_nonce, size_t external_nonce_len,
+    const uint8_t *in, size_t in_len,
+    const uint8_t *in_tag, size_t in_tag_len,
+    const uint8_t *ad, size_t ad_len) {
+  if (external_nonce_len != 0) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_INVALID_NONCE_SIZE);
+    return 0;
+  }
+
+  if (in_tag_len < AES_GCM_NONCE_LENGTH) {
+    OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_BAD_DECRYPT);
+    return 0;
+  }
+  const uint8_t *nonce = in_tag + in_tag_len - AES_GCM_NONCE_LENGTH;
+
+  const struct aead_aes_gcm_ctx *gcm_ctx =
+      (const struct aead_aes_gcm_ctx *)&ctx->state;
+  return aead_aes_gcm_open_gather_impl(
+      gcm_ctx, out, nonce, AES_GCM_NONCE_LENGTH, in, in_len, in_tag,
+      in_tag_len - AES_GCM_NONCE_LENGTH, ad, ad_len,
+      ctx->tag_len - AES_GCM_NONCE_LENGTH);
+}
+
+DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_128_gcm_randnonce) {
+  memset(out, 0, sizeof(EVP_AEAD));
+
+  out->key_len = 16;
+  out->nonce_len = 0;
+  out->overhead = EVP_AEAD_AES_GCM_TAG_LEN + AES_GCM_NONCE_LENGTH;
+  out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN + AES_GCM_NONCE_LENGTH;
+  out->seal_scatter_supports_extra_in = 1;
+
+  out->init = aead_aes_gcm_init_randnonce;
+  out->cleanup = aead_aes_gcm_cleanup;
+  out->seal_scatter = aead_aes_gcm_seal_scatter_randnonce;
+  out->open_gather = aead_aes_gcm_open_gather_randnonce;
+}
+
+DEFINE_METHOD_FUNCTION(EVP_AEAD, EVP_aead_aes_256_gcm_randnonce) {
+  memset(out, 0, sizeof(EVP_AEAD));
+
+  out->key_len = 32;
+  out->nonce_len = 0;
+  out->overhead = EVP_AEAD_AES_GCM_TAG_LEN + AES_GCM_NONCE_LENGTH;
+  out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN + AES_GCM_NONCE_LENGTH;
+  out->seal_scatter_supports_extra_in = 1;
+
+  out->init = aead_aes_gcm_init_randnonce;
+  out->cleanup = aead_aes_gcm_cleanup;
+  out->seal_scatter = aead_aes_gcm_seal_scatter_randnonce;
+  out->open_gather = aead_aes_gcm_open_gather_randnonce;
+}
+
 struct aead_aes_gcm_tls12_ctx {
   struct aead_aes_gcm_ctx gcm_ctx;
   uint64_t min_next_nonce;
@@ -1128,7 +1266,7 @@
   struct aead_aes_gcm_tls12_ctx *gcm_ctx =
       (struct aead_aes_gcm_tls12_ctx *) &ctx->state;
 
-  if (nonce_len != 12) {
+  if (nonce_len != AES_GCM_NONCE_LENGTH) {
     OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_NONCE_SIZE);
     return 0;
   }
@@ -1155,7 +1293,7 @@
   memset(out, 0, sizeof(EVP_AEAD));
 
   out->key_len = 16;
-  out->nonce_len = 12;
+  out->nonce_len = AES_GCM_NONCE_LENGTH;
   out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
   out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
   out->seal_scatter_supports_extra_in = 1;
@@ -1170,7 +1308,7 @@
   memset(out, 0, sizeof(EVP_AEAD));
 
   out->key_len = 32;
-  out->nonce_len = 12;
+  out->nonce_len = AES_GCM_NONCE_LENGTH;
   out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
   out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
   out->seal_scatter_supports_extra_in = 1;
@@ -1223,7 +1361,7 @@
   struct aead_aes_gcm_tls13_ctx *gcm_ctx =
       (struct aead_aes_gcm_tls13_ctx *) &ctx->state;
 
-  if (nonce_len != 12) {
+  if (nonce_len != AES_GCM_NONCE_LENGTH) {
     OPENSSL_PUT_ERROR(CIPHER, CIPHER_R_UNSUPPORTED_NONCE_SIZE);
     return 0;
   }
@@ -1261,7 +1399,7 @@
   memset(out, 0, sizeof(EVP_AEAD));
 
   out->key_len = 16;
-  out->nonce_len = 12;
+  out->nonce_len = AES_GCM_NONCE_LENGTH;
   out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
   out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
   out->seal_scatter_supports_extra_in = 1;
@@ -1276,7 +1414,7 @@
   memset(out, 0, sizeof(EVP_AEAD));
 
   out->key_len = 32;
-  out->nonce_len = 12;
+  out->nonce_len = AES_GCM_NONCE_LENGTH;
   out->overhead = EVP_AEAD_AES_GCM_TAG_LEN;
   out->max_tag_len = EVP_AEAD_AES_GCM_TAG_LEN;
   out->seal_scatter_supports_extra_in = 1;
diff --git a/deps/boringssl/src/crypto/dh/check.c b/deps/boringssl/src/crypto/fipsmodule/dh/check.c
similarity index 100%
rename from deps/boringssl/src/crypto/dh/check.c
rename to deps/boringssl/src/crypto/fipsmodule/dh/check.c
diff --git a/deps/boringssl/src/crypto/dh/dh.c b/deps/boringssl/src/crypto/fipsmodule/dh/dh.c
similarity index 63%
rename from deps/boringssl/src/crypto/dh/dh.c
rename to deps/boringssl/src/crypto/fipsmodule/dh/dh.c
index 3df9a80..ab596e9 100644
--- a/deps/boringssl/src/crypto/dh/dh.c
+++ b/deps/boringssl/src/crypto/fipsmodule/dh/dh.c
@@ -60,17 +60,16 @@
 
 #include <openssl/bn.h>
 #include <openssl/err.h>
-#include <openssl/ex_data.h>
+#include <openssl/digest.h>
 #include <openssl/mem.h>
 #include <openssl/thread.h>
 
-#include "../internal.h"
+#include "../../internal.h"
+#include "../bn/internal.h"
 
 
 #define OPENSSL_DH_MAX_MODULUS_BITS 10000
 
-static CRYPTO_EX_DATA_CLASS g_ex_data_class = CRYPTO_EX_DATA_CLASS_INIT;
-
 DH *DH_new(void) {
   DH *dh = OPENSSL_malloc(sizeof(DH));
   if (dh == NULL) {
@@ -83,7 +82,6 @@
   CRYPTO_MUTEX_init(&dh->method_mont_p_lock);
 
   dh->references = 1;
-  CRYPTO_new_ex_data(&dh->ex_data);
 
   return dh;
 }
@@ -97,8 +95,6 @@
     return;
   }
 
-  CRYPTO_free_ex_data(&g_ex_data_class, dh, &dh->ex_data);
-
   BN_MONT_CTX_free(dh->method_mont_p);
   BN_clear_free(dh->p);
   BN_clear_free(dh->g);
@@ -189,120 +185,6 @@
   return 1;
 }
 
-int DH_generate_parameters_ex(DH *dh, int prime_bits, int generator, BN_GENCB *cb) {
-  // We generate DH parameters as follows
-  // find a prime q which is prime_bits/2 bits long.
-  // p=(2*q)+1 or (p-1)/2 = q
-  // For this case, g is a generator if
-  // g^((p-1)/q) mod p != 1 for values of q which are the factors of p-1.
-  // Since the factors of p-1 are q and 2, we just need to check
-  // g^2 mod p != 1 and g^q mod p != 1.
-  //
-  // Having said all that,
-  // there is another special case method for the generators 2, 3 and 5.
-  // for 2, p mod 24 == 11
-  // for 3, p mod 12 == 5  <<<<< does not work for safe primes.
-  // for 5, p mod 10 == 3 or 7
-  //
-  // Thanks to Phil Karn <karn@qualcomm.com> for the pointers about the
-  // special generators and for answering some of my questions.
-  //
-  // I've implemented the second simple method :-).
-  // Since DH should be using a safe prime (both p and q are prime),
-  // this generator function can take a very very long time to run.
-
-  // Actually there is no reason to insist that 'generator' be a generator.
-  // It's just as OK (and in some sense better) to use a generator of the
-  // order-q subgroup.
-
-  BIGNUM *t1, *t2;
-  int g, ok = 0;
-  BN_CTX *ctx = NULL;
-
-  ctx = BN_CTX_new();
-  if (ctx == NULL) {
-    goto err;
-  }
-  BN_CTX_start(ctx);
-  t1 = BN_CTX_get(ctx);
-  t2 = BN_CTX_get(ctx);
-  if (t1 == NULL || t2 == NULL) {
-    goto err;
-  }
-
-  // Make sure |dh| has the necessary elements
-  if (dh->p == NULL) {
-    dh->p = BN_new();
-    if (dh->p == NULL) {
-      goto err;
-    }
-  }
-  if (dh->g == NULL) {
-    dh->g = BN_new();
-    if (dh->g == NULL) {
-      goto err;
-    }
-  }
-
-  if (generator <= 1) {
-    OPENSSL_PUT_ERROR(DH, DH_R_BAD_GENERATOR);
-    goto err;
-  }
-  if (generator == DH_GENERATOR_2) {
-    if (!BN_set_word(t1, 24)) {
-      goto err;
-    }
-    if (!BN_set_word(t2, 11)) {
-      goto err;
-    }
-    g = 2;
-  } else if (generator == DH_GENERATOR_5) {
-    if (!BN_set_word(t1, 10)) {
-      goto err;
-    }
-    if (!BN_set_word(t2, 3)) {
-      goto err;
-    }
-    // BN_set_word(t3,7); just have to miss
-    // out on these ones :-(
-    g = 5;
-  } else {
-    // in the general case, don't worry if 'generator' is a
-    // generator or not: since we are using safe primes,
-    // it will generate either an order-q or an order-2q group,
-    // which both is OK
-    if (!BN_set_word(t1, 2)) {
-      goto err;
-    }
-    if (!BN_set_word(t2, 1)) {
-      goto err;
-    }
-    g = generator;
-  }
-
-  if (!BN_generate_prime_ex(dh->p, prime_bits, 1, t1, t2, cb)) {
-    goto err;
-  }
-  if (!BN_GENCB_call(cb, 3, 0)) {
-    goto err;
-  }
-  if (!BN_set_word(dh->g, g)) {
-    goto err;
-  }
-  ok = 1;
-
-err:
-  if (!ok) {
-    OPENSSL_PUT_ERROR(DH, ERR_R_BN_LIB);
-  }
-
-  if (ctx != NULL) {
-    BN_CTX_end(ctx);
-    BN_CTX_free(ctx);
-  }
-  return ok;
-}
-
 int DH_generate_key(DH *dh) {
   int ok = 0;
   int generate_new_key = 0;
@@ -390,56 +272,127 @@
   return ok;
 }
 
-int DH_compute_key(unsigned char *out, const BIGNUM *peers_key, DH *dh) {
-  BN_CTX *ctx = NULL;
-  BIGNUM *shared_key;
-  int ret = -1;
-  int check_result;
-
+static int dh_compute_key(DH *dh, BIGNUM *out_shared_key,
+                          const BIGNUM *peers_key, BN_CTX *ctx) {
   if (BN_num_bits(dh->p) > OPENSSL_DH_MAX_MODULUS_BITS) {
     OPENSSL_PUT_ERROR(DH, DH_R_MODULUS_TOO_LARGE);
-    goto err;
-  }
-
-  ctx = BN_CTX_new();
-  if (ctx == NULL) {
-    goto err;
-  }
-  BN_CTX_start(ctx);
-  shared_key = BN_CTX_get(ctx);
-  if (shared_key == NULL) {
-    goto err;
+    return 0;
   }
 
   if (dh->priv_key == NULL) {
     OPENSSL_PUT_ERROR(DH, DH_R_NO_PRIVATE_VALUE);
-    goto err;
+    return 0;
   }
 
-  if (!BN_MONT_CTX_set_locked(&dh->method_mont_p, &dh->method_mont_p_lock,
+  int check_result;
+  if (!DH_check_pub_key(dh, peers_key, &check_result) || check_result) {
+    OPENSSL_PUT_ERROR(DH, DH_R_INVALID_PUBKEY);
+    return 0;
+  }
+
+  int ret = 0;
+  BN_CTX_start(ctx);
+  BIGNUM *p_minus_1 = BN_CTX_get(ctx);
+
+  if (!p_minus_1 ||
+      !BN_MONT_CTX_set_locked(&dh->method_mont_p, &dh->method_mont_p_lock,
                               dh->p, ctx)) {
     goto err;
   }
 
-  if (!DH_check_pub_key(dh, peers_key, &check_result) || check_result) {
-    OPENSSL_PUT_ERROR(DH, DH_R_INVALID_PUBKEY);
-    goto err;
-  }
-
-  if (!BN_mod_exp_mont_consttime(shared_key, peers_key, dh->priv_key, dh->p,
-                                 ctx, dh->method_mont_p)) {
+  if (!BN_mod_exp_mont_consttime(out_shared_key, peers_key, dh->priv_key, dh->p,
+                                 ctx, dh->method_mont_p) ||
+      !BN_copy(p_minus_1, dh->p) ||
+      !BN_sub_word(p_minus_1, 1)) {
     OPENSSL_PUT_ERROR(DH, ERR_R_BN_LIB);
     goto err;
   }
 
-  ret = BN_bn2bin(shared_key, out);
-
-err:
-  if (ctx != NULL) {
-    BN_CTX_end(ctx);
-    BN_CTX_free(ctx);
+  // This performs the check required by SP 800-56Ar3 section 5.7.1.1 step two.
+  if (BN_cmp_word(out_shared_key, 1) <= 0 ||
+      BN_cmp(out_shared_key, p_minus_1) == 0) {
+    OPENSSL_PUT_ERROR(DH, DH_R_INVALID_PUBKEY);
+    goto err;
   }
 
+  ret = 1;
+
+ err:
+  BN_CTX_end(ctx);
+  return ret;
+}
+
+int DH_compute_key_padded(unsigned char *out, const BIGNUM *peers_key, DH *dh) {
+  BN_CTX *ctx = BN_CTX_new();
+  if (ctx == NULL) {
+    return -1;
+  }
+  BN_CTX_start(ctx);
+
+  int dh_size = DH_size(dh);
+  int ret = -1;
+  BIGNUM *shared_key = BN_CTX_get(ctx);
+  if (shared_key &&
+      dh_compute_key(dh, shared_key, peers_key, ctx) &&
+      BN_bn2bin_padded(out, dh_size, shared_key)) {
+    ret = dh_size;
+  }
+
+  BN_CTX_end(ctx);
+  BN_CTX_free(ctx);
+  return ret;
+}
+
+int DH_compute_key(unsigned char *out, const BIGNUM *peers_key, DH *dh) {
+  BN_CTX *ctx = BN_CTX_new();
+  if (ctx == NULL) {
+    return -1;
+  }
+  BN_CTX_start(ctx);
+
+  int ret = -1;
+  BIGNUM *shared_key = BN_CTX_get(ctx);
+  if (shared_key && dh_compute_key(dh, shared_key, peers_key, ctx)) {
+    ret = BN_bn2bin(shared_key, out);
+  }
+
+  BN_CTX_end(ctx);
+  BN_CTX_free(ctx);
+  return ret;
+}
+
+int DH_compute_key_hashed(DH *dh, uint8_t *out, size_t *out_len,
+                          size_t max_out_len, const BIGNUM *peers_key,
+                          const EVP_MD *digest) {
+  *out_len = (size_t)-1;
+
+  const size_t digest_len = EVP_MD_size(digest);
+  if (digest_len > max_out_len) {
+    return 0;
+  }
+
+  int ret = 0;
+  const size_t dh_len = DH_size(dh);
+  uint8_t *shared_bytes = OPENSSL_malloc(dh_len);
+  unsigned out_len_unsigned;
+  if (!shared_bytes ||
+      // SP 800-56A is ambiguous about whether the output should be padded prior
+      // to revision three. But revision three, section C.1, awkwardly specifies
+      // padding to the length of p.
+      //
+      // Also, padded output avoids side-channels, so is always strongly
+      // advisable.
+      DH_compute_key_padded(shared_bytes, peers_key, dh) != (int)dh_len ||
+      !EVP_Digest(shared_bytes, dh_len, out, &out_len_unsigned, digest, NULL) ||
+      out_len_unsigned != digest_len) {
+    goto err;
+  }
+
+  *out_len = digest_len;
+  ret = 1;
+
+ err:
+  OPENSSL_free(shared_bytes);
   return ret;
 }
 
@@ -452,82 +405,52 @@
   return 1;
 }
 
-static int int_dh_bn_cpy(BIGNUM **dst, const BIGNUM *src) {
-  BIGNUM *a = NULL;
+DH *DH_get_rfc7919_2048(void) {
+  // This is the prime from https://tools.ietf.org/html/rfc7919#appendix-A.1,
+  // which is specifically approved for FIPS in appendix D of SP 800-56Ar3.
+  static const BN_ULONG kFFDHE2048Data[] = {
+      TOBN(0xffffffff, 0xffffffff), TOBN(0x886b4238, 0x61285c97),
+      TOBN(0xc6f34a26, 0xc1b2effa), TOBN(0xc58ef183, 0x7d1683b2),
+      TOBN(0x3bb5fcbc, 0x2ec22005), TOBN(0xc3fe3b1b, 0x4c6fad73),
+      TOBN(0x8e4f1232, 0xeef28183), TOBN(0x9172fe9c, 0xe98583ff),
+      TOBN(0xc03404cd, 0x28342f61), TOBN(0x9e02fce1, 0xcdf7e2ec),
+      TOBN(0x0b07a7c8, 0xee0a6d70), TOBN(0xae56ede7, 0x6372bb19),
+      TOBN(0x1d4f42a3, 0xde394df4), TOBN(0xb96adab7, 0x60d7f468),
+      TOBN(0xd108a94b, 0xb2c8e3fb), TOBN(0xbc0ab182, 0xb324fb61),
+      TOBN(0x30acca4f, 0x483a797a), TOBN(0x1df158a1, 0x36ade735),
+      TOBN(0xe2a689da, 0xf3efe872), TOBN(0x984f0c70, 0xe0e68b77),
+      TOBN(0xb557135e, 0x7f57c935), TOBN(0x85636555, 0x3ded1af3),
+      TOBN(0x2433f51f, 0x5f066ed0), TOBN(0xd3df1ed5, 0xd5fd6561),
+      TOBN(0xf681b202, 0xaec4617a), TOBN(0x7d2fe363, 0x630c75d8),
+      TOBN(0xcc939dce, 0x249b3ef9), TOBN(0xa9e13641, 0x146433fb),
+      TOBN(0xd8b9c583, 0xce2d3695), TOBN(0xafdc5620, 0x273d3cf1),
+      TOBN(0xadf85458, 0xa2bb4a9a), TOBN(0xffffffff, 0xffffffff),
+  };
 
-  if (src) {
-    a = BN_dup(src);
-    if (!a) {
-      return 0;
-    }
+  BIGNUM *const ffdhe2048_p = BN_new();
+  BIGNUM *const ffdhe2048_q = BN_new();
+  BIGNUM *const ffdhe2048_g = BN_new();
+  DH *const dh = DH_new();
+
+  if (!ffdhe2048_p || !ffdhe2048_q || !ffdhe2048_g || !dh) {
+    goto err;
   }
 
-  BN_free(*dst);
-  *dst = a;
-  return 1;
-}
+  bn_set_static_words(ffdhe2048_p, kFFDHE2048Data,
+                      OPENSSL_ARRAY_SIZE(kFFDHE2048Data));
 
-static int int_dh_param_copy(DH *to, const DH *from, int is_x942) {
-  if (is_x942 == -1) {
-    is_x942 = !!from->q;
-  }
-  if (!int_dh_bn_cpy(&to->p, from->p) ||
-      !int_dh_bn_cpy(&to->g, from->g)) {
-    return 0;
+  if (!BN_rshift1(ffdhe2048_q, ffdhe2048_p) ||
+      !BN_set_word(ffdhe2048_g, 2) ||
+      !DH_set0_pqg(dh, ffdhe2048_p, ffdhe2048_q, ffdhe2048_g)) {
+    goto err;
   }
 
-  if (!is_x942) {
-    return 1;
-  }
+  return dh;
 
-  if (!int_dh_bn_cpy(&to->q, from->q) ||
-      !int_dh_bn_cpy(&to->j, from->j)) {
-    return 0;
-  }
-
-  OPENSSL_free(to->seed);
-  to->seed = NULL;
-  to->seedlen = 0;
-
-  if (from->seed) {
-    to->seed = OPENSSL_memdup(from->seed, from->seedlen);
-    if (!to->seed) {
-      return 0;
-    }
-    to->seedlen = from->seedlen;
-  }
-
-  return 1;
-}
-
-DH *DHparams_dup(const DH *dh) {
-  DH *ret = DH_new();
-  if (!ret) {
+ err:
+    BN_free(ffdhe2048_p);
+    BN_free(ffdhe2048_q);
+    BN_free(ffdhe2048_g);
+    DH_free(dh);
     return NULL;
-  }
-
-  if (!int_dh_param_copy(ret, dh, -1)) {
-    DH_free(ret);
-    return NULL;
-  }
-
-  return ret;
-}
-
-int DH_get_ex_new_index(long argl, void *argp, CRYPTO_EX_unused *unused,
-                        CRYPTO_EX_dup *dup_unused, CRYPTO_EX_free *free_func) {
-  int index;
-  if (!CRYPTO_get_ex_new_index(&g_ex_data_class, &index, argl, argp,
-                               free_func)) {
-    return -1;
-  }
-  return index;
-}
-
-int DH_set_ex_data(DH *d, int idx, void *arg) {
-  return CRYPTO_set_ex_data(&d->ex_data, idx, arg);
-}
-
-void *DH_get_ex_data(DH *d, int idx) {
-  return CRYPTO_get_ex_data(&d->ex_data, idx);
 }
diff --git a/deps/boringssl/src/crypto/fipsmodule/digest/digest.c b/deps/boringssl/src/crypto/fipsmodule/digest/digest.c
index a0b3bf5..6b0c198 100644
--- a/deps/boringssl/src/crypto/fipsmodule/digest/digest.c
+++ b/deps/boringssl/src/crypto/fipsmodule/digest/digest.c
@@ -122,6 +122,8 @@
 
 uint32_t EVP_MD_meth_get_flags(const EVP_MD *md) { return EVP_MD_flags(md); }
 
+void EVP_MD_CTX_set_flags(EVP_MD_CTX *ctx, int flags) {}
+
 int EVP_MD_CTX_copy_ex(EVP_MD_CTX *out, const EVP_MD_CTX *in) {
   // |in->digest| may be NULL if this is a signing |EVP_MD_CTX| for, e.g.,
   // Ed25519 which does not hash with |EVP_MD_CTX|.
diff --git a/deps/boringssl/src/crypto/fipsmodule/ec/ec.c b/deps/boringssl/src/crypto/fipsmodule/ec/ec.c
index ab2fd89..c976341 100644
--- a/deps/boringssl/src/crypto/fipsmodule/ec/ec.c
+++ b/deps/boringssl/src/crypto/fipsmodule/ec/ec.c
@@ -797,6 +797,12 @@
   return 1;
 }
 
+int EC_POINT_get_affine_coordinates(const EC_GROUP *group,
+                                    const EC_POINT *point, BIGNUM *x, BIGNUM *y,
+                                    BN_CTX *ctx) {
+  return EC_POINT_get_affine_coordinates_GFp(group, point, x, y, ctx);
+}
+
 void ec_affine_to_jacobian(const EC_GROUP *group, EC_RAW_POINT *out,
                            const EC_AFFINE *p) {
   out->X = p->X;
@@ -879,6 +885,12 @@
   return 1;
 }
 
+int EC_POINT_set_affine_coordinates(const EC_GROUP *group, EC_POINT *point,
+                                    const BIGNUM *x, const BIGNUM *y,
+                                    BN_CTX *ctx) {
+  return EC_POINT_set_affine_coordinates_GFp(group, point, x, y, ctx);
+}
+
 int EC_POINT_add(const EC_GROUP *group, EC_POINT *r, const EC_POINT *a,
                  const EC_POINT *b, BN_CTX *ctx) {
   if (EC_GROUP_cmp(group, r->group, NULL) != 0 ||
diff --git a/deps/boringssl/src/crypto/fipsmodule/ec/ec_key.c b/deps/boringssl/src/crypto/fipsmodule/ec/ec_key.c
index cd48c60..bc09e0e 100644
--- a/deps/boringssl/src/crypto/fipsmodule/ec/ec_key.c
+++ b/deps/boringssl/src/crypto/fipsmodule/ec/ec_key.c
@@ -440,7 +440,15 @@
 }
 
 int EC_KEY_generate_key_fips(EC_KEY *eckey) {
-  return EC_KEY_generate_key(eckey) && EC_KEY_check_fips(eckey);
+  if (EC_KEY_generate_key(eckey) && EC_KEY_check_fips(eckey)) {
+    return 1;
+  }
+
+  EC_POINT_free(eckey->pub_key);
+  ec_wrapped_scalar_free(eckey->priv_key);
+  eckey->pub_key = NULL;
+  eckey->priv_key = NULL;
+  return 0;
 }
 
 int EC_KEY_get_ex_new_index(long argl, void *argp, CRYPTO_EX_unused *unused,
diff --git a/deps/boringssl/src/crypto/fipsmodule/ec/ec_test.cc b/deps/boringssl/src/crypto/fipsmodule/ec/ec_test.cc
index 59d55b5..edcfeaa 100644
--- a/deps/boringssl/src/crypto/fipsmodule/ec/ec_test.cc
+++ b/deps/boringssl/src/crypto/fipsmodule/ec/ec_test.cc
@@ -1170,74 +1170,6 @@
        "37f2913224287b9dfb64742851f760eb14ca115ff9",
        "1510e764f1be968d661b7aaecb26a6d38c98e5205ca150f0ae426d"
        "2c3983c68e3a9ffb283c6ae4891d891b5705500475"},
-
-      // Note these tests do not match the tests vectors
-      // draft-irtf-cfrg-hash-to-curve-06 due to a
-      // spec issue. See
-      // https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve/pull/238 for
-      // corrected test vectors.
-      {&ec_hash_to_curve_p521_xmd_sha512_sswu_draft06, NID_secp521r1,
-       "P521_XMD:SHA-512_SSWU_RO_TESTGEN", "",
-       "00758617b5e40aa8b30fcfd3c7453ad1abeff158de5697d6f1ccb8"
-       "4690aaa8bb6692986200d16206e85e4f39f1d2829fee1a5904a089"
-       "b4fab3b76873429877c58f99",
-       "016edf324d95fcbe4a30f06751f16cdd5d0b49921dd653cefb3ea2"
-       "dc2b5b903e36d9924a65407283588cc6c224ab6d6324c73cdc166c"
-       "e1530b46984b459e966349b3"},
-      {&ec_hash_to_curve_p521_xmd_sha512_sswu_draft06, NID_secp521r1,
-       "P521_XMD:SHA-512_SSWU_RO_TESTGEN", "abc",
-       "00dcec1a83b676247293e96b1672f67aa5d041a4ded49f542a971a"
-       "60603dd39194f4d8e587f640563a9ab57dcc69af638129b220683f"
-       "f03ed9ad8cfdff3833a01452",
-       "01edc4b497be85361a0afc508058792dc7fc6499a4c51fa3475093"
-       "fd9951ea46fe055e1b007a12caf9be1ce3028bd0b4ca4ffa5200f9"
-       "d11e7fc96e068276ad1319c2"},
-      {&ec_hash_to_curve_p521_xmd_sha512_sswu_draft06, NID_secp521r1,
-       "P521_XMD:SHA-512_SSWU_RO_TESTGEN", "abcdef0123456789",
-       "01f58bfb34825d028c392976a09cebee829734f7714c84b8a13580"
-       "afcc2eb4726e18e307476c1fccdc857a3d6767fd2882875ab132b7"
-       "fa7f3f6bae8954384001b1a1",
-       "00ee0d2d0bfb0bdc6215814fe7096a3dfbf020dce4f0645e8e21a9"
-       "0d6a6113a5ca61ae7d8f3b485b04f2eb2b85e34fc7f9f1bf367386"
-       "2e03932b0acc3655e84d480f"},
-      {&ec_hash_to_curve_p521_xmd_sha512_sswu_draft06, NID_secp521r1,
-       "P521_XMD:SHA-512_SSWU_RO_TESTGEN",
-       "a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
-       "016d9a90619bb20c49a2a73cc8c6218cd9b3fb13c720fff2e1f8db"
-       "ac92862c7da4faf404faeff6b64f0d9b1c5824cec99b0d0ed02b3f"
-       "acb6275ce553404ea361503e",
-       "007e301e3357fb1d53961c56e53ce2763e44b297062a3eb14b9f8d"
-       "6aadc92162a74f7e254a606275e76ea0ac343b3bc746f99804bacd"
-       "7351a76fce44347c72a6fe9a"},
-
-      // Custom test vector which triggers long DST path.
-      {&ec_hash_to_curve_p521_xmd_sha512_sswu_draft06, NID_secp521r1,
-       "P521_XMD:SHA-512_SSWU_RO_TESTGEN_aaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
-       "abcdef0123456789",
-       "0036b0c8bbec60335ff8b0397c2cb80283b97051cc949c5c190c28"
-       "92b279fafd6c372dcec3e71eab85c48ed440c14498332548ee46d0"
-       "c85442cbdc5b4032e86c3884",
-       "0081e32ca4378ae89b03142361d9c7fbe66acf0351aca3a71eca50"
-       "7a37fb8673b69cb108d079a248aedd74f06949d6623e7f7605ea10"
-       "f6f751ab574c005db7377d7f"},
   };
 
   for (const auto &test : kTests) {
@@ -1269,8 +1201,6 @@
   EC_RAW_POINT p;
   static const uint8_t kDST[] = {0, 1, 2, 3};
   static const uint8_t kMessage[] = {4, 5, 6, 7};
-  EXPECT_FALSE(ec_hash_to_curve_p521_xmd_sha512_sswu_draft06(
-      p224.get(), &p, kDST, sizeof(kDST), kMessage, sizeof(kMessage)));
   EXPECT_FALSE(ec_hash_to_curve_p384_xmd_sha512_sswu_draft07(
       p224.get(), &p, kDST, sizeof(kDST), kMessage, sizeof(kMessage)));
 }
@@ -1308,31 +1238,6 @@
        "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
        "750f2fae7d2b2f41ac737d180c1d4363d85a1504798b4976d40921"
        "1ddb3651c13a5b4daba9975cdfce18336791131915"},
-      {&ec_hash_to_scalar_p521_xmd_sha512_draft06, NID_secp521r1,
-       "P521_XMD:SHA-512_SCALAR_TEST", "",
-       "01a6206c2fc677c11d51807bf46d64a17f92396673074c5cee9299"
-       "4d28eec5445d5ed89799b30b39c964ecf62f39d59e7d43de15d910"
-       "c2c1d69f3ebc01eab241e5dc"},
-      {&ec_hash_to_scalar_p521_xmd_sha512_draft06, NID_secp521r1,
-       "P521_XMD:SHA-512_SCALAR_TEST", "abcdef0123456789",
-       "00af484a5d9389a9912f555234c578d4b1b7c4a6f5009018d133a4"
-       "069172c9f5ce2d853b8643fe7bb50a83427ed3520a7a793c41a455"
-       "a02aa99431434fb6b5b0b26e"},
-      {&ec_hash_to_scalar_p521_xmd_sha512_draft06, NID_secp521r1,
-       "P521_XMD:SHA-512_SCALAR_TEST",
-       "a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
-       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
-       "00b2db2ceb64ad055cafc5a0fc92560525d6dcc4975b86bbb79013"
-       "a1c3ab5d412320cb55df8088a658039a70c5657d5aefaaaa81cc5d"
-       "eecdd40c03eb0517fe2e158c"},
   };
 
   for (const auto &test : kTests) {
@@ -1358,8 +1263,6 @@
   EC_SCALAR scalar;
   static const uint8_t kDST[] = {0, 1, 2, 3};
   static const uint8_t kMessage[] = {4, 5, 6, 7};
-  EXPECT_FALSE(ec_hash_to_scalar_p521_xmd_sha512_draft06(
-      p224.get(), &scalar, kDST, sizeof(kDST), kMessage, sizeof(kMessage)));
   EXPECT_FALSE(ec_hash_to_scalar_p384_xmd_sha512_draft07(
       p224.get(), &scalar, kDST, sizeof(kDST), kMessage, sizeof(kMessage)));
 }
diff --git a/deps/boringssl/src/crypto/fipsmodule/ec/internal.h b/deps/boringssl/src/crypto/fipsmodule/ec/internal.h
index 836d3ca..18aabb0 100644
--- a/deps/boringssl/src/crypto/fipsmodule/ec/internal.h
+++ b/deps/boringssl/src/crypto/fipsmodule/ec/internal.h
@@ -703,7 +703,8 @@
 int ec_GFp_mont_felem_from_bytes(const EC_GROUP *group, EC_FELEM *out,
                                  const uint8_t *in, size_t len);
 
-void ec_GFp_nistp_recode_scalar_bits(uint8_t *sign, uint8_t *digit, uint8_t in);
+void ec_GFp_nistp_recode_scalar_bits(crypto_word_t *sign, crypto_word_t *digit,
+                                     crypto_word_t in);
 
 const EC_METHOD *EC_GFp_nistp224_method(void);
 const EC_METHOD *EC_GFp_nistp256_method(void);
diff --git a/deps/boringssl/src/crypto/fipsmodule/ec/p224-64.c b/deps/boringssl/src/crypto/fipsmodule/ec/p224-64.c
index c106de0..da4308c 100644
--- a/deps/boringssl/src/crypto/fipsmodule/ec/p224-64.c
+++ b/deps/boringssl/src/crypto/fipsmodule/ec/p224-64.c
@@ -866,7 +866,7 @@
 }
 
 // p224_get_bit returns the |i|th bit in |in|
-static char p224_get_bit(const p224_felem_bytearray in, size_t i) {
+static crypto_word_t p224_get_bit(const p224_felem_bytearray in, size_t i) {
   if (i >= 224) {
     return 0;
   }
@@ -977,13 +977,13 @@
 
     // Add every 5 doublings.
     if (i % 5 == 0) {
-      uint64_t bits = p224_get_bit(scalar->bytes, i + 4) << 5;
+      crypto_word_t bits = p224_get_bit(scalar->bytes, i + 4) << 5;
       bits |= p224_get_bit(scalar->bytes, i + 3) << 4;
       bits |= p224_get_bit(scalar->bytes, i + 2) << 3;
       bits |= p224_get_bit(scalar->bytes, i + 1) << 2;
       bits |= p224_get_bit(scalar->bytes, i) << 1;
       bits |= p224_get_bit(scalar->bytes, i - 1);
-      uint8_t sign, digit;
+      crypto_word_t sign, digit;
       ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
 
       // Select the point to add or subtract.
@@ -1022,7 +1022,7 @@
     }
 
     // First, look 28 bits upwards.
-    uint64_t bits = p224_get_bit(scalar->bytes, i + 196) << 3;
+    crypto_word_t bits = p224_get_bit(scalar->bytes, i + 196) << 3;
     bits |= p224_get_bit(scalar->bytes, i + 140) << 2;
     bits |= p224_get_bit(scalar->bytes, i + 84) << 1;
     bits |= p224_get_bit(scalar->bytes, i + 28);
@@ -1080,14 +1080,15 @@
     // Add multiples of the generator.
     if (i <= 27) {
       // First, look 28 bits upwards.
-      uint64_t bits = p224_get_bit(g_scalar->bytes, i + 196) << 3;
+      crypto_word_t bits = p224_get_bit(g_scalar->bytes, i + 196) << 3;
       bits |= p224_get_bit(g_scalar->bytes, i + 140) << 2;
       bits |= p224_get_bit(g_scalar->bytes, i + 84) << 1;
       bits |= p224_get_bit(g_scalar->bytes, i + 28);
 
+      size_t index = (size_t)bits;
       p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
-                     g_p224_pre_comp[1][bits][0], g_p224_pre_comp[1][bits][1],
-                     g_p224_pre_comp[1][bits][2]);
+                     g_p224_pre_comp[1][index][0], g_p224_pre_comp[1][index][1],
+                     g_p224_pre_comp[1][index][2]);
       assert(!skip);
 
       // Second, look at the current position.
@@ -1095,20 +1096,21 @@
       bits |= p224_get_bit(g_scalar->bytes, i + 112) << 2;
       bits |= p224_get_bit(g_scalar->bytes, i + 56) << 1;
       bits |= p224_get_bit(g_scalar->bytes, i);
+      index = (size_t)bits;
       p224_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
-                     g_p224_pre_comp[0][bits][0], g_p224_pre_comp[0][bits][1],
-                     g_p224_pre_comp[0][bits][2]);
+                     g_p224_pre_comp[0][index][0], g_p224_pre_comp[0][index][1],
+                     g_p224_pre_comp[0][index][2]);
     }
 
     // Incorporate |p_scalar| every 5 doublings.
     if (i % 5 == 0) {
-      uint64_t bits = p224_get_bit(p_scalar->bytes, i + 4) << 5;
+      crypto_word_t bits = p224_get_bit(p_scalar->bytes, i + 4) << 5;
       bits |= p224_get_bit(p_scalar->bytes, i + 3) << 4;
       bits |= p224_get_bit(p_scalar->bytes, i + 2) << 3;
       bits |= p224_get_bit(p_scalar->bytes, i + 1) << 2;
       bits |= p224_get_bit(p_scalar->bytes, i) << 1;
       bits |= p224_get_bit(p_scalar->bytes, i - 1);
-      uint8_t sign, digit;
+      crypto_word_t sign, digit;
       ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
 
       // Select the point to add or subtract.
diff --git a/deps/boringssl/src/crypto/fipsmodule/ec/p256-x86_64.c b/deps/boringssl/src/crypto/fipsmodule/ec/p256-x86_64.c
index 973130f..29ae193 100644
--- a/deps/boringssl/src/crypto/fipsmodule/ec/p256-x86_64.c
+++ b/deps/boringssl/src/crypto/fipsmodule/ec/p256-x86_64.c
@@ -50,8 +50,8 @@
 
 // Recode window to a signed digit, see |ec_GFp_nistp_recode_scalar_bits| in
 // util.c for details
-static unsigned booth_recode_w5(unsigned in) {
-  unsigned s, d;
+static crypto_word_t booth_recode_w5(crypto_word_t in) {
+  crypto_word_t s, d;
 
   s = ~((in >> 5) - 1);
   d = (1 << 6) - in - 1;
@@ -61,8 +61,8 @@
   return (d << 1) + (s & 1);
 }
 
-static unsigned booth_recode_w7(unsigned in) {
-  unsigned s, d;
+static crypto_word_t booth_recode_w7(crypto_word_t in) {
+  crypto_word_t s, d;
 
   s = ~((in >> 7) - 1);
   d = (1 << 8) - in - 1;
@@ -194,8 +194,8 @@
   assert(p_scalar != NULL);
   assert(group->field.width == P256_LIMBS);
 
-  static const unsigned kWindowSize = 5;
-  static const unsigned kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
+  static const size_t kWindowSize = 5;
+  static const crypto_word_t kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
 
   // A |P256_POINT| is (3 * 32) = 96 bytes, and the 64-byte alignment should
   // add no more than 63 bytes of overhead. Thus, |table| should require
@@ -232,17 +232,17 @@
 
   BN_ULONG tmp[P256_LIMBS];
   alignas(32) P256_POINT h;
-  unsigned index = 255;
-  unsigned wvalue = p_str[(index - 1) / 8];
+  size_t index = 255;
+  crypto_word_t wvalue = p_str[(index - 1) / 8];
   wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
 
   ecp_nistz256_select_w5(r, table, booth_recode_w5(wvalue) >> 1);
 
   while (index >= 5) {
     if (index != 255) {
-      unsigned off = (index - 1) / 8;
+      size_t off = (index - 1) / 8;
 
-      wvalue = p_str[off] | p_str[off + 1] << 8;
+      wvalue = (crypto_word_t)p_str[off] | (crypto_word_t)p_str[off + 1] << 8;
       wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
 
       wvalue = booth_recode_w5(wvalue);
@@ -283,21 +283,22 @@
   P256_POINT_AFFINE a;
 } p256_point_union_t;
 
-static unsigned calc_first_wvalue(unsigned *index, const uint8_t p_str[33]) {
-  static const unsigned kWindowSize = 7;
-  static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
+static crypto_word_t calc_first_wvalue(size_t *index, const uint8_t p_str[33]) {
+  static const size_t kWindowSize = 7;
+  static const crypto_word_t kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
   *index = kWindowSize;
 
-  unsigned wvalue = (p_str[0] << 1) & kMask;
+  crypto_word_t wvalue = (p_str[0] << 1) & kMask;
   return booth_recode_w7(wvalue);
 }
 
-static unsigned calc_wvalue(unsigned *index, const uint8_t p_str[33]) {
-  static const unsigned kWindowSize = 7;
-  static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
+static crypto_word_t calc_wvalue(size_t *index, const uint8_t p_str[33]) {
+  static const size_t kWindowSize = 7;
+  static const crypto_word_t kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
 
-  const unsigned off = (*index - 1) / 8;
-  unsigned wvalue = p_str[off] | p_str[off + 1] << 8;
+  const size_t off = (*index - 1) / 8;
+  crypto_word_t wvalue =
+      (crypto_word_t)p_str[off] | (crypto_word_t)p_str[off + 1] << 8;
   wvalue = (wvalue >> ((*index - 1) % 8)) & kMask;
   *index += kWindowSize;
 
@@ -325,8 +326,8 @@
   p_str[32] = 0;
 
   // First window
-  unsigned index = 0;
-  unsigned wvalue = calc_first_wvalue(&index, p_str);
+  size_t index = 0;
+  crypto_word_t wvalue = calc_first_wvalue(&index, p_str);
 
   ecp_nistz256_select_w7(&p.a, ecp_nistz256_precomputed[0], wvalue >> 1);
   ecp_nistz256_neg(p.p.Z, p.p.Y);
@@ -370,8 +371,8 @@
   p_str[32] = 0;
 
   // First window
-  unsigned index = 0;
-  unsigned wvalue = calc_first_wvalue(&index, p_str);
+  size_t index = 0;
+  size_t wvalue = calc_first_wvalue(&index, p_str);
 
   // Convert |p| from affine to Jacobian coordinates. We set Z to zero if |p|
   // is infinity and |ONE| otherwise. |p| was computed from the table, so it
diff --git a/deps/boringssl/src/crypto/fipsmodule/ec/p256.c b/deps/boringssl/src/crypto/fipsmodule/ec/p256.c
index 9355ac1..9f5694c 100644
--- a/deps/boringssl/src/crypto/fipsmodule/ec/p256.c
+++ b/deps/boringssl/src/crypto/fipsmodule/ec/p256.c
@@ -67,7 +67,7 @@
 
 static void fiat_p256_copy(fiat_p256_limb_t out[FIAT_P256_NLIMBS],
                            const fiat_p256_limb_t in1[FIAT_P256_NLIMBS]) {
-  for (int i = 0; i < FIAT_P256_NLIMBS; i++) {
+  for (size_t i = 0; i < FIAT_P256_NLIMBS; i++) {
     out[i] = in1[i];
   }
 }
@@ -393,7 +393,7 @@
 }
 
 // fiat_p256_get_bit returns the |i|th bit in |in|
-static char fiat_p256_get_bit(const uint8_t *in, int i) {
+static crypto_word_t fiat_p256_get_bit(const uint8_t *in, int i) {
   if (i < 0 || i >= 256) {
     return 0;
   }
@@ -498,20 +498,20 @@
 
     // do other additions every 5 doublings
     if (i % 5 == 0) {
-      uint64_t bits = fiat_p256_get_bit(scalar->bytes, i + 4) << 5;
+      crypto_word_t bits = fiat_p256_get_bit(scalar->bytes, i + 4) << 5;
       bits |= fiat_p256_get_bit(scalar->bytes, i + 3) << 4;
       bits |= fiat_p256_get_bit(scalar->bytes, i + 2) << 3;
       bits |= fiat_p256_get_bit(scalar->bytes, i + 1) << 2;
       bits |= fiat_p256_get_bit(scalar->bytes, i) << 1;
       bits |= fiat_p256_get_bit(scalar->bytes, i - 1);
-      uint8_t sign, digit;
+      crypto_word_t sign, digit;
       ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits);
 
       // select the point to add or subtract, in constant time.
-      fiat_p256_select_point(digit, 17, (const fiat_p256_felem(*)[3])p_pre_comp,
-                             tmp);
+      fiat_p256_select_point((fiat_p256_limb_t)digit, 17,
+                             (const fiat_p256_felem(*)[3])p_pre_comp, tmp);
       fiat_p256_opp(ftmp, tmp[1]);  // (X, -Y, Z) is the negative point.
-      fiat_p256_cmovznz(tmp[1], sign, tmp[1], ftmp);
+      fiat_p256_cmovznz(tmp[1], (fiat_p256_limb_t)sign, tmp[1], ftmp);
 
       if (!skip) {
         fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2],
@@ -543,12 +543,13 @@
     }
 
     // First, look 32 bits upwards.
-    uint64_t bits = fiat_p256_get_bit(scalar->bytes, i + 224) << 3;
+    crypto_word_t bits = fiat_p256_get_bit(scalar->bytes, i + 224) << 3;
     bits |= fiat_p256_get_bit(scalar->bytes, i + 160) << 2;
     bits |= fiat_p256_get_bit(scalar->bytes, i + 96) << 1;
     bits |= fiat_p256_get_bit(scalar->bytes, i + 32);
     // Select the point to add, in constant time.
-    fiat_p256_select_point_affine(bits, 15, fiat_p256_g_pre_comp[1], tmp);
+    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
+                                  fiat_p256_g_pre_comp[1], tmp);
 
     if (!skip) {
       fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2],
@@ -566,7 +567,8 @@
     bits |= fiat_p256_get_bit(scalar->bytes, i + 64) << 1;
     bits |= fiat_p256_get_bit(scalar->bytes, i);
     // Select the point to add, in constant time.
-    fiat_p256_select_point_affine(bits, 15, fiat_p256_g_pre_comp[0], tmp);
+    fiat_p256_select_point_affine((fiat_p256_limb_t)bits, 15,
+                                  fiat_p256_g_pre_comp[0], tmp);
     fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], 1 /* mixed */,
                         tmp[0], tmp[1], tmp[2]);
   }
@@ -613,14 +615,15 @@
     // constant-time lookup.
     if (i <= 31) {
       // First, look 32 bits upwards.
-      uint64_t bits = fiat_p256_get_bit(g_scalar->bytes, i + 224) << 3;
+      crypto_word_t bits = fiat_p256_get_bit(g_scalar->bytes, i + 224) << 3;
       bits |= fiat_p256_get_bit(g_scalar->bytes, i + 160) << 2;
       bits |= fiat_p256_get_bit(g_scalar->bytes, i + 96) << 1;
       bits |= fiat_p256_get_bit(g_scalar->bytes, i + 32);
       if (bits != 0) {
+        size_t index = (size_t)(bits - 1);
         fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
-                            1 /* mixed */, fiat_p256_g_pre_comp[1][bits - 1][0],
-                            fiat_p256_g_pre_comp[1][bits - 1][1],
+                            1 /* mixed */, fiat_p256_g_pre_comp[1][index][0],
+                            fiat_p256_g_pre_comp[1][index][1],
                             fiat_p256_one);
         skip = 0;
       }
@@ -631,9 +634,10 @@
       bits |= fiat_p256_get_bit(g_scalar->bytes, i + 64) << 1;
       bits |= fiat_p256_get_bit(g_scalar->bytes, i);
       if (bits != 0) {
+        size_t index = (size_t)(bits - 1);
         fiat_p256_point_add(ret[0], ret[1], ret[2], ret[0], ret[1], ret[2],
-                            1 /* mixed */, fiat_p256_g_pre_comp[0][bits - 1][0],
-                            fiat_p256_g_pre_comp[0][bits - 1][1],
+                            1 /* mixed */, fiat_p256_g_pre_comp[0][index][0],
+                            fiat_p256_g_pre_comp[0][index][1],
                             fiat_p256_one);
         skip = 0;
       }
@@ -642,7 +646,7 @@
     int digit = p_wNAF[i];
     if (digit != 0) {
       assert(digit & 1);
-      int idx = digit < 0 ? (-digit) >> 1 : digit >> 1;
+      size_t idx = (size_t)(digit < 0 ? (-digit) >> 1 : digit >> 1);
       fiat_p256_felem *y = &p_pre_comp[idx][1], tmp;
       if (digit < 0) {
         fiat_p256_opp(tmp, p_pre_comp[idx][1]);
diff --git a/deps/boringssl/src/crypto/fipsmodule/ec/simple_mul.c b/deps/boringssl/src/crypto/fipsmodule/ec/simple_mul.c
index 127e2b3..0e6384e 100644
--- a/deps/boringssl/src/crypto/fipsmodule/ec/simple_mul.c
+++ b/deps/boringssl/src/crypto/fipsmodule/ec/simple_mul.c
@@ -108,7 +108,7 @@
   if (i > 0) {
     window |= bn_is_bit_set_words(scalar->words, width, i - 1);
   }
-  uint8_t sign, digit;
+  crypto_word_t sign, digit;
   ec_GFp_nistp_recode_scalar_bits(&sign, &digit, window);
 
   // Select the entry in constant-time.
@@ -121,7 +121,7 @@
   // Negate if necessary.
   EC_FELEM neg_Y;
   ec_felem_neg(group, &neg_Y, &out->Y);
-  BN_ULONG sign_mask = sign;
+  crypto_word_t sign_mask = sign;
   sign_mask = 0u - sign_mask;
   ec_felem_select(group, &out->Y, sign_mask, &neg_Y, &out->Y);
 }
diff --git a/deps/boringssl/src/crypto/fipsmodule/ec/util.c b/deps/boringssl/src/crypto/fipsmodule/ec/util.c
index 4f39f18..c4323f2 100644
--- a/deps/boringssl/src/crypto/fipsmodule/ec/util.c
+++ b/deps/boringssl/src/crypto/fipsmodule/ec/util.c
@@ -240,9 +240,9 @@
 //   P-384: ...01110011; w = 2, 5, 6, 7 are okay
 //   P-256: ...01010001; w = 5, 7 are okay
 //   P-224: ...00111101; w = 3, 4, 5, 6 are okay
-void ec_GFp_nistp_recode_scalar_bits(uint8_t *sign, uint8_t *digit,
-                                     uint8_t in) {
-  uint8_t s, d;
+void ec_GFp_nistp_recode_scalar_bits(crypto_word_t *sign, crypto_word_t *digit,
+                                     crypto_word_t in) {
+  crypto_word_t s, d;
 
   s = ~((in >> 5) - 1); /* sets all bits to MSB(in), 'in' seen as
                           * 6-bit value */
diff --git a/deps/boringssl/src/crypto/fipsmodule/rand/internal.h b/deps/boringssl/src/crypto/fipsmodule/rand/internal.h
index db81c33..598a17b 100644
--- a/deps/boringssl/src/crypto/fipsmodule/rand/internal.h
+++ b/deps/boringssl/src/crypto/fipsmodule/rand/internal.h
@@ -36,6 +36,34 @@
 void RAND_bytes_with_additional_data(uint8_t *out, size_t out_len,
                                      const uint8_t user_additional_data[32]);
 
+#if defined(BORINGSSL_FIPS)
+
+// We overread from /dev/urandom or RDRAND by a factor of 10 and XOR to whiten.
+#define BORINGSSL_FIPS_OVERREAD 10
+
+// CRYPTO_get_seed_entropy writes |out_entropy_len| bytes of entropy, suitable
+// for seeding a DRBG, to |out_entropy|. It sets |*out_used_cpu| to one if the
+// entropy came directly from the CPU and zero if it came from the OS. It
+// actively obtains entropy from the CPU/OS and so should not be called from
+// within the FIPS module if |BORINGSSL_FIPS_PASSIVE_ENTROPY| is defined.
+void CRYPTO_get_seed_entropy(uint8_t *out_entropy, size_t out_entropy_len,
+                             int *out_used_cpu);
+
+#if defined(BORINGSSL_FIPS_PASSIVE_ENTROPY)
+
+// RAND_load_entropy supplies |entropy_len| bytes of entropy to the module. The
+// |from_cpu| parameter is true iff the entropy was obtained directly from the
+// CPU.
+void RAND_load_entropy(const uint8_t *entropy, size_t entropy_len,
+                       int from_cpu);
+
+// RAND_need_entropy is implemented outside of the FIPS module and is called
+// when the module has stopped because it has run out of entropy.
+void RAND_need_entropy(size_t bytes_needed);
+
+#endif  // BORINGSSL_FIPS_PASSIVE_ENTROPY
+#endif  // BORINGSSL_FIPS
+
 // CRYPTO_sysrand fills |len| bytes at |buf| with entropy from the operating
 // system.
 void CRYPTO_sysrand(uint8_t *buf, size_t len);
diff --git a/deps/boringssl/src/crypto/fipsmodule/rand/rand.c b/deps/boringssl/src/crypto/fipsmodule/rand/rand.c
index 05d6a29..aa0f05b 100644
--- a/deps/boringssl/src/crypto/fipsmodule/rand/rand.c
+++ b/deps/boringssl/src/crypto/fipsmodule/rand/rand.c
@@ -25,6 +25,7 @@
 #include <openssl/chacha.h>
 #include <openssl/cpu.h>
 #include <openssl/mem.h>
+#include <openssl/type_check.h>
 
 #include "internal.h"
 #include "fork_detect.h"
@@ -63,11 +64,11 @@
   // (re)seeded. This is bound by |kReseedInterval|.
   unsigned calls;
   // last_block_valid is non-zero iff |last_block| contains data from
-  // |CRYPTO_sysrand_for_seed|.
+  // |get_seed_entropy|.
   int last_block_valid;
 
 #if defined(BORINGSSL_FIPS)
-  // last_block contains the previous block from |CRYPTO_sysrand_for_seed|.
+  // last_block contains the previous block from |get_seed_entropy|.
   uint8_t last_block[CRNGT_BLOCK_SIZE];
   // next and prev form a NULL-terminated, double-linked list of all states in
   // a process.
@@ -146,12 +147,6 @@
     OPENSSL_memcpy(buf + len_multiple8, rand_buf, remainder);
   }
 
-#if defined(BORINGSSL_FIPS_BREAK_CRNG)
-  // This breaks the "continuous random number generator test" defined in FIPS
-  // 140-2, section 4.9.2, and implemented in rand_get_seed().
-  OPENSSL_memset(buf, 0, len);
-#endif
-
   return 1;
 }
 
@@ -165,25 +160,110 @@
 
 #if defined(BORINGSSL_FIPS)
 
+void CRYPTO_get_seed_entropy(uint8_t *out_entropy, size_t out_entropy_len,
+                             int *out_used_cpu) {
+  *out_used_cpu = 0;
+  if (have_rdrand() && rdrand(out_entropy, out_entropy_len)) {
+    *out_used_cpu = 1;
+  } else {
+    CRYPTO_sysrand_for_seed(out_entropy, out_entropy_len);
+  }
+
+#if defined(BORINGSSL_FIPS_BREAK_CRNG)
+  // This breaks the "continuous random number generator test" defined in FIPS
+  // 140-2, section 4.9.2, and implemented in |rand_get_seed|.
+  OPENSSL_memset(out_entropy, 0, out_entropy_len);
+#endif
+}
+
+#if defined(BORINGSSL_FIPS_PASSIVE_ENTROPY)
+
+// In passive entropy mode, entropy is supplied from outside of the module via
+// |RAND_load_entropy| and is stored in a global instance of the following
+// structure.
+
+struct entropy_buffer {
+  // bytes contains entropy suitable for seeding a DRBG.
+  uint8_t bytes[CTR_DRBG_ENTROPY_LEN * BORINGSSL_FIPS_OVERREAD];
+  // bytes_valid indicates the number of bytes of |bytes| that contain valid
+  // data.
+  size_t bytes_valid;
+  // from_cpu is true if any of the contents of |bytes| were obtained directly
+  // from the CPU.
+  int from_cpu;
+};
+
+DEFINE_BSS_GET(struct entropy_buffer, entropy_buffer);
+DEFINE_STATIC_MUTEX(entropy_buffer_lock);
+
+void RAND_load_entropy(const uint8_t *entropy, size_t entropy_len,
+                       int from_cpu) {
+  struct entropy_buffer *const buffer = entropy_buffer_bss_get();
+
+  CRYPTO_STATIC_MUTEX_lock_write(entropy_buffer_lock_bss_get());
+  const size_t space = sizeof(buffer->bytes) - buffer->bytes_valid;
+  if (entropy_len > space) {
+    entropy_len = space;
+  }
+
+  OPENSSL_memcpy(&buffer->bytes[buffer->bytes_valid], entropy, entropy_len);
+  buffer->bytes_valid += entropy_len;
+  buffer->from_cpu |= from_cpu && (entropy_len != 0);
+  CRYPTO_STATIC_MUTEX_unlock_write(entropy_buffer_lock_bss_get());
+}
+
+// get_seed_entropy fills |out_entropy_len| bytes of |out_entropy| from the
+// global |entropy_buffer|.
+static void get_seed_entropy(uint8_t *out_entropy, size_t out_entropy_len,
+                             int *out_used_cpu) {
+  struct entropy_buffer *const buffer = entropy_buffer_bss_get();
+  if (out_entropy_len > sizeof(buffer->bytes)) {
+    abort();
+  }
+
+  CRYPTO_STATIC_MUTEX_lock_write(entropy_buffer_lock_bss_get());
+  while (buffer->bytes_valid < out_entropy_len) {
+    CRYPTO_STATIC_MUTEX_unlock_write(entropy_buffer_lock_bss_get());
+    RAND_need_entropy(out_entropy_len - buffer->bytes_valid);
+    CRYPTO_STATIC_MUTEX_lock_write(entropy_buffer_lock_bss_get());
+  }
+
+  *out_used_cpu = buffer->from_cpu;
+  OPENSSL_memcpy(out_entropy, buffer->bytes, out_entropy_len);
+  OPENSSL_memmove(buffer->bytes, &buffer->bytes[out_entropy_len],
+                  buffer->bytes_valid - out_entropy_len);
+  buffer->bytes_valid -= out_entropy_len;
+  if (buffer->bytes_valid == 0) {
+    buffer->from_cpu = 0;
+  }
+
+  CRYPTO_STATIC_MUTEX_unlock_write(entropy_buffer_lock_bss_get());
+}
+
+#else
+
+// In the active case, |get_seed_entropy| simply calls |CRYPTO_get_seed_entropy|
+// in order to obtain entropy from the CPU or OS.
+static void get_seed_entropy(uint8_t *out_entropy, size_t out_entropy_len,
+                            int *out_used_cpu) {
+  CRYPTO_get_seed_entropy(out_entropy, out_entropy_len, out_used_cpu);
+}
+
+#endif  // !BORINGSSL_FIPS_PASSIVE_ENTROPY
+
+// rand_get_seed fills |seed| with entropy and sets |*out_used_cpu| to one if
+// that entropy came directly from the CPU and zero otherwise.
 static void rand_get_seed(struct rand_thread_state *state,
-                          uint8_t seed[CTR_DRBG_ENTROPY_LEN]) {
+                          uint8_t seed[CTR_DRBG_ENTROPY_LEN],
+                          int *out_used_cpu) {
   if (!state->last_block_valid) {
-    if (!have_rdrand() ||
-        !rdrand(state->last_block, sizeof(state->last_block))) {
-      CRYPTO_sysrand_for_seed(state->last_block, sizeof(state->last_block));
-    }
+    int unused;
+    get_seed_entropy(state->last_block, sizeof(state->last_block), &unused);
     state->last_block_valid = 1;
   }
 
-  // We overread from /dev/urandom or RDRAND by a factor of 10 and XOR to
-  // whiten.
-#define FIPS_OVERREAD 10
-  uint8_t entropy[CTR_DRBG_ENTROPY_LEN * FIPS_OVERREAD];
-
-  int used_rdrand = have_rdrand() && rdrand(entropy, sizeof(entropy));
-  if (!used_rdrand) {
-    CRYPTO_sysrand_for_seed(entropy, sizeof(entropy));
-  }
+  uint8_t entropy[CTR_DRBG_ENTROPY_LEN * BORINGSSL_FIPS_OVERREAD];
+  get_seed_entropy(entropy, sizeof(entropy), out_used_cpu);
 
   // See FIPS 140-2, section 4.9.2. This is the “continuous random number
   // generator test” which causes the program to randomly abort. Hopefully the
@@ -193,6 +273,7 @@
     BORINGSSL_FIPS_abort();
   }
 
+  OPENSSL_STATIC_ASSERT(sizeof(entropy) % CRNGT_BLOCK_SIZE == 0, "");
   for (size_t i = CRNGT_BLOCK_SIZE; i < sizeof(entropy);
        i += CRNGT_BLOCK_SIZE) {
     if (CRYPTO_memcmp(entropy + i - CRNGT_BLOCK_SIZE, entropy + i,
@@ -207,31 +288,24 @@
 
   OPENSSL_memcpy(seed, entropy, CTR_DRBG_ENTROPY_LEN);
 
-  for (size_t i = 1; i < FIPS_OVERREAD; i++) {
+  for (size_t i = 1; i < BORINGSSL_FIPS_OVERREAD; i++) {
     for (size_t j = 0; j < CTR_DRBG_ENTROPY_LEN; j++) {
       seed[j] ^= entropy[CTR_DRBG_ENTROPY_LEN * i + j];
     }
   }
-
-#if defined(OPENSSL_URANDOM)
-  // If we used RDRAND, also opportunistically read from the system. This avoids
-  // solely relying on the hardware once the entropy pool has been initialized.
-  if (used_rdrand) {
-    CRYPTO_sysrand_if_available(entropy, CTR_DRBG_ENTROPY_LEN);
-    for (size_t i = 0; i < CTR_DRBG_ENTROPY_LEN; i++) {
-      seed[i] ^= entropy[i];
-    }
-  }
-#endif
 }
 
 #else
 
+// rand_get_seed fills |seed| with entropy and sets |*out_used_cpu| to one if
+// that entropy came directly from the CPU and zero otherwise.
 static void rand_get_seed(struct rand_thread_state *state,
-                          uint8_t seed[CTR_DRBG_ENTROPY_LEN]) {
+                          uint8_t seed[CTR_DRBG_ENTROPY_LEN],
+                          int *out_used_cpu) {
   // If not in FIPS mode, we don't overread from the system entropy source and
   // we don't depend only on the hardware RDRAND.
   CRYPTO_sysrand(seed, CTR_DRBG_ENTROPY_LEN);
+  *out_used_cpu = 0;
 }
 
 #endif
@@ -290,8 +364,23 @@
 
     state->last_block_valid = 0;
     uint8_t seed[CTR_DRBG_ENTROPY_LEN];
-    rand_get_seed(state, seed);
-    if (!CTR_DRBG_init(&state->drbg, seed, NULL, 0)) {
+    int used_cpu;
+    rand_get_seed(state, seed, &used_cpu);
+
+    uint8_t personalization[CTR_DRBG_ENTROPY_LEN];
+    size_t personalization_len = 0;
+#if defined(OPENSSL_URANDOM)
+    // If we used RDRAND, also opportunistically read from the system. This
+    // avoids solely relying on the hardware once the entropy pool has been
+    // initialized.
+    if (used_cpu &&
+        CRYPTO_sysrand_if_available(personalization, sizeof(personalization))) {
+      personalization_len = sizeof(personalization);
+    }
+#endif
+
+    if (!CTR_DRBG_init(&state->drbg, seed, personalization,
+                       personalization_len)) {
       abort();
     }
     state->calls = 0;
@@ -315,7 +404,8 @@
   if (state->calls >= kReseedInterval ||
       state->fork_generation != fork_generation) {
     uint8_t seed[CTR_DRBG_ENTROPY_LEN];
-    rand_get_seed(state, seed);
+    int used_cpu;
+    rand_get_seed(state, seed, &used_cpu);
 #if defined(BORINGSSL_FIPS)
     // Take a read lock around accesses to |state->drbg|. This is needed to
     // avoid returning bad entropy if we race with
diff --git a/deps/boringssl/src/crypto/fipsmodule/rand/urandom.c b/deps/boringssl/src/crypto/fipsmodule/rand/urandom.c
index bae3fc3..3def3aa 100644
--- a/deps/boringssl/src/crypto/fipsmodule/rand/urandom.c
+++ b/deps/boringssl/src/crypto/fipsmodule/rand/urandom.c
@@ -366,14 +366,7 @@
     perror("entropy fill failed");
     abort();
   }
-
-#if defined(BORINGSSL_FIPS_BREAK_CRNG)
-  // This breaks the "continuous random number generator test" defined in FIPS
-  // 140-2, section 4.9.2, and implemented in rand_get_seed().
-  OPENSSL_memset(out, 0, requested);
-#endif
 }
-
 #endif  // BORINGSSL_FIPS
 
 int CRYPTO_sysrand_if_available(uint8_t *out, size_t requested) {
diff --git a/deps/boringssl/src/crypto/fipsmodule/rsa/internal.h b/deps/boringssl/src/crypto/fipsmodule/rsa/internal.h
index faa6fb7..d9d6fac 100644
--- a/deps/boringssl/src/crypto/fipsmodule/rsa/internal.h
+++ b/deps/boringssl/src/crypto/fipsmodule/rsa/internal.h
@@ -108,6 +108,10 @@
 int RSA_padding_add_none(uint8_t *to, size_t to_len, const uint8_t *from,
                          size_t from_len);
 
+// rsa_check_public_key checks that |rsa|'s public modulus and exponent are
+// within DoS bounds.
+int rsa_check_public_key(const RSA *rsa);
+
 // RSA_private_transform calls either the method-specific |private_transform|
 // function (if given) or the generic one. See the comment for
 // |private_transform| in |rsa_meth_st|.
diff --git a/deps/boringssl/src/crypto/fipsmodule/rsa/rsa.c b/deps/boringssl/src/crypto/fipsmodule/rsa/rsa.c
index 2929673..ae63e1a 100644
--- a/deps/boringssl/src/crypto/fipsmodule/rsa/rsa.c
+++ b/deps/boringssl/src/crypto/fipsmodule/rsa/rsa.c
@@ -661,6 +661,9 @@
     return 1;
   }
 
+  // Note |bn_mul_consttime| and |bn_div_consttime| do not scale linearly, but
+  // checking |ainv| is in range bounds the running time, assuming |m|'s bounds
+  // were checked by the caller.
   BN_CTX_start(ctx);
   BIGNUM *tmp = BN_CTX_get(ctx);
   int ret = tmp != NULL &&
@@ -674,22 +677,35 @@
 }
 
 int RSA_check_key(const RSA *key) {
+  // TODO(davidben): RSA key initialization is spread across
+  // |rsa_check_public_key|, |RSA_check_key|, |freeze_private_key|, and
+  // |BN_MONT_CTX_set_locked| as a result of API issues. See
+  // https://crbug.com/boringssl/316. As a result, we inconsistently check RSA
+  // invariants. We should fix this and integrate that logic.
+
   if (RSA_is_opaque(key)) {
     // Opaque keys can't be checked.
     return 1;
   }
 
+  if (!rsa_check_public_key(key)) {
+    return 0;
+  }
+
   if ((key->p != NULL) != (key->q != NULL)) {
     OPENSSL_PUT_ERROR(RSA, RSA_R_ONLY_ONE_OF_P_Q_GIVEN);
     return 0;
   }
 
-  if (!key->n || !key->e) {
-    OPENSSL_PUT_ERROR(RSA, RSA_R_VALUE_MISSING);
+  // |key->d| must be bounded by |key->n|. This ensures bounds on |RSA_bits|
+  // translate to bounds on the running time of private key operations.
+  if (key->d != NULL &&
+      (BN_is_negative(key->d) || BN_cmp(key->d, key->n) >= 0)) {
+    OPENSSL_PUT_ERROR(RSA, RSA_R_D_OUT_OF_RANGE);
     return 0;
   }
 
-  if (!key->d || !key->p) {
+  if (key->d == NULL || key->p == NULL) {
     // For a public key, or without p and q, there's nothing that can be
     // checked.
     return 1;
@@ -709,24 +725,28 @@
   BN_init(&qm1);
   BN_init(&dmp1);
   BN_init(&dmq1);
+
+  // Check that p * q == n. Before we multiply, we check that p and q are in
+  // bounds, to avoid a DoS vector in |bn_mul_consttime| below. Note that
+  // n was bound by |rsa_check_public_key|.
+  if (BN_is_negative(key->p) || BN_cmp(key->p, key->n) >= 0 ||
+      BN_is_negative(key->q) || BN_cmp(key->q, key->n) >= 0) {
+    OPENSSL_PUT_ERROR(RSA, RSA_R_N_NOT_EQUAL_P_Q);
+    goto out;
+  }
   if (!bn_mul_consttime(&tmp, key->p, key->q, ctx)) {
     OPENSSL_PUT_ERROR(RSA, ERR_LIB_BN);
     goto out;
   }
-
   if (BN_cmp(&tmp, key->n) != 0) {
     OPENSSL_PUT_ERROR(RSA, RSA_R_N_NOT_EQUAL_P_Q);
     goto out;
   }
 
-  if (BN_is_negative(key->d) || BN_cmp(key->d, key->n) >= 0) {
-    OPENSSL_PUT_ERROR(RSA, RSA_R_D_OUT_OF_RANGE);
-    goto out;
-  }
-
   // d must be an inverse of e mod the Carmichael totient, lcm(p-1, q-1), but it
   // may be unreduced because other implementations use the Euler totient. We
-  // simply check that d * e is one mod p-1 and mod q-1.
+  // simply check that d * e is one mod p-1 and mod q-1. Note d and e were bound
+  // by earlier checks in this function.
   if (!bn_usub_consttime(&pm1, key->p, BN_value_one()) ||
       !bn_usub_consttime(&qm1, key->q, BN_value_one()) ||
       !bn_mul_consttime(&de, key->d, key->e, ctx) ||
diff --git a/deps/boringssl/src/crypto/fipsmodule/rsa/rsa_impl.c b/deps/boringssl/src/crypto/fipsmodule/rsa/rsa_impl.c
index 2d9a9c9..2f76e9e 100644
--- a/deps/boringssl/src/crypto/fipsmodule/rsa/rsa_impl.c
+++ b/deps/boringssl/src/crypto/fipsmodule/rsa/rsa_impl.c
@@ -73,7 +73,12 @@
 #include "../rand/fork_detect.h"
 
 
-static int check_modulus_and_exponent_sizes(const RSA *rsa) {
+int rsa_check_public_key(const RSA *rsa) {
+  if (rsa->n == NULL || rsa->e == NULL) {
+    OPENSSL_PUT_ERROR(RSA, RSA_R_VALUE_MISSING);
+    return 0;
+  }
+
   unsigned rsa_bits = BN_num_bits(rsa->n);
 
   if (rsa_bits > 16 * 1024) {
@@ -253,8 +258,7 @@
 
 int RSA_encrypt(RSA *rsa, size_t *out_len, uint8_t *out, size_t max_out,
                 const uint8_t *in, size_t in_len, int padding) {
-  if (rsa->n == NULL || rsa->e == NULL) {
-    OPENSSL_PUT_ERROR(RSA, RSA_R_VALUE_MISSING);
+  if (!rsa_check_public_key(rsa)) {
     return 0;
   }
 
@@ -269,10 +273,6 @@
     return 0;
   }
 
-  if (!check_modulus_and_exponent_sizes(rsa)) {
-    return 0;
-  }
-
   ctx = BN_CTX_new();
   if (ctx == NULL) {
     goto err;
@@ -592,8 +592,7 @@
 
 int RSA_verify_raw(RSA *rsa, size_t *out_len, uint8_t *out, size_t max_out,
                    const uint8_t *in, size_t in_len, int padding) {
-  if (rsa->n == NULL || rsa->e == NULL) {
-    OPENSSL_PUT_ERROR(RSA, RSA_R_VALUE_MISSING);
+  if (!rsa_check_public_key(rsa)) {
     return 0;
   }
 
@@ -610,10 +609,6 @@
     return 0;
   }
 
-  if (!check_modulus_and_exponent_sizes(rsa)) {
-    return 0;
-  }
-
   BN_CTX *ctx = BN_CTX_new();
   if (ctx == NULL) {
     return 0;
@@ -938,20 +933,57 @@
   return *out != NULL;
 }
 
-// kBoringSSLRSASqrtTwo is the BIGNUM representation of ⌊2¹⁵³⁵×√2⌋. This is
-// chosen to give enough precision for 3072-bit RSA, the largest key size FIPS
+// kBoringSSLRSASqrtTwo is the BIGNUM representation of ⌊2²⁰⁴⁷×√2⌋. This is
+// chosen to give enough precision for 4096-bit RSA, the largest key size FIPS
 // specifies. Key sizes beyond this will round up.
 //
-// To verify this number, check that n² < 2³⁰⁷¹ < (n+1)², where n is value
+// To calculate, use the following Haskell code:
+//
+// import Text.Printf (printf)
+// import Data.List (intercalate)
+//
+// pow2 = 4095
+// target = 2^pow2
+//
+// f x = x*x - (toRational target)
+//
+// fprime x = 2*x
+//
+// newtonIteration x = x - (f x) / (fprime x)
+//
+// converge x =
+//   let n = floor x in
+//   if n*n - target < 0 && (n+1)*(n+1) - target > 0
+//     then n
+//     else converge (newtonIteration x)
+//
+// divrem bits x = (x `div` (2^bits), x `rem` (2^bits))
+//
+// bnWords :: Integer -> [Integer]
+// bnWords x =
+//   if x == 0
+//     then []
+//     else let (high, low) = divrem 64 x in low : bnWords high
+//
+// showWord x = let (high, low) = divrem 32 x in printf "TOBN(0x%08x, 0x%08x)" high low
+//
+// output :: String
+// output = intercalate ", " $ map showWord $ bnWords $ converge (2 ^ (pow2 `div` 2))
+//
+// To verify this number, check that n² < 2⁴⁰⁹⁵ < (n+1)², where n is the value
 // represented here. Note the components are listed in little-endian order. Here
 // is some sample Python code to check:
 //
 //   >>> TOBN = lambda a, b: a << 32 | b
 //   >>> l = [ <paste the contents of kSqrtTwo> ]
 //   >>> n = sum(a * 2**(64*i) for i, a in enumerate(l))
-//   >>> n**2 < 2**3071 < (n+1)**2
+//   >>> n**2 < 2**4095 < (n+1)**2
 //   True
 const BN_ULONG kBoringSSLRSASqrtTwo[] = {
+    TOBN(0x4d7c60a5, 0xe633e3e1), TOBN(0x5fcf8f7b, 0xca3ea33b),
+    TOBN(0xc246785e, 0x92957023), TOBN(0xf9acce41, 0x797f2805),
+    TOBN(0xfdfe170f, 0xd3b1f780), TOBN(0xd24f4a76, 0x3facb882),
+    TOBN(0x18838a2e, 0xaff5f3b2), TOBN(0xc1fcbdde, 0xa2f7dc33),
     TOBN(0xdea06241, 0xf7aa81c2), TOBN(0xf6a1be3f, 0xca221307),
     TOBN(0x332a5e9f, 0x7bda1ebf), TOBN(0x0104dc01, 0xfe32352f),
     TOBN(0xb8cf341b, 0x6f8236c7), TOBN(0x4264dabc, 0xd528b651),
@@ -1121,8 +1153,8 @@
 
   // Reject excessively large public exponents. Windows CryptoAPI and Go don't
   // support values larger than 32 bits, so match their limits for generating
-  // keys. (|check_modulus_and_exponent_sizes| uses a slightly more conservative
-  // value, but we don't need to support generating such keys.)
+  // keys. (|rsa_check_public_key| uses a slightly more conservative value, but
+  // we don't need to support generating such keys.)
   // https://github.com/golang/go/issues/3161
   // https://msdn.microsoft.com/en-us/library/aa387685(VS.85).aspx
   if (BN_num_bits(e_value) > 32) {
@@ -1172,13 +1204,13 @@
   int sqrt2_bits = kBoringSSLRSASqrtTwoLen * BN_BITS2;
   assert(sqrt2_bits == (int)BN_num_bits(sqrt2));
   if (sqrt2_bits > prime_bits) {
-    // For key sizes up to 3072 (prime_bits = 1536), this is exactly
+    // For key sizes up to 4096 (prime_bits = 2048), this is exactly
     // ⌊2^(prime_bits-1)×√2⌋.
     if (!BN_rshift(sqrt2, sqrt2, sqrt2_bits - prime_bits)) {
       goto bn_err;
     }
   } else if (prime_bits > sqrt2_bits) {
-    // For key sizes beyond 3072, this is approximate. We err towards retrying
+    // For key sizes beyond 4096, this is approximate. We err towards retrying
     // to ensure our key is the right size and round up.
     if (!BN_add_word(sqrt2, 1) ||
         !BN_lshift(sqrt2, sqrt2, prime_bits - sqrt2_bits)) {
@@ -1284,58 +1316,80 @@
   *in = NULL;
 }
 
-int RSA_generate_key_ex(RSA *rsa, int bits, const BIGNUM *e_value,
-                        BN_GENCB *cb) {
+static int RSA_generate_key_ex_maybe_fips(RSA *rsa, int bits,
+                                          const BIGNUM *e_value, BN_GENCB *cb,
+                                          int check_fips) {
+  RSA *tmp = NULL;
+  uint32_t err;
+  int ret = 0;
+
   // |rsa_generate_key_impl|'s 2^-20 failure probability is too high at scale,
   // so we run the FIPS algorithm four times, bringing it down to 2^-80. We
   // should just adjust the retry limit, but FIPS 186-4 prescribes that value
   // and thus results in unnecessary complexity.
-  for (int i = 0; i < 4; i++) {
+  int failures = 0;
+  do {
     ERR_clear_error();
     // Generate into scratch space, to avoid leaving partial work on failure.
-    RSA *tmp = RSA_new();
+    tmp = RSA_new();
     if (tmp == NULL) {
-      return 0;
+      goto out;
     }
+
     if (rsa_generate_key_impl(tmp, bits, e_value, cb)) {
-      replace_bignum(&rsa->n, &tmp->n);
-      replace_bignum(&rsa->e, &tmp->e);
-      replace_bignum(&rsa->d, &tmp->d);
-      replace_bignum(&rsa->p, &tmp->p);
-      replace_bignum(&rsa->q, &tmp->q);
-      replace_bignum(&rsa->dmp1, &tmp->dmp1);
-      replace_bignum(&rsa->dmq1, &tmp->dmq1);
-      replace_bignum(&rsa->iqmp, &tmp->iqmp);
-      replace_bn_mont_ctx(&rsa->mont_n, &tmp->mont_n);
-      replace_bn_mont_ctx(&rsa->mont_p, &tmp->mont_p);
-      replace_bn_mont_ctx(&rsa->mont_q, &tmp->mont_q);
-      replace_bignum(&rsa->d_fixed, &tmp->d_fixed);
-      replace_bignum(&rsa->dmp1_fixed, &tmp->dmp1_fixed);
-      replace_bignum(&rsa->dmq1_fixed, &tmp->dmq1_fixed);
-      replace_bignum(&rsa->inv_small_mod_large_mont,
-                     &tmp->inv_small_mod_large_mont);
-      rsa->private_key_frozen = tmp->private_key_frozen;
-      RSA_free(tmp);
-      return 1;
+      break;
     }
-    uint32_t err = ERR_peek_error();
+
+    err = ERR_peek_error();
     RSA_free(tmp);
     tmp = NULL;
+    failures++;
+
     // Only retry on |RSA_R_TOO_MANY_ITERATIONS|. This is so a caller-induced
     // failure in |BN_GENCB_call| is still fatal.
-    if (ERR_GET_LIB(err) != ERR_LIB_RSA ||
-        ERR_GET_REASON(err) != RSA_R_TOO_MANY_ITERATIONS) {
-      return 0;
-    }
+  } while (failures < 4 && ERR_GET_LIB(err) == ERR_LIB_RSA &&
+           ERR_GET_REASON(err) == RSA_R_TOO_MANY_ITERATIONS);
+
+  if (tmp == NULL || (check_fips && !RSA_check_fips(tmp))) {
+    goto out;
   }
 
-  return 0;
+  replace_bignum(&rsa->n, &tmp->n);
+  replace_bignum(&rsa->e, &tmp->e);
+  replace_bignum(&rsa->d, &tmp->d);
+  replace_bignum(&rsa->p, &tmp->p);
+  replace_bignum(&rsa->q, &tmp->q);
+  replace_bignum(&rsa->dmp1, &tmp->dmp1);
+  replace_bignum(&rsa->dmq1, &tmp->dmq1);
+  replace_bignum(&rsa->iqmp, &tmp->iqmp);
+  replace_bn_mont_ctx(&rsa->mont_n, &tmp->mont_n);
+  replace_bn_mont_ctx(&rsa->mont_p, &tmp->mont_p);
+  replace_bn_mont_ctx(&rsa->mont_q, &tmp->mont_q);
+  replace_bignum(&rsa->d_fixed, &tmp->d_fixed);
+  replace_bignum(&rsa->dmp1_fixed, &tmp->dmp1_fixed);
+  replace_bignum(&rsa->dmq1_fixed, &tmp->dmq1_fixed);
+  replace_bignum(&rsa->inv_small_mod_large_mont,
+                 &tmp->inv_small_mod_large_mont);
+  rsa->private_key_frozen = tmp->private_key_frozen;
+  ret = 1;
+
+out:
+  RSA_free(tmp);
+  return ret;
+}
+
+int RSA_generate_key_ex(RSA *rsa, int bits, const BIGNUM *e_value,
+                        BN_GENCB *cb) {
+  return RSA_generate_key_ex_maybe_fips(rsa, bits, e_value, cb,
+                                        /*check_fips=*/0);
 }
 
 int RSA_generate_key_fips(RSA *rsa, int bits, BN_GENCB *cb) {
   // FIPS 186-4 allows 2048-bit and 3072-bit RSA keys (1024-bit and 1536-bit
   // primes, respectively) with the prime generation method we use.
-  if (bits != 2048 && bits != 3072) {
+  // Subsequently, IG A.14 stated that larger modulus sizes can be used and ACVP
+  // testing supports 4096 bits.
+  if (bits != 2048 && bits != 3072 && bits != 4096) {
     OPENSSL_PUT_ERROR(RSA, RSA_R_BAD_RSA_PARAMETERS);
     return 0;
   }
@@ -1343,8 +1397,7 @@
   BIGNUM *e = BN_new();
   int ret = e != NULL &&
             BN_set_word(e, RSA_F4) &&
-            RSA_generate_key_ex(rsa, bits, e, cb) &&
-            RSA_check_fips(rsa);
+            RSA_generate_key_ex_maybe_fips(rsa, bits, e, cb, /*check_fips=*/1);
   BN_free(e);
   return ret;
 }
diff --git a/deps/boringssl/src/crypto/fipsmodule/self_check/self_check.c b/deps/boringssl/src/crypto/fipsmodule/self_check/self_check.c
index 4b954b7..638500b 100644
--- a/deps/boringssl/src/crypto/fipsmodule/self_check/self_check.c
+++ b/deps/boringssl/src/crypto/fipsmodule/self_check/self_check.c
@@ -21,6 +21,8 @@
 #include <openssl/aes.h>
 #include <openssl/bn.h>
 #include <openssl/des.h>
+#include <openssl/dh.h>
+#include <openssl/digest.h>
 #include <openssl/ec.h>
 #include <openssl/ecdsa.h>
 #include <openssl/ec_key.h>
@@ -31,6 +33,7 @@
 #include "../../internal.h"
 #include "../ec/internal.h"
 #include "../rand/internal.h"
+#include "../tls/internal.h"
 
 
 // MSVC wants to put a NUL byte at the end of non-char arrays and so cannot
@@ -244,6 +247,41 @@
   return ec_key;
 }
 
+static DH *self_test_dh(void) {
+  DH *dh = DH_get_rfc7919_2048();
+  if (!dh) {
+    return NULL;
+  }
+
+  BIGNUM *priv = BN_new();
+  if (!priv) {
+    goto err;
+  }
+
+  // kFFDHE2048PrivateKeyData is a 225-bit value. (225 because that's the
+  // minimum private key size in
+  // https://tools.ietf.org/html/rfc7919#appendix-A.1.)
+  static const BN_ULONG kFFDHE2048PrivateKeyData[] = {
+      TOBN(0x187be36b, 0xd38a4fa1),
+      TOBN(0x0a152f39, 0x6458f3b8),
+      TOBN(0x0570187e, 0xc422eeb7),
+      TOBN(0x00000001, 0x91173f2a),
+  };
+
+  bn_set_static_words(priv, kFFDHE2048PrivateKeyData,
+                      OPENSSL_ARRAY_SIZE(kFFDHE2048PrivateKeyData));
+
+  if (!DH_set0_key(dh, NULL, priv)) {
+    goto err;
+  }
+  return dh;
+
+err:
+  BN_free(priv);
+  DH_free(dh);
+  return NULL;
+}
+
 #if defined(OPENSSL_ANDROID)
 static const size_t kModuleDigestSize = SHA256_DIGEST_LENGTH;
 #else
@@ -460,6 +498,81 @@
       0x00,
 #endif
   };
+  const uint8_t kTLSOutput[32] = {
+      0x67, 0x85, 0xde, 0x60, 0xfc, 0x0a, 0x83, 0xe9, 0xa2, 0x2a, 0xb3,
+      0xf0, 0x27, 0x0c, 0xba, 0xf7, 0xfa, 0x82, 0x3d, 0x14, 0x77, 0x1d,
+      0x86, 0x29, 0x79, 0x39, 0x77, 0x8a, 0xd5, 0x0e, 0x9d,
+#if !defined(BORINGSSL_FIPS_BREAK_TLS_KDF)
+      0x32,
+#else
+      0x00,
+#endif
+  };
+  const uint8_t kTLSSecret[32] = {
+      0xbf, 0xe4, 0xb7, 0xe0, 0x26, 0x55, 0x5f, 0x6a, 0xdf, 0x5d, 0x27,
+      0xd6, 0x89, 0x99, 0x2a, 0xd6, 0xf7, 0x65, 0x66, 0x07, 0x4b, 0x55,
+      0x5f, 0x64, 0x55, 0xcd, 0xd5, 0x77, 0xa4, 0xc7, 0x09, 0x61,
+  };
+  const char kTLSLabel[] = "FIPS self test";
+  const uint8_t kTLSSeed1[16] = {
+      0x8f, 0x0d, 0xe8, 0xb6, 0x90, 0x8f, 0xb1, 0xd2,
+      0x6d, 0x51, 0xf4, 0x79, 0x18, 0x63, 0x51, 0x65,
+  };
+  const uint8_t kTLSSeed2[16] = {
+      0x7d, 0x24, 0x1a, 0x9d, 0x3c, 0x59, 0xbf, 0x3c,
+      0x31, 0x1e, 0x2b, 0x21, 0x41, 0x8d, 0x32, 0x81,
+  };
+
+  // kFFDHE2048PublicValueData is an arbitrary public value, mod
+  // kFFDHE2048Data. (The private key happens to be 4096.)
+  static const BN_ULONG kFFDHE2048PublicValueData[] = {
+      TOBN(0x187be36b, 0xd38a4fa1), TOBN(0x0a152f39, 0x6458f3b8),
+      TOBN(0x0570187e, 0xc422eeb7), TOBN(0x18af7482, 0x91173f2a),
+      TOBN(0xe9fdac6a, 0xcff4eaaa), TOBN(0xf6afebb7, 0x6e589d6c),
+      TOBN(0xf92f8e9a, 0xb7e33fb0), TOBN(0x70acf2aa, 0x4cf36ddd),
+      TOBN(0x561ab426, 0xd07137fd), TOBN(0x5f57d037, 0x430ee91e),
+      TOBN(0xe3e768c8, 0x60d10b8a), TOBN(0xb14884d8, 0xa18af8ce),
+      TOBN(0xf8a98014, 0xa12b74e4), TOBN(0x748d407c, 0x3437b7a8),
+      TOBN(0x627588c4, 0x9875d5a7), TOBN(0xdd24a127, 0x53c8f09d),
+      TOBN(0x85a997d5, 0x0cd51aec), TOBN(0x44f0c619, 0xce348458),
+      TOBN(0x9b894b24, 0x5f6b69a1), TOBN(0xae1302f2, 0xf6d4777e),
+      TOBN(0xe6678eeb, 0x375db18e), TOBN(0x2674e1d6, 0x4fbcbdc8),
+      TOBN(0xb297a823, 0x6fa93d28), TOBN(0x6a12fb70, 0x7c8c0510),
+      TOBN(0x5c6d1aeb, 0xdb06f65b), TOBN(0xe8c2954e, 0x4c1804ca),
+      TOBN(0x06bdeac1, 0xf5500fa7), TOBN(0x6a315604, 0x189cd76b),
+      TOBN(0xbae7b0b3, 0x6e362dc0), TOBN(0xa57c73bd, 0xdc70fb82),
+      TOBN(0xfaff50d2, 0x9d573457), TOBN(0x352bd399, 0xbe84058e),
+  };
+
+  const uint8_t kDHOutput[2048 / 8] = {
+      0x2a, 0xe6, 0xd3, 0xa6, 0x13, 0x58, 0x8e, 0xce, 0x53, 0xaa, 0xf6, 0x5d,
+      0x9a, 0xae, 0x02, 0x12, 0xf5, 0x80, 0x3d, 0x06, 0x09, 0x76, 0xac, 0x57,
+      0x37, 0x9e, 0xab, 0x38, 0x62, 0x25, 0x05, 0x1d, 0xf3, 0xa9, 0x39, 0x60,
+      0xf6, 0xae, 0x90, 0xed, 0x1e, 0xad, 0x6e, 0xe9, 0xe3, 0xba, 0x27, 0xf6,
+      0xdb, 0x54, 0xdf, 0xe2, 0xbd, 0xbb, 0x7f, 0xf1, 0x81, 0xac, 0x1a, 0xfa,
+      0xdb, 0x87, 0x07, 0x98, 0x76, 0x90, 0x21, 0xf2, 0xae, 0xda, 0x0d, 0x84,
+      0x97, 0x64, 0x0b, 0xbf, 0xb8, 0x8d, 0x10, 0x46, 0xe2, 0xd5, 0xca, 0x1b,
+      0xbb, 0xe5, 0x37, 0xb2, 0x3b, 0x35, 0xd3, 0x1b, 0x65, 0xea, 0xae, 0xf2,
+      0x03, 0xe2, 0xb6, 0xde, 0x22, 0xb7, 0x86, 0x49, 0x79, 0xfe, 0xd7, 0x16,
+      0xf7, 0xdc, 0x9c, 0x59, 0xf5, 0xb7, 0x70, 0xc0, 0x53, 0x42, 0x6f, 0xb1,
+      0xd2, 0x4e, 0x00, 0x25, 0x4b, 0x2d, 0x5a, 0x9b, 0xd0, 0xe9, 0x27, 0x43,
+      0xcc, 0x00, 0x66, 0xea, 0x94, 0x7a, 0x0b, 0xb9, 0x89, 0x0c, 0x5e, 0x94,
+      0xb8, 0x3a, 0x78, 0x9c, 0x4d, 0x84, 0xe6, 0x32, 0x2c, 0x38, 0x7c, 0xf7,
+      0x43, 0x9c, 0xd8, 0xb8, 0x1c, 0xce, 0x24, 0x91, 0x20, 0x67, 0x7a, 0x54,
+      0x1f, 0x7e, 0x86, 0x7f, 0xa1, 0xc1, 0x03, 0x4e, 0x2c, 0x26, 0x71, 0xb2,
+      0x06, 0x30, 0xb3, 0x6c, 0x15, 0xcc, 0xac, 0x25, 0xe5, 0x37, 0x3f, 0x24,
+      0x8f, 0x2a, 0x89, 0x5e, 0x3d, 0x43, 0x94, 0xc9, 0x36, 0xae, 0x40, 0x00,
+      0x6a, 0x0d, 0xb0, 0x6e, 0x8b, 0x2e, 0x70, 0x57, 0xe1, 0x88, 0x53, 0xd6,
+      0x06, 0x80, 0x2a, 0x4e, 0x5a, 0xf0, 0x1e, 0xaa, 0xcb, 0xab, 0x06, 0x0e,
+      0x27, 0x0f, 0xd9, 0x88, 0xd9, 0x01, 0xe3, 0x07, 0xeb, 0xdf, 0xc3, 0x12,
+      0xe3, 0x40, 0x88, 0x7b, 0x5f, 0x59, 0x78, 0x6e, 0x26, 0x20, 0xc3, 0xdf,
+      0xc8, 0xe4, 0x5e,
+#if !defined(BORINGSSL_FIPS_BREAK_FFC_DH)
+      0xb8,
+#else
+      0x00,
+#endif
+  };
 
   EVP_AEAD_CTX aead_ctx;
   EVP_AEAD_CTX_zero(&aead_ctx);
@@ -611,7 +724,7 @@
     goto err;
   }
 
-  // ECDSA Sign/Verify PWCT
+  // ECDSA Sign/Verify KAT
 
   // The 'k' value for ECDSA is fixed to avoid an entropy draw.
   ec_key->fixed_k = BN_new();
@@ -632,7 +745,13 @@
       !BN_bn2bin(sig->s, ecdsa_s_bytes) ||
       !check_test(kECDSASigR, ecdsa_r_bytes, sizeof(kECDSASigR), "ECDSA R") ||
       !check_test(kECDSASigS, ecdsa_s_bytes, sizeof(kECDSASigS), "ECDSA S")) {
-    fprintf(stderr, "ECDSA KAT failed.\n");
+    fprintf(stderr, "ECDSA signature KAT failed.\n");
+    goto err;
+  }
+
+  if (!ECDSA_do_verify(kPlaintextSHA256, sizeof(kPlaintextSHA256), sig,
+                       ec_key)) {
+    fprintf(stderr, "ECDSA verification KAT failed.\n");
     goto err;
   }
 
@@ -660,6 +779,29 @@
     goto err;
   }
 
+  // FFC Diffie-Hellman KAT
+
+  BIGNUM *const ffdhe2048_value = BN_new();
+  DH *const dh = self_test_dh();
+  int dh_ok = 0;
+  if (ffdhe2048_value && dh) {
+    bn_set_static_words(ffdhe2048_value, kFFDHE2048PublicValueData,
+                        OPENSSL_ARRAY_SIZE(kFFDHE2048PublicValueData));
+
+    uint8_t dh_out[sizeof(kDHOutput)];
+    dh_ok =
+        sizeof(dh_out) == DH_size(dh) &&
+        DH_compute_key_padded(dh_out, ffdhe2048_value, dh) == sizeof(dh_out) &&
+        check_test(kDHOutput, dh_out, sizeof(dh_out), "FFC DH");
+  }
+
+  BN_free(ffdhe2048_value);
+  DH_free(dh);
+  if (!dh_ok) {
+    fprintf(stderr, "FFDH failed.\n");
+    goto err;
+  }
+
   // DBRG KAT
   CTR_DRBG_STATE drbg;
   if (!CTR_DRBG_init(&drbg, kDRBGEntropy, kDRBGPersonalization,
@@ -684,6 +826,17 @@
     goto err;
   }
 
+  // TLS KDF KAT
+  uint8_t tls_output[sizeof(kTLSOutput)];
+  if (!CRYPTO_tls1_prf(EVP_sha256(), tls_output, sizeof(tls_output), kTLSSecret,
+                       sizeof(kTLSSecret), kTLSLabel, sizeof(kTLSLabel),
+                       kTLSSeed1, sizeof(kTLSSeed1), kTLSSeed2,
+                       sizeof(kTLSSeed2)) ||
+      !check_test(kTLSOutput, tls_output, sizeof(kTLSOutput), "TLS KDF KAT")) {
+    fprintf(stderr, "TLS KDF failed.\n");
+    goto err;
+  }
+
   ret = 1;
 
 #if defined(BORINGSSL_FIPS_SELF_TEST_FLAG_FILE)
diff --git a/deps/boringssl/src/crypto/hpke/hpke.c b/deps/boringssl/src/crypto/hpke/hpke.c
new file mode 100644
index 0000000..ee03e53
--- /dev/null
+++ b/deps/boringssl/src/crypto/hpke/hpke.c
@@ -0,0 +1,532 @@
+/* Copyright (c) 2020, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <assert.h>
+#include <string.h>
+
+#include <openssl/aead.h>
+#include <openssl/bytestring.h>
+#include <openssl/digest.h>
+#include <openssl/err.h>
+#include <openssl/evp.h>
+#include <openssl/hkdf.h>
+#include <openssl/sha.h>
+
+#include "../internal.h"
+#include "internal.h"
+
+
+// This file implements draft-irtf-cfrg-hpke-07.
+
+#define KEM_CONTEXT_LEN (2 * X25519_PUBLIC_VALUE_LEN)
+
+// HPKE KEM scheme IDs.
+#define HPKE_DHKEM_X25519_HKDF_SHA256 0x0020
+
+// This is strlen("HPKE") + 3 * sizeof(uint16_t).
+#define HPKE_SUITE_ID_LEN 10
+
+#define HPKE_MODE_BASE 0
+#define HPKE_MODE_PSK 1
+
+static const char kHpkeRfcId[] = "HPKE-07";
+
+static int add_label_string(CBB *cbb, const char *label) {
+  return CBB_add_bytes(cbb, (const uint8_t *)label, strlen(label));
+}
+
+// The suite_id for the KEM is defined as concat("KEM", I2OSP(kem_id, 2)). Note
+// that the suite_id used outside of the KEM also includes the kdf_id and
+// aead_id.
+static const uint8_t kX25519SuiteID[] = {
+    'K', 'E', 'M', HPKE_DHKEM_X25519_HKDF_SHA256 >> 8,
+    HPKE_DHKEM_X25519_HKDF_SHA256 & 0x00ff};
+
+// The non-KEM suite_id is concat("HPKE", I2OSP(kem_id, 2), I2OSP(kdf_id, 2),
+// I2OSP(aead_id, 2)); kem_id is always HPKE_DHKEM_X25519_HKDF_SHA256 here.
+static int hpke_build_suite_id(uint8_t out[HPKE_SUITE_ID_LEN], uint16_t kdf_id,
+                               uint16_t aead_id) {
+  CBB cbb;
+  int ret = CBB_init_fixed(&cbb, out, HPKE_SUITE_ID_LEN) &&
+            add_label_string(&cbb, "HPKE") &&
+            CBB_add_u16(&cbb, HPKE_DHKEM_X25519_HKDF_SHA256) &&
+            CBB_add_u16(&cbb, kdf_id) &&
+            CBB_add_u16(&cbb, aead_id);
+  CBB_cleanup(&cbb);
+  return ret;
+}
+
+static int hpke_labeled_extract(const EVP_MD *hkdf_md, uint8_t *out_key,
+                                size_t *out_len, const uint8_t *salt,
+                                size_t salt_len, const uint8_t *suite_id,
+                                size_t suite_id_len, const char *label,
+                                const uint8_t *ikm, size_t ikm_len) {
+  // labeledIKM = concat(kHpkeRfcId ("HPKE-07"), suite_id, label, IKM)
+  CBB labeled_ikm;
+  int ok = CBB_init(&labeled_ikm, 0) &&
+           add_label_string(&labeled_ikm, kHpkeRfcId) &&
+           CBB_add_bytes(&labeled_ikm, suite_id, suite_id_len) &&
+           add_label_string(&labeled_ikm, label) &&
+           CBB_add_bytes(&labeled_ikm, ikm, ikm_len) &&
+           HKDF_extract(out_key, out_len, hkdf_md, CBB_data(&labeled_ikm),
+                        CBB_len(&labeled_ikm), salt, salt_len);
+  CBB_cleanup(&labeled_ikm);
+  return ok;
+}
+
+static int hpke_labeled_expand(const EVP_MD *hkdf_md, uint8_t *out_key,
+                               size_t out_len, const uint8_t *prk,
+                               size_t prk_len, const uint8_t *suite_id,
+                               size_t suite_id_len, const char *label,
+                               const uint8_t *info, size_t info_len) {
+  // labeledInfo = concat(I2OSP(out_len, 2), "HPKE-07", suite_id, label, info)
+  CBB labeled_info;
+  int ok = CBB_init(&labeled_info, 0) &&
+           CBB_add_u16(&labeled_info, out_len) &&
+           add_label_string(&labeled_info, kHpkeRfcId) &&
+           CBB_add_bytes(&labeled_info, suite_id, suite_id_len) &&
+           add_label_string(&labeled_info, label) &&
+           CBB_add_bytes(&labeled_info, info, info_len) &&
+           HKDF_expand(out_key, out_len, hkdf_md, prk, prk_len,
+                       CBB_data(&labeled_info), CBB_len(&labeled_info));
+  CBB_cleanup(&labeled_info);
+  return ok;
+}
+
+static int hpke_extract_and_expand(const EVP_MD *hkdf_md, uint8_t *out_key,
+                                   size_t out_len,
+                                   const uint8_t dh[X25519_PUBLIC_VALUE_LEN],
+                                   const uint8_t kem_context[KEM_CONTEXT_LEN]) {
+  uint8_t prk[EVP_MAX_MD_SIZE];
+  size_t prk_len;
+  static const char kEaePrkLabel[] = "eae_prk";
+  if (!hpke_labeled_extract(hkdf_md, prk, &prk_len, NULL, 0, kX25519SuiteID,
+                            sizeof(kX25519SuiteID), kEaePrkLabel, dh,
+                            X25519_PUBLIC_VALUE_LEN)) {
+    return 0;
+  }
+  static const char kPRKExpandLabel[] = "shared_secret";
+  if (!hpke_labeled_expand(hkdf_md, out_key, out_len, prk, prk_len,
+                           kX25519SuiteID, sizeof(kX25519SuiteID),
+                           kPRKExpandLabel, kem_context, KEM_CONTEXT_LEN)) {
+    return 0;
+  }
+  return 1;
+}
+
+const EVP_AEAD *EVP_HPKE_get_aead(uint16_t aead_id) {
+  switch (aead_id) {
+    case EVP_HPKE_AEAD_AES_GCM_128:
+      return EVP_aead_aes_128_gcm();
+    case EVP_HPKE_AEAD_AES_GCM_256:
+      return EVP_aead_aes_256_gcm();
+    case EVP_HPKE_AEAD_CHACHA20POLY1305:
+      return EVP_aead_chacha20_poly1305();
+  }
+  OPENSSL_PUT_ERROR(EVP, ERR_R_INTERNAL_ERROR);
+  return NULL;
+}
+
+const EVP_MD *EVP_HPKE_get_hkdf_md(uint16_t kdf_id) {
+  switch (kdf_id) {
+    case EVP_HPKE_HKDF_SHA256:
+      return EVP_sha256();
+    case EVP_HPKE_HKDF_SHA384:
+      return EVP_sha384();
+    case EVP_HPKE_HKDF_SHA512:
+      return EVP_sha512();
+  }
+  OPENSSL_PUT_ERROR(EVP, ERR_R_INTERNAL_ERROR);
+  return NULL;
+}
+
+static int hpke_key_schedule(EVP_HPKE_CTX *hpke, uint8_t mode,
+                             const uint8_t *shared_secret,
+                             size_t shared_secret_len, const uint8_t *info,
+                             size_t info_len, const uint8_t *psk,
+                             size_t psk_len, const uint8_t *psk_id,
+                             size_t psk_id_len) {
+  // Verify the PSK inputs.
+  switch (mode) {
+    case HPKE_MODE_BASE:
+      // This is an internal error, unreachable from the caller.
+      assert(psk_len == 0 && psk_id_len == 0);
+      break;
+    case HPKE_MODE_PSK:
+      if (psk_len == 0 || psk_id_len == 0) {
+        OPENSSL_PUT_ERROR(EVP, EVP_R_EMPTY_PSK);
+        return 0;
+      }
+      break;
+    default:
+      return 0;
+  }
+
+  // Attempt to get an EVP_AEAD*.
+  const EVP_AEAD *aead = EVP_HPKE_get_aead(hpke->aead_id);
+  if (aead == NULL) {
+    return 0;
+  }
+
+  uint8_t suite_id[HPKE_SUITE_ID_LEN];
+  if (!hpke_build_suite_id(suite_id, hpke->kdf_id, hpke->aead_id)) {
+    return 0;
+  }
+
+  // psk_id_hash = LabeledExtract("", "psk_id_hash", psk_id)
+  static const char kPskIdHashLabel[] = "psk_id_hash";
+  uint8_t psk_id_hash[EVP_MAX_MD_SIZE];
+  size_t psk_id_hash_len;
+  if (!hpke_labeled_extract(hpke->hkdf_md, psk_id_hash, &psk_id_hash_len, NULL,
+                            0, suite_id, sizeof(suite_id), kPskIdHashLabel,
+                            psk_id, psk_id_len)) {
+    return 0;
+  }
+
+  // info_hash = LabeledExtract("", "info_hash", info)
+  static const char kInfoHashLabel[] = "info_hash";
+  uint8_t info_hash[EVP_MAX_MD_SIZE];
+  size_t info_hash_len;
+  if (!hpke_labeled_extract(hpke->hkdf_md, info_hash, &info_hash_len, NULL, 0,
+                            suite_id, sizeof(suite_id), kInfoHashLabel, info,
+                            info_len)) {
+    return 0;
+  }
+
+  // key_schedule_context = concat(mode, psk_id_hash, info_hash)
+  uint8_t context[sizeof(uint8_t) + 2 * EVP_MAX_MD_SIZE];
+  size_t context_len;
+  CBB context_cbb;
+  if (!CBB_init_fixed(&context_cbb, context, sizeof(context)) ||
+      !CBB_add_u8(&context_cbb, mode) ||
+      !CBB_add_bytes(&context_cbb, psk_id_hash, psk_id_hash_len) ||
+      !CBB_add_bytes(&context_cbb, info_hash, info_hash_len) ||
+      !CBB_finish(&context_cbb, NULL, &context_len)) {
+    return 0;
+  }
+
+  // secret = LabeledExtract(shared_secret, "secret", psk)
+  static const char kSecretExtractLabel[] = "secret";
+  uint8_t secret[EVP_MAX_MD_SIZE];
+  size_t secret_len;
+  if (!hpke_labeled_extract(hpke->hkdf_md, secret, &secret_len, shared_secret,
+                            shared_secret_len, suite_id, sizeof(suite_id),
+                            kSecretExtractLabel, psk, psk_len)) {
+    return 0;
+  }
+
+  // key = LabeledExpand(secret, "key", key_schedule_context, Nk)
+  static const char kKeyExpandLabel[] = "key";
+  uint8_t key[EVP_AEAD_MAX_KEY_LENGTH];
+  const size_t kKeyLen = EVP_AEAD_key_length(aead);
+  if (!hpke_labeled_expand(hpke->hkdf_md, key, kKeyLen, secret, secret_len,
+                           suite_id, sizeof(suite_id), kKeyExpandLabel, context,
+                           context_len)) {
+    return 0;
+  }
+
+  // Initialize the HPKE context's AEAD context, storing a copy of |key|.
+  if (!EVP_AEAD_CTX_init(&hpke->aead_ctx, aead, key, kKeyLen, 0, NULL)) {
+    return 0;
+  }
+
+  // base_nonce = LabeledExpand(secret, "base_nonce", key_schedule_context, Nn)
+  static const char kNonceExpandLabel[] = "base_nonce";
+  if (!hpke_labeled_expand(hpke->hkdf_md, hpke->base_nonce,
+                           EVP_AEAD_nonce_length(aead), secret, secret_len,
+                           suite_id, sizeof(suite_id), kNonceExpandLabel,
+                           context, context_len)) {
+    return 0;
+  }
+
+  // exporter_secret = LabeledExpand(secret, "exp", key_schedule_context, Nh)
+  static const char kExporterSecretExpandLabel[] = "exp";
+  if (!hpke_labeled_expand(hpke->hkdf_md, hpke->exporter_secret,
+                           EVP_MD_size(hpke->hkdf_md), secret, secret_len,
+                           suite_id, sizeof(suite_id),
+                           kExporterSecretExpandLabel, context, context_len)) {
+    return 0;
+  }
+
+  return 1;
+}
+
+// Writes SHA256_DIGEST_LENGTH bytes to |out_shared_secret|: the output size of
+// the KEM's KDF hash, which is currently always SHA-256.
+static int hpke_encap(EVP_HPKE_CTX *hpke,
+                      uint8_t out_shared_secret[SHA256_DIGEST_LENGTH],
+                      const uint8_t public_key_r[X25519_PUBLIC_VALUE_LEN],
+                      const uint8_t ephemeral_private[X25519_PRIVATE_KEY_LEN],
+                      const uint8_t ephemeral_public[X25519_PUBLIC_VALUE_LEN]) {
+  uint8_t dh[X25519_PUBLIC_VALUE_LEN];
+  if (!X25519(dh, ephemeral_private, public_key_r)) {
+    OPENSSL_PUT_ERROR(EVP, EVP_R_INVALID_PEER_KEY);
+    return 0;
+  }
+
+  uint8_t kem_context[KEM_CONTEXT_LEN];
+  OPENSSL_memcpy(kem_context, ephemeral_public, X25519_PUBLIC_VALUE_LEN);
+  OPENSSL_memcpy(kem_context + X25519_PUBLIC_VALUE_LEN, public_key_r,
+                 X25519_PUBLIC_VALUE_LEN);
+  if (!hpke_extract_and_expand(EVP_sha256(), out_shared_secret,
+                               SHA256_DIGEST_LENGTH, dh, kem_context)) {
+    return 0;
+  }
+  return 1;
+}
+
+static int hpke_decap(const EVP_HPKE_CTX *hpke,
+                      uint8_t out_shared_secret[SHA256_DIGEST_LENGTH],
+                      const uint8_t enc[X25519_PUBLIC_VALUE_LEN],
+                      const uint8_t public_key_r[X25519_PUBLIC_VALUE_LEN],
+                      const uint8_t secret_key_r[X25519_PRIVATE_KEY_LEN]) {
+  uint8_t dh[X25519_PUBLIC_VALUE_LEN];
+  if (!X25519(dh, secret_key_r, enc)) {
+    OPENSSL_PUT_ERROR(EVP, EVP_R_INVALID_PEER_KEY);
+    return 0;
+  }
+  uint8_t kem_context[KEM_CONTEXT_LEN];
+  OPENSSL_memcpy(kem_context, enc, X25519_PUBLIC_VALUE_LEN);
+  OPENSSL_memcpy(kem_context + X25519_PUBLIC_VALUE_LEN, public_key_r,
+                 X25519_PUBLIC_VALUE_LEN);
+  if (!hpke_extract_and_expand(EVP_sha256(), out_shared_secret,
+                               SHA256_DIGEST_LENGTH, dh, kem_context)) {
+    return 0;
+  }
+  return 1;
+}
+
+void EVP_HPKE_CTX_init(EVP_HPKE_CTX *ctx) {
+  OPENSSL_memset(ctx, 0, sizeof(EVP_HPKE_CTX));
+  EVP_AEAD_CTX_zero(&ctx->aead_ctx);
+}
+
+void EVP_HPKE_CTX_cleanup(EVP_HPKE_CTX *ctx) {
+  EVP_AEAD_CTX_cleanup(&ctx->aead_ctx);
+}
+
+int EVP_HPKE_CTX_setup_base_s_x25519(
+    EVP_HPKE_CTX *hpke, uint8_t out_enc[X25519_PUBLIC_VALUE_LEN],
+    uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t *info, size_t info_len) {
+  // The GenerateKeyPair() step technically belongs in the KEM's Encap()
+  // function, but we've moved it up a layer to make it easier for tests to
+  // inject an ephemeral keypair.
+  uint8_t ephemeral_private[X25519_PRIVATE_KEY_LEN];
+  X25519_keypair(out_enc, ephemeral_private);
+  return EVP_HPKE_CTX_setup_base_s_x25519_for_test(
+      hpke, kdf_id, aead_id, peer_public_value, info, info_len,
+      ephemeral_private, out_enc);
+}
+
+int EVP_HPKE_CTX_setup_base_s_x25519_for_test(
+    EVP_HPKE_CTX *hpke, uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t *info, size_t info_len,
+    const uint8_t ephemeral_private[X25519_PRIVATE_KEY_LEN],
+    const uint8_t ephemeral_public[X25519_PUBLIC_VALUE_LEN]) {
+  hpke->is_sender = 1;
+  hpke->kdf_id = kdf_id;
+  hpke->aead_id = aead_id;
+  hpke->hkdf_md = EVP_HPKE_get_hkdf_md(kdf_id);
+  if (hpke->hkdf_md == NULL) {
+    return 0;
+  }
+  uint8_t shared_secret[SHA256_DIGEST_LENGTH];
+  if (!hpke_encap(hpke, shared_secret, peer_public_value, ephemeral_private,
+                  ephemeral_public) ||
+      !hpke_key_schedule(hpke, HPKE_MODE_BASE, shared_secret,
+                         sizeof(shared_secret), info, info_len, NULL, 0, NULL,
+                         0)) {
+    return 0;
+  }
+  return 1;
+}
+
+int EVP_HPKE_CTX_setup_base_r_x25519(
+    EVP_HPKE_CTX *hpke, uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t enc[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t public_key[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t private_key[X25519_PRIVATE_KEY_LEN], const uint8_t *info,
+    size_t info_len) {
+  hpke->is_sender = 0;
+  hpke->kdf_id = kdf_id;
+  hpke->aead_id = aead_id;
+  hpke->hkdf_md = EVP_HPKE_get_hkdf_md(kdf_id);
+  if (hpke->hkdf_md == NULL) {
+    return 0;
+  }
+  uint8_t shared_secret[SHA256_DIGEST_LENGTH];
+  if (!hpke_decap(hpke, shared_secret, enc, public_key, private_key) ||
+      !hpke_key_schedule(hpke, HPKE_MODE_BASE, shared_secret,
+                         sizeof(shared_secret), info, info_len, NULL, 0, NULL,
+                         0)) {
+    return 0;
+  }
+  return 1;
+}
+
+int EVP_HPKE_CTX_setup_psk_s_x25519(
+    EVP_HPKE_CTX *hpke, uint8_t out_enc[X25519_PUBLIC_VALUE_LEN],
+    uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t *info, size_t info_len, const uint8_t *psk, size_t psk_len,
+    const uint8_t *psk_id, size_t psk_id_len) {
+  // The GenerateKeyPair() step technically belongs in the KEM's Encap()
+  // function, but we've moved it up a layer to make it easier for tests to
+  // inject an ephemeral keypair.
+  uint8_t ephemeral_private[X25519_PRIVATE_KEY_LEN];
+  X25519_keypair(out_enc, ephemeral_private);
+  return EVP_HPKE_CTX_setup_psk_s_x25519_for_test(
+      hpke, kdf_id, aead_id, peer_public_value, info, info_len, psk, psk_len,
+      psk_id, psk_id_len, ephemeral_private, out_enc);
+}
+
+int EVP_HPKE_CTX_setup_psk_s_x25519_for_test(
+    EVP_HPKE_CTX *hpke, uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t *info, size_t info_len, const uint8_t *psk, size_t psk_len,
+    const uint8_t *psk_id, size_t psk_id_len,
+    const uint8_t ephemeral_private[X25519_PRIVATE_KEY_LEN],
+    const uint8_t ephemeral_public[X25519_PUBLIC_VALUE_LEN]) {
+  hpke->is_sender = 1;
+  hpke->kdf_id = kdf_id;
+  hpke->aead_id = aead_id;
+  hpke->hkdf_md = EVP_HPKE_get_hkdf_md(kdf_id);
+  if (hpke->hkdf_md == NULL) {
+    return 0;
+  }
+  uint8_t shared_secret[SHA256_DIGEST_LENGTH];
+  if (!hpke_encap(hpke, shared_secret, peer_public_value, ephemeral_private,
+                  ephemeral_public) ||
+      !hpke_key_schedule(hpke, HPKE_MODE_PSK, shared_secret,
+                         sizeof(shared_secret), info, info_len, psk, psk_len,
+                         psk_id, psk_id_len)) {
+    return 0;
+  }
+  return 1;
+}
+
+int EVP_HPKE_CTX_setup_psk_r_x25519(
+    EVP_HPKE_CTX *hpke, uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t enc[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t public_key[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t private_key[X25519_PRIVATE_KEY_LEN], const uint8_t *info,
+    size_t info_len, const uint8_t *psk, size_t psk_len, const uint8_t *psk_id,
+    size_t psk_id_len) {
+  hpke->is_sender = 0;
+  hpke->kdf_id = kdf_id;
+  hpke->aead_id = aead_id;
+  hpke->hkdf_md = EVP_HPKE_get_hkdf_md(kdf_id);
+  if (hpke->hkdf_md == NULL) {
+    return 0;
+  }
+  uint8_t shared_secret[SHA256_DIGEST_LENGTH];
+  if (!hpke_decap(hpke, shared_secret, enc, public_key, private_key) ||
+      !hpke_key_schedule(hpke, HPKE_MODE_PSK, shared_secret,
+                         sizeof(shared_secret), info, info_len, psk, psk_len,
+                         psk_id, psk_id_len)) {
+    return 0;
+  }
+  return 1;
+}
+
+static void hpke_nonce(const EVP_HPKE_CTX *hpke, uint8_t *out_nonce,
+                       size_t nonce_len) {
+  assert(nonce_len >= 8);
+
+  // Encode |hpke->seq| big-endian, left-padded with zeros to |nonce_len|.
+  OPENSSL_memset(out_nonce, 0, nonce_len);
+  uint64_t seq_copy = hpke->seq;
+  for (size_t i = 0; i < 8; i++) {
+    out_nonce[nonce_len - i - 1] = seq_copy & 0xff;
+    seq_copy >>= 8;
+  }
+
+  // XOR the encoded sequence number with |hpke->base_nonce|.
+  for (size_t i = 0; i < nonce_len; i++) {
+    out_nonce[i] ^= hpke->base_nonce[i];
+  }
+}
+
+size_t EVP_HPKE_CTX_max_overhead(const EVP_HPKE_CTX *hpke) {
+  assert(hpke->is_sender);
+  return EVP_AEAD_max_overhead(hpke->aead_ctx.aead);
+}
+
+int EVP_HPKE_CTX_open(EVP_HPKE_CTX *hpke, uint8_t *out, size_t *out_len,
+                      size_t max_out_len, const uint8_t *in, size_t in_len,
+                      const uint8_t *ad, size_t ad_len) {
+  if (hpke->is_sender) {
+    OPENSSL_PUT_ERROR(EVP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
+    return 0;
+  }
+  if (hpke->seq == UINT64_MAX) {
+    OPENSSL_PUT_ERROR(EVP, ERR_R_OVERFLOW);
+    return 0;
+  }
+
+  uint8_t nonce[EVP_AEAD_MAX_NONCE_LENGTH];
+  const size_t nonce_len = EVP_AEAD_nonce_length(hpke->aead_ctx.aead);
+  hpke_nonce(hpke, nonce, nonce_len);
+
+  if (!EVP_AEAD_CTX_open(&hpke->aead_ctx, out, out_len, max_out_len, nonce,
+                         nonce_len, in, in_len, ad, ad_len)) {
+    return 0;
+  }
+  hpke->seq++;
+  return 1;
+}
+
+int EVP_HPKE_CTX_seal(EVP_HPKE_CTX *hpke, uint8_t *out, size_t *out_len,
+                      size_t max_out_len, const uint8_t *in, size_t in_len,
+                      const uint8_t *ad, size_t ad_len) {
+  if (!hpke->is_sender) {
+    OPENSSL_PUT_ERROR(EVP, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
+    return 0;
+  }
+  if (hpke->seq == UINT64_MAX) {
+    OPENSSL_PUT_ERROR(EVP, ERR_R_OVERFLOW);
+    return 0;
+  }
+
+  uint8_t nonce[EVP_AEAD_MAX_NONCE_LENGTH];
+  const size_t nonce_len = EVP_AEAD_nonce_length(hpke->aead_ctx.aead);
+  hpke_nonce(hpke, nonce, nonce_len);
+
+  if (!EVP_AEAD_CTX_seal(&hpke->aead_ctx, out, out_len, max_out_len, nonce,
+                         nonce_len, in, in_len, ad, ad_len)) {
+    return 0;
+  }
+  hpke->seq++;
+  return 1;
+}
+
+int EVP_HPKE_CTX_export(const EVP_HPKE_CTX *hpke, uint8_t *out,
+                        size_t secret_len, const uint8_t *context,
+                        size_t context_len) {
+  uint8_t suite_id[HPKE_SUITE_ID_LEN];
+  if (!hpke_build_suite_id(suite_id, hpke->kdf_id, hpke->aead_id)) {
+    return 0;
+  }
+  static const char kExportExpandLabel[] = "sec";
+  if (!hpke_labeled_expand(hpke->hkdf_md, out, secret_len,
+                           hpke->exporter_secret, EVP_MD_size(hpke->hkdf_md),
+                           suite_id, sizeof(suite_id), kExportExpandLabel,
+                           context, context_len)) {
+    return 0;
+  }
+  return 1;
+}
diff --git a/deps/boringssl/src/crypto/hpke/hpke_test.cc b/deps/boringssl/src/crypto/hpke/hpke_test.cc
new file mode 100644
index 0000000..c007b3d
--- /dev/null
+++ b/deps/boringssl/src/crypto/hpke/hpke_test.cc
@@ -0,0 +1,489 @@
+/* Copyright (c) 2020, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <cstdint>
+#include <limits>
+#include <string>
+#include <vector>
+
+#include <gtest/gtest.h>
+
+#include <openssl/base.h>
+#include <openssl/curve25519.h>
+#include <openssl/digest.h>
+#include <openssl/err.h>
+#include <openssl/evp.h>
+#include <openssl/sha.h>
+#include <openssl/span.h>
+
+#include "../test/file_test.h"
+#include "../test/test_util.h"
+#include "internal.h"
+
+
+namespace bssl {
+namespace {
+// HPKEMode lists the supported values of a test vector's "mode" attribute.
+enum class HPKEMode {
+  kBase = 0,
+  kPSK = 1,
+};
+
+// HPKETestVector corresponds to one array member in the published
+// test-vectors.json.
+class HPKETestVector {
+ public:
+  explicit HPKETestVector() = default;
+  ~HPKETestVector() = default;
+  // Fills this vector from |t|. Returns false if an attribute cannot be read.
+  bool ReadFromFileTest(FileTest *t);
+  // Sets up both contexts for |mode_|, then checks encryptions and exports.
+  void Verify() const {
+    ScopedEVP_HPKE_CTX sender_ctx;
+    ScopedEVP_HPKE_CTX receiver_ctx;
+
+    switch (mode_) {
+      case HPKEMode::kBase:
+        ASSERT_GT(secret_key_e_.size(), 0u);
+        ASSERT_EQ(psk_.size(), 0u);
+        ASSERT_EQ(psk_id_.size(), 0u);
+
+        // Set up the sender.
+        ASSERT_TRUE(EVP_HPKE_CTX_setup_base_s_x25519_for_test(
+            sender_ctx.get(), kdf_id_, aead_id_, public_key_r_.data(),
+            info_.data(), info_.size(), secret_key_e_.data(),
+            public_key_e_.data()));
+
+        // Set up the receiver.
+        ASSERT_TRUE(EVP_HPKE_CTX_setup_base_r_x25519(
+            receiver_ctx.get(), kdf_id_, aead_id_, public_key_e_.data(),
+            public_key_r_.data(), secret_key_r_.data(), info_.data(),
+            info_.size()));
+        break;
+
+      case HPKEMode::kPSK:
+        ASSERT_GT(secret_key_e_.size(), 0u);
+        ASSERT_GT(psk_.size(), 0u);
+        ASSERT_GT(psk_id_.size(), 0u);
+
+        // Set up the sender.
+        ASSERT_TRUE(EVP_HPKE_CTX_setup_psk_s_x25519_for_test(
+            sender_ctx.get(), kdf_id_, aead_id_, public_key_r_.data(),
+            info_.data(), info_.size(), psk_.data(), psk_.size(),
+            psk_id_.data(), psk_id_.size(), secret_key_e_.data(),
+            public_key_e_.data()));
+
+        // Set up the receiver.
+        ASSERT_TRUE(EVP_HPKE_CTX_setup_psk_r_x25519(
+            receiver_ctx.get(), kdf_id_, aead_id_, public_key_e_.data(),
+            public_key_r_.data(), secret_key_r_.data(), info_.data(),
+            info_.size(), psk_.data(), psk_.size(), psk_id_.data(),
+            psk_id_.size()));
+        break;
+      default:
+        FAIL() << "Unsupported mode";
+        return;
+    }
+
+    VerifyEncryptions(sender_ctx.get(), receiver_ctx.get());
+    VerifyExports(sender_ctx.get());
+    VerifyExports(receiver_ctx.get());
+  }
+
+ private:
+  void VerifyEncryptions(EVP_HPKE_CTX *sender_ctx,
+                         EVP_HPKE_CTX *receiver_ctx) const {
+    for (const Encryption &task : encryptions_) {
+      std::vector<uint8_t> encrypted(task.plaintext.size() +
+                                     EVP_HPKE_CTX_max_overhead(sender_ctx));
+      size_t encrypted_len;
+      ASSERT_TRUE(EVP_HPKE_CTX_seal(
+          sender_ctx, encrypted.data(), &encrypted_len, encrypted.size(),
+          task.plaintext.data(), task.plaintext.size(), task.aad.data(),
+          task.aad.size()));
+      // The sealed output must equal the vector's ciphertext exactly.
+      ASSERT_EQ(Bytes(encrypted.data(), encrypted_len), Bytes(task.ciphertext));
+      // Opening that ciphertext must return the vector's plaintext.
+      std::vector<uint8_t> decrypted(task.ciphertext.size());
+      size_t decrypted_len;
+      ASSERT_TRUE(EVP_HPKE_CTX_open(
+          receiver_ctx, decrypted.data(), &decrypted_len, decrypted.size(),
+          task.ciphertext.data(), task.ciphertext.size(), task.aad.data(),
+          task.aad.size()));
+
+      ASSERT_EQ(Bytes(decrypted.data(), decrypted_len), Bytes(task.plaintext));
+    }
+  }
+  // Checks that |ctx| exports every expected secret in |exports_|.
+  void VerifyExports(EVP_HPKE_CTX *ctx) const {
+    for (const Export &task : exports_) {
+      std::vector<uint8_t> exported_secret(task.export_length);
+
+      ASSERT_TRUE(EVP_HPKE_CTX_export(
+          ctx, exported_secret.data(), exported_secret.size(),
+          task.exporter_context.data(), task.exporter_context.size()));
+      ASSERT_EQ(Bytes(exported_secret), Bytes(task.exported_value));
+    }
+  }
+  // One seal/open step: sealing |plaintext| with |aad| yields |ciphertext|.
+  struct Encryption {
+    std::vector<uint8_t> aad;
+    std::vector<uint8_t> ciphertext;
+    std::vector<uint8_t> plaintext;
+  };
+  // One export: |export_length| bytes derived for |exporter_context|.
+  struct Export {
+    std::vector<uint8_t> exporter_context;
+    size_t export_length;
+    std::vector<uint8_t> exported_value;
+  };
+  // Defaults keep Verify() well-defined if ReadFromFileTest() failed early.
+  HPKEMode mode_ = HPKEMode::kBase;
+  uint16_t kdf_id_ = 0;
+  uint16_t aead_id_ = 0;
+  std::vector<uint8_t> context_;
+  std::vector<uint8_t> info_;
+  std::vector<uint8_t> public_key_e_;
+  std::vector<uint8_t> secret_key_e_;
+  std::vector<uint8_t> public_key_r_;
+  std::vector<uint8_t> secret_key_r_;
+  std::vector<Encryption> encryptions_;
+  std::vector<Export> exports_;
+  std::vector<uint8_t> psk_;     // Empty when mode is not PSK.
+  std::vector<uint8_t> psk_id_;  // Empty when mode is not PSK.
+};
+
+// Match FileTest's naming of repeated attributes: "name", "name/2", "name/3"...
+std::string BuildAttrName(const std::string &name, int iter) {
+  return iter == 1 ? name : name + "/" + std::to_string(iter);
+}
+
+// Parses the unsigned decimal integer |s| into |out|. Returns false, without
+// writing |out|, if |s| is empty, has a non-digit, or exceeds T's maximum.
+template <typename T>
+bool ParseIntSafe(T *out, const std::string &s) {
+  if (s.empty()) {
+    return false;
+  }
+  T value = 0;
+  for (char c : s) {
+    if (c < '0' || c > '9' ||
+        value > (std::numeric_limits<T>::max() - (c - '0')) / 10) {
+      return false;
+    }
+    value = 10 * value + (c - '0');
+  }
+  *out = value;
+  return true;
+}
+
+// Parses attribute |key| of |file_test| into |out|. Returns false on failure.
+template <typename T>
+bool FileTestReadInt(FileTest *file_test, T *out, const std::string &key) {
+  std::string s;
+  return file_test->GetAttribute(&s, key) && ParseIntSafe(out, s);
+}
+
+
+bool HPKETestVector::ReadFromFileTest(FileTest *t) {
+  uint8_t mode_tmp;
+  if (!FileTestReadInt(t, &mode_tmp, "mode")) {
+    return false;
+  }
+  mode_ = static_cast<HPKEMode>(mode_tmp);
+  // The algorithm IDs, info and both key pairs are read for every mode.
+  if (!FileTestReadInt(t, &kdf_id_, "kdf_id") ||
+      !FileTestReadInt(t, &aead_id_, "aead_id") ||
+      !t->GetBytes(&info_, "info") ||
+      !t->GetBytes(&secret_key_r_, "skRm") ||
+      !t->GetBytes(&public_key_r_, "pkRm") ||
+      !t->GetBytes(&secret_key_e_, "skEm") ||
+      !t->GetBytes(&public_key_e_, "pkEm")) {
+    return false;
+  }
+  // The PSK attributes are read only for PSK-mode vectors.
+  if (mode_ == HPKEMode::kPSK) {
+    if (!t->GetBytes(&psk_, "psk") ||
+        !t->GetBytes(&psk_id_, "psk_id")) {
+      return false;
+    }
+  }
+  // Collect encryptions until the next indexed "aad" attribute is absent.
+  for (int i = 1; t->HasAttribute(BuildAttrName("aad", i)); i++) {
+    Encryption encryption;
+    if (!t->GetBytes(&encryption.aad, BuildAttrName("aad", i)) ||
+        !t->GetBytes(&encryption.ciphertext, BuildAttrName("ciphertext", i)) ||
+        !t->GetBytes(&encryption.plaintext, BuildAttrName("plaintext", i))) {
+      return false;
+    }
+    encryptions_.push_back(std::move(encryption));
+  }
+  // Likewise collect exports, keyed on the indexed "exporter_context".
+  for (int i = 1; t->HasAttribute(BuildAttrName("exporter_context", i)); i++) {
+    Export exp;
+    if (!t->GetBytes(&exp.exporter_context,
+                     BuildAttrName("exporter_context", i)) ||
+        !FileTestReadInt(t, &exp.export_length, BuildAttrName("L", i)) ||
+        !t->GetBytes(&exp.exported_value, BuildAttrName("exported_value", i))) {
+      return false;
+    }
+    exports_.push_back(std::move(exp));
+  }
+  return true;
+}
+
+}  // namespace
+// Reads each vector in the file and verifies it; a vector that fails to read
+TEST(HPKETest, VerifyTestVectors) {
+  FileTestGTest("crypto/hpke/hpke_test_vectors.txt", [](FileTest *t) {
+    HPKETestVector test_vec;
+    ASSERT_TRUE(test_vec.ReadFromFileTest(t));  // is not passed to Verify().
+    test_vec.Verify();
+  });
+}
+
+// The test vectors used fixed sender ephemeral keys, while HPKE itself
+// generates new keys for each context. Test this codepath by checking we can
+// decrypt our own messages.
+TEST(HPKETest, RoundTrip) {
+  uint16_t kdf_ids[] = {EVP_HPKE_HKDF_SHA256, EVP_HPKE_HKDF_SHA384,
+                        EVP_HPKE_HKDF_SHA512};
+  uint16_t aead_ids[] = {EVP_HPKE_AEAD_AES_GCM_128, EVP_HPKE_AEAD_AES_GCM_256,
+                         EVP_HPKE_AEAD_CHACHA20POLY1305};
+  // Both |info| and |ad| are exercised with an empty and two non-empty values.
+  const uint8_t info_a[] = {1, 1, 2, 3, 5, 8};
+  const uint8_t info_b[] = {42, 42, 42};
+  const uint8_t ad_a[] = {1, 2, 4, 8, 16};
+  const uint8_t ad_b[] = {7};
+  Span<const uint8_t> info_values[] = {{nullptr, 0}, info_a, info_b};
+  Span<const uint8_t> ad_values[] = {{nullptr, 0}, ad_a, ad_b};
+
+  // Generate the receiver's keypair.
+  uint8_t secret_key_r[X25519_PRIVATE_KEY_LEN];
+  uint8_t public_key_r[X25519_PUBLIC_VALUE_LEN];
+  X25519_keypair(public_key_r, secret_key_r);
+
+  for (uint16_t kdf_id : kdf_ids) {
+    for (uint16_t aead_id : aead_ids) {
+      for (const Span<const uint8_t> &info : info_values) {
+        for (const Span<const uint8_t> &ad : ad_values) {
+          // Set up the sender.
+          ScopedEVP_HPKE_CTX sender_ctx;
+          uint8_t enc[X25519_PUBLIC_VALUE_LEN];
+          ASSERT_TRUE(EVP_HPKE_CTX_setup_base_s_x25519(
+              sender_ctx.get(), enc, kdf_id, aead_id, public_key_r, info.data(),
+              info.size()));
+
+          // Set up the receiver.
+          ScopedEVP_HPKE_CTX receiver_ctx;
+          ASSERT_TRUE(EVP_HPKE_CTX_setup_base_r_x25519(
+              receiver_ctx.get(), kdf_id, aead_id, enc, public_key_r,
+              secret_key_r, info.data(), info.size()));
+          // sizeof() is used below, so the payload includes its trailing NUL.
+          const char kCleartextPayload[] = "foobar";
+
+          // Have sender encrypt message for the receiver.
+          std::vector<uint8_t> ciphertext(
+              sizeof(kCleartextPayload) +
+              EVP_HPKE_CTX_max_overhead(sender_ctx.get()));
+          size_t ciphertext_len;
+          ASSERT_TRUE(EVP_HPKE_CTX_seal(
+              sender_ctx.get(), ciphertext.data(), &ciphertext_len,
+              ciphertext.size(),
+              reinterpret_cast<const uint8_t *>(kCleartextPayload),
+              sizeof(kCleartextPayload), ad.data(), ad.size()));
+
+          // Have receiver decrypt the message.
+          std::vector<uint8_t> cleartext(ciphertext.size());
+          size_t cleartext_len;
+          ASSERT_TRUE(EVP_HPKE_CTX_open(receiver_ctx.get(), cleartext.data(),
+                                        &cleartext_len, cleartext.size(),
+                                        ciphertext.data(), ciphertext_len,
+                                        ad.data(), ad.size()));
+
+          // Verify that decrypted message matches the original.
+          ASSERT_EQ(Bytes(cleartext.data(), cleartext_len),
+                    Bytes(kCleartextPayload, sizeof(kCleartextPayload)));
+        }
+      }
+    }
+  }
+}
+
+// Verify that the DH operations inside Encap() and Decap() both fail when the
+// public key is on a small-order point in the curve.
+TEST(HPKETest, X25519EncapSmallOrderPoint) {
+  // Borrowed from X25519Test.SmallOrder. The omitted 32nd byte is zero.
+  static const uint8_t kSmallOrderPoint[32] = {
+      0xe0, 0xeb, 0x7a, 0x7c, 0x3b, 0x41, 0xb8, 0xae, 0x16, 0x56, 0xe3,
+      0xfa, 0xf1, 0x9f, 0xc4, 0x6a, 0xda, 0x09, 0x8d, 0xeb, 0x9c, 0x32,
+      0xb1, 0xfd, 0x86, 0x62, 0x05, 0x16, 0x5f, 0x49, 0xb8,
+  };
+
+  // Generate a valid keypair for the receiver.
+  uint8_t secret_key_r[X25519_PRIVATE_KEY_LEN];
+  uint8_t public_key_r[X25519_PUBLIC_VALUE_LEN];
+  X25519_keypair(public_key_r, secret_key_r);
+
+  uint16_t kdf_ids[] = {EVP_HPKE_HKDF_SHA256, EVP_HPKE_HKDF_SHA384,
+                        EVP_HPKE_HKDF_SHA512};
+  uint16_t aead_ids[] = {EVP_HPKE_AEAD_AES_GCM_128, EVP_HPKE_AEAD_AES_GCM_256,
+                         EVP_HPKE_AEAD_CHACHA20POLY1305};
+  // Both setups must fail for every KDF/AEAD combination.
+  for (uint16_t kdf_id : kdf_ids) {
+    for (uint16_t aead_id : aead_ids) {
+      // Set up the sender, passing in kSmallOrderPoint as |peer_public_value|.
+      ScopedEVP_HPKE_CTX sender_ctx;
+      uint8_t enc[X25519_PUBLIC_VALUE_LEN];
+      ASSERT_FALSE(EVP_HPKE_CTX_setup_base_s_x25519(
+          sender_ctx.get(), enc, kdf_id, aead_id, kSmallOrderPoint, nullptr,
+          0));
+
+      // Set up the receiver, passing in kSmallOrderPoint as |enc|.
+      ScopedEVP_HPKE_CTX receiver_ctx;
+      ASSERT_FALSE(EVP_HPKE_CTX_setup_base_r_x25519(
+          receiver_ctx.get(), kdf_id, aead_id, kSmallOrderPoint, public_key_r,
+          secret_key_r, nullptr, 0));
+    }
+  }
+}
+
+// Test that Seal() fails when the context has been initialized as a receiver.
+TEST(HPKETest, ReceiverInvalidSeal) {
+  const uint8_t kMockEnc[X25519_PUBLIC_VALUE_LEN] = {0xff};
+  const char kCleartextPayload[] = "foobar";
+  // |kMockEnc| is 0xff followed by zeros; receiver setup must accept it below.
+  // Generate the receiver's keypair.
+  uint8_t secret_key_r[X25519_PRIVATE_KEY_LEN];
+  uint8_t public_key_r[X25519_PUBLIC_VALUE_LEN];
+  X25519_keypair(public_key_r, secret_key_r);
+
+  // Set up the receiver.
+  ScopedEVP_HPKE_CTX receiver_ctx;
+  ASSERT_TRUE(EVP_HPKE_CTX_setup_base_r_x25519(
+      receiver_ctx.get(), EVP_HPKE_HKDF_SHA256, EVP_HPKE_AEAD_AES_GCM_128,
+      kMockEnc, public_key_r, secret_key_r, nullptr, 0));
+
+  // Call Seal() on the receiver.
+  size_t ciphertext_len;
+  uint8_t ciphertext[100];
+  ASSERT_FALSE(EVP_HPKE_CTX_seal(
+      receiver_ctx.get(), ciphertext, &ciphertext_len, sizeof(ciphertext),
+      reinterpret_cast<const uint8_t *>(kCleartextPayload),
+      sizeof(kCleartextPayload), nullptr, 0));
+}
+
+// Test that Open() fails when the context has been initialized as a sender.
+TEST(HPKETest, SenderInvalidOpen) {
+  const uint8_t kMockCiphertext[100] = {0xff};
+  const size_t kMockCiphertextLen = 80;
+  // Only the first 80 bytes of the mock buffer are offered as ciphertext.
+  // Generate the receiver's keypair.
+  uint8_t secret_key_r[X25519_PRIVATE_KEY_LEN];
+  uint8_t public_key_r[X25519_PUBLIC_VALUE_LEN];
+  X25519_keypair(public_key_r, secret_key_r);
+
+  // Set up the sender.
+  ScopedEVP_HPKE_CTX sender_ctx;
+  uint8_t enc[X25519_PUBLIC_VALUE_LEN];
+  ASSERT_TRUE(EVP_HPKE_CTX_setup_base_s_x25519(
+      sender_ctx.get(), enc, EVP_HPKE_HKDF_SHA256, EVP_HPKE_AEAD_AES_GCM_128,
+      public_key_r, nullptr, 0));
+
+  // Call Open() on the sender.
+  uint8_t cleartext[128];
+  size_t cleartext_len;
+  ASSERT_FALSE(EVP_HPKE_CTX_open(sender_ctx.get(), cleartext, &cleartext_len,
+                                 sizeof(cleartext), kMockCiphertext,
+                                 kMockCiphertextLen, nullptr, 0));
+}
+
+// Test that the PSK variants of Setup functions fail when any of the PSK inputs
+// are empty.
+TEST(HPKETest, EmptyPSK) {
+  const uint8_t kMockEnc[X25519_PUBLIC_VALUE_LEN] = {0xff};
+  const std::vector<uint8_t> kPSKValues[] = {std::vector<uint8_t>(100, 0xff),
+                                             {}};
+
+  // Generate the receiver's keypair.
+  uint8_t secret_key_r[X25519_PRIVATE_KEY_LEN];
+  uint8_t public_key_r[X25519_PUBLIC_VALUE_LEN];
+  X25519_keypair(public_key_r, secret_key_r);
+
+  // Vary the PSK and PSKID inputs for the sender and receiver, trying all four
+  // permutations of empty and nonempty inputs.
+
+  for (const auto &psk : kPSKValues) {
+    for (const auto &psk_id : kPSKValues) {
+      const bool kExpectSuccess = psk.size() > 0 && psk_id.size() > 0;
+      // Start from an empty error queue so later checks see fresh errors.
+      ASSERT_EQ(ERR_get_error(), 0u);
+
+      ScopedEVP_HPKE_CTX sender_ctx;
+      uint8_t enc[X25519_PUBLIC_VALUE_LEN];
+      ASSERT_EQ(EVP_HPKE_CTX_setup_psk_s_x25519(
+                    sender_ctx.get(), enc, EVP_HPKE_HKDF_SHA256,
+                    EVP_HPKE_AEAD_AES_GCM_128, public_key_r, nullptr, 0,
+                    psk.data(), psk.size(), psk_id.data(), psk_id.size()),
+                kExpectSuccess);
+      // A rejected empty PSK or PSK ID must report EVP_R_EMPTY_PSK.
+      if (!kExpectSuccess) {
+        uint32_t err = ERR_get_error();
+        EXPECT_EQ(ERR_LIB_EVP, ERR_GET_LIB(err));
+        EXPECT_EQ(EVP_R_EMPTY_PSK, ERR_GET_REASON(err));
+      }
+      ERR_clear_error();
+
+      ScopedEVP_HPKE_CTX receiver_ctx;
+      ASSERT_EQ(
+          EVP_HPKE_CTX_setup_psk_r_x25519(
+              receiver_ctx.get(), EVP_HPKE_HKDF_SHA256,
+              EVP_HPKE_AEAD_AES_GCM_128, kMockEnc, public_key_r, secret_key_r,
+              nullptr, 0, psk.data(), psk.size(), psk_id.data(), psk_id.size()),
+          kExpectSuccess);
+
+      if (!kExpectSuccess) {
+        uint32_t err = ERR_get_error();
+        EXPECT_EQ(ERR_LIB_EVP, ERR_GET_LIB(err));
+        EXPECT_EQ(EVP_R_EMPTY_PSK, ERR_GET_REASON(err));
+      }
+      ERR_clear_error();
+    }
+  }
+}
+
+TEST(HPKETest, InternalParseIntSafe) {
+  uint8_t u8 = 0xff;
+  ASSERT_FALSE(ParseIntSafe(&u8, "-1"));
+  // uint8_t accepts 0 through 255 and rejects 256.
+  ASSERT_TRUE(ParseIntSafe(&u8, "0"));
+  ASSERT_EQ(u8, 0);
+
+  ASSERT_TRUE(ParseIntSafe(&u8, "255"));
+  ASSERT_EQ(u8, 255);
+
+  ASSERT_FALSE(ParseIntSafe(&u8, "256"));
+  // uint16_t accepts values above 255 up to 65535 and rejects 65536.
+  uint16_t u16 = 0xffff;
+  ASSERT_TRUE(ParseIntSafe(&u16, "257"));
+  ASSERT_EQ(u16, 257);
+
+  ASSERT_TRUE(ParseIntSafe(&u16, "65535"));
+  ASSERT_EQ(u16, 65535);
+
+  ASSERT_FALSE(ParseIntSafe(&u16, "65536"));
+}
+
+
+}  // namespace bssl
diff --git a/deps/boringssl/src/crypto/hpke/internal.h b/deps/boringssl/src/crypto/hpke/internal.h
new file mode 100644
index 0000000..3d2f4ba
--- /dev/null
+++ b/deps/boringssl/src/crypto/hpke/internal.h
@@ -0,0 +1,246 @@
+/* Copyright (c) 2020, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#ifndef OPENSSL_HEADER_CRYPTO_HPKE_INTERNAL_H
+#define OPENSSL_HEADER_CRYPTO_HPKE_INTERNAL_H
+
+#include <openssl/aead.h>
+#include <openssl/base.h>
+#include <openssl/curve25519.h>
+#include <openssl/digest.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+// Hybrid Public Key Encryption.
+//
+// Hybrid Public Key Encryption (HPKE) enables a sender to encrypt messages to a
+// receiver with a public key. Optionally, the sender may authenticate its
+// possession of a pre-shared key to the recipient.
+//
+// See https://tools.ietf.org/html/draft-irtf-cfrg-hpke-07.
+
+// EVP_HPKE_AEAD_* are AEAD identifiers.
+#define EVP_HPKE_AEAD_AES_GCM_128 0x0001
+#define EVP_HPKE_AEAD_AES_GCM_256 0x0002
+#define EVP_HPKE_AEAD_CHACHA20POLY1305 0x0003
+
+// EVP_HPKE_HKDF_* are HKDF identifiers.
+#define EVP_HPKE_HKDF_SHA256 0x0001
+#define EVP_HPKE_HKDF_SHA384 0x0002
+#define EVP_HPKE_HKDF_SHA512 0x0003
+
+// EVP_HPKE_MAX_OVERHEAD contains the largest value that
+// |EVP_HPKE_CTX_max_overhead| would ever return for any context.
+#define EVP_HPKE_MAX_OVERHEAD EVP_AEAD_MAX_OVERHEAD
+
+
+// Encryption contexts.
+
+// An |EVP_HPKE_CTX| is an HPKE encryption context.
+typedef struct evp_hpke_ctx_st {
+  const EVP_MD *hkdf_md;  // Hash passed to the labeled HKDF expansion.
+  EVP_AEAD_CTX aead_ctx;  // AEAD state used by seal and open.
+  uint16_t kdf_id;        // KDF identifier (see |EVP_HPKE_HKDF_*|).
+  uint16_t aead_id;       // AEAD identifier (see |EVP_HPKE_AEAD_*|).
+  uint8_t base_nonce[EVP_AEAD_MAX_NONCE_LENGTH];
+  uint8_t exporter_secret[EVP_MAX_MD_SIZE];  // Input to |EVP_HPKE_CTX_export|.
+  uint64_t seq;   // Incremented after each successful seal or open.
+  int is_sender;  // Non-zero for a sender; seal fails when this is zero.
+} EVP_HPKE_CTX;
+
+// EVP_HPKE_CTX_init initializes an already-allocated |EVP_HPKE_CTX|. The caller
+// should then use one of the |EVP_HPKE_CTX_setup_*| functions.
+//
+// It is safe, but not necessary to call |EVP_HPKE_CTX_cleanup| in this state.
+OPENSSL_EXPORT void EVP_HPKE_CTX_init(EVP_HPKE_CTX *ctx);
+
+// EVP_HPKE_CTX_cleanup releases memory referenced by |ctx|. |ctx| must have
+// been initialized with |EVP_HPKE_CTX_init|.
+OPENSSL_EXPORT void EVP_HPKE_CTX_cleanup(EVP_HPKE_CTX *ctx);
+
+
+// Setting up HPKE contexts.
+//
+// In each of the following functions, |hpke| must have been initialized with
+// |EVP_HPKE_CTX_init|. |kdf_id| selects the KDF for non-KEM HPKE operations and
+// must be one of the |EVP_HPKE_HKDF_*| constants. |aead_id| selects the AEAD
+// for the "open" and "seal" operations and must be one of the |EVP_HPKE_AEAD_*|
+// constants.
+
+// EVP_HPKE_CTX_setup_base_s_x25519 sets up |hpke| as a sender context that can
+// encrypt for the private key corresponding to |peer_public_value| (the
+// recipient's public key). It returns one on success, and zero otherwise. Note
+// that this function will fail if |peer_public_value| is invalid.
+//
+// This function writes the encapsulated shared secret to |out_enc|.
+OPENSSL_EXPORT int EVP_HPKE_CTX_setup_base_s_x25519(
+    EVP_HPKE_CTX *hpke, uint8_t out_enc[X25519_PUBLIC_VALUE_LEN],
+    uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t *info, size_t info_len);
+
+// EVP_HPKE_CTX_setup_base_s_x25519_for_test behaves like
+// |EVP_HPKE_CTX_setup_base_s_x25519|, but takes a pre-generated ephemeral
+// sender key.
+OPENSSL_EXPORT int EVP_HPKE_CTX_setup_base_s_x25519_for_test(
+    EVP_HPKE_CTX *hpke, uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t *info, size_t info_len,
+    const uint8_t ephemeral_private[X25519_PRIVATE_KEY_LEN],
+    const uint8_t ephemeral_public[X25519_PUBLIC_VALUE_LEN]);
+
+// EVP_HPKE_CTX_setup_base_r_x25519 sets up |hpke| as a recipient context that
+// can decrypt messages. |private_key| is the recipient's private key, and |enc|
+// is the encapsulated shared secret from the sender. Note that this function
+// will fail if |enc| is invalid.
+OPENSSL_EXPORT int EVP_HPKE_CTX_setup_base_r_x25519(
+    EVP_HPKE_CTX *hpke, uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t enc[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t public_key[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t private_key[X25519_PRIVATE_KEY_LEN], const uint8_t *info,
+    size_t info_len);
+
+// EVP_HPKE_CTX_setup_psk_s_x25519 sets up |hpke| as a sender context that can
+// encrypt for the private key corresponding to |peer_public_value| (the
+// recipient's public key) and authenticate its possession of a PSK. It returns
+// one on success, and zero otherwise. Note that this function will fail if
+// |peer_public_value| is invalid.
+//
+// The PSK and its ID must be provided in |psk| and |psk_id|, respectively. Both
+// must be nonempty (|psk_len| and |psk_id_len| must be non-zero), or this
+// function will fail.
+//
+// This function writes the encapsulated shared secret to |out_enc|.
+OPENSSL_EXPORT int EVP_HPKE_CTX_setup_psk_s_x25519(
+    EVP_HPKE_CTX *hpke, uint8_t out_enc[X25519_PUBLIC_VALUE_LEN],
+    uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t *info, size_t info_len, const uint8_t *psk, size_t psk_len,
+    const uint8_t *psk_id, size_t psk_id_len);
+
+// EVP_HPKE_CTX_setup_psk_s_x25519_for_test behaves like
+// |EVP_HPKE_CTX_setup_psk_s_x25519|, but takes a pre-generated ephemeral sender
+// key.
+OPENSSL_EXPORT int EVP_HPKE_CTX_setup_psk_s_x25519_for_test(
+    EVP_HPKE_CTX *hpke, uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t peer_public_value[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t *info, size_t info_len, const uint8_t *psk, size_t psk_len,
+    const uint8_t *psk_id, size_t psk_id_len,
+    const uint8_t ephemeral_private[X25519_PRIVATE_KEY_LEN],
+    const uint8_t ephemeral_public[X25519_PUBLIC_VALUE_LEN]);
+
+// EVP_HPKE_CTX_setup_psk_r_x25519 sets up |hpke| as a recipient context that
+// can decrypt messages. Future open (decrypt) operations will fail if the
+// sender does not possess the PSK indicated by |psk| and |psk_id|.
+// |private_key| is the recipient's private key, and |enc| is the encapsulated
+// shared secret from the sender. If |enc| is invalid, this function will fail.
+//
+// The PSK and its ID must be provided in |psk| and |psk_id|, respectively. Both
+// must be nonempty (|psk_len| and |psk_id_len| must be non-zero), or this
+// function will fail.
+OPENSSL_EXPORT int EVP_HPKE_CTX_setup_psk_r_x25519(
+    EVP_HPKE_CTX *hpke, uint16_t kdf_id, uint16_t aead_id,
+    const uint8_t enc[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t public_key[X25519_PUBLIC_VALUE_LEN],
+    const uint8_t private_key[X25519_PRIVATE_KEY_LEN], const uint8_t *info,
+    size_t info_len, const uint8_t *psk, size_t psk_len, const uint8_t *psk_id,
+    size_t psk_id_len);
+
+
+// Using an HPKE context.
+
+// EVP_HPKE_CTX_open uses the HPKE context |hpke| to authenticate |in_len| bytes
+// from |in| and |ad_len| bytes from |ad| and to decrypt at most |in_len| bytes
+// into |out|. It returns one on success, and zero otherwise.
+//
+// This operation will fail if the |hpke| context is not set up as a receiver.
+//
+// Note that HPKE encryption is stateful and ordered. The sender's first call to
+// |EVP_HPKE_CTX_seal| must correspond to the recipient's first call to
+// |EVP_HPKE_CTX_open|, etc.
+//
+// At most |in_len| bytes are written to |out|. In order to ensure success,
+// |max_out_len| should be at least |in_len|. On successful return, |*out_len|
+// is set to the actual number of bytes written.
+OPENSSL_EXPORT int EVP_HPKE_CTX_open(EVP_HPKE_CTX *hpke, uint8_t *out,
+                                     size_t *out_len, size_t max_out_len,
+                                     const uint8_t *in, size_t in_len,
+                                     const uint8_t *ad, size_t ad_len);
+
+// EVP_HPKE_CTX_seal uses the HPKE context |hpke| to encrypt and authenticate
+// |in_len| bytes of plaintext |in| and authenticate |ad_len| bytes from |ad|,
+// writing the result to |out|. It returns one on success and zero otherwise.
+//
+// This operation will fail if the |hpke| context is not set up as a sender.
+//
+// Note that HPKE encryption is stateful and ordered. The sender's first call to
+// |EVP_HPKE_CTX_seal| must correspond to the recipient's first call to
+// |EVP_HPKE_CTX_open|, etc.
+//
+// At most, |max_out_len| encrypted bytes are written to |out|. On successful
+// return, |*out_len| is set to the actual number of bytes written.
+//
+// To ensure success, |max_out_len| should be |in_len| plus the result of
+// |EVP_HPKE_CTX_max_overhead| or |EVP_HPKE_MAX_OVERHEAD|.
+OPENSSL_EXPORT int EVP_HPKE_CTX_seal(EVP_HPKE_CTX *hpke, uint8_t *out,
+                                     size_t *out_len, size_t max_out_len,
+                                     const uint8_t *in, size_t in_len,
+                                     const uint8_t *ad, size_t ad_len);
+
+// EVP_HPKE_CTX_export uses the HPKE context |hpke| to export a secret of
+// |secret_len| bytes into |out|. This function uses |context_len| bytes from
+// |context| as a context string for the secret. This is necessary to separate
+// different uses of exported secrets and bind relevant caller-specific context
+// into the output. It returns one on success and zero otherwise.
+OPENSSL_EXPORT int EVP_HPKE_CTX_export(const EVP_HPKE_CTX *hpke, uint8_t *out,
+                                       size_t secret_len,
+                                       const uint8_t *context,
+                                       size_t context_len);
+
+// EVP_HPKE_CTX_max_overhead returns the maximum number of additional bytes
+// added by sealing data with |EVP_HPKE_CTX_seal|. The |hpke| context must be
+// set up as a sender.
+OPENSSL_EXPORT size_t EVP_HPKE_CTX_max_overhead(const EVP_HPKE_CTX *hpke);
+
+// EVP_HPKE_get_aead returns the AEAD corresponding to |aead_id|, or NULL if
+// |aead_id| is not a known AEAD identifier.
+OPENSSL_EXPORT const EVP_AEAD *EVP_HPKE_get_aead(uint16_t aead_id);
+
+// EVP_HPKE_get_hkdf_md returns the hash function associated with |kdf_id|, or
+// NULL if |kdf_id| is not a known KDF identifier that uses HKDF.
+OPENSSL_EXPORT const EVP_MD *EVP_HPKE_get_hkdf_md(uint16_t kdf_id);
+
+
+#if defined(__cplusplus)
+}  // extern C
+#endif
+
+#if !defined(BORINGSSL_NO_CXX)
+extern "C++" {
+
+BSSL_NAMESPACE_BEGIN
+// NOTE(review): presumably runs init on construction, cleanup on destruction.
+using ScopedEVP_HPKE_CTX =
+    internal::StackAllocated<EVP_HPKE_CTX, void, EVP_HPKE_CTX_init,
+                             EVP_HPKE_CTX_cleanup>;
+
+BSSL_NAMESPACE_END
+
+}  // extern C++
+#endif
+
+#endif  // OPENSSL_HEADER_CRYPTO_HPKE_INTERNAL_H
diff --git a/deps/boringssl/src/crypto/pkcs8/internal.h b/deps/boringssl/src/crypto/pkcs8/internal.h
index c3302f7..b5d42ba 100644
--- a/deps/boringssl/src/crypto/pkcs8/internal.h
+++ b/deps/boringssl/src/crypto/pkcs8/internal.h
@@ -63,6 +63,13 @@
 #endif
 
 
+struct pkcs8_priv_key_info_st {
+  ASN1_INTEGER *version;
+  X509_ALGOR *pkeyalg;      // Algorithm identifier for the key.
+  ASN1_OCTET_STRING *pkey;  // Encoded private key; cleansed before freeing.
+  STACK_OF(X509_ATTRIBUTE) *attributes;  // Optional, implicitly tagged [0].
+};
+
 // pkcs8_pbe_decrypt decrypts |in| using the PBE scheme described by
 // |algorithm|, which should be a serialized AlgorithmIdentifier structure. On
 // success, it sets |*out| to a newly-allocated buffer containing the decrypted
diff --git a/deps/boringssl/src/crypto/pkcs8/pkcs8_x509.c b/deps/boringssl/src/crypto/pkcs8/pkcs8_x509.c
index 4458b56..a2f9075 100644
--- a/deps/boringssl/src/crypto/pkcs8/pkcs8_x509.c
+++ b/deps/boringssl/src/crypto/pkcs8/pkcs8_x509.c
@@ -96,10 +96,8 @@
   // Since the structure must still be valid use ASN1_OP_FREE_PRE
   if (operation == ASN1_OP_FREE_PRE) {
     PKCS8_PRIV_KEY_INFO *key = (PKCS8_PRIV_KEY_INFO *)*pval;
-    if (key->pkey && key->pkey->type == V_ASN1_OCTET_STRING &&
-        key->pkey->value.octet_string) {
-      OPENSSL_cleanse(key->pkey->value.octet_string->data,
-                      key->pkey->value.octet_string->length);
+    if (key->pkey) {
+      OPENSSL_cleanse(key->pkey->data, key->pkey->length);
     }
   }
   return 1;
@@ -108,12 +106,45 @@
 ASN1_SEQUENCE_cb(PKCS8_PRIV_KEY_INFO, pkey_cb) = {
   ASN1_SIMPLE(PKCS8_PRIV_KEY_INFO, version, ASN1_INTEGER),
   ASN1_SIMPLE(PKCS8_PRIV_KEY_INFO, pkeyalg, X509_ALGOR),
-  ASN1_SIMPLE(PKCS8_PRIV_KEY_INFO, pkey, ASN1_ANY),
+  ASN1_SIMPLE(PKCS8_PRIV_KEY_INFO, pkey, ASN1_OCTET_STRING),
   ASN1_IMP_SET_OF_OPT(PKCS8_PRIV_KEY_INFO, attributes, X509_ATTRIBUTE, 0)
 } ASN1_SEQUENCE_END_cb(PKCS8_PRIV_KEY_INFO, PKCS8_PRIV_KEY_INFO)
 
 IMPLEMENT_ASN1_FUNCTIONS(PKCS8_PRIV_KEY_INFO)
 
+int PKCS8_pkey_set0(PKCS8_PRIV_KEY_INFO *priv, ASN1_OBJECT *aobj, int version,
+                    int ptype, void *pval, uint8_t *penc, int penclen) {
+  if (version >= 0 &&
+      !ASN1_INTEGER_set(priv->version, version)) {
+    return 0;
+  }
+  // A negative |version| leaves the existing version field untouched.
+  if (!X509_ALGOR_set0(priv->pkeyalg, aobj, ptype, pval)) {
+    return 0;
+  }
+  // |penc| is optional; when NULL the key bytes are left as they were.
+  if (penc != NULL) {
+    ASN1_STRING_set0(priv->pkey, penc, penclen);
+  }
+
+  return 1;
+}
+
+int PKCS8_pkey_get0(ASN1_OBJECT **ppkalg, const uint8_t **pk, int *ppklen,
+                    X509_ALGOR **pa, PKCS8_PRIV_KEY_INFO *p8) {
+  if (ppkalg) {  // Each output is optional and skipped when NULL.
+    *ppkalg = p8->pkeyalg->algorithm;
+  }
+  if (pk) {
+    *pk = ASN1_STRING_data(p8->pkey);
+    *ppklen = ASN1_STRING_length(p8->pkey);  // Requires non-NULL |ppklen|.
+  }
+  if (pa) {
+    *pa = p8->pkeyalg;
+  }
+  return 1;  // Always succeeds.
+}
+
 EVP_PKEY *EVP_PKCS82PKEY(PKCS8_PRIV_KEY_INFO *p8) {
   uint8_t *der = NULL;
   int der_len = i2d_PKCS8_PRIV_KEY_INFO(p8, &der);
diff --git a/deps/boringssl/src/crypto/poly1305/poly1305.c b/deps/boringssl/src/crypto/poly1305/poly1305.c
index a6dd145..31a567d 100644
--- a/deps/boringssl/src/crypto/poly1305/poly1305.c
+++ b/deps/boringssl/src/crypto/poly1305/poly1305.c
@@ -46,10 +46,14 @@
   uint32_t s1, s2, s3, s4;
   uint32_t h0, h1, h2, h3, h4;
   uint8_t buf[16];
-  unsigned int buf_used;
+  size_t buf_used;
   uint8_t key[16];
 };
 
+OPENSSL_STATIC_ASSERT(
+    sizeof(struct poly1305_state_st) + 63 <= sizeof(poly1305_state),
+    "poly1305_state isn't large enough to hold aligned poly1305_state_st");
+
 static inline struct poly1305_state_st *poly1305_aligned_state(
     poly1305_state *state) {
   return (struct poly1305_state_st *)(((uintptr_t)state + 63) & ~63);
@@ -200,7 +204,6 @@
 
 void CRYPTO_poly1305_update(poly1305_state *statep, const uint8_t *in,
                             size_t in_len) {
-  unsigned int i;
   struct poly1305_state_st *state = poly1305_aligned_state(statep);
 
 #if defined(OPENSSL_POLY1305_NEON)
@@ -211,11 +214,11 @@
 #endif
 
   if (state->buf_used) {
-    unsigned todo = 16 - state->buf_used;
+    size_t todo = 16 - state->buf_used;
     if (todo > in_len) {
-      todo = (unsigned)in_len;
+      todo = in_len;
     }
-    for (i = 0; i < todo; i++) {
+    for (size_t i = 0; i < todo; i++) {
       state->buf[state->buf_used + i] = in[i];
     }
     state->buf_used += todo;
@@ -236,10 +239,10 @@
   }
 
   if (in_len) {
-    for (i = 0; i < in_len; i++) {
+    for (size_t i = 0; i < in_len; i++) {
       state->buf[i] = in[i];
     }
-    state->buf_used = (unsigned)in_len;
+    state->buf_used = in_len;
   }
 }
 
diff --git a/deps/boringssl/src/crypto/poly1305/poly1305_arm.c b/deps/boringssl/src/crypto/poly1305/poly1305_arm.c
index 004221d..d6f034c 100644
--- a/deps/boringssl/src/crypto/poly1305/poly1305_arm.c
+++ b/deps/boringssl/src/crypto/poly1305/poly1305_arm.c
@@ -36,7 +36,7 @@
                       const fe1305x2 *c);
 
 extern int blocks(fe1305x2 *h, const fe1305x2 *precomp, const uint8_t *in,
-                  unsigned int inlen);
+                  size_t inlen);
 
 static void freeze(fe1305x2 *r) {
   int i;
@@ -136,7 +136,7 @@
 }
 
 static void fe1305x2_frombytearray(fe1305x2 *r, const uint8_t *x, size_t xlen) {
-  unsigned i;
+  size_t i;
   uint8_t t[17];
 
   for (i = 0; (i < 16) && (i < xlen); i++) {
@@ -179,17 +179,20 @@
 struct poly1305_state_st {
   uint8_t data[sizeof(fe1305x2[5]) + 128];
   uint8_t buf[32];
-  unsigned int buf_used;
+  size_t buf_used;
   uint8_t key[16];
 };
 
+OPENSSL_STATIC_ASSERT(
+    sizeof(struct poly1305_state_st) + 63 <= sizeof(poly1305_state),
+    "poly1305_state isn't large enough to hold aligned poly1305_state_st.");
+
 void CRYPTO_poly1305_init_neon(poly1305_state *state, const uint8_t key[32]) {
   struct poly1305_state_st *st = (struct poly1305_state_st *)(state);
   fe1305x2 *const r = (fe1305x2 *)(st->data + (15 & (-(int)st->data)));
   fe1305x2 *const h = r + 1;
   fe1305x2 *const c = h + 1;
   fe1305x2 *const precomp = c + 1;
-  unsigned int j;
 
   r->v[1] = r->v[0] = 0x3ffffff & load32(key);
   r->v[3] = r->v[2] = 0x3ffff03 & (load32(key + 3) >> 2);
@@ -197,7 +200,7 @@
   r->v[7] = r->v[6] = 0x3f03fff & (load32(key + 9) >> 6);
   r->v[9] = r->v[8] = 0x00fffff & (load32(key + 12) >> 8);
 
-  for (j = 0; j < 10; j++) {
+  for (size_t j = 0; j < 10; j++) {
     h->v[j] = 0;  // XXX: should fast-forward a bit
   }
 
@@ -215,14 +218,13 @@
   fe1305x2 *const h = r + 1;
   fe1305x2 *const c = h + 1;
   fe1305x2 *const precomp = c + 1;
-  unsigned int i;
 
   if (st->buf_used) {
-    unsigned int todo = 32 - st->buf_used;
+    size_t todo = 32 - st->buf_used;
     if (todo > in_len) {
       todo = in_len;
     }
-    for (i = 0; i < todo; i++) {
+    for (size_t i = 0; i < todo; i++) {
       st->buf[st->buf_used + i] = in[i];
     }
     st->buf_used += todo;
@@ -232,7 +234,7 @@
     if (st->buf_used == sizeof(st->buf) && in_len) {
       addmulmod(h, h, precomp, &zero);
       fe1305x2_frombytearray(c, st->buf, sizeof(st->buf));
-      for (i = 0; i < 10; i++) {
+      for (size_t i = 0; i < 10; i++) {
         h->v[i] += c->v[i];
       }
       st->buf_used = 0;
@@ -240,7 +242,7 @@
   }
 
   while (in_len > 32) {
-    unsigned int tlen = 1048576;
+    size_t tlen = 1048576;
     if (in_len < tlen) {
       tlen = in_len;
     }
@@ -250,7 +252,7 @@
   }
 
   if (in_len) {
-    for (i = 0; i < in_len; i++) {
+    for (size_t i = 0; i < in_len; i++) {
       st->buf[i] = in[i];
     }
     st->buf_used = in_len;
diff --git a/deps/boringssl/src/crypto/poly1305/poly1305_vec.c b/deps/boringssl/src/crypto/poly1305/poly1305_vec.c
index 29cd5c3..83f1efe 100644
--- a/deps/boringssl/src/crypto/poly1305/poly1305_vec.c
+++ b/deps/boringssl/src/crypto/poly1305/poly1305_vec.c
@@ -92,6 +92,10 @@
 } poly1305_state_internal; /* 448 bytes total + 63 bytes for
                               alignment = 511 bytes raw */
 
+OPENSSL_STATIC_ASSERT(
+    sizeof(struct poly1305_state_internal_t) + 63 <= sizeof(poly1305_state),
+    "poly1305_state isn't large enough to hold aligned poly1305_state_internal_t");
+
 static inline poly1305_state_internal *poly1305_aligned_state(
     poly1305_state *state) {
   return (poly1305_state_internal *)(((uint64_t)state + 63) & ~63);
diff --git a/deps/boringssl/src/crypto/rand_extra/passive.c b/deps/boringssl/src/crypto/rand_extra/passive.c
new file mode 100644
index 0000000..a8c2487
--- /dev/null
+++ b/deps/boringssl/src/crypto/rand_extra/passive.c
@@ -0,0 +1,34 @@
+/* Copyright (c) 2020, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/base.h>
+#include "../fipsmodule/rand/internal.h"
+
+#if defined(BORINGSSL_FIPS_PASSIVE_ENTROPY)
+
+// RAND_need_entropy is called by the FIPS module when it has blocked because of
+// a lack of entropy. This signal is used as an indication to feed it more.
+void RAND_need_entropy(size_t bytes_needed) {
+  uint8_t buf[CTR_DRBG_ENTROPY_LEN * BORINGSSL_FIPS_OVERREAD];
+  size_t todo = sizeof(buf);
+  if (todo > bytes_needed) {
+    todo = bytes_needed;
+  }
+
+  int used_cpu;
+  CRYPTO_get_seed_entropy(buf, todo, &used_cpu);
+  RAND_load_entropy(buf, todo, used_cpu);
+}
+
+#endif  // BORINGSSL_FIPS_PASSIVE_ENTROPY
diff --git a/deps/boringssl/src/crypto/rand_extra/rand_extra.c b/deps/boringssl/src/crypto/rand_extra/rand_extra.c
index bed9e1e..596605a 100644
--- a/deps/boringssl/src/crypto/rand_extra/rand_extra.c
+++ b/deps/boringssl/src/crypto/rand_extra/rand_extra.c
@@ -63,6 +63,10 @@
   return (RAND_METHOD*) &kSSLeayMethod;
 }
 
+RAND_METHOD *RAND_OpenSSL(void) {
+  return RAND_SSLeay();
+}
+
 const RAND_METHOD *RAND_get_rand_method(void) { return RAND_SSLeay(); }
 
 void RAND_set_rand_method(const RAND_METHOD *method) {}
diff --git a/deps/boringssl/src/crypto/rsa_extra/rsa_test.cc b/deps/boringssl/src/crypto/rsa_extra/rsa_test.cc
index e9b68c9..883eb97 100644
--- a/deps/boringssl/src/crypto/rsa_extra/rsa_test.cc
+++ b/deps/boringssl/src/crypto/rsa_extra/rsa_test.cc
@@ -496,24 +496,26 @@
   bssl::UniquePtr<RSA> rsa(RSA_new());
   ASSERT_TRUE(rsa);
 
-  // RSA_generate_key_fips may only be used for 2048-bit and 3072-bit keys.
+  // RSA_generate_key_fips may only be used for 2048-, 3072-, and 4096-bit
+  // keys.
   EXPECT_FALSE(RSA_generate_key_fips(rsa.get(), 512, nullptr));
   EXPECT_FALSE(RSA_generate_key_fips(rsa.get(), 1024, nullptr));
   EXPECT_FALSE(RSA_generate_key_fips(rsa.get(), 2047, nullptr));
   EXPECT_FALSE(RSA_generate_key_fips(rsa.get(), 2049, nullptr));
   EXPECT_FALSE(RSA_generate_key_fips(rsa.get(), 3071, nullptr));
   EXPECT_FALSE(RSA_generate_key_fips(rsa.get(), 3073, nullptr));
-  EXPECT_FALSE(RSA_generate_key_fips(rsa.get(), 4096, nullptr));
+  EXPECT_FALSE(RSA_generate_key_fips(rsa.get(), 4095, nullptr));
+  EXPECT_FALSE(RSA_generate_key_fips(rsa.get(), 4097, nullptr));
   ERR_clear_error();
 
-  // Test that we can generate 2048-bit and 3072-bit RSA keys.
-  ASSERT_TRUE(RSA_generate_key_fips(rsa.get(), 2048, nullptr));
-  EXPECT_EQ(2048u, BN_num_bits(rsa->n));
+  // Test that we can generate keys of the supported lengths:
+  for (const size_t bits : {2048, 3072, 4096}) {
+    SCOPED_TRACE(bits);
 
-  rsa.reset(RSA_new());
-  ASSERT_TRUE(rsa);
-  ASSERT_TRUE(RSA_generate_key_fips(rsa.get(), 3072, nullptr));
-  EXPECT_EQ(3072u, BN_num_bits(rsa->n));
+    rsa.reset(RSA_new());
+    ASSERT_TRUE(RSA_generate_key_fips(rsa.get(), bits, nullptr));
+    EXPECT_EQ(bits, BN_num_bits(rsa->n));
+  }
 }
 
 TEST(RSATest, BadKey) {
@@ -1044,8 +1046,8 @@
   ASSERT_TRUE(BN_sqr(sqrt.get(), sqrt.get(), ctx.get()));
   EXPECT_LT(BN_cmp(pow2.get(), sqrt.get()), 0);
 
-  // Check the kBoringSSLRSASqrtTwo is sized for a 3072-bit RSA key.
-  EXPECT_EQ(3072u / 2u, bits);
+  // Check the kBoringSSLRSASqrtTwo is sized for a 4096-bit RSA key.
+  EXPECT_EQ(4096u / 2u, bits);
 }
 #endif  // !BORINGSSL_SHARED_LIBRARY
 
diff --git a/deps/boringssl/src/crypto/stack/stack.c b/deps/boringssl/src/crypto/stack/stack.c
index 599bd7b..6da6e3b 100644
--- a/deps/boringssl/src/crypto/stack/stack.c
+++ b/deps/boringssl/src/crypto/stack/stack.c
@@ -57,7 +57,6 @@
 #include <openssl/stack.h>
 
 #include <assert.h>
-#include <string.h>
 
 #include <openssl/mem.h>
 
@@ -69,11 +68,9 @@
 static const size_t kMinSize = 4;
 
 _STACK *sk_new(stack_cmp_func comp) {
-  _STACK *ret;
-
-  ret = OPENSSL_malloc(sizeof(_STACK));
+  _STACK *ret = OPENSSL_malloc(sizeof(_STACK));
   if (ret == NULL) {
-    goto err;
+    return NULL;
   }
   OPENSSL_memset(ret, 0, sizeof(_STACK));
 
@@ -331,23 +328,20 @@
 }
 
 _STACK *sk_dup(const _STACK *sk) {
-  _STACK *ret;
-  void **s;
-
   if (sk == NULL) {
     return NULL;
   }
 
-  ret = sk_new(sk->comp);
+  _STACK *ret = OPENSSL_malloc(sizeof(_STACK));
   if (ret == NULL) {
-    goto err;
+    return NULL;
   }
+  OPENSSL_memset(ret, 0, sizeof(_STACK));
 
-  s = (void **)OPENSSL_realloc(ret->data, sizeof(void *) * sk->num_alloc);
-  if (s == NULL) {
+  ret->data = OPENSSL_malloc(sizeof(void *) * sk->num_alloc);
+  if (ret->data == NULL) {
     goto err;
   }
-  ret->data = s;
 
   ret->num = sk->num;
   OPENSSL_memcpy(ret->data, sk->data, sizeof(void *) * sk->num);
diff --git a/deps/boringssl/src/crypto/test/file_test.cc b/deps/boringssl/src/crypto/test/file_test.cc
index c39c078..47a6f4c 100644
--- a/deps/boringssl/src/crypto/test/file_test.cc
+++ b/deps/boringssl/src/crypto/test/file_test.cc
@@ -179,7 +179,7 @@
       kv = std::string(kv.begin() + 1, kv.end() - 1);
 
       for (;;) {
-        size_t idx = kv.find(",");
+        size_t idx = kv.find(',');
         if (idx == std::string::npos) {
           idx = kv.size();
         }
@@ -205,11 +205,10 @@
 
       // Duplicate keys are rewritten to have “/2”, “/3”, … suffixes.
       std::string mapped_key = key;
-      for (unsigned i = 2; attributes_.count(mapped_key) != 0; i++) {
-        char suffix[32];
-        snprintf(suffix, sizeof(suffix), "/%u", i);
-        suffix[sizeof(suffix)-1] = 0;
-        mapped_key = key + suffix;
+      // If absent, the value will be zero-initialized.
+      const size_t num_occurrences = ++attribute_count_[key];
+      if (num_occurrences > 1) {
+        mapped_key += "/" + std::to_string(num_occurrences);
       }
 
       unused_attributes_.insert(mapped_key);
@@ -317,6 +316,7 @@
   start_line_ = 0;
   type_.clear();
   parameter_.clear();
+  attribute_count_.clear();
   attributes_.clear();
   unused_attributes_.clear();
   unused_instructions_.clear();
diff --git a/deps/boringssl/src/crypto/test/file_test.h b/deps/boringssl/src/crypto/test/file_test.h
index 87f306f..1502003 100644
--- a/deps/boringssl/src/crypto/test/file_test.h
+++ b/deps/boringssl/src/crypto/test/file_test.h
@@ -221,6 +221,9 @@
   std::string type_;
   // parameter_ is the value of the first attribute.
   std::string parameter_;
+  // attribute_count_ maps unsuffixed attribute names to the number of times
+  // they have occurred so far.
+  std::map<std::string, size_t> attribute_count_;
   // attributes_ contains all attributes in the test, including the first.
   std::map<std::string, std::string> attributes_;
   // instructions_ contains all instructions in scope for the test.
diff --git a/deps/boringssl/src/crypto/trust_token/internal.h b/deps/boringssl/src/crypto/trust_token/internal.h
index d65057f..0aa1936 100644
--- a/deps/boringssl/src/crypto/trust_token/internal.h
+++ b/deps/boringssl/src/crypto/trust_token/internal.h
@@ -30,16 +30,20 @@
 #endif
 
 
-// PMBTokens.
-//
-// PMBTokens is described in https://eprint.iacr.org/2020/072/20200324:214215
-// and provides anonymous tokens with private metadata. We implement the
-// construction with validity verification, described in appendix H,
-// construction 6.
+// For the following cryptographic schemes, we use P-384 instead of our usual
+// choice of P-256. See Appendix I of
+// https://eprint.iacr.org/2020/072/20200324:214215 which describes two attacks
+// which may affect smaller curves. In particular, p-1 for P-256 is smooth,
+// giving a low complexity for the p-1 attack. P-384's p-1 has a 281-bit prime
+// factor,
+// 3055465788140352002733946906144561090641249606160407884365391979704929268480326390471.
+// This lower-bounds the p-1 attack at O(2^140). The p+1 attack is lower-bounded
+// by O(p^(1/3)) or O(2^128), so we do not need to check the smoothness of p+1.
 
-// PMBTOKEN_NONCE_SIZE is the size of nonces used as part of the PMBToken
+
+// TRUST_TOKEN_NONCE_SIZE is the size of nonces used as part of the Trust Token
 // protocol.
-#define PMBTOKEN_NONCE_SIZE 64
+#define TRUST_TOKEN_NONCE_SIZE 64
 
 typedef struct {
   // TODO(https://crbug.com/boringssl/334): These should store |EC_PRECOMP| so
@@ -47,7 +51,7 @@
   EC_AFFINE pub0;
   EC_AFFINE pub1;
   EC_AFFINE pubs;
-} PMBTOKEN_CLIENT_KEY;
+} TRUST_TOKEN_CLIENT_KEY;
 
 typedef struct {
   EC_SCALAR x0;
@@ -62,68 +66,47 @@
   EC_PRECOMP pub1_precomp;
   EC_AFFINE pubs;
   EC_PRECOMP pubs_precomp;
-} PMBTOKEN_ISSUER_KEY;
+} TRUST_TOKEN_ISSUER_KEY;
 
-// PMBTOKEN_PRETOKEN represents the intermediate state a client keeps during a
-// PMBToken issuance operation.
+// TRUST_TOKEN_PRETOKEN represents the intermediate state a client keeps during
+// a Trust Token issuance operation.
 typedef struct pmb_pretoken_st {
-  uint8_t t[PMBTOKEN_NONCE_SIZE];
+  uint8_t t[TRUST_TOKEN_NONCE_SIZE];
   EC_SCALAR r;
   EC_AFFINE Tp;
-} PMBTOKEN_PRETOKEN;
+} TRUST_TOKEN_PRETOKEN;
 
-// PMBTOKEN_PRETOKEN_free releases the memory associated with |token|.
-OPENSSL_EXPORT void PMBTOKEN_PRETOKEN_free(PMBTOKEN_PRETOKEN *token);
+// TRUST_TOKEN_PRETOKEN_free releases the memory associated with |token|.
+OPENSSL_EXPORT void TRUST_TOKEN_PRETOKEN_free(TRUST_TOKEN_PRETOKEN *token);
 
-DEFINE_STACK_OF(PMBTOKEN_PRETOKEN)
+DEFINE_STACK_OF(TRUST_TOKEN_PRETOKEN)
 
-// The following functions implement the corresponding |TRUST_TOKENS_METHOD|
-// functions for |TRUST_TOKENS_experiment_v0|'s PMBTokens construction which
-// uses P-521.
-int pmbtoken_exp0_generate_key(CBB *out_private, CBB *out_public);
-int pmbtoken_exp0_client_key_from_bytes(PMBTOKEN_CLIENT_KEY *key,
-                                        const uint8_t *in, size_t len);
-int pmbtoken_exp0_issuer_key_from_bytes(PMBTOKEN_ISSUER_KEY *key,
-                                        const uint8_t *in, size_t len);
-STACK_OF(PMBTOKEN_PRETOKEN) * pmbtoken_exp0_blind(CBB *cbb, size_t count);
-int pmbtoken_exp0_sign(const PMBTOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
-                       size_t num_requested, size_t num_to_issue,
-                       uint8_t private_metadata);
-STACK_OF(TRUST_TOKEN) *
-    pmbtoken_exp0_unblind(const PMBTOKEN_CLIENT_KEY *key,
-                          const STACK_OF(PMBTOKEN_PRETOKEN) * pretokens,
-                          CBS *cbs, size_t count, uint32_t key_id);
-int pmbtoken_exp0_read(const PMBTOKEN_ISSUER_KEY *key,
-                       uint8_t out_nonce[PMBTOKEN_NONCE_SIZE],
-                       uint8_t *out_private_metadata, const uint8_t *token,
-                       size_t token_len);
+
+// PMBTokens.
+//
+// PMBTokens is described in https://eprint.iacr.org/2020/072/20200324:214215
+// and provides anonymous tokens with private metadata. We implement the
+// construction with validity verification, described in appendix H,
+// construction 6.
 
 // The following functions implement the corresponding |TRUST_TOKENS_METHOD|
 // functions for |TRUST_TOKENS_experiment_v1|'s PMBTokens construction which
 // uses P-384.
-//
-// We use P-384 instead of our usual choice of P-256. See Appendix I which
-// describes two attacks which may affect smaller curves. In particular, p-1 for
-// P-256 is smooth, giving a low complexity for the p-1 attack. P-384's p-1 has
-// a 281-bit prime factor,
-// 3055465788140352002733946906144561090641249606160407884365391979704929268480326390471.
-// This lower-bounds the p-1 attack at O(2^140). The p+1 attack is lower-bounded
-// by O(p^(1/3)) or O(2^128), so we do not need to check the smoothness of p+1.
 int pmbtoken_exp1_generate_key(CBB *out_private, CBB *out_public);
-int pmbtoken_exp1_client_key_from_bytes(PMBTOKEN_CLIENT_KEY *key,
+int pmbtoken_exp1_client_key_from_bytes(TRUST_TOKEN_CLIENT_KEY *key,
                                         const uint8_t *in, size_t len);
-int pmbtoken_exp1_issuer_key_from_bytes(PMBTOKEN_ISSUER_KEY *key,
+int pmbtoken_exp1_issuer_key_from_bytes(TRUST_TOKEN_ISSUER_KEY *key,
                                         const uint8_t *in, size_t len);
-STACK_OF(PMBTOKEN_PRETOKEN) * pmbtoken_exp1_blind(CBB *cbb, size_t count);
-int pmbtoken_exp1_sign(const PMBTOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
+STACK_OF(TRUST_TOKEN_PRETOKEN) * pmbtoken_exp1_blind(CBB *cbb, size_t count);
+int pmbtoken_exp1_sign(const TRUST_TOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
                        size_t num_requested, size_t num_to_issue,
                        uint8_t private_metadata);
 STACK_OF(TRUST_TOKEN) *
-    pmbtoken_exp1_unblind(const PMBTOKEN_CLIENT_KEY *key,
-                          const STACK_OF(PMBTOKEN_PRETOKEN) * pretokens,
+    pmbtoken_exp1_unblind(const TRUST_TOKEN_CLIENT_KEY *key,
+                          const STACK_OF(TRUST_TOKEN_PRETOKEN) * pretokens,
                           CBS *cbs, size_t count, uint32_t key_id);
-int pmbtoken_exp1_read(const PMBTOKEN_ISSUER_KEY *key,
-                       uint8_t out_nonce[PMBTOKEN_NONCE_SIZE],
+int pmbtoken_exp1_read(const TRUST_TOKEN_ISSUER_KEY *key,
+                       uint8_t out_nonce[TRUST_TOKEN_NONCE_SIZE],
                        uint8_t *out_private_metadata, const uint8_t *token,
                        size_t token_len);
 
@@ -131,6 +114,62 @@
 // function is used to confirm H was computed as expected.
 OPENSSL_EXPORT int pmbtoken_exp1_get_h_for_testing(uint8_t out[97]);
 
+// The following functions implement the corresponding |TRUST_TOKENS_METHOD|
+// functions for |TRUST_TOKENS_experiment_v2|'s PMBTokens construction which
+// uses P-384.
+int pmbtoken_exp2_generate_key(CBB *out_private, CBB *out_public);
+int pmbtoken_exp2_client_key_from_bytes(TRUST_TOKEN_CLIENT_KEY *key,
+                                        const uint8_t *in, size_t len);
+int pmbtoken_exp2_issuer_key_from_bytes(TRUST_TOKEN_ISSUER_KEY *key,
+                                        const uint8_t *in, size_t len);
+STACK_OF(TRUST_TOKEN_PRETOKEN) * pmbtoken_exp2_blind(CBB *cbb, size_t count);
+int pmbtoken_exp2_sign(const TRUST_TOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
+                       size_t num_requested, size_t num_to_issue,
+                       uint8_t private_metadata);
+STACK_OF(TRUST_TOKEN) *
+    pmbtoken_exp2_unblind(const TRUST_TOKEN_CLIENT_KEY *key,
+                          const STACK_OF(TRUST_TOKEN_PRETOKEN) * pretokens,
+                          CBS *cbs, size_t count, uint32_t key_id);
+int pmbtoken_exp2_read(const TRUST_TOKEN_ISSUER_KEY *key,
+                       uint8_t out_nonce[TRUST_TOKEN_NONCE_SIZE],
+                       uint8_t *out_private_metadata, const uint8_t *token,
+                       size_t token_len);
+
+// pmbtoken_exp2_get_h_for_testing returns H in uncompressed coordinates. This
+// function is used to confirm H was computed as expected.
+OPENSSL_EXPORT int pmbtoken_exp2_get_h_for_testing(uint8_t out[97]);
+
+
+// VOPRF.
+//
+// VOPRFs are described in https://tools.ietf.org/html/draft-irtf-cfrg-voprf-04
+// and provide anonymous tokens. This implementation uses TrustToken DSTs and
+// the DLEQ batching primitive from
+// https://eprint.iacr.org/2020/072/20200324:214215.
+// VOPRF only uses the |pubs| field of the TRUST_TOKEN_CLIENT_KEY and
+// |xs|/|pubs| fields of the TRUST_TOKEN_ISSUER_KEY.
+
+// The following functions implement the corresponding |TRUST_TOKENS_METHOD|
+// functions for |TRUST_TOKENS_experiment_v2|'s VOPRF construction which uses
+// P-384.
+int voprf_exp2_generate_key(CBB *out_private, CBB *out_public);
+int voprf_exp2_client_key_from_bytes(TRUST_TOKEN_CLIENT_KEY *key,
+                                     const uint8_t *in, size_t len);
+int voprf_exp2_issuer_key_from_bytes(TRUST_TOKEN_ISSUER_KEY *key,
+                                     const uint8_t *in, size_t len);
+STACK_OF(TRUST_TOKEN_PRETOKEN) * voprf_exp2_blind(CBB *cbb, size_t count);
+int voprf_exp2_sign(const TRUST_TOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
+                    size_t num_requested, size_t num_to_issue,
+                    uint8_t private_metadata);
+STACK_OF(TRUST_TOKEN) *
+    voprf_exp2_unblind(const TRUST_TOKEN_CLIENT_KEY *key,
+                       const STACK_OF(TRUST_TOKEN_PRETOKEN) * pretokens,
+                       CBS *cbs, size_t count, uint32_t key_id);
+int voprf_exp2_read(const TRUST_TOKEN_ISSUER_KEY *key,
+                    uint8_t out_nonce[TRUST_TOKEN_NONCE_SIZE],
+                    uint8_t *out_private_metadata, const uint8_t *token,
+                    size_t token_len);
+
 
 // Trust Tokens internals.
 
@@ -143,23 +182,23 @@
   // client_key_from_bytes decodes a client key from |in| and sets |key|
   // to the resulting key. It returns one on success and zero
   // on failure.
-  int (*client_key_from_bytes)(PMBTOKEN_CLIENT_KEY *key, const uint8_t *in,
+  int (*client_key_from_bytes)(TRUST_TOKEN_CLIENT_KEY *key, const uint8_t *in,
                                size_t len);
 
   // issuer_key_from_bytes decodes a issuer key from |in| and sets |key|
   // to the resulting key. It returns one on success and zero
   // on failure.
-  int (*issuer_key_from_bytes)(PMBTOKEN_ISSUER_KEY *key, const uint8_t *in,
+  int (*issuer_key_from_bytes)(TRUST_TOKEN_ISSUER_KEY *key, const uint8_t *in,
                                size_t len);
 
   // blind generates a new issuance request for |count| tokens. On
-  // success, it returns a newly-allocated |STACK_OF(PMBTOKEN_PRETOKEN)| and
+  // success, it returns a newly-allocated |STACK_OF(TRUST_TOKEN_PRETOKEN)| and
   // writes a request to the issuer to |cbb|. On failure, it returns NULL. The
-  // |STACK_OF(PMBTOKEN_PRETOKEN)|s should be passed to |pmbtoken_unblind| when
+  // |STACK_OF(TRUST_TOKEN_PRETOKEN)|s should be passed to |unblind| when
   // the server responds.
   //
   // This function implements the AT.Usr0 operation.
-  STACK_OF(PMBTOKEN_PRETOKEN) *(*blind)(CBB *cbb, size_t count);
+  STACK_OF(TRUST_TOKEN_PRETOKEN) * (*blind)(CBB *cbb, size_t count);
 
   // sign parses a request for |num_requested| tokens from |cbs| and
   // issues |num_to_issue| tokens with |key| and a private metadata value of
@@ -167,7 +206,7 @@
   // success and zero on failure.
   //
   // This function implements the AT.Sig operation.
-  int (*sign)(const PMBTOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
+  int (*sign)(const TRUST_TOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
               size_t num_requested, size_t num_to_issue,
               uint8_t private_metadata);
 
@@ -180,8 +219,8 @@
   //
   // This function implements the AT.Usr1 operation.
   STACK_OF(TRUST_TOKEN) *
-      (*unblind)(const PMBTOKEN_CLIENT_KEY *key,
-                 const STACK_OF(PMBTOKEN_PRETOKEN) * pretokens, CBS *cbs,
+      (*unblind)(const TRUST_TOKEN_CLIENT_KEY *key,
+                 const STACK_OF(TRUST_TOKEN_PRETOKEN) * pretokens, CBS *cbs,
                  size_t count, uint32_t key_id);
 
   // read parses a PMBToken from |token| and verifies it using |key|. On
@@ -189,31 +228,32 @@
   // |out_nonce| and |*out_private_metadata|. Otherwise, it returns zero. Note
   // that, unlike the output of |unblind|, |token| does not have a
   // four-byte key ID prepended.
-  int (*read)(const PMBTOKEN_ISSUER_KEY *key,
-              uint8_t out_nonce[PMBTOKEN_NONCE_SIZE],
+  int (*read)(const TRUST_TOKEN_ISSUER_KEY *key,
+              uint8_t out_nonce[TRUST_TOKEN_NONCE_SIZE],
               uint8_t *out_private_metadata, const uint8_t *token,
               size_t token_len);
 
-  // use_token_hash determines whether to include the token hash in the SRR and
-  // private metadata encryption.
-  int use_token_hash : 1;
+  // whether the construction supports private metadata.
+  int has_private_metadata;
 
-  // batched_proof determines whether PMBToken uses a batched DLEQOR proof when
-  // signing tokens.
-  int batched_proof : 1;
+  // max keys that can be configured.
+  size_t max_keys;
+
+  // whether the SRR is part of the protocol.
+  int has_srr;
 };
 
 // Structure representing a single Trust Token public key with the specified ID.
 struct trust_token_client_key_st {
   uint32_t id;
-  PMBTOKEN_CLIENT_KEY key;
+  TRUST_TOKEN_CLIENT_KEY key;
 };
 
 // Structure representing a single Trust Token private key with the specified
 // ID.
 struct trust_token_issuer_key_st {
   uint32_t id;
-  PMBTOKEN_ISSUER_KEY key;
+  TRUST_TOKEN_ISSUER_KEY key;
 };
 
 struct trust_token_client_st {
@@ -224,13 +264,13 @@
 
   // keys is the set of public keys that are supported by the client for
   // issuance/redemptions.
-  struct trust_token_client_key_st keys[3];
+  struct trust_token_client_key_st keys[6];
 
   // num_keys is the number of keys currently configured.
   size_t num_keys;
 
   // pretokens is the intermediate state during an active issuance.
-  STACK_OF(PMBTOKEN_PRETOKEN)* pretokens;
+  STACK_OF(TRUST_TOKEN_PRETOKEN)* pretokens;
 
   // srr_key is the public key used to verify the signature of the SRR.
   EVP_PKEY *srr_key;
@@ -246,7 +286,7 @@
   // keys is the set of private keys that are supported by the issuer for
   // issuance/redemptions. The public metadata is an index into this list of
   // keys.
-  struct trust_token_issuer_key_st keys[3];
+  struct trust_token_issuer_key_st keys[6];
 
   // num_keys is the number of keys currently configured.
   size_t num_keys;
@@ -268,7 +308,7 @@
 
 BSSL_NAMESPACE_BEGIN
 
-BORINGSSL_MAKE_DELETER(PMBTOKEN_PRETOKEN, PMBTOKEN_PRETOKEN_free)
+BORINGSSL_MAKE_DELETER(TRUST_TOKEN_PRETOKEN, TRUST_TOKEN_PRETOKEN_free)
 
 BSSL_NAMESPACE_END
 
diff --git a/deps/boringssl/src/crypto/trust_token/pmbtoken.c b/deps/boringssl/src/crypto/trust_token/pmbtoken.c
index 291cb86..a6549b9 100644
--- a/deps/boringssl/src/crypto/trust_token/pmbtoken.c
+++ b/deps/boringssl/src/crypto/trust_token/pmbtoken.c
@@ -31,10 +31,10 @@
 
 
 typedef int (*hash_t_func_t)(const EC_GROUP *group, EC_RAW_POINT *out,
-                             const uint8_t t[PMBTOKEN_NONCE_SIZE]);
+                             const uint8_t t[TRUST_TOKEN_NONCE_SIZE]);
 typedef int (*hash_s_func_t)(const EC_GROUP *group, EC_RAW_POINT *out,
                              const EC_AFFINE *t,
-                             const uint8_t s[PMBTOKEN_NONCE_SIZE]);
+                             const uint8_t s[TRUST_TOKEN_NONCE_SIZE]);
 typedef int (*hash_c_func_t)(const EC_GROUP *group, EC_SCALAR *out,
                              uint8_t *buf, size_t len);
 
@@ -52,9 +52,7 @@
   // hash_c implements the H_c operation in PMBTokens. It returns one on success
   // and zero on error.
   hash_c_func_t hash_c;
-  // batched_proof determines whether PMBToken uses a batched DLEQOR proof when
-  // signing tokens.
-  int batched_proof : 1;
+  int prefix_point : 1;
 } PMBTOKEN_METHOD;
 
 static const uint8_t kDefaultAdditionalData[32] = {0};
@@ -62,7 +60,7 @@
 static int pmbtoken_init_method(PMBTOKEN_METHOD *method, int curve_nid,
                                 const uint8_t *h_bytes, size_t h_len,
                                 hash_t_func_t hash_t, hash_s_func_t hash_s,
-                                hash_c_func_t hash_c, int batched_proof) {
+                                hash_c_func_t hash_c, int prefix_point) {
   method->group = EC_GROUP_new_by_curve_name(curve_nid);
   if (method->group == NULL) {
     return 0;
@@ -71,7 +69,7 @@
   method->hash_t = hash_t;
   method->hash_s = hash_s;
   method->hash_c = hash_c;
-  method->batched_proof = batched_proof;
+  method->prefix_point = prefix_point;
 
   EC_AFFINE h;
   if (!ec_point_from_uncompressed(method->group, &h, h_bytes, h_len)) {
@@ -117,11 +115,40 @@
                            len) == len;
 }
 
+static int cbb_add_prefixed_point(CBB *out, const EC_GROUP *group,
+                                  const EC_AFFINE *point, int prefix_point) {
+  if (prefix_point) {
+    CBB child;
+    if (!CBB_add_u16_length_prefixed(out, &child) ||
+        !point_to_cbb(&child, group, point) ||
+        !CBB_flush(out)) {
+      return 0;
+    }
+  } else {
+    if (!point_to_cbb(out, group, point) ||
+        !CBB_flush(out)) {
+      return 0;
+    }
+  }
+
+  return 1;
+}
+
 static int cbs_get_prefixed_point(CBS *cbs, const EC_GROUP *group,
-                                  EC_AFFINE *out) {
+                                  EC_AFFINE *out, int prefix_point) {
   CBS child;
-  if (!CBS_get_u16_length_prefixed(cbs, &child) ||
-      !ec_point_from_uncompressed(group, out, CBS_data(&child),
+  if (prefix_point) {
+    if (!CBS_get_u16_length_prefixed(cbs, &child)) {
+      return 0;
+    }
+  } else {
+    size_t plen = 1 + 2 * BN_num_bytes(&group->field);
+    if (!CBS_get_bytes(cbs, &child, plen)) {
+      return 0;
+    }
+  }
+
+  if (!ec_point_from_uncompressed(group, out, CBS_data(&child),
                                   CBS_len(&child))) {
     return 0;
   }
@@ -138,10 +165,6 @@
                                           scalars, 3);
 }
 
-void PMBTOKEN_PRETOKEN_free(PMBTOKEN_PRETOKEN *pretoken) {
-  OPENSSL_free(pretoken);
-}
-
 static int pmbtoken_generate_key(const PMBTOKEN_METHOD *method,
                                  CBB *out_private, CBB *out_public) {
   const EC_GROUP *group = method->group;
@@ -170,16 +193,12 @@
     return 0;
   }
 
-  // TODO(https://crbug.com/boringssl/331): When updating the key format, remove
-  // the redundant length prefixes.
-  CBB child;
-  if (!CBB_add_u16_length_prefixed(out_public, &child) ||
-      !point_to_cbb(&child, group, &pub_affine[0]) ||
-      !CBB_add_u16_length_prefixed(out_public, &child) ||
-      !point_to_cbb(&child, group, &pub_affine[1]) ||
-      !CBB_add_u16_length_prefixed(out_public, &child) ||
-      !point_to_cbb(&child, group, &pub_affine[2]) ||
-      !CBB_flush(out_public)) {
+  if (!cbb_add_prefixed_point(out_public, group, &pub_affine[0],
+                              method->prefix_point) ||
+      !cbb_add_prefixed_point(out_public, group, &pub_affine[1],
+                              method->prefix_point) ||
+      !cbb_add_prefixed_point(out_public, group, &pub_affine[2],
+                              method->prefix_point)) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_BUFFER_TOO_SMALL);
     return 0;
   }
@@ -188,15 +207,16 @@
 }
 
 static int pmbtoken_client_key_from_bytes(const PMBTOKEN_METHOD *method,
-                                          PMBTOKEN_CLIENT_KEY *key,
+                                          TRUST_TOKEN_CLIENT_KEY *key,
                                           const uint8_t *in, size_t len) {
-  // TODO(https://crbug.com/boringssl/331): When updating the key format, remove
-  // the redundant length prefixes.
   CBS cbs;
   CBS_init(&cbs, in, len);
-  if (!cbs_get_prefixed_point(&cbs, method->group, &key->pub0) ||
-      !cbs_get_prefixed_point(&cbs, method->group, &key->pub1) ||
-      !cbs_get_prefixed_point(&cbs, method->group, &key->pubs) ||
+  if (!cbs_get_prefixed_point(&cbs, method->group, &key->pub0,
+                              method->prefix_point) ||
+      !cbs_get_prefixed_point(&cbs, method->group, &key->pub1,
+                              method->prefix_point) ||
+      !cbs_get_prefixed_point(&cbs, method->group, &key->pubs,
+                              method->prefix_point) ||
       CBS_len(&cbs) != 0) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
     return 0;
@@ -206,7 +226,7 @@
 }
 
 static int pmbtoken_issuer_key_from_bytes(const PMBTOKEN_METHOD *method,
-                                          PMBTOKEN_ISSUER_KEY *key,
+                                          TRUST_TOKEN_ISSUER_KEY *key,
                                           const uint8_t *in, size_t len) {
   const EC_GROUP *group = method->group;
   CBS cbs, tmp;
@@ -245,10 +265,10 @@
   return 1;
 }
 
-static STACK_OF(PMBTOKEN_PRETOKEN) *
+static STACK_OF(TRUST_TOKEN_PRETOKEN) *
     pmbtoken_blind(const PMBTOKEN_METHOD *method, CBB *cbb, size_t count) {
   const EC_GROUP *group = method->group;
-  STACK_OF(PMBTOKEN_PRETOKEN) *pretokens = sk_PMBTOKEN_PRETOKEN_new_null();
+  STACK_OF(TRUST_TOKEN_PRETOKEN) *pretokens = sk_TRUST_TOKEN_PRETOKEN_new_null();
   if (pretokens == NULL) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
     goto err;
@@ -256,11 +276,11 @@
 
   for (size_t i = 0; i < count; i++) {
     // Insert |pretoken| into |pretokens| early to simplify error-handling.
-    PMBTOKEN_PRETOKEN *pretoken = OPENSSL_malloc(sizeof(PMBTOKEN_PRETOKEN));
+    TRUST_TOKEN_PRETOKEN *pretoken = OPENSSL_malloc(sizeof(TRUST_TOKEN_PRETOKEN));
     if (pretoken == NULL ||
-        !sk_PMBTOKEN_PRETOKEN_push(pretokens, pretoken)) {
+        !sk_TRUST_TOKEN_PRETOKEN_push(pretokens, pretoken)) {
       OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
-      PMBTOKEN_PRETOKEN_free(pretoken);
+      TRUST_TOKEN_PRETOKEN_free(pretoken);
       goto err;
     }
 
@@ -286,12 +306,8 @@
       goto err;
     }
 
-    // TODO(https://crbug.com/boringssl/331): When updating the key format,
-    // remove the redundant length prefixes.
-    CBB child;
-    if (!CBB_add_u16_length_prefixed(cbb, &child) ||
-        !point_to_cbb(&child, group, &pretoken->Tp) ||
-        !CBB_flush(cbb)) {
+    if (!cbb_add_prefixed_point(cbb, group, &pretoken->Tp,
+                                method->prefix_point)) {
       goto err;
     }
   }
@@ -299,7 +315,7 @@
   return pretokens;
 
 err:
-  sk_PMBTOKEN_PRETOKEN_pop_free(pretokens, PMBTOKEN_PRETOKEN_free);
+  sk_TRUST_TOKEN_PRETOKEN_pop_free(pretokens, TRUST_TOKEN_PRETOKEN_free);
   return NULL;
 }
 
@@ -435,9 +451,10 @@
 // DLEQOR2 with only one value (n=1).
 
 static int dleq_generate(const PMBTOKEN_METHOD *method, CBB *cbb,
-                         const PMBTOKEN_ISSUER_KEY *priv, const EC_RAW_POINT *T,
-                         const EC_RAW_POINT *S, const EC_RAW_POINT *W,
-                         const EC_RAW_POINT *Ws, uint8_t private_metadata) {
+                         const TRUST_TOKEN_ISSUER_KEY *priv,
+                         const EC_RAW_POINT *T, const EC_RAW_POINT *S,
+                         const EC_RAW_POINT *W, const EC_RAW_POINT *Ws,
+                         uint8_t private_metadata) {
   const EC_GROUP *group = method->group;
 
   // We generate a DLEQ proof for the validity token and a DLEQOR2 proof for the
@@ -596,7 +613,7 @@
 }
 
 static int dleq_verify(const PMBTOKEN_METHOD *method, CBS *cbs,
-                       const PMBTOKEN_CLIENT_KEY *pub, const EC_RAW_POINT *T,
+                       const TRUST_TOKEN_CLIENT_KEY *pub, const EC_RAW_POINT *T,
                        const EC_RAW_POINT *S, const EC_RAW_POINT *W,
                        const EC_RAW_POINT *Ws) {
   const EC_GROUP *group = method->group;
@@ -715,7 +732,7 @@
 }
 
 static int pmbtoken_sign(const PMBTOKEN_METHOD *method,
-                         const PMBTOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
+                         const TRUST_TOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
                          size_t num_requested, size_t num_to_issue,
                          uint8_t private_metadata) {
   const EC_GROUP *group = method->group;
@@ -724,43 +741,37 @@
     return 0;
   }
 
+  if (num_to_issue > ((size_t)-1) / sizeof(EC_RAW_POINT) ||
+      num_to_issue > ((size_t)-1) / sizeof(EC_SCALAR)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_OVERFLOW);
+    return 0;
+  }
+
   int ret = 0;
-  EC_RAW_POINT *Tps = NULL;
-  EC_RAW_POINT *Sps = NULL;
-  EC_RAW_POINT *Wps = NULL;
-  EC_RAW_POINT *Wsps = NULL;
-  EC_SCALAR *es = NULL;
+  EC_RAW_POINT *Tps = OPENSSL_malloc(num_to_issue * sizeof(EC_RAW_POINT));
+  EC_RAW_POINT *Sps = OPENSSL_malloc(num_to_issue * sizeof(EC_RAW_POINT));
+  EC_RAW_POINT *Wps = OPENSSL_malloc(num_to_issue * sizeof(EC_RAW_POINT));
+  EC_RAW_POINT *Wsps = OPENSSL_malloc(num_to_issue * sizeof(EC_RAW_POINT));
+  EC_SCALAR *es = OPENSSL_malloc(num_to_issue * sizeof(EC_SCALAR));
   CBB batch_cbb;
   CBB_zero(&batch_cbb);
-  if (method->batched_proof) {
-    if (num_to_issue > ((size_t)-1) / sizeof(EC_RAW_POINT) ||
-        num_to_issue > ((size_t)-1) / sizeof(EC_SCALAR)) {
-      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_OVERFLOW);
-      goto err;
-    }
-    Tps = OPENSSL_malloc(num_to_issue * sizeof(EC_RAW_POINT));
-    Sps = OPENSSL_malloc(num_to_issue * sizeof(EC_RAW_POINT));
-    Wps = OPENSSL_malloc(num_to_issue * sizeof(EC_RAW_POINT));
-    Wsps = OPENSSL_malloc(num_to_issue * sizeof(EC_RAW_POINT));
-    es = OPENSSL_malloc(num_to_issue * sizeof(EC_SCALAR));
-    if (!Tps ||
-        !Sps ||
-        !Wps ||
-        !Wsps ||
-        !es ||
-        !CBB_init(&batch_cbb, 0) ||
-        !point_to_cbb(&batch_cbb, method->group, &key->pubs) ||
-        !point_to_cbb(&batch_cbb, method->group, &key->pub0) ||
-        !point_to_cbb(&batch_cbb, method->group, &key->pub1)) {
-      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
-      goto err;
-    }
+  if (!Tps ||
+      !Sps ||
+      !Wps ||
+      !Wsps ||
+      !es ||
+      !CBB_init(&batch_cbb, 0) ||
+      !point_to_cbb(&batch_cbb, method->group, &key->pubs) ||
+      !point_to_cbb(&batch_cbb, method->group, &key->pub0) ||
+      !point_to_cbb(&batch_cbb, method->group, &key->pub1)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    goto err;
   }
 
   for (size_t i = 0; i < num_to_issue; i++) {
     EC_AFFINE Tp_affine;
     EC_RAW_POINT Tp;
-    if (!cbs_get_prefixed_point(cbs, group, &Tp_affine)) {
+    if (!cbs_get_prefixed_point(cbs, group, &Tp_affine, method->prefix_point)) {
       OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
       goto err;
     }
@@ -771,47 +782,36 @@
     ec_scalar_select(group, &xb, mask, &key->x1, &key->x0);
     ec_scalar_select(group, &yb, mask, &key->y1, &key->y0);
 
-    uint8_t s[PMBTOKEN_NONCE_SIZE];
-    RAND_bytes(s, PMBTOKEN_NONCE_SIZE);
+    uint8_t s[TRUST_TOKEN_NONCE_SIZE];
+    RAND_bytes(s, TRUST_TOKEN_NONCE_SIZE);
     // The |jacobians| and |affines| contain Sp, Wp, and Wsp.
     EC_RAW_POINT jacobians[3];
     EC_AFFINE affines[3];
-    CBB child;
     if (!method->hash_s(group, &jacobians[0], &Tp_affine, s) ||
         !ec_point_mul_scalar_batch(group, &jacobians[1], &Tp, &xb,
                                    &jacobians[0], &yb, NULL, NULL) ||
         !ec_point_mul_scalar_batch(group, &jacobians[2], &Tp, &key->xs,
                                    &jacobians[0], &key->ys, NULL, NULL) ||
         !ec_jacobian_to_affine_batch(group, affines, jacobians, 3) ||
-        !CBB_add_bytes(cbb, s, PMBTOKEN_NONCE_SIZE) ||
-        // TODO(https://crbug.com/boringssl/331): When updating the key format,
-        // remove the redundant length prefixes.
-        !CBB_add_u16_length_prefixed(cbb, &child) ||
-        !point_to_cbb(&child, group, &affines[1]) ||
-        !CBB_add_u16_length_prefixed(cbb, &child) ||
-        !point_to_cbb(&child, group, &affines[2])) {
+        !CBB_add_bytes(cbb, s, TRUST_TOKEN_NONCE_SIZE) ||
+        !cbb_add_prefixed_point(cbb, group, &affines[1],
+                                method->prefix_point) ||
+        !cbb_add_prefixed_point(cbb, group, &affines[2],
+                                method->prefix_point)) {
       goto err;
     }
 
-    if (!method->batched_proof) {
-      if (!CBB_add_u16_length_prefixed(cbb, &child) ||
-          !dleq_generate(method, &child, key, &Tp, &jacobians[0], &jacobians[1],
-                         &jacobians[2], private_metadata)) {
-        goto err;
-      }
-    } else {
-      if (!point_to_cbb(&batch_cbb, group, &Tp_affine) ||
-          !point_to_cbb(&batch_cbb, group, &affines[0]) ||
-          !point_to_cbb(&batch_cbb, group, &affines[1]) ||
-          !point_to_cbb(&batch_cbb, group, &affines[2])) {
-        OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
-        goto err;
-      }
-      Tps[i] = Tp;
-      Sps[i] = jacobians[0];
-      Wps[i] = jacobians[1];
-      Wsps[i] = jacobians[2];
+    if (!point_to_cbb(&batch_cbb, group, &Tp_affine) ||
+        !point_to_cbb(&batch_cbb, group, &affines[0]) ||
+        !point_to_cbb(&batch_cbb, group, &affines[1]) ||
+        !point_to_cbb(&batch_cbb, group, &affines[2])) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+      goto err;
     }
+    Tps[i] = Tp;
+    Sps[i] = jacobians[0];
+    Wps[i] = jacobians[1];
+    Wsps[i] = jacobians[2];
 
     if (!CBB_flush(cbb)) {
       goto err;
@@ -821,41 +821,43 @@
   // The DLEQ batching construction is described in appendix B of
   // https://eprint.iacr.org/2020/072/20200324:214215. Note the additional
   // computations all act on public inputs.
-  if (method->batched_proof) {
-    for (size_t i = 0; i < num_to_issue; i++) {
-      if (!hash_c_batch(method, &es[i], &batch_cbb, i)) {
-        goto err;
-      }
-    }
-
-    EC_RAW_POINT Tp_batch, Sp_batch, Wp_batch, Wsp_batch;
-    if (!ec_point_mul_scalar_public_batch(group, &Tp_batch,
-                                          /*g_scalar=*/NULL, Tps, es,
-                                          num_to_issue) ||
-        !ec_point_mul_scalar_public_batch(group, &Sp_batch,
-                                          /*g_scalar=*/NULL, Sps, es,
-                                          num_to_issue) ||
-        !ec_point_mul_scalar_public_batch(group, &Wp_batch,
-                                          /*g_scalar=*/NULL, Wps, es,
-                                          num_to_issue) ||
-        !ec_point_mul_scalar_public_batch(group, &Wsp_batch,
-                                          /*g_scalar=*/NULL, Wsps, es,
-                                          num_to_issue)) {
-      goto err;
-    }
-
-    CBB proof;
-    if (!CBB_add_u16_length_prefixed(cbb, &proof) ||
-        !dleq_generate(method, &proof, key, &Tp_batch, &Sp_batch, &Wp_batch,
-                       &Wsp_batch, private_metadata) ||
-        !CBB_flush(cbb)) {
+  for (size_t i = 0; i < num_to_issue; i++) {
+    if (!hash_c_batch(method, &es[i], &batch_cbb, i)) {
       goto err;
     }
   }
 
+  EC_RAW_POINT Tp_batch, Sp_batch, Wp_batch, Wsp_batch;
+  if (!ec_point_mul_scalar_public_batch(group, &Tp_batch,
+                                        /*g_scalar=*/NULL, Tps, es,
+                                        num_to_issue) ||
+      !ec_point_mul_scalar_public_batch(group, &Sp_batch,
+                                        /*g_scalar=*/NULL, Sps, es,
+                                        num_to_issue) ||
+      !ec_point_mul_scalar_public_batch(group, &Wp_batch,
+                                        /*g_scalar=*/NULL, Wps, es,
+                                        num_to_issue) ||
+      !ec_point_mul_scalar_public_batch(group, &Wsp_batch,
+                                        /*g_scalar=*/NULL, Wsps, es,
+                                        num_to_issue)) {
+    goto err;
+  }
+
+  CBB proof;
+  if (!CBB_add_u16_length_prefixed(cbb, &proof) ||
+      !dleq_generate(method, &proof, key, &Tp_batch, &Sp_batch, &Wp_batch,
+                     &Wsp_batch, private_metadata) ||
+      !CBB_flush(cbb)) {
+    goto err;
+  }
+
   // Skip over any unused requests.
   size_t point_len = 1 + 2 * BN_num_bytes(&group->field);
-  if (!CBS_skip(cbs, (2 + point_len) * (num_requested - num_to_issue))) {
+  size_t token_len = point_len;
+  if (method->prefix_point) {
+    token_len += 2;
+  }
+  if (!CBS_skip(cbs, token_len * (num_requested - num_to_issue))) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
     goto err;
   }
@@ -874,11 +876,11 @@
 
 static STACK_OF(TRUST_TOKEN) *
     pmbtoken_unblind(const PMBTOKEN_METHOD *method,
-                     const PMBTOKEN_CLIENT_KEY *key,
-                     const STACK_OF(PMBTOKEN_PRETOKEN) * pretokens, CBS *cbs,
+                     const TRUST_TOKEN_CLIENT_KEY *key,
+                     const STACK_OF(TRUST_TOKEN_PRETOKEN) * pretokens, CBS *cbs,
                      size_t count, uint32_t key_id) {
   const EC_GROUP *group = method->group;
-  if (count > sk_PMBTOKEN_PRETOKEN_num(pretokens)) {
+  if (count > sk_TRUST_TOKEN_PRETOKEN_num(pretokens)) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
     return NULL;
   }
@@ -890,115 +892,86 @@
     return NULL;
   }
 
-  EC_RAW_POINT *Tps = NULL;
-  EC_RAW_POINT *Sps = NULL;
-  EC_RAW_POINT *Wps = NULL;
-  EC_RAW_POINT *Wsps = NULL;
-  EC_SCALAR *es = NULL;
+  if (count > ((size_t)-1) / sizeof(EC_RAW_POINT) ||
+      count > ((size_t)-1) / sizeof(EC_SCALAR)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_OVERFLOW);
+    return 0;
+  }
+  EC_RAW_POINT *Tps = OPENSSL_malloc(count * sizeof(EC_RAW_POINT));
+  EC_RAW_POINT *Sps = OPENSSL_malloc(count * sizeof(EC_RAW_POINT));
+  EC_RAW_POINT *Wps = OPENSSL_malloc(count * sizeof(EC_RAW_POINT));
+  EC_RAW_POINT *Wsps = OPENSSL_malloc(count * sizeof(EC_RAW_POINT));
+  EC_SCALAR *es = OPENSSL_malloc(count * sizeof(EC_SCALAR));
   CBB batch_cbb;
   CBB_zero(&batch_cbb);
-  if (method->batched_proof) {
-    if (count > ((size_t)-1) / sizeof(EC_RAW_POINT) ||
-        count > ((size_t)-1) / sizeof(EC_SCALAR)) {
-      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_OVERFLOW);
-      goto err;
-    }
-    Tps = OPENSSL_malloc(count * sizeof(EC_RAW_POINT));
-    Sps = OPENSSL_malloc(count * sizeof(EC_RAW_POINT));
-    Wps = OPENSSL_malloc(count * sizeof(EC_RAW_POINT));
-    Wsps = OPENSSL_malloc(count * sizeof(EC_RAW_POINT));
-    es = OPENSSL_malloc(count * sizeof(EC_SCALAR));
-    if (!Tps ||
-        !Sps ||
-        !Wps ||
-        !Wsps ||
-        !es ||
-        !CBB_init(&batch_cbb, 0) ||
-        !point_to_cbb(&batch_cbb, method->group, &key->pubs) ||
-        !point_to_cbb(&batch_cbb, method->group, &key->pub0) ||
-        !point_to_cbb(&batch_cbb, method->group, &key->pub1)) {
-      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
-      goto err;
-    }
+  if (!Tps ||
+      !Sps ||
+      !Wps ||
+      !Wsps ||
+      !es ||
+      !CBB_init(&batch_cbb, 0) ||
+      !point_to_cbb(&batch_cbb, method->group, &key->pubs) ||
+      !point_to_cbb(&batch_cbb, method->group, &key->pub0) ||
+      !point_to_cbb(&batch_cbb, method->group, &key->pub1)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    goto err;
   }
 
   for (size_t i = 0; i < count; i++) {
-    const PMBTOKEN_PRETOKEN *pretoken =
-        sk_PMBTOKEN_PRETOKEN_value(pretokens, i);
+    const TRUST_TOKEN_PRETOKEN *pretoken =
+        sk_TRUST_TOKEN_PRETOKEN_value(pretokens, i);
 
-    uint8_t s[PMBTOKEN_NONCE_SIZE];
+    uint8_t s[TRUST_TOKEN_NONCE_SIZE];
     EC_AFFINE Wp_affine, Wsp_affine;
-    CBS proof;
-    if (!CBS_copy_bytes(cbs, s, PMBTOKEN_NONCE_SIZE) ||
-        !cbs_get_prefixed_point(cbs, group, &Wp_affine) ||
-        !cbs_get_prefixed_point(cbs, group, &Wsp_affine)) {
+    if (!CBS_copy_bytes(cbs, s, TRUST_TOKEN_NONCE_SIZE) ||
+        !cbs_get_prefixed_point(cbs, group, &Wp_affine, method->prefix_point) ||
+        !cbs_get_prefixed_point(cbs, group, &Wsp_affine,
+                                method->prefix_point)) {
       OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
       goto err;
     }
 
-    EC_RAW_POINT Tp, Wp, Wsp, Sp;
-    ec_affine_to_jacobian(group, &Tp, &pretoken->Tp);
-    ec_affine_to_jacobian(group, &Wp, &Wp_affine);
-    ec_affine_to_jacobian(group, &Wsp, &Wsp_affine);
-    if (!method->hash_s(group, &Sp, &pretoken->Tp, s)) {
+    ec_affine_to_jacobian(group, &Tps[i], &pretoken->Tp);
+    ec_affine_to_jacobian(group, &Wps[i], &Wp_affine);
+    ec_affine_to_jacobian(group, &Wsps[i], &Wsp_affine);
+    if (!method->hash_s(group, &Sps[i], &pretoken->Tp, s)) {
       goto err;
     }
 
-    if (!method->batched_proof) {
-      if(!CBS_get_u16_length_prefixed(cbs, &proof)) {
-        OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
-        goto err;
-      }
-
-      if (!dleq_verify(method, &proof, key, &Tp, &Sp, &Wp, &Wsp)) {
-        goto err;
-      }
-
-      if (CBS_len(&proof) != 0) {
-        OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
-        goto err;
-      }
-    } else {
-      EC_AFFINE Sp_affine;
-      if (!point_to_cbb(&batch_cbb, group, &pretoken->Tp) ||
-          !ec_jacobian_to_affine(group, &Sp_affine, &Sp) ||
-          !point_to_cbb(&batch_cbb, group, &Sp_affine) ||
-          !point_to_cbb(&batch_cbb, group, &Wp_affine) ||
-          !point_to_cbb(&batch_cbb, group, &Wsp_affine)) {
-        OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
-        goto err;
-      }
-      Tps[i] = Tp;
-      Sps[i] = Sp;
-      Wps[i] = Wp;
-      Wsps[i] = Wsp;
+    EC_AFFINE Sp_affine;
+    if (!point_to_cbb(&batch_cbb, group, &pretoken->Tp) ||
+        !ec_jacobian_to_affine(group, &Sp_affine, &Sps[i]) ||
+        !point_to_cbb(&batch_cbb, group, &Sp_affine) ||
+        !point_to_cbb(&batch_cbb, group, &Wp_affine) ||
+        !point_to_cbb(&batch_cbb, group, &Wsp_affine)) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+      goto err;
     }
 
     // Unblind the token.
     EC_RAW_POINT jacobians[3];
     EC_AFFINE affines[3];
-    if (!ec_point_mul_scalar(group, &jacobians[0], &Sp, &pretoken->r) ||
-        !ec_point_mul_scalar(group, &jacobians[1], &Wp, &pretoken->r) ||
-        !ec_point_mul_scalar(group, &jacobians[2], &Wsp, &pretoken->r) ||
+    if (!ec_point_mul_scalar(group, &jacobians[0], &Sps[i], &pretoken->r) ||
+        !ec_point_mul_scalar(group, &jacobians[1], &Wps[i], &pretoken->r) ||
+        !ec_point_mul_scalar(group, &jacobians[2], &Wsps[i], &pretoken->r) ||
         !ec_jacobian_to_affine_batch(group, affines, jacobians, 3)) {
       goto err;
     }
 
     // Serialize the token. Include |key_id| to avoid an extra copy in the layer
     // above.
-    CBB token_cbb, child;
+    CBB token_cbb;
     size_t point_len = 1 + 2 * BN_num_bytes(&group->field);
-    if (!CBB_init(&token_cbb, 4 + PMBTOKEN_NONCE_SIZE + 3 * (2 + point_len)) ||
+    if (!CBB_init(&token_cbb,
+                  4 + TRUST_TOKEN_NONCE_SIZE + 3 * (2 + point_len)) ||
         !CBB_add_u32(&token_cbb, key_id) ||
-        !CBB_add_bytes(&token_cbb, pretoken->t, PMBTOKEN_NONCE_SIZE) ||
-        // TODO(https://crbug.com/boringssl/331): When updating the key format,
-        // remove the redundant length prefixes.
-        !CBB_add_u16_length_prefixed(&token_cbb, &child) ||
-        !point_to_cbb(&child, group, &affines[0]) ||
-        !CBB_add_u16_length_prefixed(&token_cbb, &child) ||
-        !point_to_cbb(&child, group, &affines[1]) ||
-        !CBB_add_u16_length_prefixed(&token_cbb, &child) ||
-        !point_to_cbb(&child, group, &affines[2]) ||
+        !CBB_add_bytes(&token_cbb, pretoken->t, TRUST_TOKEN_NONCE_SIZE) ||
+        !cbb_add_prefixed_point(&token_cbb, group, &affines[0],
+                                method->prefix_point) ||
+        !cbb_add_prefixed_point(&token_cbb, group, &affines[1],
+                                method->prefix_point) ||
+        !cbb_add_prefixed_point(&token_cbb, group, &affines[2],
+                                method->prefix_point) ||
         !CBB_flush(&token_cbb)) {
       CBB_cleanup(&token_cbb);
       goto err;
@@ -1018,32 +991,30 @@
   // The DLEQ batching construction is described in appendix B of
   // https://eprint.iacr.org/2020/072/20200324:214215. Note the additional
   // computations all act on public inputs.
-  if (method->batched_proof) {
-    for (size_t i = 0; i < count; i++) {
-      if (!hash_c_batch(method, &es[i], &batch_cbb, i)) {
-        goto err;
-      }
-    }
-
-    EC_RAW_POINT Tp_batch, Sp_batch, Wp_batch, Wsp_batch;
-    if (!ec_point_mul_scalar_public_batch(group, &Tp_batch,
-                                          /*g_scalar=*/NULL, Tps, es, count) ||
-        !ec_point_mul_scalar_public_batch(group, &Sp_batch,
-                                          /*g_scalar=*/NULL, Sps, es, count) ||
-        !ec_point_mul_scalar_public_batch(group, &Wp_batch,
-                                          /*g_scalar=*/NULL, Wps, es, count) ||
-        !ec_point_mul_scalar_public_batch(group, &Wsp_batch,
-                                          /*g_scalar=*/NULL, Wsps, es, count)) {
+  for (size_t i = 0; i < count; i++) {
+    if (!hash_c_batch(method, &es[i], &batch_cbb, i)) {
       goto err;
     }
+  }
 
-    CBS proof;
-    if (!CBS_get_u16_length_prefixed(cbs, &proof) ||
-        !dleq_verify(method, &proof, key, &Tp_batch, &Sp_batch, &Wp_batch,
-                     &Wsp_batch) ||
-        CBS_len(&proof) != 0) {
-      goto err;
-    }
+  EC_RAW_POINT Tp_batch, Sp_batch, Wp_batch, Wsp_batch;
+  if (!ec_point_mul_scalar_public_batch(group, &Tp_batch,
+                                        /*g_scalar=*/NULL, Tps, es, count) ||
+      !ec_point_mul_scalar_public_batch(group, &Sp_batch,
+                                        /*g_scalar=*/NULL, Sps, es, count) ||
+      !ec_point_mul_scalar_public_batch(group, &Wp_batch,
+                                        /*g_scalar=*/NULL, Wps, es, count) ||
+      !ec_point_mul_scalar_public_batch(group, &Wsp_batch,
+                                        /*g_scalar=*/NULL, Wsps, es, count)) {
+    goto err;
+  }
+
+  CBS proof;
+  if (!CBS_get_u16_length_prefixed(cbs, &proof) ||
+      !dleq_verify(method, &proof, key, &Tp_batch, &Sp_batch, &Wp_batch,
+                   &Wsp_batch) ||
+      CBS_len(&proof) != 0) {
+    goto err;
   }
 
   ok = 1;
@@ -1063,18 +1034,18 @@
 }
 
 static int pmbtoken_read(const PMBTOKEN_METHOD *method,
-                         const PMBTOKEN_ISSUER_KEY *key,
-                         uint8_t out_nonce[PMBTOKEN_NONCE_SIZE],
+                         const TRUST_TOKEN_ISSUER_KEY *key,
+                         uint8_t out_nonce[TRUST_TOKEN_NONCE_SIZE],
                          uint8_t *out_private_metadata, const uint8_t *token,
                          size_t token_len) {
   const EC_GROUP *group = method->group;
   CBS cbs;
   CBS_init(&cbs, token, token_len);
   EC_AFFINE S, W, Ws;
-  if (!CBS_copy_bytes(&cbs, out_nonce, PMBTOKEN_NONCE_SIZE) ||
-      !cbs_get_prefixed_point(&cbs, group, &S) ||
-      !cbs_get_prefixed_point(&cbs, group, &W) ||
-      !cbs_get_prefixed_point(&cbs, group, &Ws) ||
+  if (!CBS_copy_bytes(&cbs, out_nonce, TRUST_TOKEN_NONCE_SIZE) ||
+      !cbs_get_prefixed_point(&cbs, group, &S, method->prefix_point) ||
+      !cbs_get_prefixed_point(&cbs, group, &W, method->prefix_point) ||
+      !cbs_get_prefixed_point(&cbs, group, &Ws, method->prefix_point) ||
       CBS_len(&cbs) != 0) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_INVALID_TOKEN);
     return 0;
@@ -1127,214 +1098,18 @@
 }
 
 
-// PMBTokens experiment v0.
-
-static int pmbtoken_exp0_hash_t(const EC_GROUP *group, EC_RAW_POINT *out,
-                                const uint8_t t[PMBTOKEN_NONCE_SIZE]) {
-  const uint8_t kHashTLabel[] = "PMBTokensV0 HashT";
-  return ec_hash_to_curve_p521_xmd_sha512_sswu_draft06(
-      group, out, kHashTLabel, sizeof(kHashTLabel), t, PMBTOKEN_NONCE_SIZE);
-}
-
-static int pmbtoken_exp0_hash_s(const EC_GROUP *group, EC_RAW_POINT *out,
-                                const EC_AFFINE *t,
-                                const uint8_t s[PMBTOKEN_NONCE_SIZE]) {
-  const uint8_t kHashSLabel[] = "PMBTokensV0 HashS";
-  int ret = 0;
-  CBB cbb;
-  uint8_t *buf = NULL;
-  size_t len;
-  if (!CBB_init(&cbb, 0) ||
-      !point_to_cbb(&cbb, group, t) ||
-      !CBB_add_bytes(&cbb, s, PMBTOKEN_NONCE_SIZE) ||
-      !CBB_finish(&cbb, &buf, &len) ||
-      !ec_hash_to_curve_p521_xmd_sha512_sswu_draft06(
-          group, out, kHashSLabel, sizeof(kHashSLabel), buf, len)) {
-    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
-    goto err;
-  }
-
-  ret = 1;
-
-err:
-  OPENSSL_free(buf);
-  CBB_cleanup(&cbb);
-  return ret;
-}
-
-static int pmbtoken_exp0_hash_c(const EC_GROUP *group, EC_SCALAR *out,
-                                uint8_t *buf, size_t len) {
-  const uint8_t kHashCLabel[] = "PMBTokensV0 HashC";
-  return ec_hash_to_scalar_p521_xmd_sha512_draft06(
-      group, out, kHashCLabel, sizeof(kHashCLabel), buf, len);
-}
-
-// H for PMBTokens v0 was generated with the following Python code.
-/*
-import hashlib
-
-SEED_H = 'PrivacyPass H'
-
-A = -3
-B = 0x051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef109e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00
-P = 2**521 - 1
-
-def get_y(x):
-  y2 = (x**3 + A*x + B) % P
-  y = pow(y2, (P+1)/4, P)
-  if (y*y) % P != y2:
-    raise ValueError("point not on curve")
-  return y
-
-def bit(h,i):
-  return (ord(h[i/8]) >> (i%8)) & 1
-
-b = 521
-def decode_point(so):
-  s = hashlib.sha256(so + '0').digest() + hashlib.sha256(so + '1').digest() + \
-      hashlib.sha256(so + '2').digest()
-
-  x = 0
-  for i in range(0,b):
-    x = x + (long(bit(s,i))<<i)
-  if x >= P:
-    raise ValueError("x out of range")
-  y = get_y(x)
-  if y & 1 != bit(s,b-1): y = P-y
-  return (x, y)
-
-
-def gen_point(seed):
-  v = hashlib.sha256(seed).digest()
-  it = 1
-  while True:
-    try:
-      x,y = decode_point(v)
-    except Exception, e:
-      print e
-      it += 1
-      v = hashlib.sha256(v).digest()
-      continue
-    print "Found in %d iterations:" % it
-    print "  x = %d" % x
-    print "  y = %d" % y
-    print " Encoded (hex): (%x, %x)" % (x, y)
-    return (x, y)
-
-if __name__ == "__main__":
-  gen_point(SEED_H)
-*/
-static int pmbtoken_exp0_ok = 0;
-static PMBTOKEN_METHOD pmbtoken_exp0_method;
-static CRYPTO_once_t pmbtoken_exp0_method_once = CRYPTO_ONCE_INIT;
-
-static void pmbtoken_exp0_init_method_impl(void) {
-  static const uint8_t kH[] = {
-      0x04, 0x01, 0xf0, 0xa9, 0xf7, 0x9e, 0xbc, 0x12, 0x6c, 0xef, 0xd1, 0xab,
-      0x29, 0x10, 0x03, 0x6f, 0x4e, 0xf5, 0xbd, 0xeb, 0x0f, 0x6b, 0xc0, 0x5c,
-      0x0e, 0xce, 0xfe, 0x59, 0x45, 0xd1, 0x3e, 0x25, 0x33, 0x7e, 0x4c, 0xda,
-      0x64, 0x53, 0x54, 0x4e, 0xf9, 0x76, 0x0d, 0x6d, 0xc5, 0x39, 0x2a, 0xd4,
-      0xce, 0x84, 0x6e, 0x31, 0xc2, 0x86, 0x21, 0xf9, 0x5c, 0x98, 0xb9, 0x3d,
-      0x01, 0x74, 0x9f, 0xc5, 0x1e, 0x47, 0x24, 0x00, 0x5c, 0x17, 0x62, 0x51,
-      0x7d, 0x32, 0x5e, 0x29, 0xac, 0x52, 0x14, 0x75, 0x6f, 0x36, 0xd9, 0xc7,
-      0xfa, 0xbb, 0xa9, 0x3b, 0x9d, 0x70, 0x49, 0x1e, 0xb4, 0x53, 0xbc, 0x55,
-      0xea, 0xad, 0x8f, 0x26, 0x1d, 0xe0, 0xbc, 0xf3, 0x50, 0x5c, 0x7e, 0x66,
-      0x41, 0xb5, 0x61, 0x70, 0x12, 0x72, 0xac, 0x6a, 0xb0, 0x6e, 0x78, 0x3d,
-      0x17, 0x08, 0xe3, 0xdf, 0x3c, 0xff, 0xa6, 0xa0, 0xea, 0x96, 0x67, 0x92,
-      0xcd,
-  };
-
-  pmbtoken_exp0_ok =
-      pmbtoken_init_method(&pmbtoken_exp0_method, NID_secp521r1, kH, sizeof(kH),
-                           pmbtoken_exp0_hash_t, pmbtoken_exp0_hash_s,
-                           pmbtoken_exp0_hash_c, /*batched_proof=*/0);
-}
-
-static int pmbtoken_exp0_init_method(void) {
-  CRYPTO_once(&pmbtoken_exp0_method_once, pmbtoken_exp0_init_method_impl);
-  if (!pmbtoken_exp0_ok) {
-    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_INTERNAL_ERROR);
-    return 0;
-  }
-  return 1;
-}
-
-int pmbtoken_exp0_generate_key(CBB *out_private, CBB *out_public) {
-  if (!pmbtoken_exp0_init_method()) {
-    return 0;
-  }
-
-  return pmbtoken_generate_key(&pmbtoken_exp0_method, out_private, out_public);
-}
-
-int pmbtoken_exp0_client_key_from_bytes(PMBTOKEN_CLIENT_KEY *key,
-                                        const uint8_t *in, size_t len) {
-  if (!pmbtoken_exp0_init_method()) {
-    return 0;
-  }
-  return pmbtoken_client_key_from_bytes(&pmbtoken_exp0_method, key, in, len);
-}
-
-int pmbtoken_exp0_issuer_key_from_bytes(PMBTOKEN_ISSUER_KEY *key,
-                                        const uint8_t *in, size_t len) {
-  if (!pmbtoken_exp0_init_method()) {
-    return 0;
-  }
-  return pmbtoken_issuer_key_from_bytes(&pmbtoken_exp0_method, key, in, len);
-}
-
-STACK_OF(PMBTOKEN_PRETOKEN) * pmbtoken_exp0_blind(CBB *cbb, size_t count) {
-  if (!pmbtoken_exp0_init_method()) {
-    return NULL;
-  }
-  return pmbtoken_blind(&pmbtoken_exp0_method, cbb, count);
-}
-
-int pmbtoken_exp0_sign(const PMBTOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
-                       size_t num_requested, size_t num_to_issue,
-                       uint8_t private_metadata) {
-  if (!pmbtoken_exp0_init_method()) {
-    return 0;
-  }
-  return pmbtoken_sign(&pmbtoken_exp0_method, key, cbb, cbs, num_requested,
-                       num_to_issue, private_metadata);
-}
-
-STACK_OF(TRUST_TOKEN) *
-    pmbtoken_exp0_unblind(const PMBTOKEN_CLIENT_KEY *key,
-                          const STACK_OF(PMBTOKEN_PRETOKEN) * pretokens,
-                          CBS *cbs, size_t count, uint32_t key_id) {
-  if (!pmbtoken_exp0_init_method()) {
-    return NULL;
-  }
-  return pmbtoken_unblind(&pmbtoken_exp0_method, key, pretokens, cbs, count,
-                          key_id);
-}
-
-int pmbtoken_exp0_read(const PMBTOKEN_ISSUER_KEY *key,
-                       uint8_t out_nonce[PMBTOKEN_NONCE_SIZE],
-                       uint8_t *out_private_metadata, const uint8_t *token,
-                       size_t token_len) {
-  if (!pmbtoken_exp0_init_method()) {
-    return 0;
-  }
-  return pmbtoken_read(&pmbtoken_exp0_method, key, out_nonce,
-                       out_private_metadata, token, token_len);
-}
-
-
 // PMBTokens experiment v1.
 
 static int pmbtoken_exp1_hash_t(const EC_GROUP *group, EC_RAW_POINT *out,
-                                const uint8_t t[PMBTOKEN_NONCE_SIZE]) {
+                                const uint8_t t[TRUST_TOKEN_NONCE_SIZE]) {
   const uint8_t kHashTLabel[] = "PMBTokens Experiment V1 HashT";
   return ec_hash_to_curve_p384_xmd_sha512_sswu_draft07(
-      group, out, kHashTLabel, sizeof(kHashTLabel), t, PMBTOKEN_NONCE_SIZE);
+      group, out, kHashTLabel, sizeof(kHashTLabel), t, TRUST_TOKEN_NONCE_SIZE);
 }
 
 static int pmbtoken_exp1_hash_s(const EC_GROUP *group, EC_RAW_POINT *out,
                                 const EC_AFFINE *t,
-                                const uint8_t s[PMBTOKEN_NONCE_SIZE]) {
+                                const uint8_t s[TRUST_TOKEN_NONCE_SIZE]) {
   const uint8_t kHashSLabel[] = "PMBTokens Experiment V1 HashS";
   int ret = 0;
   CBB cbb;
@@ -1342,7 +1117,7 @@
   size_t len;
   if (!CBB_init(&cbb, 0) ||
       !point_to_cbb(&cbb, group, t) ||
-      !CBB_add_bytes(&cbb, s, PMBTOKEN_NONCE_SIZE) ||
+      !CBB_add_bytes(&cbb, s, TRUST_TOKEN_NONCE_SIZE) ||
       !CBB_finish(&cbb, &buf, &len) ||
       !ec_hash_to_curve_p384_xmd_sha512_sswu_draft07(
           group, out, kHashSLabel, sizeof(kHashSLabel), buf, len)) {
@@ -1387,7 +1162,7 @@
   pmbtoken_exp1_ok =
       pmbtoken_init_method(&pmbtoken_exp1_method, NID_secp384r1, kH, sizeof(kH),
                            pmbtoken_exp1_hash_t, pmbtoken_exp1_hash_s,
-                           pmbtoken_exp1_hash_c, /*batched_proof=*/1);
+                           pmbtoken_exp1_hash_c, 1);
 }
 
 static int pmbtoken_exp1_init_method(void) {
@@ -1407,7 +1182,7 @@
   return pmbtoken_generate_key(&pmbtoken_exp1_method, out_private, out_public);
 }
 
-int pmbtoken_exp1_client_key_from_bytes(PMBTOKEN_CLIENT_KEY *key,
+int pmbtoken_exp1_client_key_from_bytes(TRUST_TOKEN_CLIENT_KEY *key,
                                         const uint8_t *in, size_t len) {
   if (!pmbtoken_exp1_init_method()) {
     return 0;
@@ -1415,7 +1190,7 @@
   return pmbtoken_client_key_from_bytes(&pmbtoken_exp1_method, key, in, len);
 }
 
-int pmbtoken_exp1_issuer_key_from_bytes(PMBTOKEN_ISSUER_KEY *key,
+int pmbtoken_exp1_issuer_key_from_bytes(TRUST_TOKEN_ISSUER_KEY *key,
                                         const uint8_t *in, size_t len) {
   if (!pmbtoken_exp1_init_method()) {
     return 0;
@@ -1423,14 +1198,14 @@
   return pmbtoken_issuer_key_from_bytes(&pmbtoken_exp1_method, key, in, len);
 }
 
-STACK_OF(PMBTOKEN_PRETOKEN) * pmbtoken_exp1_blind(CBB *cbb, size_t count) {
+STACK_OF(TRUST_TOKEN_PRETOKEN) * pmbtoken_exp1_blind(CBB *cbb, size_t count) {
   if (!pmbtoken_exp1_init_method()) {
     return NULL;
   }
   return pmbtoken_blind(&pmbtoken_exp1_method, cbb, count);
 }
 
-int pmbtoken_exp1_sign(const PMBTOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
+int pmbtoken_exp1_sign(const TRUST_TOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
                        size_t num_requested, size_t num_to_issue,
                        uint8_t private_metadata) {
   if (!pmbtoken_exp1_init_method()) {
@@ -1441,8 +1216,8 @@
 }
 
 STACK_OF(TRUST_TOKEN) *
-    pmbtoken_exp1_unblind(const PMBTOKEN_CLIENT_KEY *key,
-                          const STACK_OF(PMBTOKEN_PRETOKEN) * pretokens,
+    pmbtoken_exp1_unblind(const TRUST_TOKEN_CLIENT_KEY *key,
+                          const STACK_OF(TRUST_TOKEN_PRETOKEN) * pretokens,
                           CBS *cbs, size_t count, uint32_t key_id) {
   if (!pmbtoken_exp1_init_method()) {
     return NULL;
@@ -1451,8 +1226,8 @@
                           key_id);
 }
 
-int pmbtoken_exp1_read(const PMBTOKEN_ISSUER_KEY *key,
-                       uint8_t out_nonce[PMBTOKEN_NONCE_SIZE],
+int pmbtoken_exp1_read(const TRUST_TOKEN_ISSUER_KEY *key,
+                       uint8_t out_nonce[TRUST_TOKEN_NONCE_SIZE],
                        uint8_t *out_private_metadata, const uint8_t *token,
                        size_t token_len) {
   if (!pmbtoken_exp1_init_method()) {
@@ -1472,3 +1247,153 @@
          ec_point_to_bytes(pmbtoken_exp1_method.group, &h,
                            POINT_CONVERSION_UNCOMPRESSED, out, 97) == 97;
 }
+
+// PMBTokens experiment v2.
+
+static int pmbtoken_exp2_hash_t(const EC_GROUP *group, EC_RAW_POINT *out,
+                                const uint8_t t[TRUST_TOKEN_NONCE_SIZE]) {
+  const uint8_t kHashTLabel[] = "PMBTokens Experiment V2 HashT";
+  return ec_hash_to_curve_p384_xmd_sha512_sswu_draft07(
+      group, out, kHashTLabel, sizeof(kHashTLabel), t, TRUST_TOKEN_NONCE_SIZE);
+}
+
+static int pmbtoken_exp2_hash_s(const EC_GROUP *group, EC_RAW_POINT *out,
+                                const EC_AFFINE *t,
+                                const uint8_t s[TRUST_TOKEN_NONCE_SIZE]) {
+  const uint8_t kHashSLabel[] = "PMBTokens Experiment V2 HashS";
+  int ret = 0;
+  CBB cbb;
+  uint8_t *buf = NULL;
+  size_t len;
+  if (!CBB_init(&cbb, 0) ||
+      !point_to_cbb(&cbb, group, t) ||
+      !CBB_add_bytes(&cbb, s, TRUST_TOKEN_NONCE_SIZE) ||
+      !CBB_finish(&cbb, &buf, &len) ||
+      !ec_hash_to_curve_p384_xmd_sha512_sswu_draft07(
+          group, out, kHashSLabel, sizeof(kHashSLabel), buf, len)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    goto err;
+  }
+
+  ret = 1;
+
+err:
+  OPENSSL_free(buf);
+  CBB_cleanup(&cbb);
+  return ret;
+}
+
+static int pmbtoken_exp2_hash_c(const EC_GROUP *group, EC_SCALAR *out,
+                                uint8_t *buf, size_t len) {
+  const uint8_t kHashCLabel[] = "PMBTokens Experiment V2 HashC";
+  return ec_hash_to_scalar_p384_xmd_sha512_draft07(
+      group, out, kHashCLabel, sizeof(kHashCLabel), buf, len);
+}
+
+static int pmbtoken_exp2_ok = 0;
+static PMBTOKEN_METHOD pmbtoken_exp2_method;
+static CRYPTO_once_t pmbtoken_exp2_method_once = CRYPTO_ONCE_INIT;
+
+static void pmbtoken_exp2_init_method_impl(void) {
+  // This is the output of |ec_hash_to_curve_p384_xmd_sha512_sswu_draft07| with
+  // DST "PMBTokens Experiment V2 HashH" and message "generator".
+  static const uint8_t kH[] = {
+      0x04, 0xbc, 0x27, 0x24, 0x99, 0xfa, 0xc9, 0xa4, 0x74, 0x6f, 0xf9,
+      0x07, 0x81, 0x55, 0xf8, 0x1f, 0x6f, 0xda, 0x09, 0xe7, 0x8c, 0x5d,
+      0x9e, 0x4e, 0x14, 0x7c, 0x53, 0x14, 0xbc, 0x7e, 0x29, 0x57, 0x92,
+      0x17, 0x94, 0x6e, 0xd2, 0xdf, 0xa5, 0x31, 0x1b, 0x4e, 0xb7, 0xfc,
+      0x93, 0xe3, 0x6e, 0x14, 0x1f, 0x4f, 0x14, 0xf3, 0xe5, 0x47, 0x61,
+      0x1c, 0x2c, 0x72, 0x25, 0xf0, 0x4a, 0x45, 0x23, 0x2d, 0x57, 0x93,
+      0x0e, 0xb2, 0x55, 0xb8, 0x57, 0x25, 0x4c, 0x1e, 0xdb, 0xfd, 0x58,
+      0x70, 0x17, 0x9a, 0xbb, 0x9e, 0x5e, 0x93, 0x9e, 0x92, 0xd3, 0xe8,
+      0x25, 0x62, 0xbf, 0x59, 0xb2, 0xd2, 0x3d, 0x71, 0xff
+  };
+
+  pmbtoken_exp2_ok =
+      pmbtoken_init_method(&pmbtoken_exp2_method, NID_secp384r1, kH, sizeof(kH),
+                           pmbtoken_exp2_hash_t, pmbtoken_exp2_hash_s,
+                           pmbtoken_exp2_hash_c, 0);
+}
+
+static int pmbtoken_exp2_init_method(void) {
+  CRYPTO_once(&pmbtoken_exp2_method_once, pmbtoken_exp2_init_method_impl);
+  if (!pmbtoken_exp2_ok) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_INTERNAL_ERROR);
+    return 0;
+  }
+  return 1;
+}
+
+int pmbtoken_exp2_generate_key(CBB *out_private, CBB *out_public) {
+  if (!pmbtoken_exp2_init_method()) {
+    return 0;
+  }
+
+  return pmbtoken_generate_key(&pmbtoken_exp2_method, out_private, out_public);
+}
+
+int pmbtoken_exp2_client_key_from_bytes(TRUST_TOKEN_CLIENT_KEY *key,
+                                        const uint8_t *in, size_t len) {
+  if (!pmbtoken_exp2_init_method()) {
+    return 0;
+  }
+  return pmbtoken_client_key_from_bytes(&pmbtoken_exp2_method, key, in, len);
+}
+
+int pmbtoken_exp2_issuer_key_from_bytes(TRUST_TOKEN_ISSUER_KEY *key,
+                                        const uint8_t *in, size_t len) {
+  if (!pmbtoken_exp2_init_method()) {
+    return 0;
+  }
+  return pmbtoken_issuer_key_from_bytes(&pmbtoken_exp2_method, key, in, len);
+}
+
+STACK_OF(TRUST_TOKEN_PRETOKEN) * pmbtoken_exp2_blind(CBB *cbb, size_t count) {
+  if (!pmbtoken_exp2_init_method()) {
+    return NULL;
+  }
+  return pmbtoken_blind(&pmbtoken_exp2_method, cbb, count);
+}
+
+int pmbtoken_exp2_sign(const TRUST_TOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
+                       size_t num_requested, size_t num_to_issue,
+                       uint8_t private_metadata) {
+  if (!pmbtoken_exp2_init_method()) {
+    return 0;
+  }
+  return pmbtoken_sign(&pmbtoken_exp2_method, key, cbb, cbs, num_requested,
+                       num_to_issue, private_metadata);
+}
+
+STACK_OF(TRUST_TOKEN) *
+    pmbtoken_exp2_unblind(const TRUST_TOKEN_CLIENT_KEY *key,
+                          const STACK_OF(TRUST_TOKEN_PRETOKEN) * pretokens,
+                          CBS *cbs, size_t count, uint32_t key_id) {
+  if (!pmbtoken_exp2_init_method()) {
+    return NULL;
+  }
+  return pmbtoken_unblind(&pmbtoken_exp2_method, key, pretokens, cbs, count,
+                          key_id);
+}
+
+int pmbtoken_exp2_read(const TRUST_TOKEN_ISSUER_KEY *key,
+                       uint8_t out_nonce[TRUST_TOKEN_NONCE_SIZE],
+                       uint8_t *out_private_metadata, const uint8_t *token,
+                       size_t token_len) {
+  if (!pmbtoken_exp2_init_method()) {
+    return 0;
+  }
+  return pmbtoken_read(&pmbtoken_exp2_method, key, out_nonce,
+                       out_private_metadata, token, token_len);
+}
+
+int pmbtoken_exp2_get_h_for_testing(uint8_t out[97]) {
+  if (!pmbtoken_exp2_init_method()) {
+    return 0;
+  }
+  EC_AFFINE h;
+  return ec_jacobian_to_affine(pmbtoken_exp2_method.group, &h,
+                               &pmbtoken_exp2_method.h) &&
+         ec_point_to_bytes(pmbtoken_exp2_method.group, &h,
+                           POINT_CONVERSION_UNCOMPRESSED, out, 97) == 97;
+}
diff --git a/deps/boringssl/src/crypto/trust_token/trust_token.c b/deps/boringssl/src/crypto/trust_token/trust_token.c
index 1ade23e..3334fba 100644
--- a/deps/boringssl/src/crypto/trust_token/trust_token.c
+++ b/deps/boringssl/src/crypto/trust_token/trust_token.c
@@ -27,21 +27,6 @@
 // protocol for issuing and redeeming tokens built on top of the PMBTokens
 // construction.
 
-const TRUST_TOKEN_METHOD *TRUST_TOKEN_experiment_v0(void) {
-  static const TRUST_TOKEN_METHOD kMethod = {
-      pmbtoken_exp0_generate_key,
-      pmbtoken_exp0_client_key_from_bytes,
-      pmbtoken_exp0_issuer_key_from_bytes,
-      pmbtoken_exp0_blind,
-      pmbtoken_exp0_sign,
-      pmbtoken_exp0_unblind,
-      pmbtoken_exp0_read,
-      0 /* don't use token hash */,
-      0 /* don't use batched proof */,
-  };
-  return &kMethod;
-}
-
 const TRUST_TOKEN_METHOD *TRUST_TOKEN_experiment_v1(void) {
   static const TRUST_TOKEN_METHOD kMethod = {
       pmbtoken_exp1_generate_key,
@@ -51,12 +36,49 @@
       pmbtoken_exp1_sign,
       pmbtoken_exp1_unblind,
       pmbtoken_exp1_read,
-      1 /* use token hash */,
-      1 /* use batched proof */,
+      1, /* has_private_metadata */
+      3, /* max_keys */
+      1, /* has_srr */
   };
   return &kMethod;
 }
 
+const TRUST_TOKEN_METHOD *TRUST_TOKEN_experiment_v2_voprf(void) {
+  static const TRUST_TOKEN_METHOD kMethod = {
+      voprf_exp2_generate_key,
+      voprf_exp2_client_key_from_bytes,
+      voprf_exp2_issuer_key_from_bytes,
+      voprf_exp2_blind,
+      voprf_exp2_sign,
+      voprf_exp2_unblind,
+      voprf_exp2_read,
+      0, /* has_private_metadata */
+      6, /* max_keys */
+      0, /* has_srr */
+  };
+  return &kMethod;
+}
+
+const TRUST_TOKEN_METHOD *TRUST_TOKEN_experiment_v2_pmb(void) {
+  static const TRUST_TOKEN_METHOD kMethod = {
+      pmbtoken_exp2_generate_key,
+      pmbtoken_exp2_client_key_from_bytes,
+      pmbtoken_exp2_issuer_key_from_bytes,
+      pmbtoken_exp2_blind,
+      pmbtoken_exp2_sign,
+      pmbtoken_exp2_unblind,
+      pmbtoken_exp2_read,
+      1, /* has_private_metadata */
+      3, /* max_keys */
+      0, /* has_srr */
+  };
+  return &kMethod;
+}
+
+void TRUST_TOKEN_PRETOKEN_free(TRUST_TOKEN_PRETOKEN *pretoken) {
+  OPENSSL_free(pretoken);
+}
+
 TRUST_TOKEN *TRUST_TOKEN_new(const uint8_t *data, size_t len) {
   TRUST_TOKEN *ret = OPENSSL_malloc(sizeof(TRUST_TOKEN));
   if (ret == NULL) {
@@ -142,13 +164,14 @@
     return;
   }
   EVP_PKEY_free(ctx->srr_key);
-  sk_PMBTOKEN_PRETOKEN_pop_free(ctx->pretokens, PMBTOKEN_PRETOKEN_free);
+  sk_TRUST_TOKEN_PRETOKEN_pop_free(ctx->pretokens, TRUST_TOKEN_PRETOKEN_free);
   OPENSSL_free(ctx);
 }
 
 int TRUST_TOKEN_CLIENT_add_key(TRUST_TOKEN_CLIENT *ctx, size_t *out_key_index,
                                const uint8_t *key, size_t key_len) {
-  if (ctx->num_keys == OPENSSL_ARRAY_SIZE(ctx->keys)) {
+  if (ctx->num_keys == OPENSSL_ARRAY_SIZE(ctx->keys) ||
+      ctx->num_keys >= ctx->method->max_keys) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_TOO_MANY_KEYS);
     return 0;
   }
@@ -170,6 +193,9 @@
 }
 
 int TRUST_TOKEN_CLIENT_set_srr_key(TRUST_TOKEN_CLIENT *ctx, EVP_PKEY *key) {
+  if (!ctx->method->has_srr) {
+    return 1;
+  }
   EVP_PKEY_free(ctx->srr_key);
   EVP_PKEY_up_ref(key);
   ctx->srr_key = key;
@@ -184,7 +210,7 @@
 
   int ret = 0;
   CBB request;
-  STACK_OF(PMBTOKEN_PRETOKEN) *pretokens = NULL;
+  STACK_OF(TRUST_TOKEN_PRETOKEN) *pretokens = NULL;
   if (!CBB_init(&request, 0) ||
       !CBB_add_u16(&request, count)) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
@@ -201,14 +227,14 @@
     goto err;
   }
 
-  sk_PMBTOKEN_PRETOKEN_pop_free(ctx->pretokens, PMBTOKEN_PRETOKEN_free);
+  sk_TRUST_TOKEN_PRETOKEN_pop_free(ctx->pretokens, TRUST_TOKEN_PRETOKEN_free);
   ctx->pretokens = pretokens;
   pretokens = NULL;
   ret = 1;
 
 err:
   CBB_cleanup(&request);
-  sk_PMBTOKEN_PRETOKEN_pop_free(pretokens, PMBTOKEN_PRETOKEN_free);
+  sk_TRUST_TOKEN_PRETOKEN_pop_free(pretokens, TRUST_TOKEN_PRETOKEN_free);
   return ret;
 }
 
@@ -242,7 +268,7 @@
     return NULL;
   }
 
-  if (count > sk_PMBTOKEN_PRETOKEN_num(ctx->pretokens)) {
+  if (count > sk_TRUST_TOKEN_PRETOKEN_num(ctx->pretokens)) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
     return NULL;
   }
@@ -259,7 +285,7 @@
     return NULL;
   }
 
-  sk_PMBTOKEN_PRETOKEN_pop_free(ctx->pretokens, PMBTOKEN_PRETOKEN_free);
+  sk_TRUST_TOKEN_PRETOKEN_pop_free(ctx->pretokens, TRUST_TOKEN_PRETOKEN_free);
   ctx->pretokens = NULL;
 
   *out_key_index = key_index;
@@ -277,7 +303,7 @@
       !CBB_add_bytes(&token_inner, token->data, token->len) ||
       !CBB_add_u16_length_prefixed(&request, &inner) ||
       !CBB_add_bytes(&inner, data, data_len) ||
-      !CBB_add_u64(&request, time) ||
+      (ctx->method->has_srr && !CBB_add_u64(&request, time)) ||
       !CBB_finish(&request, out, out_len)) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
     CBB_cleanup(&request);
@@ -287,20 +313,32 @@
 }
 
 int TRUST_TOKEN_CLIENT_finish_redemption(TRUST_TOKEN_CLIENT *ctx,
-                                         uint8_t **out_srr, size_t *out_srr_len,
+                                         uint8_t **out_rr, size_t *out_rr_len,
                                          uint8_t **out_sig, size_t *out_sig_len,
                                          const uint8_t *response,
                                          size_t response_len) {
-  if (ctx->srr_key == NULL) {
-    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_NO_SRR_KEY_CONFIGURED);
+  CBS in, srr, sig;
+  CBS_init(&in, response, response_len);
+  if (!ctx->method->has_srr) {
+    if (!CBS_stow(&in, out_rr, out_rr_len)) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+      return 0;
+    }
+
+    *out_sig = NULL;
+    *out_sig_len = 0;
+    return 1;
+  }
+
+  if (!CBS_get_u16_length_prefixed(&in, &srr) ||
+      !CBS_get_u16_length_prefixed(&in, &sig) ||
+      CBS_len(&in) != 0) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_ERROR);
     return 0;
   }
 
-  CBS in, srr, sig;
-  CBS_init(&in, response, response_len);
-  if (!CBS_get_u16_length_prefixed(&in, &srr) ||
-      !CBS_get_u16_length_prefixed(&in, &sig)) {
-    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_ERROR);
+  if (ctx->srr_key == NULL) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_NO_SRR_KEY_CONFIGURED);
     return 0;
   }
 
@@ -326,8 +364,8 @@
     return 0;
   }
 
-  *out_srr = srr_buf;
-  *out_srr_len = srr_len;
+  *out_rr = srr_buf;
+  *out_rr_len = srr_len;
   *out_sig = sig_buf;
   *out_sig_len = sig_len;
   return 1;
@@ -363,7 +401,8 @@
 
 int TRUST_TOKEN_ISSUER_add_key(TRUST_TOKEN_ISSUER *ctx, const uint8_t *key,
                                size_t key_len) {
-  if (ctx->num_keys == OPENSSL_ARRAY_SIZE(ctx->keys)) {
+  if (ctx->num_keys == OPENSSL_ARRAY_SIZE(ctx->keys) ||
+      ctx->num_keys >= ctx->method->max_keys) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_TOO_MANY_KEYS);
     return 0;
   }
@@ -428,7 +467,8 @@
 
   const struct trust_token_issuer_key_st *key =
       trust_token_issuer_get_key(ctx, public_metadata);
-  if (key == NULL || private_metadata > 1) {
+  if (key == NULL || private_metadata > 1 ||
+      (!ctx->method->has_private_metadata && private_metadata != 0)) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_INVALID_METADATA);
     return 0;
   }
@@ -478,6 +518,72 @@
   return ret;
 }
 
+
+int TRUST_TOKEN_ISSUER_redeem_raw(const TRUST_TOKEN_ISSUER *ctx,
+                                  uint32_t *out_public, uint8_t *out_private,
+                                  TRUST_TOKEN **out_token,
+                                  uint8_t **out_client_data,
+                                  size_t *out_client_data_len,
+                                  const uint8_t *request, size_t request_len) {
+  CBS request_cbs, token_cbs;
+  CBS_init(&request_cbs, request, request_len);
+  if (!CBS_get_u16_length_prefixed(&request_cbs, &token_cbs)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_ERROR);
+    return 0;
+  }
+
+  uint32_t public_metadata = 0;
+  uint8_t private_metadata = 0;
+
+  // Parse the token. If there is an error, treat it as an invalid token.
+  if (!CBS_get_u32(&token_cbs, &public_metadata)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_INVALID_TOKEN);
+    return 0;
+  }
+
+  const struct trust_token_issuer_key_st *key =
+      trust_token_issuer_get_key(ctx, public_metadata);
+  uint8_t nonce[TRUST_TOKEN_NONCE_SIZE];
+  if (key == NULL ||
+      !ctx->method->read(&key->key, nonce, &private_metadata,
+                         CBS_data(&token_cbs), CBS_len(&token_cbs))) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_INVALID_TOKEN);
+    return 0;
+  }
+
+  CBS client_data;
+  if (!CBS_get_u16_length_prefixed(&request_cbs, &client_data) ||
+      (ctx->method->has_srr && !CBS_skip(&request_cbs, 8)) ||
+      CBS_len(&request_cbs) != 0) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_ERROR);
+    return 0;
+  }
+
+  uint8_t *client_data_buf = NULL;
+  size_t client_data_len = 0;
+  if (!CBS_stow(&client_data, &client_data_buf, &client_data_len)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    goto err;
+  }
+
+  TRUST_TOKEN *token = TRUST_TOKEN_new(nonce, TRUST_TOKEN_NONCE_SIZE);
+  if (token == NULL) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    goto err;
+  }
+  *out_public = public_metadata;
+  *out_private = private_metadata;
+  *out_token = token;
+  *out_client_data = client_data_buf;
+  *out_client_data_len = client_data_len;
+
+  return 1;
+
+err:
+  OPENSSL_free(client_data_buf);
+  return 0;
+}
+
 // https://tools.ietf.org/html/rfc7049#section-2.1
 static int add_cbor_int_with_type(CBB *cbb, uint8_t major_type,
                                   uint64_t value) {
@@ -561,7 +667,7 @@
 
   const struct trust_token_issuer_key_st *key =
       trust_token_issuer_get_key(ctx, public_metadata);
-  uint8_t nonce[PMBTOKEN_NONCE_SIZE];
+  uint8_t nonce[TRUST_TOKEN_NONCE_SIZE];
   if (key == NULL ||
       !ctx->method->read(&key->key, nonce, &private_metadata,
                          CBS_data(&token_cbs), CBS_len(&token_cbs))) {
@@ -582,9 +688,9 @@
   }
 
   CBS client_data;
-  uint64_t redemption_time;
+  uint64_t redemption_time = 0;
   if (!CBS_get_u16_length_prefixed(&request_cbs, &client_data) ||
-      !CBS_get_u64(&request_cbs, &redemption_time)) {
+      (ctx->method->has_srr && !CBS_get_u64(&request_cbs, &redemption_time))) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_ERROR);
     goto err;
   }
@@ -597,20 +703,25 @@
   SHA256_Update(&sha_ctx, CBS_data(&token_copy), CBS_len(&token_copy));
   SHA256_Final(token_hash, &sha_ctx);
 
-  uint8_t metadata_obfuscator;
-  if (ctx->method->use_token_hash) {
-    metadata_obfuscator =
-        get_metadata_obfuscator(ctx->metadata_key, ctx->metadata_key_len,
-                                token_hash, sizeof(token_hash));
-  } else {
-    metadata_obfuscator =
-        get_metadata_obfuscator(ctx->metadata_key, ctx->metadata_key_len,
-                                CBS_data(&client_data), CBS_len(&client_data));
-  }
+  uint8_t metadata_obfuscator = get_metadata_obfuscator(
+      ctx->metadata_key, ctx->metadata_key_len, token_hash, sizeof(token_hash));
 
   // The SRR is constructed as per the format described in
   // https://docs.google.com/document/d/1TNnya6B8pyomDK2F1R9CL3dY10OAmqWlnCxsWyOBDVQ/edit#heading=h.7mkzvhpqb8l5
 
+  // The V2 protocol is intended to be used with
+  // |TRUST_TOKEN_ISSUER_redeem_raw|. However, we temporarily support it with
+  // |TRUST_TOKEN_ISSUER_redeem| to ease the transition for existing issuer
+  // callers. Those callers' consumers currently expect an expiry-timestamp
+  // field, so we fill in a placeholder value.
+  //
+  // TODO(svaldez): After the existing issuers have migrated to
+  // |TRUST_TOKEN_ISSUER_redeem_raw| remove this logic.
+  uint64_t expiry_time = 0;
+  if (ctx->method->has_srr) {
+    expiry_time = redemption_time + lifetime;
+  }
+
   static const char kClientDataLabel[] = "client-data";
   static const char kExpiryTimestampLabel[] = "expiry-timestamp";
   static const char kMetadataLabel[] = "metadata";
@@ -625,10 +736,7 @@
   assert(strlen(kClientDataLabel) < strlen(kExpiryTimestampLabel));
   assert(strlen(kPublicLabel) < strlen(kPrivateLabel));
 
-  size_t map_entries = 3;
-  if (ctx->method->use_token_hash) {
-    map_entries = 4;
-  }
+  size_t map_entries = 4;
 
   if (!CBB_init(&srr, 0) ||
       !add_cbor_map(&srr, map_entries) ||  // SRR map
@@ -637,24 +745,14 @@
       !add_cbor_text(&srr, kPublicLabel, strlen(kPublicLabel)) ||
       !add_cbor_int(&srr, public_metadata) ||
       !add_cbor_text(&srr, kPrivateLabel, strlen(kPrivateLabel)) ||
-      !add_cbor_int(&srr, private_metadata ^ metadata_obfuscator)) {
-    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
-    goto err;
-  }
-
-  if (ctx->method->use_token_hash) {
-    if (!add_cbor_text(&srr, kTokenHashLabel, strlen(kTokenHashLabel)) ||
-        !add_cbor_bytes(&srr, token_hash, sizeof(token_hash))) {
-      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
-      goto err;
-    }
-  }
-
-  if (!add_cbor_text(&srr, kClientDataLabel, strlen(kClientDataLabel)) ||
+      !add_cbor_int(&srr, private_metadata ^ metadata_obfuscator) ||
+      !add_cbor_text(&srr, kTokenHashLabel, strlen(kTokenHashLabel)) ||
+      !add_cbor_bytes(&srr, token_hash, sizeof(token_hash)) ||
+      !add_cbor_text(&srr, kClientDataLabel, strlen(kClientDataLabel)) ||
       !CBB_add_bytes(&srr, CBS_data(&client_data), CBS_len(&client_data)) ||
       !add_cbor_text(&srr, kExpiryTimestampLabel,
                      strlen(kExpiryTimestampLabel)) ||
-      !add_cbor_int(&srr, redemption_time + lifetime) ||
+      !add_cbor_int(&srr, expiry_time) ||
       !CBB_finish(&srr, &srr_buf, &srr_len)) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
     goto err;
@@ -666,16 +764,56 @@
     goto err;
   }
 
-  CBB child;
-  uint8_t *ptr;
-  if (!CBB_add_u16_length_prefixed(&response, &child) ||
-      !CBB_add_bytes(&child, srr_buf, srr_len) ||
-      !CBB_add_u16_length_prefixed(&response, &child) ||
-      !CBB_reserve(&child, &ptr, sig_len) ||
-      !EVP_DigestSign(&md_ctx, ptr, &sig_len, srr_buf, srr_len) ||
-      !CBB_did_write(&child, sig_len)) {
-    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
-    goto err;
+  // Merge SRR and Signature into single string.
+  // TODO(svaldez): Expose API to construct this from the caller.
+  if (!ctx->method->has_srr) {
+    static const char kSRRHeader[] = "body=:";
+    static const char kSRRSplit[] = ":, signature=:";
+    static const char kSRREnd[] = ":";
+
+    size_t srr_b64_len, sig_b64_len;
+    if (!EVP_EncodedLength(&srr_b64_len, srr_len) ||
+        !EVP_EncodedLength(&sig_b64_len, sig_len)) {
+      goto err;
+    }
+
+    sig_buf = OPENSSL_malloc(sig_len);
+    uint8_t *srr_b64_buf = OPENSSL_malloc(srr_b64_len);
+    uint8_t *sig_b64_buf = OPENSSL_malloc(sig_b64_len);
+    if (!sig_buf ||
+        !srr_b64_buf ||
+        !sig_b64_buf ||
+        !EVP_DigestSign(&md_ctx, sig_buf, &sig_len, srr_buf, srr_len) ||
+        !CBB_add_bytes(&response, (const uint8_t *)kSRRHeader,
+                       strlen(kSRRHeader)) ||
+        !CBB_add_bytes(&response, srr_b64_buf,
+                       EVP_EncodeBlock(srr_b64_buf, srr_buf, srr_len)) ||
+        !CBB_add_bytes(&response, (const uint8_t *)kSRRSplit,
+                       strlen(kSRRSplit)) ||
+        !CBB_add_bytes(&response, sig_b64_buf,
+                       EVP_EncodeBlock(sig_b64_buf, sig_buf, sig_len)) ||
+        !CBB_add_bytes(&response, (const uint8_t *)kSRREnd, strlen(kSRREnd))) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+      OPENSSL_free(srr_b64_buf);
+      OPENSSL_free(sig_b64_buf);
+      goto err;
+    }
+
+    OPENSSL_free(srr_b64_buf);
+    OPENSSL_free(sig_b64_buf);
+  } else {
+    CBB child;
+    uint8_t *ptr;
+    if (!CBB_add_u16_length_prefixed(&response, &child) ||
+        !CBB_add_bytes(&child, srr_buf, srr_len) ||
+        !CBB_add_u16_length_prefixed(&response, &child) ||
+        !CBB_reserve(&child, &ptr, sig_len) ||
+        !EVP_DigestSign(&md_ctx, ptr, &sig_len, srr_buf, srr_len) ||
+        !CBB_did_write(&child, sig_len) ||
+        !CBB_flush(&response)) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+      goto err;
+    }
   }
 
   if (!CBS_stow(&client_data, &client_data_buf, &client_data_len) ||
@@ -684,7 +822,7 @@
     goto err;
   }
 
-  TRUST_TOKEN *token = TRUST_TOKEN_new(nonce, PMBTOKEN_NONCE_SIZE);
+  TRUST_TOKEN *token = TRUST_TOKEN_new(nonce, TRUST_TOKEN_NONCE_SIZE);
   if (token == NULL) {
     OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
     goto err;
diff --git a/deps/boringssl/src/crypto/trust_token/trust_token_test.cc b/deps/boringssl/src/crypto/trust_token/trust_token_test.cc
index 41bf55d..f9f183d 100644
--- a/deps/boringssl/src/crypto/trust_token/trust_token_test.cc
+++ b/deps/boringssl/src/crypto/trust_token/trust_token_test.cc
@@ -44,18 +44,6 @@
 
 namespace {
 
-TEST(TrustTokenTest, KeyGenExp0) {
-  uint8_t priv_key[TRUST_TOKEN_MAX_PRIVATE_KEY_SIZE];
-  uint8_t pub_key[TRUST_TOKEN_MAX_PUBLIC_KEY_SIZE];
-  size_t priv_key_len, pub_key_len;
-  ASSERT_TRUE(TRUST_TOKEN_generate_key(
-      TRUST_TOKEN_experiment_v0(), priv_key, &priv_key_len,
-      TRUST_TOKEN_MAX_PRIVATE_KEY_SIZE, pub_key, &pub_key_len,
-      TRUST_TOKEN_MAX_PUBLIC_KEY_SIZE, 0x0001));
-  ASSERT_EQ(400u, priv_key_len);
-  ASSERT_EQ(409u, pub_key_len);
-}
-
 TEST(TrustTokenTest, KeyGenExp1) {
   uint8_t priv_key[TRUST_TOKEN_MAX_PRIVATE_KEY_SIZE];
   uint8_t pub_key[TRUST_TOKEN_MAX_PUBLIC_KEY_SIZE];
@@ -68,6 +56,30 @@
   ASSERT_EQ(301u, pub_key_len);
 }
 
+TEST(TrustTokenTest, KeyGenExp2VOPRF) {
+  uint8_t priv_key[TRUST_TOKEN_MAX_PRIVATE_KEY_SIZE];
+  uint8_t pub_key[TRUST_TOKEN_MAX_PUBLIC_KEY_SIZE];
+  size_t priv_key_len, pub_key_len;
+  ASSERT_TRUE(TRUST_TOKEN_generate_key(
+      TRUST_TOKEN_experiment_v2_voprf(), priv_key, &priv_key_len,
+      TRUST_TOKEN_MAX_PRIVATE_KEY_SIZE, pub_key, &pub_key_len,
+      TRUST_TOKEN_MAX_PUBLIC_KEY_SIZE, 0x0001));
+  ASSERT_EQ(52u, priv_key_len);
+  ASSERT_EQ(101u, pub_key_len);
+}
+
+TEST(TrustTokenTest, KeyGenExp2PMB) {
+  uint8_t priv_key[TRUST_TOKEN_MAX_PRIVATE_KEY_SIZE];
+  uint8_t pub_key[TRUST_TOKEN_MAX_PUBLIC_KEY_SIZE];
+  size_t priv_key_len, pub_key_len;
+  ASSERT_TRUE(TRUST_TOKEN_generate_key(
+      TRUST_TOKEN_experiment_v2_pmb(), priv_key, &priv_key_len,
+      TRUST_TOKEN_MAX_PRIVATE_KEY_SIZE, pub_key, &pub_key_len,
+      TRUST_TOKEN_MAX_PUBLIC_KEY_SIZE, 0x0001));
+  ASSERT_EQ(292u, priv_key_len);
+  ASSERT_EQ(295u, pub_key_len);
+}
+
 // Test that H in |TRUST_TOKEN_experiment_v1| was computed correctly.
 TEST(TrustTokenTest, HExp1) {
   const EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_secp384r1);
@@ -90,14 +102,40 @@
   EXPECT_EQ(Bytes(h), Bytes(expected_bytes, expected_len));
 }
 
+// Test that H in |TRUST_TOKEN_experiment_v2_pmb| was computed correctly.
+TEST(TrustTokenTest, HExp2) {
+  const EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_secp384r1);
+  ASSERT_TRUE(group);
+
+  const uint8_t kHGen[] = "generator";
+  const uint8_t kHLabel[] = "PMBTokens Experiment V2 HashH";
+
+  bssl::UniquePtr<EC_POINT> expected_h(EC_POINT_new(group));
+  ASSERT_TRUE(expected_h);
+  ASSERT_TRUE(ec_hash_to_curve_p384_xmd_sha512_sswu_draft07(
+      group, &expected_h->raw, kHLabel, sizeof(kHLabel), kHGen, sizeof(kHGen)));
+  uint8_t expected_bytes[1 + 2 * EC_MAX_BYTES];
+  size_t expected_len =
+      EC_POINT_point2oct(group, expected_h.get(), POINT_CONVERSION_UNCOMPRESSED,
+                         expected_bytes, sizeof(expected_bytes), nullptr);
+
+  uint8_t h[97];
+  ASSERT_TRUE(pmbtoken_exp2_get_h_for_testing(h));
+  EXPECT_EQ(Bytes(h), Bytes(expected_bytes, expected_len));
+}
+
 static std::vector<const TRUST_TOKEN_METHOD *> AllMethods() {
-  return {TRUST_TOKEN_experiment_v0(), TRUST_TOKEN_experiment_v1()};
+  return {
+    TRUST_TOKEN_experiment_v1(),
+    TRUST_TOKEN_experiment_v2_voprf(),
+    TRUST_TOKEN_experiment_v2_pmb()
+  };
 }
 
 class TrustTokenProtocolTestBase : public ::testing::Test {
  public:
-  explicit TrustTokenProtocolTestBase(const TRUST_TOKEN_METHOD *method)
-      : method_(method) {}
+  explicit TrustTokenProtocolTestBase(const TRUST_TOKEN_METHOD *method_arg)
+      : method_(method_arg) {}
 
   // KeyID returns the key ID associated with key index |i|.
   static uint32_t KeyID(size_t i) {
@@ -114,7 +152,7 @@
     issuer.reset(TRUST_TOKEN_ISSUER_new(method(), issuer_max_batchsize));
     ASSERT_TRUE(issuer);
 
-    for (size_t i = 0; i < 3; i++) {
+    for (size_t i = 0; i < method()->max_keys; i++) {
       uint8_t priv_key[TRUST_TOKEN_MAX_PRIVATE_KEY_SIZE];
       uint8_t pub_key[TRUST_TOKEN_MAX_PUBLIC_KEY_SIZE];
       size_t priv_key_len, pub_key_len, key_index;
@@ -175,7 +213,7 @@
   bssl::UniquePtr<uint8_t> free_issue_msg(issue_msg);
   ASSERT_TRUE(TRUST_TOKEN_ISSUER_issue(
       issuer.get(), &issue_resp, &resp_len, &tokens_issued, issue_msg, msg_len,
-      /*public_metadata=*/KeyID(0), /*private_metadata=*/1,
+      /*public_metadata=*/KeyID(0), /*private_metadata=*/0,
       /*max_issuance=*/10));
   bssl::UniquePtr<uint8_t> free_msg(issue_resp);
   bssl::UniquePtr<STACK_OF(TRUST_TOKEN)> tokens(
@@ -287,7 +325,7 @@
 
   for (TRUST_TOKEN *token : tokens.get()) {
     const uint8_t kClientData[] = "\x70TEST CLIENT DATA";
-    uint64_t kRedemptionTime = 13374242;
+    uint64_t kRedemptionTime = (method()->has_srr ? 13374242 : 0);
 
     uint8_t *redeem_msg = NULL, *redeem_resp = NULL;
     ASSERT_TRUE(TRUST_TOKEN_CLIENT_begin_redemption(
@@ -328,7 +366,7 @@
 
   for (TRUST_TOKEN *token : tokens.get()) {
     const uint8_t kClientData[] = "\x70TEST CLIENT DATA";
-    uint64_t kRedemptionTime = 13374242;
+    uint64_t kRedemptionTime = 0;
 
     uint8_t *redeem_msg = NULL, *redeem_resp = NULL;
     ASSERT_TRUE(TRUST_TOKEN_CLIENT_begin_redemption(
@@ -351,10 +389,14 @@
               Bytes(client_data, client_data_len));
     resp_len = 10;
 
+    // If the protocol doesn't use SRRs, TRUST_TOKEN_CLIENT_finish_redemption
+    // leaves all SRR validation to the caller.
     uint8_t *srr = NULL, *sig = NULL;
     size_t srr_len, sig_len;
-    ASSERT_FALSE(TRUST_TOKEN_CLIENT_finish_redemption(
-        client.get(), &srr, &srr_len, &sig, &sig_len, redeem_resp, resp_len));
+    bool expect_failure = !method()->has_srr;
+    ASSERT_EQ(expect_failure, TRUST_TOKEN_CLIENT_finish_redemption(
+                                  client.get(), &srr, &srr_len, &sig, &sig_len,
+                                  redeem_resp, resp_len));
     bssl::UniquePtr<uint8_t> free_srr(srr);
     bssl::UniquePtr<uint8_t> free_sig(sig);
   }
@@ -440,9 +482,14 @@
                                                 &msg_len, 10));
   bssl::UniquePtr<uint8_t> free_issue_msg(issue_msg);
   size_t tokens_issued;
-  ASSERT_TRUE(TRUST_TOKEN_ISSUER_issue(
+  bool result = TRUST_TOKEN_ISSUER_issue(
       issuer.get(), &issue_resp, &resp_len, &tokens_issued, issue_msg, msg_len,
-      public_metadata(), private_metadata(), /*max_issuance=*/1));
+      public_metadata(), private_metadata(), /*max_issuance=*/1);
+  if (!method()->has_private_metadata && private_metadata()) {
+    ASSERT_FALSE(result);
+    return;
+  }
+  ASSERT_TRUE(result);
   bssl::UniquePtr<uint8_t> free_msg(issue_resp);
   size_t key_index;
   bssl::UniquePtr<STACK_OF(TRUST_TOKEN)> tokens(
@@ -452,16 +499,9 @@
 
   for (TRUST_TOKEN *token : tokens.get()) {
     const uint8_t kClientData[] = "\x70TEST CLIENT DATA";
-    uint64_t kRedemptionTime = 13374242;
+    uint64_t kRedemptionTime = (method()->has_srr ? 13374242 : 0);
 
-    const uint8_t kExpectedSRRNoTokenHash[] =
-        "\xa3\x68\x6d\x65\x74\x61\x64\x61\x74\x61\xa2\x66\x70\x75\x62\x6c\x69"
-        "\x63\x00\x67\x70\x72\x69\x76\x61\x74\x65\x00\x6b\x63\x6c\x69\x65\x6e"
-        "\x74\x2d\x64\x61\x74\x61\x70\x54\x45\x53\x54\x20\x43\x4c\x49\x45\x4e"
-        "\x54\x20\x44\x41\x54\x41\x70\x65\x78\x70\x69\x72\x79\x2d\x74\x69\x6d"
-        "\x65\x73\x74\x61\x6d\x70\x1a\x00\xcc\x15\x7a";
-
-    const uint8_t kExpectedSRRTokenHash[] =
+    const uint8_t kExpectedSRRV1[] =
         "\xa4\x68\x6d\x65\x74\x61\x64\x61\x74\x61\xa2\x66\x70\x75\x62\x6c\x69"
         "\x63\x00\x67\x70\x72\x69\x76\x61\x74\x65\x00\x6a\x74\x6f\x6b\x65\x6e"
         "\x2d\x68\x61\x73\x68\x58\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -471,6 +511,23 @@
         "\x70\x65\x78\x70\x69\x72\x79\x2d\x74\x69\x6d\x65\x73\x74\x61\x6d\x70"
         "\x1a\x00\xcc\x15\x7a";
 
+    const uint8_t kExpectedSRRV2[] =
+        "\xa4\x68\x6d\x65\x74\x61\x64\x61\x74\x61\xa2\x66\x70\x75\x62\x6c\x69"
+        "\x63\x00\x67\x70\x72\x69\x76\x61\x74\x65\x00\x6a\x74\x6f\x6b\x65\x6e"
+        "\x2d\x68\x61\x73\x68\x58\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+        "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
+        "\x00\x00\x00\x00\x00\x6b\x63\x6c\x69\x65\x6e\x74\x2d\x64\x61\x74\x61"
+        "\x70\x54\x45\x53\x54\x20\x43\x4c\x49\x45\x4e\x54\x20\x44\x41\x54\x41"
+        "\x70\x65\x78\x70\x69\x72\x79\x2d\x74\x69\x6d\x65\x73\x74\x61\x6d\x70"
+        "\x00";
+
+    const uint8_t *expected_srr = kExpectedSRRV1;
+    size_t expected_srr_len = sizeof(kExpectedSRRV1) - 1;
+    if (!method()->has_srr) {
+      expected_srr = kExpectedSRRV2;
+      expected_srr_len = sizeof(kExpectedSRRV2) - 1;
+    }
+
     uint8_t *redeem_msg = NULL, *redeem_resp = NULL;
     ASSERT_TRUE(TRUST_TOKEN_CLIENT_begin_redemption(
         client.get(), &redeem_msg, &msg_len, token, kClientData,
@@ -498,54 +555,113 @@
     bssl::UniquePtr<uint8_t> free_srr(srr);
     bssl::UniquePtr<uint8_t> free_sig(sig);
 
-    if (method()->use_token_hash) {
-      const uint8_t kTokenHashDSTLabel[] = "TrustTokenV0 TokenHash";
-      uint8_t token_hash[SHA256_DIGEST_LENGTH];
-      SHA256_CTX sha_ctx;
-      SHA256_Init(&sha_ctx);
-      SHA256_Update(&sha_ctx, kTokenHashDSTLabel, sizeof(kTokenHashDSTLabel));
-      SHA256_Update(&sha_ctx, token->data, token->len);
-      SHA256_Final(token_hash, &sha_ctx);
+    if (!method()->has_srr) {
+      size_t b64_len;
+      ASSERT_TRUE(EVP_EncodedLength(&b64_len, expected_srr_len));
+      b64_len -= 1;
+      const char kSRRHeader[] = "body=:";
+      ASSERT_LT(sizeof(kSRRHeader) - 1 + b64_len, srr_len);
 
-      // Check the token hash is in the SRR.
-      ASSERT_EQ(Bytes(token_hash), Bytes(srr + 41, sizeof(token_hash)));
-
-      uint8_t decode_private_metadata;
-      ASSERT_TRUE(TRUST_TOKEN_decode_private_metadata(
-          method(), &decode_private_metadata, metadata_key,
-          sizeof(metadata_key), token_hash, sizeof(token_hash), srr[27]));
-      ASSERT_EQ(srr[18], public_metadata());
-      ASSERT_EQ(decode_private_metadata, private_metadata());
-
-      // Clear out the metadata bits.
-      srr[18] = 0;
-      srr[27] = 0;
-
-      // Clear out the token hash.
-      OPENSSL_memset(srr + 41, 0, sizeof(token_hash));
-
-      ASSERT_EQ(Bytes(kExpectedSRRTokenHash, sizeof(kExpectedSRRTokenHash) - 1),
-                Bytes(srr, srr_len));
-    } else {
-      uint8_t decode_private_metadata;
-      ASSERT_TRUE(TRUST_TOKEN_decode_private_metadata(
-          method(), &decode_private_metadata, metadata_key,
-          sizeof(metadata_key), kClientData, sizeof(kClientData) - 1, srr[27]));
-      ASSERT_EQ(srr[18], public_metadata());
-      ASSERT_EQ(decode_private_metadata, private_metadata());
-
-      // Clear out the metadata bits.
-      srr[18] = 0;
-      srr[27] = 0;
-
-      ASSERT_EQ(
-          Bytes(kExpectedSRRNoTokenHash, sizeof(kExpectedSRRNoTokenHash) - 1),
-          Bytes(srr, srr_len));
+      ASSERT_EQ(Bytes(kSRRHeader, sizeof(kSRRHeader) - 1),
+                Bytes(srr, sizeof(kSRRHeader) - 1));
+      uint8_t *decoded_srr =
+          (uint8_t *)OPENSSL_malloc(expected_srr_len + 2);
+      ASSERT_TRUE(decoded_srr);
+      ASSERT_LE(
+          int(expected_srr_len),
+          EVP_DecodeBlock(decoded_srr, srr + sizeof(kSRRHeader) - 1, b64_len));
+      srr = decoded_srr;
+      srr_len = expected_srr_len;
+      free_srr.reset(srr);
     }
+
+    const uint8_t kTokenHashDSTLabel[] = "TrustTokenV0 TokenHash";
+    uint8_t token_hash[SHA256_DIGEST_LENGTH];
+    SHA256_CTX sha_ctx;
+    SHA256_Init(&sha_ctx);
+    SHA256_Update(&sha_ctx, kTokenHashDSTLabel, sizeof(kTokenHashDSTLabel));
+    SHA256_Update(&sha_ctx, token->data, token->len);
+    SHA256_Final(token_hash, &sha_ctx);
+
+    // Check the token hash is in the SRR.
+    ASSERT_EQ(Bytes(token_hash), Bytes(srr + 41, sizeof(token_hash)));
+
+    uint8_t decode_private_metadata;
+    ASSERT_TRUE(TRUST_TOKEN_decode_private_metadata(
+        method(), &decode_private_metadata, metadata_key,
+        sizeof(metadata_key), token_hash, sizeof(token_hash), srr[27]));
+    ASSERT_EQ(srr[18], public_metadata());
+    ASSERT_EQ(decode_private_metadata, private_metadata());
+
+    // Clear out the metadata bits.
+    srr[18] = 0;
+    srr[27] = 0;
+
+    // Clear out the token hash.
+    OPENSSL_memset(srr + 41, 0, sizeof(token_hash));
+
+    ASSERT_EQ(Bytes(expected_srr, expected_srr_len),
+              Bytes(srr, srr_len));
+  }
+}
+
+TEST_P(TrustTokenMetadataTest, RawSetAndGetMetadata) {
+  ASSERT_NO_FATAL_FAILURE(SetupContexts());
+  // Request a batch of 10 tokens; the issue call below passes max_issuance=1.
+  uint8_t *issue_msg = NULL, *issue_resp = NULL;
+  size_t msg_len, resp_len;
+  ASSERT_TRUE(TRUST_TOKEN_CLIENT_begin_issuance(client.get(), &issue_msg,
+                                                &msg_len, 10));
+  bssl::UniquePtr<uint8_t> free_issue_msg(issue_msg);
+  size_t tokens_issued;
+  bool result = TRUST_TOKEN_ISSUER_issue(
+      issuer.get(), &issue_resp, &resp_len, &tokens_issued, issue_msg, msg_len,
+      public_metadata(), private_metadata(), /*max_issuance=*/1);
+  if (!method()->has_private_metadata && private_metadata()) {
+    ASSERT_FALSE(result);  // Unsupported private metadata must be rejected.
+    return;
+  }
+  ASSERT_TRUE(result);
+  bssl::UniquePtr<uint8_t> free_msg(issue_resp);
+  size_t key_index;
+  bssl::UniquePtr<STACK_OF(TRUST_TOKEN)> tokens(
+      TRUST_TOKEN_CLIENT_finish_issuance(client.get(), &key_index, issue_resp,
+                                         resp_len));
+  ASSERT_TRUE(tokens);
+  EXPECT_EQ(1u, sk_TRUST_TOKEN_num(tokens.get()));
+
+  for (TRUST_TOKEN *token : tokens.get()) {
+    const uint8_t kClientData[] = "\x70TEST CLIENT DATA";
+    uint64_t kRedemptionTime = (method()->has_srr ? 13374242 : 0);
+
+    uint8_t *redeem_msg = NULL;
+    ASSERT_TRUE(TRUST_TOKEN_CLIENT_begin_redemption(
+        client.get(), &redeem_msg, &msg_len, token, kClientData,
+        sizeof(kClientData) - 1, kRedemptionTime));
+    bssl::UniquePtr<uint8_t> free_redeem_msg(redeem_msg);
+    uint32_t public_value;
+    uint8_t private_value;
+    TRUST_TOKEN *rtoken;
+    uint8_t *client_data;
+    size_t client_data_len;
+    ASSERT_TRUE(TRUST_TOKEN_ISSUER_redeem_raw(
+        issuer.get(), &public_value, &private_value, &rtoken,
+        &client_data, &client_data_len, redeem_msg, msg_len));
+    bssl::UniquePtr<uint8_t> free_client_data(client_data);
+    bssl::UniquePtr<TRUST_TOKEN> free_rtoken(rtoken);
+    // redeem_raw must hand back the client data and both metadata values.
+    ASSERT_EQ(Bytes(kClientData, sizeof(kClientData) - 1),
+              Bytes(client_data, client_data_len));
+    ASSERT_EQ(public_value, static_cast<uint32_t>(public_metadata()));
+    ASSERT_EQ(private_value, private_metadata());
   }
 }
 
 TEST_P(TrustTokenMetadataTest, TooManyRequests) {
+  if (!method()->has_private_metadata && private_metadata()) {
+    return;
+  }
+
   issuer_max_batchsize = 1;
   ASSERT_NO_FATAL_FAILURE(SetupContexts());
 
@@ -570,6 +686,10 @@
 
 
 TEST_P(TrustTokenMetadataTest, TruncatedProof) {
+  if (!method()->has_private_metadata && private_metadata()) {
+    return;
+  }
+
   ASSERT_NO_FATAL_FAILURE(SetupContexts());
 
   uint8_t *issue_msg = NULL, *issue_resp = NULL;
@@ -586,44 +706,35 @@
   CBS real_response;
   CBS_init(&real_response, issue_resp, resp_len);
   uint16_t count;
-  uint32_t public_metadata;
+  uint32_t parsed_public_metadata;
   bssl::ScopedCBB bad_response;
   ASSERT_TRUE(CBB_init(bad_response.get(), 0));
   ASSERT_TRUE(CBS_get_u16(&real_response, &count));
   ASSERT_TRUE(CBB_add_u16(bad_response.get(), count));
-  ASSERT_TRUE(CBS_get_u32(&real_response, &public_metadata));
-  ASSERT_TRUE(CBB_add_u32(bad_response.get(), public_metadata));
+  ASSERT_TRUE(CBS_get_u32(&real_response, &parsed_public_metadata));
+  ASSERT_TRUE(CBB_add_u32(bad_response.get(), parsed_public_metadata));
 
+  const EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_secp384r1);
+  size_t token_length =
+      TRUST_TOKEN_NONCE_SIZE + 2 * (1 + 2 * BN_num_bytes(&group->field));
+  if (method() == TRUST_TOKEN_experiment_v1()) {
+    token_length += 4;
+  }
+  if (method() == TRUST_TOKEN_experiment_v2_voprf()) {
+    token_length = 1 + 2 * BN_num_bytes(&group->field);
+  }
   for (size_t i = 0; i < count; i++) {
-    uint8_t s[PMBTOKEN_NONCE_SIZE];
-    CBS tmp;
-    ASSERT_TRUE(CBS_copy_bytes(&real_response, s, PMBTOKEN_NONCE_SIZE));
-    ASSERT_TRUE(CBB_add_bytes(bad_response.get(), s, PMBTOKEN_NONCE_SIZE));
-    ASSERT_TRUE(CBS_get_u16_length_prefixed(&real_response, &tmp));
-    ASSERT_TRUE(CBB_add_u16(bad_response.get(), CBS_len(&tmp)));
-    ASSERT_TRUE(
-        CBB_add_bytes(bad_response.get(), CBS_data(&tmp), CBS_len(&tmp)));
-    ASSERT_TRUE(CBS_get_u16_length_prefixed(&real_response, &tmp));
-    ASSERT_TRUE(CBB_add_u16(bad_response.get(), CBS_len(&tmp)));
-    ASSERT_TRUE(
-        CBB_add_bytes(bad_response.get(), CBS_data(&tmp), CBS_len(&tmp)));
-    if (!method()->batched_proof) {
-      ASSERT_TRUE(CBS_get_u16_length_prefixed(&real_response, &tmp));
-      CBB dleq;
-      ASSERT_TRUE(CBB_add_u16_length_prefixed(bad_response.get(), &dleq));
-      ASSERT_TRUE(CBB_add_bytes(&dleq, CBS_data(&tmp), CBS_len(&tmp) - 2));
-      ASSERT_TRUE(CBB_flush(bad_response.get()));
-    }
+    ASSERT_TRUE(CBB_add_bytes(bad_response.get(), CBS_data(&real_response),
+                              token_length));
+    ASSERT_TRUE(CBS_skip(&real_response, token_length));
   }
 
-  if (method()->batched_proof) {
-    CBS tmp;
-    ASSERT_TRUE(CBS_get_u16_length_prefixed(&real_response, &tmp));
-    CBB dleq;
-    ASSERT_TRUE(CBB_add_u16_length_prefixed(bad_response.get(), &dleq));
-    ASSERT_TRUE(CBB_add_bytes(&dleq, CBS_data(&tmp), CBS_len(&tmp) - 2));
-    ASSERT_TRUE(CBB_flush(bad_response.get()));
-  }
+  CBS tmp;
+  ASSERT_TRUE(CBS_get_u16_length_prefixed(&real_response, &tmp));
+  CBB dleq;
+  ASSERT_TRUE(CBB_add_u16_length_prefixed(bad_response.get(), &dleq));
+  ASSERT_TRUE(CBB_add_bytes(&dleq, CBS_data(&tmp), CBS_len(&tmp) - 2));
+  ASSERT_TRUE(CBB_flush(bad_response.get()));
 
   uint8_t *bad_buf;
   size_t bad_len;
@@ -638,6 +749,10 @@
 }
 
 TEST_P(TrustTokenMetadataTest, ExcessDataProof) {
+  if (!method()->has_private_metadata && private_metadata()) {
+    return;
+  }
+
   ASSERT_NO_FATAL_FAILURE(SetupContexts());
 
   uint8_t *issue_msg = NULL, *issue_resp = NULL;
@@ -654,46 +769,36 @@
   CBS real_response;
   CBS_init(&real_response, issue_resp, resp_len);
   uint16_t count;
-  uint32_t public_metadata;
+  uint32_t parsed_public_metadata;
   bssl::ScopedCBB bad_response;
   ASSERT_TRUE(CBB_init(bad_response.get(), 0));
   ASSERT_TRUE(CBS_get_u16(&real_response, &count));
   ASSERT_TRUE(CBB_add_u16(bad_response.get(), count));
-  ASSERT_TRUE(CBS_get_u32(&real_response, &public_metadata));
-  ASSERT_TRUE(CBB_add_u32(bad_response.get(), public_metadata));
+  ASSERT_TRUE(CBS_get_u32(&real_response, &parsed_public_metadata));
+  ASSERT_TRUE(CBB_add_u32(bad_response.get(), parsed_public_metadata));
 
+  const EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_secp384r1);
+  size_t token_length =
+      TRUST_TOKEN_NONCE_SIZE + 2 * (1 + 2 * BN_num_bytes(&group->field));
+  if (method() == TRUST_TOKEN_experiment_v1()) {
+    token_length += 4;
+  }
+  if (method() == TRUST_TOKEN_experiment_v2_voprf()) {
+    token_length = 1 + 2 * BN_num_bytes(&group->field);
+  }
   for (size_t i = 0; i < count; i++) {
-    uint8_t s[PMBTOKEN_NONCE_SIZE];
-    CBS tmp;
-    ASSERT_TRUE(CBS_copy_bytes(&real_response, s, PMBTOKEN_NONCE_SIZE));
-    ASSERT_TRUE(CBB_add_bytes(bad_response.get(), s, PMBTOKEN_NONCE_SIZE));
-    ASSERT_TRUE(CBS_get_u16_length_prefixed(&real_response, &tmp));
-    ASSERT_TRUE(CBB_add_u16(bad_response.get(), CBS_len(&tmp)));
-    ASSERT_TRUE(
-        CBB_add_bytes(bad_response.get(), CBS_data(&tmp), CBS_len(&tmp)));
-    ASSERT_TRUE(CBS_get_u16_length_prefixed(&real_response, &tmp));
-    ASSERT_TRUE(CBB_add_u16(bad_response.get(), CBS_len(&tmp)));
-    ASSERT_TRUE(
-        CBB_add_bytes(bad_response.get(), CBS_data(&tmp), CBS_len(&tmp)));
-    if (!method()->batched_proof) {
-      ASSERT_TRUE(CBS_get_u16_length_prefixed(&real_response, &tmp));
-      CBB dleq;
-      ASSERT_TRUE(CBB_add_u16_length_prefixed(bad_response.get(), &dleq));
-      ASSERT_TRUE(CBB_add_bytes(&dleq, CBS_data(&tmp), CBS_len(&tmp)));
-      ASSERT_TRUE(CBB_add_u16(&dleq, 42));
-      ASSERT_TRUE(CBB_flush(bad_response.get()));
-    }
+    ASSERT_TRUE(CBB_add_bytes(bad_response.get(), CBS_data(&real_response),
+                              token_length));
+    ASSERT_TRUE(CBS_skip(&real_response, token_length));
   }
 
-  if (method()->batched_proof) {
-    CBS tmp;
-    ASSERT_TRUE(CBS_get_u16_length_prefixed(&real_response, &tmp));
-    CBB dleq;
-    ASSERT_TRUE(CBB_add_u16_length_prefixed(bad_response.get(), &dleq));
-    ASSERT_TRUE(CBB_add_bytes(&dleq, CBS_data(&tmp), CBS_len(&tmp)));
-    ASSERT_TRUE(CBB_add_u16(&dleq, 42));
-    ASSERT_TRUE(CBB_flush(bad_response.get()));
-  }
+  CBS tmp;
+  ASSERT_TRUE(CBS_get_u16_length_prefixed(&real_response, &tmp));
+  CBB dleq;
+  ASSERT_TRUE(CBB_add_u16_length_prefixed(bad_response.get(), &dleq));
+  ASSERT_TRUE(CBB_add_bytes(&dleq, CBS_data(&tmp), CBS_len(&tmp)));
+  ASSERT_TRUE(CBB_add_u16(&dleq, 42));
+  ASSERT_TRUE(CBB_flush(bad_response.get()));
 
   uint8_t *bad_buf;
   size_t bad_len;
@@ -728,6 +833,14 @@
 };
 
 TEST_P(TrustTokenBadKeyTest, BadKey) {
+  // For versions without private metadata, only corruptions of 'xs' (the 4th
+  // entry in |scalars| below) result in a bad key, as the other scalars are
+  // unused internally.
+  if (!method()->has_private_metadata &&
+      (private_metadata() || corrupted_key() != 4)) {
+    return;
+  }
+
   ASSERT_NO_FATAL_FAILURE(SetupContexts());
 
   uint8_t *issue_msg = NULL, *issue_resp = NULL;
diff --git a/deps/boringssl/src/crypto/trust_token/voprf.c b/deps/boringssl/src/crypto/trust_token/voprf.c
new file mode 100644
index 0000000..f93ee9c
--- /dev/null
+++ b/deps/boringssl/src/crypto/trust_token/voprf.c
@@ -0,0 +1,766 @@
+/* Copyright (c) 2020, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/trust_token.h>
+
+#include <openssl/bn.h>
+#include <openssl/bytestring.h>
+#include <openssl/ec.h>
+#include <openssl/err.h>
+#include <openssl/mem.h>
+#include <openssl/nid.h>
+#include <openssl/rand.h>
+
+#include "../ec_extra/internal.h"
+#include "../fipsmodule/ec/internal.h"
+
+#include "internal.h"
+
+
+typedef int (*hash_to_group_func_t)(const EC_GROUP *group, EC_RAW_POINT *out,
+                                    const uint8_t t[TRUST_TOKEN_NONCE_SIZE]);
+typedef int (*hash_to_scalar_func_t)(const EC_GROUP *group, EC_SCALAR *out,
+                                     uint8_t *buf, size_t len);
+// VOPRF_METHOD pairs a curve group with the two hash callbacks typed above.
+typedef struct {
+  const EC_GROUP *group;
+
+  // hash_to_group implements the HashToGroup operation for VOPRFs. It returns
+  // one on success and zero on error.
+  hash_to_group_func_t hash_to_group;
+  // hash_to_scalar implements the HashToScalar operation for VOPRFs. It returns
+  // one on success and zero on error.
+  hash_to_scalar_func_t hash_to_scalar;
+} VOPRF_METHOD;
+
+static const uint8_t kDefaultAdditionalData[32] = {0};
+
+static int voprf_init_method(VOPRF_METHOD *method, int curve_nid,
+                             hash_to_group_func_t hash_to_group,
+                             hash_to_scalar_func_t hash_to_scalar) {
+  method->group = EC_GROUP_new_by_curve_name(curve_nid);
+  if (method->group == NULL) {
+    return 0;  // No group for |curve_nid|; the callbacks are left unset.
+  }
+  // The group lookup succeeded, so record the caller's hash callbacks.
+  method->hash_to_group = hash_to_group;
+  method->hash_to_scalar = hash_to_scalar;
+  // |method| is fully populated: report success with one.
+  return 1;
+}
+
+static int cbb_add_point(CBB *out, const EC_GROUP *group,
+                         const EC_AFFINE *point) {
+  size_t len =  // Sizing call: NULL buffer, result used as the length below.
+      ec_point_to_bytes(group, point, POINT_CONVERSION_UNCOMPRESSED, NULL, 0);
+  if (len == 0) {
+    return 0;
+  }
+  // Reserve |len| bytes in |out| and serialize the uncompressed point there.
+  uint8_t *p;
+  return CBB_add_space(out, &p, len) &&
+         ec_point_to_bytes(group, point, POINT_CONVERSION_UNCOMPRESSED, p,
+                           len) == len &&
+         CBB_flush(out);
+}
+
+static int cbs_get_point(CBS *cbs, const EC_GROUP *group, EC_AFFINE *out) {
+  CBS child;
+  size_t plen = 1 + 2 * BN_num_bytes(&group->field);  // 1 byte + 2 field elems
+  if (!CBS_get_bytes(cbs, &child, plen) ||
+      !ec_point_from_uncompressed(group, out, CBS_data(&child),
+                                  CBS_len(&child))) {
+    return 0;  // Reading |plen| bytes or decoding them as a point failed.
+  }
+  return 1;  // |out| was decoded from |plen| bytes taken from |cbs|.
+}
+
+static int scalar_to_cbb(CBB *out, const EC_GROUP *group,
+                         const EC_SCALAR *scalar) {
+  uint8_t *buf;
+  size_t scalar_len = BN_num_bytes(&group->order);  // Byte width of the order.
+  if (!CBB_add_space(out, &buf, scalar_len)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    return 0;
+  }
+  ec_scalar_to_bytes(group, buf, &scalar_len, scalar);  // Fills reserved space.
+  return 1;
+}
+
+static int scalar_from_cbs(CBS *cbs, const EC_GROUP *group, EC_SCALAR *out) {
+  // Fails if the bytes cannot be read or ec_scalar_from_bytes rejects them.
+  size_t scalar_len = BN_num_bytes(&group->order);
+  CBS tmp;
+  if (!CBS_get_bytes(cbs, &tmp, scalar_len) ||
+      !ec_scalar_from_bytes(group, out, CBS_data(&tmp), CBS_len(&tmp))) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
+    return 0;
+  }
+  return 1;
+}
+
+static int voprf_generate_key(const VOPRF_METHOD *method, CBB *out_private,
+                              CBB *out_public) {
+  const EC_GROUP *group = method->group;
+  EC_RAW_POINT pub;  // priv times the base point (Jacobian form).
+  EC_SCALAR priv;  // Random nonzero private scalar.
+  EC_AFFINE pub_affine;
+  if (!ec_random_nonzero_scalar(group, &priv, kDefaultAdditionalData) ||
+      !ec_point_mul_scalar_base(group, &pub, &priv) ||
+      !ec_jacobian_to_affine(group, &pub_affine, &pub)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_KEYGEN_FAILURE);
+    return 0;
+  }
+  // Write the scalar to |out_private| and the affine point to |out_public|.
+  if (!scalar_to_cbb(out_private, group, &priv) ||
+      !cbb_add_point(out_public, group, &pub_affine)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_BUFFER_TOO_SMALL);
+    return 0;
+  }
+
+  return 1;
+}
+
+static int voprf_client_key_from_bytes(const VOPRF_METHOD *method,
+                                       TRUST_TOKEN_CLIENT_KEY *key,
+                                       const uint8_t *in, size_t len) {
+  const EC_GROUP *group = method->group;
+  if (!ec_point_from_uncompressed(group, &key->pubs, in, len)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
+    return 0;
+  }
+  // |key->pubs| now holds the public point decoded from |in|.
+  return 1;
+}
+
+static int voprf_issuer_key_from_bytes(const VOPRF_METHOD *method,
+                                       TRUST_TOKEN_ISSUER_KEY *key,
+                                       const uint8_t *in, size_t len) {
+  const EC_GROUP *group = method->group;
+  if (!ec_scalar_from_bytes(group, &key->xs, in, len)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
+    return 0;
+  }
+  // Only the scalar |xs| is parsed from |in|; |pubs| is derived from it below.
+  // Recompute the public key.
+  EC_RAW_POINT pub;
+  if (!ec_point_mul_scalar_base(group, &pub, &key->xs) ||
+      !ec_jacobian_to_affine(group, &key->pubs, &pub)) {
+    return 0;  // This branch queues no TRUST_TOKEN error itself.
+  }
+
+  return 1;
+}
+
+static STACK_OF(TRUST_TOKEN_PRETOKEN) *
+    voprf_blind(const VOPRF_METHOD *method, CBB *cbb, size_t count) {
+  const EC_GROUP *group = method->group;
+  STACK_OF(TRUST_TOKEN_PRETOKEN) *pretokens =
+      sk_TRUST_TOKEN_PRETOKEN_new_null();
+  if (pretokens == NULL) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    goto err;
+  }
+  // Create |count| pretokens, appending each blinded point to |cbb|.
+  for (size_t i = 0; i < count; i++) {
+    // Insert |pretoken| into |pretokens| early to simplify error-handling.
+    TRUST_TOKEN_PRETOKEN *pretoken =
+        OPENSSL_malloc(sizeof(TRUST_TOKEN_PRETOKEN));
+    if (pretoken == NULL ||
+        !sk_TRUST_TOKEN_PRETOKEN_push(pretokens, pretoken)) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+      TRUST_TOKEN_PRETOKEN_free(pretoken);
+      goto err;
+    }
+    // |t| is this pretoken's random nonce; it is hashed to a point below.
+    RAND_bytes(pretoken->t, sizeof(pretoken->t));
+
+    // We sample r in Montgomery form to simplify inverting.
+    EC_SCALAR r;
+    if (!ec_random_nonzero_scalar(group, &r,
+                                  kDefaultAdditionalData)) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+      goto err;
+    }
+
+    // pretoken->r is rinv.
+    ec_scalar_inv0_montgomery(group, &pretoken->r, &r);
+    // Convert both out of Montgomery form.
+    ec_scalar_from_montgomery(group, &r, &r);
+    ec_scalar_from_montgomery(group, &pretoken->r, &pretoken->r);
+
+    // Tp is the blinded token in the VOPRF protocol.
+    EC_RAW_POINT P, Tp;
+    if (!method->hash_to_group(group, &P, pretoken->t) ||
+        !ec_point_mul_scalar(group, &Tp, &P, &r) ||
+        !ec_jacobian_to_affine(group, &pretoken->Tp, &Tp)) {
+      goto err;
+    }
+
+    if (!cbb_add_point(cbb, group, &pretoken->Tp)) {
+      goto err;
+    }
+  }
+
+  return pretokens;
+  // Failure path: free the stack together with every pretoken pushed so far.
+err:
+  sk_TRUST_TOKEN_PRETOKEN_pop_free(pretokens, TRUST_TOKEN_PRETOKEN_free);
+  return NULL;
+}
+
+static int hash_to_scalar_dleq(const VOPRF_METHOD *method, EC_SCALAR *out,
+                               const EC_AFFINE *X, const EC_AFFINE *T,
+                               const EC_AFFINE *W, const EC_AFFINE *K0,
+                               const EC_AFFINE *K1) {
+  static const uint8_t kDLEQLabel[] = "DLEQ";
+  // Hashes label (NUL included via sizeof) || X || T || W || K0 || K1 to |out|.
+  int ok = 0;
+  CBB cbb;
+  CBB_zero(&cbb);
+  uint8_t *buf = NULL;
+  size_t len;
+  if (!CBB_init(&cbb, 0) ||
+      !CBB_add_bytes(&cbb, kDLEQLabel, sizeof(kDLEQLabel)) ||
+      !cbb_add_point(&cbb, method->group, X) ||
+      !cbb_add_point(&cbb, method->group, T) ||
+      !cbb_add_point(&cbb, method->group, W) ||
+      !cbb_add_point(&cbb, method->group, K0) ||
+      !cbb_add_point(&cbb, method->group, K1) ||
+      !CBB_finish(&cbb, &buf, &len) ||
+      !method->hash_to_scalar(method->group, out, buf, len)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    goto err;
+  }
+
+  ok = 1;
+  // Reached on success too: release the builder and the serialized buffer.
+err:
+  CBB_cleanup(&cbb);
+  OPENSSL_free(buf);
+  return ok;
+}
+
+static int hash_to_scalar_batch(const VOPRF_METHOD *method, EC_SCALAR *out,
+                                const CBB *points, size_t index) {
+  static const uint8_t kDLEQBatchLabel[] = "DLEQ BATCH";
+  if (index > 0xffff) {
+    // The protocol supports only two-byte batches.
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_OVERFLOW);
+    return 0;
+  }
+  // Hashes label (NUL included) || bytes of |points| || 16-bit |index| to |out|.
+  int ok = 0;
+  CBB cbb;
+  CBB_zero(&cbb);
+  uint8_t *buf = NULL;
+  size_t len;
+  if (!CBB_init(&cbb, 0) ||
+      !CBB_add_bytes(&cbb, kDLEQBatchLabel, sizeof(kDLEQBatchLabel)) ||
+      !CBB_add_bytes(&cbb, CBB_data(points), CBB_len(points)) ||
+      !CBB_add_u16(&cbb, (uint16_t)index) ||
+      !CBB_finish(&cbb, &buf, &len) ||
+      !method->hash_to_scalar(method->group, out, buf, len)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    goto err;
+  }
+
+  ok = 1;
+  // Reached on success too: release the builder and the serialized buffer.
+err:
+  CBB_cleanup(&cbb);
+  OPENSSL_free(buf);
+  return ok;
+}
+
+static int dleq_generate(const VOPRF_METHOD *method, CBB *cbb,
+                         const TRUST_TOKEN_ISSUER_KEY *priv,
+                         const EC_RAW_POINT *T, const EC_RAW_POINT *W) {
+  const EC_GROUP *group = method->group;
+  // Indices into the point arrays that are batch-converted to affine below.
+  enum {
+    idx_T,
+    idx_W,
+    idx_k0,
+    idx_k1,
+    num_idx,
+  };
+  EC_RAW_POINT jacobians[num_idx];
+
+  // Setup the DLEQ proof.
+  EC_SCALAR r;
+  if (// r <- Zp
+      !ec_random_nonzero_scalar(group, &r, kDefaultAdditionalData) ||
+      // k0;k1 = r*(G;T)
+      !ec_point_mul_scalar_base(group, &jacobians[idx_k0], &r) ||
+      !ec_point_mul_scalar(group, &jacobians[idx_k1], T, &r))  {
+    return 0;
+  }
+  // T and W join k0 and k1 so all four points are converted in one batch.
+  EC_AFFINE affines[num_idx];
+  jacobians[idx_T] = *T;
+  jacobians[idx_W] = *W;
+  if (!ec_jacobian_to_affine_batch(group, affines, jacobians, num_idx)) {
+    return 0;
+  }
+
+  // Compute c = Hc(...).
+  EC_SCALAR c;
+  if (!hash_to_scalar_dleq(method, &c, &priv->pubs, &affines[idx_T],
+                           &affines[idx_W], &affines[idx_k0],
+                           &affines[idx_k1])) {
+    return 0;
+  }
+
+  // Convert c to Montgomery form for the Montgomery multiplication below.
+  EC_SCALAR c_mont;
+  ec_scalar_to_montgomery(group, &c_mont, &c);
+
+  // u = r + c*xs
+  EC_SCALAR u;
+  ec_scalar_mul_montgomery(group, &u, &priv->xs, &c_mont);
+  ec_scalar_add(group, &u, &r, &u);
+
+  // Store DLEQ proof in transcript: c first, then u.
+  if (!scalar_to_cbb(cbb, group, &c) ||
+      !scalar_to_cbb(cbb, group, &u)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    return 0;
+  }
+
+  return 1;
+}
+
+static int mul_public_2(const EC_GROUP *group, EC_RAW_POINT *out,
+                        const EC_RAW_POINT *p0, const EC_SCALAR *scalar0,
+                        const EC_RAW_POINT *p1, const EC_SCALAR *scalar1) {
+  EC_RAW_POINT points[2] = {*p0, *p1};  // Copy the inputs into batch arrays.
+  EC_SCALAR scalars[2] = {*scalar0, *scalar1};
+  return ec_point_mul_scalar_public_batch(group, out, /*g_scalar=*/NULL, points,
+                                          scalars, 2);  // Two-element batch.
+}
+
+static int dleq_verify(const VOPRF_METHOD *method, CBS *cbs,
+                       const TRUST_TOKEN_CLIENT_KEY *pub, const EC_RAW_POINT *T,
+                       const EC_RAW_POINT *W) {
+  const EC_GROUP *group = method->group;
+
+  // Indices into the point arrays that are batch-converted to affine below.
+  enum {
+    idx_T,
+    idx_W,
+    idx_k0,
+    idx_k1,
+    num_idx,
+  };
+  EC_RAW_POINT jacobians[num_idx];
+
+  // Decode the DLEQ proof.
+  EC_SCALAR c, u;
+  if (!scalar_from_cbs(cbs, group, &c) ||
+      !scalar_from_cbs(cbs, group, &u)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
+    return 0;
+  }
+
+  // k0;k1 = u*(G;T) - c*(pub;W)
+  EC_RAW_POINT pubs;
+  ec_affine_to_jacobian(group, &pubs, &pub->pubs);
+  EC_SCALAR minus_c;
+  ec_scalar_neg(group, &minus_c, &c);
+  if (!ec_point_mul_scalar_public(group, &jacobians[idx_k0], &u, &pubs,
+                                  &minus_c) ||
+      !mul_public_2(group, &jacobians[idx_k1], T, &u, W, &minus_c)) {
+    return 0;
+  }
+
+  // Check the DLEQ proof.
+  EC_AFFINE affines[num_idx];
+  jacobians[idx_T] = *T;
+  jacobians[idx_W] = *W;
+  if (!ec_jacobian_to_affine_batch(group, affines, jacobians, num_idx)) {
+    return 0;
+  }
+
+  // Compute c = Hc(...).
+  EC_SCALAR calculated;
+  if (!hash_to_scalar_dleq(method, &calculated, &pub->pubs, &affines[idx_T],
+                           &affines[idx_W], &affines[idx_k0],
+                           &affines[idx_k1])) {
+    return 0;
+  }
+
+  // c == calculated
+  if (!ec_scalar_equal_vartime(group, &c, &calculated)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_INVALID_PROOF);
+    return 0;
+  }
+  // The challenge recomputed from the proof matches the decoded |c|.
+  return 1;
+}
+
+static int voprf_sign(const VOPRF_METHOD *method,
+                      const TRUST_TOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
+                      size_t num_requested, size_t num_to_issue) {
+  const EC_GROUP *group = method->group;
+  if (num_requested < num_to_issue) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_INTERNAL_ERROR);
+    return 0;
+  }
+  // Guard the array-size multiplications below against size_t overflow.
+  if (num_to_issue > ((size_t)-1) / sizeof(EC_RAW_POINT) ||
+      num_to_issue > ((size_t)-1) / sizeof(EC_SCALAR)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_OVERFLOW);
+    return 0;
+  }
+
+  int ret = 0;
+  EC_RAW_POINT *BTs = OPENSSL_malloc(num_to_issue * sizeof(EC_RAW_POINT));
+  EC_RAW_POINT *Zs = OPENSSL_malloc(num_to_issue * sizeof(EC_RAW_POINT));
+  EC_SCALAR *es = OPENSSL_malloc(num_to_issue * sizeof(EC_SCALAR));
+  CBB batch_cbb;  // Collects pubs and every (BT, Z) pair; hashed into |es|.
+  CBB_zero(&batch_cbb);
+  if (!BTs ||
+      !Zs ||
+      !es ||
+      !CBB_init(&batch_cbb, 0) ||
+      !cbb_add_point(&batch_cbb, method->group, &key->pubs)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    goto err;
+  }
+  // Evaluate each blinded point: Z = xs * BT, appended to |cbb|.
+  for (size_t i = 0; i < num_to_issue; i++) {
+    EC_AFFINE BT_affine, Z_affine;
+    EC_RAW_POINT BT, Z;
+    if (!cbs_get_point(cbs, group, &BT_affine)) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
+      goto err;
+    }
+    ec_affine_to_jacobian(group, &BT, &BT_affine);
+    if (!ec_point_mul_scalar(group, &Z, &BT, &key->xs) ||
+        !ec_jacobian_to_affine(group, &Z_affine, &Z) ||
+        !cbb_add_point(cbb, group, &Z_affine)) {
+      goto err;
+    }
+
+    if (!cbb_add_point(&batch_cbb, group, &BT_affine) ||
+        !cbb_add_point(&batch_cbb, group, &Z_affine)) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+      goto err;
+    }
+    BTs[i] = BT;
+    Zs[i] = Z;
+
+    if (!CBB_flush(cbb)) {
+      goto err;
+    }
+  }
+
+  // The DLEQ batching construction is described in appendix B of
+  // https://eprint.iacr.org/2020/072/20200324:214215. Note the additional
+  // computations all act on public inputs.
+  for (size_t i = 0; i < num_to_issue; i++) {
+    if (!hash_to_scalar_batch(method, &es[i], &batch_cbb, i)) {
+      goto err;
+    }
+  }
+  // Combine the per-token points into one (BT, Z) pair weighted by |es|.
+  EC_RAW_POINT BT_batch, Z_batch;
+  if (!ec_point_mul_scalar_public_batch(group, &BT_batch,
+                                        /*g_scalar=*/NULL, BTs, es,
+                                        num_to_issue) ||
+      !ec_point_mul_scalar_public_batch(group, &Z_batch,
+                                        /*g_scalar=*/NULL, Zs, es,
+                                        num_to_issue)) {
+    goto err;
+  }
+  // A single length-prefixed DLEQ proof covers the whole batch.
+  CBB proof;
+  if (!CBB_add_u16_length_prefixed(cbb, &proof) ||
+      !dleq_generate(method, &proof, key, &BT_batch, &Z_batch) ||
+      !CBB_flush(cbb)) {
+    goto err;
+  }
+
+  // Skip over any unused requests.
+  size_t point_len = 1 + 2 * BN_num_bytes(&group->field);
+  if (!CBS_skip(cbs, point_len * (num_requested - num_to_issue))) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
+    goto err;
+  }
+
+  ret = 1;
+  // Reached on success too: free the scratch arrays and the batch transcript.
+err:
+  OPENSSL_free(BTs);
+  OPENSSL_free(Zs);
+  OPENSSL_free(es);
+  CBB_cleanup(&batch_cbb);
+  return ret;
+}
+
+// voprf_unblind reads |count| evaluated points and a batched DLEQ proof from
+// |cbs| and returns the unblinded, serialized tokens, or NULL on any failure.
+static STACK_OF(TRUST_TOKEN) *
+    voprf_unblind(const VOPRF_METHOD *method, const TRUST_TOKEN_CLIENT_KEY *key,
+                  const STACK_OF(TRUST_TOKEN_PRETOKEN) * pretokens, CBS *cbs,
+                  size_t count, uint32_t key_id) {
+  const EC_GROUP *group = method->group;
+  if (count > sk_TRUST_TOKEN_PRETOKEN_num(pretokens)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
+    return NULL;
+  }
+
+  // Reject an overflowing |count| before |ret| is allocated so it cannot leak.
+  if (count > ((size_t)-1) / sizeof(EC_RAW_POINT) ||
+      count > ((size_t)-1) / sizeof(EC_SCALAR)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_OVERFLOW);
+    return NULL;
+  }
+
+  int ok = 0;
+  STACK_OF(TRUST_TOKEN) *ret = sk_TRUST_TOKEN_new_null();
+  if (ret == NULL) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    return NULL;
+  }
+  EC_RAW_POINT *BTs = OPENSSL_malloc(count * sizeof(EC_RAW_POINT));
+  EC_RAW_POINT *Zs = OPENSSL_malloc(count * sizeof(EC_RAW_POINT));
+  EC_SCALAR *es = OPENSSL_malloc(count * sizeof(EC_SCALAR));
+  CBB batch_cbb;
+  CBB_zero(&batch_cbb);
+  if (!BTs || !Zs || !es ||
+      !CBB_init(&batch_cbb, 0) ||
+      !cbb_add_point(&batch_cbb, method->group, &key->pubs)) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+    goto err;
+  }
+
+  for (size_t i = 0; i < count; i++) {
+    const TRUST_TOKEN_PRETOKEN *pretoken =
+        sk_TRUST_TOKEN_PRETOKEN_value(pretokens, i);
+
+    EC_AFFINE Z_affine;
+    if (!cbs_get_point(cbs, group, &Z_affine)) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_DECODE_FAILURE);
+      goto err;
+    }
+
+    ec_affine_to_jacobian(group, &BTs[i], &pretoken->Tp);
+    ec_affine_to_jacobian(group, &Zs[i], &Z_affine);
+
+    if (!cbb_add_point(&batch_cbb, group, &pretoken->Tp) ||
+        !cbb_add_point(&batch_cbb, group, &Z_affine)) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+      goto err;
+    }
+
+    // Unblind the token.
+    // pretoken->r is rinv.
+    EC_RAW_POINT N;
+    EC_AFFINE N_affine;
+    if (!ec_point_mul_scalar(group, &N, &Zs[i], &pretoken->r) ||
+        !ec_jacobian_to_affine(group, &N_affine, &N)) {
+      goto err;
+    }
+
+    // Serialize the token. Include |key_id| to avoid an extra copy in the layer
+    // above.
+    CBB token_cbb;
+    size_t point_len = 1 + 2 * BN_num_bytes(&group->field);
+    if (!CBB_init(&token_cbb, 4 + TRUST_TOKEN_NONCE_SIZE + (2 + point_len)) ||
+        !CBB_add_u32(&token_cbb, key_id) ||
+        !CBB_add_bytes(&token_cbb, pretoken->t, TRUST_TOKEN_NONCE_SIZE) ||
+        !cbb_add_point(&token_cbb, group, &N_affine) ||
+        !CBB_flush(&token_cbb)) {
+      CBB_cleanup(&token_cbb);
+      goto err;
+    }
+
+    TRUST_TOKEN *token =
+        TRUST_TOKEN_new(CBB_data(&token_cbb), CBB_len(&token_cbb));
+    CBB_cleanup(&token_cbb);
+    if (token == NULL || !sk_TRUST_TOKEN_push(ret, token)) {
+      OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_MALLOC_FAILURE);
+      TRUST_TOKEN_free(token);
+      goto err;
+    }
+  }
+
+  // The DLEQ batching construction is described in appendix B of
+  // https://eprint.iacr.org/2020/072/20200324:214215. Note the additional
+  // computations all act on public inputs.
+  for (size_t i = 0; i < count; i++) {
+    if (!hash_to_scalar_batch(method, &es[i], &batch_cbb, i)) {
+      goto err;
+    }
+  }
+
+  EC_RAW_POINT BT_batch, Z_batch;
+  if (!ec_point_mul_scalar_public_batch(group, &BT_batch,
+                                        /*g_scalar=*/NULL, BTs, es, count) ||
+      !ec_point_mul_scalar_public_batch(group, &Z_batch,
+                                        /*g_scalar=*/NULL, Zs, es, count)) {
+    goto err;
+  }
+
+  CBS proof;
+  if (!CBS_get_u16_length_prefixed(cbs, &proof) ||
+      !dleq_verify(method, &proof, key, &BT_batch, &Z_batch) ||
+      CBS_len(&proof) != 0) {
+    goto err;
+  }
+
+  ok = 1;
+
+err:
+  OPENSSL_free(BTs);
+  OPENSSL_free(Zs);
+  OPENSSL_free(es);
+  CBB_cleanup(&batch_cbb);
+  if (!ok) {
+    sk_TRUST_TOKEN_pop_free(ret, TRUST_TOKEN_free);
+    ret = NULL;
+  }
+  return ret;
+}
+
+// voprf_read parses |token| as a nonce and a point, writes the nonce to
+// |out_nonce| and returns one iff the point matches xs * HashToGroup(nonce).
+static int voprf_read(const VOPRF_METHOD *method,
+                      const TRUST_TOKEN_ISSUER_KEY *key,
+                      uint8_t out_nonce[TRUST_TOKEN_NONCE_SIZE],
+                      const uint8_t *token, size_t token_len) {
+  const EC_GROUP *group = method->group;
+  CBS reader;
+  EC_AFFINE token_point;
+  CBS_init(&reader, token, token_len);
+  // Anything other than exactly one nonce plus one point is rejected.
+  if (!CBS_copy_bytes(&reader, out_nonce, TRUST_TOKEN_NONCE_SIZE) ||
+      !cbs_get_point(&reader, group, &token_point) || CBS_len(&reader) != 0) {
+    OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_INVALID_TOKEN);
+    return 0;
+  }
+
+  EC_RAW_POINT hashed_nonce;
+  if (!method->hash_to_group(group, &hashed_nonce, out_nonce)) {
+    return 0;
+  }
+
+  EC_RAW_POINT expected;
+  if (ec_point_mul_scalar(group, &expected, &hashed_nonce, &key->xs) &&
+      ec_affine_jacobian_equal(group, &token_point, &expected)) {
+    return 1;
+  }
+  OPENSSL_PUT_ERROR(TRUST_TOKEN, TRUST_TOKEN_R_BAD_VALIDITY_CHECK);
+  return 0;
+}
+
+// VOPRF experiment v2.
+
+// Maps nonce |t| to a curve point; sizeof() keeps the label's trailing NUL.
+static int voprf_exp2_hash_to_group(const EC_GROUP *group, EC_RAW_POINT *out,
+                                    const uint8_t t[TRUST_TOKEN_NONCE_SIZE]) {
+  const uint8_t label[] = "TrustToken VOPRF Experiment V2 HashToGroup";
+  return ec_hash_to_curve_p384_xmd_sha512_sswu_draft07(
+      group, out, label, sizeof(label), t, TRUST_TOKEN_NONCE_SIZE);
+}
+
+static int voprf_exp2_hash_to_scalar(const EC_GROUP *group, EC_SCALAR *out,
+                                     uint8_t *buf, size_t len) {
+  const uint8_t label[] = "TrustToken VOPRF Experiment V2 HashToScalar";
+  return ec_hash_to_scalar_p384_xmd_sha512_draft07(
+      group, out, label, sizeof(label), buf, len);  // sizeof keeps the NUL
+}
+
+static int voprf_exp2_ok = 0;  // set by voprf_exp2_init_method_impl
+static VOPRF_METHOD voprf_exp2_method;  // handed to voprf_init_method below
+static CRYPTO_once_t voprf_exp2_method_once = CRYPTO_ONCE_INIT;
+
+static void voprf_exp2_init_method_impl(void) {
+  voprf_exp2_ok =  // nonzero is treated as success by voprf_exp2_init_method
+      voprf_init_method(&voprf_exp2_method, NID_secp384r1,
+                        voprf_exp2_hash_to_group, voprf_exp2_hash_to_scalar);
+}
+
+static int voprf_exp2_init_method(void) {  // 1 iff the exp2 method is set up
+  CRYPTO_once(&voprf_exp2_method_once, voprf_exp2_init_method_impl);
+  if (voprf_exp2_ok) {
+    return 1;
+  }
+  OPENSSL_PUT_ERROR(TRUST_TOKEN, ERR_R_INTERNAL_ERROR);
+  return 0;
+}
+
+// Forwards to voprf_generate_key; returns 0 if method setup fails.
+int voprf_exp2_generate_key(CBB *out_private, CBB *out_public) {
+  if (voprf_exp2_init_method()) {
+    return voprf_generate_key(&voprf_exp2_method, out_private, out_public);
+  }
+  return 0;
+}
+
+// Forwards to voprf_client_key_from_bytes; returns 0 if method setup fails.
+int voprf_exp2_client_key_from_bytes(TRUST_TOKEN_CLIENT_KEY *key,
+                                     const uint8_t *in, size_t len) {
+  return voprf_exp2_init_method()
+             ? voprf_client_key_from_bytes(&voprf_exp2_method, key, in, len)
+             : 0;
+}
+
+// Forwards to voprf_issuer_key_from_bytes; returns 0 if method setup fails.
+int voprf_exp2_issuer_key_from_bytes(TRUST_TOKEN_ISSUER_KEY *key,
+                                     const uint8_t *in, size_t len) {
+  return voprf_exp2_init_method()
+             ? voprf_issuer_key_from_bytes(&voprf_exp2_method, key, in, len)
+             : 0;
+}
+
+// Forwards to voprf_blind; returns NULL if method setup fails.
+STACK_OF(TRUST_TOKEN_PRETOKEN) * voprf_exp2_blind(CBB *cbb, size_t count) {
+  return voprf_exp2_init_method()
+             ? voprf_blind(&voprf_exp2_method, cbb, count)
+             : NULL;
+}
+
+// Forwards to voprf_sign; fails on setup error or nonzero |private_metadata|.
+int voprf_exp2_sign(const TRUST_TOKEN_ISSUER_KEY *key, CBB *cbb, CBS *cbs,
+                    size_t num_requested, size_t num_to_issue,
+                    uint8_t private_metadata) {
+  const int usable = voprf_exp2_init_method() && private_metadata == 0;
+  return usable ? voprf_sign(&voprf_exp2_method, key, cbb, cbs, num_requested,
+                             num_to_issue)
+                : 0;
+}
+
+// Forwards to voprf_unblind; returns NULL if method setup fails.
+STACK_OF(TRUST_TOKEN) *
+    voprf_exp2_unblind(const TRUST_TOKEN_CLIENT_KEY *key,
+                       const STACK_OF(TRUST_TOKEN_PRETOKEN) * pretokens,
+                       CBS *cbs, size_t count, uint32_t key_id) {
+  return voprf_exp2_init_method()
+             ? voprf_unblind(&voprf_exp2_method, key, pretokens, cbs, count,
+                             key_id)
+             : NULL;
+}
+
+// Forwards to voprf_read; |out_private_metadata| is never written here.
+int voprf_exp2_read(const TRUST_TOKEN_ISSUER_KEY *key,
+                    uint8_t out_nonce[TRUST_TOKEN_NONCE_SIZE],
+                    uint8_t *out_private_metadata, const uint8_t *token,
+                    size_t token_len) {
+  return voprf_exp2_init_method()
+             ? voprf_read(&voprf_exp2_method, key, out_nonce, token, token_len)
+             : 0;
+}
diff --git a/deps/boringssl/src/crypto/x509/a_strex.c b/deps/boringssl/src/crypto/x509/a_strex.c
index eeec5d1..2c4824e 100644
--- a/deps/boringssl/src/crypto/x509/a_strex.c
+++ b/deps/boringssl/src/crypto/x509/a_strex.c
@@ -446,7 +446,7 @@
 #define FN_WIDTH_LN     25
 #define FN_WIDTH_SN     10
 
-static int do_name_ex(char_io *io_ch, void *arg, X509_NAME *n,
+static int do_name_ex(char_io *io_ch, void *arg, const X509_NAME *n,
                       int indent, unsigned long flags)
 {
     int i, prev = -1, orflags, cnt;
@@ -584,7 +584,7 @@
 
 /* Wrappers round the main functions */
 
-int X509_NAME_print_ex(BIO *out, X509_NAME *nm, int indent,
+int X509_NAME_print_ex(BIO *out, const X509_NAME *nm, int indent,
                        unsigned long flags)
 {
     if (flags == XN_FLAG_COMPAT)
@@ -593,7 +593,7 @@
 }
 
 #ifndef OPENSSL_NO_FP_API
-int X509_NAME_print_ex_fp(FILE *fp, X509_NAME *nm, int indent,
+int X509_NAME_print_ex_fp(FILE *fp, const X509_NAME *nm, int indent,
                           unsigned long flags)
 {
     if (flags == XN_FLAG_COMPAT) {
diff --git a/deps/boringssl/src/crypto/x509/algorithm.c b/deps/boringssl/src/crypto/x509/algorithm.c
index b9f3314..c021dc4 100644
--- a/deps/boringssl/src/crypto/x509/algorithm.c
+++ b/deps/boringssl/src/crypto/x509/algorithm.c
@@ -142,10 +142,12 @@
     return 0;
   }
 
-  /* RSA signature algorithms include an explicit NULL parameter but we also
-   * accept omitted values for compatibility. Other algorithms must omit it. */
-  if (sigalg->parameter != NULL && (pkey_nid != EVP_PKEY_RSA ||
-                                    sigalg->parameter->type != V_ASN1_NULL)) {
+  /* The parameter should be an explicit NULL for RSA and omitted for ECDSA. For
+   * compatibility, we allow either for both algorithms. See b/167375496.
+   *
+   * TODO(davidben): Chromium's verifier allows both forms for RSA, but enforces
+   * ECDSA more strictly. Align with Chromium and add a flag for b/167375496. */
+  if (sigalg->parameter != NULL && sigalg->parameter->type != V_ASN1_NULL) {
     OPENSSL_PUT_ERROR(X509, X509_R_INVALID_PARAMETER);
     return 0;
   }
diff --git a/deps/boringssl/src/crypto/x509/rsa_pss.c b/deps/boringssl/src/crypto/x509/rsa_pss.c
index 9230934..39637b9 100644
--- a/deps/boringssl/src/crypto/x509/rsa_pss.c
+++ b/deps/boringssl/src/crypto/x509/rsa_pss.c
@@ -199,11 +199,15 @@
   if (saltlen == -1) {
     saltlen = EVP_MD_size(sigmd);
   } else if (saltlen == -2) {
+    // TODO(davidben): Forbid this mode. The world has largely standardized on
+    // salt length matching hash length.
     saltlen = EVP_PKEY_size(pk) - EVP_MD_size(sigmd) - 2;
     if (((EVP_PKEY_bits(pk) - 1) & 0x7) == 0) {
       saltlen--;
     }
-  } else {
+  } else if (saltlen != (int)EVP_MD_size(sigmd)) {
+    // We only allow salt length matching hash length and, for now, the -2 case.
+    OPENSSL_PUT_ERROR(X509, X509_R_INVALID_PSS_PARAMETERS);
     return 0;
   }
 
diff --git a/deps/boringssl/src/crypto/x509/t_crl.c b/deps/boringssl/src/crypto/x509/t_crl.c
index dc9b87f..14f98c5 100644
--- a/deps/boringssl/src/crypto/x509/t_crl.c
+++ b/deps/boringssl/src/crypto/x509/t_crl.c
@@ -91,10 +91,10 @@
     BIO_printf(out, "%8sIssuer: %s\n", "", p);
     OPENSSL_free(p);
     BIO_printf(out, "%8sLast Update: ", "");
-    ASN1_TIME_print(out, X509_CRL_get_lastUpdate(x));
+    ASN1_TIME_print(out, X509_CRL_get0_lastUpdate(x));
     BIO_printf(out, "\n%8sNext Update: ", "");
-    if (X509_CRL_get_nextUpdate(x))
-        ASN1_TIME_print(out, X509_CRL_get_nextUpdate(x));
+    if (X509_CRL_get0_nextUpdate(x))
+        ASN1_TIME_print(out, X509_CRL_get0_nextUpdate(x));
     else
         BIO_printf(out, "NONE");
     BIO_printf(out, "\n");
diff --git a/deps/boringssl/src/crypto/x509/t_x509.c b/deps/boringssl/src/crypto/x509/t_x509.c
index e45a765..5db8746 100644
--- a/deps/boringssl/src/crypto/x509/t_x509.c
+++ b/deps/boringssl/src/crypto/x509/t_x509.c
@@ -494,7 +494,7 @@
   return 0;
 }
 
-int X509_NAME_print(BIO *bp, X509_NAME *name, int obase)
+int X509_NAME_print(BIO *bp, const X509_NAME *name, int obase)
 {
     char *s, *c, *b;
     int ret = 0, l, i;
diff --git a/deps/boringssl/src/crypto/x509/x509.c b/deps/boringssl/src/crypto/x509/x509.c
index 188fd49..9049a35 100644
--- a/deps/boringssl/src/crypto/x509/x509.c
+++ b/deps/boringssl/src/crypto/x509/x509.c
@@ -65,73 +65,6 @@
  * it to avoid downstream churn. */
 OPENSSL_DECLARE_ERROR_REASON(X509, UNSUPPORTED_ALGORITHM)
 
-int PKCS8_pkey_set0(PKCS8_PRIV_KEY_INFO *priv, ASN1_OBJECT *aobj, int version,
-                    int ptype, void *pval, uint8_t *penc, int penclen) {
-  uint8_t **ppenc = NULL;
-  if (version >= 0) {
-    if (!ASN1_INTEGER_set(priv->version, version)) {
-      return 0;
-    }
-  }
-
-  if (penc) {
-    int pmtype;
-    ASN1_OCTET_STRING *oct;
-
-    oct = ASN1_OCTET_STRING_new();
-    if (!oct) {
-      return 0;
-    }
-    oct->data = penc;
-    ppenc = &oct->data;
-    oct->length = penclen;
-    if (priv->broken == PKCS8_NO_OCTET) {
-      pmtype = V_ASN1_SEQUENCE;
-    } else {
-      pmtype = V_ASN1_OCTET_STRING;
-    }
-    ASN1_TYPE_set(priv->pkey, pmtype, oct);
-  }
-
-  if (!X509_ALGOR_set0(priv->pkeyalg, aobj, ptype, pval)) {
-    /* If call fails do not swallow 'enc' */
-    if (ppenc) {
-      *ppenc = NULL;
-    }
-    return 0;
-  }
-
-  return 1;
-}
-
-int PKCS8_pkey_get0(ASN1_OBJECT **ppkalg, const uint8_t **pk, int *ppklen,
-                    X509_ALGOR **pa, PKCS8_PRIV_KEY_INFO *p8) {
-  if (ppkalg) {
-    *ppkalg = p8->pkeyalg->algorithm;
-  }
-
-  if (p8->pkey->type == V_ASN1_OCTET_STRING) {
-    p8->broken = PKCS8_OK;
-    if (pk) {
-      *pk = p8->pkey->value.octet_string->data;
-      *ppklen = p8->pkey->value.octet_string->length;
-    }
-  } else if (p8->pkey->type == V_ASN1_SEQUENCE) {
-    p8->broken = PKCS8_NO_OCTET;
-    if (pk) {
-      *pk = p8->pkey->value.sequence->data;
-      *ppklen = p8->pkey->value.sequence->length;
-    }
-  } else {
-    return 0;
-  }
-
-  if (pa) {
-    *pa = p8->pkeyalg;
-  }
-  return 1;
-}
-
 int X509_signature_dump(BIO *bp, const ASN1_STRING *sig, int indent) {
   const uint8_t *s;
   int i, n;
diff --git a/deps/boringssl/src/crypto/x509/x509_cmp.c b/deps/boringssl/src/crypto/x509/x509_cmp.c
index cd025ab..cf0a941 100644
--- a/deps/boringssl/src/crypto/x509/x509_cmp.c
+++ b/deps/boringssl/src/crypto/x509/x509_cmp.c
@@ -77,7 +77,7 @@
 
     ai = a->cert_info;
     bi = b->cert_info;
-    i = M_ASN1_INTEGER_cmp(ai->serialNumber, bi->serialNumber);
+    i = ASN1_INTEGER_cmp(ai->serialNumber, bi->serialNumber);
     if (i)
         return (i);
     return (X509_NAME_cmp(ai->issuer, bi->issuer));
@@ -131,7 +131,7 @@
     return OPENSSL_memcmp(a->sha1_hash, b->sha1_hash, 20);
 }
 
-X509_NAME *X509_get_issuer_name(X509 *a)
+X509_NAME *X509_get_issuer_name(const X509 *a)
 {
     return (a->cert_info->issuer);
 }
@@ -146,7 +146,7 @@
     return (X509_NAME_hash_old(x->cert_info->issuer));
 }
 
-X509_NAME *X509_get_subject_name(X509 *a)
+X509_NAME *X509_get_subject_name(const X509 *a)
 {
     return (a->cert_info->subject);
 }
@@ -156,6 +156,11 @@
     return (a->cert_info->serialNumber);
 }
 
+const ASN1_INTEGER *X509_get0_serialNumber(const X509 *x509)
+{
+    return x509->cert_info->serialNumber; /* the stored field, not a copy */
+}
+
 unsigned long X509_subject_name_hash(X509 *x)
 {
     return (X509_NAME_hash(x->cert_info->subject));
diff --git a/deps/boringssl/src/crypto/x509/x509_ext.c b/deps/boringssl/src/crypto/x509/x509_ext.c
index a329f6f..f6da54a 100644
--- a/deps/boringssl/src/crypto/x509/x509_ext.c
+++ b/deps/boringssl/src/crypto/x509/x509_ext.c
@@ -62,27 +62,28 @@
 #include <openssl/x509.h>
 #include <openssl/x509v3.h>
 
-int X509_CRL_get_ext_count(X509_CRL *x)
+int X509_CRL_get_ext_count(const X509_CRL *x)
 {
     return (X509v3_get_ext_count(x->crl->extensions));
 }
 
-int X509_CRL_get_ext_by_NID(X509_CRL *x, int nid, int lastpos)
+int X509_CRL_get_ext_by_NID(const X509_CRL *x, int nid, int lastpos)
 {
     return (X509v3_get_ext_by_NID(x->crl->extensions, nid, lastpos));
 }
 
-int X509_CRL_get_ext_by_OBJ(X509_CRL *x, ASN1_OBJECT *obj, int lastpos)
+int X509_CRL_get_ext_by_OBJ(const X509_CRL *x, const ASN1_OBJECT *obj,
+                            int lastpos)
 {
     return (X509v3_get_ext_by_OBJ(x->crl->extensions, obj, lastpos));
 }
 
-int X509_CRL_get_ext_by_critical(X509_CRL *x, int crit, int lastpos)
+int X509_CRL_get_ext_by_critical(const X509_CRL *x, int crit, int lastpos)
 {
     return (X509v3_get_ext_by_critical(x->crl->extensions, crit, lastpos));
 }
 
-X509_EXTENSION *X509_CRL_get_ext(X509_CRL *x, int loc)
+X509_EXTENSION *X509_CRL_get_ext(const X509_CRL *x, int loc)
 {
     return (X509v3_get_ext(x->crl->extensions, loc));
 }
@@ -92,9 +93,10 @@
     return (X509v3_delete_ext(x->crl->extensions, loc));
 }
 
-void *X509_CRL_get_ext_d2i(X509_CRL *x, int nid, int *crit, int *idx)
+void *X509_CRL_get_ext_d2i(const X509_CRL *crl, int nid, int *out_critical,
+                           int *out_idx)
 {
-    return X509V3_get_d2i(x->crl->extensions, nid, crit, idx);
+    return X509V3_get_d2i(crl->crl->extensions, nid, out_critical, out_idx);
 }
 
 int X509_CRL_add1_ext_i2d(X509_CRL *x, int nid, void *value, int crit,
@@ -108,28 +110,28 @@
     return (X509v3_add_ext(&(x->crl->extensions), ex, loc) != NULL);
 }
 
-int X509_get_ext_count(X509 *x)
+int X509_get_ext_count(const X509 *x)
 {
     return (X509v3_get_ext_count(x->cert_info->extensions));
 }
 
-int X509_get_ext_by_NID(X509 *x, int nid, int lastpos)
+int X509_get_ext_by_NID(const X509 *x, int nid, int lastpos)
 {
     return (X509v3_get_ext_by_NID(x->cert_info->extensions, nid, lastpos));
 }
 
-int X509_get_ext_by_OBJ(X509 *x, ASN1_OBJECT *obj, int lastpos)
+int X509_get_ext_by_OBJ(const X509 *x, const ASN1_OBJECT *obj, int lastpos)
 {
     return (X509v3_get_ext_by_OBJ(x->cert_info->extensions, obj, lastpos));
 }
 
-int X509_get_ext_by_critical(X509 *x, int crit, int lastpos)
+int X509_get_ext_by_critical(const X509 *x, int crit, int lastpos)
 {
     return (X509v3_get_ext_by_critical
             (x->cert_info->extensions, crit, lastpos));
 }
 
-X509_EXTENSION *X509_get_ext(X509 *x, int loc)
+X509_EXTENSION *X509_get_ext(const X509 *x, int loc)
 {
     return (X509v3_get_ext(x->cert_info->extensions, loc));
 }
@@ -144,9 +146,11 @@
     return (X509v3_add_ext(&(x->cert_info->extensions), ex, loc) != NULL);
 }
 
-void *X509_get_ext_d2i(X509 *x, int nid, int *crit, int *idx)
+void *X509_get_ext_d2i(const X509 *x509, int nid, int *out_critical,
+                       int *out_idx)
 {
-    return X509V3_get_d2i(x->cert_info->extensions, nid, crit, idx);
+    return X509V3_get_d2i(x509->cert_info->extensions, nid, out_critical,
+                          out_idx);
 }
 
 int X509_add1_ext_i2d(X509 *x, int nid, void *value, int crit,
@@ -156,28 +160,29 @@
                            flags);
 }
 
-int X509_REVOKED_get_ext_count(X509_REVOKED *x)
+int X509_REVOKED_get_ext_count(const X509_REVOKED *x)
 {
     return (X509v3_get_ext_count(x->extensions));
 }
 
-int X509_REVOKED_get_ext_by_NID(X509_REVOKED *x, int nid, int lastpos)
+int X509_REVOKED_get_ext_by_NID(const X509_REVOKED *x, int nid, int lastpos)
 {
     return (X509v3_get_ext_by_NID(x->extensions, nid, lastpos));
 }
 
-int X509_REVOKED_get_ext_by_OBJ(X509_REVOKED *x, ASN1_OBJECT *obj,
+int X509_REVOKED_get_ext_by_OBJ(const X509_REVOKED *x, const ASN1_OBJECT *obj,
                                 int lastpos)
 {
     return (X509v3_get_ext_by_OBJ(x->extensions, obj, lastpos));
 }
 
-int X509_REVOKED_get_ext_by_critical(X509_REVOKED *x, int crit, int lastpos)
+int X509_REVOKED_get_ext_by_critical(const X509_REVOKED *x, int crit,
+                                     int lastpos)
 {
     return (X509v3_get_ext_by_critical(x->extensions, crit, lastpos));
 }
 
-X509_EXTENSION *X509_REVOKED_get_ext(X509_REVOKED *x, int loc)
+X509_EXTENSION *X509_REVOKED_get_ext(const X509_REVOKED *x, int loc)
 {
     return (X509v3_get_ext(x->extensions, loc));
 }
@@ -192,9 +197,10 @@
     return (X509v3_add_ext(&(x->extensions), ex, loc) != NULL);
 }
 
-void *X509_REVOKED_get_ext_d2i(X509_REVOKED *x, int nid, int *crit, int *idx)
+void *X509_REVOKED_get_ext_d2i(const X509_REVOKED *revoked, int nid,
+                               int *out_critical, int *out_idx)
 {
-    return X509V3_get_d2i(x->extensions, nid, crit, idx);
+    return X509V3_get_d2i(revoked->extensions, nid, out_critical, out_idx);
 }
 
 int X509_REVOKED_add1_ext_i2d(X509_REVOKED *x, int nid, void *value, int crit,
diff --git a/deps/boringssl/src/crypto/x509/x509_obj.c b/deps/boringssl/src/crypto/x509/x509_obj.c
index 520b7a0..80d16c1 100644
--- a/deps/boringssl/src/crypto/x509/x509_obj.c
+++ b/deps/boringssl/src/crypto/x509/x509_obj.c
@@ -73,7 +73,7 @@
 
 #define NAME_ONELINE_MAX    (1024 * 1024)
 
-char *X509_NAME_oneline(X509_NAME *a, char *buf, int len)
+char *X509_NAME_oneline(const X509_NAME *a, char *buf, int len)
 {
     X509_NAME_ENTRY *ne;
     size_t i;
diff --git a/deps/boringssl/src/crypto/x509/x509_r2x.c b/deps/boringssl/src/crypto/x509/x509_r2x.c
index 723bd49..a44b172 100644
--- a/deps/boringssl/src/crypto/x509/x509_r2x.c
+++ b/deps/boringssl/src/crypto/x509/x509_r2x.c
@@ -79,7 +79,7 @@
     xi = ret->cert_info;
 
     if (sk_X509_ATTRIBUTE_num(r->req_info->attributes) != 0) {
-        if ((xi->version = M_ASN1_INTEGER_new()) == NULL)
+        if ((xi->version = ASN1_INTEGER_new()) == NULL)
             goto err;
         if (!ASN1_INTEGER_set(xi->version, 2))
             goto err;
diff --git a/deps/boringssl/src/crypto/x509/x509_set.c b/deps/boringssl/src/crypto/x509/x509_set.c
index e7bfbe3..5f17851 100644
--- a/deps/boringssl/src/crypto/x509/x509_set.c
+++ b/deps/boringssl/src/crypto/x509/x509_set.c
@@ -75,12 +75,12 @@
     if (x == NULL)
         return (0);
     if (version == 0) {
-        M_ASN1_INTEGER_free(x->cert_info->version);
+        ASN1_INTEGER_free(x->cert_info->version);
         x->cert_info->version = NULL;
         return (1);
     }
     if (x->cert_info->version == NULL) {
-        if ((x->cert_info->version = M_ASN1_INTEGER_new()) == NULL)
+        if ((x->cert_info->version = ASN1_INTEGER_new()) == NULL)
             return (0);
     }
     return (ASN1_INTEGER_set(x->cert_info->version, version));
@@ -94,9 +94,9 @@
         return (0);
     in = x->cert_info->serialNumber;
     if (in != serial) {
-        in = M_ASN1_INTEGER_dup(serial);
+        in = ASN1_INTEGER_dup(serial);
         if (in != NULL) {
-            M_ASN1_INTEGER_free(x->cert_info->serialNumber);
+            ASN1_INTEGER_free(x->cert_info->serialNumber);
             x->cert_info->serialNumber = in;
         }
     }
@@ -117,7 +117,7 @@
     return (X509_NAME_set(&x->cert_info->subject, name));
 }
 
-int X509_set_notBefore(X509 *x, const ASN1_TIME *tm)
+int X509_set1_notBefore(X509 *x, const ASN1_TIME *tm)
 {
     ASN1_TIME *in;
 
@@ -125,15 +125,20 @@
         return (0);
     in = x->cert_info->validity->notBefore;
     if (in != tm) {
-        in = M_ASN1_TIME_dup(tm);
+        in = ASN1_STRING_dup(tm);
         if (in != NULL) {
-            M_ASN1_TIME_free(x->cert_info->validity->notBefore);
+            ASN1_TIME_free(x->cert_info->validity->notBefore);
             x->cert_info->validity->notBefore = in;
         }
     }
     return (in != NULL);
 }
 
+int X509_set_notBefore(X509 *x, const ASN1_TIME *tm)
+{
+    return X509_set1_notBefore(x, tm); /* forwards to the set1 variant */
+}
+
 const ASN1_TIME *X509_get0_notBefore(const X509 *x)
 {
     return x->cert_info->validity->notBefore;
@@ -155,7 +160,7 @@
     return x509->cert_info->validity->notBefore;
 }
 
-int X509_set_notAfter(X509 *x, const ASN1_TIME *tm)
+int X509_set1_notAfter(X509 *x, const ASN1_TIME *tm)
 {
     ASN1_TIME *in;
 
@@ -163,15 +168,20 @@
         return (0);
     in = x->cert_info->validity->notAfter;
     if (in != tm) {
-        in = M_ASN1_TIME_dup(tm);
+        in = ASN1_STRING_dup(tm);
         if (in != NULL) {
-            M_ASN1_TIME_free(x->cert_info->validity->notAfter);
+            ASN1_TIME_free(x->cert_info->validity->notAfter);
             x->cert_info->validity->notAfter = in;
         }
     }
     return (in != NULL);
 }
 
+int X509_set_notAfter(X509 *x, const ASN1_TIME *tm)
+{
+    return X509_set1_notAfter(x, tm); /* forwards to the set1 variant */
+}
+
 const ASN1_TIME *X509_get0_notAfter(const X509 *x)
 {
     return x->cert_info->validity->notAfter;
@@ -193,6 +203,17 @@
     return x509->cert_info->validity->notAfter;
 }
 
+void X509_get0_uids(const X509 *x509, const ASN1_BIT_STRING **out_issuer_uid,
+                    const ASN1_BIT_STRING **out_subject_uid)
+{
+    if (out_issuer_uid != NULL) { /* a NULL output pointer is skipped */
+        *out_issuer_uid = x509->cert_info->issuerUID;
+    }
+    if (out_subject_uid != NULL) { /* likewise optional */
+        *out_subject_uid = x509->cert_info->subjectUID;
+    }
+}
+
 int X509_set_pubkey(X509 *x, EVP_PKEY *pkey)
 {
     if ((x == NULL) || (x->cert_info == NULL))
@@ -200,7 +221,7 @@
     return (X509_PUBKEY_set(&(x->cert_info->key), pkey));
 }
 
-STACK_OF(X509_EXTENSION) *X509_get0_extensions(const X509 *x)
+const STACK_OF(X509_EXTENSION) *X509_get0_extensions(const X509 *x)
 {
     return x->cert_info->extensions;
 }
diff --git a/deps/boringssl/src/crypto/x509/x509_test.cc b/deps/boringssl/src/crypto/x509/x509_test.cc
index 366e66e..0debb8a 100644
--- a/deps/boringssl/src/crypto/x509/x509_test.cc
+++ b/deps/boringssl/src/crypto/x509/x509_test.cc
@@ -37,208 +37,220 @@
 
 std::string GetTestData(const char *path);
 
-static const char kCrossSigningRootPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIICcTCCAdqgAwIBAgIIagJHiPvE0MowDQYJKoZIhvcNAQELBQAwPDEaMBgGA1UE\n"
-    "ChMRQm9yaW5nU1NMIFRFU1RJTkcxHjAcBgNVBAMTFUNyb3NzLXNpZ25pbmcgUm9v\n"
-    "dCBDQTAgFw0xNTAxMDEwMDAwMDBaGA8yMTAwMDEwMTAwMDAwMFowPDEaMBgGA1UE\n"
-    "ChMRQm9yaW5nU1NMIFRFU1RJTkcxHjAcBgNVBAMTFUNyb3NzLXNpZ25pbmcgUm9v\n"
-    "dCBDQTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAwo3qFvSB9Zmlbpzn9wJp\n"
-    "ikI75Rxkatez8VkLqyxbOhPYl2Haz8F5p1gDG96dCI6jcLGgu3AKT9uhEQyyUko5\n"
-    "EKYasazSeA9CQrdyhPg0mkTYVETnPM1W/ebid1YtqQbq1CMWlq2aTDoSGAReGFKP\n"
-    "RTdXAbuAXzpCfi/d8LqV13UCAwEAAaN6MHgwDgYDVR0PAQH/BAQDAgIEMB0GA1Ud\n"
-    "JQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjAPBgNVHRMBAf8EBTADAQH/MBkGA1Ud\n"
-    "DgQSBBBHKHC7V3Z/3oLvEZx0RZRwMBsGA1UdIwQUMBKAEEcocLtXdn/egu8RnHRF\n"
-    "lHAwDQYJKoZIhvcNAQELBQADgYEAnglibsy6mGtpIXivtlcz4zIEnHw/lNW+r/eC\n"
-    "CY7evZTmOoOuC/x9SS3MF9vawt1HFUummWM6ZgErqVBOXIB4//ykrcCgf5ZbF5Hr\n"
-    "+3EFprKhBqYiXdD8hpBkrBoXwn85LPYWNd2TceCrx0YtLIprE2R5MB2RIq8y4Jk3\n"
-    "YFXvkME=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kCrossSigningRootPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIICcTCCAdqgAwIBAgIIagJHiPvE0MowDQYJKoZIhvcNAQELBQAwPDEaMBgGA1UE
+ChMRQm9yaW5nU1NMIFRFU1RJTkcxHjAcBgNVBAMTFUNyb3NzLXNpZ25pbmcgUm9v
+dCBDQTAgFw0xNTAxMDEwMDAwMDBaGA8yMTAwMDEwMTAwMDAwMFowPDEaMBgGA1UE
+ChMRQm9yaW5nU1NMIFRFU1RJTkcxHjAcBgNVBAMTFUNyb3NzLXNpZ25pbmcgUm9v
+dCBDQTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEAwo3qFvSB9Zmlbpzn9wJp
+ikI75Rxkatez8VkLqyxbOhPYl2Haz8F5p1gDG96dCI6jcLGgu3AKT9uhEQyyUko5
+EKYasazSeA9CQrdyhPg0mkTYVETnPM1W/ebid1YtqQbq1CMWlq2aTDoSGAReGFKP
+RTdXAbuAXzpCfi/d8LqV13UCAwEAAaN6MHgwDgYDVR0PAQH/BAQDAgIEMB0GA1Ud
+JQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjAPBgNVHRMBAf8EBTADAQH/MBkGA1Ud
+DgQSBBBHKHC7V3Z/3oLvEZx0RZRwMBsGA1UdIwQUMBKAEEcocLtXdn/egu8RnHRF
+lHAwDQYJKoZIhvcNAQELBQADgYEAnglibsy6mGtpIXivtlcz4zIEnHw/lNW+r/eC
+CY7evZTmOoOuC/x9SS3MF9vawt1HFUummWM6ZgErqVBOXIB4//ykrcCgf5ZbF5Hr
++3EFprKhBqYiXdD8hpBkrBoXwn85LPYWNd2TceCrx0YtLIprE2R5MB2RIq8y4Jk3
+YFXvkME=
+-----END CERTIFICATE-----
+)";
 
-static const char kRootCAPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIICVTCCAb6gAwIBAgIIAj5CwoHlWuYwDQYJKoZIhvcNAQELBQAwLjEaMBgGA1UE\n"
-    "ChMRQm9yaW5nU1NMIFRFU1RJTkcxEDAOBgNVBAMTB1Jvb3QgQ0EwIBcNMTUwMTAx\n"
-    "MDAwMDAwWhgPMjEwMDAxMDEwMDAwMDBaMC4xGjAYBgNVBAoTEUJvcmluZ1NTTCBU\n"
-    "RVNUSU5HMRAwDgYDVQQDEwdSb290IENBMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCB\n"
-    "iQKBgQDpDn8RDOZa5oaDcPZRBy4CeBH1siSSOO4mYgLHlPE+oXdqwI/VImi2XeJM\n"
-    "2uCFETXCknJJjYG0iJdrt/yyRFvZTQZw+QzGj+mz36NqhGxDWb6dstB2m8PX+plZ\n"
-    "w7jl81MDvUnWs8yiQ/6twgu5AbhWKZQDJKcNKCEpqa6UW0r5nwIDAQABo3oweDAO\n"
-    "BgNVHQ8BAf8EBAMCAgQwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMA8G\n"
-    "A1UdEwEB/wQFMAMBAf8wGQYDVR0OBBIEEEA31wH7QC+4HH5UBCeMWQEwGwYDVR0j\n"
-    "BBQwEoAQQDfXAftAL7gcflQEJ4xZATANBgkqhkiG9w0BAQsFAAOBgQDXylEK77Za\n"
-    "kKeY6ZerrScWyZhrjIGtHFu09qVpdJEzrk87k2G7iHHR9CAvSofCgEExKtWNS9dN\n"
-    "+9WiZp/U48iHLk7qaYXdEuO07No4BYtXn+lkOykE+FUxmA4wvOF1cTd2tdj3MzX2\n"
-    "kfGIBAYhzGZWhY3JbhIfTEfY1PNM1pWChQ==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kRootCAPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIICVTCCAb6gAwIBAgIIAj5CwoHlWuYwDQYJKoZIhvcNAQELBQAwLjEaMBgGA1UE
+ChMRQm9yaW5nU1NMIFRFU1RJTkcxEDAOBgNVBAMTB1Jvb3QgQ0EwIBcNMTUwMTAx
+MDAwMDAwWhgPMjEwMDAxMDEwMDAwMDBaMC4xGjAYBgNVBAoTEUJvcmluZ1NTTCBU
+RVNUSU5HMRAwDgYDVQQDEwdSb290IENBMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCB
+iQKBgQDpDn8RDOZa5oaDcPZRBy4CeBH1siSSOO4mYgLHlPE+oXdqwI/VImi2XeJM
+2uCFETXCknJJjYG0iJdrt/yyRFvZTQZw+QzGj+mz36NqhGxDWb6dstB2m8PX+plZ
+w7jl81MDvUnWs8yiQ/6twgu5AbhWKZQDJKcNKCEpqa6UW0r5nwIDAQABo3oweDAO
+BgNVHQ8BAf8EBAMCAgQwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsGAQUFBwMCMA8G
+A1UdEwEB/wQFMAMBAf8wGQYDVR0OBBIEEEA31wH7QC+4HH5UBCeMWQEwGwYDVR0j
+BBQwEoAQQDfXAftAL7gcflQEJ4xZATANBgkqhkiG9w0BAQsFAAOBgQDXylEK77Za
+kKeY6ZerrScWyZhrjIGtHFu09qVpdJEzrk87k2G7iHHR9CAvSofCgEExKtWNS9dN
++9WiZp/U48iHLk7qaYXdEuO07No4BYtXn+lkOykE+FUxmA4wvOF1cTd2tdj3MzX2
+kfGIBAYhzGZWhY3JbhIfTEfY1PNM1pWChQ==
+-----END CERTIFICATE-----
+)";
 
-static const char kRootCrossSignedPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIICYzCCAcygAwIBAgIIAj5CwoHlWuYwDQYJKoZIhvcNAQELBQAwPDEaMBgGA1UE\n"
-    "ChMRQm9yaW5nU1NMIFRFU1RJTkcxHjAcBgNVBAMTFUNyb3NzLXNpZ25pbmcgUm9v\n"
-    "dCBDQTAgFw0xNTAxMDEwMDAwMDBaGA8yMTAwMDEwMTAwMDAwMFowLjEaMBgGA1UE\n"
-    "ChMRQm9yaW5nU1NMIFRFU1RJTkcxEDAOBgNVBAMTB1Jvb3QgQ0EwgZ8wDQYJKoZI\n"
-    "hvcNAQEBBQADgY0AMIGJAoGBAOkOfxEM5lrmhoNw9lEHLgJ4EfWyJJI47iZiAseU\n"
-    "8T6hd2rAj9UiaLZd4kza4IURNcKSckmNgbSIl2u3/LJEW9lNBnD5DMaP6bPfo2qE\n"
-    "bENZvp2y0Habw9f6mVnDuOXzUwO9SdazzKJD/q3CC7kBuFYplAMkpw0oISmprpRb\n"
-    "SvmfAgMBAAGjejB4MA4GA1UdDwEB/wQEAwICBDAdBgNVHSUEFjAUBggrBgEFBQcD\n"
-    "AQYIKwYBBQUHAwIwDwYDVR0TAQH/BAUwAwEB/zAZBgNVHQ4EEgQQQDfXAftAL7gc\n"
-    "flQEJ4xZATAbBgNVHSMEFDASgBBHKHC7V3Z/3oLvEZx0RZRwMA0GCSqGSIb3DQEB\n"
-    "CwUAA4GBAErTxYJ0en9HVRHAAr5OO5wuk5Iq3VMc79TMyQLCXVL8YH8Uk7KEwv+q\n"
-    "9MEKZv2eR/Vfm4HlXlUuIqfgUXbwrAYC/YVVX86Wnbpy/jc73NYVCq8FEZeO+0XU\n"
-    "90SWAPDdp+iL7aZdimnMtG1qlM1edmz8AKbrhN/R3IbA2CL0nCWV\n"
-    "-----END CERTIFICATE-----\n";
+static const char kRootCrossSignedPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIICYzCCAcygAwIBAgIIAj5CwoHlWuYwDQYJKoZIhvcNAQELBQAwPDEaMBgGA1UE
+ChMRQm9yaW5nU1NMIFRFU1RJTkcxHjAcBgNVBAMTFUNyb3NzLXNpZ25pbmcgUm9v
+dCBDQTAgFw0xNTAxMDEwMDAwMDBaGA8yMTAwMDEwMTAwMDAwMFowLjEaMBgGA1UE
+ChMRQm9yaW5nU1NMIFRFU1RJTkcxEDAOBgNVBAMTB1Jvb3QgQ0EwgZ8wDQYJKoZI
+hvcNAQEBBQADgY0AMIGJAoGBAOkOfxEM5lrmhoNw9lEHLgJ4EfWyJJI47iZiAseU
+8T6hd2rAj9UiaLZd4kza4IURNcKSckmNgbSIl2u3/LJEW9lNBnD5DMaP6bPfo2qE
+bENZvp2y0Habw9f6mVnDuOXzUwO9SdazzKJD/q3CC7kBuFYplAMkpw0oISmprpRb
+SvmfAgMBAAGjejB4MA4GA1UdDwEB/wQEAwICBDAdBgNVHSUEFjAUBggrBgEFBQcD
+AQYIKwYBBQUHAwIwDwYDVR0TAQH/BAUwAwEB/zAZBgNVHQ4EEgQQQDfXAftAL7gc
+flQEJ4xZATAbBgNVHSMEFDASgBBHKHC7V3Z/3oLvEZx0RZRwMA0GCSqGSIb3DQEB
+CwUAA4GBAErTxYJ0en9HVRHAAr5OO5wuk5Iq3VMc79TMyQLCXVL8YH8Uk7KEwv+q
+9MEKZv2eR/Vfm4HlXlUuIqfgUXbwrAYC/YVVX86Wnbpy/jc73NYVCq8FEZeO+0XU
+90SWAPDdp+iL7aZdimnMtG1qlM1edmz8AKbrhN/R3IbA2CL0nCWV
+-----END CERTIFICATE-----
+)";
 
-static const char kIntermediatePEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIICXjCCAcegAwIBAgIJAKJMH+7rscPcMA0GCSqGSIb3DQEBCwUAMC4xGjAYBgNV\n"
-    "BAoTEUJvcmluZ1NTTCBURVNUSU5HMRAwDgYDVQQDEwdSb290IENBMCAXDTE1MDEw\n"
-    "MTAwMDAwMFoYDzIxMDAwMTAxMDAwMDAwWjA2MRowGAYDVQQKExFCb3JpbmdTU0wg\n"
-    "VEVTVElORzEYMBYGA1UEAxMPSW50ZXJtZWRpYXRlIENBMIGfMA0GCSqGSIb3DQEB\n"
-    "AQUAA4GNADCBiQKBgQC7YtI0l8ocTYJ0gKyXTtPL4iMJCNY4OcxXl48jkncVG1Hl\n"
-    "blicgNUa1r9m9YFtVkxvBinb8dXiUpEGhVg4awRPDcatlsBSEBuJkiZGYbRcAmSu\n"
-    "CmZYnf6u3aYQ18SU8WqVERPpE4cwVVs+6kwlzRw0+XDoZAczu8ZezVhCUc6NbQID\n"
-    "AQABo3oweDAOBgNVHQ8BAf8EBAMCAgQwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsG\n"
-    "AQUFBwMCMA8GA1UdEwEB/wQFMAMBAf8wGQYDVR0OBBIEEIwaaKi1dttdV3sfjRSy\n"
-    "BqMwGwYDVR0jBBQwEoAQQDfXAftAL7gcflQEJ4xZATANBgkqhkiG9w0BAQsFAAOB\n"
-    "gQCvnolNWEHuQS8PFVVyuLR+FKBeUUdrVbSfHSzTqNAqQGp0C9fk5oCzDq6ZgTfY\n"
-    "ESXM4cJhb3IAnW0UM0NFsYSKQJ50JZL2L3z5ZLQhHdbs4RmODGoC40BVdnJ4/qgB\n"
-    "aGSh09eQRvAVmbVCviDK2ipkWNegdyI19jFfNP5uIkGlYg==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kIntermediatePEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIICXjCCAcegAwIBAgIJAKJMH+7rscPcMA0GCSqGSIb3DQEBCwUAMC4xGjAYBgNV
+BAoTEUJvcmluZ1NTTCBURVNUSU5HMRAwDgYDVQQDEwdSb290IENBMCAXDTE1MDEw
+MTAwMDAwMFoYDzIxMDAwMTAxMDAwMDAwWjA2MRowGAYDVQQKExFCb3JpbmdTU0wg
+VEVTVElORzEYMBYGA1UEAxMPSW50ZXJtZWRpYXRlIENBMIGfMA0GCSqGSIb3DQEB
+AQUAA4GNADCBiQKBgQC7YtI0l8ocTYJ0gKyXTtPL4iMJCNY4OcxXl48jkncVG1Hl
+blicgNUa1r9m9YFtVkxvBinb8dXiUpEGhVg4awRPDcatlsBSEBuJkiZGYbRcAmSu
+CmZYnf6u3aYQ18SU8WqVERPpE4cwVVs+6kwlzRw0+XDoZAczu8ZezVhCUc6NbQID
+AQABo3oweDAOBgNVHQ8BAf8EBAMCAgQwHQYDVR0lBBYwFAYIKwYBBQUHAwEGCCsG
+AQUFBwMCMA8GA1UdEwEB/wQFMAMBAf8wGQYDVR0OBBIEEIwaaKi1dttdV3sfjRSy
+BqMwGwYDVR0jBBQwEoAQQDfXAftAL7gcflQEJ4xZATANBgkqhkiG9w0BAQsFAAOB
+gQCvnolNWEHuQS8PFVVyuLR+FKBeUUdrVbSfHSzTqNAqQGp0C9fk5oCzDq6ZgTfY
+ESXM4cJhb3IAnW0UM0NFsYSKQJ50JZL2L3z5ZLQhHdbs4RmODGoC40BVdnJ4/qgB
+aGSh09eQRvAVmbVCviDK2ipkWNegdyI19jFfNP5uIkGlYg==
+-----END CERTIFICATE-----
+)";
 
-static const char kIntermediateSelfSignedPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIICZjCCAc+gAwIBAgIJAKJMH+7rscPcMA0GCSqGSIb3DQEBCwUAMDYxGjAYBgNV\n"
-    "BAoTEUJvcmluZ1NTTCBURVNUSU5HMRgwFgYDVQQDEw9JbnRlcm1lZGlhdGUgQ0Ew\n"
-    "IBcNMTUwMTAxMDAwMDAwWhgPMjEwMDAxMDEwMDAwMDBaMDYxGjAYBgNVBAoTEUJv\n"
-    "cmluZ1NTTCBURVNUSU5HMRgwFgYDVQQDEw9JbnRlcm1lZGlhdGUgQ0EwgZ8wDQYJ\n"
-    "KoZIhvcNAQEBBQADgY0AMIGJAoGBALti0jSXyhxNgnSArJdO08viIwkI1jg5zFeX\n"
-    "jyOSdxUbUeVuWJyA1RrWv2b1gW1WTG8GKdvx1eJSkQaFWDhrBE8Nxq2WwFIQG4mS\n"
-    "JkZhtFwCZK4KZlid/q7dphDXxJTxapURE+kThzBVWz7qTCXNHDT5cOhkBzO7xl7N\n"
-    "WEJRzo1tAgMBAAGjejB4MA4GA1UdDwEB/wQEAwICBDAdBgNVHSUEFjAUBggrBgEF\n"
-    "BQcDAQYIKwYBBQUHAwIwDwYDVR0TAQH/BAUwAwEB/zAZBgNVHQ4EEgQQjBpoqLV2\n"
-    "211Xex+NFLIGozAbBgNVHSMEFDASgBCMGmiotXbbXVd7H40UsgajMA0GCSqGSIb3\n"
-    "DQEBCwUAA4GBALcccSrAQ0/EqQBsx0ZDTUydHXXNP2DrUkpUKmAXIe8McqIVSlkT\n"
-    "6H4xz7z8VRKBo9j+drjjtCw2i0CQc8aOLxRb5WJ8eVLnaW2XRlUqAzhF0CrulfVI\n"
-    "E4Vs6ZLU+fra1WAuIj6qFiigRja+3YkZArG8tMA9vtlhTX/g7YBZIkqH\n"
-    "-----END CERTIFICATE-----\n";
+static const char kIntermediateSelfSignedPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIICZjCCAc+gAwIBAgIJAKJMH+7rscPcMA0GCSqGSIb3DQEBCwUAMDYxGjAYBgNV
+BAoTEUJvcmluZ1NTTCBURVNUSU5HMRgwFgYDVQQDEw9JbnRlcm1lZGlhdGUgQ0Ew
+IBcNMTUwMTAxMDAwMDAwWhgPMjEwMDAxMDEwMDAwMDBaMDYxGjAYBgNVBAoTEUJv
+cmluZ1NTTCBURVNUSU5HMRgwFgYDVQQDEw9JbnRlcm1lZGlhdGUgQ0EwgZ8wDQYJ
+KoZIhvcNAQEBBQADgY0AMIGJAoGBALti0jSXyhxNgnSArJdO08viIwkI1jg5zFeX
+jyOSdxUbUeVuWJyA1RrWv2b1gW1WTG8GKdvx1eJSkQaFWDhrBE8Nxq2WwFIQG4mS
+JkZhtFwCZK4KZlid/q7dphDXxJTxapURE+kThzBVWz7qTCXNHDT5cOhkBzO7xl7N
+WEJRzo1tAgMBAAGjejB4MA4GA1UdDwEB/wQEAwICBDAdBgNVHSUEFjAUBggrBgEF
+BQcDAQYIKwYBBQUHAwIwDwYDVR0TAQH/BAUwAwEB/zAZBgNVHQ4EEgQQjBpoqLV2
+211Xex+NFLIGozAbBgNVHSMEFDASgBCMGmiotXbbXVd7H40UsgajMA0GCSqGSIb3
+DQEBCwUAA4GBALcccSrAQ0/EqQBsx0ZDTUydHXXNP2DrUkpUKmAXIe8McqIVSlkT
+6H4xz7z8VRKBo9j+drjjtCw2i0CQc8aOLxRb5WJ8eVLnaW2XRlUqAzhF0CrulfVI
+E4Vs6ZLU+fra1WAuIj6qFiigRja+3YkZArG8tMA9vtlhTX/g7YBZIkqH
+-----END CERTIFICATE-----
+)";
 
-static const char kLeafPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIICXjCCAcegAwIBAgIIWjO48ufpunYwDQYJKoZIhvcNAQELBQAwNjEaMBgGA1UE\n"
-    "ChMRQm9yaW5nU1NMIFRFU1RJTkcxGDAWBgNVBAMTD0ludGVybWVkaWF0ZSBDQTAg\n"
-    "Fw0xNTAxMDEwMDAwMDBaGA8yMTAwMDEwMTAwMDAwMFowMjEaMBgGA1UEChMRQm9y\n"
-    "aW5nU1NMIFRFU1RJTkcxFDASBgNVBAMTC2V4YW1wbGUuY29tMIGfMA0GCSqGSIb3\n"
-    "DQEBAQUAA4GNADCBiQKBgQDD0U0ZYgqShJ7oOjsyNKyVXEHqeafmk/bAoPqY/h1c\n"
-    "oPw2E8KmeqiUSoTPjG5IXSblOxcqpbAXgnjPzo8DI3GNMhAf8SYNYsoH7gc7Uy7j\n"
-    "5x8bUrisGnuTHqkqH6d4/e7ETJ7i3CpR8bvK16DggEvQTudLipz8FBHtYhFakfdh\n"
-    "TwIDAQABo3cwdTAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYIKwYBBQUHAwEG\n"
-    "CCsGAQUFBwMCMAwGA1UdEwEB/wQCMAAwGQYDVR0OBBIEEKN5pvbur7mlXjeMEYA0\n"
-    "4nUwGwYDVR0jBBQwEoAQjBpoqLV2211Xex+NFLIGozANBgkqhkiG9w0BAQsFAAOB\n"
-    "gQBj/p+JChp//LnXWC1k121LM/ii7hFzQzMrt70bny406SGz9jAjaPOX4S3gt38y\n"
-    "rhjpPukBlSzgQXFg66y6q5qp1nQTD1Cw6NkKBe9WuBlY3iYfmsf7WT8nhlT1CttU\n"
-    "xNCwyMX9mtdXdQicOfNjIGUCD5OLV5PgHFPRKiHHioBAhg==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kLeafPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIICXjCCAcegAwIBAgIIWjO48ufpunYwDQYJKoZIhvcNAQELBQAwNjEaMBgGA1UE
+ChMRQm9yaW5nU1NMIFRFU1RJTkcxGDAWBgNVBAMTD0ludGVybWVkaWF0ZSBDQTAg
+Fw0xNTAxMDEwMDAwMDBaGA8yMTAwMDEwMTAwMDAwMFowMjEaMBgGA1UEChMRQm9y
+aW5nU1NMIFRFU1RJTkcxFDASBgNVBAMTC2V4YW1wbGUuY29tMIGfMA0GCSqGSIb3
+DQEBAQUAA4GNADCBiQKBgQDD0U0ZYgqShJ7oOjsyNKyVXEHqeafmk/bAoPqY/h1c
+oPw2E8KmeqiUSoTPjG5IXSblOxcqpbAXgnjPzo8DI3GNMhAf8SYNYsoH7gc7Uy7j
+5x8bUrisGnuTHqkqH6d4/e7ETJ7i3CpR8bvK16DggEvQTudLipz8FBHtYhFakfdh
+TwIDAQABo3cwdTAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYIKwYBBQUHAwEG
+CCsGAQUFBwMCMAwGA1UdEwEB/wQCMAAwGQYDVR0OBBIEEKN5pvbur7mlXjeMEYA0
+4nUwGwYDVR0jBBQwEoAQjBpoqLV2211Xex+NFLIGozANBgkqhkiG9w0BAQsFAAOB
+gQBj/p+JChp//LnXWC1k121LM/ii7hFzQzMrt70bny406SGz9jAjaPOX4S3gt38y
+rhjpPukBlSzgQXFg66y6q5qp1nQTD1Cw6NkKBe9WuBlY3iYfmsf7WT8nhlT1CttU
+xNCwyMX9mtdXdQicOfNjIGUCD5OLV5PgHFPRKiHHioBAhg==
+-----END CERTIFICATE-----
+)";
 
-static const char kLeafNoKeyUsagePEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIICNTCCAZ6gAwIBAgIJAIFQGaLQ0G2mMA0GCSqGSIb3DQEBCwUAMDYxGjAYBgNV\n"
-    "BAoTEUJvcmluZ1NTTCBURVNUSU5HMRgwFgYDVQQDEw9JbnRlcm1lZGlhdGUgQ0Ew\n"
-    "IBcNMTUwMTAxMDAwMDAwWhgPMjEwMDAxMDEwMDAwMDBaMDcxGjAYBgNVBAoTEUJv\n"
-    "cmluZ1NTTCBURVNUSU5HMRkwFwYDVQQDExBldmlsLmV4YW1wbGUuY29tMIGfMA0G\n"
-    "CSqGSIb3DQEBAQUAA4GNADCBiQKBgQDOKoZe75NPz77EOaMMl4/0s3PyQw++zJvp\n"
-    "ejHAxZiTPCJgMbEHLrSzNoHdopg+CLUH5bE4wTXM8w9Inv5P8OAFJt7gJuPUunmk\n"
-    "j+NoU3QfzOR6BroePcz1vXX9jyVHRs087M/sLqWRHu9IR+/A+UTcBaWaFiDVUxtJ\n"
-    "YOwFMwjNPQIDAQABo0gwRjAMBgNVHRMBAf8EAjAAMBkGA1UdDgQSBBBJfLEUWHq1\n"
-    "27rZ1AVx2J5GMBsGA1UdIwQUMBKAEIwaaKi1dttdV3sfjRSyBqMwDQYJKoZIhvcN\n"
-    "AQELBQADgYEALVKN2Y3LZJOtu6SxFIYKxbLaXhTGTdIjxipZhmbBRDFjbZjZZOTe\n"
-    "6Oo+VDNPYco4rBexK7umYXJyfTqoY0E8dbiImhTcGTEj7OAB3DbBomgU1AYe+t2D\n"
-    "uwBqh4Y3Eto+Zn4pMVsxGEfUpjzjZDel7bN1/oU/9KWPpDfywfUmjgk=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kLeafNoKeyUsagePEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIICNTCCAZ6gAwIBAgIJAIFQGaLQ0G2mMA0GCSqGSIb3DQEBCwUAMDYxGjAYBgNV
+BAoTEUJvcmluZ1NTTCBURVNUSU5HMRgwFgYDVQQDEw9JbnRlcm1lZGlhdGUgQ0Ew
+IBcNMTUwMTAxMDAwMDAwWhgPMjEwMDAxMDEwMDAwMDBaMDcxGjAYBgNVBAoTEUJv
+cmluZ1NTTCBURVNUSU5HMRkwFwYDVQQDExBldmlsLmV4YW1wbGUuY29tMIGfMA0G
+CSqGSIb3DQEBAQUAA4GNADCBiQKBgQDOKoZe75NPz77EOaMMl4/0s3PyQw++zJvp
+ejHAxZiTPCJgMbEHLrSzNoHdopg+CLUH5bE4wTXM8w9Inv5P8OAFJt7gJuPUunmk
+j+NoU3QfzOR6BroePcz1vXX9jyVHRs087M/sLqWRHu9IR+/A+UTcBaWaFiDVUxtJ
+YOwFMwjNPQIDAQABo0gwRjAMBgNVHRMBAf8EAjAAMBkGA1UdDgQSBBBJfLEUWHq1
+27rZ1AVx2J5GMBsGA1UdIwQUMBKAEIwaaKi1dttdV3sfjRSyBqMwDQYJKoZIhvcN
+AQELBQADgYEALVKN2Y3LZJOtu6SxFIYKxbLaXhTGTdIjxipZhmbBRDFjbZjZZOTe
+6Oo+VDNPYco4rBexK7umYXJyfTqoY0E8dbiImhTcGTEj7OAB3DbBomgU1AYe+t2D
+uwBqh4Y3Eto+Zn4pMVsxGEfUpjzjZDel7bN1/oU/9KWPpDfywfUmjgk=
+-----END CERTIFICATE-----
+)";
 
-static const char kForgeryPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIICZzCCAdCgAwIBAgIIdTlMzQoKkeMwDQYJKoZIhvcNAQELBQAwNzEaMBgGA1UE\n"
-    "ChMRQm9yaW5nU1NMIFRFU1RJTkcxGTAXBgNVBAMTEGV2aWwuZXhhbXBsZS5jb20w\n"
-    "IBcNMTUwMTAxMDAwMDAwWhgPMjEwMDAxMDEwMDAwMDBaMDoxGjAYBgNVBAoTEUJv\n"
-    "cmluZ1NTTCBURVNUSU5HMRwwGgYDVQQDExNmb3JnZXJ5LmV4YW1wbGUuY29tMIGf\n"
-    "MA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDADTwruBQZGb7Ay6s9HiYv5d1lwtEy\n"
-    "xQdA2Sy8Rn8uA20Q4KgqwVY7wzIZ+z5Butrsmwb70gdG1XU+yRaDeE7XVoW6jSpm\n"
-    "0sw35/5vJbTcL4THEFbnX0OPZnvpuZDFUkvVtq5kxpDWsVyM24G8EEq7kPih3Sa3\n"
-    "OMhXVXF8kso6UQIDAQABo3cwdTAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYI\n"
-    "KwYBBQUHAwEGCCsGAQUFBwMCMAwGA1UdEwEB/wQCMAAwGQYDVR0OBBIEEEYJ/WHM\n"
-    "8p64erPWIg4/liwwGwYDVR0jBBQwEoAQSXyxFFh6tdu62dQFcdieRjANBgkqhkiG\n"
-    "9w0BAQsFAAOBgQA+zH7bHPElWRWJvjxDqRexmYLn+D3Aivs8XgXQJsM94W0EzSUf\n"
-    "DSLfRgaQwcb2gg2xpDFoG+W0vc6O651uF23WGt5JaFFJJxqjII05IexfCNhuPmp4\n"
-    "4UZAXPttuJXpn74IY1tuouaM06B3vXKZR+/ityKmfJvSwxacmFcK+2ziAg==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kForgeryPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIICZzCCAdCgAwIBAgIIdTlMzQoKkeMwDQYJKoZIhvcNAQELBQAwNzEaMBgGA1UE
+ChMRQm9yaW5nU1NMIFRFU1RJTkcxGTAXBgNVBAMTEGV2aWwuZXhhbXBsZS5jb20w
+IBcNMTUwMTAxMDAwMDAwWhgPMjEwMDAxMDEwMDAwMDBaMDoxGjAYBgNVBAoTEUJv
+cmluZ1NTTCBURVNUSU5HMRwwGgYDVQQDExNmb3JnZXJ5LmV4YW1wbGUuY29tMIGf
+MA0GCSqGSIb3DQEBAQUAA4GNADCBiQKBgQDADTwruBQZGb7Ay6s9HiYv5d1lwtEy
+xQdA2Sy8Rn8uA20Q4KgqwVY7wzIZ+z5Butrsmwb70gdG1XU+yRaDeE7XVoW6jSpm
+0sw35/5vJbTcL4THEFbnX0OPZnvpuZDFUkvVtq5kxpDWsVyM24G8EEq7kPih3Sa3
+OMhXVXF8kso6UQIDAQABo3cwdTAOBgNVHQ8BAf8EBAMCBaAwHQYDVR0lBBYwFAYI
+KwYBBQUHAwEGCCsGAQUFBwMCMAwGA1UdEwEB/wQCMAAwGQYDVR0OBBIEEEYJ/WHM
+8p64erPWIg4/liwwGwYDVR0jBBQwEoAQSXyxFFh6tdu62dQFcdieRjANBgkqhkiG
+9w0BAQsFAAOBgQA+zH7bHPElWRWJvjxDqRexmYLn+D3Aivs8XgXQJsM94W0EzSUf
+DSLfRgaQwcb2gg2xpDFoG+W0vc6O651uF23WGt5JaFFJJxqjII05IexfCNhuPmp4
+4UZAXPttuJXpn74IY1tuouaM06B3vXKZR+/ityKmfJvSwxacmFcK+2ziAg==
+-----END CERTIFICATE-----
+)";
 
 // kExamplePSSCert is an example RSA-PSS self-signed certificate, signed with
 // the default hash functions.
-static const char kExamplePSSCert[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIICYjCCAcagAwIBAgIJAI3qUyT6SIfzMBIGCSqGSIb3DQEBCjAFogMCAWowRTEL\n"
-    "MAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVy\n"
-    "bmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0xNDEwMDkxOTA5NTVaFw0xNTEwMDkxOTA5\n"
-    "NTVaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQK\n"
-    "DBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwgZ8wDQYJKoZIhvcNAQEBBQADgY0A\n"
-    "MIGJAoGBAPi4bIO0vNmoV8CltFl2jFQdeesiUgR+0zfrQf2D+fCmhRU0dXFahKg8\n"
-    "0u9aTtPel4rd/7vPCqqGkr64UOTNb4AzMHYTj8p73OxaymPHAyXvqIqDWHYg+hZ3\n"
-    "13mSYwFIGth7Z/FSVUlO1m5KXNd6NzYM3t2PROjCpywrta9kS2EHAgMBAAGjUDBO\n"
-    "MB0GA1UdDgQWBBTQQfuJQR6nrVrsNF1JEflVgXgfEzAfBgNVHSMEGDAWgBTQQfuJ\n"
-    "QR6nrVrsNF1JEflVgXgfEzAMBgNVHRMEBTADAQH/MBIGCSqGSIb3DQEBCjAFogMC\n"
-    "AWoDgYEASUy2RZcgNbNQZA0/7F+V1YTLEXwD16bm+iSVnzGwtexmQVEYIZG74K/w\n"
-    "xbdZQdTbpNJkp1QPjPfh0zsatw6dmt5QoZ8K8No0DjR9dgf+Wvv5WJvJUIQBoAVN\n"
-    "Z0IL+OQFz6+LcTHxD27JJCebrATXZA0wThGTQDm7crL+a+SujBY=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kExamplePSSCert[] = R"(
+-----BEGIN CERTIFICATE-----
+MIICYjCCAcagAwIBAgIJAI3qUyT6SIfzMBIGCSqGSIb3DQEBCjAFogMCAWowRTEL
+MAkGA1UEBhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVy
+bmV0IFdpZGdpdHMgUHR5IEx0ZDAeFw0xNDEwMDkxOTA5NTVaFw0xNTEwMDkxOTA5
+NTVaMEUxCzAJBgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQK
+DBhJbnRlcm5ldCBXaWRnaXRzIFB0eSBMdGQwgZ8wDQYJKoZIhvcNAQEBBQADgY0A
+MIGJAoGBAPi4bIO0vNmoV8CltFl2jFQdeesiUgR+0zfrQf2D+fCmhRU0dXFahKg8
+0u9aTtPel4rd/7vPCqqGkr64UOTNb4AzMHYTj8p73OxaymPHAyXvqIqDWHYg+hZ3
+13mSYwFIGth7Z/FSVUlO1m5KXNd6NzYM3t2PROjCpywrta9kS2EHAgMBAAGjUDBO
+MB0GA1UdDgQWBBTQQfuJQR6nrVrsNF1JEflVgXgfEzAfBgNVHSMEGDAWgBTQQfuJ
+QR6nrVrsNF1JEflVgXgfEzAMBgNVHRMEBTADAQH/MBIGCSqGSIb3DQEBCjAFogMC
+AWoDgYEASUy2RZcgNbNQZA0/7F+V1YTLEXwD16bm+iSVnzGwtexmQVEYIZG74K/w
+xbdZQdTbpNJkp1QPjPfh0zsatw6dmt5QoZ8K8No0DjR9dgf+Wvv5WJvJUIQBoAVN
+Z0IL+OQFz6+LcTHxD27JJCebrATXZA0wThGTQDm7crL+a+SujBY=
+-----END CERTIFICATE-----
+)";
 
 // kBadPSSCertPEM is a self-signed RSA-PSS certificate with bad parameters.
-static const char kBadPSSCertPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIDdjCCAjqgAwIBAgIJANcwZLyfEv7DMD4GCSqGSIb3DQEBCjAxoA0wCwYJYIZI\n"
-    "AWUDBAIBoRowGAYJKoZIhvcNAQEIMAsGCWCGSAFlAwQCAaIEAgIA3jAnMSUwIwYD\n"
-    "VQQDDBxUZXN0IEludmFsaWQgUFNTIGNlcnRpZmljYXRlMB4XDTE1MTEwNDE2MDIz\n"
-    "NVoXDTE1MTIwNDE2MDIzNVowJzElMCMGA1UEAwwcVGVzdCBJbnZhbGlkIFBTUyBj\n"
-    "ZXJ0aWZpY2F0ZTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMTaM7WH\n"
-    "qVCAGAIA+zL1KWvvASTrhlq+1ePdO7wsrWX2KiYoTYrJYTnxhLnn0wrHqApt79nL\n"
-    "IBG7cfShyZqFHOY/IzlYPMVt+gPo293gw96Fds5JBsjhjkyGnOyr9OUntFqvxDbT\n"
-    "IIFU7o9IdxD4edaqjRv+fegVE+B79pDk4s0ujsk6dULtCg9Rst0ucGFo19mr+b7k\n"
-    "dbfn8pZ72ZNDJPueVdrUAWw9oll61UcYfk75XdrLk6JlL41GrYHc8KlfXf43gGQq\n"
-    "QfrpHkg4Ih2cI6Wt2nhFGAzrlcorzLliQIUJRIhM8h4IgDfpBpaPdVQLqS2pFbXa\n"
-    "5eQjqiyJwak2vJ8CAwEAAaNQME4wHQYDVR0OBBYEFCt180N4oGUt5LbzBwQ4Ia+2\n"
-    "4V97MB8GA1UdIwQYMBaAFCt180N4oGUt5LbzBwQ4Ia+24V97MAwGA1UdEwQFMAMB\n"
-    "Af8wMQYJKoZIhvcNAQEKMCSgDTALBglghkgBZQMEAgGhDTALBgkqhkiG9w0BAQii\n"
-    "BAICAN4DggEBAAjBtm90lGxgddjc4Xu/nbXXFHVs2zVcHv/mqOZoQkGB9r/BVgLb\n"
-    "xhHrFZ2pHGElbUYPfifdS9ztB73e1d4J+P29o0yBqfd4/wGAc/JA8qgn6AAEO/Xn\n"
-    "plhFeTRJQtLZVl75CkHXgUGUd3h+ADvKtcBuW9dSUncaUrgNKR8u/h/2sMG38RWY\n"
-    "DzBddC/66YTa3r7KkVUfW7yqRQfELiGKdcm+bjlTEMsvS+EhHup9CzbpoCx2Fx9p\n"
-    "NPtFY3yEObQhmL1JyoCRWqBE75GzFPbRaiux5UpEkns+i3trkGssZzsOuVqHNTNZ\n"
-    "lC9+9hPHIoc9UMmAQNo1vGIW3NWVoeGbaJ8=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kBadPSSCertPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIDdjCCAjqgAwIBAgIJANcwZLyfEv7DMD4GCSqGSIb3DQEBCjAxoA0wCwYJYIZI
+AWUDBAIBoRowGAYJKoZIhvcNAQEIMAsGCWCGSAFlAwQCAaIEAgIA3jAnMSUwIwYD
+VQQDDBxUZXN0IEludmFsaWQgUFNTIGNlcnRpZmljYXRlMB4XDTE1MTEwNDE2MDIz
+NVoXDTE1MTIwNDE2MDIzNVowJzElMCMGA1UEAwwcVGVzdCBJbnZhbGlkIFBTUyBj
+ZXJ0aWZpY2F0ZTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAMTaM7WH
+qVCAGAIA+zL1KWvvASTrhlq+1ePdO7wsrWX2KiYoTYrJYTnxhLnn0wrHqApt79nL
+IBG7cfShyZqFHOY/IzlYPMVt+gPo293gw96Fds5JBsjhjkyGnOyr9OUntFqvxDbT
+IIFU7o9IdxD4edaqjRv+fegVE+B79pDk4s0ujsk6dULtCg9Rst0ucGFo19mr+b7k
+dbfn8pZ72ZNDJPueVdrUAWw9oll61UcYfk75XdrLk6JlL41GrYHc8KlfXf43gGQq
+QfrpHkg4Ih2cI6Wt2nhFGAzrlcorzLliQIUJRIhM8h4IgDfpBpaPdVQLqS2pFbXa
+5eQjqiyJwak2vJ8CAwEAAaNQME4wHQYDVR0OBBYEFCt180N4oGUt5LbzBwQ4Ia+2
+4V97MB8GA1UdIwQYMBaAFCt180N4oGUt5LbzBwQ4Ia+24V97MAwGA1UdEwQFMAMB
+Af8wMQYJKoZIhvcNAQEKMCSgDTALBglghkgBZQMEAgGhDTALBgkqhkiG9w0BAQii
+BAICAN4DggEBAAjBtm90lGxgddjc4Xu/nbXXFHVs2zVcHv/mqOZoQkGB9r/BVgLb
+xhHrFZ2pHGElbUYPfifdS9ztB73e1d4J+P29o0yBqfd4/wGAc/JA8qgn6AAEO/Xn
+plhFeTRJQtLZVl75CkHXgUGUd3h+ADvKtcBuW9dSUncaUrgNKR8u/h/2sMG38RWY
+DzBddC/66YTa3r7KkVUfW7yqRQfELiGKdcm+bjlTEMsvS+EhHup9CzbpoCx2Fx9p
+NPtFY3yEObQhmL1JyoCRWqBE75GzFPbRaiux5UpEkns+i3trkGssZzsOuVqHNTNZ
+lC9+9hPHIoc9UMmAQNo1vGIW3NWVoeGbaJ8=
+-----END CERTIFICATE-----
+)";
 
-static const char kRSAKey[] =
-    "-----BEGIN RSA PRIVATE KEY-----\n"
-    "MIICXgIBAAKBgQDYK8imMuRi/03z0K1Zi0WnvfFHvwlYeyK9Na6XJYaUoIDAtB92\n"
-    "kWdGMdAQhLciHnAjkXLI6W15OoV3gA/ElRZ1xUpxTMhjP6PyY5wqT5r6y8FxbiiF\n"
-    "KKAnHmUcrgfVW28tQ+0rkLGMryRtrukXOgXBv7gcrmU7G1jC2a7WqmeI8QIDAQAB\n"
-    "AoGBAIBy09Fd4DOq/Ijp8HeKuCMKTHqTW1xGHshLQ6jwVV2vWZIn9aIgmDsvkjCe\n"
-    "i6ssZvnbjVcwzSoByhjN8ZCf/i15HECWDFFh6gt0P5z0MnChwzZmvatV/FXCT0j+\n"
-    "WmGNB/gkehKjGXLLcjTb6dRYVJSCZhVuOLLcbWIV10gggJQBAkEA8S8sGe4ezyyZ\n"
-    "m4e9r95g6s43kPqtj5rewTsUxt+2n4eVodD+ZUlCULWVNAFLkYRTBCASlSrm9Xhj\n"
-    "QpmWAHJUkQJBAOVzQdFUaewLtdOJoPCtpYoY1zd22eae8TQEmpGOR11L6kbxLQsk\n"
-    "aMly/DOnOaa82tqAGTdqDEZgSNmCeKKknmECQAvpnY8GUOVAubGR6c+W90iBuQLj\n"
-    "LtFp/9ihd2w/PoDwrHZaoUYVcT4VSfJQog/k7kjE4MYXYWL8eEKg3WTWQNECQQDk\n"
-    "104Wi91Umd1PzF0ijd2jXOERJU1wEKe6XLkYYNHWQAe5l4J4MWj9OdxFXAxIuuR/\n"
-    "tfDwbqkta4xcux67//khAkEAvvRXLHTaa6VFzTaiiO8SaFsHV3lQyXOtMrBpB5jd\n"
-    "moZWgjHvB2W9Ckn7sDqsPB+U2tyX0joDdQEyuiMECDY8oQ==\n"
-    "-----END RSA PRIVATE KEY-----\n";
+static const char kRSAKey[] = R"(
+-----BEGIN RSA PRIVATE KEY-----
+MIICXgIBAAKBgQDYK8imMuRi/03z0K1Zi0WnvfFHvwlYeyK9Na6XJYaUoIDAtB92
+kWdGMdAQhLciHnAjkXLI6W15OoV3gA/ElRZ1xUpxTMhjP6PyY5wqT5r6y8FxbiiF
+KKAnHmUcrgfVW28tQ+0rkLGMryRtrukXOgXBv7gcrmU7G1jC2a7WqmeI8QIDAQAB
+AoGBAIBy09Fd4DOq/Ijp8HeKuCMKTHqTW1xGHshLQ6jwVV2vWZIn9aIgmDsvkjCe
+i6ssZvnbjVcwzSoByhjN8ZCf/i15HECWDFFh6gt0P5z0MnChwzZmvatV/FXCT0j+
+WmGNB/gkehKjGXLLcjTb6dRYVJSCZhVuOLLcbWIV10gggJQBAkEA8S8sGe4ezyyZ
+m4e9r95g6s43kPqtj5rewTsUxt+2n4eVodD+ZUlCULWVNAFLkYRTBCASlSrm9Xhj
+QpmWAHJUkQJBAOVzQdFUaewLtdOJoPCtpYoY1zd22eae8TQEmpGOR11L6kbxLQsk
+aMly/DOnOaa82tqAGTdqDEZgSNmCeKKknmECQAvpnY8GUOVAubGR6c+W90iBuQLj
+LtFp/9ihd2w/PoDwrHZaoUYVcT4VSfJQog/k7kjE4MYXYWL8eEKg3WTWQNECQQDk
+104Wi91Umd1PzF0ijd2jXOERJU1wEKe6XLkYYNHWQAe5l4J4MWj9OdxFXAxIuuR/
+tfDwbqkta4xcux67//khAkEAvvRXLHTaa6VFzTaiiO8SaFsHV3lQyXOtMrBpB5jd
+moZWgjHvB2W9Ckn7sDqsPB+U2tyX0joDdQEyuiMECDY8oQ==
+-----END RSA PRIVATE KEY-----
+)";
 
-static const char kP256Key[] =
-    "-----BEGIN PRIVATE KEY-----\n"
-    "MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgBw8IcnrUoEqc3VnJ\n"
-    "TYlodwi1b8ldMHcO6NHJzgqLtGqhRANCAATmK2niv2Wfl74vHg2UikzVl2u3qR4N\n"
-    "Rvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYaHPUdfvGULUvPciLB\n"
-    "-----END PRIVATE KEY-----\n";
+static const char kP256Key[] = R"(
+-----BEGIN PRIVATE KEY-----
+MIGHAgEAMBMGByqGSM49AgEGCCqGSM49AwEHBG0wawIBAQQgBw8IcnrUoEqc3VnJ
+TYlodwi1b8ldMHcO6NHJzgqLtGqhRANCAATmK2niv2Wfl74vHg2UikzVl2u3qR4N
+Rvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYaHPUdfvGULUvPciLB
+-----END PRIVATE KEY-----
+)";
 
 // kCRLTestRoot is a test root certificate. It has private key:
 //
@@ -269,207 +281,233 @@
 //     Lbf3v2dvxOpTNTONWjp2c+iUQo8QxJCZr5Sfb21oQ9Ktcrmc/CY7LeBVDibXwxdM
 //     vRG8kBzvslFWh7REzC3u06GSVhyKDfW93kN2cKVwGoahRlhj7oHuZQ==
 //     -----END RSA PRIVATE KEY-----
-static const char kCRLTestRoot[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIDbzCCAlegAwIBAgIJAODri7v0dDUFMA0GCSqGSIb3DQEBCwUAME4xCzAJBgNV\n"
-    "BAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRYwFAYDVQQHDA1Nb3VudGFpbiBW\n"
-    "aWV3MRIwEAYDVQQKDAlCb3JpbmdTU0wwHhcNMTYwOTI2MTUwNjI2WhcNMjYwOTI0\n"
-    "MTUwNjI2WjBOMQswCQYDVQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEWMBQG\n"
-    "A1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJQm9yaW5nU1NMMIIBIjANBgkq\n"
-    "hkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAo16WiLWZuaymsD8n5SKPmxV1y6jjgr3B\n"
-    "S/dUBpbrzd1aeFzNlI8l2jfAnzUyp+I21RQ+nh/MhqjGElkTtK9xMn1Y+S9GMRh+\n"
-    "5R/Du0iCb1tCZIPY07Tgrb0KMNWe0v2QKVVruuYSgxIWodBfxlKO64Z8AJ5IbnWp\n"
-    "uRqO6rctN9qUoMlTIAB6dL4G0tDJ/PGFWOJYwOMEIX54bly2wgyYJVBKiRRt4f7n\n"
-    "8H922qmvPNA9idmX9G1VAtgV6x97XXi7ULORIQvn9lVQF6nTYDBJhyuPB+mLThbL\n"
-    "P2o9orxGx7aCtnnBZUIxUvHNOI0FaSaZH7Fi0xsZ/GkG2HZe7ImPJwIDAQABo1Aw\n"
-    "TjAdBgNVHQ4EFgQUWPt3N5cZ/CRvubbrkqfBnAqhq94wHwYDVR0jBBgwFoAUWPt3\n"
-    "N5cZ/CRvubbrkqfBnAqhq94wDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOC\n"
-    "AQEAORu6M0MOwXy+3VEBwNilfTxyqDfruQsc1jA4PT8Oe8zora1WxE1JB4q2FJOz\n"
-    "EAuM3H/NXvEnBuN+ITvKZAJUfm4NKX97qmjMJwLKWe1gVv+VQTr63aR7mgWJReQN\n"
-    "XdMztlVeZs2dppV6uEg3ia1X0G7LARxGpA9ETbMyCpb39XxlYuTClcbA5ftDN99B\n"
-    "3Xg9KNdd++Ew22O3HWRDvdDpTO/JkzQfzi3sYwUtzMEonENhczJhGf7bQMmvL/w5\n"
-    "24Wxj4Z7KzzWIHsNqE/RIs6RV3fcW61j/mRgW2XyoWnMVeBzvcJr9NXp4VQYmFPw\n"
-    "amd8GKMZQvP0ufGnUn7D7uartA==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kCRLTestRoot[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIDbzCCAlegAwIBAgIJAODri7v0dDUFMA0GCSqGSIb3DQEBCwUAME4xCzAJBgNV
+BAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRYwFAYDVQQHDA1Nb3VudGFpbiBW
+aWV3MRIwEAYDVQQKDAlCb3JpbmdTU0wwHhcNMTYwOTI2MTUwNjI2WhcNMjYwOTI0
+MTUwNjI2WjBOMQswCQYDVQQGEwJVUzETMBEGA1UECAwKQ2FsaWZvcm5pYTEWMBQG
+A1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJQm9yaW5nU1NMMIIBIjANBgkq
+hkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAo16WiLWZuaymsD8n5SKPmxV1y6jjgr3B
+S/dUBpbrzd1aeFzNlI8l2jfAnzUyp+I21RQ+nh/MhqjGElkTtK9xMn1Y+S9GMRh+
+5R/Du0iCb1tCZIPY07Tgrb0KMNWe0v2QKVVruuYSgxIWodBfxlKO64Z8AJ5IbnWp
+uRqO6rctN9qUoMlTIAB6dL4G0tDJ/PGFWOJYwOMEIX54bly2wgyYJVBKiRRt4f7n
+8H922qmvPNA9idmX9G1VAtgV6x97XXi7ULORIQvn9lVQF6nTYDBJhyuPB+mLThbL
+P2o9orxGx7aCtnnBZUIxUvHNOI0FaSaZH7Fi0xsZ/GkG2HZe7ImPJwIDAQABo1Aw
+TjAdBgNVHQ4EFgQUWPt3N5cZ/CRvubbrkqfBnAqhq94wHwYDVR0jBBgwFoAUWPt3
+N5cZ/CRvubbrkqfBnAqhq94wDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOC
+AQEAORu6M0MOwXy+3VEBwNilfTxyqDfruQsc1jA4PT8Oe8zora1WxE1JB4q2FJOz
+EAuM3H/NXvEnBuN+ITvKZAJUfm4NKX97qmjMJwLKWe1gVv+VQTr63aR7mgWJReQN
+XdMztlVeZs2dppV6uEg3ia1X0G7LARxGpA9ETbMyCpb39XxlYuTClcbA5ftDN99B
+3Xg9KNdd++Ew22O3HWRDvdDpTO/JkzQfzi3sYwUtzMEonENhczJhGf7bQMmvL/w5
+24Wxj4Z7KzzWIHsNqE/RIs6RV3fcW61j/mRgW2XyoWnMVeBzvcJr9NXp4VQYmFPw
+amd8GKMZQvP0ufGnUn7D7uartA==
+-----END CERTIFICATE-----
+)";
 
-static const char kCRLTestLeaf[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIDkDCCAnigAwIBAgICEAAwDQYJKoZIhvcNAQELBQAwTjELMAkGA1UEBhMCVVMx\n"
-    "EzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcMDU1vdW50YWluIFZpZXcxEjAQ\n"
-    "BgNVBAoMCUJvcmluZ1NTTDAeFw0xNjA5MjYxNTA4MzFaFw0xNzA5MjYxNTA4MzFa\n"
-    "MEsxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRIwEAYDVQQKDAlC\n"
-    "b3JpbmdTU0wxEzARBgNVBAMMCmJvcmluZy5zc2wwggEiMA0GCSqGSIb3DQEBAQUA\n"
-    "A4IBDwAwggEKAoIBAQDc5v1S1M0W+QWM+raWfO0LH8uvqEwuJQgODqMaGnSlWUx9\n"
-    "8iQcnWfjyPja3lWg9K62hSOFDuSyEkysKHDxijz5R93CfLcfnVXjWQDJe7EJTTDP\n"
-    "ozEvxN6RjAeYv7CF000euYr3QT5iyBjg76+bon1p0jHZBJeNPP1KqGYgyxp+hzpx\n"
-    "e0gZmTlGAXd8JQK4v8kpdYwD6PPifFL/jpmQpqOtQmH/6zcLjY4ojmqpEdBqIKIX\n"
-    "+saA29hMq0+NK3K+wgg31RU+cVWxu3tLOIiesETkeDgArjWRS1Vkzbi4v9SJxtNu\n"
-    "OZuAxWiynRJw3JwH/OFHYZIvQqz68ZBoj96cepjPAgMBAAGjezB5MAkGA1UdEwQC\n"
-    "MAAwLAYJYIZIAYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRl\n"
-    "MB0GA1UdDgQWBBTGn0OVVh/aoYt0bvEKG+PIERqnDzAfBgNVHSMEGDAWgBRY+3c3\n"
-    "lxn8JG+5tuuSp8GcCqGr3jANBgkqhkiG9w0BAQsFAAOCAQEAd2nM8gCQN2Dc8QJw\n"
-    "XSZXyuI3DBGGCHcay/3iXu0JvTC3EiQo8J6Djv7WLI0N5KH8mkm40u89fJAB2lLZ\n"
-    "ShuHVtcC182bOKnePgwp9CNwQ21p0rDEu/P3X46ZvFgdxx82E9xLa0tBB8PiPDWh\n"
-    "lV16jbaKTgX5AZqjnsyjR5o9/mbZVupZJXx5Syq+XA8qiJfstSYJs4KyKK9UOjql\n"
-    "ICkJVKpi2ahDBqX4MOH4SLfzVk8pqSpviS6yaA1RXqjpkxiN45WWaXDldVHMSkhC\n"
-    "5CNXsXi4b1nAntu89crwSLA3rEwzCWeYj+BX7e1T9rr3oJdwOU/2KQtW1js1yQUG\n"
-    "tjJMFw==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kCRLTestLeaf[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIDkDCCAnigAwIBAgICEAAwDQYJKoZIhvcNAQELBQAwTjELMAkGA1UEBhMCVVMx
+EzARBgNVBAgMCkNhbGlmb3JuaWExFjAUBgNVBAcMDU1vdW50YWluIFZpZXcxEjAQ
+BgNVBAoMCUJvcmluZ1NTTDAeFw0xNjA5MjYxNTA4MzFaFw0xNzA5MjYxNTA4MzFa
+MEsxCzAJBgNVBAYTAlVTMRMwEQYDVQQIDApDYWxpZm9ybmlhMRIwEAYDVQQKDAlC
+b3JpbmdTU0wxEzARBgNVBAMMCmJvcmluZy5zc2wwggEiMA0GCSqGSIb3DQEBAQUA
+A4IBDwAwggEKAoIBAQDc5v1S1M0W+QWM+raWfO0LH8uvqEwuJQgODqMaGnSlWUx9
+8iQcnWfjyPja3lWg9K62hSOFDuSyEkysKHDxijz5R93CfLcfnVXjWQDJe7EJTTDP
+ozEvxN6RjAeYv7CF000euYr3QT5iyBjg76+bon1p0jHZBJeNPP1KqGYgyxp+hzpx
+e0gZmTlGAXd8JQK4v8kpdYwD6PPifFL/jpmQpqOtQmH/6zcLjY4ojmqpEdBqIKIX
++saA29hMq0+NK3K+wgg31RU+cVWxu3tLOIiesETkeDgArjWRS1Vkzbi4v9SJxtNu
+OZuAxWiynRJw3JwH/OFHYZIvQqz68ZBoj96cepjPAgMBAAGjezB5MAkGA1UdEwQC
+MAAwLAYJYIZIAYb4QgENBB8WHU9wZW5TU0wgR2VuZXJhdGVkIENlcnRpZmljYXRl
+MB0GA1UdDgQWBBTGn0OVVh/aoYt0bvEKG+PIERqnDzAfBgNVHSMEGDAWgBRY+3c3
+lxn8JG+5tuuSp8GcCqGr3jANBgkqhkiG9w0BAQsFAAOCAQEAd2nM8gCQN2Dc8QJw
+XSZXyuI3DBGGCHcay/3iXu0JvTC3EiQo8J6Djv7WLI0N5KH8mkm40u89fJAB2lLZ
+ShuHVtcC182bOKnePgwp9CNwQ21p0rDEu/P3X46ZvFgdxx82E9xLa0tBB8PiPDWh
+lV16jbaKTgX5AZqjnsyjR5o9/mbZVupZJXx5Syq+XA8qiJfstSYJs4KyKK9UOjql
+ICkJVKpi2ahDBqX4MOH4SLfzVk8pqSpviS6yaA1RXqjpkxiN45WWaXDldVHMSkhC
+5CNXsXi4b1nAntu89crwSLA3rEwzCWeYj+BX7e1T9rr3oJdwOU/2KQtW1js1yQUG
+tjJMFw==
+-----END CERTIFICATE-----
+)";
 
-static const char kBasicCRL[] =
-    "-----BEGIN X509 CRL-----\n"
-    "MIIBpzCBkAIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE\n"
-    "CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ\n"
-    "Qm9yaW5nU1NMFw0xNjA5MjYxNTEwNTVaFw0xNjEwMjYxNTEwNTVaoA4wDDAKBgNV\n"
-    "HRQEAwIBATANBgkqhkiG9w0BAQsFAAOCAQEAnrBKKgvd9x9zwK9rtUvVeFeJ7+LN\n"
-    "ZEAc+a5oxpPNEsJx6hXoApYEbzXMxuWBQoCs5iEBycSGudct21L+MVf27M38KrWo\n"
-    "eOkq0a2siqViQZO2Fb/SUFR0k9zb8xl86Zf65lgPplALun0bV/HT7MJcl04Tc4os\n"
-    "dsAReBs5nqTGNEd5AlC1iKHvQZkM//MD51DspKnDpsDiUVi54h9C1SpfZmX8H2Vv\n"
-    "diyu0fZ/bPAM3VAGawatf/SyWfBMyKpoPXEG39oAzmjjOj8en82psn7m474IGaho\n"
-    "/vBbhl1ms5qQiLYPjm4YELtnXQoFyC72tBjbdFd/ZE9k4CNKDbxFUXFbkw==\n"
-    "-----END X509 CRL-----\n";
+static const char kBasicCRL[] = R"(
+-----BEGIN X509 CRL-----
+MIIBpzCBkAIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE
+CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ
+Qm9yaW5nU1NMFw0xNjA5MjYxNTEwNTVaFw0xNjEwMjYxNTEwNTVaoA4wDDAKBgNV
+HRQEAwIBATANBgkqhkiG9w0BAQsFAAOCAQEAnrBKKgvd9x9zwK9rtUvVeFeJ7+LN
+ZEAc+a5oxpPNEsJx6hXoApYEbzXMxuWBQoCs5iEBycSGudct21L+MVf27M38KrWo
+eOkq0a2siqViQZO2Fb/SUFR0k9zb8xl86Zf65lgPplALun0bV/HT7MJcl04Tc4os
+dsAReBs5nqTGNEd5AlC1iKHvQZkM//MD51DspKnDpsDiUVi54h9C1SpfZmX8H2Vv
+diyu0fZ/bPAM3VAGawatf/SyWfBMyKpoPXEG39oAzmjjOj8en82psn7m474IGaho
+/vBbhl1ms5qQiLYPjm4YELtnXQoFyC72tBjbdFd/ZE9k4CNKDbxFUXFbkw==
+-----END X509 CRL-----
+)";
 
-static const char kRevokedCRL[] =
-    "-----BEGIN X509 CRL-----\n"
-    "MIIBvjCBpwIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE\n"
-    "CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ\n"
-    "Qm9yaW5nU1NMFw0xNjA5MjYxNTEyNDRaFw0xNjEwMjYxNTEyNDRaMBUwEwICEAAX\n"
-    "DTE2MDkyNjE1MTIyNlqgDjAMMAoGA1UdFAQDAgECMA0GCSqGSIb3DQEBCwUAA4IB\n"
-    "AQCUGaM4DcWzlQKrcZvI8TMeR8BpsvQeo5BoI/XZu2a8h//PyRyMwYeaOM+3zl0d\n"
-    "sjgCT8b3C1FPgT+P2Lkowv7rJ+FHJRNQkogr+RuqCSPTq65ha4WKlRGWkMFybzVH\n"
-    "NloxC+aU3lgp/NlX9yUtfqYmJek1CDrOOGPrAEAwj1l/BUeYKNGqfBWYJQtPJu+5\n"
-    "OaSvIYGpETCZJscUWODmLEb/O3DM438vLvxonwGqXqS0KX37+CHpUlyhnSovxXxp\n"
-    "Pz4aF+L7OtczxL0GYtD2fR9B7TDMqsNmHXgQrixvvOY7MUdLGbd4RfJL3yA53hyO\n"
-    "xzfKY2TzxLiOmctG0hXFkH5J\n"
-    "-----END X509 CRL-----\n";
+static const char kRevokedCRL[] = R"(
+-----BEGIN X509 CRL-----
+MIIBvjCBpwIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE
+CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ
+Qm9yaW5nU1NMFw0xNjA5MjYxNTEyNDRaFw0xNjEwMjYxNTEyNDRaMBUwEwICEAAX
+DTE2MDkyNjE1MTIyNlqgDjAMMAoGA1UdFAQDAgECMA0GCSqGSIb3DQEBCwUAA4IB
+AQCUGaM4DcWzlQKrcZvI8TMeR8BpsvQeo5BoI/XZu2a8h//PyRyMwYeaOM+3zl0d
+sjgCT8b3C1FPgT+P2Lkowv7rJ+FHJRNQkogr+RuqCSPTq65ha4WKlRGWkMFybzVH
+NloxC+aU3lgp/NlX9yUtfqYmJek1CDrOOGPrAEAwj1l/BUeYKNGqfBWYJQtPJu+5
+OaSvIYGpETCZJscUWODmLEb/O3DM438vLvxonwGqXqS0KX37+CHpUlyhnSovxXxp
+Pz4aF+L7OtczxL0GYtD2fR9B7TDMqsNmHXgQrixvvOY7MUdLGbd4RfJL3yA53hyO
+xzfKY2TzxLiOmctG0hXFkH5J
+-----END X509 CRL-----
+)";
 
-static const char kBadIssuerCRL[] =
-    "-----BEGIN X509 CRL-----\n"
-    "MIIBwjCBqwIBATANBgkqhkiG9w0BAQsFADBSMQswCQYDVQQGEwJVUzETMBEGA1UE\n"
-    "CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzEWMBQGA1UECgwN\n"
-    "Tm90IEJvcmluZ1NTTBcNMTYwOTI2MTUxMjQ0WhcNMTYxMDI2MTUxMjQ0WjAVMBMC\n"
-    "AhAAFw0xNjA5MjYxNTEyMjZaoA4wDDAKBgNVHRQEAwIBAjANBgkqhkiG9w0BAQsF\n"
-    "AAOCAQEAlBmjOA3Fs5UCq3GbyPEzHkfAabL0HqOQaCP12btmvIf/z8kcjMGHmjjP\n"
-    "t85dHbI4Ak/G9wtRT4E/j9i5KML+6yfhRyUTUJKIK/kbqgkj06uuYWuFipURlpDB\n"
-    "cm81RzZaMQvmlN5YKfzZV/clLX6mJiXpNQg6zjhj6wBAMI9ZfwVHmCjRqnwVmCUL\n"
-    "TybvuTmkryGBqREwmSbHFFjg5ixG/ztwzON/Ly78aJ8Bql6ktCl9+/gh6VJcoZ0q\n"
-    "L8V8aT8+Ghfi+zrXM8S9BmLQ9n0fQe0wzKrDZh14EK4sb7zmOzFHSxm3eEXyS98g\n"
-    "Od4cjsc3ymNk88S4jpnLRtIVxZB+SQ==\n"
-    "-----END X509 CRL-----\n";
+static const char kBadIssuerCRL[] = R"(
+-----BEGIN X509 CRL-----
+MIIBwjCBqwIBATANBgkqhkiG9w0BAQsFADBSMQswCQYDVQQGEwJVUzETMBEGA1UE
+CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzEWMBQGA1UECgwN
+Tm90IEJvcmluZ1NTTBcNMTYwOTI2MTUxMjQ0WhcNMTYxMDI2MTUxMjQ0WjAVMBMC
+AhAAFw0xNjA5MjYxNTEyMjZaoA4wDDAKBgNVHRQEAwIBAjANBgkqhkiG9w0BAQsF
+AAOCAQEAlBmjOA3Fs5UCq3GbyPEzHkfAabL0HqOQaCP12btmvIf/z8kcjMGHmjjP
+t85dHbI4Ak/G9wtRT4E/j9i5KML+6yfhRyUTUJKIK/kbqgkj06uuYWuFipURlpDB
+cm81RzZaMQvmlN5YKfzZV/clLX6mJiXpNQg6zjhj6wBAMI9ZfwVHmCjRqnwVmCUL
+TybvuTmkryGBqREwmSbHFFjg5ixG/ztwzON/Ly78aJ8Bql6ktCl9+/gh6VJcoZ0q
+L8V8aT8+Ghfi+zrXM8S9BmLQ9n0fQe0wzKrDZh14EK4sb7zmOzFHSxm3eEXyS98g
+Od4cjsc3ymNk88S4jpnLRtIVxZB+SQ==
+-----END X509 CRL-----
+)";
 
 // kKnownCriticalCRL is kBasicCRL but with a critical issuing distribution point
 // extension.
-static const char kKnownCriticalCRL[] =
-    "-----BEGIN X509 CRL-----\n"
-    "MIIBuDCBoQIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE\n"
-    "CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ\n"
-    "Qm9yaW5nU1NMFw0xNjA5MjYxNTEwNTVaFw0xNjEwMjYxNTEwNTVaoB8wHTAKBgNV\n"
-    "HRQEAwIBATAPBgNVHRwBAf8EBTADgQH/MA0GCSqGSIb3DQEBCwUAA4IBAQAs37Jq\n"
-    "3Htcehm6C2PKXOHekwTqTLOPWsYHfF68kYhdzcopDZBeoKE7jLRkRRGFDaR/tfUs\n"
-    "kwLSDNSQ8EwPb9PT1X8kmFn9QmJgWD6f6BzaH5ZZ9iBUwOcvrydlb/jnjdIZHQxs\n"
-    "fKOAceW5XX3f7DANC3qwYLsQZR/APkfV8nXjPYVUz1kKj04uq/BbQviInjyUYixN\n"
-    "xDx+GDWVVXccehcwAu983kAqP+JDaVQPBVksLuBXz2adrEWwvbLCnZeL3zH1IY9h\n"
-    "6MFO6echpvGbU/H+dRX9UkhdJ7gdwKVD3RjfJl+DRVox9lz8Pbo5H699Tkv9/DQP\n"
-    "9dMWxqhQlv23osLp\n"
-    "-----END X509 CRL-----\n";
+static const char kKnownCriticalCRL[] = R"(
+-----BEGIN X509 CRL-----
+MIIBuDCBoQIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE
+CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ
+Qm9yaW5nU1NMFw0xNjA5MjYxNTEwNTVaFw0xNjEwMjYxNTEwNTVaoB8wHTAKBgNV
+HRQEAwIBATAPBgNVHRwBAf8EBTADgQH/MA0GCSqGSIb3DQEBCwUAA4IBAQAs37Jq
+3Htcehm6C2PKXOHekwTqTLOPWsYHfF68kYhdzcopDZBeoKE7jLRkRRGFDaR/tfUs
+kwLSDNSQ8EwPb9PT1X8kmFn9QmJgWD6f6BzaH5ZZ9iBUwOcvrydlb/jnjdIZHQxs
+fKOAceW5XX3f7DANC3qwYLsQZR/APkfV8nXjPYVUz1kKj04uq/BbQviInjyUYixN
+xDx+GDWVVXccehcwAu983kAqP+JDaVQPBVksLuBXz2adrEWwvbLCnZeL3zH1IY9h
+6MFO6echpvGbU/H+dRX9UkhdJ7gdwKVD3RjfJl+DRVox9lz8Pbo5H699Tkv9/DQP
+9dMWxqhQlv23osLp
+-----END X509 CRL-----
+)";
 
 // kUnknownCriticalCRL is kBasicCRL but with an unknown critical extension.
-static const char kUnknownCriticalCRL[] =
-    "-----BEGIN X509 CRL-----\n"
-    "MIIBvDCBpQIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE\n"
-    "CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ\n"
-    "Qm9yaW5nU1NMFw0xNjA5MjYxNTEwNTVaFw0xNjEwMjYxNTEwNTVaoCMwITAKBgNV\n"
-    "HRQEAwIBATATBgwqhkiG9xIEAYS3CQABAf8EADANBgkqhkiG9w0BAQsFAAOCAQEA\n"
-    "GvBP0xqL509InMj/3493YVRV+ldTpBv5uTD6jewzf5XdaxEQ/VjTNe5zKnxbpAib\n"
-    "Kf7cwX0PMSkZjx7k7kKdDlEucwVvDoqC+O9aJcqVmM6GDyNb9xENxd0XCXja6MZC\n"
-    "yVgP4AwLauB2vSiEprYJyI1APph3iAEeDm60lTXX/wBM/tupQDDujKh2GPyvBRfJ\n"
-    "+wEDwGg3ICwvu4gO4zeC5qnFR+bpL9t5tOMAQnVZ0NWv+k7mkd2LbHdD44dxrfXC\n"
-    "nhtfERx99SDmC/jtUAJrGhtCO8acr7exCeYcduN7KKCm91OeCJKK6OzWst0Og1DB\n"
-    "kwzzU2rL3G65CrZ7H0SZsQ==\n"
-    "-----END X509 CRL-----\n";
+static const char kUnknownCriticalCRL[] = R"(
+-----BEGIN X509 CRL-----
+MIIBvDCBpQIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE
+CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ
+Qm9yaW5nU1NMFw0xNjA5MjYxNTEwNTVaFw0xNjEwMjYxNTEwNTVaoCMwITAKBgNV
+HRQEAwIBATATBgwqhkiG9xIEAYS3CQABAf8EADANBgkqhkiG9w0BAQsFAAOCAQEA
+GvBP0xqL509InMj/3493YVRV+ldTpBv5uTD6jewzf5XdaxEQ/VjTNe5zKnxbpAib
+Kf7cwX0PMSkZjx7k7kKdDlEucwVvDoqC+O9aJcqVmM6GDyNb9xENxd0XCXja6MZC
+yVgP4AwLauB2vSiEprYJyI1APph3iAEeDm60lTXX/wBM/tupQDDujKh2GPyvBRfJ
++wEDwGg3ICwvu4gO4zeC5qnFR+bpL9t5tOMAQnVZ0NWv+k7mkd2LbHdD44dxrfXC
+nhtfERx99SDmC/jtUAJrGhtCO8acr7exCeYcduN7KKCm91OeCJKK6OzWst0Og1DB
+kwzzU2rL3G65CrZ7H0SZsQ==
+-----END X509 CRL-----
+)";
 
 // kUnknownCriticalCRL2 is kBasicCRL but with a critical issuing distribution
 // point extension followed by an unknown critical extension
-static const char kUnknownCriticalCRL2[] =
-    "-----BEGIN X509 CRL-----\n"
-    "MIIBzTCBtgIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE\n"
-    "CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ\n"
-    "Qm9yaW5nU1NMFw0xNjA5MjYxNTEwNTVaFw0xNjEwMjYxNTEwNTVaoDQwMjAKBgNV\n"
-    "HRQEAwIBATAPBgNVHRwBAf8EBTADgQH/MBMGDCqGSIb3EgQBhLcJAAEB/wQAMA0G\n"
-    "CSqGSIb3DQEBCwUAA4IBAQBgSogsC5kf2wzr+0hmZtmLXYd0itAiYO0Gh9AyaEOO\n"
-    "myJFuqICHBSLXXUgwNkTUa2x2I/ivyReVFV756VOlWoaV2wJUs0zeCeVBgC9ZFsq\n"
-    "5a+8OGgXwgoYESFV5Y3QRF2a1Ytzfbw/o6xLXzTngvMsLOs12D4B5SkopyEZibF4\n"
-    "tXlRZyvEudTg3CCrjNP+p/GV07nZ3wcMmKJwQeilgzFUV7NaVCCo9jvPBGp0RxAN\n"
-    "KNif7jmjK4hD5mswo/Eq5kxQIc+mTfuUFdgHuAu1hfLYe0YK+Hr4RFf6Qy4hl7Ne\n"
-    "YjqkkSVIcr87u+8AznwdstnQzsyD27Jt7SjVORkYRywi\n"
-    "-----END X509 CRL-----\n";
+static const char kUnknownCriticalCRL2[] = R"(
+-----BEGIN X509 CRL-----
+MIIBzTCBtgIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE
+CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ
+Qm9yaW5nU1NMFw0xNjA5MjYxNTEwNTVaFw0xNjEwMjYxNTEwNTVaoDQwMjAKBgNV
+HRQEAwIBATAPBgNVHRwBAf8EBTADgQH/MBMGDCqGSIb3EgQBhLcJAAEB/wQAMA0G
+CSqGSIb3DQEBCwUAA4IBAQBgSogsC5kf2wzr+0hmZtmLXYd0itAiYO0Gh9AyaEOO
+myJFuqICHBSLXXUgwNkTUa2x2I/ivyReVFV756VOlWoaV2wJUs0zeCeVBgC9ZFsq
+5a+8OGgXwgoYESFV5Y3QRF2a1Ytzfbw/o6xLXzTngvMsLOs12D4B5SkopyEZibF4
+tXlRZyvEudTg3CCrjNP+p/GV07nZ3wcMmKJwQeilgzFUV7NaVCCo9jvPBGp0RxAN
+KNif7jmjK4hD5mswo/Eq5kxQIc+mTfuUFdgHuAu1hfLYe0YK+Hr4RFf6Qy4hl7Ne
+YjqkkSVIcr87u+8AznwdstnQzsyD27Jt7SjVORkYRywi
+-----END X509 CRL-----
+)";
 
 // kBadExtensionCRL is kBasicCRL but with an incorrectly-encoded issuing
 // distribution point extension.
-static const char kBadExtensionCRL[] =
-    "-----BEGIN X509 CRL-----\n"
-    "MIIBujCBowIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE\n"
-    "CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ\n"
-    "Qm9yaW5nU1NMFw0xNjA5MjYxNTEwNTVaFw0xNjEwMjYxNTEwNTVaoCEwHzAKBgNV\n"
-    "HRQEAwIBATARBgNVHRwBAf8EBzAFoQMBAf8wDQYJKoZIhvcNAQELBQADggEBAA+3\n"
-    "i+5e5Ub8sccfgOBs6WVJFI9c8gvJjrJ8/dYfFIAuCyeocs7DFXn1n13CRZ+URR/Q\n"
-    "mVWgU28+xeusuSPYFpd9cyYTcVyNUGNTI3lwgcE/yVjPaOmzSZKdPakApRxtpKKQ\n"
-    "NN/56aQz3bnT/ZSHQNciRB8U6jiD9V30t0w+FDTpGaG+7bzzUH3UVF9xf9Ctp60A\n"
-    "3mfLe0scas7owSt4AEFuj2SPvcE7yvdOXbu+IEv21cEJUVExJAbhvIweHXh6yRW+\n"
-    "7VVeiNzdIjkZjyTmAzoXGha4+wbxXyBRbfH+XWcO/H+8nwyG8Gktdu2QB9S9nnIp\n"
-    "o/1TpfOMSGhMyMoyPrk=\n"
-    "-----END X509 CRL-----\n";
+static const char kBadExtensionCRL[] = R"(
+-----BEGIN X509 CRL-----
+MIIBujCBowIBATANBgkqhkiG9w0BAQsFADBOMQswCQYDVQQGEwJVUzETMBEGA1UE
+CAwKQ2FsaWZvcm5pYTEWMBQGA1UEBwwNTW91bnRhaW4gVmlldzESMBAGA1UECgwJ
+Qm9yaW5nU1NMFw0xNjA5MjYxNTEwNTVaFw0xNjEwMjYxNTEwNTVaoCEwHzAKBgNV
+HRQEAwIBATARBgNVHRwBAf8EBzAFoQMBAf8wDQYJKoZIhvcNAQELBQADggEBAA+3
+i+5e5Ub8sccfgOBs6WVJFI9c8gvJjrJ8/dYfFIAuCyeocs7DFXn1n13CRZ+URR/Q
+mVWgU28+xeusuSPYFpd9cyYTcVyNUGNTI3lwgcE/yVjPaOmzSZKdPakApRxtpKKQ
+NN/56aQz3bnT/ZSHQNciRB8U6jiD9V30t0w+FDTpGaG+7bzzUH3UVF9xf9Ctp60A
+3mfLe0scas7owSt4AEFuj2SPvcE7yvdOXbu+IEv21cEJUVExJAbhvIweHXh6yRW+
+7VVeiNzdIjkZjyTmAzoXGha4+wbxXyBRbfH+XWcO/H+8nwyG8Gktdu2QB9S9nnIp
+o/1TpfOMSGhMyMoyPrk=
+-----END X509 CRL-----
+)";
 
 // kEd25519Cert is a self-signed Ed25519 certificate.
-static const char kEd25519Cert[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBkTCCAUOgAwIBAgIJAJwooam0UCDmMAUGAytlcDBFMQswCQYDVQQGEwJBVTET\n"
-    "MBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQ\n"
-    "dHkgTHRkMB4XDTE0MDQyMzIzMjE1N1oXDTE0MDUyMzIzMjE1N1owRTELMAkGA1UE\n"
-    "BhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdp\n"
-    "ZGdpdHMgUHR5IEx0ZDAqMAUGAytlcAMhANdamAGCsQq31Uv+08lkBzoO4XLz2qYj\n"
-    "Ja8CGmj3B1Eao1AwTjAdBgNVHQ4EFgQUoux7eV+fJK2v3ah6QPU/lj1/+7UwHwYD\n"
-    "VR0jBBgwFoAUoux7eV+fJK2v3ah6QPU/lj1/+7UwDAYDVR0TBAUwAwEB/zAFBgMr\n"
-    "ZXADQQBuCzqji8VP9xU8mHEMjXGChX7YP5J664UyVKHKH9Z1u4wEbB8dJ3ScaWSL\n"
-    "r+VHVKUhsrvcdCelnXRrrSD7xWAL\n"
-    "-----END CERTIFICATE-----\n";
+static const char kEd25519Cert[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBkTCCAUOgAwIBAgIJAJwooam0UCDmMAUGAytlcDBFMQswCQYDVQQGEwJBVTET
+MBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQ
+dHkgTHRkMB4XDTE0MDQyMzIzMjE1N1oXDTE0MDUyMzIzMjE1N1owRTELMAkGA1UE
+BhMCQVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdp
+ZGdpdHMgUHR5IEx0ZDAqMAUGAytlcAMhANdamAGCsQq31Uv+08lkBzoO4XLz2qYj
+Ja8CGmj3B1Eao1AwTjAdBgNVHQ4EFgQUoux7eV+fJK2v3ah6QPU/lj1/+7UwHwYD
+VR0jBBgwFoAUoux7eV+fJK2v3ah6QPU/lj1/+7UwDAYDVR0TBAUwAwEB/zAFBgMr
+ZXADQQBuCzqji8VP9xU8mHEMjXGChX7YP5J664UyVKHKH9Z1u4wEbB8dJ3ScaWSL
+r+VHVKUhsrvcdCelnXRrrSD7xWAL
+-----END CERTIFICATE-----
+)";
 
 // kEd25519CertNull is an invalid self-signed Ed25519 with an explicit NULL in
 // the signature algorithm.
-static const char kEd25519CertNull[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBlTCCAUWgAwIBAgIJAJwooam0UCDmMAcGAytlcAUAMEUxCzAJBgNVBAYTAkFV\n"
-    "MRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRz\n"
-    "IFB0eSBMdGQwHhcNMTQwNDIzMjMyMTU3WhcNMTQwNTIzMjMyMTU3WjBFMQswCQYD\n"
-    "VQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQg\n"
-    "V2lkZ2l0cyBQdHkgTHRkMCowBQYDK2VwAyEA11qYAYKxCrfVS/7TyWQHOg7hcvPa\n"
-    "piMlrwIaaPcHURqjUDBOMB0GA1UdDgQWBBSi7Ht5X58kra/dqHpA9T+WPX/7tTAf\n"
-    "BgNVHSMEGDAWgBSi7Ht5X58kra/dqHpA9T+WPX/7tTAMBgNVHRMEBTADAQH/MAcG\n"
-    "AytlcAUAA0EA70uefNocdJohkKPNROKVyBuBD3LXMyvmdTklsaxSRY3PcZdOohlr\n"
-    "recgVPpVS7B+d9g4EwtZXIh4lodTBDHBBw==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kEd25519CertNull[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBlTCCAUWgAwIBAgIJAJwooam0UCDmMAcGAytlcAUAMEUxCzAJBgNVBAYTAkFV
+MRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRz
+IFB0eSBMdGQwHhcNMTQwNDIzMjMyMTU3WhcNMTQwNTIzMjMyMTU3WjBFMQswCQYD
+VQQGEwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQg
+V2lkZ2l0cyBQdHkgTHRkMCowBQYDK2VwAyEA11qYAYKxCrfVS/7TyWQHOg7hcvPa
+piMlrwIaaPcHURqjUDBOMB0GA1UdDgQWBBSi7Ht5X58kra/dqHpA9T+WPX/7tTAf
+BgNVHSMEGDAWgBSi7Ht5X58kra/dqHpA9T+WPX/7tTAMBgNVHRMEBTADAQH/MAcG
+AytlcAUAA0EA70uefNocdJohkKPNROKVyBuBD3LXMyvmdTklsaxSRY3PcZdOohlr
+recgVPpVS7B+d9g4EwtZXIh4lodTBDHBBw==
+-----END CERTIFICATE-----
+)";
+
+// kX25519Cert is the example X25519 certificate from
+// https://tools.ietf.org/html/rfc8410#section-10.2
+static const char kX25519Cert[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBLDCB36ADAgECAghWAUdKKo3DMDAFBgMrZXAwGTEXMBUGA1UEAwwOSUVURiBUZX
+N0IERlbW8wHhcNMTYwODAxMTIxOTI0WhcNNDAxMjMxMjM1OTU5WjAZMRcwFQYDVQQD
+DA5JRVRGIFRlc3QgRGVtbzAqMAUGAytlbgMhAIUg8AmJMKdUdIt93LQ+91oNvzoNJj
+ga9OukqY6qm05qo0UwQzAPBgNVHRMBAf8EBTADAQEAMA4GA1UdDwEBAAQEAwIDCDAg
+BgNVHQ4BAQAEFgQUmx9e7e0EM4Xk97xiPFl1uQvIuzswBQYDK2VwA0EAryMB/t3J5v
+/BzKc9dNZIpDmAgs3babFOTQbs+BolzlDUwsPrdGxO3YNGhW7Ibz3OGhhlxXrCe1Cg
+w1AH9efZBw==
+-----END CERTIFICATE-----
+)";
 
 // kSANTypesLeaf is a leaf certificate (signed by |kSANTypesRoot|) which
 // contains SANS for example.com, test@example.com, 127.0.0.1, and
 // https://example.com/. (The latter is useless for now since crypto/x509
 // doesn't deal with URI SANs directly.)
-static const char kSANTypesLeaf[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIClzCCAgCgAwIBAgIJAOjwnT/iW+qmMA0GCSqGSIb3DQEBCwUAMCsxFzAVBgNV\n"
-    "BAoTDkJvcmluZ1NTTCBUZXN0MRAwDgYDVQQDEwdSb290IENBMB4XDTE1MDEwMTAw\n"
-    "MDAwMFoXDTI1MDEwMTAwMDAwMFowLzEXMBUGA1UEChMOQm9yaW5nU1NMIFRlc3Qx\n"
-    "FDASBgNVBAMTC2V4YW1wbGUuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKB\n"
-    "gQDbRn2TLhInBki8Bighq37EtqJd/h5SRYh6NkelCA2SQlvCgcC+l3mYQPtPbRT9\n"
-    "KxOLwqUuZ9jUCZ7WIji3Sgt0cyvCNPHRk+WW2XR781ifbGE8wLBB1NkrKyQjd1sc\n"
-    "O711Xc4gVM+hY4cdHiTE8x0aUIuqthRD7ZendWL0FMhS1wIDAQABo4G+MIG7MA4G\n"
-    "A1UdDwEB/wQEAwIFoDAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwDAYD\n"
-    "VR0TAQH/BAIwADAZBgNVHQ4EEgQQn5EWH0NDPkmm3m22gNefYDAbBgNVHSMEFDAS\n"
-    "gBBAN9cB+0AvuBx+VAQnjFkBMEQGA1UdEQQ9MDuCC2V4YW1wbGUuY29tgRB0ZXN0\n"
-    "QGV4YW1wbGUuY29thwR/AAABhhRodHRwczovL2V4YW1wbGUuY29tLzANBgkqhkiG\n"
-    "9w0BAQsFAAOBgQBtwJvY6+Tk6D6DOtDVaNoJ5y8E25CCuE/Ga4OuIcYJas+yLckf\n"
-    "dZwUV3GUG2oBXl2MrpUFxXd4hKBO1CmlBY+hZEeIx0Yp6QWK9P/vnZeydOTP26mk\n"
-    "jusJ2PqSmtKNU1Zcaba4d29oFejmOAfeguhR8AHpsc/zHEaS5Q9cJsuJcw==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kSANTypesLeaf[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIClzCCAgCgAwIBAgIJAOjwnT/iW+qmMA0GCSqGSIb3DQEBCwUAMCsxFzAVBgNV
+BAoTDkJvcmluZ1NTTCBUZXN0MRAwDgYDVQQDEwdSb290IENBMB4XDTE1MDEwMTAw
+MDAwMFoXDTI1MDEwMTAwMDAwMFowLzEXMBUGA1UEChMOQm9yaW5nU1NMIFRlc3Qx
+FDASBgNVBAMTC2V4YW1wbGUuY29tMIGfMA0GCSqGSIb3DQEBAQUAA4GNADCBiQKB
+gQDbRn2TLhInBki8Bighq37EtqJd/h5SRYh6NkelCA2SQlvCgcC+l3mYQPtPbRT9
+KxOLwqUuZ9jUCZ7WIji3Sgt0cyvCNPHRk+WW2XR781ifbGE8wLBB1NkrKyQjd1sc
+O711Xc4gVM+hY4cdHiTE8x0aUIuqthRD7ZendWL0FMhS1wIDAQABo4G+MIG7MA4G
+A1UdDwEB/wQEAwIFoDAdBgNVHSUEFjAUBggrBgEFBQcDAQYIKwYBBQUHAwIwDAYD
+VR0TAQH/BAIwADAZBgNVHQ4EEgQQn5EWH0NDPkmm3m22gNefYDAbBgNVHSMEFDAS
+gBBAN9cB+0AvuBx+VAQnjFkBMEQGA1UdEQQ9MDuCC2V4YW1wbGUuY29tgRB0ZXN0
+QGV4YW1wbGUuY29thwR/AAABhhRodHRwczovL2V4YW1wbGUuY29tLzANBgkqhkiG
+9w0BAQsFAAOBgQBtwJvY6+Tk6D6DOtDVaNoJ5y8E25CCuE/Ga4OuIcYJas+yLckf
+dZwUV3GUG2oBXl2MrpUFxXd4hKBO1CmlBY+hZEeIx0Yp6QWK9P/vnZeydOTP26mk
+jusJ2PqSmtKNU1Zcaba4d29oFejmOAfeguhR8AHpsc/zHEaS5Q9cJsuJcw==
+-----END CERTIFICATE-----
+)";
 
 // -----BEGIN RSA PRIVATE KEY-----
 // MIICWwIBAAKBgQDbRn2TLhInBki8Bighq37EtqJd/h5SRYh6NkelCA2SQlvCgcC+
@@ -487,22 +525,23 @@
 // YvJUG1zoHwUVrxxbR3DbpTODlktLcl/0b97D0IkH3w==
 // -----END RSA PRIVATE KEY-----
 
-static const char kSANTypesRoot[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIICTTCCAbagAwIBAgIIAj5CwoHlWuYwDQYJKoZIhvcNAQELBQAwKzEXMBUGA1UE\n"
-    "ChMOQm9yaW5nU1NMIFRlc3QxEDAOBgNVBAMTB1Jvb3QgQ0EwHhcNMTUwMTAxMDAw\n"
-    "MDAwWhcNMjUwMTAxMDAwMDAwWjArMRcwFQYDVQQKEw5Cb3JpbmdTU0wgVGVzdDEQ\n"
-    "MA4GA1UEAxMHUm9vdCBDQTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA6Q5/\n"
-    "EQzmWuaGg3D2UQcuAngR9bIkkjjuJmICx5TxPqF3asCP1SJotl3iTNrghRE1wpJy\n"
-    "SY2BtIiXa7f8skRb2U0GcPkMxo/ps9+jaoRsQ1m+nbLQdpvD1/qZWcO45fNTA71J\n"
-    "1rPMokP+rcILuQG4VimUAySnDSghKamulFtK+Z8CAwEAAaN6MHgwDgYDVR0PAQH/\n"
-    "BAQDAgIEMB0GA1UdJQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjAPBgNVHRMBAf8E\n"
-    "BTADAQH/MBkGA1UdDgQSBBBAN9cB+0AvuBx+VAQnjFkBMBsGA1UdIwQUMBKAEEA3\n"
-    "1wH7QC+4HH5UBCeMWQEwDQYJKoZIhvcNAQELBQADgYEAc4N6hTE62/3gwg+kyc2f\n"
-    "c/Jj1mHrOt+0NRaBnmvbmNpsEjHS96Ef4Wt/ZlPXPkkv1C1VosJnOIMF3Q522wRH\n"
-    "bqaxARldS12VAa3gcWisDWD+SqSyDxjyojz0XDiJkTrFuCTCUiZO+1GLB7SO10Ms\n"
-    "d5YVX0c90VMnUhF/dlrqS9U=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kSANTypesRoot[] = R"(
+-----BEGIN CERTIFICATE-----
+MIICTTCCAbagAwIBAgIIAj5CwoHlWuYwDQYJKoZIhvcNAQELBQAwKzEXMBUGA1UE
+ChMOQm9yaW5nU1NMIFRlc3QxEDAOBgNVBAMTB1Jvb3QgQ0EwHhcNMTUwMTAxMDAw
+MDAwWhcNMjUwMTAxMDAwMDAwWjArMRcwFQYDVQQKEw5Cb3JpbmdTU0wgVGVzdDEQ
+MA4GA1UEAxMHUm9vdCBDQTCBnzANBgkqhkiG9w0BAQEFAAOBjQAwgYkCgYEA6Q5/
+EQzmWuaGg3D2UQcuAngR9bIkkjjuJmICx5TxPqF3asCP1SJotl3iTNrghRE1wpJy
+SY2BtIiXa7f8skRb2U0GcPkMxo/ps9+jaoRsQ1m+nbLQdpvD1/qZWcO45fNTA71J
+1rPMokP+rcILuQG4VimUAySnDSghKamulFtK+Z8CAwEAAaN6MHgwDgYDVR0PAQH/
+BAQDAgIEMB0GA1UdJQQWMBQGCCsGAQUFBwMBBggrBgEFBQcDAjAPBgNVHRMBAf8E
+BTADAQH/MBkGA1UdDgQSBBBAN9cB+0AvuBx+VAQnjFkBMBsGA1UdIwQUMBKAEEA3
+1wH7QC+4HH5UBCeMWQEwDQYJKoZIhvcNAQELBQADgYEAc4N6hTE62/3gwg+kyc2f
+c/Jj1mHrOt+0NRaBnmvbmNpsEjHS96Ef4Wt/ZlPXPkkv1C1VosJnOIMF3Q522wRH
+bqaxARldS12VAa3gcWisDWD+SqSyDxjyojz0XDiJkTrFuCTCUiZO+1GLB7SO10Ms
+d5YVX0c90VMnUhF/dlrqS9U=
+-----END CERTIFICATE-----
+)";
 
 // -----BEGIN RSA PRIVATE KEY-----
 // MIICXAIBAAKBgQDpDn8RDOZa5oaDcPZRBy4CeBH1siSSOO4mYgLHlPE+oXdqwI/V
@@ -599,157 +638,166 @@
 
 // kNoBasicConstraintsCertSignIntermediate doesn't have isCA set, but contains
 // certSign in the keyUsage.
-static const char kNoBasicConstraintsCertSignIntermediate[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBqjCCAROgAwIBAgIBAjANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowHzEdMBsGA1UEAxMUTm8gQmFzaWMgQ29uc3RyYWludHMw\n"
-    "WTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAASEFMblfxIEDO8My7wHtHWTuDzNyID1\n"
-    "OsPkMGkn32O/pSyXxXuAqDeFoMVffUMTyfm8JcYugSEbrv2qEXXM4bZRoy8wLTAO\n"
-    "BgNVHQ8BAf8EBAMCAgQwGwYDVR0jBBQwEoAQQDfXAftAL7gcflQEJ4xZATANBgkq\n"
-    "hkiG9w0BAQsFAAOBgQC1Lh6hIAm3K5kRh5iIydU0YAEm7eV6ZSskERDUq3DLJyl9\n"
-    "ZUZCHUzvb464dkwZjeNzaUVS1pdElJslwX3DtGgeJLJGCnk8zUjBjaNrrDm0kzPW\n"
-    "xKt/6oif1ci/KCKqKNXJAIFbc4e+IiBpenwpxHk3If4NM+Ek0nKoO8Uj0NkgTQ==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kNoBasicConstraintsCertSignIntermediate[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBqjCCAROgAwIBAgIBAjANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowHzEdMBsGA1UEAxMUTm8gQmFzaWMgQ29uc3RyYWludHMw
+WTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAASEFMblfxIEDO8My7wHtHWTuDzNyID1
+OsPkMGkn32O/pSyXxXuAqDeFoMVffUMTyfm8JcYugSEbrv2qEXXM4bZRoy8wLTAO
+BgNVHQ8BAf8EBAMCAgQwGwYDVR0jBBQwEoAQQDfXAftAL7gcflQEJ4xZATANBgkq
+hkiG9w0BAQsFAAOBgQC1Lh6hIAm3K5kRh5iIydU0YAEm7eV6ZSskERDUq3DLJyl9
+ZUZCHUzvb464dkwZjeNzaUVS1pdElJslwX3DtGgeJLJGCnk8zUjBjaNrrDm0kzPW
+xKt/6oif1ci/KCKqKNXJAIFbc4e+IiBpenwpxHk3If4NM+Ek0nKoO8Uj0NkgTQ==
+-----END CERTIFICATE-----
+)";
 
-static const char kNoBasicConstraintsCertSignLeaf[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBUDCB96ADAgECAgEDMAoGCCqGSM49BAMCMB8xHTAbBgNVBAMTFE5vIEJhc2lj\n"
-    "IENvbnN0cmFpbnRzMCAXDTAwMDEwMTAwMDAwMFoYDzIwOTkwMTAxMDAwMDAwWjAx\n"
-    "MS8wLQYDVQQDEyZMZWFmIGZyb20gQ0Egd2l0aCBubyBCYXNpYyBDb25zdHJhaW50\n"
-    "czBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABEsYPMwzdJKjB+2gpC90ib2ilHoB\n"
-    "w/arQ6ikUX0CNUDDaKaOu/jF39ogzVlg4lDFrjCKShSfCCcrwgONv70IZGijEDAO\n"
-    "MAwGA1UdEwEB/wQCMAAwCgYIKoZIzj0EAwIDSAAwRQIgbV7R99yM+okXSIs6Fp3o\n"
-    "eCOXiDL60IBxaTOcLS44ywcCIQDbn87Gj5cFgHBYAkzdHqDsyGXkxQTHDq9jmX24\n"
-    "Djy3Zw==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kNoBasicConstraintsCertSignLeaf[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBUDCB96ADAgECAgEDMAoGCCqGSM49BAMCMB8xHTAbBgNVBAMTFE5vIEJhc2lj
+IENvbnN0cmFpbnRzMCAXDTAwMDEwMTAwMDAwMFoYDzIwOTkwMTAxMDAwMDAwWjAx
+MS8wLQYDVQQDEyZMZWFmIGZyb20gQ0Egd2l0aCBubyBCYXNpYyBDb25zdHJhaW50
+czBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABEsYPMwzdJKjB+2gpC90ib2ilHoB
+w/arQ6ikUX0CNUDDaKaOu/jF39ogzVlg4lDFrjCKShSfCCcrwgONv70IZGijEDAO
+MAwGA1UdEwEB/wQCMAAwCgYIKoZIzj0EAwIDSAAwRQIgbV7R99yM+okXSIs6Fp3o
+eCOXiDL60IBxaTOcLS44ywcCIQDbn87Gj5cFgHBYAkzdHqDsyGXkxQTHDq9jmX24
+Djy3Zw==
+-----END CERTIFICATE-----
+)";
 
 // kNoBasicConstraintsNetscapeCAIntermediate doesn't have isCA set, but contains
 // a Netscape certificate-type extension that asserts a type of "SSL CA".
-static const char kNoBasicConstraintsNetscapeCAIntermediate[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBuDCCASGgAwIBAgIBAjANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowKjEoMCYGA1UEAxMfTm8gQmFzaWMgQ29uc3RyYWludHMg\n"
-    "KE5ldHNjYXBlKTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABCeMbmCaOtMzXBqi\n"
-    "PrCdNOH23CkaawUA+pAezitAN4RXS1O2CGK5sJjGPVVeogROU8G7/b+mU+ciZIzH\n"
-    "1PP8FJKjMjAwMBsGA1UdIwQUMBKAEEA31wH7QC+4HH5UBCeMWQEwEQYJYIZIAYb4\n"
-    "QgEBBAQDAgIEMA0GCSqGSIb3DQEBCwUAA4GBAAgNWjh7cfBTClTAk+Ml//5xb9Ju\n"
-    "tkBhG6Rm+kkMD+qiSMO6t7xS7CsA0+jIBjkdEYaLZ3oxtQCBdZsVNxUvRxZ0AUfF\n"
-    "G3DtRFTsrI1f7IQhpMuqEMF4shPW+5x54hrq0Fo6xMs6XoinJZcTUaaB8EeXRF6M\n"
-    "P9p6HuyLrmn0c/F0\n"
-    "-----END CERTIFICATE-----\n";
+static const char kNoBasicConstraintsNetscapeCAIntermediate[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBuDCCASGgAwIBAgIBAjANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowKjEoMCYGA1UEAxMfTm8gQmFzaWMgQ29uc3RyYWludHMg
+KE5ldHNjYXBlKTBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABCeMbmCaOtMzXBqi
+PrCdNOH23CkaawUA+pAezitAN4RXS1O2CGK5sJjGPVVeogROU8G7/b+mU+ciZIzH
+1PP8FJKjMjAwMBsGA1UdIwQUMBKAEEA31wH7QC+4HH5UBCeMWQEwEQYJYIZIAYb4
+QgEBBAQDAgIEMA0GCSqGSIb3DQEBCwUAA4GBAAgNWjh7cfBTClTAk+Ml//5xb9Ju
+tkBhG6Rm+kkMD+qiSMO6t7xS7CsA0+jIBjkdEYaLZ3oxtQCBdZsVNxUvRxZ0AUfF
+G3DtRFTsrI1f7IQhpMuqEMF4shPW+5x54hrq0Fo6xMs6XoinJZcTUaaB8EeXRF6M
+P9p6HuyLrmn0c/F0
+-----END CERTIFICATE-----
+)";
 
-static const char kNoBasicConstraintsNetscapeCALeaf[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBXDCCAQKgAwIBAgIBAzAKBggqhkjOPQQDAjAqMSgwJgYDVQQDEx9ObyBCYXNp\n"
-    "YyBDb25zdHJhaW50cyAoTmV0c2NhcGUpMCAXDTAwMDEwMTAwMDAwMFoYDzIwOTkw\n"
-    "MTAxMDAwMDAwWjAxMS8wLQYDVQQDEyZMZWFmIGZyb20gQ0Egd2l0aCBubyBCYXNp\n"
-    "YyBDb25zdHJhaW50czBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABDlJKolDu3R2\n"
-    "tPqSDycr0QJcWhxdBv76V0EEVflcHRxED6vAioTEcnQszt1OfKtBZvjlo0yp6i6Q\n"
-    "DaYit0ZInmWjEDAOMAwGA1UdEwEB/wQCMAAwCgYIKoZIzj0EAwIDSAAwRQIhAJsh\n"
-    "aZL6BHeEfoUBj1oZ2Ln91qzj3UCVMJ+vrmwAFdYyAiA3wp2JphgchvmoUFuzPXwj\n"
-    "XyPwWPbymSTpzKhB4xB7qQ==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kNoBasicConstraintsNetscapeCALeaf[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBXDCCAQKgAwIBAgIBAzAKBggqhkjOPQQDAjAqMSgwJgYDVQQDEx9ObyBCYXNp
+YyBDb25zdHJhaW50cyAoTmV0c2NhcGUpMCAXDTAwMDEwMTAwMDAwMFoYDzIwOTkw
+MTAxMDAwMDAwWjAxMS8wLQYDVQQDEyZMZWFmIGZyb20gQ0Egd2l0aCBubyBCYXNp
+YyBDb25zdHJhaW50czBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABDlJKolDu3R2
+tPqSDycr0QJcWhxdBv76V0EEVflcHRxED6vAioTEcnQszt1OfKtBZvjlo0yp6i6Q
+DaYit0ZInmWjEDAOMAwGA1UdEwEB/wQCMAAwCgYIKoZIzj0EAwIDSAAwRQIhAJsh
+aZL6BHeEfoUBj1oZ2Ln91qzj3UCVMJ+vrmwAFdYyAiA3wp2JphgchvmoUFuzPXwj
+XyPwWPbymSTpzKhB4xB7qQ==
+-----END CERTIFICATE-----
+)";
 
-static const char kSelfSignedMismatchAlgorithms[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIFMjCCAxqgAwIBAgIJAL0mG5fOeJ7xMA0GCSqGSIb3DQEBDQUAMC0xCzAJBgNV\n"
-    "BAYTAkdCMQ8wDQYDVQQHDAZMb25kb24xDTALBgNVBAoMBFRlc3QwIBcNMTgwOTE3\n"
-    "MTIxNzU3WhgPMjExODA4MjQxMjE3NTdaMC0xCzAJBgNVBAYTAkdCMQ8wDQYDVQQH\n"
-    "DAZMb25kb24xDTALBgNVBAoMBFRlc3QwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAw\n"
-    "ggIKAoICAQDCMhBrRAGGw+n2GdctBr/cEK4FZA6ajiHjihgpCHoSBdyL4R2jGKLS\n"
-    "g0WgaMXa1HpkKN7LcIySosEBPlmcRkr1RqbEvQStOSvoFCXYvtx3alM6HTbXMcDR\n"
-    "mqoKoABP6LXsPSoMWIgqMtP2X9EOppzHVIK1yFYFfbIlvYUV2Ka+MuMe0Vh5wvD1\n"
-    "4GanPb+cWSKgdRSVQovCCMY3yWtZKVEaxRpCsk/mYYIFWz0tcgMjIKwDx1XXgiAV\n"
-    "nU6NK43xbaw3XhtnaD/pv9lhTTbNrlcln9LjTD097BaK4R+1AEPHnpfxA9Ui3upn\n"
-    "kbsNUdGdOB0ksZi/vd7lh833YgquQUIAhYrbfvq/HFCpVV1gljzlS3sqULYpLE//\n"
-    "i3OsuL2mE+CYIJGpIi2GeJJWXciNMTJDOqTn+fRDtVb4RPp4Y70DJirp7XzaBi3q\n"
-    "H0edANCzPSRCDbZsOhzIXhXshldiXVRX666DDlbMQgLTEnNKrkwv6DmU8o15XQsb\n"
-    "8k1Os2YwXmkEOxUQ7AJZXVTZSf6UK9Znmdq1ZrHjybMfRUkHVxJcnKvrxfryralv\n"
-    "gzfvu+D6HuxrCo3Ojqa+nDgIbxKEBtdrcsMhq1jWPFhjwo1fSadAkKOfdCAuXJRD\n"
-    "THg3b4Sf+W7Cpc570YHrIpBf7WFl2XsPcEM0mJZ5+yATASCubNozQwIDAQABo1Mw\n"
-    "UTAdBgNVHQ4EFgQUES0hupZSqY21JOba10QyZuxm91EwHwYDVR0jBBgwFoAUES0h\n"
-    "upZSqY21JOba10QyZuxm91EwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsF\n"
-    "AAOCAgEABTN5S30ng/RMpBweDm2N561PdpaCdiRXtAFCRVWR2mkDYC/Xj9Vqe6be\n"
-    "PyM7L/5OKYVjzF1yJu67z/dx+ja5o+41g17jdqla7hyPx+9B4uRyDh+1KJTa+duj\n"
-    "mw/aA1LCr6O6W4WizDOsChJ6FaB2Y1+GlFnKWb5nUdhVJqXQE1WOX9dZnw8Y4Npd\n"
-    "VmAsjWot0BZorJrt3fwfcv3QfA896twkbo7Llv/8qzg4sXZXZ4ZtgAOqnPngiSn+\n"
-    "JT/vYCXZ406VvAFpFqMcVz2dO/VGuL8lGIMHRKNyafrsV81EzH1W/XmRWOgvgj6r\n"
-    "yQI63ln/AMY72HQ97xLkE1xKunGz6bK5Ug5+O43Uftc4Mb6MUgzo+ZqEQ3Ob+cAV\n"
-    "cvjmtwDaPO/O39O5Xq0tLTlkn2/cKf4OQ6S++GDxzyRVHh5JXgP4j9+jfZY57Woy\n"
-    "R1bE7N50JjY4cDermBJKdlBIjL7UPhqmLyaG7V0hBitFlgGBUCcJtJOV0xYd5aF3\n"
-    "pxNkvMXhBmh95fjxJ0cJjpO7tN1RAwtMMNgsl7OUbuVRQCHOPW5DgP5qY21jDeRn\n"
-    "BY82382l+9QzykmJLI5MZnmj4BA9uIDCwMtoTTvP++SsvhUAbuvh7MOOUQL0EY4m\n"
-    "KStYq7X9PKseN+PvmfeoffIKc5R/Ha39oi7cGMVHCr8aiEhsf94=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kSelfSignedMismatchAlgorithms[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIFMjCCAxqgAwIBAgIJAL0mG5fOeJ7xMA0GCSqGSIb3DQEBDQUAMC0xCzAJBgNV
+BAYTAkdCMQ8wDQYDVQQHDAZMb25kb24xDTALBgNVBAoMBFRlc3QwIBcNMTgwOTE3
+MTIxNzU3WhgPMjExODA4MjQxMjE3NTdaMC0xCzAJBgNVBAYTAkdCMQ8wDQYDVQQH
+DAZMb25kb24xDTALBgNVBAoMBFRlc3QwggIiMA0GCSqGSIb3DQEBAQUAA4ICDwAw
+ggIKAoICAQDCMhBrRAGGw+n2GdctBr/cEK4FZA6ajiHjihgpCHoSBdyL4R2jGKLS
+g0WgaMXa1HpkKN7LcIySosEBPlmcRkr1RqbEvQStOSvoFCXYvtx3alM6HTbXMcDR
+mqoKoABP6LXsPSoMWIgqMtP2X9EOppzHVIK1yFYFfbIlvYUV2Ka+MuMe0Vh5wvD1
+4GanPb+cWSKgdRSVQovCCMY3yWtZKVEaxRpCsk/mYYIFWz0tcgMjIKwDx1XXgiAV
+nU6NK43xbaw3XhtnaD/pv9lhTTbNrlcln9LjTD097BaK4R+1AEPHnpfxA9Ui3upn
+kbsNUdGdOB0ksZi/vd7lh833YgquQUIAhYrbfvq/HFCpVV1gljzlS3sqULYpLE//
+i3OsuL2mE+CYIJGpIi2GeJJWXciNMTJDOqTn+fRDtVb4RPp4Y70DJirp7XzaBi3q
+H0edANCzPSRCDbZsOhzIXhXshldiXVRX666DDlbMQgLTEnNKrkwv6DmU8o15XQsb
+8k1Os2YwXmkEOxUQ7AJZXVTZSf6UK9Znmdq1ZrHjybMfRUkHVxJcnKvrxfryralv
+gzfvu+D6HuxrCo3Ojqa+nDgIbxKEBtdrcsMhq1jWPFhjwo1fSadAkKOfdCAuXJRD
+THg3b4Sf+W7Cpc570YHrIpBf7WFl2XsPcEM0mJZ5+yATASCubNozQwIDAQABo1Mw
+UTAdBgNVHQ4EFgQUES0hupZSqY21JOba10QyZuxm91EwHwYDVR0jBBgwFoAUES0h
+upZSqY21JOba10QyZuxm91EwDwYDVR0TAQH/BAUwAwEB/zANBgkqhkiG9w0BAQsF
+AAOCAgEABTN5S30ng/RMpBweDm2N561PdpaCdiRXtAFCRVWR2mkDYC/Xj9Vqe6be
+PyM7L/5OKYVjzF1yJu67z/dx+ja5o+41g17jdqla7hyPx+9B4uRyDh+1KJTa+duj
+mw/aA1LCr6O6W4WizDOsChJ6FaB2Y1+GlFnKWb5nUdhVJqXQE1WOX9dZnw8Y4Npd
+VmAsjWot0BZorJrt3fwfcv3QfA896twkbo7Llv/8qzg4sXZXZ4ZtgAOqnPngiSn+
+JT/vYCXZ406VvAFpFqMcVz2dO/VGuL8lGIMHRKNyafrsV81EzH1W/XmRWOgvgj6r
+yQI63ln/AMY72HQ97xLkE1xKunGz6bK5Ug5+O43Uftc4Mb6MUgzo+ZqEQ3Ob+cAV
+cvjmtwDaPO/O39O5Xq0tLTlkn2/cKf4OQ6S++GDxzyRVHh5JXgP4j9+jfZY57Woy
+R1bE7N50JjY4cDermBJKdlBIjL7UPhqmLyaG7V0hBitFlgGBUCcJtJOV0xYd5aF3
+pxNkvMXhBmh95fjxJ0cJjpO7tN1RAwtMMNgsl7OUbuVRQCHOPW5DgP5qY21jDeRn
+BY82382l+9QzykmJLI5MZnmj4BA9uIDCwMtoTTvP++SsvhUAbuvh7MOOUQL0EY4m
+KStYq7X9PKseN+PvmfeoffIKc5R/Ha39oi7cGMVHCr8aiEhsf94=
+-----END CERTIFICATE-----
+)";
 
 // kCommonNameWithSANs is a leaf certificate signed by kSANTypesRoot, with
 // *.host1.test as the common name and a SAN list of *.host2.test and
 // foo.host3.test.
-static const char kCommonNameWithSANs[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIB2zCCAUSgAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowNzEeMBwGA1UEChMVQ29tbW9uIG5hbWUgd2l0aCBTQU5z\n"
-    "MRUwEwYDVQQDDAwqLmhvc3QxLnRlc3QwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNC\n"
-    "AASgWzfnFnpQrokSLIC+LhCKJDUAY/2usfIDpOnafYoYCasbYetkmOslgyY4Nn07\n"
-    "zjvjNROprA/0bdULXAkdL9bNo0gwRjAbBgNVHSMEFDASgBBAN9cB+0AvuBx+VAQn\n"
-    "jFkBMCcGA1UdEQQgMB6CDCouaG9zdDIudGVzdIIOZm9vLmhvc3QzLnRlc3QwDQYJ\n"
-    "KoZIhvcNAQELBQADgYEAtv2e3hBhsslXB1HTxgusjoschWOVtvGZUaYlhkKzKTCL\n"
-    "4YpDn50BccnucBU/b9phYvaEZtyzOv4ZXhxTGyLnLrIVB9x5ikfCcfl+LNYNjDwM\n"
-    "enm/h1zOfJ7wXLyscD4kU29Wc/zxBd70thIgLYn16CC1S9NtXKsXXDXv5VVH/bg=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kCommonNameWithSANs[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIB2zCCAUSgAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowNzEeMBwGA1UEChMVQ29tbW9uIG5hbWUgd2l0aCBTQU5z
+MRUwEwYDVQQDDAwqLmhvc3QxLnRlc3QwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNC
+AASgWzfnFnpQrokSLIC+LhCKJDUAY/2usfIDpOnafYoYCasbYetkmOslgyY4Nn07
+zjvjNROprA/0bdULXAkdL9bNo0gwRjAbBgNVHSMEFDASgBBAN9cB+0AvuBx+VAQn
+jFkBMCcGA1UdEQQgMB6CDCouaG9zdDIudGVzdIIOZm9vLmhvc3QzLnRlc3QwDQYJ
+KoZIhvcNAQELBQADgYEAtv2e3hBhsslXB1HTxgusjoschWOVtvGZUaYlhkKzKTCL
+4YpDn50BccnucBU/b9phYvaEZtyzOv4ZXhxTGyLnLrIVB9x5ikfCcfl+LNYNjDwM
+enm/h1zOfJ7wXLyscD4kU29Wc/zxBd70thIgLYn16CC1S9NtXKsXXDXv5VVH/bg=
+-----END CERTIFICATE-----
+)";
 
 // kCommonNameWithSANs is a leaf certificate signed by kSANTypesRoot, with
 // *.host1.test as the common name and no SAN list.
-static const char kCommonNameWithoutSANs[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBtTCCAR6gAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowOjEhMB8GA1UEChMYQ29tbW9uIG5hbWUgd2l0aG91dCBT\n"
-    "QU5zMRUwEwYDVQQDDAwqLmhvc3QxLnRlc3QwWTATBgcqhkjOPQIBBggqhkjOPQMB\n"
-    "BwNCAARt2vjlIrPE+kr11VS1rRP/AYQu4fvf1bNw/K9rwYlVBhmLMPYasEmpCtKE\n"
-    "0bDIFydtDYC3wZDpSS+YiaG40sdAox8wHTAbBgNVHSMEFDASgBBAN9cB+0AvuBx+\n"
-    "VAQnjFkBMA0GCSqGSIb3DQEBCwUAA4GBAHRbIeaCEytOpJpw9O2dlB656AHe1+t5\n"
-    "4JiS5mvtzoVOLn7fFk5EFQtZS7sG1Uc2XjlSw+iyvFoTFEqfKyU/mIdc2vBuPwA2\n"
-    "+YXT8aE4S+UZ9oz5j0gDpikGnkSCW0cyHD8L8fntNjaQRSaM482JpmtdmuxClmWO\n"
-    "pFFXI2B5usgI\n"
-    "-----END CERTIFICATE-----\n";
+static const char kCommonNameWithoutSANs[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBtTCCAR6gAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowOjEhMB8GA1UEChMYQ29tbW9uIG5hbWUgd2l0aG91dCBT
+QU5zMRUwEwYDVQQDDAwqLmhvc3QxLnRlc3QwWTATBgcqhkjOPQIBBggqhkjOPQMB
+BwNCAARt2vjlIrPE+kr11VS1rRP/AYQu4fvf1bNw/K9rwYlVBhmLMPYasEmpCtKE
+0bDIFydtDYC3wZDpSS+YiaG40sdAox8wHTAbBgNVHSMEFDASgBBAN9cB+0AvuBx+
+VAQnjFkBMA0GCSqGSIb3DQEBCwUAA4GBAHRbIeaCEytOpJpw9O2dlB656AHe1+t5
+4JiS5mvtzoVOLn7fFk5EFQtZS7sG1Uc2XjlSw+iyvFoTFEqfKyU/mIdc2vBuPwA2
++YXT8aE4S+UZ9oz5j0gDpikGnkSCW0cyHD8L8fntNjaQRSaM482JpmtdmuxClmWO
+pFFXI2B5usgI
+-----END CERTIFICATE-----
+)";
 
 // kCommonNameWithEmailSAN is a leaf certificate signed by kSANTypesRoot, with
 // *.host1.test as the common name and the email address test@host2.test in the
 // SAN list.
-static const char kCommonNameWithEmailSAN[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBvDCCASWgAwIBAgIBAjANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowFzEVMBMGA1UEAwwMKi5ob3N0MS50ZXN0MFkwEwYHKoZI\n"
-    "zj0CAQYIKoZIzj0DAQcDQgAEtevOxcTjpPzlNGoUMFfZyr1k03/Hiuh+EsnuScDs\n"
-    "8XLKi6fDkvSaDClI99ycabQZRPIrvyT+dglDC6ugQd+CYqNJMEcwDAYDVR0TAQH/\n"
-    "BAIwADAbBgNVHSMEFDASgBBAN9cB+0AvuBx+VAQnjFkBMBoGA1UdEQQTMBGBD3Rl\n"
-    "c3RAaG9zdDIudGVzdDANBgkqhkiG9w0BAQsFAAOBgQCGbqb78OWJWl4zb+qw0Dz2\n"
-    "HJgZZJt6/+nNG/XJKdaYeS4eofsbwsJI4fuuOF6ZvYCJxVNtGqdfZDgycvFA9hjv\n"
-    "NGosBF1/spP17cmzTahLjxs71jDvHV/EQJbKGl/Zpta1Em1VrzSrwoOFabPXzZTJ\n"
-    "aet/mER21Z/9ZsTUoJQPJw==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kCommonNameWithEmailSAN[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBvDCCASWgAwIBAgIBAjANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowFzEVMBMGA1UEAwwMKi5ob3N0MS50ZXN0MFkwEwYHKoZI
+zj0CAQYIKoZIzj0DAQcDQgAEtevOxcTjpPzlNGoUMFfZyr1k03/Hiuh+EsnuScDs
+8XLKi6fDkvSaDClI99ycabQZRPIrvyT+dglDC6ugQd+CYqNJMEcwDAYDVR0TAQH/
+BAIwADAbBgNVHSMEFDASgBBAN9cB+0AvuBx+VAQnjFkBMBoGA1UdEQQTMBGBD3Rl
+c3RAaG9zdDIudGVzdDANBgkqhkiG9w0BAQsFAAOBgQCGbqb78OWJWl4zb+qw0Dz2
+HJgZZJt6/+nNG/XJKdaYeS4eofsbwsJI4fuuOF6ZvYCJxVNtGqdfZDgycvFA9hjv
+NGosBF1/spP17cmzTahLjxs71jDvHV/EQJbKGl/Zpta1Em1VrzSrwoOFabPXzZTJ
+aet/mER21Z/9ZsTUoJQPJw==
+-----END CERTIFICATE-----
+)";
 
 // kCommonNameWithIPSAN is a leaf certificate signed by kSANTypesRoot, with
 // *.host1.test as the common name and the IP address 127.0.0.1 in the
 // SAN list.
-static const char kCommonNameWithIPSAN[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBsTCCARqgAwIBAgIBAjANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowFzEVMBMGA1UEAwwMKi5ob3N0MS50ZXN0MFkwEwYHKoZI\n"
-    "zj0CAQYIKoZIzj0DAQcDQgAEFKrgkxm8PysXbwnHQeTD3p8YY0+sY4ssnZgmj8wX\n"
-    "KTyn893fdBHWlz71GO6t82wMTF5d+ZYwI2XU52pfl4SB2aM+MDwwDAYDVR0TAQH/\n"
-    "BAIwADAbBgNVHSMEFDASgBBAN9cB+0AvuBx+VAQnjFkBMA8GA1UdEQQIMAaHBH8A\n"
-    "AAEwDQYJKoZIhvcNAQELBQADgYEAQWZ8Oj059ZjS109V/ijMYT28xuAN5n6HHxCO\n"
-    "DopTP56Zu9+gme5wTETWEfocspZvgecoUOcedTFoKSQ7JafO09NcVLA+D6ddYpju\n"
-    "mgfuiLy9dDhqvX/NHaLBMxOBWWbOLwWE+ibyX+pOzjWRCw1L7eUXOr6PhZAOQsmU\n"
-    "D0+O6KI=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kCommonNameWithIPSAN[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBsTCCARqgAwIBAgIBAjANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowFzEVMBMGA1UEAwwMKi5ob3N0MS50ZXN0MFkwEwYHKoZI
+zj0CAQYIKoZIzj0DAQcDQgAEFKrgkxm8PysXbwnHQeTD3p8YY0+sY4ssnZgmj8wX
+KTyn893fdBHWlz71GO6t82wMTF5d+ZYwI2XU52pfl4SB2aM+MDwwDAYDVR0TAQH/
+BAIwADAbBgNVHSMEFDASgBBAN9cB+0AvuBx+VAQnjFkBMA8GA1UdEQQIMAaHBH8A
+AAEwDQYJKoZIhvcNAQELBQADgYEAQWZ8Oj059ZjS109V/ijMYT28xuAN5n6HHxCO
+DopTP56Zu9+gme5wTETWEfocspZvgecoUOcedTFoKSQ7JafO09NcVLA+D6ddYpju
+mgfuiLy9dDhqvX/NHaLBMxOBWWbOLwWE+ibyX+pOzjWRCw1L7eUXOr6PhZAOQsmU
+D0+O6KI=
+-----END CERTIFICATE-----
+)";
 
 // kConstrainedIntermediate is an intermediate signed by kSANTypesRoot, with
 // permitted DNS names of permitted1.test and foo.permitted2.test and an
@@ -760,84 +808,89 @@
 // JhNOfIv/d8heWFBeKOfMR+RfaROhRANCAASbbbWYiN6mn+BCpg4XNpibOH0D/DN4
 // kZ5C/Ml2YVomC9T83OKk2CzB8fPAabPb4P4Vv+fIabpEfjWS5nzKLY1y
 // -----END PRIVATE KEY-----
-static const char kConstrainedIntermediate[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIICDjCCAXegAwIBAgIBAjANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowKDEmMCQGA1UEAxMdTmFtZSBDb25zdHJhaW50cyBJbnRl\n"
-    "cm1lZGlhdGUwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAASbbbWYiN6mn+BCpg4X\n"
-    "NpibOH0D/DN4kZ5C/Ml2YVomC9T83OKk2CzB8fPAabPb4P4Vv+fIabpEfjWS5nzK\n"
-    "LY1yo4GJMIGGMA8GA1UdEwEB/wQFMAMBAf8wGwYDVR0jBBQwEoAQQDfXAftAL7gc\n"
-    "flQEJ4xZATBWBgNVHR4BAf8ETDBKoCowEYIPcGVybWl0dGVkMS50ZXN0MBWCE2Zv\n"
-    "by5wZXJtaXR0ZWQyLnRlc3ShHDAaghhleGNsdWRlZC5wZXJtaXR0ZWQxLnRlc3Qw\n"
-    "DQYJKoZIhvcNAQELBQADgYEAFq1Ka05hiKREwRpSceQPzIIH4B5a5IVBg5/EvmQI\n"
-    "9V0fXyAE1GmahPt70sIBxIgzNTEaY8P/IoOuCdlZWe0msmyEO3S6YSAzOWR5Van6\n"
-    "cXmFM1uMd95TlkxUMRdV+jKJTvG6R/BM2zltaV7Xt662k5HtzT5Svw0rZlFaggZz\n"
-    "UyM=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kConstrainedIntermediate[] = R"(
+-----BEGIN CERTIFICATE-----
+MIICDjCCAXegAwIBAgIBAjANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowKDEmMCQGA1UEAxMdTmFtZSBDb25zdHJhaW50cyBJbnRl
+cm1lZGlhdGUwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAASbbbWYiN6mn+BCpg4X
+NpibOH0D/DN4kZ5C/Ml2YVomC9T83OKk2CzB8fPAabPb4P4Vv+fIabpEfjWS5nzK
+LY1yo4GJMIGGMA8GA1UdEwEB/wQFMAMBAf8wGwYDVR0jBBQwEoAQQDfXAftAL7gc
+flQEJ4xZATBWBgNVHR4BAf8ETDBKoCowEYIPcGVybWl0dGVkMS50ZXN0MBWCE2Zv
+by5wZXJtaXR0ZWQyLnRlc3ShHDAaghhleGNsdWRlZC5wZXJtaXR0ZWQxLnRlc3Qw
+DQYJKoZIhvcNAQELBQADgYEAFq1Ka05hiKREwRpSceQPzIIH4B5a5IVBg5/EvmQI
+9V0fXyAE1GmahPt70sIBxIgzNTEaY8P/IoOuCdlZWe0msmyEO3S6YSAzOWR5Van6
+cXmFM1uMd95TlkxUMRdV+jKJTvG6R/BM2zltaV7Xt662k5HtzT5Svw0rZlFaggZz
+UyM=
+-----END CERTIFICATE-----
+)";
 
 // kCommonNamePermittedLeaf is a leaf certificate signed by
 // kConstrainedIntermediate. Its common name is permitted by the name
 // constraints.
-static const char kCommonNamePermittedLeaf[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBaDCCAQ2gAwIBAgIBAzAKBggqhkjOPQQDAjAoMSYwJAYDVQQDEx1OYW1lIENv\n"
-    "bnN0cmFpbnRzIEludGVybWVkaWF0ZTAgFw0wMDAxMDEwMDAwMDBaGA8yMDk5MDEw\n"
-    "MTAwMDAwMFowPjEeMBwGA1UEChMVQ29tbW9uIG5hbWUgcGVybWl0dGVkMRwwGgYD\n"
-    "VQQDExNmb28ucGVybWl0dGVkMS50ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcD\n"
-    "QgAENX5Ycs8q8MRzPYUz6DqLHhJR3wcmniFRgkiEa7MxE/mRe00y0VGwH7xi7Aoc\n"
-    "emXPrtD4JwN5bssbcxWGAKYYzaMQMA4wDAYDVR0TAQH/BAIwADAKBggqhkjOPQQD\n"
-    "AgNJADBGAiEAtsnWuRQXtw2xbieC78Y8SVEtTjcZUx8uZyQe1GPLfGICIQDR4fNY\n"
-    "yg3PC94ydPNQZVsFxAne32CbonWWsokalTFpUQ==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kCommonNamePermittedLeaf[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBaDCCAQ2gAwIBAgIBAzAKBggqhkjOPQQDAjAoMSYwJAYDVQQDEx1OYW1lIENv
+bnN0cmFpbnRzIEludGVybWVkaWF0ZTAgFw0wMDAxMDEwMDAwMDBaGA8yMDk5MDEw
+MTAwMDAwMFowPjEeMBwGA1UEChMVQ29tbW9uIG5hbWUgcGVybWl0dGVkMRwwGgYD
+VQQDExNmb28ucGVybWl0dGVkMS50ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcD
+QgAENX5Ycs8q8MRzPYUz6DqLHhJR3wcmniFRgkiEa7MxE/mRe00y0VGwH7xi7Aoc
+emXPrtD4JwN5bssbcxWGAKYYzaMQMA4wDAYDVR0TAQH/BAIwADAKBggqhkjOPQQD
+AgNJADBGAiEAtsnWuRQXtw2xbieC78Y8SVEtTjcZUx8uZyQe1GPLfGICIQDR4fNY
+yg3PC94ydPNQZVsFxAne32CbonWWsokalTFpUQ==
+-----END CERTIFICATE-----
+)";
 static const char kCommonNamePermitted[] = "foo.permitted1.test";
 
 // kCommonNameNotPermittedLeaf is a leaf certificate signed by
 // kConstrainedIntermediate. Its common name is not permitted by the name
 // constraints.
-static const char kCommonNameNotPermittedLeaf[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBazCCARCgAwIBAgIBBDAKBggqhkjOPQQDAjAoMSYwJAYDVQQDEx1OYW1lIENv\n"
-    "bnN0cmFpbnRzIEludGVybWVkaWF0ZTAgFw0wMDAxMDEwMDAwMDBaGA8yMDk5MDEw\n"
-    "MTAwMDAwMFowQTEiMCAGA1UEChMZQ29tbW9uIG5hbWUgbm90IHBlcm1pdHRlZDEb\n"
-    "MBkGA1UEAxMSbm90LXBlcm1pdHRlZC50ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0D\n"
-    "AQcDQgAEzfghKuWf0JoXb0Drp09C3yXMSQQ1byt+AUaymvsHOWsxQ9v1Q+vkF/IM\n"
-    "HRqGTk2TyxrB2iClVEn/Uu+YtYox1KMQMA4wDAYDVR0TAQH/BAIwADAKBggqhkjO\n"
-    "PQQDAgNJADBGAiEAxaUslxmoWL1tIvnDz7gDkto/HcmdU0jHVuUQLXcCG8wCIQCN\n"
-    "5xZjitlCQU8UB5qSu9wH4B+0JcVO3Ss4Az76HEJWMw==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kCommonNameNotPermittedLeaf[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBazCCARCgAwIBAgIBBDAKBggqhkjOPQQDAjAoMSYwJAYDVQQDEx1OYW1lIENv
+bnN0cmFpbnRzIEludGVybWVkaWF0ZTAgFw0wMDAxMDEwMDAwMDBaGA8yMDk5MDEw
+MTAwMDAwMFowQTEiMCAGA1UEChMZQ29tbW9uIG5hbWUgbm90IHBlcm1pdHRlZDEb
+MBkGA1UEAxMSbm90LXBlcm1pdHRlZC50ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0D
+AQcDQgAEzfghKuWf0JoXb0Drp09C3yXMSQQ1byt+AUaymvsHOWsxQ9v1Q+vkF/IM
+HRqGTk2TyxrB2iClVEn/Uu+YtYox1KMQMA4wDAYDVR0TAQH/BAIwADAKBggqhkjO
+PQQDAgNJADBGAiEAxaUslxmoWL1tIvnDz7gDkto/HcmdU0jHVuUQLXcCG8wCIQCN
+5xZjitlCQU8UB5qSu9wH4B+0JcVO3Ss4Az76HEJWMw==
+-----END CERTIFICATE-----
+)";
 static const char kCommonNameNotPermitted[] = "not-permitted.test";
 
 // kCommonNameNotPermittedWithSANsLeaf is a leaf certificate signed by
 // kConstrainedIntermediate. Its common name is not permitted by the name
 // constraints but it has a SAN list.
-static const char kCommonNameNotPermittedWithSANsLeaf[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBqTCCAU+gAwIBAgIBBjAKBggqhkjOPQQDAjAoMSYwJAYDVQQDEx1OYW1lIENv\n"
-    "bnN0cmFpbnRzIEludGVybWVkaWF0ZTAgFw0wMDAxMDEwMDAwMDBaGA8yMDk5MDEw\n"
-    "MTAwMDAwMFowSzEsMCoGA1UEChMjQ29tbW9uIG5hbWUgbm90IHBlcm1pdHRlZCB3\n"
-    "aXRoIFNBTlMxGzAZBgNVBAMTEm5vdC1wZXJtaXR0ZWQudGVzdDBZMBMGByqGSM49\n"
-    "AgEGCCqGSM49AwEHA0IABKsn9wOApXFHrqhLdQgbFSeaSoAIbxgO0zVSRZUb5naR\n"
-    "93zoL3MFOvZEF8xiEqh7le+l3XuUig0fwqpcsZzRNJajRTBDMAwGA1UdEwEB/wQC\n"
-    "MAAwMwYDVR0RBCwwKoITZm9vLnBlcm1pdHRlZDEudGVzdIITZm9vLnBlcm1pdHRl\n"
-    "ZDIudGVzdDAKBggqhkjOPQQDAgNIADBFAiACk+1f184KkKAXuntmrz+Ygcq8MiZl\n"
-    "4delx44FtcNaegIhAIA5nYfzxNcTXxDo3U+x1vSLH6Y7faLvHiFySp7O//q+\n"
-    "-----END CERTIFICATE-----\n";
+static const char kCommonNameNotPermittedWithSANsLeaf[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBqTCCAU+gAwIBAgIBBjAKBggqhkjOPQQDAjAoMSYwJAYDVQQDEx1OYW1lIENv
+bnN0cmFpbnRzIEludGVybWVkaWF0ZTAgFw0wMDAxMDEwMDAwMDBaGA8yMDk5MDEw
+MTAwMDAwMFowSzEsMCoGA1UEChMjQ29tbW9uIG5hbWUgbm90IHBlcm1pdHRlZCB3
+aXRoIFNBTlMxGzAZBgNVBAMTEm5vdC1wZXJtaXR0ZWQudGVzdDBZMBMGByqGSM49
+AgEGCCqGSM49AwEHA0IABKsn9wOApXFHrqhLdQgbFSeaSoAIbxgO0zVSRZUb5naR
+93zoL3MFOvZEF8xiEqh7le+l3XuUig0fwqpcsZzRNJajRTBDMAwGA1UdEwEB/wQC
+MAAwMwYDVR0RBCwwKoITZm9vLnBlcm1pdHRlZDEudGVzdIITZm9vLnBlcm1pdHRl
+ZDIudGVzdDAKBggqhkjOPQQDAgNIADBFAiACk+1f184KkKAXuntmrz+Ygcq8MiZl
+4delx44FtcNaegIhAIA5nYfzxNcTXxDo3U+x1vSLH6Y7faLvHiFySp7O//q+
+-----END CERTIFICATE-----
+)";
 static const char kCommonNameNotPermittedWithSANs[] = "not-permitted.test";
 
 // kCommonNameNotDNSLeaf is a leaf certificate signed by
 // kConstrainedIntermediate. Its common name is not a DNS name.
-static const char kCommonNameNotDNSLeaf[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBYTCCAQagAwIBAgIBCDAKBggqhkjOPQQDAjAoMSYwJAYDVQQDEx1OYW1lIENv\n"
-    "bnN0cmFpbnRzIEludGVybWVkaWF0ZTAgFw0wMDAxMDEwMDAwMDBaGA8yMDk5MDEw\n"
-    "MTAwMDAwMFowNzEcMBoGA1UEChMTQ29tbW9uIG5hbWUgbm90IEROUzEXMBUGA1UE\n"
-    "AxMOTm90IGEgRE5TIG5hbWUwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAASnueyc\n"
-    "Zxtnw5ke2J2T0/LwAK37auQP/RSFd9mem+BJVbgviawtAlignJmafp7Zw4/GdYEJ\n"
-    "Vm8qlriOJtluvXGcoxAwDjAMBgNVHRMBAf8EAjAAMAoGCCqGSM49BAMCA0kAMEYC\n"
-    "IQChUAmVNI39VHe0zemRE09VDcSEgOxr1nTvjLcg/Q8pVQIhAJYZnJI0YZAi05QH\n"
-    "RHNlAkTK2TnUaVn3fGSylaLiFS1r\n"
-    "-----END CERTIFICATE-----\n";
+static const char kCommonNameNotDNSLeaf[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBYTCCAQagAwIBAgIBCDAKBggqhkjOPQQDAjAoMSYwJAYDVQQDEx1OYW1lIENv
+bnN0cmFpbnRzIEludGVybWVkaWF0ZTAgFw0wMDAxMDEwMDAwMDBaGA8yMDk5MDEw
+MTAwMDAwMFowNzEcMBoGA1UEChMTQ29tbW9uIG5hbWUgbm90IEROUzEXMBUGA1UE
+AxMOTm90IGEgRE5TIG5hbWUwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAASnueyc
+Zxtnw5ke2J2T0/LwAK37auQP/RSFd9mem+BJVbgviawtAlignJmafp7Zw4/GdYEJ
+Vm8qlriOJtluvXGcoxAwDjAMBgNVHRMBAf8EAjAAMAoGCCqGSM49BAMCA0kAMEYC
+IQChUAmVNI39VHe0zemRE09VDcSEgOxr1nTvjLcg/Q8pVQIhAJYZnJI0YZAi05QH
+RHNlAkTK2TnUaVn3fGSylaLiFS1r
+-----END CERTIFICATE-----
+)";
 static const char kCommonNameNotDNS[] = "Not a DNS name";
 
 // The following six certificates are issued by |kSANTypesRoot| and have
@@ -868,88 +921,94 @@
 //     pem.Encode(os.Stdout, &pem.Block{Type: "CERTIFICATE", Bytes: leafDER})
 // }
 
-static const char kMicrosoftSGCCert[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBtDCCAR2gAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C\n"
-    "AQYIKoZIzj0DAQcDQgAEEn61v3Vs+q6bTyyRnrJvuKBE8PTNVLbXGB52jig4Qse2\n"
-    "mGygNEysS0uzZ0luz+rn2hDRUFL6sHLUs1d8UMbI/6NEMEIwFQYDVR0lBA4wDAYK\n"
-    "KwYBBAGCNwoDAzAMBgNVHRMBAf8EAjAAMBsGA1UdIwQUMBKAEEA31wH7QC+4HH5U\n"
-    "BCeMWQEwDQYJKoZIhvcNAQELBQADgYEAgDQI9RSo3E3ZVnU71TV/LjG9xwHtfk6I\n"
-    "rlNnlJJ0lsTHAuMc1mwCbzhtsmasetwYlIa9G8GFWB9Gh/QqHA7G649iGGmXShqe\n"
-    "aVDuWgeSEJxBPE2jILoMm4pEYF7jfonTn7XXX6O78yuSlP+NPIU0gUKHkWZ1sWk0\n"
-    "cC4l0r/6jik=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kMicrosoftSGCCert[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBtDCCAR2gAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C
+AQYIKoZIzj0DAQcDQgAEEn61v3Vs+q6bTyyRnrJvuKBE8PTNVLbXGB52jig4Qse2
+mGygNEysS0uzZ0luz+rn2hDRUFL6sHLUs1d8UMbI/6NEMEIwFQYDVR0lBA4wDAYK
+KwYBBAGCNwoDAzAMBgNVHRMBAf8EAjAAMBsGA1UdIwQUMBKAEEA31wH7QC+4HH5U
+BCeMWQEwDQYJKoZIhvcNAQELBQADgYEAgDQI9RSo3E3ZVnU71TV/LjG9xwHtfk6I
+rlNnlJJ0lsTHAuMc1mwCbzhtsmasetwYlIa9G8GFWB9Gh/QqHA7G649iGGmXShqe
+aVDuWgeSEJxBPE2jILoMm4pEYF7jfonTn7XXX6O78yuSlP+NPIU0gUKHkWZ1sWk0
+cC4l0r/6jik=
+-----END CERTIFICATE-----
+)";
 
-static const char kNetscapeSGCCert[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBszCCARygAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C\n"
-    "AQYIKoZIzj0DAQcDQgAE3NbT+TnBfq1DWJCezjaUL52YhDU7cOkI2S2PoWgJ1v7x\n"
-    "kKLwBonUFZjppZs69SyBHeJdti+KoJ3qTW+hCG08EaNDMEEwFAYDVR0lBA0wCwYJ\n"
-    "YIZIAYb4QgQBMAwGA1UdEwEB/wQCMAAwGwYDVR0jBBQwEoAQQDfXAftAL7gcflQE\n"
-    "J4xZATANBgkqhkiG9w0BAQsFAAOBgQBuiyVcfazekHkCWksxdFmjPmMtWCxFjkzc\n"
-    "8VBxFE0CfSHQAfZ8J7tXd1FbAq/eXdZvvo8v0JB4sOM4Ex1ob1fuvDFHdSAHAD7W\n"
-    "dhKIjJyzVojoxjCjyue0XMeEPl7RiqbdxoS/R5HFAqAF0T2OeQAqP9gTpOXoau1M\n"
-    "RQHX6HQJJg==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kNetscapeSGCCert[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBszCCARygAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C
+AQYIKoZIzj0DAQcDQgAE3NbT+TnBfq1DWJCezjaUL52YhDU7cOkI2S2PoWgJ1v7x
+kKLwBonUFZjppZs69SyBHeJdti+KoJ3qTW+hCG08EaNDMEEwFAYDVR0lBA0wCwYJ
+YIZIAYb4QgQBMAwGA1UdEwEB/wQCMAAwGwYDVR0jBBQwEoAQQDfXAftAL7gcflQE
+J4xZATANBgkqhkiG9w0BAQsFAAOBgQBuiyVcfazekHkCWksxdFmjPmMtWCxFjkzc
+8VBxFE0CfSHQAfZ8J7tXd1FbAq/eXdZvvo8v0JB4sOM4Ex1ob1fuvDFHdSAHAD7W
+dhKIjJyzVojoxjCjyue0XMeEPl7RiqbdxoS/R5HFAqAF0T2OeQAqP9gTpOXoau1M
+RQHX6HQJJg==
+-----END CERTIFICATE-----
+)";
 
-static const char kServerEKUCert[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBsjCCARugAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C\n"
-    "AQYIKoZIzj0DAQcDQgAEDd35i+VWPwIOKLrLWTuP5cqD+yJDB5nujEzPgkXP5LKJ\n"
-    "SZRbHTqTdpYZB2jy6y90RY2Bsjx7FfZ7nN5G2g1GOKNCMEAwEwYDVR0lBAwwCgYI\n"
-    "KwYBBQUHAwEwDAYDVR0TAQH/BAIwADAbBgNVHSMEFDASgBBAN9cB+0AvuBx+VAQn\n"
-    "jFkBMA0GCSqGSIb3DQEBCwUAA4GBAIKmbMBjuivL/rxDu7u7Vr3o3cdmEggBJxwL\n"
-    "iatNW3x1wg0645aNYOktW/iQ7mAAiziTY73GFyfiJDWqnY+CwA94ZWyQidjHdN/I\n"
-    "6BR52sN/dkYEoInYEbmDNMc/if+T0yqeBQLP4BeKLiT8p0qqaimae6LgibS19hDP\n"
-    "2hoEMdz2\n"
-    "-----END CERTIFICATE-----\n";
+static const char kServerEKUCert[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBsjCCARugAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C
+AQYIKoZIzj0DAQcDQgAEDd35i+VWPwIOKLrLWTuP5cqD+yJDB5nujEzPgkXP5LKJ
+SZRbHTqTdpYZB2jy6y90RY2Bsjx7FfZ7nN5G2g1GOKNCMEAwEwYDVR0lBAwwCgYI
+KwYBBQUHAwEwDAYDVR0TAQH/BAIwADAbBgNVHSMEFDASgBBAN9cB+0AvuBx+VAQn
+jFkBMA0GCSqGSIb3DQEBCwUAA4GBAIKmbMBjuivL/rxDu7u7Vr3o3cdmEggBJxwL
+iatNW3x1wg0645aNYOktW/iQ7mAAiziTY73GFyfiJDWqnY+CwA94ZWyQidjHdN/I
+6BR52sN/dkYEoInYEbmDNMc/if+T0yqeBQLP4BeKLiT8p0qqaimae6LgibS19hDP
+2hoEMdz2
+-----END CERTIFICATE-----
+)";
 
-static const char kServerEKUPlusMicrosoftSGCCert[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBvjCCASegAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C\n"
-    "AQYIKoZIzj0DAQcDQgAEDO1MYPxq+U4oXMIK8UnsS4C696wpcu4UOmcMJJ5CUd5Z\n"
-    "ZpJShN6kYKnrb3GK/6xEgbUGntmrzSRG5FYqk6QgD6NOMEwwHwYDVR0lBBgwFgYI\n"
-    "KwYBBQUHAwEGCisGAQQBgjcKAwMwDAYDVR0TAQH/BAIwADAbBgNVHSMEFDASgBBA\n"
-    "N9cB+0AvuBx+VAQnjFkBMA0GCSqGSIb3DQEBCwUAA4GBAHOu2IBa4lHzVGS36HxS\n"
-    "SejUE87Ji1ysM6BgkYbfxfS9MuV+J3UnqH57JjbH/3CFl4ZDWceF6SGBSCn8LqKa\n"
-    "KHpwoNFU3zA99iQzVJgbUyN0PbKwHEanLyKDJZyFk71R39ToxhSNQgaQYjZYCy1H\n"
-    "5V9oXd1bodEqVsOZ/mur24Ku\n"
-    "-----END CERTIFICATE-----\n";
+static const char kServerEKUPlusMicrosoftSGCCert[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBvjCCASegAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C
+AQYIKoZIzj0DAQcDQgAEDO1MYPxq+U4oXMIK8UnsS4C696wpcu4UOmcMJJ5CUd5Z
+ZpJShN6kYKnrb3GK/6xEgbUGntmrzSRG5FYqk6QgD6NOMEwwHwYDVR0lBBgwFgYI
+KwYBBQUHAwEGCisGAQQBgjcKAwMwDAYDVR0TAQH/BAIwADAbBgNVHSMEFDASgBBA
+N9cB+0AvuBx+VAQnjFkBMA0GCSqGSIb3DQEBCwUAA4GBAHOu2IBa4lHzVGS36HxS
+SejUE87Ji1ysM6BgkYbfxfS9MuV+J3UnqH57JjbH/3CFl4ZDWceF6SGBSCn8LqKa
+KHpwoNFU3zA99iQzVJgbUyN0PbKwHEanLyKDJZyFk71R39ToxhSNQgaQYjZYCy1H
+5V9oXd1bodEqVsOZ/mur24Ku
+-----END CERTIFICATE-----
+)";
 
-static const char kAnyEKU[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBrjCCARegAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C\n"
-    "AQYIKoZIzj0DAQcDQgAE9nsLABDporlTvx1OBUc4Hd5vxfX+8nS/OhbHmKtFLYNu\n"
-    "1CLLrImbwMQYD2G+PgLO6sQHmASq2jmJKp6ZWsRkTqM+MDwwDwYDVR0lBAgwBgYE\n"
-    "VR0lADAMBgNVHRMBAf8EAjAAMBsGA1UdIwQUMBKAEEA31wH7QC+4HH5UBCeMWQEw\n"
-    "DQYJKoZIhvcNAQELBQADgYEAxgjgn1SAzQ+2GeCicZ5ndvVhKIeFelGCQ989XTVq\n"
-    "uUbAYBW6v8GXNuVzoXYxDgNSanF6U+w+INrJ6daKVrIxAxdk9QFgBXqJoupuRAA3\n"
-    "/OqnmYux0EqOTLbTK1P8DhaiaD0KV6dWGUwzqsgBmPkZ0lgNaPjvb1mKV3jhBkjz\n"
-    "L6A=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kAnyEKU[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBrjCCARegAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C
+AQYIKoZIzj0DAQcDQgAE9nsLABDporlTvx1OBUc4Hd5vxfX+8nS/OhbHmKtFLYNu
+1CLLrImbwMQYD2G+PgLO6sQHmASq2jmJKp6ZWsRkTqM+MDwwDwYDVR0lBAgwBgYE
+VR0lADAMBgNVHRMBAf8EAjAAMBsGA1UdIwQUMBKAEEA31wH7QC+4HH5UBCeMWQEw
+DQYJKoZIhvcNAQELBQADgYEAxgjgn1SAzQ+2GeCicZ5ndvVhKIeFelGCQ989XTVq
+uUbAYBW6v8GXNuVzoXYxDgNSanF6U+w+INrJ6daKVrIxAxdk9QFgBXqJoupuRAA3
+/OqnmYux0EqOTLbTK1P8DhaiaD0KV6dWGUwzqsgBmPkZ0lgNaPjvb1mKV3jhBkjz
+L6A=
+-----END CERTIFICATE-----
+)";
 
-static const char kNoEKU[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBnTCCAQagAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp\n"
-    "bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y\n"
-    "MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C\n"
-    "AQYIKoZIzj0DAQcDQgAEpSFSqbYY86ZcMamE606dqdyjWlwhSHKOLUFsUUIzkMPz\n"
-    "KHRu/x3Yzi8+Hm8eFK/TnCbkpYsYw4hIw00176dYzaMtMCswDAYDVR0TAQH/BAIw\n"
-    "ADAbBgNVHSMEFDASgBBAN9cB+0AvuBx+VAQnjFkBMA0GCSqGSIb3DQEBCwUAA4GB\n"
-    "AHvYzynIkjLThExHRS+385hfv4vgrQSMmCM1SAnEIjSBGsU7RPgiGAstN06XivuF\n"
-    "T1fNugRmTu4OtOIbfdYkcjavJufw9hR9zWTt77CNMTy9XmOZLgdS5boFTtLCztr3\n"
-    "TXHOSQQD8Dl4BK0wOet+TP6LBEjHlRFjAqK4bu9xpxV2\n"
-    "-----END CERTIFICATE-----\n";
+static const char kNoEKU[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBnTCCAQagAwIBAgIBAzANBgkqhkiG9w0BAQsFADArMRcwFQYDVQQKEw5Cb3Jp
+bmdTU0wgVGVzdDEQMA4GA1UEAxMHUm9vdCBDQTAgFw0wMDAxMDEwMDAwMDBaGA8y
+MDk5MDEwMTAwMDAwMFowFDESMBAGA1UEAxMJRUtVIG1zU0dDMFkwEwYHKoZIzj0C
+AQYIKoZIzj0DAQcDQgAEpSFSqbYY86ZcMamE606dqdyjWlwhSHKOLUFsUUIzkMPz
+KHRu/x3Yzi8+Hm8eFK/TnCbkpYsYw4hIw00176dYzaMtMCswDAYDVR0TAQH/BAIw
+ADAbBgNVHSMEFDASgBBAN9cB+0AvuBx+VAQnjFkBMA0GCSqGSIb3DQEBCwUAA4GB
+AHvYzynIkjLThExHRS+385hfv4vgrQSMmCM1SAnEIjSBGsU7RPgiGAstN06XivuF
+T1fNugRmTu4OtOIbfdYkcjavJufw9hR9zWTt77CNMTy9XmOZLgdS5boFTtLCztr3
+TXHOSQQD8Dl4BK0wOet+TP6LBEjHlRFjAqK4bu9xpxV2
+-----END CERTIFICATE-----
+)";
 
 // CertFromPEM parses the given, NUL-terminated pem block and returns an
 // |X509*|.
@@ -1429,6 +1488,40 @@
   ERR_clear_error();
 }
 
+TEST(X509Test, TestX25519) {
+  bssl::UniquePtr<X509> cert(CertFromPEM(kX25519Cert));
+  ASSERT_TRUE(cert);
+
+  bssl::UniquePtr<EVP_PKEY> pkey(X509_get_pubkey(cert.get()));
+  ASSERT_TRUE(pkey);
+
+  EXPECT_EQ(EVP_PKEY_id(pkey.get()), EVP_PKEY_X25519);
+
+  constexpr uint8_t kExpectedPublicValue[] = {
+      0x85, 0x20, 0xf0, 0x09, 0x89, 0x30, 0xa7, 0x54, 0x74, 0x8b, 0x7d,
+      0xdc, 0xb4, 0x3e, 0xf7, 0x5a, 0x0d, 0xbf, 0x3a, 0x0d, 0x26, 0x38,
+      0x1a, 0xf4, 0xeb, 0xa4, 0xa9, 0x8e, 0xaa, 0x9b, 0x4e, 0x6a,
+  };
+  uint8_t public_value[sizeof(kExpectedPublicValue)];
+  size_t public_value_size = sizeof(public_value);
+  ASSERT_TRUE(EVP_PKEY_get_raw_public_key(pkey.get(), public_value,
+                                          &public_value_size));
+  EXPECT_EQ(Bytes(kExpectedPublicValue),
+            Bytes(public_value, public_value_size));
+}
+
+static bssl::UniquePtr<X509> ReencodeCertificate(X509 *cert) {
+  uint8_t *der = nullptr;
+  int len = i2d_X509(cert, &der);
+  bssl::UniquePtr<uint8_t> free_der(der);
+  if (len <= 0) {
+    return nullptr;
+  }
+
+  const uint8_t *inp = der;
+  return bssl::UniquePtr<X509>(d2i_X509(nullptr, &inp, len));
+}
+
 static bool SignatureRoundTrips(EVP_MD_CTX *md_ctx, EVP_PKEY *pkey) {
   // Make a certificate like signed with |md_ctx|'s settings.'
   bssl::UniquePtr<X509> cert(CertFromPEM(kLeafPEM));
@@ -1438,7 +1531,14 @@
 
   // Ensure that |pkey| may still be used to verify the resulting signature. All
   // settings in |md_ctx| must have been serialized appropriately.
-  return !!X509_verify(cert.get(), pkey);
+  if (!X509_verify(cert.get(), pkey)) {
+    return false;
+  }
+
+  // Re-encode the certificate. X509 objects contain a cached TBSCertificate
+  // encoding and |X509_sign_ctx| should have refreshed that cache.
+  bssl::UniquePtr<X509> copy = ReencodeCertificate(cert.get());
+  return copy && X509_verify(copy.get(), pkey);
 }
 
 TEST(X509Test, RSASign) {
@@ -1458,6 +1558,99 @@
   ASSERT_TRUE(EVP_PKEY_CTX_set_rsa_padding(pkey_ctx, RSA_PKCS1_PSS_PADDING));
   ASSERT_TRUE(EVP_PKEY_CTX_set_rsa_mgf1_md(pkey_ctx, EVP_sha512()));
   ASSERT_TRUE(SignatureRoundTrips(md_ctx.get(), pkey.get()));
+
+  // RSA-PSS with salt length matching hash length should work when passing in
+  // -1 or the value explicitly.
+  md_ctx.Reset();
+  ASSERT_TRUE(EVP_DigestSignInit(md_ctx.get(), &pkey_ctx, EVP_sha256(), NULL,
+                                 pkey.get()));
+  ASSERT_TRUE(EVP_PKEY_CTX_set_rsa_padding(pkey_ctx, RSA_PKCS1_PSS_PADDING));
+  ASSERT_TRUE(EVP_PKEY_CTX_set_rsa_pss_saltlen(pkey_ctx, -1));
+  ASSERT_TRUE(SignatureRoundTrips(md_ctx.get(), pkey.get()));
+
+  md_ctx.Reset();
+  ASSERT_TRUE(EVP_DigestSignInit(md_ctx.get(), &pkey_ctx, EVP_sha256(), NULL,
+                                 pkey.get()));
+  ASSERT_TRUE(EVP_PKEY_CTX_set_rsa_padding(pkey_ctx, RSA_PKCS1_PSS_PADDING));
+  ASSERT_TRUE(EVP_PKEY_CTX_set_rsa_pss_saltlen(pkey_ctx, 32));
+  ASSERT_TRUE(SignatureRoundTrips(md_ctx.get(), pkey.get()));
+}
+
+// Test the APIs for manually signing a certificate.
+TEST(X509Test, RSASignManual) {
+  const int kSignatureNID = NID_sha384WithRSAEncryption;
+  const EVP_MD *kSignatureHash = EVP_sha384();
+
+  bssl::UniquePtr<EVP_PKEY> pkey(PrivateKeyFromPEM(kRSAKey));
+  ASSERT_TRUE(pkey);
+  bssl::UniquePtr<X509_ALGOR> algor(X509_ALGOR_new());
+  ASSERT_TRUE(algor);
+  ASSERT_TRUE(X509_ALGOR_set0(algor.get(), OBJ_nid2obj(kSignatureNID),
+                              V_ASN1_NULL, nullptr));
+
+  // Test certificates made both from other certificates and |X509_new|, in case
+  // there are bugs in filling in fields from different states. (Parsed
+  // certificates contain a TBSCertificate cache, and |X509_new| initializes
+  // fields based on complex ASN.1 template logic.)
+  for (bool new_cert : {true, false}) {
+    SCOPED_TRACE(new_cert);
+
+    bssl::UniquePtr<X509> cert;
+    if (new_cert) {
+      cert.reset(X509_new());
+      // Fill in some fields for the certificate arbitrarily.
+      EXPECT_TRUE(X509_set_version(cert.get(), 2 /* X.509v3 */));
+      EXPECT_TRUE(ASN1_INTEGER_set(X509_get_serialNumber(cert.get()), 1));
+      EXPECT_TRUE(X509_gmtime_adj(X509_getm_notBefore(cert.get()), 0));
+      EXPECT_TRUE(
+          X509_gmtime_adj(X509_getm_notAfter(cert.get()), 60 * 60 * 24));
+      X509_NAME *subject = X509_get_subject_name(cert.get());
+      X509_NAME_add_entry_by_txt(subject, "CN", MBSTRING_ASC,
+                                 reinterpret_cast<const uint8_t *>("Test"), -1,
+                                 -1, 0);
+      EXPECT_TRUE(X509_set_issuer_name(cert.get(), subject));
+      EXPECT_TRUE(X509_set_pubkey(cert.get(), pkey.get()));
+    } else {
+      // Extract fields from a parsed certificate.
+      cert = CertFromPEM(kLeafPEM);
+      ASSERT_TRUE(cert);
+
+      // We should test with a different algorithm from what is already in the
+      // certificate.
+      EXPECT_NE(kSignatureNID, X509_get_signature_nid(cert.get()));
+    }
+
+    // Fill in the signature algorithm.
+    ASSERT_TRUE(X509_set1_signature_algo(cert.get(), algor.get()));
+
+    // Extract the TBSCertificate.
+    uint8_t *tbs_cert = nullptr;
+    int tbs_cert_len = i2d_re_X509_tbs(cert.get(), &tbs_cert);
+    bssl::UniquePtr<uint8_t> free_tbs_cert(tbs_cert);
+    ASSERT_GT(tbs_cert_len, 0);
+
+    // Generate a signature externally and fill it in.
+    bssl::ScopedEVP_MD_CTX md_ctx;
+    ASSERT_TRUE(EVP_DigestSignInit(md_ctx.get(), nullptr, kSignatureHash,
+                                   nullptr, pkey.get()));
+    size_t sig_len;
+    ASSERT_TRUE(EVP_DigestSign(md_ctx.get(), nullptr, &sig_len, tbs_cert,
+                               tbs_cert_len));
+    std::vector<uint8_t> sig(sig_len);
+    ASSERT_TRUE(EVP_DigestSign(md_ctx.get(), sig.data(), &sig_len, tbs_cert,
+                               tbs_cert_len));
+    sig.resize(sig_len);
+    ASSERT_TRUE(X509_set1_signature_value(cert.get(), sig.data(), sig.size()));
+
+    // Check the signature.
+    EXPECT_TRUE(X509_verify(cert.get(), pkey.get()));
+
+    // Re-encode the certificate. X509 objects contain a cached TBSCertificate
+    // encoding and |i2d_re_X509_tbs| should have refreshed that cache.
+    bssl::UniquePtr<X509> copy = ReencodeCertificate(cert.get());
+    ASSERT_TRUE(copy);
+    EXPECT_TRUE(X509_verify(copy.get(), pkey.get()));
+  }
 }
 
 TEST(X509Test, Ed25519Sign) {
@@ -2322,125 +2515,136 @@
 
 // kExplicitDefaultVersionPEM is an X.509v1 certificate with the version number
 // encoded explicitly, rather than omitted as required by DER.
-static const char kExplicitDefaultVersionPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBfTCCASSgAwIBAAIJANlMBNpJfb/rMAkGByqGSM49BAEwRTELMAkGA1UEBhMC\n"
-    "QVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdp\n"
-    "dHMgUHR5IEx0ZDAeFw0xNDA0MjMyMzIxNTdaFw0xNDA1MjMyMzIxNTdaMEUxCzAJ\n"
-    "BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l\n"
-    "dCBXaWRnaXRzIFB0eSBMdGQwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATmK2ni\n"
-    "v2Wfl74vHg2UikzVl2u3qR4NRvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYa\n"
-    "HPUdfvGULUvPciLBMAkGByqGSM49BAEDSAAwRQIhAPKgNV5ROjbDgnmb7idQhY5w\n"
-    "BnSVV9IpdAD0vhWHXcQHAiB8HnkUaiGD8Hp0aHlfFJmaaLTxy54VXuYfMlJhXnXJ\n"
-    "FA==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kExplicitDefaultVersionPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBfTCCASSgAwIBAAIJANlMBNpJfb/rMAkGByqGSM49BAEwRTELMAkGA1UEBhMC
+QVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdp
+dHMgUHR5IEx0ZDAeFw0xNDA0MjMyMzIxNTdaFw0xNDA1MjMyMzIxNTdaMEUxCzAJ
+BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l
+dCBXaWRnaXRzIFB0eSBMdGQwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATmK2ni
+v2Wfl74vHg2UikzVl2u3qR4NRvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYa
+HPUdfvGULUvPciLBMAkGByqGSM49BAEDSAAwRQIhAPKgNV5ROjbDgnmb7idQhY5w
+BnSVV9IpdAD0vhWHXcQHAiB8HnkUaiGD8Hp0aHlfFJmaaLTxy54VXuYfMlJhXnXJ
+FA==
+-----END CERTIFICATE-----
+)";
 
 // kNegativeVersionPEM is an X.509 certificate with a negative version number.
-static const char kNegativeVersionPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBfTCCASSgAwIB/wIJANlMBNpJfb/rMAkGByqGSM49BAEwRTELMAkGA1UEBhMC\n"
-    "QVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdp\n"
-    "dHMgUHR5IEx0ZDAeFw0xNDA0MjMyMzIxNTdaFw0xNDA1MjMyMzIxNTdaMEUxCzAJ\n"
-    "BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l\n"
-    "dCBXaWRnaXRzIFB0eSBMdGQwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATmK2ni\n"
-    "v2Wfl74vHg2UikzVl2u3qR4NRvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYa\n"
-    "HPUdfvGULUvPciLBMAkGByqGSM49BAEDSAAwRQIhAPKgNV5ROjbDgnmb7idQhY5w\n"
-    "BnSVV9IpdAD0vhWHXcQHAiB8HnkUaiGD8Hp0aHlfFJmaaLTxy54VXuYfMlJhXnXJ\n"
-    "FA==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kNegativeVersionPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBfTCCASSgAwIB/wIJANlMBNpJfb/rMAkGByqGSM49BAEwRTELMAkGA1UEBhMC
+QVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdp
+dHMgUHR5IEx0ZDAeFw0xNDA0MjMyMzIxNTdaFw0xNDA1MjMyMzIxNTdaMEUxCzAJ
+BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l
+dCBXaWRnaXRzIFB0eSBMdGQwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATmK2ni
+v2Wfl74vHg2UikzVl2u3qR4NRvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYa
+HPUdfvGULUvPciLBMAkGByqGSM49BAEDSAAwRQIhAPKgNV5ROjbDgnmb7idQhY5w
+BnSVV9IpdAD0vhWHXcQHAiB8HnkUaiGD8Hp0aHlfFJmaaLTxy54VXuYfMlJhXnXJ
+FA==
+-----END CERTIFICATE-----
+)";
 
 // kFutureVersionPEM is an X.509 certificate with a version number value of
 // three, which is not defined. (v3 has value two).
-static const char kFutureVersionPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBfTCCASSgAwIBAwIJANlMBNpJfb/rMAkGByqGSM49BAEwRTELMAkGA1UEBhMC\n"
-    "QVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdp\n"
-    "dHMgUHR5IEx0ZDAeFw0xNDA0MjMyMzIxNTdaFw0xNDA1MjMyMzIxNTdaMEUxCzAJ\n"
-    "BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l\n"
-    "dCBXaWRnaXRzIFB0eSBMdGQwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATmK2ni\n"
-    "v2Wfl74vHg2UikzVl2u3qR4NRvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYa\n"
-    "HPUdfvGULUvPciLBMAkGByqGSM49BAEDSAAwRQIhAPKgNV5ROjbDgnmb7idQhY5w\n"
-    "BnSVV9IpdAD0vhWHXcQHAiB8HnkUaiGD8Hp0aHlfFJmaaLTxy54VXuYfMlJhXnXJ\n"
-    "FA==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kFutureVersionPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBfTCCASSgAwIBAwIJANlMBNpJfb/rMAkGByqGSM49BAEwRTELMAkGA1UEBhMC
+QVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdp
+dHMgUHR5IEx0ZDAeFw0xNDA0MjMyMzIxNTdaFw0xNDA1MjMyMzIxNTdaMEUxCzAJ
+BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l
+dCBXaWRnaXRzIFB0eSBMdGQwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATmK2ni
+v2Wfl74vHg2UikzVl2u3qR4NRvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYa
+HPUdfvGULUvPciLBMAkGByqGSM49BAEDSAAwRQIhAPKgNV5ROjbDgnmb7idQhY5w
+BnSVV9IpdAD0vhWHXcQHAiB8HnkUaiGD8Hp0aHlfFJmaaLTxy54VXuYfMlJhXnXJ
+FA==
+-----END CERTIFICATE-----
+)";
 
 // kOverflowVersionPEM is an X.509 certificate with a version field which
 // overflows |uint64_t|.
-static const char kOverflowVersionPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBoDCCAUegJgIkAP//////////////////////////////////////////////\n"
-    "AgkA2UwE2kl9v+swCQYHKoZIzj0EATBFMQswCQYDVQQGEwJBVTETMBEGA1UECAwK\n"
-    "U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMB4X\n"
-    "DTE0MDQyMzIzMjE1N1oXDTE0MDUyMzIzMjE1N1owRTELMAkGA1UEBhMCQVUxEzAR\n"
-    "BgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5\n"
-    "IEx0ZDBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABOYraeK/ZZ+Xvi8eDZSKTNWX\n"
-    "a7epHg1G+92pqR6d3LpaAefWl6gKGPnDxKMeVuJ8g0jbFhoc9R1+8ZQtS89yIsEw\n"
-    "CQYHKoZIzj0EAQNIADBFAiEA8qA1XlE6NsOCeZvuJ1CFjnAGdJVX0il0APS+FYdd\n"
-    "xAcCIHweeRRqIYPwenRoeV8UmZpotPHLnhVe5h8yUmFedckU\n"
-    "-----END CERTIFICATE-----\n";
+static const char kOverflowVersionPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBoDCCAUegJgIkAP//////////////////////////////////////////////
+AgkA2UwE2kl9v+swCQYHKoZIzj0EATBFMQswCQYDVQQGEwJBVTETMBEGA1UECAwK
+U29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lkZ2l0cyBQdHkgTHRkMB4X
+DTE0MDQyMzIzMjE1N1oXDTE0MDUyMzIzMjE1N1owRTELMAkGA1UEBhMCQVUxEzAR
+BgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdpdHMgUHR5
+IEx0ZDBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABOYraeK/ZZ+Xvi8eDZSKTNWX
+a7epHg1G+92pqR6d3LpaAefWl6gKGPnDxKMeVuJ8g0jbFhoc9R1+8ZQtS89yIsEw
+CQYHKoZIzj0EAQNIADBFAiEA8qA1XlE6NsOCeZvuJ1CFjnAGdJVX0il0APS+FYdd
+xAcCIHweeRRqIYPwenRoeV8UmZpotPHLnhVe5h8yUmFedckU
+-----END CERTIFICATE-----
+)";
 
 // kV1WithExtensionsPEM is an X.509v1 certificate with extensions.
-static const char kV1WithExtensionsPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIByjCCAXECCQDZTATaSX2/6zAJBgcqhkjOPQQBMEUxCzAJBgNVBAYTAkFVMRMw\n"
-    "EQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0\n"
-    "eSBMdGQwHhcNMTQwNDIzMjMyMTU3WhcNMTQwNTIzMjMyMTU3WjBFMQswCQYDVQQG\n"
-    "EwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lk\n"
-    "Z2l0cyBQdHkgTHRkMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+\n"
-    "Lx4NlIpM1Zdrt6keDUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7x\n"
-    "lC1Lz3IiwaNQME4wHQYDVR0OBBYEFKuE0qyrlfCCThZ4B1VXX+QmjYLRMB8GA1Ud\n"
-    "IwQYMBaAFKuE0qyrlfCCThZ4B1VXX+QmjYLRMAwGA1UdEwQFMAMBAf8wCQYHKoZI\n"
-    "zj0EAQNIADBFAiEA8qA1XlE6NsOCeZvuJ1CFjnAGdJVX0il0APS+FYddxAcCIHwe\n"
-    "eRRqIYPwenRoeV8UmZpotPHLnhVe5h8yUmFedckU\n"
-    "-----END CERTIFICATE-----\n";
+static const char kV1WithExtensionsPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIByjCCAXECCQDZTATaSX2/6zAJBgcqhkjOPQQBMEUxCzAJBgNVBAYTAkFVMRMw
+EQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0
+eSBMdGQwHhcNMTQwNDIzMjMyMTU3WhcNMTQwNTIzMjMyMTU3WjBFMQswCQYDVQQG
+EwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lk
+Z2l0cyBQdHkgTHRkMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+
+Lx4NlIpM1Zdrt6keDUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7x
+lC1Lz3IiwaNQME4wHQYDVR0OBBYEFKuE0qyrlfCCThZ4B1VXX+QmjYLRMB8GA1Ud
+IwQYMBaAFKuE0qyrlfCCThZ4B1VXX+QmjYLRMAwGA1UdEwQFMAMBAf8wCQYHKoZI
+zj0EAQNIADBFAiEA8qA1XlE6NsOCeZvuJ1CFjnAGdJVX0il0APS+FYddxAcCIHwe
+eRRqIYPwenRoeV8UmZpotPHLnhVe5h8yUmFedckU
+-----END CERTIFICATE-----
+)";
 
 // kV2WithExtensionsPEM is an X.509v2 certificate with extensions.
-static const char kV2WithExtensionsPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBzzCCAXagAwIBAQIJANlMBNpJfb/rMAkGByqGSM49BAEwRTELMAkGA1UEBhMC\n"
-    "QVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdp\n"
-    "dHMgUHR5IEx0ZDAeFw0xNDA0MjMyMzIxNTdaFw0xNDA1MjMyMzIxNTdaMEUxCzAJ\n"
-    "BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l\n"
-    "dCBXaWRnaXRzIFB0eSBMdGQwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATmK2ni\n"
-    "v2Wfl74vHg2UikzVl2u3qR4NRvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYa\n"
-    "HPUdfvGULUvPciLBo1AwTjAdBgNVHQ4EFgQUq4TSrKuV8IJOFngHVVdf5CaNgtEw\n"
-    "HwYDVR0jBBgwFoAUq4TSrKuV8IJOFngHVVdf5CaNgtEwDAYDVR0TBAUwAwEB/zAJ\n"
-    "BgcqhkjOPQQBA0gAMEUCIQDyoDVeUTo2w4J5m+4nUIWOcAZ0lVfSKXQA9L4Vh13E\n"
-    "BwIgfB55FGohg/B6dGh5XxSZmmi08cueFV7mHzJSYV51yRQ=\n"
-    "-----END CERTIFICATE-----\n";
+static const char kV2WithExtensionsPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBzzCCAXagAwIBAQIJANlMBNpJfb/rMAkGByqGSM49BAEwRTELMAkGA1UEBhMC
+QVUxEzARBgNVBAgMClNvbWUtU3RhdGUxITAfBgNVBAoMGEludGVybmV0IFdpZGdp
+dHMgUHR5IEx0ZDAeFw0xNDA0MjMyMzIxNTdaFw0xNDA1MjMyMzIxNTdaMEUxCzAJ
+BgNVBAYTAkFVMRMwEQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5l
+dCBXaWRnaXRzIFB0eSBMdGQwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATmK2ni
+v2Wfl74vHg2UikzVl2u3qR4NRvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYa
+HPUdfvGULUvPciLBo1AwTjAdBgNVHQ4EFgQUq4TSrKuV8IJOFngHVVdf5CaNgtEw
+HwYDVR0jBBgwFoAUq4TSrKuV8IJOFngHVVdf5CaNgtEwDAYDVR0TBAUwAwEB/zAJ
+BgcqhkjOPQQBA0gAMEUCIQDyoDVeUTo2w4J5m+4nUIWOcAZ0lVfSKXQA9L4Vh13E
+BwIgfB55FGohg/B6dGh5XxSZmmi08cueFV7mHzJSYV51yRQ=
+-----END CERTIFICATE-----
+)";
 
 // kV1WithIssuerUniqueIDPEM is an X.509v1 certificate with an issuerUniqueID.
-static const char kV1WithIssuerUniqueIDPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBgzCCASoCCQDZTATaSX2/6zAJBgcqhkjOPQQBMEUxCzAJBgNVBAYTAkFVMRMw\n"
-    "EQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0\n"
-    "eSBMdGQwHhcNMTQwNDIzMjMyMTU3WhcNMTQwNTIzMjMyMTU3WjBFMQswCQYDVQQG\n"
-    "EwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lk\n"
-    "Z2l0cyBQdHkgTHRkMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+\n"
-    "Lx4NlIpM1Zdrt6keDUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7x\n"
-    "lC1Lz3IiwYEJAAEjRWeJq83vMAkGByqGSM49BAEDSAAwRQIhAPKgNV5ROjbDgnmb\n"
-    "7idQhY5wBnSVV9IpdAD0vhWHXcQHAiB8HnkUaiGD8Hp0aHlfFJmaaLTxy54VXuYf\n"
-    "MlJhXnXJFA==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kV1WithIssuerUniqueIDPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBgzCCASoCCQDZTATaSX2/6zAJBgcqhkjOPQQBMEUxCzAJBgNVBAYTAkFVMRMw
+EQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0
+eSBMdGQwHhcNMTQwNDIzMjMyMTU3WhcNMTQwNTIzMjMyMTU3WjBFMQswCQYDVQQG
+EwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lk
+Z2l0cyBQdHkgTHRkMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+
+Lx4NlIpM1Zdrt6keDUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7x
+lC1Lz3IiwYEJAAEjRWeJq83vMAkGByqGSM49BAEDSAAwRQIhAPKgNV5ROjbDgnmb
+7idQhY5wBnSVV9IpdAD0vhWHXcQHAiB8HnkUaiGD8Hp0aHlfFJmaaLTxy54VXuYf
+MlJhXnXJFA==
+-----END CERTIFICATE-----
+)";
 
 // kV1WithSubjectUniqueIDPEM is an X.509v1 certificate with an issuerUniqueID.
-static const char kV1WithSubjectUniqueIDPEM[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBgzCCASoCCQDZTATaSX2/6zAJBgcqhkjOPQQBMEUxCzAJBgNVBAYTAkFVMRMw\n"
-    "EQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0\n"
-    "eSBMdGQwHhcNMTQwNDIzMjMyMTU3WhcNMTQwNTIzMjMyMTU3WjBFMQswCQYDVQQG\n"
-    "EwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lk\n"
-    "Z2l0cyBQdHkgTHRkMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+\n"
-    "Lx4NlIpM1Zdrt6keDUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7x\n"
-    "lC1Lz3IiwYIJAAEjRWeJq83vMAkGByqGSM49BAEDSAAwRQIhAPKgNV5ROjbDgnmb\n"
-    "7idQhY5wBnSVV9IpdAD0vhWHXcQHAiB8HnkUaiGD8Hp0aHlfFJmaaLTxy54VXuYf\n"
-    "MlJhXnXJFA==\n"
-    "-----END CERTIFICATE-----\n";
+static const char kV1WithSubjectUniqueIDPEM[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBgzCCASoCCQDZTATaSX2/6zAJBgcqhkjOPQQBMEUxCzAJBgNVBAYTAkFVMRMw
+EQYDVQQIDApTb21lLVN0YXRlMSEwHwYDVQQKDBhJbnRlcm5ldCBXaWRnaXRzIFB0
+eSBMdGQwHhcNMTQwNDIzMjMyMTU3WhcNMTQwNTIzMjMyMTU3WjBFMQswCQYDVQQG
+EwJBVTETMBEGA1UECAwKU29tZS1TdGF0ZTEhMB8GA1UECgwYSW50ZXJuZXQgV2lk
+Z2l0cyBQdHkgTHRkMFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+
+Lx4NlIpM1Zdrt6keDUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7x
+lC1Lz3IiwYIJAAEjRWeJq83vMAkGByqGSM49BAEDSAAwRQIhAPKgNV5ROjbDgnmb
+7idQhY5wBnSVV9IpdAD0vhWHXcQHAiB8HnkUaiGD8Hp0aHlfFJmaaLTxy54VXuYf
+MlJhXnXJFA==
+-----END CERTIFICATE-----
+)";
 
 // Test that the X.509 parser enforces versions are valid and match the fields
 // present.
 TEST(X509Test, InvalidVersion) {
-  EXPECT_FALSE(CertFromPEM(kExplicitDefaultVersionPEM));
+  // kExplicitDefaultVersionPEM is invalid but, for now, we accept it. See
+  // https://crbug.com/boringssl/364.
+  EXPECT_TRUE(CertFromPEM(kExplicitDefaultVersionPEM));
+
   EXPECT_FALSE(CertFromPEM(kNegativeVersionPEM));
   EXPECT_FALSE(CertFromPEM(kFutureVersionPEM));
   EXPECT_FALSE(CertFromPEM(kOverflowVersionPEM));
@@ -2450,74 +2654,120 @@
   EXPECT_FALSE(CertFromPEM(kV1WithSubjectUniqueIDPEM));
 }
 
+// Unlike upstream OpenSSL, we require a non-null store in
+// |X509_STORE_CTX_init|.
+TEST(X509Test, NullStore) {
+  bssl::UniquePtr<X509> leaf(CertFromPEM(kLeafPEM));
+  ASSERT_TRUE(leaf);
+  bssl::UniquePtr<X509_STORE_CTX> ctx(X509_STORE_CTX_new());
+  ASSERT_TRUE(ctx);
+  EXPECT_FALSE(X509_STORE_CTX_init(ctx.get(), nullptr, leaf.get(), nullptr));
+}
+
+TEST(X509Test, BasicConstraints) {
+  const uint32_t kFlagMask = EXFLAG_CA | EXFLAG_BCONS | EXFLAG_INVALID;
+
+  static const struct {
+    const char *file;
+    uint32_t flags;
+    int path_len;
+  } kTests[] = {
+      {"basic_constraints_none.pem", 0, -1},
+      {"basic_constraints_ca.pem", EXFLAG_CA | EXFLAG_BCONS, -1},
+      {"basic_constraints_ca_pathlen_0.pem", EXFLAG_CA | EXFLAG_BCONS, 0},
+      {"basic_constraints_ca_pathlen_1.pem", EXFLAG_CA | EXFLAG_BCONS, 1},
+      {"basic_constraints_ca_pathlen_10.pem", EXFLAG_CA | EXFLAG_BCONS, 10},
+      {"basic_constraints_leaf.pem", EXFLAG_BCONS, -1},
+      {"invalid_extension_leaf_basic_constraints.pem", EXFLAG_INVALID, -1},
+  };
+
+  for (const auto &test : kTests) {
+    SCOPED_TRACE(test.file);
+
+    std::string path = "crypto/x509/test/";
+    path += test.file;
+
+    bssl::UniquePtr<X509> cert = CertFromPEM(GetTestData(path.c_str()).c_str());
+    ASSERT_TRUE(cert);
+    EXPECT_EQ(test.flags, X509_get_extension_flags(cert.get()) & kFlagMask);
+    EXPECT_EQ(test.path_len, X509_get_pathlen(cert.get()));
+  }
+}
+
 // The following strings are test certificates signed by kP256Key and kRSAKey,
 // with missing, NULL, or invalid algorithm parameters.
-static const char kP256NoParam[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBIDCBxqADAgECAgIE0jAKBggqhkjOPQQDAjAPMQ0wCwYDVQQDEwRUZXN0MCAX\n"
-    "DTAwMDEwMTAwMDAwMFoYDzIxMDAwMTAxMDAwMDAwWjAPMQ0wCwYDVQQDEwRUZXN0\n"
-    "MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+Lx4NlIpM1Zdrt6ke\n"
-    "DUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7xlC1Lz3IiwaMQMA4w\n"
-    "DAYDVR0TBAUwAwEB/zAKBggqhkjOPQQDAgNJADBGAiEAqdIiF+bN9Cl44oUeICpy\n"
-    "aXd7HqhpVUaglYKw9ChmNUACIQCpMdL0fNkFNDbRww9dSl/y7kBdk/tp16HiqeSy\n"
-    "gGzFYg==\n"
-    "-----END CERTIFICATE-----\n";
-static const char kP256NullParam[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBJDCByKADAgECAgIE0jAMBggqhkjOPQQDAgUAMA8xDTALBgNVBAMTBFRlc3Qw\n"
-    "IBcNMDAwMTAxMDAwMDAwWhgPMjEwMDAxMDEwMDAwMDBaMA8xDTALBgNVBAMTBFRl\n"
-    "c3QwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATmK2niv2Wfl74vHg2UikzVl2u3\n"
-    "qR4NRvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYaHPUdfvGULUvPciLBoxAw\n"
-    "DjAMBgNVHRMEBTADAQH/MAwGCCqGSM49BAMCBQADSQAwRgIhAKILHmyo+F3Cn/VX\n"
-    "UUeSXOQQKX5aLzsQitwwmNF3ZgH3AiEAsYHcrVj/ftmoQIORARkQ/+PrqntXev8r\n"
-    "t6uPxHrmpUY=\n"
-    "-----END CERTIFICATE-----\n";
-static const char kP256InvalidParam[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBMTCBz6ADAgECAgIE0jATBggqhkjOPQQDAgQHZ2FyYmFnZTAPMQ0wCwYDVQQD\n"
-    "EwRUZXN0MCAXDTAwMDEwMTAwMDAwMFoYDzIxMDAwMTAxMDAwMDAwWjAPMQ0wCwYD\n"
-    "VQQDEwRUZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+Lx4N\n"
-    "lIpM1Zdrt6keDUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7xlC1L\n"
-    "z3IiwaMQMA4wDAYDVR0TBAUwAwEB/zATBggqhkjOPQQDAgQHZ2FyYmFnZQNIADBF\n"
-    "AiAglpDf/YhN89LeJ2WAs/F0SJIrsuhS4uoInIz6WXUiuQIhAIu5Pwhp5E3Pbo8y\n"
-    "fLULTZnynuQUULQkRcF7S7T2WpIL\n"
-    "-----END CERTIFICATE-----\n";
-static const char kRSANoParam[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBWzCBx6ADAgECAgIE0jALBgkqhkiG9w0BAQswDzENMAsGA1UEAxMEVGVzdDAg\n"
-    "Fw0wMDAxMDEwMDAwMDBaGA8yMTAwMDEwMTAwMDAwMFowDzENMAsGA1UEAxMEVGVz\n"
-    "dDBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABOYraeK/ZZ+Xvi8eDZSKTNWXa7ep\n"
-    "Hg1G+92pqR6d3LpaAefWl6gKGPnDxKMeVuJ8g0jbFhoc9R1+8ZQtS89yIsGjEDAO\n"
-    "MAwGA1UdEwQFMAMBAf8wCwYJKoZIhvcNAQELA4GBAC1f8W3W0Ao7CPfIBQYDSbPh\n"
-    "brZpbxdBU5x27JOS7iSa+Lc9pEH5VCX9vIypHVHXLPEfZ38yIt11eiyrmZB6w62N\n"
-    "l9kIeZ6FVPmC30d3sXx70Jjs+ZX9yt7kD1gLyNAQQfeYfa4rORAZT1n2YitD74NY\n"
-    "TWUH2ieFP3l+ecj1SeQR\n"
-    "-----END CERTIFICATE-----\n";
-static const char kRSANullParam[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBXzCByaADAgECAgIE0jANBgkqhkiG9w0BAQsFADAPMQ0wCwYDVQQDEwRUZXN0\n"
-    "MCAXDTAwMDEwMTAwMDAwMFoYDzIxMDAwMTAxMDAwMDAwWjAPMQ0wCwYDVQQDEwRU\n"
-    "ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+Lx4NlIpM1Zdr\n"
-    "t6keDUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7xlC1Lz3IiwaMQ\n"
-    "MA4wDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOBgQAzVcfIv+Rq1KrMXqIL\n"
-    "fPq/cWZjgqFZA1RGaGElNaqp+rkJfamq5tDGzckWpebrK+jjRN7yIlcWDtPpy3Gy\n"
-    "seZfvtBDR0TwJm0S/pQl8prKB4wgALcwe3bmi56Rq85nzY5ZLNcP16LQxL+jAAua\n"
-    "SwmQUz4bRpckRBj+sIyp1We+pg==\n"
-    "-----END CERTIFICATE-----\n";
-static const char kRSAInvalidParam[] =
-    "-----BEGIN CERTIFICATE-----\n"
-    "MIIBbTCB0KADAgECAgIE0jAUBgkqhkiG9w0BAQsEB2dhcmJhZ2UwDzENMAsGA1UE\n"
-    "AxMEVGVzdDAgFw0wMDAxMDEwMDAwMDBaGA8yMTAwMDEwMTAwMDAwMFowDzENMAsG\n"
-    "A1UEAxMEVGVzdDBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABOYraeK/ZZ+Xvi8e\n"
-    "DZSKTNWXa7epHg1G+92pqR6d3LpaAefWl6gKGPnDxKMeVuJ8g0jbFhoc9R1+8ZQt\n"
-    "S89yIsGjEDAOMAwGA1UdEwQFMAMBAf8wFAYJKoZIhvcNAQELBAdnYXJiYWdlA4GB\n"
-    "AHTJ6cWWjCNrZhqiWWVI3jdK+h5xpRG8jGMXxR4JnjtoYRRusJLOXhmapwCB6fA0\n"
-    "4vc+66O27v36yDmQX+tIc/hDrTpKNJptU8q3n2VagREvoHhkOTYkcCeS8vmnMtn8\n"
-    "5OMNZ/ajVwOssw61GcAlScRqEHkZFBoGp7e+QpgB2tf9\n"
-    "-----END CERTIFICATE-----\n";
+static const char kP256NoParam[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBIDCBxqADAgECAgIE0jAKBggqhkjOPQQDAjAPMQ0wCwYDVQQDEwRUZXN0MCAX
+DTAwMDEwMTAwMDAwMFoYDzIxMDAwMTAxMDAwMDAwWjAPMQ0wCwYDVQQDEwRUZXN0
+MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+Lx4NlIpM1Zdrt6ke
+DUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7xlC1Lz3IiwaMQMA4w
+DAYDVR0TBAUwAwEB/zAKBggqhkjOPQQDAgNJADBGAiEAqdIiF+bN9Cl44oUeICpy
+aXd7HqhpVUaglYKw9ChmNUACIQCpMdL0fNkFNDbRww9dSl/y7kBdk/tp16HiqeSy
+gGzFYg==
+-----END CERTIFICATE-----
+)";
+static const char kP256NullParam[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBJDCByKADAgECAgIE0jAMBggqhkjOPQQDAgUAMA8xDTALBgNVBAMTBFRlc3Qw
+IBcNMDAwMTAxMDAwMDAwWhgPMjEwMDAxMDEwMDAwMDBaMA8xDTALBgNVBAMTBFRl
+c3QwWTATBgcqhkjOPQIBBggqhkjOPQMBBwNCAATmK2niv2Wfl74vHg2UikzVl2u3
+qR4NRvvdqakendy6WgHn1peoChj5w8SjHlbifINI2xYaHPUdfvGULUvPciLBoxAw
+DjAMBgNVHRMEBTADAQH/MAwGCCqGSM49BAMCBQADSQAwRgIhAKILHmyo+F3Cn/VX
+UUeSXOQQKX5aLzsQitwwmNF3ZgH3AiEAsYHcrVj/ftmoQIORARkQ/+PrqntXev8r
+t6uPxHrmpUY=
+-----END CERTIFICATE-----
+)";
+static const char kP256InvalidParam[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBMTCBz6ADAgECAgIE0jATBggqhkjOPQQDAgQHZ2FyYmFnZTAPMQ0wCwYDVQQD
+EwRUZXN0MCAXDTAwMDEwMTAwMDAwMFoYDzIxMDAwMTAxMDAwMDAwWjAPMQ0wCwYD
+VQQDEwRUZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+Lx4N
+lIpM1Zdrt6keDUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7xlC1L
+z3IiwaMQMA4wDAYDVR0TBAUwAwEB/zATBggqhkjOPQQDAgQHZ2FyYmFnZQNIADBF
+AiAglpDf/YhN89LeJ2WAs/F0SJIrsuhS4uoInIz6WXUiuQIhAIu5Pwhp5E3Pbo8y
+fLULTZnynuQUULQkRcF7S7T2WpIL
+-----END CERTIFICATE-----
+)";
+static const char kRSANoParam[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBWzCBx6ADAgECAgIE0jALBgkqhkiG9w0BAQswDzENMAsGA1UEAxMEVGVzdDAg
+Fw0wMDAxMDEwMDAwMDBaGA8yMTAwMDEwMTAwMDAwMFowDzENMAsGA1UEAxMEVGVz
+dDBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABOYraeK/ZZ+Xvi8eDZSKTNWXa7ep
+Hg1G+92pqR6d3LpaAefWl6gKGPnDxKMeVuJ8g0jbFhoc9R1+8ZQtS89yIsGjEDAO
+MAwGA1UdEwQFMAMBAf8wCwYJKoZIhvcNAQELA4GBAC1f8W3W0Ao7CPfIBQYDSbPh
+brZpbxdBU5x27JOS7iSa+Lc9pEH5VCX9vIypHVHXLPEfZ38yIt11eiyrmZB6w62N
+l9kIeZ6FVPmC30d3sXx70Jjs+ZX9yt7kD1gLyNAQQfeYfa4rORAZT1n2YitD74NY
+TWUH2ieFP3l+ecj1SeQR
+-----END CERTIFICATE-----
+)";
+static const char kRSANullParam[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBXzCByaADAgECAgIE0jANBgkqhkiG9w0BAQsFADAPMQ0wCwYDVQQDEwRUZXN0
+MCAXDTAwMDEwMTAwMDAwMFoYDzIxMDAwMTAxMDAwMDAwWjAPMQ0wCwYDVQQDEwRU
+ZXN0MFkwEwYHKoZIzj0CAQYIKoZIzj0DAQcDQgAE5itp4r9ln5e+Lx4NlIpM1Zdr
+t6keDUb73ampHp3culoB59aXqAoY+cPEox5W4nyDSNsWGhz1HX7xlC1Lz3IiwaMQ
+MA4wDAYDVR0TBAUwAwEB/zANBgkqhkiG9w0BAQsFAAOBgQAzVcfIv+Rq1KrMXqIL
+fPq/cWZjgqFZA1RGaGElNaqp+rkJfamq5tDGzckWpebrK+jjRN7yIlcWDtPpy3Gy
+seZfvtBDR0TwJm0S/pQl8prKB4wgALcwe3bmi56Rq85nzY5ZLNcP16LQxL+jAAua
+SwmQUz4bRpckRBj+sIyp1We+pg==
+-----END CERTIFICATE-----
+)";
+static const char kRSAInvalidParam[] = R"(
+-----BEGIN CERTIFICATE-----
+MIIBbTCB0KADAgECAgIE0jAUBgkqhkiG9w0BAQsEB2dhcmJhZ2UwDzENMAsGA1UE
+AxMEVGVzdDAgFw0wMDAxMDEwMDAwMDBaGA8yMTAwMDEwMTAwMDAwMFowDzENMAsG
+A1UEAxMEVGVzdDBZMBMGByqGSM49AgEGCCqGSM49AwEHA0IABOYraeK/ZZ+Xvi8e
+DZSKTNWXa7epHg1G+92pqR6d3LpaAefWl6gKGPnDxKMeVuJ8g0jbFhoc9R1+8ZQt
+S89yIsGjEDAOMAwGA1UdEwQFMAMBAf8wFAYJKoZIhvcNAQELBAdnYXJiYWdlA4GB
+AHTJ6cWWjCNrZhqiWWVI3jdK+h5xpRG8jGMXxR4JnjtoYRRusJLOXhmapwCB6fA0
+4vc+66O27v36yDmQX+tIc/hDrTpKNJptU8q3n2VagREvoHhkOTYkcCeS8vmnMtn8
+5OMNZ/ajVwOssw61GcAlScRqEHkZFBoGp7e+QpgB2tf9
+-----END CERTIFICATE-----
+)";
 
 TEST(X509Test, AlgorithmParameters) {
-  // P-256 requires the parameter be omitted.
+  // P-256 parameters should be omitted, but we accept NULL ones.
   bssl::UniquePtr<EVP_PKEY> key = PrivateKeyFromPEM(kP256Key);
   ASSERT_TRUE(key);
 
@@ -2527,15 +2777,12 @@
 
   cert = CertFromPEM(kP256NullParam);
   ASSERT_TRUE(cert);
-  EXPECT_FALSE(X509_verify(cert.get(), key.get()));
-  uint32_t err = ERR_get_error();
-  EXPECT_EQ(ERR_LIB_X509, ERR_GET_LIB(err));
-  EXPECT_EQ(X509_R_INVALID_PARAMETER, ERR_GET_REASON(err));
+  EXPECT_TRUE(X509_verify(cert.get(), key.get()));
 
   cert = CertFromPEM(kP256InvalidParam);
   ASSERT_TRUE(cert);
   EXPECT_FALSE(X509_verify(cert.get(), key.get()));
-  err = ERR_get_error();
+  uint32_t err = ERR_get_error();
   EXPECT_EQ(ERR_LIB_X509, ERR_GET_LIB(err));
   EXPECT_EQ(X509_R_INVALID_PARAMETER, ERR_GET_REASON(err));
 
@@ -2558,3 +2805,184 @@
   EXPECT_EQ(ERR_LIB_X509, ERR_GET_LIB(err));
   EXPECT_EQ(X509_R_INVALID_PARAMETER, ERR_GET_REASON(err));
 }
+
+TEST(X509Test, GeneralName)  {
+  const std::vector<uint8_t> kNames[] = {
+      // [0] {
+      //   OBJECT_IDENTIFIER { 1.2.840.113554.4.1.72585.2.1 }
+      //   [0] {
+      //     SEQUENCE {}
+      //   }
+      // }
+      {0xa0, 0x13, 0x06, 0x0d, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x04,
+       0x01, 0x84, 0xb7, 0x09, 0x02, 0x01, 0xa0, 0x02, 0x30, 0x00},
+      // [0] {
+      //   OBJECT_IDENTIFIER { 1.2.840.113554.4.1.72585.2.1 }
+      //   [0] {
+      //     [APPLICATION 0] {}
+      //   }
+      // }
+      {0xa0, 0x13, 0x06, 0x0d, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x04,
+       0x01, 0x84, 0xb7, 0x09, 0x02, 0x01, 0xa0, 0x02, 0x60, 0x00},
+      // [0] {
+      //   OBJECT_IDENTIFIER { 1.2.840.113554.4.1.72585.2.1 }
+      //   [0] {
+      //     UTF8String { "a" }
+      //   }
+      // }
+      {0xa0, 0x14, 0x06, 0x0d, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x04,
+       0x01, 0x84, 0xb7, 0x09, 0x02, 0x01, 0xa0, 0x03, 0x0c, 0x01, 0x61},
+      // [0] {
+      //   OBJECT_IDENTIFIER { 1.2.840.113554.4.1.72585.2.2 }
+      //   [0] {
+      //     UTF8String { "a" }
+      //   }
+      // }
+      {0xa0, 0x14, 0x06, 0x0d, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x04,
+       0x01, 0x84, 0xb7, 0x09, 0x02, 0x02, 0xa0, 0x03, 0x0c, 0x01, 0x61},
+      // [0] {
+      //   OBJECT_IDENTIFIER { 1.2.840.113554.4.1.72585.2.1 }
+      //   [0] {
+      //     UTF8String { "b" }
+      //   }
+      // }
+      {0xa0, 0x14, 0x06, 0x0d, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x04,
+       0x01, 0x84, 0xb7, 0x09, 0x02, 0x01, 0xa0, 0x03, 0x0c, 0x01, 0x62},
+      // [0] {
+      //   OBJECT_IDENTIFIER { 1.2.840.113554.4.1.72585.2.1 }
+      //   [0] {
+      //     BOOLEAN { TRUE }
+      //   }
+      // }
+      {0xa0, 0x14, 0x06, 0x0d, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x04,
+       0x01, 0x84, 0xb7, 0x09, 0x02, 0x01, 0xa0, 0x03, 0x01, 0x01, 0xff},
+      // [0] {
+      //   OBJECT_IDENTIFIER { 1.2.840.113554.4.1.72585.2.1 }
+      //   [0] {
+      //     BOOLEAN { FALSE }
+      //   }
+      // }
+      {0xa0, 0x14, 0x06, 0x0d, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x04,
+       0x01, 0x84, 0xb7, 0x09, 0x02, 0x01, 0xa0, 0x03, 0x01, 0x01, 0x00},
+      // [1 PRIMITIVE] { "a" }
+      {0x81, 0x01, 0x61},
+      // [1 PRIMITIVE] { "b" }
+      {0x81, 0x01, 0x62},
+      // [2 PRIMITIVE] { "a" }
+      {0x82, 0x01, 0x61},
+      // [2 PRIMITIVE] { "b" }
+      {0x82, 0x01, 0x62},
+      // [4] {
+      //   SEQUENCE {
+      //     SET {
+      //       SEQUENCE {
+      //         # commonName
+      //         OBJECT_IDENTIFIER { 2.5.4.3 }
+      //         UTF8String { "a" }
+      //       }
+      //     }
+      //   }
+      // }
+      {0xa4, 0x0e, 0x30, 0x0c, 0x31, 0x0a, 0x30, 0x08, 0x06, 0x03, 0x55, 0x04,
+       0x03, 0x0c, 0x01, 0x61},
+      // [4] {
+      //   SEQUENCE {
+      //     SET {
+      //       SEQUENCE {
+      //         # commonName
+      //         OBJECT_IDENTIFIER { 2.5.4.3 }
+      //         UTF8String { "b" }
+      //       }
+      //     }
+      //   }
+      // }
+      {0xa4, 0x0e, 0x30, 0x0c, 0x31, 0x0a, 0x30, 0x08, 0x06, 0x03, 0x55, 0x04,
+       0x03, 0x0c, 0x01, 0x62},
+      // [5] {
+      //   [1] {
+      //     UTF8String { "a" }
+      //   }
+      // }
+      {0xa5, 0x05, 0xa1, 0x03, 0x0c, 0x01, 0x61},
+      // [5] {
+      //   [1] {
+      //     UTF8String { "b" }
+      //   }
+      // }
+      {0xa5, 0x05, 0xa1, 0x03, 0x0c, 0x01, 0x62},
+      // [5] {
+      //   [0] {
+      //     UTF8String {}
+      //   }
+      //   [1] {
+      //     UTF8String { "a" }
+      //   }
+      // }
+      {0xa5, 0x09, 0xa0, 0x02, 0x0c, 0x00, 0xa1, 0x03, 0x0c, 0x01, 0x61},
+      // [5] {
+      //   [0] {
+      //     UTF8String { "a" }
+      //   }
+      //   [1] {
+      //     UTF8String { "a" }
+      //   }
+      // }
+      {0xa5, 0x0a, 0xa0, 0x03, 0x0c, 0x01, 0x61, 0xa1, 0x03, 0x0c, 0x01, 0x61},
+      // [5] {
+      //   [0] {
+      //     UTF8String { "b" }
+      //   }
+      //   [1] {
+      //     UTF8String { "a" }
+      //   }
+      // }
+      {0xa5, 0x0a, 0xa0, 0x03, 0x0c, 0x01, 0x62, 0xa1, 0x03, 0x0c, 0x01, 0x61},
+      // [6 PRIMITIVE] { "a" }
+      {0x86, 0x01, 0x61},
+      // [6 PRIMITIVE] { "b" }
+      {0x86, 0x01, 0x62},
+      // [7 PRIMITIVE] { `11111111` }
+      {0x87, 0x04, 0x11, 0x11, 0x11, 0x11},
+      // [7 PRIMITIVE] { `22222222`}
+      {0x87, 0x04, 0x22, 0x22, 0x22, 0x22},
+      // [7 PRIMITIVE] { `11111111111111111111111111111111` }
+      {0x87, 0x10, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
+       0x11, 0x11, 0x11, 0x11, 0x11, 0x11},
+      // [7 PRIMITIVE] { `22222222222222222222222222222222` }
+      {0x87, 0x10, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
+       0x22, 0x22, 0x22, 0x22, 0x22, 0x22},
+      // [8 PRIMITIVE] { 1.2.840.113554.4.1.72585.2.1 }
+      {0x88, 0x0d, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x04, 0x01, 0x84, 0xb7,
+       0x09, 0x02, 0x01},
+      // [8 PRIMITIVE] { 1.2.840.113554.4.1.72585.2.2 }
+      {0x88, 0x0d, 0x2a, 0x86, 0x48, 0x86, 0xf7, 0x12, 0x04, 0x01, 0x84, 0xb7,
+       0x09, 0x02, 0x02},
+  };
+
+  // Every name should be equal to itself and not equal to any others.
+  for (size_t i = 0; i < OPENSSL_ARRAY_SIZE(kNames); i++) {
+    SCOPED_TRACE(Bytes(kNames[i]));
+
+    const uint8_t *ptr = kNames[i].data();
+    bssl::UniquePtr<GENERAL_NAME> a(
+        d2i_GENERAL_NAME(nullptr, &ptr, kNames[i].size()));
+    ASSERT_TRUE(a);
+    ASSERT_EQ(ptr, kNames[i].data() + kNames[i].size());
+
+    for (size_t j = 0; j < OPENSSL_ARRAY_SIZE(kNames); j++) {
+      SCOPED_TRACE(Bytes(kNames[j]));
+
+      ptr = kNames[j].data();
+      bssl::UniquePtr<GENERAL_NAME> b(
+          d2i_GENERAL_NAME(nullptr, &ptr, kNames[j].size()));
+      ASSERT_TRUE(b);
+      ASSERT_EQ(ptr, kNames[j].data() + kNames[j].size());
+
+      if (i == j) {
+        EXPECT_EQ(GENERAL_NAME_cmp(a.get(), b.get()), 0);
+      } else {
+        EXPECT_NE(GENERAL_NAME_cmp(a.get(), b.get()), 0);
+      }
+    }
+  }
+}
diff --git a/deps/boringssl/src/crypto/x509/x509_trs.c b/deps/boringssl/src/crypto/x509/x509_trs.c
index 019301a..d3002e8 100644
--- a/deps/boringssl/src/crypto/x509/x509_trs.c
+++ b/deps/boringssl/src/crypto/x509/x509_trs.c
@@ -260,17 +260,17 @@
     trtable = NULL;
 }
 
-int X509_TRUST_get_flags(X509_TRUST *xp)
+int X509_TRUST_get_flags(const X509_TRUST *xp)
 {
     return xp->flags;
 }
 
-char *X509_TRUST_get0_name(X509_TRUST *xp)
+char *X509_TRUST_get0_name(const X509_TRUST *xp)
 {
     return xp->name;
 }
 
-int X509_TRUST_get_trust(X509_TRUST *xp)
+int X509_TRUST_get_trust(const X509_TRUST *xp)
 {
     return xp->trust;
 }
diff --git a/deps/boringssl/src/crypto/x509/x509_txt.c b/deps/boringssl/src/crypto/x509/x509_txt.c
index 8e6ac27..17e14a6 100644
--- a/deps/boringssl/src/crypto/x509/x509_txt.c
+++ b/deps/boringssl/src/crypto/x509/x509_txt.c
@@ -56,144 +56,144 @@
 
 #include <openssl/x509.h>
 
-const char *X509_verify_cert_error_string(long n)
+const char *X509_verify_cert_error_string(long err)
 {
-    switch ((int)n) {
+    switch (err) {
     case X509_V_OK:
-        return ("ok");
+        return "ok";
     case X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT:
-        return ("unable to get issuer certificate");
+        return "unable to get issuer certificate";
     case X509_V_ERR_UNABLE_TO_GET_CRL:
-        return ("unable to get certificate CRL");
+        return "unable to get certificate CRL";
     case X509_V_ERR_UNABLE_TO_DECRYPT_CERT_SIGNATURE:
-        return ("unable to decrypt certificate's signature");
+        return "unable to decrypt certificate's signature";
     case X509_V_ERR_UNABLE_TO_DECRYPT_CRL_SIGNATURE:
-        return ("unable to decrypt CRL's signature");
+        return "unable to decrypt CRL's signature";
     case X509_V_ERR_UNABLE_TO_DECODE_ISSUER_PUBLIC_KEY:
-        return ("unable to decode issuer public key");
+        return "unable to decode issuer public key";
     case X509_V_ERR_CERT_SIGNATURE_FAILURE:
-        return ("certificate signature failure");
+        return "certificate signature failure";
     case X509_V_ERR_CRL_SIGNATURE_FAILURE:
-        return ("CRL signature failure");
+        return "CRL signature failure";
     case X509_V_ERR_CERT_NOT_YET_VALID:
-        return ("certificate is not yet valid");
+        return "certificate is not yet valid";
     case X509_V_ERR_CRL_NOT_YET_VALID:
-        return ("CRL is not yet valid");
+        return "CRL is not yet valid";
     case X509_V_ERR_CERT_HAS_EXPIRED:
-        return ("certificate has expired");
+        return "certificate has expired";
     case X509_V_ERR_CRL_HAS_EXPIRED:
-        return ("CRL has expired");
+        return "CRL has expired";
     case X509_V_ERR_ERROR_IN_CERT_NOT_BEFORE_FIELD:
-        return ("format error in certificate's notBefore field");
+        return "format error in certificate's notBefore field";
     case X509_V_ERR_ERROR_IN_CERT_NOT_AFTER_FIELD:
-        return ("format error in certificate's notAfter field");
+        return "format error in certificate's notAfter field";
     case X509_V_ERR_ERROR_IN_CRL_LAST_UPDATE_FIELD:
-        return ("format error in CRL's lastUpdate field");
+        return "format error in CRL's lastUpdate field";
     case X509_V_ERR_ERROR_IN_CRL_NEXT_UPDATE_FIELD:
-        return ("format error in CRL's nextUpdate field");
+        return "format error in CRL's nextUpdate field";
     case X509_V_ERR_OUT_OF_MEM:
-        return ("out of memory");
+        return "out of memory";
     case X509_V_ERR_DEPTH_ZERO_SELF_SIGNED_CERT:
-        return ("self signed certificate");
+        return "self signed certificate";
     case X509_V_ERR_SELF_SIGNED_CERT_IN_CHAIN:
-        return ("self signed certificate in certificate chain");
+        return "self signed certificate in certificate chain";
     case X509_V_ERR_UNABLE_TO_GET_ISSUER_CERT_LOCALLY:
-        return ("unable to get local issuer certificate");
+        return "unable to get local issuer certificate";
     case X509_V_ERR_UNABLE_TO_VERIFY_LEAF_SIGNATURE:
-        return ("unable to verify the first certificate");
+        return "unable to verify the first certificate";
     case X509_V_ERR_CERT_CHAIN_TOO_LONG:
-        return ("certificate chain too long");
+        return "certificate chain too long";
     case X509_V_ERR_CERT_REVOKED:
-        return ("certificate revoked");
+        return "certificate revoked";
     case X509_V_ERR_INVALID_CA:
-        return ("invalid CA certificate");
+        return "invalid CA certificate";
     case X509_V_ERR_INVALID_NON_CA:
-        return ("invalid non-CA certificate (has CA markings)");
+        return "invalid non-CA certificate (has CA markings)";
     case X509_V_ERR_PATH_LENGTH_EXCEEDED:
-        return ("path length constraint exceeded");
+        return "path length constraint exceeded";
     case X509_V_ERR_PROXY_PATH_LENGTH_EXCEEDED:
-        return ("proxy path length constraint exceeded");
+        return "proxy path length constraint exceeded";
     case X509_V_ERR_PROXY_CERTIFICATES_NOT_ALLOWED:
         return
-            ("proxy certificates not allowed, please set the appropriate flag");
+            "proxy certificates not allowed, please set the appropriate flag";
     case X509_V_ERR_INVALID_PURPOSE:
-        return ("unsupported certificate purpose");
+        return "unsupported certificate purpose";
     case X509_V_ERR_CERT_UNTRUSTED:
-        return ("certificate not trusted");
+        return "certificate not trusted";
     case X509_V_ERR_CERT_REJECTED:
-        return ("certificate rejected");
+        return "certificate rejected";
     case X509_V_ERR_APPLICATION_VERIFICATION:
-        return ("application verification failure");
+        return "application verification failure";
     case X509_V_ERR_SUBJECT_ISSUER_MISMATCH:
-        return ("subject issuer mismatch");
+        return "subject issuer mismatch";
     case X509_V_ERR_AKID_SKID_MISMATCH:
-        return ("authority and subject key identifier mismatch");
+        return "authority and subject key identifier mismatch";
     case X509_V_ERR_AKID_ISSUER_SERIAL_MISMATCH:
-        return ("authority and issuer serial number mismatch");
+        return "authority and issuer serial number mismatch";
     case X509_V_ERR_KEYUSAGE_NO_CERTSIGN:
-        return ("key usage does not include certificate signing");
+        return "key usage does not include certificate signing";
     case X509_V_ERR_UNABLE_TO_GET_CRL_ISSUER:
-        return ("unable to get CRL issuer certificate");
+        return "unable to get CRL issuer certificate";
     case X509_V_ERR_UNHANDLED_CRITICAL_EXTENSION:
-        return ("unhandled critical extension");
+        return "unhandled critical extension";
     case X509_V_ERR_KEYUSAGE_NO_CRL_SIGN:
-        return ("key usage does not include CRL signing");
+        return "key usage does not include CRL signing";
     case X509_V_ERR_KEYUSAGE_NO_DIGITAL_SIGNATURE:
-        return ("key usage does not include digital signature");
+        return "key usage does not include digital signature";
     case X509_V_ERR_UNHANDLED_CRITICAL_CRL_EXTENSION:
-        return ("unhandled critical CRL extension");
+        return "unhandled critical CRL extension";
     case X509_V_ERR_INVALID_EXTENSION:
-        return ("invalid or inconsistent certificate extension");
+        return "invalid or inconsistent certificate extension";
     case X509_V_ERR_INVALID_POLICY_EXTENSION:
-        return ("invalid or inconsistent certificate policy extension");
+        return "invalid or inconsistent certificate policy extension";
     case X509_V_ERR_NO_EXPLICIT_POLICY:
-        return ("no explicit policy");
+        return "no explicit policy";
     case X509_V_ERR_DIFFERENT_CRL_SCOPE:
-        return ("Different CRL scope");
+        return "Different CRL scope";
     case X509_V_ERR_UNSUPPORTED_EXTENSION_FEATURE:
-        return ("Unsupported extension feature");
+        return "Unsupported extension feature";
     case X509_V_ERR_UNNESTED_RESOURCE:
-        return ("RFC 3779 resource not subset of parent's resources");
+        return "RFC 3779 resource not subset of parent's resources";
 
     case X509_V_ERR_PERMITTED_VIOLATION:
-        return ("permitted subtree violation");
+        return "permitted subtree violation";
     case X509_V_ERR_EXCLUDED_VIOLATION:
-        return ("excluded subtree violation");
+        return "excluded subtree violation";
     case X509_V_ERR_SUBTREE_MINMAX:
-        return ("name constraints minimum and maximum not supported");
+        return "name constraints minimum and maximum not supported";
     case X509_V_ERR_UNSUPPORTED_CONSTRAINT_TYPE:
-        return ("unsupported name constraint type");
+        return "unsupported name constraint type";
     case X509_V_ERR_UNSUPPORTED_CONSTRAINT_SYNTAX:
-        return ("unsupported or invalid name constraint syntax");
+        return "unsupported or invalid name constraint syntax";
     case X509_V_ERR_UNSUPPORTED_NAME_SYNTAX:
-        return ("unsupported or invalid name syntax");
+        return "unsupported or invalid name syntax";
     case X509_V_ERR_CRL_PATH_VALIDATION_ERROR:
-        return ("CRL path validation error");
+        return "CRL path validation error";
 
     case X509_V_ERR_SUITE_B_INVALID_VERSION:
-        return ("Suite B: certificate version invalid");
+        return "Suite B: certificate version invalid";
     case X509_V_ERR_SUITE_B_INVALID_ALGORITHM:
-        return ("Suite B: invalid public key algorithm");
+        return "Suite B: invalid public key algorithm";
     case X509_V_ERR_SUITE_B_INVALID_CURVE:
-        return ("Suite B: invalid ECC curve");
+        return "Suite B: invalid ECC curve";
     case X509_V_ERR_SUITE_B_INVALID_SIGNATURE_ALGORITHM:
-        return ("Suite B: invalid signature algorithm");
+        return "Suite B: invalid signature algorithm";
     case X509_V_ERR_SUITE_B_LOS_NOT_ALLOWED:
-        return ("Suite B: curve not allowed for this LOS");
+        return "Suite B: curve not allowed for this LOS";
     case X509_V_ERR_SUITE_B_CANNOT_SIGN_P_384_WITH_P_256:
-        return ("Suite B: cannot sign P-384 with P-256");
+        return "Suite B: cannot sign P-384 with P-256";
 
     case X509_V_ERR_HOSTNAME_MISMATCH:
-        return ("Hostname mismatch");
+        return "Hostname mismatch";
     case X509_V_ERR_EMAIL_MISMATCH:
-        return ("Email address mismatch");
+        return "Email address mismatch";
     case X509_V_ERR_IP_ADDRESS_MISMATCH:
-        return ("IP address mismatch");
+        return "IP address mismatch";
 
     case X509_V_ERR_INVALID_CALL:
-        return ("Invalid certificate verification context");
+        return "Invalid certificate verification context";
     case X509_V_ERR_STORE_LOOKUP:
-        return ("Issuer certificate lookup error");
+        return "Issuer certificate lookup error";
 
     case X509_V_ERR_NAME_CONSTRAINTS_WITHOUT_SANS:
         return "Issuer has name constraints but leaf has no SANs";
diff --git a/deps/boringssl/src/crypto/x509/x509_v3.c b/deps/boringssl/src/crypto/x509/x509_v3.c
index ecbc0dd..91bf024 100644
--- a/deps/boringssl/src/crypto/x509/x509_v3.c
+++ b/deps/boringssl/src/crypto/x509/x509_v3.c
@@ -181,7 +181,7 @@
 
 X509_EXTENSION *X509_EXTENSION_create_by_NID(X509_EXTENSION **ex, int nid,
                                              int crit,
-                                             ASN1_OCTET_STRING *data)
+                                             const ASN1_OCTET_STRING *data)
 {
     const ASN1_OBJECT *obj;
     X509_EXTENSION *ret;
@@ -197,7 +197,7 @@
 
 X509_EXTENSION *X509_EXTENSION_create_by_OBJ(X509_EXTENSION **ex,
                                              const ASN1_OBJECT *obj, int crit,
-                                             ASN1_OCTET_STRING *data)
+                                             const ASN1_OCTET_STRING *data)
 {
     X509_EXTENSION *ret;
 
@@ -242,13 +242,13 @@
     return (1);
 }
 
-int X509_EXTENSION_set_data(X509_EXTENSION *ex, ASN1_OCTET_STRING *data)
+int X509_EXTENSION_set_data(X509_EXTENSION *ex, const ASN1_OCTET_STRING *data)
 {
     int i;
 
     if (ex == NULL)
         return (0);
-    i = M_ASN1_OCTET_STRING_set(ex->value, data->data, data->length);
+    i = ASN1_OCTET_STRING_set(ex->value, data->data, data->length);
     if (!i)
         return (0);
     return (1);
diff --git a/deps/boringssl/src/crypto/x509/x509_vfy.c b/deps/boringssl/src/crypto/x509/x509_vfy.c
index 308ebbc..a997202 100644
--- a/deps/boringssl/src/crypto/x509/x509_vfy.c
+++ b/deps/boringssl/src/crypto/x509/x509_vfy.c
@@ -1037,7 +1037,7 @@
     else
         ptime = NULL;
 
-    i = X509_cmp_time(X509_CRL_get_lastUpdate(crl), ptime);
+    i = X509_cmp_time(X509_CRL_get0_lastUpdate(crl), ptime);
     if (i == 0) {
         if (!notify)
             return 0;
@@ -1054,8 +1054,8 @@
             return 0;
     }
 
-    if (X509_CRL_get_nextUpdate(crl)) {
-        i = X509_cmp_time(X509_CRL_get_nextUpdate(crl), ptime);
+    if (X509_CRL_get0_nextUpdate(crl)) {
+        i = X509_cmp_time(X509_CRL_get0_nextUpdate(crl), ptime);
 
         if (i == 0) {
             if (!notify)
@@ -1100,8 +1100,8 @@
         /* If current CRL is equivalent use it if it is newer */
         if (crl_score == best_score && best_crl != NULL) {
             int day, sec;
-            if (ASN1_TIME_diff(&day, &sec, X509_CRL_get_lastUpdate(best_crl),
-                               X509_CRL_get_lastUpdate(crl)) == 0)
+            if (ASN1_TIME_diff(&day, &sec, X509_CRL_get0_lastUpdate(best_crl),
+                               X509_CRL_get0_lastUpdate(crl)) == 0)
                 continue;
             /*
              * ASN1_TIME_diff never returns inconsistent signs for |day|
@@ -2058,9 +2058,9 @@
     if (!X509_CRL_set_issuer_name(crl, X509_CRL_get_issuer(newer)))
         goto memerr;
 
-    if (!X509_CRL_set_lastUpdate(crl, X509_CRL_get_lastUpdate(newer)))
+    if (!X509_CRL_set1_lastUpdate(crl, X509_CRL_get0_lastUpdate(newer)))
         goto memerr;
-    if (!X509_CRL_set_nextUpdate(crl, X509_CRL_get_nextUpdate(newer)))
+    if (!X509_CRL_set1_nextUpdate(crl, X509_CRL_get0_nextUpdate(newer)))
         goto memerr;
 
     /* Set base CRL number: must be critical */
@@ -2307,8 +2307,6 @@
 int X509_STORE_CTX_init(X509_STORE_CTX *ctx, X509_STORE *store, X509 *x509,
                         STACK_OF(X509) *chain)
 {
-    int ret = 1;
-
     X509_STORE_CTX_zero(ctx);
     ctx->ctx = store;
     ctx->cert = x509;
@@ -2316,78 +2314,74 @@
 
     CRYPTO_new_ex_data(&ctx->ex_data);
 
+    if (store == NULL) {
+        OPENSSL_PUT_ERROR(X509, ERR_R_PASSED_NULL_PARAMETER);
+        goto err;
+    }
+
     ctx->param = X509_VERIFY_PARAM_new();
     if (!ctx->param)
         goto err;
 
     /*
-     * Inherit callbacks and flags from X509_STORE if not set use defaults.
+     * Inherit callbacks and flags from X509_STORE.
      */
 
-    if (store)
-        ret = X509_VERIFY_PARAM_inherit(ctx->param, store->param);
-    else
-        ctx->param->inh_flags |= X509_VP_FLAG_DEFAULT | X509_VP_FLAG_ONCE;
+    ctx->verify_cb = store->verify_cb;
+    ctx->cleanup = store->cleanup;
 
-    if (store) {
-        ctx->verify_cb = store->verify_cb;
-        ctx->cleanup = store->cleanup;
-    } else
-        ctx->cleanup = 0;
-
-    if (ret)
-        ret = X509_VERIFY_PARAM_inherit(ctx->param,
-                                        X509_VERIFY_PARAM_lookup("default"));
-
-    if (ret == 0)
+    if (!X509_VERIFY_PARAM_inherit(ctx->param, store->param) ||
+        !X509_VERIFY_PARAM_inherit(ctx->param,
+                                   X509_VERIFY_PARAM_lookup("default"))) {
         goto err;
+    }
 
-    if (store && store->check_issued)
+    if (store->check_issued)
         ctx->check_issued = store->check_issued;
     else
         ctx->check_issued = check_issued;
 
-    if (store && store->get_issuer)
+    if (store->get_issuer)
         ctx->get_issuer = store->get_issuer;
     else
         ctx->get_issuer = X509_STORE_CTX_get1_issuer;
 
-    if (store && store->verify_cb)
+    if (store->verify_cb)
         ctx->verify_cb = store->verify_cb;
     else
         ctx->verify_cb = null_callback;
 
-    if (store && store->verify)
+    if (store->verify)
         ctx->verify = store->verify;
     else
         ctx->verify = internal_verify;
 
-    if (store && store->check_revocation)
+    if (store->check_revocation)
         ctx->check_revocation = store->check_revocation;
     else
         ctx->check_revocation = check_revocation;
 
-    if (store && store->get_crl)
+    if (store->get_crl)
         ctx->get_crl = store->get_crl;
     else
         ctx->get_crl = NULL;
 
-    if (store && store->check_crl)
+    if (store->check_crl)
         ctx->check_crl = store->check_crl;
     else
         ctx->check_crl = check_crl;
 
-    if (store && store->cert_crl)
+    if (store->cert_crl)
         ctx->cert_crl = store->cert_crl;
     else
         ctx->cert_crl = cert_crl;
 
-    if (store && store->lookup_certs)
+    if (store->lookup_certs)
         ctx->lookup_certs = store->lookup_certs;
     else
         ctx->lookup_certs = X509_STORE_get1_certs;
 
-    if (store && store->lookup_crls)
+    if (store->lookup_crls)
         ctx->lookup_crls = store->lookup_crls;
     else
         ctx->lookup_crls = X509_STORE_get1_crls;
diff --git a/deps/boringssl/src/crypto/x509/x509cset.c b/deps/boringssl/src/crypto/x509/x509cset.c
index d2f2b8f..cc27acb 100644
--- a/deps/boringssl/src/crypto/x509/x509cset.c
+++ b/deps/boringssl/src/crypto/x509/x509cset.c
@@ -66,7 +66,7 @@
     if (x == NULL)
         return (0);
     if (x->crl->version == NULL) {
-        if ((x->crl->version = M_ASN1_INTEGER_new()) == NULL)
+        if ((x->crl->version = ASN1_INTEGER_new()) == NULL)
             return (0);
     }
     return (ASN1_INTEGER_set(x->crl->version, version));
@@ -79,7 +79,7 @@
     return (X509_NAME_set(&x->crl->issuer, name));
 }
 
-int X509_CRL_set_lastUpdate(X509_CRL *x, const ASN1_TIME *tm)
+int X509_CRL_set1_lastUpdate(X509_CRL *x, const ASN1_TIME *tm)
 {
     ASN1_TIME *in;
 
@@ -87,16 +87,16 @@
         return (0);
     in = x->crl->lastUpdate;
     if (in != tm) {
-        in = M_ASN1_TIME_dup(tm);
+        in = ASN1_STRING_dup(tm);
         if (in != NULL) {
-            M_ASN1_TIME_free(x->crl->lastUpdate);
+            ASN1_TIME_free(x->crl->lastUpdate);
             x->crl->lastUpdate = in;
         }
     }
     return (in != NULL);
 }
 
-int X509_CRL_set_nextUpdate(X509_CRL *x, const ASN1_TIME *tm)
+int X509_CRL_set1_nextUpdate(X509_CRL *x, const ASN1_TIME *tm)
 {
     ASN1_TIME *in;
 
@@ -104,9 +104,9 @@
         return (0);
     in = x->crl->nextUpdate;
     if (in != tm) {
-        in = M_ASN1_TIME_dup(tm);
+        in = ASN1_STRING_dup(tm);
         if (in != NULL) {
-            M_ASN1_TIME_free(x->crl->nextUpdate);
+            ASN1_TIME_free(x->crl->nextUpdate);
             x->crl->nextUpdate = in;
         }
     }
@@ -170,6 +170,11 @@
     return crl->crl->revoked;
 }
 
+const STACK_OF(X509_EXTENSION) *X509_CRL_get0_extensions(const X509_CRL *crl)
+{
+    return crl->crl->extensions;
+}
+
 void X509_CRL_get0_signature(const X509_CRL *crl, const ASN1_BIT_STRING **psig,
                              const X509_ALGOR **palg)
 {
@@ -184,52 +189,64 @@
     return OBJ_obj2nid(crl->sig_alg->algorithm);
 }
 
-const ASN1_TIME *X509_REVOKED_get0_revocationDate(const X509_REVOKED *x)
+const ASN1_TIME *X509_REVOKED_get0_revocationDate(const X509_REVOKED *revoked)
 {
-    return x->revocationDate;
+    return revoked->revocationDate;
 }
 
-int X509_REVOKED_set_revocationDate(X509_REVOKED *x, ASN1_TIME *tm)
+int X509_REVOKED_set_revocationDate(X509_REVOKED *revoked, const ASN1_TIME *tm)
 {
     ASN1_TIME *in;
 
-    if (x == NULL)
+    if (revoked == NULL)
         return (0);
-    in = x->revocationDate;
+    in = revoked->revocationDate;
     if (in != tm) {
-        in = M_ASN1_TIME_dup(tm);
+        in = ASN1_STRING_dup(tm);
         if (in != NULL) {
-            M_ASN1_TIME_free(x->revocationDate);
-            x->revocationDate = in;
+            ASN1_TIME_free(revoked->revocationDate);
+            revoked->revocationDate = in;
         }
     }
     return (in != NULL);
 }
 
-const ASN1_INTEGER *X509_REVOKED_get0_serialNumber(const X509_REVOKED *x)
+const ASN1_INTEGER *X509_REVOKED_get0_serialNumber(const X509_REVOKED *revoked)
 {
-    return x->serialNumber;
+    return revoked->serialNumber;
 }
 
-int X509_REVOKED_set_serialNumber(X509_REVOKED *x, ASN1_INTEGER *serial)
+int X509_REVOKED_set_serialNumber(X509_REVOKED *revoked,
+                                  const ASN1_INTEGER *serial)
 {
     ASN1_INTEGER *in;
 
-    if (x == NULL)
+    if (revoked == NULL)
         return (0);
-    in = x->serialNumber;
+    in = revoked->serialNumber;
     if (in != serial) {
-        in = M_ASN1_INTEGER_dup(serial);
+        in = ASN1_INTEGER_dup(serial);
         if (in != NULL) {
-            M_ASN1_INTEGER_free(x->serialNumber);
-            x->serialNumber = in;
+            ASN1_INTEGER_free(revoked->serialNumber);
+            revoked->serialNumber = in;
         }
     }
     return (in != NULL);
 }
 
-int i2d_re_X509_CRL_tbs(X509_CRL *crl, unsigned char **pp)
+const STACK_OF(X509_EXTENSION) *
+    X509_REVOKED_get0_extensions(const X509_REVOKED *r)
+{
+    return r->extensions;
+}
+
+int i2d_re_X509_CRL_tbs(X509_CRL *crl, unsigned char **outp)
 {
     crl->crl->enc.modified = 1;
-    return i2d_X509_CRL_INFO(crl->crl, pp);
+    return i2d_X509_CRL_INFO(crl->crl, outp);
+}
+
+int i2d_X509_CRL_tbs(X509_CRL *crl, unsigned char **outp)
+{
+    return i2d_X509_CRL_INFO(crl->crl, outp);
 }
diff --git a/deps/boringssl/src/crypto/x509/x509name.c b/deps/boringssl/src/crypto/x509/x509name.c
index fbb76f0..0bf3459 100644
--- a/deps/boringssl/src/crypto/x509/x509name.c
+++ b/deps/boringssl/src/crypto/x509/x509name.c
@@ -66,7 +66,8 @@
 #include "../internal.h"
 
 
-int X509_NAME_get_text_by_NID(X509_NAME *name, int nid, char *buf, int len)
+int X509_NAME_get_text_by_NID(const X509_NAME *name, int nid, char *buf,
+                              int len)
 {
     const ASN1_OBJECT *obj;
 
@@ -76,7 +77,7 @@
     return (X509_NAME_get_text_by_OBJ(name, obj, buf, len));
 }
 
-int X509_NAME_get_text_by_OBJ(X509_NAME *name, const ASN1_OBJECT *obj,
+int X509_NAME_get_text_by_OBJ(const X509_NAME *name, const ASN1_OBJECT *obj,
                               char *buf, int len)
 {
     int i;
@@ -94,14 +95,14 @@
     return (i);
 }
 
-int X509_NAME_entry_count(X509_NAME *name)
+int X509_NAME_entry_count(const X509_NAME *name)
 {
     if (name == NULL)
         return (0);
     return (sk_X509_NAME_ENTRY_num(name->entries));
 }
 
-int X509_NAME_get_index_by_NID(X509_NAME *name, int nid, int lastpos)
+int X509_NAME_get_index_by_NID(const X509_NAME *name, int nid, int lastpos)
 {
     const ASN1_OBJECT *obj;
 
@@ -112,7 +113,7 @@
 }
 
 /* NOTE: you should be passsing -1, not 0 as lastpos */
-int X509_NAME_get_index_by_OBJ(X509_NAME *name, const ASN1_OBJECT *obj,
+int X509_NAME_get_index_by_OBJ(const X509_NAME *name, const ASN1_OBJECT *obj,
                                int lastpos)
 {
     int n;
@@ -133,7 +134,7 @@
     return (-1);
 }
 
-X509_NAME_ENTRY *X509_NAME_get_entry(X509_NAME *name, int loc)
+X509_NAME_ENTRY *X509_NAME_get_entry(const X509_NAME *name, int loc)
 {
     if (name == NULL || loc < 0
         || sk_X509_NAME_ENTRY_num(name->entries) <= (size_t)loc)
@@ -374,14 +375,14 @@
     return (1);
 }
 
-ASN1_OBJECT *X509_NAME_ENTRY_get_object(X509_NAME_ENTRY *ne)
+ASN1_OBJECT *X509_NAME_ENTRY_get_object(const X509_NAME_ENTRY *ne)
 {
     if (ne == NULL)
         return (NULL);
     return (ne->object);
 }
 
-ASN1_STRING *X509_NAME_ENTRY_get_data(X509_NAME_ENTRY *ne)
+ASN1_STRING *X509_NAME_ENTRY_get_data(const X509_NAME_ENTRY *ne)
 {
     if (ne == NULL)
         return (NULL);
diff --git a/deps/boringssl/src/crypto/x509/x_all.c b/deps/boringssl/src/crypto/x509/x_all.c
index 33c11b6..a29e038 100644
--- a/deps/boringssl/src/crypto/x509/x_all.c
+++ b/deps/boringssl/src/crypto/x509/x_all.c
@@ -66,20 +66,20 @@
 #include <openssl/rsa.h>
 #include <openssl/stack.h>
 
-int X509_verify(X509 *a, EVP_PKEY *r)
+int X509_verify(X509 *x509, EVP_PKEY *pkey)
 {
-    if (X509_ALGOR_cmp(a->sig_alg, a->cert_info->signature)) {
+    if (X509_ALGOR_cmp(x509->sig_alg, x509->cert_info->signature)) {
         OPENSSL_PUT_ERROR(X509, X509_R_SIGNATURE_ALGORITHM_MISMATCH);
         return 0;
     }
-    return (ASN1_item_verify(ASN1_ITEM_rptr(X509_CINF), a->sig_alg,
-                             a->signature, a->cert_info, r));
+    return ASN1_item_verify(ASN1_ITEM_rptr(X509_CINF), x509->sig_alg,
+                            x509->signature, x509->cert_info, pkey);
 }
 
-int X509_REQ_verify(X509_REQ *a, EVP_PKEY *r)
+int X509_REQ_verify(X509_REQ *req, EVP_PKEY *pkey)
 {
-    return (ASN1_item_verify(ASN1_ITEM_rptr(X509_REQ_INFO),
-                             a->sig_alg, a->signature, a->req_info, r));
+    return ASN1_item_verify(ASN1_ITEM_rptr(X509_REQ_INFO),
+                            req->sig_alg, req->signature, req->req_info, pkey);
 }
 
 int X509_sign(X509 *x, EVP_PKEY *pkey, const EVP_MD *md)
@@ -131,10 +131,10 @@
                            x->signature, x->spkac, pkey, md));
 }
 
-int NETSCAPE_SPKI_verify(NETSCAPE_SPKI *x, EVP_PKEY *pkey)
+int NETSCAPE_SPKI_verify(NETSCAPE_SPKI *spki, EVP_PKEY *pkey)
 {
-    return (ASN1_item_verify(ASN1_ITEM_rptr(NETSCAPE_SPKAC), x->sig_algor,
-                             x->signature, x->spkac, pkey));
+    return (ASN1_item_verify(ASN1_ITEM_rptr(NETSCAPE_SPKAC), spki->sig_algor,
+                             spki->signature, spki->spkac, pkey));
 }
 
 #ifndef OPENSSL_NO_FP_API
diff --git a/deps/boringssl/src/crypto/x509/x_crl.c b/deps/boringssl/src/crypto/x509/x_crl.c
index f8ec4a3..3b9f137 100644
--- a/deps/boringssl/src/crypto/x509/x_crl.c
+++ b/deps/boringssl/src/crypto/x509/x_crl.c
@@ -393,8 +393,7 @@
 
 static int X509_REVOKED_cmp(const X509_REVOKED **a, const X509_REVOKED **b)
 {
-    return (ASN1_STRING_cmp((ASN1_STRING *)(*a)->serialNumber,
-                            (ASN1_STRING *)(*b)->serialNumber));
+    return ASN1_STRING_cmp((*a)->serialNumber, (*b)->serialNumber);
 }
 
 int X509_CRL_add0_revoked(X509_CRL *crl, X509_REVOKED *rev)
@@ -411,10 +410,10 @@
     return 1;
 }
 
-int X509_CRL_verify(X509_CRL *crl, EVP_PKEY *r)
+int X509_CRL_verify(X509_CRL *crl, EVP_PKEY *pkey)
 {
     if (crl->meth->crl_verify)
-        return crl->meth->crl_verify(crl, r);
+        return crl->meth->crl_verify(crl, pkey);
     return 0;
 }
 
diff --git a/deps/boringssl/src/crypto/x509/x_name.c b/deps/boringssl/src/crypto/x509/x_name.c
index 7824100..bef9ec4 100644
--- a/deps/boringssl/src/crypto/x509/x_name.c
+++ b/deps/boringssl/src/crypto/x509/x_name.c
@@ -197,18 +197,8 @@
                             char opt, ASN1_TLC *ctx)
 {
     const unsigned char *p = *in, *q;
-    union {
-        STACK_OF(STACK_OF_X509_NAME_ENTRY) *s;
-        ASN1_VALUE *a;
-    } intname = {
-        NULL
-    };
-    union {
-        X509_NAME *x;
-        ASN1_VALUE *a;
-    } nm = {
-        NULL
-    };
+    STACK_OF(STACK_OF_X509_NAME_ENTRY) *intname = NULL;
+    X509_NAME *nm = NULL;
     size_t i, j;
     int ret;
     STACK_OF(X509_NAME_ENTRY) *entries;
@@ -220,46 +210,48 @@
     q = p;
 
     /* Get internal representation of Name */
-    ret = ASN1_item_ex_d2i(&intname.a,
+    ASN1_VALUE *intname_val = NULL;
+    ret = ASN1_item_ex_d2i(&intname_val,
                            &p, len, ASN1_ITEM_rptr(X509_NAME_INTERNAL),
                            tag, aclass, opt, ctx);
-
     if (ret <= 0)
         return ret;
+    intname = (STACK_OF(STACK_OF_X509_NAME_ENTRY) *)intname_val;
 
     if (*val)
         x509_name_ex_free(val, NULL);
-    if (!x509_name_ex_new(&nm.a, NULL))
+    ASN1_VALUE *nm_val = NULL;
+    if (!x509_name_ex_new(&nm_val, NULL))
         goto err;
+    nm = (X509_NAME *)nm_val;
     /* We've decoded it: now cache encoding */
-    if (!BUF_MEM_grow(nm.x->bytes, p - q))
+    if (!BUF_MEM_grow(nm->bytes, p - q))
         goto err;
-    OPENSSL_memcpy(nm.x->bytes->data, q, p - q);
+    OPENSSL_memcpy(nm->bytes->data, q, p - q);
 
     /* Convert internal representation to X509_NAME structure */
-    for (i = 0; i < sk_STACK_OF_X509_NAME_ENTRY_num(intname.s); i++) {
-        entries = sk_STACK_OF_X509_NAME_ENTRY_value(intname.s, i);
+    for (i = 0; i < sk_STACK_OF_X509_NAME_ENTRY_num(intname); i++) {
+        entries = sk_STACK_OF_X509_NAME_ENTRY_value(intname, i);
         for (j = 0; j < sk_X509_NAME_ENTRY_num(entries); j++) {
             entry = sk_X509_NAME_ENTRY_value(entries, j);
             entry->set = i;
-            if (!sk_X509_NAME_ENTRY_push(nm.x->entries, entry))
+            if (!sk_X509_NAME_ENTRY_push(nm->entries, entry))
                 goto err;
             (void)sk_X509_NAME_ENTRY_set(entries, j, NULL);
         }
     }
-    ret = x509_name_canon(nm.x);
+    ret = x509_name_canon(nm);
     if (!ret)
         goto err;
-    sk_STACK_OF_X509_NAME_ENTRY_pop_free(intname.s,
+    sk_STACK_OF_X509_NAME_ENTRY_pop_free(intname,
                                          local_sk_X509_NAME_ENTRY_free);
-    nm.x->modified = 0;
-    *val = nm.a;
+    nm->modified = 0;
+    *val = (ASN1_VALUE *)nm;
     *in = p;
     return ret;
  err:
-    if (nm.x != NULL)
-        X509_NAME_free(nm.x);
-    sk_STACK_OF_X509_NAME_ENTRY_pop_free(intname.s,
+    X509_NAME_free(nm);
+    sk_STACK_OF_X509_NAME_ENTRY_pop_free(intname,
                                          local_sk_X509_NAME_ENTRY_pop_free);
     OPENSSL_PUT_ERROR(X509, ERR_R_ASN1_LIB);
     return 0;
@@ -288,20 +280,15 @@
 
 static int x509_name_encode(X509_NAME *a)
 {
-    union {
-        STACK_OF(STACK_OF_X509_NAME_ENTRY) *s;
-        ASN1_VALUE *a;
-    } intname = {
-        NULL
-    };
     int len;
     unsigned char *p;
     STACK_OF(X509_NAME_ENTRY) *entries = NULL;
     X509_NAME_ENTRY *entry;
     int set = -1;
     size_t i;
-    intname.s = sk_STACK_OF_X509_NAME_ENTRY_new_null();
-    if (!intname.s)
+    STACK_OF(STACK_OF_X509_NAME_ENTRY) *intname =
+        sk_STACK_OF_X509_NAME_ENTRY_new_null();
+    if (!intname)
         goto memerr;
     for (i = 0; i < sk_X509_NAME_ENTRY_num(a->entries); i++) {
         entry = sk_X509_NAME_ENTRY_value(a->entries, i);
@@ -309,7 +296,7 @@
             entries = sk_X509_NAME_ENTRY_new_null();
             if (!entries)
                 goto memerr;
-            if (!sk_STACK_OF_X509_NAME_ENTRY_push(intname.s, entries)) {
+            if (!sk_STACK_OF_X509_NAME_ENTRY_push(intname, entries)) {
                 sk_X509_NAME_ENTRY_free(entries);
                 goto memerr;
             }
@@ -318,19 +305,20 @@
         if (!sk_X509_NAME_ENTRY_push(entries, entry))
             goto memerr;
     }
-    len = ASN1_item_ex_i2d(&intname.a, NULL,
+    ASN1_VALUE *intname_val = (ASN1_VALUE *)intname;
+    len = ASN1_item_ex_i2d(&intname_val, NULL,
                            ASN1_ITEM_rptr(X509_NAME_INTERNAL), -1, -1);
     if (!BUF_MEM_grow(a->bytes, len))
         goto memerr;
     p = (unsigned char *)a->bytes->data;
-    ASN1_item_ex_i2d(&intname.a,
+    ASN1_item_ex_i2d(&intname_val,
                      &p, ASN1_ITEM_rptr(X509_NAME_INTERNAL), -1, -1);
-    sk_STACK_OF_X509_NAME_ENTRY_pop_free(intname.s,
+    sk_STACK_OF_X509_NAME_ENTRY_pop_free(intname,
                                          local_sk_X509_NAME_ENTRY_free);
     a->modified = 0;
     return len;
  memerr:
-    sk_STACK_OF_X509_NAME_ENTRY_pop_free(intname.s,
+    sk_STACK_OF_X509_NAME_ENTRY_pop_free(intname,
                                          local_sk_X509_NAME_ENTRY_free);
     OPENSSL_PUT_ERROR(X509, ERR_R_MALLOC_FAILURE);
     return -1;
diff --git a/deps/boringssl/src/crypto/x509/x_pkey.c b/deps/boringssl/src/crypto/x509/x_pkey.c
index 8231a24..e562d73 100644
--- a/deps/boringssl/src/crypto/x509/x_pkey.c
+++ b/deps/boringssl/src/crypto/x509/x_pkey.c
@@ -78,7 +78,7 @@
     ret->enc_algor = X509_ALGOR_new();
     if (ret->enc_algor == NULL)
         goto err;
-    ret->enc_pkey = M_ASN1_OCTET_STRING_new();
+    ret->enc_pkey = ASN1_OCTET_STRING_new();
     if (ret->enc_pkey == NULL)
         goto err;
     return ret;
@@ -97,7 +97,7 @@
     if (x->enc_algor != NULL)
         X509_ALGOR_free(x->enc_algor);
     if (x->enc_pkey != NULL)
-        M_ASN1_OCTET_STRING_free(x->enc_pkey);
+        ASN1_OCTET_STRING_free(x->enc_pkey);
     if (x->dec_pkey != NULL)
         EVP_PKEY_free(x->dec_pkey);
     if ((x->key_data != NULL) && (x->key_free))
diff --git a/deps/boringssl/src/crypto/x509/x_pubkey.c b/deps/boringssl/src/crypto/x509/x_pubkey.c
index 3d07d66..37dee49 100644
--- a/deps/boringssl/src/crypto/x509/x_pubkey.c
+++ b/deps/boringssl/src/crypto/x509/x_pubkey.c
@@ -180,160 +180,6 @@
     return NULL;
 }
 
-/*
- * Now two pseudo ASN1 routines that take an EVP_PKEY structure and encode or
- * decode as X509_PUBKEY
- */
-
-EVP_PKEY *d2i_PUBKEY(EVP_PKEY **a, const unsigned char **pp, long length)
-{
-    X509_PUBKEY *xpk;
-    EVP_PKEY *pktmp;
-    xpk = d2i_X509_PUBKEY(NULL, pp, length);
-    if (!xpk)
-        return NULL;
-    pktmp = X509_PUBKEY_get(xpk);
-    X509_PUBKEY_free(xpk);
-    if (!pktmp)
-        return NULL;
-    if (a) {
-        EVP_PKEY_free(*a);
-        *a = pktmp;
-    }
-    return pktmp;
-}
-
-int i2d_PUBKEY(const EVP_PKEY *a, unsigned char **pp)
-{
-    X509_PUBKEY *xpk = NULL;
-    int ret;
-    if (!a)
-        return 0;
-    if (!X509_PUBKEY_set(&xpk, (EVP_PKEY *)a))
-        return 0;
-    ret = i2d_X509_PUBKEY(xpk, pp);
-    X509_PUBKEY_free(xpk);
-    return ret;
-}
-
-/*
- * The following are equivalents but which return RSA and DSA keys
- */
-RSA *d2i_RSA_PUBKEY(RSA **a, const unsigned char **pp, long length)
-{
-    EVP_PKEY *pkey;
-    RSA *key;
-    const unsigned char *q;
-    q = *pp;
-    pkey = d2i_PUBKEY(NULL, &q, length);
-    if (!pkey)
-        return NULL;
-    key = EVP_PKEY_get1_RSA(pkey);
-    EVP_PKEY_free(pkey);
-    if (!key)
-        return NULL;
-    *pp = q;
-    if (a) {
-        RSA_free(*a);
-        *a = key;
-    }
-    return key;
-}
-
-int i2d_RSA_PUBKEY(const RSA *a, unsigned char **pp)
-{
-    EVP_PKEY *pktmp;
-    int ret;
-    if (!a)
-        return 0;
-    pktmp = EVP_PKEY_new();
-    if (!pktmp) {
-        OPENSSL_PUT_ERROR(X509, ERR_R_MALLOC_FAILURE);
-        return 0;
-    }
-    EVP_PKEY_set1_RSA(pktmp, (RSA *)a);
-    ret = i2d_PUBKEY(pktmp, pp);
-    EVP_PKEY_free(pktmp);
-    return ret;
-}
-
-#ifndef OPENSSL_NO_DSA
-DSA *d2i_DSA_PUBKEY(DSA **a, const unsigned char **pp, long length)
-{
-    EVP_PKEY *pkey;
-    DSA *key;
-    const unsigned char *q;
-    q = *pp;
-    pkey = d2i_PUBKEY(NULL, &q, length);
-    if (!pkey)
-        return NULL;
-    key = EVP_PKEY_get1_DSA(pkey);
-    EVP_PKEY_free(pkey);
-    if (!key)
-        return NULL;
-    *pp = q;
-    if (a) {
-        DSA_free(*a);
-        *a = key;
-    }
-    return key;
-}
-
-int i2d_DSA_PUBKEY(const DSA *a, unsigned char **pp)
-{
-    EVP_PKEY *pktmp;
-    int ret;
-    if (!a)
-        return 0;
-    pktmp = EVP_PKEY_new();
-    if (!pktmp) {
-        OPENSSL_PUT_ERROR(X509, ERR_R_MALLOC_FAILURE);
-        return 0;
-    }
-    EVP_PKEY_set1_DSA(pktmp, (DSA *)a);
-    ret = i2d_PUBKEY(pktmp, pp);
-    EVP_PKEY_free(pktmp);
-    return ret;
-}
-#endif
-
-EC_KEY *d2i_EC_PUBKEY(EC_KEY **a, const unsigned char **pp, long length)
-{
-    EVP_PKEY *pkey;
-    EC_KEY *key;
-    const unsigned char *q;
-    q = *pp;
-    pkey = d2i_PUBKEY(NULL, &q, length);
-    if (!pkey)
-        return (NULL);
-    key = EVP_PKEY_get1_EC_KEY(pkey);
-    EVP_PKEY_free(pkey);
-    if (!key)
-        return (NULL);
-    *pp = q;
-    if (a) {
-        EC_KEY_free(*a);
-        *a = key;
-    }
-    return (key);
-}
-
-int i2d_EC_PUBKEY(const EC_KEY *a, unsigned char **pp)
-{
-    EVP_PKEY *pktmp;
-    int ret;
-    if (!a)
-        return (0);
-    if ((pktmp = EVP_PKEY_new()) == NULL) {
-        OPENSSL_PUT_ERROR(X509, ERR_R_MALLOC_FAILURE);
-        return (0);
-    }
-    EVP_PKEY_set1_EC_KEY(pktmp, (EC_KEY *)a);
-    ret = i2d_PUBKEY(pktmp, pp);
-    EVP_PKEY_free(pktmp);
-    return (ret);
-}
-
 int X509_PUBKEY_set0_param(X509_PUBKEY *pub, const ASN1_OBJECT *aobj,
                            int ptype, void *pval,
                            unsigned char *penc, int penclen)
diff --git a/deps/boringssl/src/crypto/x509/x_sig.c b/deps/boringssl/src/crypto/x509/x_sig.c
index e18024a..ca08c64 100644
--- a/deps/boringssl/src/crypto/x509/x_sig.c
+++ b/deps/boringssl/src/crypto/x509/x_sig.c
@@ -67,3 +67,23 @@
 } ASN1_SEQUENCE_END(X509_SIG)
 
 IMPLEMENT_ASN1_FUNCTIONS(X509_SIG)
+
+void X509_SIG_get0(const X509_SIG *sig, const X509_ALGOR **out_alg,
+                   const ASN1_OCTET_STRING **out_digest) {
+  if (out_alg != NULL) {
+    *out_alg = sig->algor;
+  }
+  if (out_digest != NULL) {
+    *out_digest = sig->digest;
+  }
+}
+
+void X509_SIG_getm(X509_SIG *sig, X509_ALGOR **out_alg,
+                   ASN1_OCTET_STRING **out_digest) {
+  if (out_alg != NULL) {
+    *out_alg = sig->algor;
+  }
+  if (out_digest != NULL) {
+    *out_digest = sig->digest;
+  }
+}
diff --git a/deps/boringssl/src/crypto/x509/x_x509.c b/deps/boringssl/src/crypto/x509/x_x509.c
index 010b625..ff0bff8 100644
--- a/deps/boringssl/src/crypto/x509/x_x509.c
+++ b/deps/boringssl/src/crypto/x509/x_x509.c
@@ -98,7 +98,6 @@
     switch (operation) {
 
     case ASN1_OP_NEW_POST:
-        ret->name = NULL;
         ret->ex_flags = 0;
         ret->ex_pathlen = -1;
         ret->skid = NULL;
@@ -116,12 +115,14 @@
         break;
 
     case ASN1_OP_D2I_POST: {
-        /* The version must be one of v1(0), v2(1), or v3(2). If the version is
-         * v1(0), it must be omitted because it is DEFAULT. */
+        /* The version must be one of v1(0), v2(1), or v3(2). */
         long version = 0;
         if (ret->cert_info->version != NULL) {
             version = ASN1_INTEGER_get(ret->cert_info->version);
-            if (version <= 0 || version > 2) {
+            /* TODO(https://crbug.com/boringssl/364): |version| = 0 should also
+             * be rejected. This means an explicitly-encoded X.509v1 version.
+             * v1 is DEFAULT, so DER requires it be omitted. */
+            if (version < 0 || version > 2) {
                 OPENSSL_PUT_ERROR(X509, X509_R_INVALID_VERSION);
                 return 0;
             }
@@ -140,10 +141,6 @@
             return 0;
         }
 
-        /* TODO(davidben): Remove this field once the few external accesses are
-         * removed. */
-        OPENSSL_free(ret->name);
-        ret->name = X509_NAME_oneline(ret->cert_info->subject, NULL, 0);
         break;
     }
 
@@ -158,7 +155,6 @@
         GENERAL_NAMES_free(ret->altname);
         NAME_CONSTRAINTS_free(ret->nc);
         CRYPTO_BUFFER_free(ret->buf);
-        OPENSSL_free(ret->name);
         break;
 
     }
@@ -339,10 +335,45 @@
     return length;
 }
 
-int i2d_re_X509_tbs(X509 *x, unsigned char **pp)
+int i2d_re_X509_tbs(X509 *x509, unsigned char **outp)
 {
-    x->cert_info->enc.modified = 1;
-    return i2d_X509_CINF(x->cert_info, pp);
+    x509->cert_info->enc.modified = 1;
+    return i2d_X509_CINF(x509->cert_info, outp);
+}
+
+int i2d_X509_tbs(X509 *x509, unsigned char **outp)
+{
+    return i2d_X509_CINF(x509->cert_info, outp);
+}
+
+int X509_set1_signature_algo(X509 *x509, const X509_ALGOR *algo)
+{
+    /* TODO(davidben): Const-correct generated ASN.1 dup functions.
+     * Alternatively, when the types are hidden and we can embed required fields
+     * directly in structs, import |X509_ALGOR_copy| from upstream. */
+    X509_ALGOR *copy1 = X509_ALGOR_dup((X509_ALGOR *)algo);
+    X509_ALGOR *copy2 = X509_ALGOR_dup((X509_ALGOR *)algo);
+    if (copy1 == NULL || copy2 == NULL) {
+        X509_ALGOR_free(copy1);
+        X509_ALGOR_free(copy2);
+        return 0;
+    }
+
+    X509_ALGOR_free(x509->sig_alg);
+    x509->sig_alg = copy1;
+    X509_ALGOR_free(x509->cert_info->signature);
+    x509->cert_info->signature = copy2;
+    return 1;
+}
+
+int X509_set1_signature_value(X509 *x509, const uint8_t *sig, size_t sig_len)
+{
+    if (!ASN1_STRING_set(x509->signature, sig, sig_len)) {
+      return 0;
+    }
+    x509->signature->flags &= ~(ASN1_STRING_FLAG_BITS_LEFT | 0x07);
+    x509->signature->flags |= ASN1_STRING_FLAG_BITS_LEFT;
+    return 1;
 }
 
 void X509_get0_signature(const ASN1_BIT_STRING **psig, const X509_ALGOR **palg,
diff --git a/deps/boringssl/src/crypto/x509/x_x509a.c b/deps/boringssl/src/crypto/x509/x_x509a.c
index dccc46a..823fa5c 100644
--- a/deps/boringssl/src/crypto/x509/x_x509a.c
+++ b/deps/boringssl/src/crypto/x509/x_x509a.c
@@ -89,7 +89,7 @@
     return x->aux;
 }
 
-int X509_alias_set1(X509 *x, unsigned char *name, int len)
+int X509_alias_set1(X509 *x, const unsigned char *name, int len)
 {
     X509_CERT_AUX *aux;
     if (!name) {
@@ -106,7 +106,7 @@
     return ASN1_STRING_set(aux->alias, name, len);
 }
 
-int X509_keyid_set1(X509 *x, unsigned char *id, int len)
+int X509_keyid_set1(X509 *x, const unsigned char *id, int len)
 {
     X509_CERT_AUX *aux;
     if (!id) {
diff --git a/deps/boringssl/src/crypto/x509v3/ext_dat.h b/deps/boringssl/src/crypto/x509v3/ext_dat.h
index a6ca45b..7930126 100644
--- a/deps/boringssl/src/crypto/x509v3/ext_dat.h
+++ b/deps/boringssl/src/crypto/x509v3/ext_dat.h
@@ -61,8 +61,7 @@
 #endif
 
 extern const X509V3_EXT_METHOD v3_bcons, v3_nscert, v3_key_usage, v3_ext_ku;
-extern const X509V3_EXT_METHOD v3_pkey_usage_period, v3_sxnet, v3_info,
-    v3_sinfo;
+extern const X509V3_EXT_METHOD v3_info, v3_sinfo;
 extern const X509V3_EXT_METHOD v3_ns_ia5_list[], v3_alt[], v3_skey_id,
     v3_akey_id;
 extern const X509V3_EXT_METHOD v3_crl_num, v3_crl_reason, v3_crl_invdate;
@@ -96,7 +95,6 @@
     &v3_ns_ia5_list[6],
     &v3_skey_id,
     &v3_key_usage,
-    &v3_pkey_usage_period,
     &v3_alt[0],
     &v3_alt[1],
     &v3_bcons,
@@ -108,7 +106,6 @@
     &v3_delta_crl,
     &v3_crl_reason,
     &v3_crl_invdate,
-    &v3_sxnet,
     &v3_info,
 #ifndef OPENSSL_NO_OCSP
     &v3_ocsp_nonce,
diff --git a/deps/boringssl/src/crypto/x509v3/pcy_data.c b/deps/boringssl/src/crypto/x509v3/pcy_data.c
index 498de4d..58584c2 100644
--- a/deps/boringssl/src/crypto/x509v3/pcy_data.c
+++ b/deps/boringssl/src/crypto/x509v3/pcy_data.c
@@ -98,13 +98,15 @@
     } else
         id = NULL;
     ret = OPENSSL_malloc(sizeof(X509_POLICY_DATA));
-    if (!ret)
+    if (!ret) {
+        OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
+        ASN1_OBJECT_free(id);
         return NULL;
+    }
     ret->expected_policy_set = sk_ASN1_OBJECT_new_null();
     if (!ret->expected_policy_set) {
         OPENSSL_free(ret);
-        if (id)
-            ASN1_OBJECT_free(id);
+        ASN1_OBJECT_free(id);
         return NULL;
     }
 
diff --git a/deps/boringssl/src/crypto/x509v3/v3_akey.c b/deps/boringssl/src/crypto/x509v3/v3_akey.c
index 30c02e2..1037673 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_akey.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_akey.c
@@ -172,7 +172,7 @@
 
     if ((issuer && !ikeyid) || (issuer == 2)) {
         isname = X509_NAME_dup(X509_get_issuer_name(cert));
-        serial = M_ASN1_INTEGER_dup(X509_get_serialNumber(cert));
+        serial = ASN1_INTEGER_dup(X509_get_serialNumber(cert));
         if (!isname || !serial) {
             OPENSSL_PUT_ERROR(X509V3, X509V3_R_UNABLE_TO_GET_ISSUER_DETAILS);
             goto err;
@@ -201,7 +201,7 @@
 
  err:
     X509_NAME_free(isname);
-    M_ASN1_INTEGER_free(serial);
-    M_ASN1_OCTET_STRING_free(ikeyid);
+    ASN1_INTEGER_free(serial);
+    ASN1_OCTET_STRING_free(ikeyid);
     return NULL;
 }
diff --git a/deps/boringssl/src/crypto/x509v3/v3_alt.c b/deps/boringssl/src/crypto/x509v3/v3_alt.c
index 0e79b45..4d54075 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_alt.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_alt.c
@@ -75,8 +75,8 @@
                                      STACK_OF(CONF_VALUE) *nval);
 static int copy_email(X509V3_CTX *ctx, GENERAL_NAMES *gens, int move_p);
 static int copy_issuer(X509V3_CTX *ctx, GENERAL_NAMES *gens);
-static int do_othername(GENERAL_NAME *gen, char *value, X509V3_CTX *ctx);
-static int do_dirname(GENERAL_NAME *gen, char *value, X509V3_CTX *ctx);
+static int do_othername(GENERAL_NAME *gen, const char *value, X509V3_CTX *ctx);
+static int do_dirname(GENERAL_NAME *gen, const char *value, X509V3_CTX *ctx);
 
 const X509V3_EXT_METHOD v3_alt[] = {
     {NID_subject_alt_name, 0, ASN1_ITEM_ref(GENERAL_NAMES),
@@ -288,40 +288,40 @@
 
 static int copy_issuer(X509V3_CTX *ctx, GENERAL_NAMES *gens)
 {
-    GENERAL_NAMES *ialt;
-    GENERAL_NAME *gen;
-    X509_EXTENSION *ext;
-    int i;
-    size_t j;
     if (ctx && (ctx->flags == CTX_TEST))
         return 1;
     if (!ctx || !ctx->issuer_cert) {
         OPENSSL_PUT_ERROR(X509V3, X509V3_R_NO_ISSUER_DETAILS);
-        goto err;
+        return 0;
     }
-    i = X509_get_ext_by_NID(ctx->issuer_cert, NID_subject_alt_name, -1);
+    int i = X509_get_ext_by_NID(ctx->issuer_cert, NID_subject_alt_name, -1);
     if (i < 0)
         return 1;
+
+    int ret = 0;
+    GENERAL_NAMES *ialt = NULL;
+    X509_EXTENSION *ext;
     if (!(ext = X509_get_ext(ctx->issuer_cert, i)) ||
         !(ialt = X509V3_EXT_d2i(ext))) {
         OPENSSL_PUT_ERROR(X509V3, X509V3_R_ISSUER_DECODE_ERROR);
         goto err;
     }
 
-    for (j = 0; j < sk_GENERAL_NAME_num(ialt); j++) {
-        gen = sk_GENERAL_NAME_value(ialt, j);
+    for (size_t j = 0; j < sk_GENERAL_NAME_num(ialt); j++) {
+        GENERAL_NAME *gen = sk_GENERAL_NAME_value(ialt, j);
         if (!sk_GENERAL_NAME_push(gens, gen)) {
             OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
             goto err;
         }
+        /* Ownership of |gen| has moved from |ialt| to |gens|. */
+        sk_GENERAL_NAME_set(ialt, j, NULL);
     }
-    sk_GENERAL_NAME_free(ialt);
 
-    return 1;
+    ret = 1;
 
- err:
-    return 0;
-
+err:
+    GENERAL_NAMES_free(ialt);
+    return ret;
 }
 
 static GENERAL_NAMES *v2i_subject_alt(X509V3_EXT_METHOD *method,
@@ -386,7 +386,7 @@
     while ((i = X509_NAME_get_index_by_NID(nm,
                                            NID_pkcs9_emailAddress, i)) >= 0) {
         ne = X509_NAME_get_entry(nm, i);
-        email = M_ASN1_IA5STRING_dup(X509_NAME_ENTRY_get_data(ne));
+        email = ASN1_STRING_dup(X509_NAME_ENTRY_get_data(ne));
         if (move_p) {
             X509_NAME_delete_entry(nm, i);
             X509_NAME_ENTRY_free(ne);
@@ -410,7 +410,7 @@
 
  err:
     GENERAL_NAME_free(gen);
-    M_ASN1_IA5STRING_free(email);
+    ASN1_IA5STRING_free(email);
     return 0;
 
 }
@@ -446,8 +446,8 @@
 
 GENERAL_NAME *a2i_GENERAL_NAME(GENERAL_NAME *out,
                                const X509V3_EXT_METHOD *method,
-                               X509V3_CTX *ctx, int gen_type, char *value,
-                               int is_nc)
+                               X509V3_CTX *ctx, int gen_type,
+                               const char *value, int is_nc)
 {
     char is_string = 0;
     GENERAL_NAME *gen = NULL;
@@ -517,7 +517,7 @@
     }
 
     if (is_string) {
-        if (!(gen->d.ia5 = M_ASN1_IA5STRING_new()) ||
+        if (!(gen->d.ia5 = ASN1_IA5STRING_new()) ||
             !ASN1_STRING_set(gen->d.ia5, (unsigned char *)value,
                              strlen(value))) {
             OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
@@ -575,9 +575,10 @@
 
 }
 
-static int do_othername(GENERAL_NAME *gen, char *value, X509V3_CTX *ctx)
+static int do_othername(GENERAL_NAME *gen, const char *value, X509V3_CTX *ctx)
 {
-    char *objtmp = NULL, *p;
+    char *objtmp = NULL;
+    const char *p;
     int objlen;
     if (!(p = strchr(value, ';')))
         return 0;
@@ -602,7 +603,7 @@
     return 1;
 }
 
-static int do_dirname(GENERAL_NAME *gen, char *value, X509V3_CTX *ctx)
+static int do_dirname(GENERAL_NAME *gen, const char *value, X509V3_CTX *ctx)
 {
     int ret = 0;
     STACK_OF(CONF_VALUE) *sk = NULL;
diff --git a/deps/boringssl/src/crypto/x509v3/v3_bitst.c b/deps/boringssl/src/crypto/x509v3/v3_bitst.c
index 86a8c36..402f830 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_bitst.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_bitst.c
@@ -113,7 +113,7 @@
     ASN1_BIT_STRING *bs;
     size_t i;
     const BIT_STRING_BITNAME *bnam;
-    if (!(bs = M_ASN1_BIT_STRING_new())) {
+    if (!(bs = ASN1_BIT_STRING_new())) {
         OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
         return NULL;
     }
@@ -124,7 +124,7 @@
                 !strcmp(bnam->lname, val->name)) {
                 if (!ASN1_BIT_STRING_set_bit(bs, bnam->bitnum, 1)) {
                     OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
-                    M_ASN1_BIT_STRING_free(bs);
+                    ASN1_BIT_STRING_free(bs);
                     return NULL;
                 }
                 break;
@@ -133,7 +133,7 @@
         if (!bnam->lname) {
             OPENSSL_PUT_ERROR(X509V3, X509V3_R_UNKNOWN_BIT_STRING_ARGUMENT);
             X509V3_conf_err(val);
-            M_ASN1_BIT_STRING_free(bs);
+            ASN1_BIT_STRING_free(bs);
             return NULL;
         }
     }
diff --git a/deps/boringssl/src/crypto/x509v3/v3_conf.c b/deps/boringssl/src/crypto/x509v3/v3_conf.c
index e98d0fc..158f8df 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_conf.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_conf.c
@@ -71,22 +71,22 @@
 #include "../internal.h"
 #include "internal.h"
 
-static int v3_check_critical(char **value);
-static int v3_check_generic(char **value);
+static int v3_check_critical(const char **value);
+static int v3_check_generic(const char **value);
 static X509_EXTENSION *do_ext_nconf(CONF *conf, X509V3_CTX *ctx, int ext_nid,
-                                    int crit, char *value);
-static X509_EXTENSION *v3_generic_extension(const char *ext, char *value,
+                                    int crit, const char *value);
+static X509_EXTENSION *v3_generic_extension(const char *ext, const char *value,
                                             int crit, int type,
                                             X509V3_CTX *ctx);
 static X509_EXTENSION *do_ext_i2d(const X509V3_EXT_METHOD *method,
                                   int ext_nid, int crit, void *ext_struc);
-static unsigned char *generic_asn1(char *value, X509V3_CTX *ctx,
+static unsigned char *generic_asn1(const char *value, X509V3_CTX *ctx,
                                    long *ext_len);
 /* CONF *conf:  Config file    */
 /* char *name:  Name    */
 /* char *value:  Value    */
-X509_EXTENSION *X509V3_EXT_nconf(CONF *conf, X509V3_CTX *ctx, char *name,
-                                 char *value)
+X509_EXTENSION *X509V3_EXT_nconf(CONF *conf, X509V3_CTX *ctx, const char *name,
+                                 const char *value)
 {
     int crit;
     int ext_type;
@@ -105,7 +105,7 @@
 /* CONF *conf:  Config file    */
 /* char *value:  Value    */
 X509_EXTENSION *X509V3_EXT_nconf_nid(CONF *conf, X509V3_CTX *ctx, int ext_nid,
-                                     char *value)
+                                     const char *value)
 {
     int crit;
     int ext_type;
@@ -119,7 +119,7 @@
 /* CONF *conf:  Config file    */
 /* char *value:  Value    */
 static X509_EXTENSION *do_ext_nconf(CONF *conf, X509V3_CTX *ctx, int ext_nid,
-                                    int crit, char *value)
+                                    int crit, const char *value)
 {
     const X509V3_EXT_METHOD *method;
     X509_EXTENSION *ext;
@@ -199,7 +199,7 @@
         p = ext_der;
         method->i2d(ext_struc, &p);
     }
-    if (!(ext_oct = M_ASN1_OCTET_STRING_new()))
+    if (!(ext_oct = ASN1_OCTET_STRING_new()))
         goto merr;
     ext_oct->data = ext_der;
     ext_oct->length = ext_len;
@@ -207,7 +207,7 @@
     ext = X509_EXTENSION_create_by_NID(NULL, ext_nid, crit, ext_oct);
     if (!ext)
         goto merr;
-    M_ASN1_OCTET_STRING_free(ext_oct);
+    ASN1_OCTET_STRING_free(ext_oct);
 
     return ext;
 
@@ -230,9 +230,9 @@
 }
 
 /* Check the extension string for critical flag */
-static int v3_check_critical(char **value)
+static int v3_check_critical(const char **value)
 {
-    char *p = *value;
+    const char *p = *value;
     if ((strlen(p) < 9) || strncmp(p, "critical,", 9))
         return 0;
     p += 9;
@@ -243,10 +243,10 @@
 }
 
 /* Check extension string for generic extension and return the type */
-static int v3_check_generic(char **value)
+static int v3_check_generic(const char **value)
 {
     int gen_type = 0;
-    char *p = *value;
+    const char *p = *value;
     if ((strlen(p) >= 4) && !strncmp(p, "DER:", 4)) {
         p += 4;
         gen_type = 1;
@@ -263,7 +263,7 @@
 }
 
 /* Create a generic extension: for now just handle DER type */
-static X509_EXTENSION *v3_generic_extension(const char *ext, char *value,
+static X509_EXTENSION *v3_generic_extension(const char *ext, const char *value,
                                             int crit, int gen_type,
                                             X509V3_CTX *ctx)
 {
@@ -289,7 +289,7 @@
         goto err;
     }
 
-    if (!(oct = M_ASN1_OCTET_STRING_new())) {
+    if (!(oct = ASN1_OCTET_STRING_new())) {
         OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
         goto err;
     }
@@ -302,14 +302,14 @@
 
  err:
     ASN1_OBJECT_free(obj);
-    M_ASN1_OCTET_STRING_free(oct);
+    ASN1_OCTET_STRING_free(oct);
     if (ext_der)
         OPENSSL_free(ext_der);
     return extension;
 
 }
 
-static unsigned char *generic_asn1(char *value, X509V3_CTX *ctx,
+static unsigned char *generic_asn1(const char *value, X509V3_CTX *ctx,
                                    long *ext_len)
 {
     ASN1_TYPE *typ;
@@ -327,7 +327,7 @@
  * file section to an extension STACK.
  */
 
-int X509V3_EXT_add_nconf_sk(CONF *conf, X509V3_CTX *ctx, char *section,
+int X509V3_EXT_add_nconf_sk(CONF *conf, X509V3_CTX *ctx, const char *section,
                             STACK_OF(X509_EXTENSION) **sk)
 {
     X509_EXTENSION *ext;
@@ -351,7 +351,7 @@
  * Convenience functions to add extensions to a certificate, CRL and request
  */
 
-int X509V3_EXT_add_nconf(CONF *conf, X509V3_CTX *ctx, char *section,
+int X509V3_EXT_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section,
                          X509 *cert)
 {
     STACK_OF(X509_EXTENSION) **sk = NULL;
@@ -362,7 +362,7 @@
 
 /* Same as above but for a CRL */
 
-int X509V3_EXT_CRL_add_nconf(CONF *conf, X509V3_CTX *ctx, char *section,
+int X509V3_EXT_CRL_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section,
                              X509_CRL *crl)
 {
     STACK_OF(X509_EXTENSION) **sk = NULL;
@@ -373,7 +373,7 @@
 
 /* Add extensions to certificate request */
 
-int X509V3_EXT_REQ_add_nconf(CONF *conf, X509V3_CTX *ctx, char *section,
+int X509V3_EXT_REQ_add_nconf(CONF *conf, X509V3_CTX *ctx, const char *section,
                              X509_REQ *req)
 {
     STACK_OF(X509_EXTENSION) *extlist = NULL, **sk = NULL;
@@ -390,7 +390,7 @@
 
 /* Config database functions */
 
-char *X509V3_get_string(X509V3_CTX *ctx, char *name, char *section)
+char *X509V3_get_string(X509V3_CTX *ctx, const char *name, const char *section)
 {
     if (!ctx->db || !ctx->db_meth || !ctx->db_meth->get_string) {
         OPENSSL_PUT_ERROR(X509V3, X509V3_R_OPERATION_NOT_DEFINED);
@@ -401,7 +401,7 @@
     return NULL;
 }
 
-STACK_OF(CONF_VALUE) *X509V3_get_section(X509V3_CTX *ctx, char *section)
+STACK_OF(CONF_VALUE) *X509V3_get_section(X509V3_CTX *ctx, const char *section)
 {
     if (!ctx->db || !ctx->db_meth || !ctx->db_meth->get_section) {
         OPENSSL_PUT_ERROR(X509V3, X509V3_R_OPERATION_NOT_DEFINED);
@@ -428,13 +428,17 @@
         ctx->db_meth->free_section(ctx->db, section);
 }
 
-static char *nconf_get_string(void *db, char *section, char *value)
+static char *nconf_get_string(void *db, const char *section, const char *value)
 {
-    /* TODO(fork): this should return a const value. */
+    /* TODO(fork): This returns a non-const pointer because |X509V3_CONF_METHOD|
+     * allows |get_string| to return caller-owned pointers, provided they're
+     * freed by |free_string|. |nconf_method| leaves |free_string| NULL, and
+     * there are no other implementations of |X509V3_CONF_METHOD|, so this can
+     * be simplified if we make it private. */
     return (char *)NCONF_get_string(db, section, value);
 }
 
-static STACK_OF(CONF_VALUE) *nconf_get_section(void *db, char *section)
+static STACK_OF(CONF_VALUE) *nconf_get_section(void *db, const char *section)
 {
     return NCONF_get_section(db, section);
 }
diff --git a/deps/boringssl/src/crypto/x509v3/v3_cpols.c b/deps/boringssl/src/crypto/x509v3/v3_cpols.c
index 18d260b..216e7ae 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_cpols.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_cpols.c
@@ -245,7 +245,7 @@
                 OPENSSL_PUT_ERROR(X509V3, ERR_R_INTERNAL_ERROR);
                 goto err;
             }
-            qual->d.cpsuri = M_ASN1_IA5STRING_new();
+            qual->d.cpsuri = ASN1_IA5STRING_new();
             if (qual->d.cpsuri == NULL) {
                 goto err;
             }
@@ -319,7 +319,7 @@
     for (i = 0; i < sk_CONF_VALUE_num(unot); i++) {
         cnf = sk_CONF_VALUE_value(unot, i);
         if (!strcmp(cnf->name, "explicitText")) {
-            not->exptext = M_ASN1_VISIBLESTRING_new();
+            not->exptext = ASN1_VISIBLESTRING_new();
             if (not->exptext == NULL)
                 goto merr;
             if (!ASN1_STRING_set(not->exptext, cnf->value,
diff --git a/deps/boringssl/src/crypto/x509v3/v3_enum.c b/deps/boringssl/src/crypto/x509v3/v3_enum.c
index eff77e8..3a9d4d6 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_enum.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_enum.c
@@ -87,7 +87,8 @@
     (void *)crl_reasons
 };
 
-char *i2s_ASN1_ENUMERATED_TABLE(X509V3_EXT_METHOD *method, ASN1_ENUMERATED *e)
+char *i2s_ASN1_ENUMERATED_TABLE(X509V3_EXT_METHOD *method,
+                                const ASN1_ENUMERATED *e)
 {
     const ENUMERATED_NAMES *enam;
     long strval;
diff --git a/deps/boringssl/src/crypto/x509v3/v3_genn.c b/deps/boringssl/src/crypto/x509v3/v3_genn.c
index 552a524..ae79374 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_genn.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_genn.c
@@ -72,8 +72,9 @@
 IMPLEMENT_ASN1_FUNCTIONS(OTHERNAME)
 
 ASN1_SEQUENCE(EDIPARTYNAME) = {
-        ASN1_IMP_OPT(EDIPARTYNAME, nameAssigner, DIRECTORYSTRING, 0),
-        ASN1_IMP_OPT(EDIPARTYNAME, partyName, DIRECTORYSTRING, 1)
+        /* DirectoryString is a CHOICE type, so use explicit tagging. */
+        ASN1_EXP_OPT(EDIPARTYNAME, nameAssigner, DIRECTORYSTRING, 0),
+        ASN1_EXP(EDIPARTYNAME, partyName, DIRECTORYSTRING, 1)
 } ASN1_SEQUENCE_END(EDIPARTYNAME)
 
 IMPLEMENT_ASN1_FUNCTIONS(EDIPARTYNAME)
@@ -102,42 +103,56 @@
 
 IMPLEMENT_ASN1_DUP_FUNCTION(GENERAL_NAME)
 
-/* Returns 0 if they are equal, != 0 otherwise. */
-int GENERAL_NAME_cmp(GENERAL_NAME *a, GENERAL_NAME *b)
+static int edipartyname_cmp(const EDIPARTYNAME *a, const EDIPARTYNAME *b)
 {
-    int result = -1;
+    /* nameAssigner is optional and may be NULL. */
+    if (a->nameAssigner == NULL) {
+        if (b->nameAssigner != NULL) {
+            return -1;
+        }
+    } else {
+        if (b->nameAssigner == NULL ||
+            ASN1_STRING_cmp(a->nameAssigner, b->nameAssigner) != 0) {
+            return -1;
+        }
+    }
 
+    /* partyName may not be NULL. */
+    return ASN1_STRING_cmp(a->partyName, b->partyName);
+}
+
+/* Returns 0 if they are equal, != 0 otherwise. */
+int GENERAL_NAME_cmp(const GENERAL_NAME *a, const GENERAL_NAME *b)
+{
     if (!a || !b || a->type != b->type)
         return -1;
+
     switch (a->type) {
     case GEN_X400:
+        return ASN1_TYPE_cmp(a->d.x400Address, b->d.x400Address);
+
     case GEN_EDIPARTY:
-        result = ASN1_TYPE_cmp(a->d.other, b->d.other);
-        break;
+        return edipartyname_cmp(a->d.ediPartyName, b->d.ediPartyName);
 
     case GEN_OTHERNAME:
-        result = OTHERNAME_cmp(a->d.otherName, b->d.otherName);
-        break;
+        return OTHERNAME_cmp(a->d.otherName, b->d.otherName);
 
     case GEN_EMAIL:
     case GEN_DNS:
     case GEN_URI:
-        result = ASN1_STRING_cmp(a->d.ia5, b->d.ia5);
-        break;
+        return ASN1_STRING_cmp(a->d.ia5, b->d.ia5);
 
     case GEN_DIRNAME:
-        result = X509_NAME_cmp(a->d.dirn, b->d.dirn);
-        break;
+        return X509_NAME_cmp(a->d.dirn, b->d.dirn);
 
     case GEN_IPADD:
-        result = ASN1_OCTET_STRING_cmp(a->d.ip, b->d.ip);
-        break;
+        return ASN1_OCTET_STRING_cmp(a->d.ip, b->d.ip);
 
     case GEN_RID:
-        result = OBJ_cmp(a->d.rid, b->d.rid);
-        break;
+        return OBJ_cmp(a->d.rid, b->d.rid);
     }
-    return result;
+
+    return -1;
 }
 
 /* Returns 0 if they are equal, != 0 otherwise. */
@@ -159,8 +174,11 @@
 {
     switch (type) {
     case GEN_X400:
+        a->d.x400Address = value;
+        break;
+
     case GEN_EDIPARTY:
-        a->d.other = value;
+        a->d.ediPartyName = value;
         break;
 
     case GEN_OTHERNAME:
@@ -188,14 +206,16 @@
     a->type = type;
 }
 
-void *GENERAL_NAME_get0_value(GENERAL_NAME *a, int *ptype)
+void *GENERAL_NAME_get0_value(const GENERAL_NAME *a, int *ptype)
 {
     if (ptype)
         *ptype = a->type;
     switch (a->type) {
     case GEN_X400:
+        return a->d.x400Address;
+
     case GEN_EDIPARTY:
-        return a->d.other;
+        return a->d.ediPartyName;
 
     case GEN_OTHERNAME:
         return a->d.otherName;
@@ -233,7 +253,7 @@
     return 1;
 }
 
-int GENERAL_NAME_get0_otherName(GENERAL_NAME *gen,
+int GENERAL_NAME_get0_otherName(const GENERAL_NAME *gen,
                                 ASN1_OBJECT **poid, ASN1_TYPE **pvalue)
 {
     if (gen->type != GEN_OTHERNAME)
diff --git a/deps/boringssl/src/crypto/x509v3/v3_ia5.c b/deps/boringssl/src/crypto/x509v3/v3_ia5.c
index 6b2056d..700200c 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_ia5.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_ia5.c
@@ -108,11 +108,10 @@
         OPENSSL_PUT_ERROR(X509V3, X509V3_R_INVALID_NULL_ARGUMENT);
         return NULL;
     }
-    if (!(ia5 = M_ASN1_IA5STRING_new()))
+    if (!(ia5 = ASN1_IA5STRING_new()))
         goto err;
-    if (!ASN1_STRING_set((ASN1_STRING *)ia5, (unsigned char *)str,
-                         strlen(str))) {
-        M_ASN1_IA5STRING_free(ia5);
+    if (!ASN1_STRING_set(ia5, str, strlen(str))) {
+        ASN1_IA5STRING_free(ia5);
         goto err;
     }
     return ia5;
diff --git a/deps/boringssl/src/crypto/x509v3/v3_info.c b/deps/boringssl/src/crypto/x509v3/v3_info.c
index 7a48bd5..3615c71 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_info.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_info.c
@@ -208,7 +208,7 @@
     return NULL;
 }
 
-int i2a_ACCESS_DESCRIPTION(BIO *bp, ACCESS_DESCRIPTION *a)
+int i2a_ACCESS_DESCRIPTION(BIO *bp, const ACCESS_DESCRIPTION *a)
 {
     i2a_ASN1_OBJECT(bp, a->method);
 #ifdef UNDEF
diff --git a/deps/boringssl/src/crypto/x509v3/v3_lib.c b/deps/boringssl/src/crypto/x509v3/v3_lib.c
index d5eda3d..d89733f 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_lib.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_lib.c
@@ -122,7 +122,7 @@
     return sk_X509V3_EXT_METHOD_value(ext_list, idx);
 }
 
-const X509V3_EXT_METHOD *X509V3_EXT_get(X509_EXTENSION *ext)
+const X509V3_EXT_METHOD *X509V3_EXT_get(const X509_EXTENSION *ext)
 {
     int nid;
     if ((nid = OBJ_obj2nid(ext->object)) == NID_undef)
@@ -203,7 +203,7 @@
 
 /* Return an extension internal structure */
 
-void *X509V3_EXT_d2i(X509_EXTENSION *ext)
+void *X509V3_EXT_d2i(const X509_EXTENSION *ext)
 {
     const X509V3_EXT_METHOD *method;
     const unsigned char *p;
@@ -217,49 +217,38 @@
     return method->d2i(NULL, &p, ext->value->length);
 }
 
-/*
- * Get critical flag and decoded version of extension from a NID. The "idx"
- * variable returns the last found extension and can be used to retrieve
- * multiple extensions of the same NID. However multiple extensions with the
- * same NID is usually due to a badly encoded certificate so if idx is NULL
- * we choke if multiple extensions exist. The "crit" variable is set to the
- * critical value. The return value is the decoded extension or NULL on
- * error. The actual error can have several different causes, the value of
- * *crit reflects the cause: >= 0, extension found but not decoded (reflects
- * critical value). -1 extension not found. -2 extension occurs more than
- * once.
- */
-
-void *X509V3_get_d2i(STACK_OF(X509_EXTENSION) *x, int nid, int *crit,
-                     int *idx)
+void *X509V3_get_d2i(const STACK_OF(X509_EXTENSION) *extensions, int nid,
+                     int *out_critical, int *out_idx)
 {
     int lastpos;
     size_t i;
     X509_EXTENSION *ex, *found_ex = NULL;
-    if (!x) {
-        if (idx)
-            *idx = -1;
-        if (crit)
-            *crit = -1;
+    if (!extensions) {
+        if (out_idx)
+            *out_idx = -1;
+        if (out_critical)
+            *out_critical = -1;
         return NULL;
     }
-    if (idx)
-        lastpos = *idx + 1;
+    if (out_idx)
+        lastpos = *out_idx + 1;
     else
         lastpos = 0;
     if (lastpos < 0)
         lastpos = 0;
-    for (i = lastpos; i < sk_X509_EXTENSION_num(x); i++) {
-        ex = sk_X509_EXTENSION_value(x, i);
+    for (i = lastpos; i < sk_X509_EXTENSION_num(extensions); i++) {
+        ex = sk_X509_EXTENSION_value(extensions, i);
         if (OBJ_obj2nid(ex->object) == nid) {
-            if (idx) {
-                *idx = i;
+            if (out_idx) {
+                /* TODO(https://crbug.com/boringssl/379): Consistently reject
+                 * duplicate extensions. */
+                *out_idx = i;
                 found_ex = ex;
                 break;
             } else if (found_ex) {
                 /* Found more than one */
-                if (crit)
-                    *crit = -2;
+                if (out_critical)
+                    *out_critical = -2;
                 return NULL;
             }
             found_ex = ex;
@@ -267,16 +256,16 @@
     }
     if (found_ex) {
         /* Found it */
-        if (crit)
-            *crit = X509_EXTENSION_get_critical(found_ex);
+        if (out_critical)
+            *out_critical = X509_EXTENSION_get_critical(found_ex);
         return X509V3_EXT_d2i(found_ex);
     }
 
     /* Extension not found */
-    if (idx)
-        *idx = -1;
-    if (crit)
-        *crit = -1;
+    if (out_idx)
+        *out_idx = -1;
+    if (out_critical)
+        *out_critical = -1;
     return NULL;
 }
 
diff --git a/deps/boringssl/src/crypto/x509v3/v3_pku.c b/deps/boringssl/src/crypto/x509v3/v3_pku.c
deleted file mode 100644
index e4868b4..0000000
--- a/deps/boringssl/src/crypto/x509v3/v3_pku.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* v3_pku.c */
-/*
- * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project
- * 1999.
- */
-/* ====================================================================
- * Copyright (c) 1999 The OpenSSL Project.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- *    software must display the following acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- *    endorse or promote products derived from this software without
- *    prior written permission. For written permission, please contact
- *    licensing@OpenSSL.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- *    nor may "OpenSSL" appear in their names without prior written
- *    permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- *    acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
- *
- * This product includes cryptographic software written by Eric Young
- * (eay@cryptsoft.com).  This product includes software written by Tim
- * Hudson (tjh@cryptsoft.com). */
-
-#include <stdio.h>
-
-#include <openssl/asn1.h>
-#include <openssl/asn1t.h>
-#include <openssl/mem.h>
-#include <openssl/obj.h>
-#include <openssl/x509v3.h>
-
-static int i2r_PKEY_USAGE_PERIOD(X509V3_EXT_METHOD *method,
-                                 PKEY_USAGE_PERIOD *usage, BIO *out,
-                                 int indent);
-/*
- * static PKEY_USAGE_PERIOD *v2i_PKEY_USAGE_PERIOD(X509V3_EXT_METHOD *method,
- * X509V3_CTX *ctx, STACK_OF(CONF_VALUE) *values);
- */
-const X509V3_EXT_METHOD v3_pkey_usage_period = {
-    NID_private_key_usage_period, 0, ASN1_ITEM_ref(PKEY_USAGE_PERIOD),
-    0, 0, 0, 0,
-    0, 0, 0, 0,
-    (X509V3_EXT_I2R)i2r_PKEY_USAGE_PERIOD, NULL,
-    NULL
-};
-
-ASN1_SEQUENCE(PKEY_USAGE_PERIOD) = {
-        ASN1_IMP_OPT(PKEY_USAGE_PERIOD, notBefore, ASN1_GENERALIZEDTIME, 0),
-        ASN1_IMP_OPT(PKEY_USAGE_PERIOD, notAfter, ASN1_GENERALIZEDTIME, 1)
-} ASN1_SEQUENCE_END(PKEY_USAGE_PERIOD)
-
-IMPLEMENT_ASN1_FUNCTIONS(PKEY_USAGE_PERIOD)
-
-static int i2r_PKEY_USAGE_PERIOD(X509V3_EXT_METHOD *method,
-                                 PKEY_USAGE_PERIOD *usage, BIO *out,
-                                 int indent)
-{
-    BIO_printf(out, "%*s", indent, "");
-    if (usage->notBefore) {
-        BIO_write(out, "Not Before: ", 12);
-        ASN1_GENERALIZEDTIME_print(out, usage->notBefore);
-        if (usage->notAfter)
-            BIO_write(out, ", ", 2);
-    }
-    if (usage->notAfter) {
-        BIO_write(out, "Not After: ", 11);
-        ASN1_GENERALIZEDTIME_print(out, usage->notAfter);
-    }
-    return 1;
-}
-
-/*
- * static PKEY_USAGE_PERIOD *v2i_PKEY_USAGE_PERIOD(method, ctx, values)
- * X509V3_EXT_METHOD *method; X509V3_CTX *ctx; STACK_OF(CONF_VALUE) *values;
- * { return NULL; }
- */
diff --git a/deps/boringssl/src/crypto/x509v3/v3_prn.c b/deps/boringssl/src/crypto/x509v3/v3_prn.c
index 2f5efcf..f6f341a 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_prn.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_prn.c
@@ -156,7 +156,7 @@
 }
 
 int X509V3_extensions_print(BIO *bp, const char *title,
-                            STACK_OF(X509_EXTENSION) *exts,
+                            const STACK_OF(X509_EXTENSION) *exts,
                             unsigned long flag, int indent)
 {
     size_t i;
@@ -183,7 +183,7 @@
             return 0;
         if (!X509V3_EXT_print(bp, ex, flag, indent + 4)) {
             BIO_printf(bp, "%*s", indent + 4, "");
-            M_ASN1_OCTET_STRING_print(bp, ex->value);
+            ASN1_STRING_print(bp, ex->value);
         }
         if (BIO_write(bp, "\n", 1) <= 0)
             return 0;
diff --git a/deps/boringssl/src/crypto/x509v3/v3_purp.c b/deps/boringssl/src/crypto/x509v3/v3_purp.c
index e41b657..acb7602 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_purp.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_purp.c
@@ -307,22 +307,22 @@
     xptable = NULL;
 }
 
-int X509_PURPOSE_get_id(X509_PURPOSE *xp)
+int X509_PURPOSE_get_id(const X509_PURPOSE *xp)
 {
     return xp->purpose;
 }
 
-char *X509_PURPOSE_get0_name(X509_PURPOSE *xp)
+char *X509_PURPOSE_get0_name(const X509_PURPOSE *xp)
 {
     return xp->name;
 }
 
-char *X509_PURPOSE_get0_sname(X509_PURPOSE *xp)
+char *X509_PURPOSE_get0_sname(const X509_PURPOSE *xp)
 {
     return xp->sname;
 }
 
-int X509_PURPOSE_get_trust(X509_PURPOSE *xp)
+int X509_PURPOSE_get_trust(const X509_PURPOSE *xp)
 {
     return xp->trust;
 }
@@ -451,8 +451,14 @@
                 || !bs->ca) {
                 x->ex_flags |= EXFLAG_INVALID;
                 x->ex_pathlen = 0;
-            } else
+            } else {
+                /* TODO(davidben): |ASN1_INTEGER_get| returns -1 on overflow,
+                 * which currently acts as if the constraint isn't present. This
+                 * works (an overflowing path length constraint may as well be
+                 * infinity), but Chromium's verifier simply treats values above
+                 * 255 as an error. */
                 x->ex_pathlen = ASN1_INTEGER_get(bs->pathlen);
+            }
         } else
             x->ex_pathlen = -1;
         BASIC_CONSTRAINTS_free(bs);
@@ -855,9 +861,9 @@
 
 uint32_t X509_get_extension_flags(X509 *x)
 {
-    if (!x509v3_cache_extensions(x)) {
-        return 0;
-    }
+    /* Ignore the return value. On failure, |x->ex_flags| will include
+     * |EXFLAG_INVALID|. */
+    x509v3_cache_extensions(x);
     return x->ex_flags;
 }
 
@@ -880,3 +886,44 @@
         return x->ex_xkusage;
     return UINT32_MAX;
 }
+
+const ASN1_OCTET_STRING *X509_get0_subject_key_id(X509 *x509)
+{
+    if (!x509v3_cache_extensions(x509)) {
+        return NULL;
+    }
+    return x509->skid;
+}
+
+const ASN1_OCTET_STRING *X509_get0_authority_key_id(X509 *x509)
+{
+    if (!x509v3_cache_extensions(x509)) {
+        return NULL;
+    }
+    return x509->akid != NULL ? x509->akid->keyid : NULL;
+}
+
+const GENERAL_NAMES *X509_get0_authority_issuer(X509 *x509)
+{
+    if (!x509v3_cache_extensions(x509)) {
+        return NULL;
+    }
+    return x509->akid != NULL ? x509->akid->issuer : NULL;
+}
+
+const ASN1_INTEGER *X509_get0_authority_serial(X509 *x509)
+{
+    if (!x509v3_cache_extensions(x509)) {
+        return NULL;
+    }
+    return x509->akid != NULL ? x509->akid->serial : NULL;
+}
+
+long X509_get_pathlen(X509 *x509)
+{
+    if (!x509v3_cache_extensions(x509) ||
+        (x509->ex_flags & EXFLAG_BCONS) == 0) {
+        return -1;
+    }
+    return x509->ex_pathlen;
+}
diff --git a/deps/boringssl/src/crypto/x509v3/v3_skey.c b/deps/boringssl/src/crypto/x509v3/v3_skey.c
index 6a16e78..140356d 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_skey.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_skey.c
@@ -77,24 +77,24 @@
     NULL
 };
 
-char *i2s_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method, ASN1_OCTET_STRING *oct)
+char *i2s_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method, const ASN1_OCTET_STRING *oct)
 {
     return x509v3_bytes_to_hex(oct->data, oct->length);
 }
 
 ASN1_OCTET_STRING *s2i_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method,
-                                         X509V3_CTX *ctx, char *str)
+                                         X509V3_CTX *ctx, const char *str)
 {
     ASN1_OCTET_STRING *oct;
     long length;
 
-    if (!(oct = M_ASN1_OCTET_STRING_new())) {
+    if (!(oct = ASN1_OCTET_STRING_new())) {
         OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
         return NULL;
     }
 
     if (!(oct->data = x509v3_hex_to_bytes(str, &length))) {
-        M_ASN1_OCTET_STRING_free(oct);
+        ASN1_OCTET_STRING_free(oct);
         return NULL;
     }
 
@@ -115,7 +115,7 @@
     if (strcmp(str, "hash"))
         return s2i_ASN1_OCTET_STRING(method, ctx, str);
 
-    if (!(oct = M_ASN1_OCTET_STRING_new())) {
+    if (!(oct = ASN1_OCTET_STRING_new())) {
         OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
         return NULL;
     }
@@ -142,7 +142,7 @@
         (pk->data, pk->length, pkey_dig, &diglen, EVP_sha1(), NULL))
         goto err;
 
-    if (!M_ASN1_OCTET_STRING_set(oct, pkey_dig, diglen)) {
+    if (!ASN1_OCTET_STRING_set(oct, pkey_dig, diglen)) {
         OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
         goto err;
     }
@@ -150,6 +150,6 @@
     return oct;
 
  err:
-    M_ASN1_OCTET_STRING_free(oct);
+    ASN1_OCTET_STRING_free(oct);
     return NULL;
 }
diff --git a/deps/boringssl/src/crypto/x509v3/v3_sxnet.c b/deps/boringssl/src/crypto/x509v3/v3_sxnet.c
deleted file mode 100644
index 51c5a67..0000000
--- a/deps/boringssl/src/crypto/x509v3/v3_sxnet.c
+++ /dev/null
@@ -1,274 +0,0 @@
-/* v3_sxnet.c */
-/*
- * Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project
- * 1999.
- */
-/* ====================================================================
- * Copyright (c) 1999 The OpenSSL Project.  All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in
- *    the documentation and/or other materials provided with the
- *    distribution.
- *
- * 3. All advertising materials mentioning features or use of this
- *    software must display the following acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
- *
- * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
- *    endorse or promote products derived from this software without
- *    prior written permission. For written permission, please contact
- *    licensing@OpenSSL.org.
- *
- * 5. Products derived from this software may not be called "OpenSSL"
- *    nor may "OpenSSL" appear in their names without prior written
- *    permission of the OpenSSL Project.
- *
- * 6. Redistributions of any form whatsoever must retain the following
- *    acknowledgment:
- *    "This product includes software developed by the OpenSSL Project
- *    for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
- *
- * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
- * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
- * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
- * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
- * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
- * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
- * OF THE POSSIBILITY OF SUCH DAMAGE.
- * ====================================================================
- *
- * This product includes cryptographic software written by Eric Young
- * (eay@cryptsoft.com).  This product includes software written by Tim
- * Hudson (tjh@cryptsoft.com).
- *
- */
-
-#include <stdio.h>
-#include <string.h>
-
-#include <openssl/asn1.h>
-#include <openssl/asn1t.h>
-#include <openssl/conf.h>
-#include <openssl/err.h>
-#include <openssl/mem.h>
-#include <openssl/obj.h>
-#include <openssl/x509v3.h>
-
-/* Support for Thawte strong extranet extension */
-
-#define SXNET_TEST
-
-static int sxnet_i2r(X509V3_EXT_METHOD *method, SXNET *sx, BIO *out,
-                     int indent);
-#ifdef SXNET_TEST
-static SXNET *sxnet_v2i(X509V3_EXT_METHOD *method, X509V3_CTX *ctx,
-                        STACK_OF(CONF_VALUE) *nval);
-#endif
-const X509V3_EXT_METHOD v3_sxnet = {
-    NID_sxnet, X509V3_EXT_MULTILINE, ASN1_ITEM_ref(SXNET),
-    0, 0, 0, 0,
-    0, 0,
-    0,
-#ifdef SXNET_TEST
-    (X509V3_EXT_V2I)sxnet_v2i,
-#else
-    0,
-#endif
-    (X509V3_EXT_I2R)sxnet_i2r,
-    0,
-    NULL
-};
-
-ASN1_SEQUENCE(SXNETID) = {
-        ASN1_SIMPLE(SXNETID, zone, ASN1_INTEGER),
-        ASN1_SIMPLE(SXNETID, user, ASN1_OCTET_STRING)
-} ASN1_SEQUENCE_END(SXNETID)
-
-IMPLEMENT_ASN1_FUNCTIONS(SXNETID)
-
-ASN1_SEQUENCE(SXNET) = {
-        ASN1_SIMPLE(SXNET, version, ASN1_INTEGER),
-        ASN1_SEQUENCE_OF(SXNET, ids, SXNETID)
-} ASN1_SEQUENCE_END(SXNET)
-
-IMPLEMENT_ASN1_FUNCTIONS(SXNET)
-
-static int sxnet_i2r(X509V3_EXT_METHOD *method, SXNET *sx, BIO *out,
-                     int indent)
-{
-    long v;
-    char *tmp;
-    SXNETID *id;
-    size_t i;
-    v = ASN1_INTEGER_get(sx->version);
-    BIO_printf(out, "%*sVersion: %ld (0x%lX)", indent, "", v + 1, v);
-    for (i = 0; i < sk_SXNETID_num(sx->ids); i++) {
-        id = sk_SXNETID_value(sx->ids, i);
-        tmp = i2s_ASN1_INTEGER(NULL, id->zone);
-        BIO_printf(out, "\n%*sZone: %s, User: ", indent, "", tmp);
-        OPENSSL_free(tmp);
-        M_ASN1_OCTET_STRING_print(out, id->user);
-    }
-    return 1;
-}
-
-#ifdef SXNET_TEST
-
-/*
- * NBB: this is used for testing only. It should *not* be used for anything
- * else because it will just take static IDs from the configuration file and
- * they should really be separate values for each user.
- */
-
-static SXNET *sxnet_v2i(X509V3_EXT_METHOD *method, X509V3_CTX *ctx,
-                        STACK_OF(CONF_VALUE) *nval)
-{
-    CONF_VALUE *cnf;
-    SXNET *sx = NULL;
-    size_t i;
-    for (i = 0; i < sk_CONF_VALUE_num(nval); i++) {
-        cnf = sk_CONF_VALUE_value(nval, i);
-        if (!SXNET_add_id_asc(&sx, cnf->name, cnf->value, -1))
-            return NULL;
-    }
-    return sx;
-}
-
-#endif
-
-/* Strong Extranet utility functions */
-
-/* Add an id given the zone as an ASCII number */
-
-int SXNET_add_id_asc(SXNET **psx, char *zone, char *user, int userlen)
-{
-    ASN1_INTEGER *izone = NULL;
-    if (!(izone = s2i_ASN1_INTEGER(NULL, zone))) {
-        OPENSSL_PUT_ERROR(X509V3, X509V3_R_ERROR_CONVERTING_ZONE);
-        return 0;
-    }
-    return SXNET_add_id_INTEGER(psx, izone, user, userlen);
-}
-
-/* Add an id given the zone as an unsigned long */
-
-int SXNET_add_id_ulong(SXNET **psx, unsigned long lzone, char *user,
-                       int userlen)
-{
-    ASN1_INTEGER *izone = NULL;
-    if (!(izone = M_ASN1_INTEGER_new()) || !ASN1_INTEGER_set(izone, lzone)) {
-        OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
-        M_ASN1_INTEGER_free(izone);
-        return 0;
-    }
-    return SXNET_add_id_INTEGER(psx, izone, user, userlen);
-
-}
-
-/*
- * Add an id given the zone as an ASN1_INTEGER. Note this version uses the
- * passed integer and doesn't make a copy so don't free it up afterwards.
- */
-
-int SXNET_add_id_INTEGER(SXNET **psx, ASN1_INTEGER *zone, char *user,
-                         int userlen)
-{
-    SXNET *sx = NULL;
-    SXNETID *id = NULL;
-    if (!psx || !zone || !user) {
-        OPENSSL_PUT_ERROR(X509V3, X509V3_R_INVALID_NULL_ARGUMENT);
-        return 0;
-    }
-    if (userlen == -1)
-        userlen = strlen(user);
-    if (userlen > 64) {
-        OPENSSL_PUT_ERROR(X509V3, X509V3_R_USER_TOO_LONG);
-        return 0;
-    }
-    if (!*psx) {
-        if (!(sx = SXNET_new()))
-            goto err;
-        if (!ASN1_INTEGER_set(sx->version, 0))
-            goto err;
-        *psx = sx;
-    } else
-        sx = *psx;
-    if (SXNET_get_id_INTEGER(sx, zone)) {
-        OPENSSL_PUT_ERROR(X509V3, X509V3_R_DUPLICATE_ZONE_ID);
-        return 0;
-    }
-
-    if (!(id = SXNETID_new()))
-        goto err;
-    if (userlen == -1)
-        userlen = strlen(user);
-
-    if (!M_ASN1_OCTET_STRING_set(id->user, user, userlen))
-        goto err;
-    if (!sk_SXNETID_push(sx->ids, id))
-        goto err;
-    id->zone = zone;
-    return 1;
-
- err:
-    OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
-    SXNETID_free(id);
-    SXNET_free(sx);
-    *psx = NULL;
-    return 0;
-}
-
-ASN1_OCTET_STRING *SXNET_get_id_asc(SXNET *sx, char *zone)
-{
-    ASN1_INTEGER *izone = NULL;
-    ASN1_OCTET_STRING *oct;
-    if (!(izone = s2i_ASN1_INTEGER(NULL, zone))) {
-        OPENSSL_PUT_ERROR(X509V3, X509V3_R_ERROR_CONVERTING_ZONE);
-        return NULL;
-    }
-    oct = SXNET_get_id_INTEGER(sx, izone);
-    M_ASN1_INTEGER_free(izone);
-    return oct;
-}
-
-ASN1_OCTET_STRING *SXNET_get_id_ulong(SXNET *sx, unsigned long lzone)
-{
-    ASN1_INTEGER *izone = NULL;
-    ASN1_OCTET_STRING *oct;
-    if (!(izone = M_ASN1_INTEGER_new()) || !ASN1_INTEGER_set(izone, lzone)) {
-        OPENSSL_PUT_ERROR(X509V3, ERR_R_MALLOC_FAILURE);
-        M_ASN1_INTEGER_free(izone);
-        return NULL;
-    }
-    oct = SXNET_get_id_INTEGER(sx, izone);
-    M_ASN1_INTEGER_free(izone);
-    return oct;
-}
-
-ASN1_OCTET_STRING *SXNET_get_id_INTEGER(SXNET *sx, ASN1_INTEGER *zone)
-{
-    SXNETID *id;
-    size_t i;
-    for (i = 0; i < sk_SXNETID_num(sx->ids); i++) {
-        id = sk_SXNETID_value(sx->ids, i);
-        if (!M_ASN1_INTEGER_cmp(id->zone, zone))
-            return id->user;
-    }
-    return NULL;
-}
-
-IMPLEMENT_ASN1_SET_OF(SXNETID)
diff --git a/deps/boringssl/src/crypto/x509v3/v3_utl.c b/deps/boringssl/src/crypto/x509v3/v3_utl.c
index 9138ef7..c0952c0 100644
--- a/deps/boringssl/src/crypto/x509v3/v3_utl.c
+++ b/deps/boringssl/src/crypto/x509v3/v3_utl.c
@@ -147,7 +147,7 @@
     return X509V3_add_value(name, "FALSE", extlist);
 }
 
-int X509V3_add_value_bool_nf(char *name, int asn1_bool,
+int X509V3_add_value_bool_nf(const char *name, int asn1_bool,
                              STACK_OF(CONF_VALUE) **extlist)
 {
     if (asn1_bool)
@@ -194,7 +194,7 @@
     return ret;
 }
 
-char *i2s_ASN1_ENUMERATED(X509V3_EXT_METHOD *method, ASN1_ENUMERATED *a)
+char *i2s_ASN1_ENUMERATED(X509V3_EXT_METHOD *method, const ASN1_ENUMERATED *a)
 {
     BIGNUM *bntmp = NULL;
     char *strtmp = NULL;
@@ -207,7 +207,7 @@
     return strtmp;
 }
 
-char *i2s_ASN1_INTEGER(X509V3_EXT_METHOD *method, ASN1_INTEGER *a)
+char *i2s_ASN1_INTEGER(X509V3_EXT_METHOD *method, const ASN1_INTEGER *a)
 {
     BIGNUM *bntmp = NULL;
     char *strtmp = NULL;
@@ -220,7 +220,7 @@
     return strtmp;
 }
 
-ASN1_INTEGER *s2i_ASN1_INTEGER(X509V3_EXT_METHOD *method, char *value)
+ASN1_INTEGER *s2i_ASN1_INTEGER(X509V3_EXT_METHOD *method, const char *value)
 {
     BIGNUM *bn = NULL;
     ASN1_INTEGER *aint;
@@ -282,7 +282,7 @@
     return ret;
 }
 
-int X509V3_get_value_bool(CONF_VALUE *value, int *asn1_bool)
+int X509V3_get_value_bool(const CONF_VALUE *value, int *asn1_bool)
 {
     char *btmp;
     if (!(btmp = value->value))
@@ -304,7 +304,7 @@
     return 0;
 }
 
-int X509V3_get_value_int(CONF_VALUE *value, ASN1_INTEGER **aint)
+int X509V3_get_value_int(const CONF_VALUE *value, ASN1_INTEGER **aint)
 {
     ASN1_INTEGER *itmp;
     if (!(itmp = s2i_ASN1_INTEGER(NULL, value->value))) {
diff --git a/deps/boringssl/src/decrepit/cfb/cfb.c b/deps/boringssl/src/decrepit/cfb/cfb.c
index 441ebe6..fa1cfd4 100644
--- a/deps/boringssl/src/decrepit/cfb/cfb.c
+++ b/deps/boringssl/src/decrepit/cfb/cfb.c
@@ -57,6 +57,13 @@
     NULL /* cleanup */,  NULL /* ctrl */,
 };
 
+static const EVP_CIPHER aes_192_cfb128 = {
+    NID_aes_192_cfb128,  1 /* block_size */,  24 /* key_size */,
+    16 /* iv_len */,     sizeof(EVP_CFB_CTX), EVP_CIPH_CFB_MODE,
+    NULL /* app_data */, aes_cfb_init_key,    aes_cfb128_cipher,
+    NULL /* cleanup */,  NULL /* ctrl */,
+};
+
 static const EVP_CIPHER aes_256_cfb128 = {
     NID_aes_256_cfb128,  1 /* block_size */,  32 /* key_size */,
     16 /* iv_len */,     sizeof(EVP_CFB_CTX), EVP_CIPH_CFB_MODE,
@@ -65,4 +72,8 @@
 };
 
 const EVP_CIPHER *EVP_aes_128_cfb128(void) { return &aes_128_cfb128; }
+const EVP_CIPHER *EVP_aes_128_cfb(void) { return &aes_128_cfb128; }
+const EVP_CIPHER *EVP_aes_192_cfb128(void) { return &aes_192_cfb128; }
+const EVP_CIPHER *EVP_aes_192_cfb(void) { return &aes_192_cfb128; }
 const EVP_CIPHER *EVP_aes_256_cfb128(void) { return &aes_256_cfb128; }
+const EVP_CIPHER *EVP_aes_256_cfb(void) { return &aes_256_cfb128; }
diff --git a/deps/boringssl/src/decrepit/cfb/cfb_test.cc b/deps/boringssl/src/decrepit/cfb/cfb_test.cc
index ce55ba9..2510a88 100644
--- a/deps/boringssl/src/decrepit/cfb/cfb_test.cc
+++ b/deps/boringssl/src/decrepit/cfb/cfb_test.cc
@@ -47,6 +47,23 @@
   {
     // This is the test case from
     // http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf,
+    // section F.3.15, CFB128-AES192
+    24,
+    {0x8e, 0x73, 0xb0, 0xf7, 0xda, 0x0e, 0x64, 0x52, 0xc8, 0x10, 0xf3, 0x2b, 0x80, 0x90, 0x79, 0xe5,
+     0x62, 0xf8, 0xea, 0xd2, 0x52, 0x2c, 0x6b, 0x7b},
+    {0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f},
+    {0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a,
+     0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51,
+     0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef,
+     0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10},
+    {0xcd, 0xc8, 0x0d, 0x6f, 0xdd, 0xf1, 0x8c, 0xab, 0x34, 0xc2, 0x59, 0x09, 0xc9, 0x9a, 0x41, 0x74,
+     0x67, 0xce, 0x7f, 0x7f, 0x81, 0x17, 0x36, 0x21, 0x96, 0x1a, 0x2b, 0x70, 0x17, 0x1d, 0x3d, 0x7a,
+     0x2e, 0x1e, 0x8a, 0x1d, 0xd5, 0x9b, 0x88, 0xb1, 0xc8, 0xe6, 0x0f, 0xed, 0x1e, 0xfa, 0xc4, 0xc9,
+     0xc0, 0x5f, 0x9f, 0x9c, 0xa9, 0x83, 0x4f, 0xa0, 0x42, 0xae, 0x8f, 0xba, 0x58, 0x4b, 0x09, 0xff},
+  },
+  {
+    // This is the test case from
+    // http://nvlpubs.nist.gov/nistpubs/Legacy/SP/nistspecialpublication800-38a.pdf,
     // section F.3.17, CFB128-AES256
     32,
     {0x60, 0x3d, 0xeb, 0x10, 0x15, 0xca, 0x71, 0xbe, 0x2b, 0x73, 0xae, 0xf0, 0x85, 0x7d, 0x77, 0x81,
@@ -76,11 +93,14 @@
       bssl::ScopedEVP_CIPHER_CTX ctx;
       if (test.key_len == 16) {
         ASSERT_TRUE(EVP_EncryptInit_ex(ctx.get(), EVP_aes_128_cfb128(), nullptr,
-                                      test.key, test.iv));
+                                       test.key, test.iv));
+      } else if (test.key_len == 24) {
+        ASSERT_TRUE(EVP_EncryptInit_ex(ctx.get(), EVP_aes_192_cfb128(), nullptr,
+                                       test.key, test.iv));
       } else {
         assert(test.key_len == 32);
         ASSERT_TRUE(EVP_EncryptInit_ex(ctx.get(), EVP_aes_256_cfb128(), nullptr,
-                                      test.key, test.iv));
+                                       test.key, test.iv));
       }
 
       size_t done = 0;
@@ -104,11 +124,14 @@
     bssl::ScopedEVP_CIPHER_CTX decrypt_ctx;
     if (test.key_len == 16) {
       ASSERT_TRUE(EVP_DecryptInit_ex(decrypt_ctx.get(), EVP_aes_128_cfb128(),
-                                    nullptr, test.key, test.iv));
+                                     nullptr, test.key, test.iv));
+    } else if (test.key_len == 24) {
+      ASSERT_TRUE(EVP_DecryptInit_ex(decrypt_ctx.get(), EVP_aes_192_cfb128(),
+                                     nullptr, test.key, test.iv));
     } else {
       assert(test.key_len == 32);
       ASSERT_TRUE(EVP_DecryptInit_ex(decrypt_ctx.get(), EVP_aes_256_cfb128(),
-                                    nullptr, test.key, test.iv));
+                                     nullptr, test.key, test.iv));
     }
 
     std::unique_ptr<uint8_t[]> plaintext(new uint8_t[input_len]);
diff --git a/deps/boringssl/src/decrepit/x509/x509_decrepit.c b/deps/boringssl/src/decrepit/x509/x509_decrepit.c
index 5237754..3abab06 100644
--- a/deps/boringssl/src/decrepit/x509/x509_decrepit.c
+++ b/deps/boringssl/src/decrepit/x509/x509_decrepit.c
@@ -20,7 +20,7 @@
 
 
 X509_EXTENSION *X509V3_EXT_conf_nid(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx,
-                                    int ext_nid, char *value) {
+                                    int ext_nid, const char *value) {
   assert(conf == NULL);
   return X509V3_EXT_nconf_nid(NULL, ctx, ext_nid, value);
 }
diff --git a/deps/boringssl/src/include/openssl/aead.h b/deps/boringssl/src/include/openssl/aead.h
index 6d78db2..3bc74e7 100644
--- a/deps/boringssl/src/include/openssl/aead.h
+++ b/deps/boringssl/src/include/openssl/aead.h
@@ -146,6 +146,30 @@
 // https://tools.ietf.org/html/draft-irtf-cfrg-gcmsiv-02
 OPENSSL_EXPORT const EVP_AEAD *EVP_aead_aes_256_gcm_siv(void);
 
+// EVP_aead_aes_128_gcm_randnonce is AES-128 in Galois Counter Mode with
+// internal nonce generation. The 12-byte nonce is appended to the tag
+// and is generated internally. The "tag", for the purposes of the API, is thus
+// 12 bytes larger. The nonce parameter when using this AEAD must be
+// zero-length. Since the nonce is random, a single key should not be used for
+// more than 2^32 seal operations.
+//
+// Warning: this is for use for FIPS compliance only. It is probably not
+// suitable for other uses. Using standard AES-GCM AEADs allows one to achieve
+// the same effect, but gives more control over nonce storage.
+OPENSSL_EXPORT const EVP_AEAD *EVP_aead_aes_128_gcm_randnonce(void);
+
+// EVP_aead_aes_256_gcm_randnonce is AES-256 in Galois Counter Mode with
+// internal nonce generation. The 12-byte nonce is appended to the tag
+// and is generated internally. The "tag", for the purposes of the API, is thus
+// 12 bytes larger. The nonce parameter when using this AEAD must be
+// zero-length. Since the nonce is random, a single key should not be used for
+// more than 2^32 seal operations.
+//
+// Warning: this is for use for FIPS compliance only. It is probably not
+// suitable for other uses. Using standard AES-GCM AEADs allows one to achieve
+// the same effect, but gives more control over nonce storage.
+OPENSSL_EXPORT const EVP_AEAD *EVP_aead_aes_256_gcm_randnonce(void);
+
 // EVP_aead_aes_128_ccm_bluetooth is AES-128-CCM with M=4 and L=2 (4-byte tags
 // and 13-byte nonces), as decribed in the Bluetooth Core Specification v5.0,
 // Volume 6, Part E, Section 1.
diff --git a/deps/boringssl/src/include/openssl/aes.h b/deps/boringssl/src/include/openssl/aes.h
index e560625..496ec90 100644
--- a/deps/boringssl/src/include/openssl/aes.h
+++ b/deps/boringssl/src/include/openssl/aes.h
@@ -106,7 +106,10 @@
 
 // AES_ctr128_encrypt encrypts (or decrypts, it's the same in CTR mode) |len|
 // bytes from |in| to |out|. The |num| parameter must be set to zero on the
-// first call and |ivec| will be incremented.
+// first call and |ivec| will be incremented. This function may be called
+// in-place with |in| equal to |out|, but otherwise the buffers may not
+// partially overlap. A partial overlap may overwrite input data before it is
+// read.
 OPENSSL_EXPORT void AES_ctr128_encrypt(const uint8_t *in, uint8_t *out,
                                        size_t len, const AES_KEY *key,
                                        uint8_t ivec[AES_BLOCK_SIZE],
@@ -114,26 +117,35 @@
                                        unsigned int *num);
 
 // AES_ecb_encrypt encrypts (or decrypts, if |enc| == |AES_DECRYPT|) a single,
-// 16 byte block from |in| to |out|.
+// 16 byte block from |in| to |out|. This function may be called in-place with
+// |in| equal to |out|, but otherwise the buffers may not partially overlap. A
+// partial overlap may overwrite input data before it is read.
 OPENSSL_EXPORT void AES_ecb_encrypt(const uint8_t *in, uint8_t *out,
                                     const AES_KEY *key, const int enc);
 
 // AES_cbc_encrypt encrypts (or decrypts, if |enc| == |AES_DECRYPT|) |len|
 // bytes from |in| to |out|. The length must be a multiple of the block size.
+// This function may be called in-place with |in| equal to |out|, but otherwise
+// the buffers may not partially overlap. A partial overlap may overwrite input
+// data before it is read.
 OPENSSL_EXPORT void AES_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t len,
                                     const AES_KEY *key, uint8_t *ivec,
                                     const int enc);
 
 // AES_ofb128_encrypt encrypts (or decrypts, it's the same in OFB mode) |len|
 // bytes from |in| to |out|. The |num| parameter must be set to zero on the
-// first call.
+// first call. This function may be called in-place with |in| equal to |out|,
+// but otherwise the buffers may not partially overlap. A partial overlap may
+// overwrite input data before it is read.
 OPENSSL_EXPORT void AES_ofb128_encrypt(const uint8_t *in, uint8_t *out,
                                        size_t len, const AES_KEY *key,
                                        uint8_t *ivec, int *num);
 
 // AES_cfb128_encrypt encrypts (or decrypts, if |enc| == |AES_DECRYPT|) |len|
 // bytes from |in| to |out|. The |num| parameter must be set to zero on the
-// first call.
+// first call. This function may be called in-place with |in| equal to |out|,
+// but otherwise the buffers may not partially overlap. A partial overlap may
+// overwrite input data before it is read.
 OPENSSL_EXPORT void AES_cfb128_encrypt(const uint8_t *in, uint8_t *out,
                                        size_t len, const AES_KEY *key,
                                        uint8_t *ivec, int *num, int enc);
diff --git a/deps/boringssl/src/include/openssl/arm_arch.h b/deps/boringssl/src/include/openssl/arm_arch.h
index faa2655..31ff8a6 100644
--- a/deps/boringssl/src/include/openssl/arm_arch.h
+++ b/deps/boringssl/src/include/openssl/arm_arch.h
@@ -117,5 +117,59 @@
 // ARMV8_PMULL indicates support for carryless multiplication.
 #define ARMV8_PMULL (1 << 5)
 
+#if defined(__ASSEMBLER__)
+
+// Support macros for
+//   - Armv8.3-A Pointer Authentication and
+//   - Armv8.5-A Branch Target Identification
+// features which require emitting a .note.gnu.property section with the
+// appropriate architecture-dependent feature bits set.
+// Read more: "ELF for the Arm® 64-bit Architecture"
+
+#if defined(__ARM_FEATURE_BTI_DEFAULT) && __ARM_FEATURE_BTI_DEFAULT == 1
+#define GNU_PROPERTY_AARCH64_BTI (1 << 0)   // Has Branch Target Identification
+#define AARCH64_VALID_CALL_TARGET hint #34  // BTI 'c'
+#else
+#define GNU_PROPERTY_AARCH64_BTI 0  // No Branch Target Identification
+#define AARCH64_VALID_CALL_TARGET
+#endif
+
+#if defined(__ARM_FEATURE_PAC_DEFAULT) && \
+    (__ARM_FEATURE_PAC_DEFAULT & 1) == 1  // Signed with A-key
+#define GNU_PROPERTY_AARCH64_POINTER_AUTH \
+  (1 << 1)                                       // Has Pointer Authentication
+#define AARCH64_SIGN_LINK_REGISTER hint #25      // PACIASP
+#define AARCH64_VALIDATE_LINK_REGISTER hint #29  // AUTIASP
+#elif defined(__ARM_FEATURE_PAC_DEFAULT) && \
+    (__ARM_FEATURE_PAC_DEFAULT & 2) == 2  // Signed with B-key
+#define GNU_PROPERTY_AARCH64_POINTER_AUTH \
+  (1 << 1)                                       // Has Pointer Authentication
+#define AARCH64_SIGN_LINK_REGISTER hint #27      // PACIBSP
+#define AARCH64_VALIDATE_LINK_REGISTER hint #31  // AUTIBSP
+#else
+#define GNU_PROPERTY_AARCH64_POINTER_AUTH 0  // No Pointer Authentication
+#if GNU_PROPERTY_AARCH64_BTI != 0
+#define AARCH64_SIGN_LINK_REGISTER AARCH64_VALID_CALL_TARGET
+#else
+#define AARCH64_SIGN_LINK_REGISTER
+#endif
+#define AARCH64_VALIDATE_LINK_REGISTER
+#endif
+
+#if GNU_PROPERTY_AARCH64_POINTER_AUTH != 0 || GNU_PROPERTY_AARCH64_BTI != 0
+.pushsection .note.gnu.property, "a";
+.balign 8;
+.long 4;
+.long 0x10;
+.long 0x5;
+.asciz "GNU";
+.long 0xc0000000; /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
+.long 4;
+.long (GNU_PROPERTY_AARCH64_POINTER_AUTH | GNU_PROPERTY_AARCH64_BTI);
+.long 0;
+.popsection;
+#endif
+
+#endif  /* defined __ASSEMBLER__ */
 
 #endif  // OPENSSL_HEADER_ARM_ARCH_H
diff --git a/deps/boringssl/src/include/openssl/asn1.h b/deps/boringssl/src/include/openssl/asn1.h
index c1a8d5a..9269553 100644
--- a/deps/boringssl/src/include/openssl/asn1.h
+++ b/deps/boringssl/src/include/openssl/asn1.h
@@ -4,21 +4,21 @@
  * This package is an SSL implementation written
  * by Eric Young (eay@cryptsoft.com).
  * The implementation was written so as to conform with Netscapes SSL.
- * 
+ *
  * This library is free for commercial and non-commercial use as long as
  * the following conditions are aheared to.  The following conditions
  * apply to all code found in this distribution, be it the RC4, RSA,
  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
  * included with this distribution is covered by the same copyright terms
  * except that the holder is Tim Hudson (tjh@cryptsoft.com).
- * 
+ *
  * Copyright remains Eric Young's, and as such any Copyright notices in
  * the code are not to be removed.
  * If this package is used in a product, Eric Young should be given attribution
  * as the author of the parts of the library used.
  * This can be in the form of a textual message at program startup or
  * in documentation (online or textual) provided with the package.
- * 
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -33,10 +33,10 @@
  *     Eric Young (eay@cryptsoft.com)"
  *    The word 'cryptographic' can be left out if the rouines from the library
  *    being used are not cryptographic related :-).
- * 4. If you include any Windows specific code (or a derivative thereof) from 
+ * 4. If you include any Windows specific code (or a derivative thereof) from
  *    the apps directory (application code) you must include an acknowledgement:
  *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
- * 
+ *
  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
@@ -48,7 +48,7 @@
  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  * SUCH DAMAGE.
- * 
+ *
  * The licence and distribution terms for any publically available version or
  * derivative of this code cannot be changed.  i.e. this code cannot simply be
  * copied and put under another distribution licence
@@ -67,400 +67,548 @@
 
 #include <openssl/bn.h>
 
-#ifdef  __cplusplus
+#ifdef __cplusplus
 extern "C" {
 #endif
 
 
-/* Legacy ASN.1 library.
- *
- * This header is part of OpenSSL's ASN.1 implementation. It is retained for
- * compatibility but otherwise underdocumented and not actively maintained. Use
- * the new |CBS| and |CBB| library in <openssl/bytestring.h> instead. */
+// Legacy ASN.1 library.
+//
+// This header is part of OpenSSL's ASN.1 implementation. It is retained for
+// compatibility but otherwise underdocumented and not actively maintained. Use
+// the new |CBS| and |CBB| library in <openssl/bytestring.h> instead.
 
 
-#define V_ASN1_UNIVERSAL		0x00
-#define	V_ASN1_APPLICATION		0x40
-#define V_ASN1_CONTEXT_SPECIFIC		0x80
-#define V_ASN1_PRIVATE			0xc0
+// Tag constants.
+//
+// These constants are used in various APIs to specify ASN.1 types and tag
+// components. See the specific API's documentation for details on which values
+// are used and how.
 
-#define V_ASN1_CONSTRUCTED		0x20
-#define V_ASN1_PRIMITIVE_TAG		0x1f
+// The following constants are tag classes.
+#define V_ASN1_UNIVERSAL 0x00
+#define V_ASN1_APPLICATION 0x40
+#define V_ASN1_CONTEXT_SPECIFIC 0x80
+#define V_ASN1_PRIVATE 0xc0
 
-#define V_ASN1_APP_CHOOSE		-2	/* let the recipient choose */
-#define V_ASN1_OTHER			-3	/* used in ASN1_TYPE */
-#define V_ASN1_ANY			-4	/* used in ASN1 template code */
+// V_ASN1_CONSTRUCTED indicates an element is constructed, rather than
+// primitive.
+#define V_ASN1_CONSTRUCTED 0x20
 
-#define V_ASN1_NEG			0x100	/* negative flag */
-/* No supported universal tags may exceed this value, to avoid ambiguity with
- * V_ASN1_NEG. */
-#define V_ASN1_MAX_UNIVERSAL		0xff
+// V_ASN1_PRIMITIVE_TAG is the highest tag number which can be encoded in a
+// single byte. Note this is unrelated to whether an element is constructed or
+// primitive.
+//
+// TODO(davidben): Make this private.
+#define V_ASN1_PRIMITIVE_TAG 0x1f
 
-#define V_ASN1_UNDEF			-1
-#define V_ASN1_EOC			0
-#define V_ASN1_BOOLEAN			1	/**/
-#define V_ASN1_INTEGER			2
-#define V_ASN1_NEG_INTEGER		(2 | V_ASN1_NEG)
-#define V_ASN1_BIT_STRING		3
-#define V_ASN1_OCTET_STRING		4
-#define V_ASN1_NULL			5
-#define V_ASN1_OBJECT			6
-#define V_ASN1_OBJECT_DESCRIPTOR	7
-#define V_ASN1_EXTERNAL			8
-#define V_ASN1_REAL			9
-#define V_ASN1_ENUMERATED		10
-#define V_ASN1_NEG_ENUMERATED		(10 | V_ASN1_NEG)
-#define V_ASN1_UTF8STRING		12
-#define V_ASN1_SEQUENCE			16
-#define V_ASN1_SET			17
-#define V_ASN1_NUMERICSTRING		18	/**/
-#define V_ASN1_PRINTABLESTRING		19
-#define V_ASN1_T61STRING		20
-#define V_ASN1_TELETEXSTRING		20	/* alias */
-#define V_ASN1_VIDEOTEXSTRING		21	/**/
-#define V_ASN1_IA5STRING		22
-#define V_ASN1_UTCTIME			23
-#define V_ASN1_GENERALIZEDTIME		24	/**/
-#define V_ASN1_GRAPHICSTRING		25	/**/
-#define V_ASN1_ISO64STRING		26	/**/
-#define V_ASN1_VISIBLESTRING		26	/* alias */
-#define V_ASN1_GENERALSTRING		27	/**/
-#define V_ASN1_UNIVERSALSTRING		28	/**/
-#define V_ASN1_BMPSTRING		30
+// V_ASN1_MAX_UNIVERSAL is the highest supported universal tag number. It is
+// necessary to avoid ambiguity with |V_ASN1_NEG|.
+//
+// TODO(davidben): Make this private.
+#define V_ASN1_MAX_UNIVERSAL 0xff
 
-/* For use with d2i_ASN1_type_bytes() */
-#define B_ASN1_NUMERICSTRING	0x0001
-#define B_ASN1_PRINTABLESTRING	0x0002
-#define B_ASN1_T61STRING	0x0004
-#define B_ASN1_TELETEXSTRING	0x0004
-#define B_ASN1_VIDEOTEXSTRING	0x0008
-#define B_ASN1_IA5STRING	0x0010
-#define B_ASN1_GRAPHICSTRING	0x0020
-#define B_ASN1_ISO64STRING	0x0040
-#define B_ASN1_VISIBLESTRING	0x0040
-#define B_ASN1_GENERALSTRING	0x0080
-#define B_ASN1_UNIVERSALSTRING	0x0100
-#define B_ASN1_OCTET_STRING	0x0200
-#define B_ASN1_BIT_STRING	0x0400
-#define B_ASN1_BMPSTRING	0x0800
-#define B_ASN1_UNKNOWN		0x1000
-#define B_ASN1_UTF8STRING	0x2000
-#define B_ASN1_UTCTIME		0x4000
-#define B_ASN1_GENERALIZEDTIME	0x8000
-#define B_ASN1_SEQUENCE		0x10000
+// V_ASN1_UNDEF is used in some APIs to indicate an ASN.1 element is omitted.
+#define V_ASN1_UNDEF (-1)
 
-/* For use with ASN1_mbstring_copy() */
-#define MBSTRING_FLAG		0x1000
-#define MBSTRING_UTF8		(MBSTRING_FLAG)
-/* |MBSTRING_ASC| refers to Latin-1, not ASCII. It is used with TeletexString
- * which, in turn, is treated as Latin-1 rather than T.61 by OpenSSL and most
- * other software. */
-#define MBSTRING_ASC		(MBSTRING_FLAG|1)
-#define MBSTRING_BMP		(MBSTRING_FLAG|2)
-#define MBSTRING_UNIV		(MBSTRING_FLAG|4)
+// V_ASN1_APP_CHOOSE is used in some APIs to specify a default ASN.1 type based
+// on the context.
+#define V_ASN1_APP_CHOOSE (-2)
 
-#define DECLARE_ASN1_SET_OF(type) /* filled in by mkstack.pl */
-#define IMPLEMENT_ASN1_SET_OF(type) /* nothing, no longer needed */
+// V_ASN1_OTHER is used in |ASN1_TYPE| to indicate a non-universal ASN.1 type.
+#define V_ASN1_OTHER (-3)
 
-/* These are used internally in the ASN1_OBJECT to keep track of
- * whether the names and data need to be free()ed */
-#define ASN1_OBJECT_FLAG_DYNAMIC	 0x01	/* internal use */
-#define ASN1_OBJECT_FLAG_DYNAMIC_STRINGS 0x04	/* internal use */
-#define ASN1_OBJECT_FLAG_DYNAMIC_DATA 	 0x08	/* internal use */
-struct asn1_object_st
-	{
-	const char *sn,*ln;
-	int nid;
-	int length;
-	const unsigned char *data;	/* data remains const after init */
-	int flags;	/* Should we free this one */
-	};
+// V_ASN1_ANY is used by the ASN.1 templates to indicate an ANY type.
+#define V_ASN1_ANY (-4)
+
+// The following constants are tag numbers for universal types.
+#define V_ASN1_EOC 0
+#define V_ASN1_BOOLEAN 1
+#define V_ASN1_INTEGER 2
+#define V_ASN1_BIT_STRING 3
+#define V_ASN1_OCTET_STRING 4
+#define V_ASN1_NULL 5
+#define V_ASN1_OBJECT 6
+#define V_ASN1_OBJECT_DESCRIPTOR 7
+#define V_ASN1_EXTERNAL 8
+#define V_ASN1_REAL 9
+#define V_ASN1_ENUMERATED 10
+#define V_ASN1_UTF8STRING 12
+#define V_ASN1_SEQUENCE 16
+#define V_ASN1_SET 17
+#define V_ASN1_NUMERICSTRING 18
+#define V_ASN1_PRINTABLESTRING 19
+#define V_ASN1_T61STRING 20
+#define V_ASN1_TELETEXSTRING 20
+#define V_ASN1_VIDEOTEXSTRING 21
+#define V_ASN1_IA5STRING 22
+#define V_ASN1_UTCTIME 23
+#define V_ASN1_GENERALIZEDTIME 24
+#define V_ASN1_GRAPHICSTRING 25
+#define V_ASN1_ISO64STRING 26
+#define V_ASN1_VISIBLESTRING 26
+#define V_ASN1_GENERALSTRING 27
+#define V_ASN1_UNIVERSALSTRING 28
+#define V_ASN1_BMPSTRING 30
+
+// The following constants are used for |ASN1_STRING| values that represent
+// negative INTEGER and ENUMERATED values. See |ASN1_STRING| for more details.
+#define V_ASN1_NEG 0x100
+#define V_ASN1_NEG_INTEGER (V_ASN1_INTEGER | V_ASN1_NEG)
+#define V_ASN1_NEG_ENUMERATED (V_ASN1_ENUMERATED | V_ASN1_NEG)
+
+
+// Strings.
+//
+// ASN.1 contains a myriad of string types, as well as types that contain data
+// that may be encoded into a string. This library uses a single type,
+// |ASN1_STRING|, to represent most values.
+
+// An asn1_string_st (aka |ASN1_STRING|) represents a value of a string-like
+// ASN.1 type. It contains a type field, and a byte string data field with a
+// type-specific representation.
+//
+// When representing a string value, the type field is one of
+// |V_ASN1_OCTET_STRING|, |V_ASN1_UTF8STRING|, |V_ASN1_NUMERICSTRING|,
+// |V_ASN1_PRINTABLESTRING|, |V_ASN1_T61STRING|, |V_ASN1_VIDEOTEXSTRING|,
+// |V_ASN1_IA5STRING|, |V_ASN1_GRAPHICSTRING|, |V_ASN1_ISO64STRING|,
+// |V_ASN1_VISIBLESTRING|, |V_ASN1_GENERALSTRING|, |V_ASN1_UNIVERSALSTRING|, or
+// |V_ASN1_BMPSTRING|. The data contains the byte representation of the
+// string.
+//
+// When representing a BIT STRING value, the type field is |V_ASN1_BIT_STRING|.
+// The data contains the encoded form of the BIT STRING, including any padding
+// bits added to round to a whole number of bytes, but excluding the leading
+// byte containing the number of padding bits. The number of padding bits is
+// encoded in the flags field. See |ASN1_STRING_FLAG_BITS_LEFT| for details. For
+// example, DER encodes the BIT STRING {1, 0} as {0x06, 0x80 = 0b10_000000}. The
+// |ASN1_STRING| representation has data of {0x80} and flags of
+// ASN1_STRING_FLAG_BITS_LEFT | 6.
+//
+// When representing an INTEGER or ENUMERATED value, the data contains the
+// big-endian encoding of the absolute value of the integer. The sign bit is
+// encoded in the type: non-negative values have a type of |V_ASN1_INTEGER| or
+// |V_ASN1_ENUMERATED|, while negative values have a type of
+// |V_ASN1_NEG_INTEGER| or |V_ASN1_NEG_ENUMERATED|. Note this differs from DER's
+// two's complement representation.
+//
+// When representing a GeneralizedTime or UTCTime value, the type field is
+// |V_ASN1_GENERALIZEDTIME| or |V_ASN1_UTCTIME|, respectively. The data contains
+// the DER encoding of the value. For example, the UNIX epoch would be
+// "19700101000000Z" for a GeneralizedTime and "700101000000Z" for a UTCTime.
+//
+// TODO(davidben): |ASN1_TYPE| additionally uses |ASN1_STRING| to represent
+// various other odd cases. It also likes to assume unknown universal tags are
+// string types. Make a note here when documenting |ASN1_TYPE|.
+//
+// |ASN1_STRING| additionally has the following typedefs: |ASN1_BIT_STRING|,
+// |ASN1_BMPSTRING|, |ASN1_ENUMERATED|, |ASN1_GENERALIZEDTIME|,
+// |ASN1_GENERALSTRING|, |ASN1_IA5STRING|, |ASN1_INTEGER|, |ASN1_OCTET_STRING|,
+// |ASN1_PRINTABLESTRING|, |ASN1_T61STRING|, |ASN1_TIME|,
+// |ASN1_UNIVERSALSTRING|, |ASN1_UTCTIME|, |ASN1_UTF8STRING|, and
+// |ASN1_VISIBLESTRING|. Other than |ASN1_TIME|, these correspond to universal
+// ASN.1 types. |ASN1_TIME| represents a CHOICE of UTCTime and GeneralizedTime,
+// with a cutoff of 2049, as used in Section 4.1.2.5 of RFC 5280.
+//
+// For clarity, callers are encouraged to use the appropriate typedef when
+// available. They are the same type as |ASN1_STRING|, so a caller may freely
+// pass them into functions expecting |ASN1_STRING|, such as
+// |ASN1_STRING_length|.
+//
+// If a function returns an |ASN1_STRING| where the typedef or ASN.1 structure
+// implies constraints on the type field, callers may assume that the type field
+// is correct. However, if a function takes an |ASN1_STRING| as input, callers
+// must ensure the type field matches. These invariants are not captured by the
+// C type system and may not be checked at runtime. For example, callers may
+// assume the output of |X509_get0_serialNumber| has type |V_ASN1_INTEGER| or
+// |V_ASN1_NEG_INTEGER|. Callers must not pass a string of type
+// |V_ASN1_OCTET_STRING| to |X509_set_serialNumber|. Doing so may break
+// invariants on the |X509| object and break the |X509_get0_serialNumber|
+// invariant.
+//
+// TODO(davidben): This is very unfriendly. Getting the type field wrong should
+// not cause memory errors, but it may do strange things. We should add runtime
+// checks to anything that consumes |ASN1_STRING|s from the caller.
+struct asn1_string_st {
+  int length;
+  int type;
+  unsigned char *data;
+  long flags;
+};
+
+// ASN1_STRING_FLAG_BITS_LEFT indicates, in a BIT STRING |ASN1_STRING|, that
+// flags & 0x7 contains the number of padding bits added to the BIT STRING
+// value. When not set, all trailing zero bits in the last byte are implicitly
+// treated as padding. This behavior is deprecated and should not be used.
+#define ASN1_STRING_FLAG_BITS_LEFT 0x08
+
+// ASN1_STRING_FLAG_MSTRING indicates that the |ASN1_STRING| is an MSTRING type,
+// which is how this library refers to a CHOICE type of several string types.
+// For example, DirectoryString as defined in RFC5280.
+//
+// TODO(davidben): This is only used in one place within the library and is easy
+// to accidentally drop. Can it be removed?
+#define ASN1_STRING_FLAG_MSTRING 0x040
+
+// ASN1_STRING_type_new returns a newly-allocated empty |ASN1_STRING| object of
+// type |type|, or NULL on error.
+OPENSSL_EXPORT ASN1_STRING *ASN1_STRING_type_new(int type);
+
+// ASN1_STRING_new returns a newly-allocated empty |ASN1_STRING| object with an
+// arbitrary type. Prefer one of the type-specific constructors, such as
+// |ASN1_OCTET_STRING_new|, or |ASN1_STRING_type_new|.
+OPENSSL_EXPORT ASN1_STRING *ASN1_STRING_new(void);
+
+// ASN1_STRING_free releases memory associated with |str|.
+OPENSSL_EXPORT void ASN1_STRING_free(ASN1_STRING *str);
+
+// ASN1_STRING_copy sets |dst| to a copy of |str|. It returns one on success and
+// zero on error.
+OPENSSL_EXPORT int ASN1_STRING_copy(ASN1_STRING *dst, const ASN1_STRING *str);
+
+// ASN1_STRING_dup returns a newly-allocated copy of |str|, or NULL on error.
+OPENSSL_EXPORT ASN1_STRING *ASN1_STRING_dup(const ASN1_STRING *str);
+
+// ASN1_STRING_type returns the type of |str|. This value will be one of the
+// |V_ASN1_*| constants.
+OPENSSL_EXPORT int ASN1_STRING_type(const ASN1_STRING *str);
+
+// ASN1_STRING_get0_data returns a pointer to |str|'s contents. Callers should
+// use |ASN1_STRING_length| to determine the length of the string. The string
+// may have embedded NUL bytes and may not be NUL-terminated.
+OPENSSL_EXPORT const unsigned char *ASN1_STRING_get0_data(
+    const ASN1_STRING *str);
+
+// ASN1_STRING_data returns a mutable pointer to |str|'s contents. Callers
+// should use |ASN1_STRING_length| to determine the length of the string. The
+// string may have embedded NUL bytes and may not be NUL-terminated.
+//
+// Prefer |ASN1_STRING_get0_data|.
+OPENSSL_EXPORT unsigned char *ASN1_STRING_data(ASN1_STRING *str);
+
+// ASN1_STRING_length returns the length of |str|, in bytes.
+OPENSSL_EXPORT int ASN1_STRING_length(const ASN1_STRING *str);
+
+// ASN1_STRING_cmp compares |a| and |b|'s type and contents. It returns an
+// integer equal to, less than, or greater than zero if |a| is equal to, less
+// than, or greater than |b|, respectively. The comparison is suitable for
+// sorting, but callers should not rely on the particular comparison.
+//
+// Note if |a| or |b| are BIT STRINGs, this function does not compare the
+// |ASN1_STRING_FLAG_BITS_LEFT| flags.
+//
+// TODO(davidben): The BIT STRING comparison seems like a bug. Fix it?
+OPENSSL_EXPORT int ASN1_STRING_cmp(const ASN1_STRING *a, const ASN1_STRING *b);
+
+// ASN1_STRING_set sets the contents of |str| to a copy of |len| bytes from
+// |data|. It returns one on success and zero on error.
+OPENSSL_EXPORT int ASN1_STRING_set(ASN1_STRING *str, const void *data, int len);
+
+// ASN1_STRING_set0 sets the contents of |str| to |len| bytes from |data|. It
+// takes ownership of |data|, which must have been allocated with
+// |OPENSSL_malloc|.
+OPENSSL_EXPORT void ASN1_STRING_set0(ASN1_STRING *str, void *data, int len);
+
+// TODO(davidben): Pull up and document functions specific to individual string
+// types.
+
+
+// Underdocumented functions.
+//
+// The following functions are not yet documented and organized.
+
+// For use with d2i_ASN1_type_bytes()
+#define B_ASN1_NUMERICSTRING 0x0001
+#define B_ASN1_PRINTABLESTRING 0x0002
+#define B_ASN1_T61STRING 0x0004
+#define B_ASN1_TELETEXSTRING 0x0004
+#define B_ASN1_VIDEOTEXSTRING 0x0008
+#define B_ASN1_IA5STRING 0x0010
+#define B_ASN1_GRAPHICSTRING 0x0020
+#define B_ASN1_ISO64STRING 0x0040
+#define B_ASN1_VISIBLESTRING 0x0040
+#define B_ASN1_GENERALSTRING 0x0080
+#define B_ASN1_UNIVERSALSTRING 0x0100
+#define B_ASN1_OCTET_STRING 0x0200
+#define B_ASN1_BIT_STRING 0x0400
+#define B_ASN1_BMPSTRING 0x0800
+#define B_ASN1_UNKNOWN 0x1000
+#define B_ASN1_UTF8STRING 0x2000
+#define B_ASN1_UTCTIME 0x4000
+#define B_ASN1_GENERALIZEDTIME 0x8000
+#define B_ASN1_SEQUENCE 0x10000
+
+// For use with ASN1_mbstring_copy()
+#define MBSTRING_FLAG 0x1000
+#define MBSTRING_UTF8 (MBSTRING_FLAG)
+// |MBSTRING_ASC| refers to Latin-1, not ASCII. It is used with TeletexString
+// which, in turn, is treated as Latin-1 rather than T.61 by OpenSSL and most
+// other software.
+#define MBSTRING_ASC (MBSTRING_FLAG | 1)
+#define MBSTRING_BMP (MBSTRING_FLAG | 2)
+#define MBSTRING_UNIV (MBSTRING_FLAG | 4)
+
+#define DECLARE_ASN1_SET_OF(type)    // filled in by mkstack.pl
+#define IMPLEMENT_ASN1_SET_OF(type)  // nothing, no longer needed
+
+// These are used internally in the ASN1_OBJECT to keep track of
+// whether the names and data need to be free()ed
+#define ASN1_OBJECT_FLAG_DYNAMIC 0x01          // internal use
+#define ASN1_OBJECT_FLAG_DYNAMIC_STRINGS 0x04  // internal use
+#define ASN1_OBJECT_FLAG_DYNAMIC_DATA 0x08     // internal use
+struct asn1_object_st {
+  const char *sn, *ln;
+  int nid;
+  int length;
+  const unsigned char *data;  // data remains const after init
+  int flags;                  // Should we free this one
+};
 
 DEFINE_STACK_OF(ASN1_OBJECT)
 
-#define ASN1_STRING_FLAG_BITS_LEFT 0x08 /* Set if 0x07 has bits left value */
-/* This indicates that the ASN1_STRING is not a real value but just a place
- * holder for the location where indefinite length constructed data should
- * be inserted in the memory buffer 
- */
-#define ASN1_STRING_FLAG_NDEF 0x010 
+// ASN1_ENCODING structure: this is used to save the received
+// encoding of an ASN1 type. This is useful to get round
+// problems with invalid encodings which can break signatures.
 
-/* This flag is used by ASN1 code to indicate an ASN1_STRING is an MSTRING
- * type.
- */
-#define ASN1_STRING_FLAG_MSTRING 0x040 
-/* This is the base type that holds just about everything :-) */
-struct asn1_string_st
-	{
-	int length;
-	int type;
-	unsigned char *data;
-	/* The value of the following field depends on the type being
-	 * held.  It is mostly being used for BIT_STRING so if the
-	 * input data has a non-zero 'unused bits' value, it will be
-	 * handled correctly */
-	long flags;
-	};
+typedef struct ASN1_ENCODING_st {
+  unsigned char *enc;  // DER encoding
+  long len;            // Length of encoding
+  int modified;        // set to 1 if 'enc' is invalid
+  // alias_only is zero if |enc| owns the buffer that it points to
+  // (although |enc| may still be NULL). If one, |enc| points into a
+  // buffer that is owned elsewhere.
+  unsigned alias_only : 1;
+  // alias_only_on_next_parse is one iff the next parsing operation
+  // should avoid taking a copy of the input and rather set
+  // |alias_only|.
+  unsigned alias_only_on_next_parse : 1;
+} ASN1_ENCODING;
 
-/* ASN1_ENCODING structure: this is used to save the received
- * encoding of an ASN1 type. This is useful to get round
- * problems with invalid encodings which can break signatures.
- */
-
-typedef struct ASN1_ENCODING_st
-	{
-	unsigned char *enc;	/* DER encoding */
-	long len;		/* Length of encoding */
-	int modified;		/* set to 1 if 'enc' is invalid */
-	/* alias_only is zero if |enc| owns the buffer that it points to
-	 * (although |enc| may still be NULL). If one, |enc| points into a
-	 * buffer that is owned elsewhere. */
-	unsigned alias_only:1;
-	/* alias_only_on_next_parse is one iff the next parsing operation
-	 * should avoid taking a copy of the input and rather set
-	 * |alias_only|. */
-	unsigned alias_only_on_next_parse:1;
-	} ASN1_ENCODING;
-
-#define STABLE_FLAGS_MALLOC	0x01
-#define STABLE_NO_MASK		0x02
-#define DIRSTRING_TYPE	\
- (B_ASN1_PRINTABLESTRING|B_ASN1_T61STRING|B_ASN1_BMPSTRING|B_ASN1_UTF8STRING)
-#define PKCS9STRING_TYPE (DIRSTRING_TYPE|B_ASN1_IA5STRING)
+#define STABLE_FLAGS_MALLOC 0x01
+#define STABLE_NO_MASK 0x02
+#define DIRSTRING_TYPE                                            \
+  (B_ASN1_PRINTABLESTRING | B_ASN1_T61STRING | B_ASN1_BMPSTRING | \
+   B_ASN1_UTF8STRING)
+#define PKCS9STRING_TYPE (DIRSTRING_TYPE | B_ASN1_IA5STRING)
 
 typedef struct asn1_string_table_st {
-	int nid;
-	long minsize;
-	long maxsize;
-	unsigned long mask;
-	unsigned long flags;
+  int nid;
+  long minsize;
+  long maxsize;
+  unsigned long mask;
+  unsigned long flags;
 } ASN1_STRING_TABLE;
 
-/* size limits: this stuff is taken straight from RFC2459 */
+// size limits: this stuff is taken straight from RFC2459
 
-#define ub_name				32768
-#define ub_common_name			64
-#define ub_locality_name		128
-#define ub_state_name			128
-#define ub_organization_name		64
-#define ub_organization_unit_name	64
-#define ub_title			64
-#define ub_email_address		128
+#define ub_name 32768
+#define ub_common_name 64
+#define ub_locality_name 128
+#define ub_state_name 128
+#define ub_organization_name 64
+#define ub_organization_unit_name 64
+#define ub_title 64
+#define ub_email_address 128
 
-/* Declarations for template structures: for full definitions
- * see asn1t.h
- */
+// Declarations for template structures: for full definitions
+// see asn1t.h
 typedef struct ASN1_TEMPLATE_st ASN1_TEMPLATE;
 typedef struct ASN1_TLC_st ASN1_TLC;
-/* This is just an opaque pointer */
+// This is just an opaque pointer
 typedef struct ASN1_VALUE_st ASN1_VALUE;
 
-/* Declare ASN1 functions: the implement macro in in asn1t.h */
+// Declare ASN1 functions: the implement macro is in asn1t.h
 
 #define DECLARE_ASN1_FUNCTIONS(type) DECLARE_ASN1_FUNCTIONS_name(type, type)
 
 #define DECLARE_ASN1_ALLOC_FUNCTIONS(type) \
-	DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, type)
+  DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, type)
 
 #define DECLARE_ASN1_FUNCTIONS_name(type, name) \
-	DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, name) \
-	DECLARE_ASN1_ENCODE_FUNCTIONS(type, name, name)
+  DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, name) \
+  DECLARE_ASN1_ENCODE_FUNCTIONS(type, name, name)
 
 #define DECLARE_ASN1_FUNCTIONS_fname(type, itname, name) \
-	DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, name) \
-	DECLARE_ASN1_ENCODE_FUNCTIONS(type, itname, name)
+  DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, name)          \
+  DECLARE_ASN1_ENCODE_FUNCTIONS(type, itname, name)
 
-#define	DECLARE_ASN1_ENCODE_FUNCTIONS(type, itname, name) \
-	OPENSSL_EXPORT type *d2i_##name(type **a, const unsigned char **in, long len); \
-	OPENSSL_EXPORT int i2d_##name(type *a, unsigned char **out); \
-	DECLARE_ASN1_ITEM(itname)
+#define DECLARE_ASN1_ENCODE_FUNCTIONS(type, itname, name)             \
+  OPENSSL_EXPORT type *d2i_##name(type **a, const unsigned char **in, \
+                                  long len);                          \
+  OPENSSL_EXPORT int i2d_##name(type *a, unsigned char **out);        \
+  DECLARE_ASN1_ITEM(itname)
 
-#define	DECLARE_ASN1_ENCODE_FUNCTIONS_const(type, name) \
-	OPENSSL_EXPORT type *d2i_##name(type **a, const unsigned char **in, long len); \
-	OPENSSL_EXPORT int i2d_##name(const type *a, unsigned char **out); \
-	DECLARE_ASN1_ITEM(name)
-
-#define	DECLARE_ASN1_NDEF_FUNCTION(name) \
-	OPENSSL_EXPORT int i2d_##name##_NDEF(name *a, unsigned char **out);
+#define DECLARE_ASN1_ENCODE_FUNCTIONS_const(type, name)               \
+  OPENSSL_EXPORT type *d2i_##name(type **a, const unsigned char **in, \
+                                  long len);                          \
+  OPENSSL_EXPORT int i2d_##name(const type *a, unsigned char **out);  \
+  DECLARE_ASN1_ITEM(name)
 
 #define DECLARE_ASN1_FUNCTIONS_const(name) \
-	DECLARE_ASN1_ALLOC_FUNCTIONS(name) \
-	DECLARE_ASN1_ENCODE_FUNCTIONS_const(name, name)
+  DECLARE_ASN1_ALLOC_FUNCTIONS(name)       \
+  DECLARE_ASN1_ENCODE_FUNCTIONS_const(name, name)
 
 #define DECLARE_ASN1_ALLOC_FUNCTIONS_name(type, name) \
-	OPENSSL_EXPORT type *name##_new(void); \
-	OPENSSL_EXPORT void name##_free(type *a);
+  OPENSSL_EXPORT type *name##_new(void);              \
+  OPENSSL_EXPORT void name##_free(type *a);
 
 #define DECLARE_ASN1_PRINT_FUNCTION(stname) \
-	DECLARE_ASN1_PRINT_FUNCTION_fname(stname, stname)
+  DECLARE_ASN1_PRINT_FUNCTION_fname(stname, stname)
 
-#define DECLARE_ASN1_PRINT_FUNCTION_fname(stname, fname) \
-	OPENSSL_EXPORT int fname##_print_ctx(BIO *out, stname *x, int indent, \
-					 const ASN1_PCTX *pctx);
+#define DECLARE_ASN1_PRINT_FUNCTION_fname(stname, fname)                \
+  OPENSSL_EXPORT int fname##_print_ctx(BIO *out, stname *x, int indent, \
+                                       const ASN1_PCTX *pctx);
 
 typedef void *d2i_of_void(void **, const unsigned char **, long);
 typedef int i2d_of_void(const void *, unsigned char **);
 
-/* The following macros and typedefs allow an ASN1_ITEM
- * to be embedded in a structure and referenced. Since
- * the ASN1_ITEM pointers need to be globally accessible
- * (possibly from shared libraries) they may exist in
- * different forms. On platforms that support it the
- * ASN1_ITEM structure itself will be globally exported.
- * Other platforms will export a function that returns
- * an ASN1_ITEM pointer.
- *
- * To handle both cases transparently the macros below
- * should be used instead of hard coding an ASN1_ITEM
- * pointer in a structure.
- *
- * The structure will look like this:
- *
- * typedef struct SOMETHING_st {
- *      ...
- *      ASN1_ITEM_EXP *iptr;
- *      ...
- * } SOMETHING; 
- *
- * It would be initialised as e.g.:
- *
- * SOMETHING somevar = {...,ASN1_ITEM_ref(X509),...};
- *
- * and the actual pointer extracted with:
- *
- * const ASN1_ITEM *it = ASN1_ITEM_ptr(somevar.iptr);
- *
- * Finally an ASN1_ITEM pointer can be extracted from an
- * appropriate reference with: ASN1_ITEM_rptr(X509). This
- * would be used when a function takes an ASN1_ITEM * argument.
- *
- */
+// The following macros and typedefs allow an ASN1_ITEM
+// to be embedded in a structure and referenced. Since
+// the ASN1_ITEM pointers need to be globally accessible
+// (possibly from shared libraries) they may exist in
+// different forms. On platforms that support it the
+// ASN1_ITEM structure itself will be globally exported.
+// Other platforms will export a function that returns
+// an ASN1_ITEM pointer.
+//
+// To handle both cases transparently the macros below
+// should be used instead of hard coding an ASN1_ITEM
+// pointer in a structure.
+//
+// The structure will look like this:
+//
+// typedef struct SOMETHING_st {
+//      ...
+//      ASN1_ITEM_EXP *iptr;
+//      ...
+// } SOMETHING;
+//
+// It would be initialised as e.g.:
+//
+// SOMETHING somevar = {...,ASN1_ITEM_ref(X509),...};
+//
+// and the actual pointer extracted with:
+//
+// const ASN1_ITEM *it = ASN1_ITEM_ptr(somevar.iptr);
+//
+// Finally an ASN1_ITEM pointer can be extracted from an
+// appropriate reference with: ASN1_ITEM_rptr(X509). This
+// would be used when a function takes an ASN1_ITEM * argument.
+//
 
-/* ASN1_ITEM pointer exported type */
+// ASN1_ITEM pointer exported type
 typedef const ASN1_ITEM ASN1_ITEM_EXP;
 
-/* Macro to obtain ASN1_ITEM pointer from exported type */
+// Macro to obtain ASN1_ITEM pointer from exported type
 #define ASN1_ITEM_ptr(iptr) (iptr)
 
-/* Macro to include ASN1_ITEM pointer from base type */
+// Macro to include ASN1_ITEM pointer from base type
 #define ASN1_ITEM_ref(iptr) (&(iptr##_it))
 
 #define ASN1_ITEM_rptr(ref) (&(ref##_it))
 
-#define DECLARE_ASN1_ITEM(name) \
-	extern OPENSSL_EXPORT const ASN1_ITEM name##_it;
+#define DECLARE_ASN1_ITEM(name) extern OPENSSL_EXPORT const ASN1_ITEM name##_it;
 
-/* Parameters used by ASN1_STRING_print_ex() */
+// Parameters used by ASN1_STRING_print_ex()
 
-/* These determine which characters to escape:
- * RFC2253 special characters, control characters and
- * MSB set characters
- */
+// These determine which characters to escape:
+// RFC2253 special characters, control characters and
+// MSB set characters
 
-#define ASN1_STRFLGS_ESC_2253		1
-#define ASN1_STRFLGS_ESC_CTRL		2
-#define ASN1_STRFLGS_ESC_MSB		4
+#define ASN1_STRFLGS_ESC_2253 1
+#define ASN1_STRFLGS_ESC_CTRL 2
+#define ASN1_STRFLGS_ESC_MSB 4
 
 
-/* This flag determines how we do escaping: normally
- * RC2253 backslash only, set this to use backslash and
- * quote.
- */
+// This flag determines how we do escaping: normally
+// RFC2253 backslash only, set this to use backslash and
+// quote.
 
-#define ASN1_STRFLGS_ESC_QUOTE		8
+#define ASN1_STRFLGS_ESC_QUOTE 8
 
 
-/* These three flags are internal use only. */
+// These three flags are internal use only.
 
-/* Character is a valid PrintableString character */
-#define CHARTYPE_PRINTABLESTRING	0x10
-/* Character needs escaping if it is the first character */
-#define CHARTYPE_FIRST_ESC_2253		0x20
-/* Character needs escaping if it is the last character */
-#define CHARTYPE_LAST_ESC_2253		0x40
+// Character is a valid PrintableString character
+#define CHARTYPE_PRINTABLESTRING 0x10
+// Character needs escaping if it is the first character
+#define CHARTYPE_FIRST_ESC_2253 0x20
+// Character needs escaping if it is the last character
+#define CHARTYPE_LAST_ESC_2253 0x40
 
-/* NB the internal flags are safely reused below by flags
- * handled at the top level.
- */
+// NB the internal flags are safely reused below by flags
+// handled at the top level.
 
-/* If this is set we convert all character strings
- * to UTF8 first 
- */
+// If this is set we convert all character strings
+// to UTF8 first
 
-#define ASN1_STRFLGS_UTF8_CONVERT	0x10
+#define ASN1_STRFLGS_UTF8_CONVERT 0x10
 
-/* If this is set we don't attempt to interpret content:
- * just assume all strings are 1 byte per character. This
- * will produce some pretty odd looking output!
- */
+// If this is set we don't attempt to interpret content:
+// just assume all strings are 1 byte per character. This
+// will produce some pretty odd looking output!
 
-#define ASN1_STRFLGS_IGNORE_TYPE	0x20
+#define ASN1_STRFLGS_IGNORE_TYPE 0x20
 
-/* If this is set we include the string type in the output */
-#define ASN1_STRFLGS_SHOW_TYPE		0x40
+// If this is set we include the string type in the output
+#define ASN1_STRFLGS_SHOW_TYPE 0x40
 
-/* This determines which strings to display and which to
- * 'dump' (hex dump of content octets or DER encoding). We can
- * only dump non character strings or everything. If we
- * don't dump 'unknown' they are interpreted as character
- * strings with 1 octet per character and are subject to
- * the usual escaping options.
- */
+// This determines which strings to display and which to
+// 'dump' (hex dump of content octets or DER encoding). We can
+// only dump non character strings or everything. If we
+// don't dump 'unknown' they are interpreted as character
+// strings with 1 octet per character and are subject to
+// the usual escaping options.
 
-#define ASN1_STRFLGS_DUMP_ALL		0x80
-#define ASN1_STRFLGS_DUMP_UNKNOWN	0x100
+#define ASN1_STRFLGS_DUMP_ALL 0x80
+#define ASN1_STRFLGS_DUMP_UNKNOWN 0x100
 
-/* These determine what 'dumping' does, we can dump the
- * content octets or the DER encoding: both use the
- * RFC2253 #XXXXX notation.
- */
+// These determine what 'dumping' does, we can dump the
+// content octets or the DER encoding: both use the
+// RFC2253 #XXXXX notation.
 
-#define ASN1_STRFLGS_DUMP_DER		0x200
+#define ASN1_STRFLGS_DUMP_DER 0x200
 
-/* All the string flags consistent with RFC2253,
- * escaping control characters isn't essential in
- * RFC2253 but it is advisable anyway.
- */
+// All the string flags consistent with RFC2253,
+// escaping control characters isn't essential in
+// RFC2253 but it is advisable anyway.
 
-#define ASN1_STRFLGS_RFC2253	(ASN1_STRFLGS_ESC_2253 | \
-				ASN1_STRFLGS_ESC_CTRL | \
-				ASN1_STRFLGS_ESC_MSB | \
-				ASN1_STRFLGS_UTF8_CONVERT | \
-				ASN1_STRFLGS_DUMP_UNKNOWN | \
-				ASN1_STRFLGS_DUMP_DER)
+#define ASN1_STRFLGS_RFC2253                                              \
+  (ASN1_STRFLGS_ESC_2253 | ASN1_STRFLGS_ESC_CTRL | ASN1_STRFLGS_ESC_MSB | \
+   ASN1_STRFLGS_UTF8_CONVERT | ASN1_STRFLGS_DUMP_UNKNOWN |                \
+   ASN1_STRFLGS_DUMP_DER)
 
 DEFINE_STACK_OF(ASN1_INTEGER)
 DECLARE_ASN1_SET_OF(ASN1_INTEGER)
 
-struct asn1_type_st
-	{
-	int type;
-	union	{
-		char *ptr;
-		ASN1_BOOLEAN		boolean;
-		ASN1_STRING *		asn1_string;
-		ASN1_OBJECT *		object;
-		ASN1_INTEGER *		integer;
-		ASN1_ENUMERATED *	enumerated;
-		ASN1_BIT_STRING *	bit_string;
-		ASN1_OCTET_STRING *	octet_string;
-		ASN1_PRINTABLESTRING *	printablestring;
-		ASN1_T61STRING *	t61string;
-		ASN1_IA5STRING *	ia5string;
-		ASN1_GENERALSTRING *	generalstring;
-		ASN1_BMPSTRING *	bmpstring;
-		ASN1_UNIVERSALSTRING *	universalstring;
-		ASN1_UTCTIME *		utctime;
-		ASN1_GENERALIZEDTIME *	generalizedtime;
-		ASN1_VISIBLESTRING *	visiblestring;
-		ASN1_UTF8STRING *	utf8string;
-		/* set and sequence are left complete and still
-		 * contain the set or sequence bytes */
-		ASN1_STRING *		set;
-		ASN1_STRING *		sequence;
-		ASN1_VALUE *		asn1_value;
-		} value;
-    };
+struct asn1_type_st {
+  int type;
+  union {
+    char *ptr;
+    ASN1_BOOLEAN boolean;
+    ASN1_STRING *asn1_string;
+    ASN1_OBJECT *object;
+    ASN1_INTEGER *integer;
+    ASN1_ENUMERATED *enumerated;
+    ASN1_BIT_STRING *bit_string;
+    ASN1_OCTET_STRING *octet_string;
+    ASN1_PRINTABLESTRING *printablestring;
+    ASN1_T61STRING *t61string;
+    ASN1_IA5STRING *ia5string;
+    ASN1_GENERALSTRING *generalstring;
+    ASN1_BMPSTRING *bmpstring;
+    ASN1_UNIVERSALSTRING *universalstring;
+    ASN1_UTCTIME *utctime;
+    ASN1_GENERALIZEDTIME *generalizedtime;
+    ASN1_VISIBLESTRING *visiblestring;
+    ASN1_UTF8STRING *utf8string;
+    // set and sequence are left complete and still
+    // contain the set or sequence bytes
+    ASN1_STRING *set;
+    ASN1_STRING *sequence;
+    ASN1_VALUE *asn1_value;
+  } value;
+};
 
 DEFINE_STACK_OF(ASN1_TYPE)
 DECLARE_ASN1_SET_OF(ASN1_TYPE)
@@ -470,152 +618,81 @@
 DECLARE_ASN1_ENCODE_FUNCTIONS_const(ASN1_SEQUENCE_ANY, ASN1_SEQUENCE_ANY)
 DECLARE_ASN1_ENCODE_FUNCTIONS_const(ASN1_SEQUENCE_ANY, ASN1_SET_ANY)
 
-struct X509_algor_st
-       {
-       ASN1_OBJECT *algorithm;
-       ASN1_TYPE *parameter;
-       } /* X509_ALGOR */;
+struct X509_algor_st {
+  ASN1_OBJECT *algorithm;
+  ASN1_TYPE *parameter;
+} /* X509_ALGOR */;
 
 DECLARE_ASN1_FUNCTIONS(X509_ALGOR)
 
-/* This is used to contain a list of bit names */
+// This is used to contain a list of bit names
 typedef struct BIT_STRING_BITNAME_st {
-	int bitnum;
-	const char *lname;
-	const char *sname;
+  int bitnum;
+  const char *lname;
+  const char *sname;
 } BIT_STRING_BITNAME;
 
+// M_ASN1_* are legacy aliases for various |ASN1_STRING| functions. Use the
+// functions themselves.
+#define M_ASN1_STRING_length(x) ASN1_STRING_length(x)
+#define M_ASN1_STRING_type(x) ASN1_STRING_type(x)
+#define M_ASN1_STRING_data(x) ASN1_STRING_data(x)
+#define M_ASN1_BIT_STRING_new() ASN1_BIT_STRING_new()
+#define M_ASN1_BIT_STRING_free(a) ASN1_BIT_STRING_free(a)
+#define M_ASN1_BIT_STRING_dup(a) ASN1_STRING_dup(a)
+#define M_ASN1_BIT_STRING_cmp(a, b) ASN1_STRING_cmp(a, b)
+#define M_ASN1_BIT_STRING_set(a, b, c) ASN1_BIT_STRING_set(a, b, c)
+#define M_ASN1_INTEGER_new() ASN1_INTEGER_new()
+#define M_ASN1_INTEGER_free(a) ASN1_INTEGER_free(a)
+#define M_ASN1_INTEGER_dup(a) ASN1_INTEGER_dup(a)
+#define M_ASN1_INTEGER_cmp(a, b) ASN1_INTEGER_cmp(a, b)
+#define M_ASN1_ENUMERATED_new() ASN1_ENUMERATED_new()
+#define M_ASN1_ENUMERATED_free(a) ASN1_ENUMERATED_free(a)
+#define M_ASN1_ENUMERATED_dup(a) ASN1_STRING_dup(a)
+#define M_ASN1_ENUMERATED_cmp(a, b) ASN1_STRING_cmp(a, b)
+#define M_ASN1_OCTET_STRING_new() ASN1_OCTET_STRING_new()
+#define M_ASN1_OCTET_STRING_free(a) ASN1_OCTET_STRING_free(a)
+#define M_ASN1_OCTET_STRING_dup(a) ASN1_OCTET_STRING_dup(a)
+#define M_ASN1_OCTET_STRING_cmp(a, b) ASN1_OCTET_STRING_cmp(a, b)
+#define M_ASN1_OCTET_STRING_set(a, b, c) ASN1_OCTET_STRING_set(a, b, c)
+#define M_ASN1_OCTET_STRING_print(a, b) ASN1_STRING_print(a, b)
+#define M_ASN1_PRINTABLESTRING_new() ASN1_PRINTABLESTRING_new()
+#define M_ASN1_PRINTABLESTRING_free(a) ASN1_PRINTABLESTRING_free(a)
+#define M_ASN1_IA5STRING_new() ASN1_IA5STRING_new()
+#define M_ASN1_IA5STRING_free(a) ASN1_IA5STRING_free(a)
+#define M_ASN1_IA5STRING_dup(a) ASN1_STRING_dup(a)
+#define M_ASN1_UTCTIME_new() ASN1_UTCTIME_new()
+#define M_ASN1_UTCTIME_free(a) ASN1_UTCTIME_free(a)
+#define M_ASN1_UTCTIME_dup(a) ASN1_STRING_dup(a)
+#define M_ASN1_T61STRING_new() ASN1_T61STRING_new()
+#define M_ASN1_T61STRING_free(a) ASN1_T61STRING_free(a)
+#define M_ASN1_GENERALIZEDTIME_new() ASN1_GENERALIZEDTIME_new()
+#define M_ASN1_GENERALIZEDTIME_free(a) ASN1_GENERALIZEDTIME_free(a)
+#define M_ASN1_GENERALIZEDTIME_dup(a) ASN1_STRING_dup(a)
+#define M_ASN1_GENERALSTRING_new() ASN1_GENERALSTRING_new()
+#define M_ASN1_GENERALSTRING_free(a) ASN1_GENERALSTRING_free(a)
+#define M_ASN1_UNIVERSALSTRING_new() ASN1_UNIVERSALSTRING_new()
+#define M_ASN1_UNIVERSALSTRING_free(a) ASN1_UNIVERSALSTRING_free(a)
+#define M_ASN1_BMPSTRING_new() ASN1_BMPSTRING_new()
+#define M_ASN1_BMPSTRING_free(a) ASN1_BMPSTRING_free(a)
+#define M_ASN1_VISIBLESTRING_new() ASN1_VISIBLESTRING_new()
+#define M_ASN1_VISIBLESTRING_free(a) ASN1_VISIBLESTRING_free(a)
+#define M_ASN1_UTF8STRING_new() ASN1_UTF8STRING_new()
+#define M_ASN1_UTF8STRING_free(a) ASN1_UTF8STRING_free(a)
 
-#define M_ASN1_STRING_length(x)	((x)->length)
-#define M_ASN1_STRING_length_set(x, n)	((x)->length = (n))
-#define M_ASN1_STRING_type(x)	((x)->type)
-#define M_ASN1_STRING_data(x)	((x)->data)
+#define B_ASN1_TIME (B_ASN1_UTCTIME | B_ASN1_GENERALIZEDTIME)
 
-/* Macros for string operations */
-#define M_ASN1_BIT_STRING_new()	(ASN1_BIT_STRING *)\
-		ASN1_STRING_type_new(V_ASN1_BIT_STRING)
-#define M_ASN1_BIT_STRING_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-#define M_ASN1_BIT_STRING_dup(a) (ASN1_BIT_STRING *)\
-		ASN1_STRING_dup((const ASN1_STRING *)a)
-#define M_ASN1_BIT_STRING_cmp(a,b) ASN1_STRING_cmp(\
-		(const ASN1_STRING *)a,(const ASN1_STRING *)b)
-#define M_ASN1_BIT_STRING_set(a,b,c) ASN1_STRING_set((ASN1_STRING *)a,b,c)
+#define B_ASN1_PRINTABLE                                              \
+  (B_ASN1_NUMERICSTRING | B_ASN1_PRINTABLESTRING | B_ASN1_T61STRING | \
+   B_ASN1_IA5STRING | B_ASN1_BIT_STRING | B_ASN1_UNIVERSALSTRING |    \
+   B_ASN1_BMPSTRING | B_ASN1_UTF8STRING | B_ASN1_SEQUENCE | B_ASN1_UNKNOWN)
 
-#define M_ASN1_INTEGER_new()	(ASN1_INTEGER *)\
-		ASN1_STRING_type_new(V_ASN1_INTEGER)
-#define M_ASN1_INTEGER_free(a)		ASN1_STRING_free((ASN1_STRING *)a)
-#define M_ASN1_INTEGER_dup(a) (ASN1_INTEGER *)\
-		ASN1_STRING_dup((const ASN1_STRING *)a)
-#define M_ASN1_INTEGER_cmp(a,b)	ASN1_STRING_cmp(\
-		(const ASN1_STRING *)a,(const ASN1_STRING *)b)
-
-#define M_ASN1_ENUMERATED_new()	(ASN1_ENUMERATED *)\
-		ASN1_STRING_type_new(V_ASN1_ENUMERATED)
-#define M_ASN1_ENUMERATED_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-#define M_ASN1_ENUMERATED_dup(a) (ASN1_ENUMERATED *)\
-		ASN1_STRING_dup((const ASN1_STRING *)a)
-#define M_ASN1_ENUMERATED_cmp(a,b)	ASN1_STRING_cmp(\
-		(const ASN1_STRING *)a,(const ASN1_STRING *)b)
-
-#define M_ASN1_OCTET_STRING_new()	(ASN1_OCTET_STRING *)\
-		ASN1_STRING_type_new(V_ASN1_OCTET_STRING)
-#define M_ASN1_OCTET_STRING_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-#define M_ASN1_OCTET_STRING_dup(a) (ASN1_OCTET_STRING *)\
-		ASN1_STRING_dup((const ASN1_STRING *)a)
-#define M_ASN1_OCTET_STRING_cmp(a,b) ASN1_STRING_cmp(\
-		(const ASN1_STRING *)a,(const ASN1_STRING *)b)
-#define M_ASN1_OCTET_STRING_set(a,b,c)	ASN1_STRING_set((ASN1_STRING *)a,b,c)
-#define M_ASN1_OCTET_STRING_print(a,b)	ASN1_STRING_print(a,(ASN1_STRING *)b)
-
-#define B_ASN1_TIME \
-			B_ASN1_UTCTIME | \
-			B_ASN1_GENERALIZEDTIME
-
-#define B_ASN1_PRINTABLE \
-			B_ASN1_NUMERICSTRING| \
-			B_ASN1_PRINTABLESTRING| \
-			B_ASN1_T61STRING| \
-			B_ASN1_IA5STRING| \
-			B_ASN1_BIT_STRING| \
-			B_ASN1_UNIVERSALSTRING|\
-			B_ASN1_BMPSTRING|\
-			B_ASN1_UTF8STRING|\
-			B_ASN1_SEQUENCE|\
-			B_ASN1_UNKNOWN
-
-#define B_ASN1_DIRECTORYSTRING \
-			B_ASN1_PRINTABLESTRING| \
-			B_ASN1_TELETEXSTRING|\
-			B_ASN1_BMPSTRING|\
-			B_ASN1_UNIVERSALSTRING|\
-			B_ASN1_UTF8STRING
+#define B_ASN1_DIRECTORYSTRING                                        \
+  (B_ASN1_PRINTABLESTRING | B_ASN1_TELETEXSTRING | B_ASN1_BMPSTRING | \
+   B_ASN1_UNIVERSALSTRING | B_ASN1_UTF8STRING)
 
 #define B_ASN1_DISPLAYTEXT \
-			B_ASN1_IA5STRING| \
-			B_ASN1_VISIBLESTRING| \
-			B_ASN1_BMPSTRING|\
-			B_ASN1_UTF8STRING
-
-#define M_ASN1_PRINTABLE_new()	ASN1_STRING_type_new(V_ASN1_T61STRING)
-#define M_ASN1_PRINTABLE_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-
-#define M_DIRECTORYSTRING_new() ASN1_STRING_type_new(V_ASN1_PRINTABLESTRING)
-#define M_DIRECTORYSTRING_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-
-#define M_DISPLAYTEXT_new() ASN1_STRING_type_new(V_ASN1_VISIBLESTRING)
-#define M_DISPLAYTEXT_free(a) ASN1_STRING_free((ASN1_STRING *)a)
-
-#define M_ASN1_PRINTABLESTRING_new() (ASN1_PRINTABLESTRING *)\
-		ASN1_STRING_type_new(V_ASN1_PRINTABLESTRING)
-#define M_ASN1_PRINTABLESTRING_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-
-#define M_ASN1_T61STRING_new()	(ASN1_T61STRING *)\
-		ASN1_STRING_type_new(V_ASN1_T61STRING)
-#define M_ASN1_T61STRING_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-
-#define M_ASN1_IA5STRING_new()	(ASN1_IA5STRING *)\
-		ASN1_STRING_type_new(V_ASN1_IA5STRING)
-#define M_ASN1_IA5STRING_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-#define M_ASN1_IA5STRING_dup(a)	\
-		(ASN1_IA5STRING *)ASN1_STRING_dup((const ASN1_STRING *)a)
-
-#define M_ASN1_UTCTIME_new()	(ASN1_UTCTIME *)\
-		ASN1_STRING_type_new(V_ASN1_UTCTIME)
-#define M_ASN1_UTCTIME_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-#define M_ASN1_UTCTIME_dup(a) (ASN1_UTCTIME *)\
-		ASN1_STRING_dup((const ASN1_STRING *)a)
-
-#define M_ASN1_GENERALIZEDTIME_new()	(ASN1_GENERALIZEDTIME *)\
-		ASN1_STRING_type_new(V_ASN1_GENERALIZEDTIME)
-#define M_ASN1_GENERALIZEDTIME_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-#define M_ASN1_GENERALIZEDTIME_dup(a) (ASN1_GENERALIZEDTIME *)ASN1_STRING_dup(\
-	(const ASN1_STRING *)a)
-
-#define M_ASN1_TIME_new()	(ASN1_TIME *)\
-		ASN1_STRING_type_new(V_ASN1_UTCTIME)
-#define M_ASN1_TIME_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-#define M_ASN1_TIME_dup(a) (ASN1_TIME *)\
-	ASN1_STRING_dup((const ASN1_STRING *)a)
-
-#define M_ASN1_GENERALSTRING_new()	(ASN1_GENERALSTRING *)\
-		ASN1_STRING_type_new(V_ASN1_GENERALSTRING)
-#define M_ASN1_GENERALSTRING_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-
-#define M_ASN1_UNIVERSALSTRING_new()	(ASN1_UNIVERSALSTRING *)\
-		ASN1_STRING_type_new(V_ASN1_UNIVERSALSTRING)
-#define M_ASN1_UNIVERSALSTRING_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-
-#define M_ASN1_BMPSTRING_new()	(ASN1_BMPSTRING *)\
-		ASN1_STRING_type_new(V_ASN1_BMPSTRING)
-#define M_ASN1_BMPSTRING_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-
-#define M_ASN1_VISIBLESTRING_new()	(ASN1_VISIBLESTRING *)\
-		ASN1_STRING_type_new(V_ASN1_VISIBLESTRING)
-#define M_ASN1_VISIBLESTRING_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
-
-#define M_ASN1_UTF8STRING_new()	(ASN1_UTF8STRING *)\
-		ASN1_STRING_type_new(V_ASN1_UTF8STRING)
-#define M_ASN1_UTF8STRING_free(a)	ASN1_STRING_free((ASN1_STRING *)a)
+  B_ASN1_IA5STRING | B_ASN1_VISIBLESTRING | B_ASN1_BMPSTRING | B_ASN1_UTF8STRING
 
 DECLARE_ASN1_FUNCTIONS_fname(ASN1_TYPE, ASN1_ANY, ASN1_TYPE)
 
@@ -624,56 +701,53 @@
 OPENSSL_EXPORT int ASN1_TYPE_set1(ASN1_TYPE *a, int type, const void *value);
 OPENSSL_EXPORT int ASN1_TYPE_cmp(const ASN1_TYPE *a, const ASN1_TYPE *b);
 
-OPENSSL_EXPORT ASN1_OBJECT *	ASN1_OBJECT_new(void );
-OPENSSL_EXPORT void		ASN1_OBJECT_free(ASN1_OBJECT *a);
-OPENSSL_EXPORT int		i2d_ASN1_OBJECT(const ASN1_OBJECT *a,unsigned char **pp);
-OPENSSL_EXPORT ASN1_OBJECT *	c2i_ASN1_OBJECT(ASN1_OBJECT **a,const unsigned char **pp,
-						long length);
-OPENSSL_EXPORT ASN1_OBJECT *	d2i_ASN1_OBJECT(ASN1_OBJECT **a,const unsigned char **pp,
-						long length);
+OPENSSL_EXPORT ASN1_OBJECT *ASN1_OBJECT_new(void);
+OPENSSL_EXPORT void ASN1_OBJECT_free(ASN1_OBJECT *a);
+OPENSSL_EXPORT int i2d_ASN1_OBJECT(const ASN1_OBJECT *a, unsigned char **pp);
+OPENSSL_EXPORT ASN1_OBJECT *c2i_ASN1_OBJECT(ASN1_OBJECT **a,
+                                            const unsigned char **pp,
+                                            long length);
+OPENSSL_EXPORT ASN1_OBJECT *d2i_ASN1_OBJECT(ASN1_OBJECT **a,
+                                            const unsigned char **pp,
+                                            long length);
 
 DECLARE_ASN1_ITEM(ASN1_OBJECT)
 
 DECLARE_ASN1_SET_OF(ASN1_OBJECT)
 
-OPENSSL_EXPORT ASN1_STRING *	ASN1_STRING_new(void);
-OPENSSL_EXPORT void		ASN1_STRING_free(ASN1_STRING *a);
-OPENSSL_EXPORT int		ASN1_STRING_copy(ASN1_STRING *dst, const ASN1_STRING *str);
-OPENSSL_EXPORT ASN1_STRING *	ASN1_STRING_dup(const ASN1_STRING *a);
-OPENSSL_EXPORT ASN1_STRING *	ASN1_STRING_type_new(int type );
-OPENSSL_EXPORT int 		ASN1_STRING_cmp(const ASN1_STRING *a, const ASN1_STRING *b);
-  /* Since this is used to store all sorts of things, via macros, for now, make
-     its data void * */
-OPENSSL_EXPORT int 		ASN1_STRING_set(ASN1_STRING *str, const void *data, int len);
-OPENSSL_EXPORT void		ASN1_STRING_set0(ASN1_STRING *str, void *data, int len);
-OPENSSL_EXPORT int ASN1_STRING_length(const ASN1_STRING *x);
-OPENSSL_EXPORT void ASN1_STRING_length_set(ASN1_STRING *x, int n);
-OPENSSL_EXPORT int ASN1_STRING_type(const ASN1_STRING *x);
-OPENSSL_EXPORT unsigned char * ASN1_STRING_data(ASN1_STRING *x);
-OPENSSL_EXPORT const unsigned char *ASN1_STRING_get0_data(const ASN1_STRING *x);
-
 DECLARE_ASN1_FUNCTIONS(ASN1_BIT_STRING)
-OPENSSL_EXPORT int		i2c_ASN1_BIT_STRING(const ASN1_BIT_STRING *a,unsigned char **pp);
-OPENSSL_EXPORT ASN1_BIT_STRING *c2i_ASN1_BIT_STRING(ASN1_BIT_STRING **a,const unsigned char **pp, long length);
-OPENSSL_EXPORT int		ASN1_BIT_STRING_set(ASN1_BIT_STRING *a, unsigned char *d, int length );
-OPENSSL_EXPORT int		ASN1_BIT_STRING_set_bit(ASN1_BIT_STRING *a, int n, int value);
-OPENSSL_EXPORT int		ASN1_BIT_STRING_get_bit(const ASN1_BIT_STRING *a, int n);
-OPENSSL_EXPORT int            ASN1_BIT_STRING_check(const ASN1_BIT_STRING *a, unsigned char *flags, int flags_len);
+OPENSSL_EXPORT int i2c_ASN1_BIT_STRING(const ASN1_BIT_STRING *a,
+                                       unsigned char **pp);
+OPENSSL_EXPORT ASN1_BIT_STRING *c2i_ASN1_BIT_STRING(ASN1_BIT_STRING **a,
+                                                    const unsigned char **pp,
+                                                    long length);
+OPENSSL_EXPORT int ASN1_BIT_STRING_set(ASN1_BIT_STRING *a, unsigned char *d,
+                                       int length);
+OPENSSL_EXPORT int ASN1_BIT_STRING_set_bit(ASN1_BIT_STRING *a, int n,
+                                           int value);
+OPENSSL_EXPORT int ASN1_BIT_STRING_get_bit(const ASN1_BIT_STRING *a, int n);
+OPENSSL_EXPORT int ASN1_BIT_STRING_check(const ASN1_BIT_STRING *a,
+                                         unsigned char *flags, int flags_len);
 
-OPENSSL_EXPORT int		i2d_ASN1_BOOLEAN(int a,unsigned char **pp);
-OPENSSL_EXPORT int 		d2i_ASN1_BOOLEAN(int *a,const unsigned char **pp,long length);
+OPENSSL_EXPORT int i2d_ASN1_BOOLEAN(int a, unsigned char **pp);
+OPENSSL_EXPORT int d2i_ASN1_BOOLEAN(int *a, const unsigned char **pp,
+                                    long length);
 
 DECLARE_ASN1_FUNCTIONS(ASN1_INTEGER)
-OPENSSL_EXPORT int		i2c_ASN1_INTEGER(const ASN1_INTEGER *a,unsigned char **pp);
-OPENSSL_EXPORT ASN1_INTEGER *c2i_ASN1_INTEGER(ASN1_INTEGER **a,const unsigned char **pp, long length);
-OPENSSL_EXPORT ASN1_INTEGER *	ASN1_INTEGER_dup(const ASN1_INTEGER *x);
-OPENSSL_EXPORT int ASN1_INTEGER_cmp(const ASN1_INTEGER *x, const ASN1_INTEGER *y);
+OPENSSL_EXPORT int i2c_ASN1_INTEGER(const ASN1_INTEGER *a, unsigned char **pp);
+OPENSSL_EXPORT ASN1_INTEGER *c2i_ASN1_INTEGER(ASN1_INTEGER **a,
+                                              const unsigned char **pp,
+                                              long length);
+OPENSSL_EXPORT ASN1_INTEGER *ASN1_INTEGER_dup(const ASN1_INTEGER *x);
+OPENSSL_EXPORT int ASN1_INTEGER_cmp(const ASN1_INTEGER *x,
+                                    const ASN1_INTEGER *y);
 
 DECLARE_ASN1_FUNCTIONS(ASN1_ENUMERATED)
 
 OPENSSL_EXPORT int ASN1_UTCTIME_check(const ASN1_UTCTIME *a);
-OPENSSL_EXPORT ASN1_UTCTIME *ASN1_UTCTIME_set(ASN1_UTCTIME *s,time_t t);
-OPENSSL_EXPORT ASN1_UTCTIME *ASN1_UTCTIME_adj(ASN1_UTCTIME *s, time_t t, int offset_day, long offset_sec);
+OPENSSL_EXPORT ASN1_UTCTIME *ASN1_UTCTIME_set(ASN1_UTCTIME *s, time_t t);
+OPENSSL_EXPORT ASN1_UTCTIME *ASN1_UTCTIME_adj(ASN1_UTCTIME *s, time_t t,
+                                              int offset_day, long offset_sec);
 OPENSSL_EXPORT int ASN1_UTCTIME_set_string(ASN1_UTCTIME *s, const char *str);
 OPENSSL_EXPORT int ASN1_UTCTIME_cmp_time_t(const ASN1_UTCTIME *s, time_t t);
 #if 0
@@ -681,15 +755,22 @@
 #endif
 
 OPENSSL_EXPORT int ASN1_GENERALIZEDTIME_check(const ASN1_GENERALIZEDTIME *a);
-OPENSSL_EXPORT ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_set(ASN1_GENERALIZEDTIME *s,time_t t);
-OPENSSL_EXPORT ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_adj(ASN1_GENERALIZEDTIME *s, time_t t, int offset_day, long offset_sec);
-OPENSSL_EXPORT int ASN1_GENERALIZEDTIME_set_string(ASN1_GENERALIZEDTIME *s, const char *str);
-OPENSSL_EXPORT int ASN1_TIME_diff(int *pday, int *psec, const ASN1_TIME *from, const ASN1_TIME *to);
+OPENSSL_EXPORT ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_set(
+    ASN1_GENERALIZEDTIME *s, time_t t);
+OPENSSL_EXPORT ASN1_GENERALIZEDTIME *ASN1_GENERALIZEDTIME_adj(
+    ASN1_GENERALIZEDTIME *s, time_t t, int offset_day, long offset_sec);
+OPENSSL_EXPORT int ASN1_GENERALIZEDTIME_set_string(ASN1_GENERALIZEDTIME *s,
+                                                   const char *str);
+OPENSSL_EXPORT int ASN1_TIME_diff(int *pday, int *psec, const ASN1_TIME *from,
+                                  const ASN1_TIME *to);
 
 DECLARE_ASN1_FUNCTIONS(ASN1_OCTET_STRING)
-OPENSSL_EXPORT ASN1_OCTET_STRING *	ASN1_OCTET_STRING_dup(const ASN1_OCTET_STRING *a);
-OPENSSL_EXPORT int 	ASN1_OCTET_STRING_cmp(const ASN1_OCTET_STRING *a, const ASN1_OCTET_STRING *b);
-OPENSSL_EXPORT int 	ASN1_OCTET_STRING_set(ASN1_OCTET_STRING *str, const unsigned char *data, int len);
+OPENSSL_EXPORT ASN1_OCTET_STRING *ASN1_OCTET_STRING_dup(
+    const ASN1_OCTET_STRING *a);
+OPENSSL_EXPORT int ASN1_OCTET_STRING_cmp(const ASN1_OCTET_STRING *a,
+                                         const ASN1_OCTET_STRING *b);
+OPENSSL_EXPORT int ASN1_OCTET_STRING_set(ASN1_OCTET_STRING *str,
+                                         const unsigned char *data, int len);
 
 DECLARE_ASN1_FUNCTIONS(ASN1_VISIBLESTRING)
 DECLARE_ASN1_FUNCTIONS(ASN1_UNIVERSALSTRING)
@@ -709,42 +790,50 @@
 DECLARE_ASN1_FUNCTIONS(ASN1_GENERALIZEDTIME)
 DECLARE_ASN1_FUNCTIONS(ASN1_TIME)
 
-DECLARE_ASN1_ITEM(ASN1_OCTET_STRING_NDEF)
-
-OPENSSL_EXPORT ASN1_TIME *ASN1_TIME_set(ASN1_TIME *s,time_t t);
-OPENSSL_EXPORT ASN1_TIME *ASN1_TIME_adj(ASN1_TIME *s,time_t t, int offset_day, long offset_sec);
+OPENSSL_EXPORT ASN1_TIME *ASN1_TIME_set(ASN1_TIME *s, time_t t);
+OPENSSL_EXPORT ASN1_TIME *ASN1_TIME_adj(ASN1_TIME *s, time_t t, int offset_day,
+                                        long offset_sec);
 OPENSSL_EXPORT int ASN1_TIME_check(const ASN1_TIME *t);
-OPENSSL_EXPORT ASN1_GENERALIZEDTIME *ASN1_TIME_to_generalizedtime(const ASN1_TIME *t, ASN1_GENERALIZEDTIME **out);
+OPENSSL_EXPORT ASN1_GENERALIZEDTIME *ASN1_TIME_to_generalizedtime(
+    const ASN1_TIME *t, ASN1_GENERALIZEDTIME **out);
 OPENSSL_EXPORT int ASN1_TIME_set_string(ASN1_TIME *s, const char *str);
 
 OPENSSL_EXPORT int i2a_ASN1_INTEGER(BIO *bp, const ASN1_INTEGER *a);
 OPENSSL_EXPORT int i2a_ASN1_ENUMERATED(BIO *bp, const ASN1_ENUMERATED *a);
 OPENSSL_EXPORT int i2a_ASN1_OBJECT(BIO *bp, const ASN1_OBJECT *a);
 OPENSSL_EXPORT int i2a_ASN1_STRING(BIO *bp, const ASN1_STRING *a, int type);
-OPENSSL_EXPORT int i2t_ASN1_OBJECT(char *buf,int buf_len, const ASN1_OBJECT *a);
+OPENSSL_EXPORT int i2t_ASN1_OBJECT(char *buf, int buf_len,
+                                   const ASN1_OBJECT *a);
 
-OPENSSL_EXPORT ASN1_OBJECT *ASN1_OBJECT_create(int nid, unsigned char *data,int len, const char *sn, const char *ln);
+OPENSSL_EXPORT ASN1_OBJECT *ASN1_OBJECT_create(int nid, unsigned char *data,
+                                               int len, const char *sn,
+                                               const char *ln);
 
 OPENSSL_EXPORT int ASN1_INTEGER_set(ASN1_INTEGER *a, long v);
 OPENSSL_EXPORT int ASN1_INTEGER_set_uint64(ASN1_INTEGER *out, uint64_t v);
 OPENSSL_EXPORT long ASN1_INTEGER_get(const ASN1_INTEGER *a);
-OPENSSL_EXPORT ASN1_INTEGER *BN_to_ASN1_INTEGER(const BIGNUM *bn, ASN1_INTEGER *ai);
-OPENSSL_EXPORT BIGNUM *ASN1_INTEGER_to_BN(const ASN1_INTEGER *ai,BIGNUM *bn);
+OPENSSL_EXPORT ASN1_INTEGER *BN_to_ASN1_INTEGER(const BIGNUM *bn,
+                                                ASN1_INTEGER *ai);
+OPENSSL_EXPORT BIGNUM *ASN1_INTEGER_to_BN(const ASN1_INTEGER *ai, BIGNUM *bn);
 
 OPENSSL_EXPORT int ASN1_ENUMERATED_set(ASN1_ENUMERATED *a, long v);
 OPENSSL_EXPORT long ASN1_ENUMERATED_get(const ASN1_ENUMERATED *a);
-OPENSSL_EXPORT ASN1_ENUMERATED *BN_to_ASN1_ENUMERATED(const BIGNUM *bn, ASN1_ENUMERATED *ai);
-OPENSSL_EXPORT BIGNUM *ASN1_ENUMERATED_to_BN(const ASN1_ENUMERATED *ai,BIGNUM *bn);
+OPENSSL_EXPORT ASN1_ENUMERATED *BN_to_ASN1_ENUMERATED(const BIGNUM *bn,
+                                                      ASN1_ENUMERATED *ai);
+OPENSSL_EXPORT BIGNUM *ASN1_ENUMERATED_to_BN(const ASN1_ENUMERATED *ai,
+                                             BIGNUM *bn);
 
-/* General */
-/* given a string, return the correct type, max is the maximum length */
+// General
+// given a string, return the correct type, max is the maximum length
 OPENSSL_EXPORT int ASN1_PRINTABLE_type(const unsigned char *s, int max);
 
 OPENSSL_EXPORT unsigned long ASN1_tag2bit(int tag);
 
-/* SPECIALS */
-OPENSSL_EXPORT int ASN1_get_object(const unsigned char **pp, long *plength, int *ptag, int *pclass, long omax);
-OPENSSL_EXPORT void ASN1_put_object(unsigned char **pp, int constructed, int length, int tag, int xclass);
+// SPECIALS
+OPENSSL_EXPORT int ASN1_get_object(const unsigned char **pp, long *plength,
+                                   int *ptag, int *pclass, long omax);
+OPENSSL_EXPORT void ASN1_put_object(unsigned char **pp, int constructed,
+                                    int length, int tag, int xclass);
 OPENSSL_EXPORT int ASN1_put_eoc(unsigned char **pp);
 OPENSSL_EXPORT int ASN1_object_size(int constructed, int length, int tag);
 
@@ -753,7 +842,8 @@
 #ifndef OPENSSL_NO_FP_API
 OPENSSL_EXPORT void *ASN1_item_d2i_fp(const ASN1_ITEM *it, FILE *in, void *x);
 OPENSSL_EXPORT int ASN1_item_i2d_fp(const ASN1_ITEM *it, FILE *out, void *x);
-OPENSSL_EXPORT int ASN1_STRING_print_ex_fp(FILE *fp, const ASN1_STRING *str, unsigned long flags);
+OPENSSL_EXPORT int ASN1_STRING_print_ex_fp(FILE *fp, const ASN1_STRING *str,
+                                           unsigned long flags);
 #endif
 
 OPENSSL_EXPORT int ASN1_STRING_to_UTF8(unsigned char **out, ASN1_STRING *in);
@@ -761,43 +851,58 @@
 OPENSSL_EXPORT void *ASN1_item_d2i_bio(const ASN1_ITEM *it, BIO *in, void *x);
 OPENSSL_EXPORT int ASN1_item_i2d_bio(const ASN1_ITEM *it, BIO *out, void *x);
 OPENSSL_EXPORT int ASN1_UTCTIME_print(BIO *fp, const ASN1_UTCTIME *a);
-OPENSSL_EXPORT int ASN1_GENERALIZEDTIME_print(BIO *fp, const ASN1_GENERALIZEDTIME *a);
+OPENSSL_EXPORT int ASN1_GENERALIZEDTIME_print(BIO *fp,
+                                              const ASN1_GENERALIZEDTIME *a);
 OPENSSL_EXPORT int ASN1_TIME_print(BIO *fp, const ASN1_TIME *a);
 OPENSSL_EXPORT int ASN1_STRING_print(BIO *bp, const ASN1_STRING *v);
-OPENSSL_EXPORT int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long flags);
+OPENSSL_EXPORT int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str,
+                                        unsigned long flags);
 OPENSSL_EXPORT const char *ASN1_tag2str(int tag);
 
-/* Used to load and write netscape format cert */
+// Used to load and write netscape format cert
 
-OPENSSL_EXPORT void *ASN1_item_unpack(const ASN1_STRING *oct, const ASN1_ITEM *it);
+OPENSSL_EXPORT void *ASN1_item_unpack(const ASN1_STRING *oct,
+                                      const ASN1_ITEM *it);
 
-OPENSSL_EXPORT ASN1_STRING *ASN1_item_pack(void *obj, const ASN1_ITEM *it, ASN1_OCTET_STRING **oct);
+OPENSSL_EXPORT ASN1_STRING *ASN1_item_pack(void *obj, const ASN1_ITEM *it,
+                                           ASN1_OCTET_STRING **oct);
 
 OPENSSL_EXPORT void ASN1_STRING_set_default_mask(unsigned long mask);
 OPENSSL_EXPORT int ASN1_STRING_set_default_mask_asc(const char *p);
 OPENSSL_EXPORT unsigned long ASN1_STRING_get_default_mask(void);
-OPENSSL_EXPORT int ASN1_mbstring_copy(ASN1_STRING **out, const unsigned char *in, int len, int inform, unsigned long mask);
-OPENSSL_EXPORT int ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len, int inform, unsigned long mask, long minsize, long maxsize);
+OPENSSL_EXPORT int ASN1_mbstring_copy(ASN1_STRING **out,
+                                      const unsigned char *in, int len,
+                                      int inform, unsigned long mask);
+OPENSSL_EXPORT int ASN1_mbstring_ncopy(ASN1_STRING **out,
+                                       const unsigned char *in, int len,
+                                       int inform, unsigned long mask,
+                                       long minsize, long maxsize);
 
-OPENSSL_EXPORT ASN1_STRING *ASN1_STRING_set_by_NID(ASN1_STRING **out, const unsigned char *in, int inlen, int inform, int nid);
+OPENSSL_EXPORT ASN1_STRING *ASN1_STRING_set_by_NID(ASN1_STRING **out,
+                                                   const unsigned char *in,
+                                                   int inlen, int inform,
+                                                   int nid);
 OPENSSL_EXPORT ASN1_STRING_TABLE *ASN1_STRING_TABLE_get(int nid);
-OPENSSL_EXPORT int ASN1_STRING_TABLE_add(int, long, long, unsigned long, unsigned long);
+OPENSSL_EXPORT int ASN1_STRING_TABLE_add(int, long, long, unsigned long,
+                                         unsigned long);
 OPENSSL_EXPORT void ASN1_STRING_TABLE_cleanup(void);
 
-/* ASN1 template functions */
+// ASN1 template functions
 
-/* Old API compatible functions */
+// Old API compatible functions
 OPENSSL_EXPORT ASN1_VALUE *ASN1_item_new(const ASN1_ITEM *it);
 OPENSSL_EXPORT void ASN1_item_free(ASN1_VALUE *val, const ASN1_ITEM *it);
-OPENSSL_EXPORT ASN1_VALUE * ASN1_item_d2i(ASN1_VALUE **val, const unsigned char **in, long len, const ASN1_ITEM *it);
-OPENSSL_EXPORT int ASN1_item_i2d(ASN1_VALUE *val, unsigned char **out, const ASN1_ITEM *it);
-OPENSSL_EXPORT int ASN1_item_ndef_i2d(ASN1_VALUE *val, unsigned char **out, const ASN1_ITEM *it);
+OPENSSL_EXPORT ASN1_VALUE *ASN1_item_d2i(ASN1_VALUE **val,
+                                         const unsigned char **in, long len,
+                                         const ASN1_ITEM *it);
+OPENSSL_EXPORT int ASN1_item_i2d(ASN1_VALUE *val, unsigned char **out,
+                                 const ASN1_ITEM *it);
 
 OPENSSL_EXPORT ASN1_TYPE *ASN1_generate_nconf(const char *str, CONF *nconf);
 OPENSSL_EXPORT ASN1_TYPE *ASN1_generate_v3(const char *str, X509V3_CTX *cnf);
 
 
-#ifdef  __cplusplus
+#ifdef __cplusplus
 }
 
 extern "C++" {
@@ -810,7 +915,7 @@
 
 BSSL_NAMESPACE_END
 
-}  /* extern C++ */
+}  // extern C++
 
 #endif
 
@@ -907,5 +1012,6 @@
 #define ASN1_R_WRONG_TAG 190
 #define ASN1_R_WRONG_TYPE 191
 #define ASN1_R_NESTED_TOO_DEEP 192
+#define ASN1_R_BAD_TEMPLATE 193
 
 #endif
diff --git a/deps/boringssl/src/include/openssl/asn1t.h b/deps/boringssl/src/include/openssl/asn1t.h
index 7bd7701..c5e2685 100644
--- a/deps/boringssl/src/include/openssl/asn1t.h
+++ b/deps/boringssl/src/include/openssl/asn1t.h
@@ -141,20 +141,10 @@
 		#stname \
 	ASN1_ITEM_end(tname)
 
-#define ASN1_NDEF_SEQUENCE(tname) \
-	ASN1_SEQUENCE(tname)
-
-#define ASN1_NDEF_SEQUENCE_cb(tname, cb) \
-	ASN1_SEQUENCE_cb(tname, cb)
-
 #define ASN1_SEQUENCE_cb(tname, cb) \
 	static const ASN1_AUX tname##_aux = {NULL, 0, 0, cb, 0}; \
 	ASN1_SEQUENCE(tname)
 
-#define ASN1_BROKEN_SEQUENCE(tname) \
-	static const ASN1_AUX tname##_aux = {NULL, ASN1_AFLG_BROKEN, 0, 0, 0}; \
-	ASN1_SEQUENCE(tname)
-
 #define ASN1_SEQUENCE_ref(tname, cb) \
 	static const ASN1_AUX tname##_aux = {NULL, ASN1_AFLG_REFCOUNT, offsetof(tname, references), cb, 0}; \
 	ASN1_SEQUENCE(tname)
@@ -163,20 +153,6 @@
 	static const ASN1_AUX tname##_aux = {NULL, ASN1_AFLG_ENCODING, 0, cb, offsetof(tname, enc)}; \
 	ASN1_SEQUENCE(tname)
 
-#define ASN1_NDEF_SEQUENCE_END(tname) \
-	;\
-	ASN1_ITEM_start(tname) \
-		ASN1_ITYPE_NDEF_SEQUENCE,\
-		V_ASN1_SEQUENCE,\
-		tname##_seq_tt,\
-		sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\
-		NULL,\
-		sizeof(tname),\
-		#tname \
-	ASN1_ITEM_end(tname)
-
-#define ASN1_BROKEN_SEQUENCE_END(stname) ASN1_SEQUENCE_END_ref(stname, stname)
-
 #define ASN1_SEQUENCE_END_enc(stname, tname) ASN1_SEQUENCE_END_ref(stname, tname)
 
 #define ASN1_SEQUENCE_END_cb(stname, tname) ASN1_SEQUENCE_END_ref(stname, tname)
@@ -193,18 +169,6 @@
 		#stname \
 	ASN1_ITEM_end(tname)
 
-#define ASN1_NDEF_SEQUENCE_END_cb(stname, tname) \
-	;\
-	ASN1_ITEM_start(tname) \
-		ASN1_ITYPE_NDEF_SEQUENCE,\
-		V_ASN1_SEQUENCE,\
-		tname##_seq_tt,\
-		sizeof(tname##_seq_tt) / sizeof(ASN1_TEMPLATE),\
-		&tname##_aux,\
-		sizeof(stname),\
-		#stname \
-	ASN1_ITEM_end(tname)
-
 
 /* This pair helps declare a CHOICE type. We can do:
  *
@@ -353,14 +317,6 @@
 #define ASN1_EXP_SEQUENCE_OF_OPT(stname, field, type, tag) \
 			ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_SEQUENCE_OF|ASN1_TFLG_OPTIONAL)
 
-/* EXPLICIT using indefinite length constructed form */
-#define ASN1_NDEF_EXP(stname, field, type, tag) \
-			ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_NDEF)
-
-/* EXPLICIT OPTIONAL using indefinite length constructed form */
-#define ASN1_NDEF_EXP_OPT(stname, field, type, tag) \
-			ASN1_EXP_EX(stname, field, type, tag, ASN1_TFLG_OPTIONAL|ASN1_TFLG_NDEF)
-
 /* Macros for the ASN1_ADB structure */
 
 #define ASN1_ADB(name) \
@@ -393,9 +349,7 @@
 unsigned long flags;		/* Various flags */
 long tag;			/* tag, not used if no tagging */
 unsigned long offset;		/* Offset of this field in structure */
-#ifndef NO_ASN1_FIELD_NAMES
 const char *field_name;		/* Field name */
-#endif
 ASN1_ITEM_EXP *item;		/* Relevant ASN1_ITEM or ASN1_ADB */
 };
 
@@ -504,13 +458,6 @@
 
 #define ASN1_TFLG_COMBINE	(0x1<<10)
 
-/* This flag when present in a SEQUENCE OF, SET OF
- * or EXPLICIT causes indefinite length constructed
- * encoding to be used if required.
- */
-
-#define ASN1_TFLG_NDEF		(0x1<<11)
-
 /* This is the actual ASN1 item itself */
 
 struct ASN1_ITEM_st {
@@ -520,9 +467,7 @@
 long tcount;			/* Number of templates if SEQUENCE or CHOICE */
 const void *funcs;		/* functions that handle this type */
 long size;			/* Structure size (usually)*/
-#ifndef NO_ASN1_FIELD_NAMES
 const char *sname;		/* Structure name */
-#endif
 };
 
 /* These are values for the itype field and
@@ -547,10 +492,6 @@
  * The 'funcs' field is used for application
  * specific functions. 
  *
- * For COMPAT types the funcs field gives a
- * set of functions that handle this type, this
- * supports the old d2i, i2d convention.
- *
  * The EXTERN type uses a new style d2i/i2d.
  * The new style should be used where possible
  * because it avoids things like the d2i IMPLICIT
@@ -563,10 +504,6 @@
  * has a special meaning, it is used as a mask
  * of acceptable types using the B_ASN1 constants.
  *
- * NDEF_SEQUENCE is the same as SEQUENCE except
- * that it will use indefinite length constructed
- * encoding if requested.
- *
  */
 
 #define ASN1_ITYPE_PRIMITIVE		0x0
@@ -575,14 +512,10 @@
 
 #define ASN1_ITYPE_CHOICE		0x2
 
-#define ASN1_ITYPE_COMPAT		0x3
-
 #define ASN1_ITYPE_EXTERN		0x4
 
 #define ASN1_ITYPE_MSTRING		0x5
 
-#define ASN1_ITYPE_NDEF_SEQUENCE	0x6
-
 /* Cache for ASN1 tag and length, so we
  * don't keep re-reading it for things
  * like CHOICE
@@ -615,17 +548,6 @@
 						int indent, const char *fname, 
 						const ASN1_PCTX *pctx);
 
-typedef int ASN1_primitive_i2c(ASN1_VALUE **pval, unsigned char *cont, int *putype, const ASN1_ITEM *it);
-typedef int ASN1_primitive_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len, int utype, char *free_cont, const ASN1_ITEM *it);
-typedef int ASN1_primitive_print(BIO *out, ASN1_VALUE **pval, const ASN1_ITEM *it, int indent, const ASN1_PCTX *pctx);
-
-typedef struct ASN1_COMPAT_FUNCS_st {
-	ASN1_new_func *asn1_new;
-	ASN1_free_func *asn1_free;
-	ASN1_d2i_func *asn1_d2i;
-	ASN1_i2d_func *asn1_i2d;
-} ASN1_COMPAT_FUNCS;
-
 typedef struct ASN1_EXTERN_FUNCS_st {
 	void *app_data;
 	ASN1_ex_new_func *asn1_ex_new;
@@ -637,17 +559,6 @@
 	ASN1_ex_print_func *asn1_ex_print;
 } ASN1_EXTERN_FUNCS;
 
-typedef struct ASN1_PRIMITIVE_FUNCS_st {
-	void *app_data;
-	unsigned long flags;
-	ASN1_ex_new_func *prim_new;
-	ASN1_ex_free_func *prim_free;
-	ASN1_ex_free_func *prim_clear;
-	ASN1_primitive_c2i *prim_c2i;
-	ASN1_primitive_i2c *prim_i2c;
-	ASN1_primitive_print *prim_print;
-} ASN1_PRIMITIVE_FUNCS;
-
 /* This is the ASN1_AUX structure: it handles various
  * miscellaneous requirements. For example the use of
  * reference counts and an informational callback.
@@ -676,31 +587,12 @@
 	int enc_offset;		/* Offset of ASN1_ENCODING structure */
 } ASN1_AUX;
 
-/* For print related callbacks exarg points to this structure */
-typedef struct ASN1_PRINT_ARG_st {
-	BIO *out;
-	int indent;
-	const ASN1_PCTX *pctx;
-} ASN1_PRINT_ARG;
-
-/* For streaming related callbacks exarg points to this structure */
-typedef struct ASN1_STREAM_ARG_st {
-	/* BIO to stream through */
-	BIO *out;
-	/* BIO with filters appended */
-	BIO *ndef_bio;
-	/* Streaming I/O boundary */
-	unsigned char **boundary;
-} ASN1_STREAM_ARG;
-
 /* Flags in ASN1_AUX */
 
 /* Use a reference count */
 #define ASN1_AFLG_REFCOUNT	1
 /* Save the encoding of structure (useful for signatures) */
 #define ASN1_AFLG_ENCODING	2
-/* The Sequence length is invalid */
-#define ASN1_AFLG_BROKEN	4
 
 /* operation values for asn1_cb */
 
@@ -732,27 +624,6 @@
 					ASN1_ITYPE_MSTRING, mask, NULL, 0, NULL, sizeof(ASN1_STRING), #itname \
 				ASN1_ITEM_end(itname)
 
-/* Macro to implement an ASN1_ITEM in terms of old style funcs */
-
-#define IMPLEMENT_COMPAT_ASN1(sname) IMPLEMENT_COMPAT_ASN1_type(sname, V_ASN1_SEQUENCE)
-
-#define IMPLEMENT_COMPAT_ASN1_type(sname, tag) \
-	static const ASN1_COMPAT_FUNCS sname##_ff = { \
-		(ASN1_new_func *)sname##_new, \
-		(ASN1_free_func *)sname##_free, \
-		(ASN1_d2i_func *)d2i_##sname, \
-		(ASN1_i2d_func *)i2d_##sname, \
-	}; \
-	ASN1_ITEM_start(sname) \
-		ASN1_ITYPE_COMPAT, \
-		tag, \
-		NULL, \
-		0, \
-		&sname##_ff, \
-		0, \
-		#sname \
-	ASN1_ITEM_end(sname)
-
 #define IMPLEMENT_EXTERN_ASN1(sname, tag, fptrs) \
 	ASN1_ITEM_start(sname) \
 		ASN1_ITYPE_EXTERN, \
@@ -813,12 +684,6 @@
 		return ASN1_item_i2d((ASN1_VALUE *)a, out, ASN1_ITEM_rptr(itname));\
 	} 
 
-#define IMPLEMENT_ASN1_NDEF_FUNCTION(stname) \
-	int i2d_##stname##_NDEF(stname *a, unsigned char **out) \
-	{ \
-		return ASN1_item_ndef_i2d((ASN1_VALUE *)a, out, ASN1_ITEM_rptr(stname));\
-	} 
-
 /* This includes evil casts to remove const: they will go away when full
  * ASN1 constification is done.
  */
@@ -854,38 +719,6 @@
 
 DEFINE_STACK_OF(ASN1_VALUE)
 
-/* Functions used internally by the ASN1 code */
-
-int ASN1_item_ex_new(ASN1_VALUE **pval, const ASN1_ITEM *it);
-void ASN1_item_ex_free(ASN1_VALUE **pval, const ASN1_ITEM *it);
-int ASN1_template_new(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt);
-int ASN1_primitive_new(ASN1_VALUE **pval, const ASN1_ITEM *it);
-
-void ASN1_template_free(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt);
-int ASN1_item_ex_d2i(ASN1_VALUE **pval, const unsigned char **in, long len, const ASN1_ITEM *it,
-				int tag, int aclass, char opt, ASN1_TLC *ctx);
-
-int ASN1_item_ex_i2d(ASN1_VALUE **pval, unsigned char **out, const ASN1_ITEM *it, int tag, int aclass);
-void ASN1_primitive_free(ASN1_VALUE **pval, const ASN1_ITEM *it);
-
-int asn1_ex_i2c(ASN1_VALUE **pval, unsigned char *cont, int *putype, const ASN1_ITEM *it);
-int asn1_ex_c2i(ASN1_VALUE **pval, const unsigned char *cont, int len, int utype, char *free_cont, const ASN1_ITEM *it);
-
-int asn1_get_choice_selector(ASN1_VALUE **pval, const ASN1_ITEM *it);
-int asn1_set_choice_selector(ASN1_VALUE **pval, int value, const ASN1_ITEM *it);
-
-ASN1_VALUE ** asn1_get_field_ptr(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt);
-
-const ASN1_TEMPLATE *asn1_do_adb(ASN1_VALUE **pval, const ASN1_TEMPLATE *tt, int nullerr);
-
-void asn1_refcount_set_one(ASN1_VALUE **pval, const ASN1_ITEM *it);
-int asn1_refcount_dec_and_test_zero(ASN1_VALUE **pval, const ASN1_ITEM *it);
-
-void asn1_enc_init(ASN1_VALUE **pval, const ASN1_ITEM *it);
-void asn1_enc_free(ASN1_VALUE **pval, const ASN1_ITEM *it);
-int asn1_enc_restore(int *len, unsigned char **out, ASN1_VALUE **pval, const ASN1_ITEM *it);
-int asn1_enc_save(ASN1_VALUE **pval, const unsigned char *in, int inlen, const ASN1_ITEM *it);
-
 #ifdef  __cplusplus
 }
 #endif
diff --git a/deps/boringssl/src/include/openssl/base.h b/deps/boringssl/src/include/openssl/base.h
index 7f6acce..90924e6 100644
--- a/deps/boringssl/src/include/openssl/base.h
+++ b/deps/boringssl/src/include/openssl/base.h
@@ -90,19 +90,19 @@
 #elif defined(__x86) || defined(__i386) || defined(__i386__) || defined(_M_IX86)
 #define OPENSSL_32_BIT
 #define OPENSSL_X86
-#elif defined(__aarch64__)
+#elif defined(__AARCH64EL__) || defined(_M_ARM64)
 #define OPENSSL_64_BIT
 #define OPENSSL_AARCH64
-#elif defined(__arm) || defined(__arm__) || defined(_M_ARM)
+#elif defined(__ARMEL__) || defined(_M_ARM)
 #define OPENSSL_32_BIT
 #define OPENSSL_ARM
 #elif (defined(__PPC64__) || defined(__powerpc64__)) && defined(_LITTLE_ENDIAN)
 #define OPENSSL_64_BIT
 #define OPENSSL_PPC64LE
-#elif defined(__mips__) && !defined(__LP64__)
+#elif defined(__MIPSEL__) && !defined(__LP64__)
 #define OPENSSL_32_BIT
 #define OPENSSL_MIPS
-#elif defined(__mips__) && defined(__LP64__)
+#elif defined(__MIPSEL__) && defined(__LP64__)
 #define OPENSSL_64_BIT
 #define OPENSSL_MIPS64
 #elif defined(__pnacl__)
@@ -138,7 +138,10 @@
 #define OPENSSL_WINDOWS
 #endif
 
-#if defined(__linux__)
+// Trusty isn't Linux but currently defines __linux__. As a workaround, we
+// exclude it here.
+// TODO(b/169780122): Remove this workaround once Trusty no longer defines it.
+#if defined(__linux__) && !defined(TRUSTY)
 #define OPENSSL_LINUX
 #endif
 
@@ -153,6 +156,10 @@
 
 #if defined(__ANDROID_API__)
 #define OPENSSL_ANDROID
+#if defined(BORINGSSL_FIPS)
+// The FIPS module on Android passively receives entropy.
+#define BORINGSSL_FIPS_PASSIVE_ENTROPY
+#endif
 #endif
 
 // BoringSSL requires platform's locking APIs to make internal global state
@@ -173,7 +180,7 @@
 #endif
 
 #define OPENSSL_IS_BORINGSSL
-#define OPENSSL_VERSION_NUMBER 0x1010007f
+#define OPENSSL_VERSION_NUMBER 0x1010107f
 #define SSLEAY_VERSION_NUMBER OPENSSL_VERSION_NUMBER
 
 // BORINGSSL_API_VERSION is a positive integer that increments as BoringSSL
@@ -184,7 +191,7 @@
 // A consumer may use this symbol in the preprocessor to temporarily build
 // against multiple revisions of BoringSSL at the same time. It is not
 // recommended to do so for longer than is necessary.
-#define BORINGSSL_API_VERSION 10
+#define BORINGSSL_API_VERSION 14
 
 #if defined(BORINGSSL_SHARED_LIBRARY)
 
@@ -369,6 +376,7 @@
 typedef struct bignum_st BIGNUM;
 typedef struct bio_method_st BIO_METHOD;
 typedef struct bio_st BIO;
+typedef struct blake2b_state_st BLAKE2B_CTX;
 typedef struct bn_gencb_st BN_GENCB;
 typedef struct bn_mont_ctx_st BN_MONT_CTX;
 typedef struct buf_mem_st BUF_MEM;
diff --git a/deps/boringssl/src/include/openssl/bio.h b/deps/boringssl/src/include/openssl/bio.h
index da0dcdf..f25492a 100644
--- a/deps/boringssl/src/include/openssl/bio.h
+++ b/deps/boringssl/src/include/openssl/bio.h
@@ -199,6 +199,10 @@
 // retried. The return value is one of the |BIO_RR_*| values.
 OPENSSL_EXPORT int BIO_get_retry_reason(const BIO *bio);
 
+// BIO_set_retry_reason sets the special I/O operation that needs to be retried
+// to |reason|, which should be one of the |BIO_RR_*| values.
+OPENSSL_EXPORT void BIO_set_retry_reason(BIO *bio, int reason);
+
 // BIO_clear_flags ANDs |bio->flags| with the bitwise-complement of |flags|.
 OPENSSL_EXPORT void BIO_clear_flags(BIO *bio, int flags);
 
diff --git a/deps/boringssl/src/include/openssl/blake2.h b/deps/boringssl/src/include/openssl/blake2.h
new file mode 100644
index 0000000..9ec1e6c
--- /dev/null
+++ b/deps/boringssl/src/include/openssl/blake2.h
@@ -0,0 +1,62 @@
+/* Copyright (c) 2021, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#ifndef OPENSSL_HEADER_BLAKE2_H
+#define OPENSSL_HEADER_BLAKE2_H
+
+#include <openssl/base.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+#define BLAKE2B256_DIGEST_LENGTH (256 / 8)
+#define BLAKE2B_CBLOCK 128
+
+struct blake2b_state_st {
+  uint64_t h[8];
+  uint64_t t_low, t_high;
+  union {
+    uint8_t bytes[BLAKE2B_CBLOCK];
+    uint64_t words[16];
+  } block;
+  size_t block_used;
+};
+
+// BLAKE2B256_Init initialises |b2b| to perform a BLAKE2b-256 hash. There are no
+// pointers inside |b2b| thus release of |b2b| is purely managed by the caller.
+OPENSSL_EXPORT void BLAKE2B256_Init(BLAKE2B_CTX *b2b);
+
+// BLAKE2B256_Update appends |len| bytes from |data| to the digest being
+// calculated by |b2b|.
+OPENSSL_EXPORT void BLAKE2B256_Update(BLAKE2B_CTX *b2b, const void *data,
+                                      size_t len);
+
+// BLAKE2B256_Final completes the digest calculated by |b2b| and writes
+// |BLAKE2B256_DIGEST_LENGTH| bytes to |out|.
+OPENSSL_EXPORT void BLAKE2B256_Final(uint8_t out[BLAKE2B256_DIGEST_LENGTH],
+                                     BLAKE2B_CTX *b2b);
+
+// BLAKE2B256 writes the BLAKE2b-256 digest of |len| bytes from |data| to
+// |out|.
+OPENSSL_EXPORT void BLAKE2B256(const uint8_t *data, size_t len,
+                               uint8_t out[BLAKE2B256_DIGEST_LENGTH]);
+
+
+#if defined(__cplusplus)
+}  // extern C
+#endif
+
+#endif  // OPENSSL_HEADER_BLAKE2_H
diff --git a/deps/boringssl/src/include/openssl/bytestring.h b/deps/boringssl/src/include/openssl/bytestring.h
index 1f9c879..5ae0348 100644
--- a/deps/boringssl/src/include/openssl/bytestring.h
+++ b/deps/boringssl/src/include/openssl/bytestring.h
@@ -252,12 +252,16 @@
                                             size_t *out_header_len);
 
 // CBS_get_any_ber_asn1_element acts the same as |CBS_get_any_asn1_element| but
-// also allows indefinite-length elements to be returned. In that case,
-// |*out_header_len| and |CBS_len(out)| will both be two as only the header is
-// returned, otherwise it behaves the same as the previous function.
+// also allows indefinite-length elements to be returned and does not enforce
+// that lengths are minimal. For indefinite-lengths, |*out_header_len| and
+// |CBS_len(out)| will be equal as only the header is returned (although this is
+// also true for empty elements so the length must be checked too). If
+// |out_ber_found| is not NULL then it is set to one if any case of invalid DER
+// but valid BER is found, and to zero otherwise.
 OPENSSL_EXPORT int CBS_get_any_ber_asn1_element(CBS *cbs, CBS *out,
                                                 unsigned *out_tag,
-                                                size_t *out_header_len);
+                                                size_t *out_header_len,
+                                                int *out_ber_found);
 
 // CBS_get_asn1_uint64 gets an ASN.1 INTEGER from |cbs| using |CBS_get_asn1|
 // and sets |*out| to its value. It returns one on success and zero on error,
@@ -310,14 +314,25 @@
                                               int default_value);
 
 // CBS_is_valid_asn1_bitstring returns one if |cbs| is a valid ASN.1 BIT STRING
-// and zero otherwise.
+// body and zero otherwise.
 OPENSSL_EXPORT int CBS_is_valid_asn1_bitstring(const CBS *cbs);
 
 // CBS_asn1_bitstring_has_bit returns one if |cbs| is a valid ASN.1 BIT STRING
-// and the specified bit is present and set. Otherwise, it returns zero. |bit|
-// is indexed starting from zero.
+// body and the specified bit is present and set. Otherwise, it returns zero.
+// |bit| is indexed starting from zero.
 OPENSSL_EXPORT int CBS_asn1_bitstring_has_bit(const CBS *cbs, unsigned bit);
 
+// CBS_is_valid_asn1_integer returns one if |cbs| is a valid ASN.1 INTEGER
+// body and zero otherwise. On success, if |out_is_negative| is non-NULL,
+// |*out_is_negative| will be set to one if |cbs| is negative and zero
+// otherwise.
+OPENSSL_EXPORT int CBS_is_valid_asn1_integer(const CBS *cbs,
+                                             int *out_is_negative);
+
+// CBS_is_unsigned_asn1_integer returns one if |cbs| is a valid non-negative
+// ASN.1 INTEGER body and zero otherwise.
+OPENSSL_EXPORT int CBS_is_unsigned_asn1_integer(const CBS *cbs);
+
 // CBS_asn1_oid_to_text interprets |cbs| as DER-encoded ASN.1 OBJECT IDENTIFIER
 // contents (not including the element framing) and returns the ASCII
 // representation (e.g., "1.2.840.113554.4.1.72585") in a newly-allocated
diff --git a/deps/boringssl/src/include/openssl/cipher.h b/deps/boringssl/src/include/openssl/cipher.h
index d22a6c2..c6bec48 100644
--- a/deps/boringssl/src/include/openssl/cipher.h
+++ b/deps/boringssl/src/include/openssl/cipher.h
@@ -380,6 +380,12 @@
 // processing.
 #define EVP_CIPH_CUSTOM_COPY 0x1000
 
+// EVP_CIPH_FLAG_NON_FIPS_ALLOW is meaningless. In OpenSSL it permits non-FIPS
+// algorithms in FIPS mode. But BoringSSL FIPS mode doesn't prohibit algorithms
+// (it's up to the caller to use the FIPS module in a fashion compliant with
+// their needs). Thus this exists only to allow code to compile.
+#define EVP_CIPH_FLAG_NON_FIPS_ALLOW 0
+
 
 // Deprecated functions
 
@@ -425,9 +431,24 @@
 // EVP_aes_128_cfb128 is only available in decrepit.
 OPENSSL_EXPORT const EVP_CIPHER *EVP_aes_128_cfb128(void);
 
+// EVP_aes_128_cfb is an alias for |EVP_aes_128_cfb128| and is only available in
+// decrepit.
+OPENSSL_EXPORT const EVP_CIPHER *EVP_aes_128_cfb(void);
+
+// EVP_aes_192_cfb128 is only available in decrepit.
+OPENSSL_EXPORT const EVP_CIPHER *EVP_aes_192_cfb128(void);
+
+// EVP_aes_192_cfb is an alias for |EVP_aes_192_cfb128| and is only available in
+// decrepit.
+OPENSSL_EXPORT const EVP_CIPHER *EVP_aes_192_cfb(void);
+
 // EVP_aes_256_cfb128 is only available in decrepit.
 OPENSSL_EXPORT const EVP_CIPHER *EVP_aes_256_cfb128(void);
 
+// EVP_aes_256_cfb is an alias for |EVP_aes_256_cfb128| and is only available in
+// decrepit.
+OPENSSL_EXPORT const EVP_CIPHER *EVP_aes_256_cfb(void);
+
 // EVP_bf_ecb is Blowfish in ECB mode and is only available in decrepit.
 OPENSSL_EXPORT const EVP_CIPHER *EVP_bf_ecb(void);
 
diff --git a/deps/boringssl/src/include/openssl/crypto.h b/deps/boringssl/src/include/openssl/crypto.h
index 0dc5373..b820e40 100644
--- a/deps/boringssl/src/include/openssl/crypto.h
+++ b/deps/boringssl/src/include/openssl/crypto.h
@@ -76,7 +76,7 @@
 
 // OPENSSL_VERSION_TEXT contains a string the identifies the version of
 // “OpenSSL”. node.js requires a version number in this text.
-#define OPENSSL_VERSION_TEXT "OpenSSL 1.1.0 (compatible; BoringSSL)"
+#define OPENSSL_VERSION_TEXT "OpenSSL 1.1.1 (compatible; BoringSSL)"
 
 #define OPENSSL_VERSION 0
 #define OPENSSL_CFLAGS 1
diff --git a/deps/boringssl/src/include/openssl/des.h b/deps/boringssl/src/include/openssl/des.h
index af1c822..539b2c5 100644
--- a/deps/boringssl/src/include/openssl/des.h
+++ b/deps/boringssl/src/include/openssl/des.h
@@ -65,6 +65,12 @@
 
 
 // DES.
+//
+// This module is deprecated and retained for legacy reasons only. It is slow
+// and may leak key material with timing or cache side channels. Moreover,
+// single-keyed DES is broken and can be brute-forced in under a day.
+//
+// Use a modern cipher, such as AES-GCM or ChaCha20-Poly1305, instead.
 
 
 typedef struct DES_cblock_st {
diff --git a/deps/boringssl/src/include/openssl/dh.h b/deps/boringssl/src/include/openssl/dh.h
index f3badcc..9a86e9e 100644
--- a/deps/boringssl/src/include/openssl/dh.h
+++ b/deps/boringssl/src/include/openssl/dh.h
@@ -59,7 +59,6 @@
 
 #include <openssl/base.h>
 
-#include <openssl/ex_data.h>
 #include <openssl/thread.h>
 
 #if defined(__cplusplus)
@@ -69,6 +68,10 @@
 
 // DH contains functions for performing Diffie-Hellman key agreement in
 // multiplicative groups.
+//
+// This module is deprecated and retained for legacy reasons only. It is not
+// considered a priority for performance or hardening work. Do not use it in
+// new code. Use X25519 or ECDH with P-256 instead.
 
 
 // Allocation and destruction.
@@ -136,6 +139,11 @@
 // and returned. It returns NULL on allocation failure.
 OPENSSL_EXPORT BIGNUM *BN_get_rfc3526_prime_1536(BIGNUM *ret);
 
+// DH_get_rfc7919_2048 returns the group `ffdhe2048` from
+// https://tools.ietf.org/html/rfc7919#appendix-A.1. It returns NULL if out
+// of memory.
+OPENSSL_EXPORT DH *DH_get_rfc7919_2048(void);
+
 
 // Parameter generation.
 
@@ -160,12 +168,37 @@
 // |dh|. It returns one on success and zero on error.
 OPENSSL_EXPORT int DH_generate_key(DH *dh);
 
-// DH_compute_key calculates the shared key between |dh| and |peers_key| and
-// writes it as a big-endian integer into |out|, which must have |DH_size|
-// bytes of space. It returns the number of bytes written, or a negative number
-// on error.
-OPENSSL_EXPORT int DH_compute_key(uint8_t *out, const BIGNUM *peers_key,
-                                  DH *dh);
+// DH_compute_key_padded calculates the shared key between |dh| and |peers_key|
+// and writes it as a big-endian integer into |out|, padded up to |DH_size|
+// bytes. It returns the number of bytes written, which is always |DH_size|, or
+// a negative number on error. |out| must have |DH_size| bytes of space.
+//
+// WARNING: this differs from the usual BoringSSL return-value convention.
+//
+// Note this function differs from |DH_compute_key| in that it preserves leading
+// zeros in the secret. This function is the preferred variant. It matches PKCS
+// #3 and avoids some side channel attacks. However, the two functions are not
+// drop-in replacements for each other. Using a different variant than the
+// application expects will result in sporadic key mismatches.
+//
+// Callers that expect a fixed-width secret should use this function over
+// |DH_compute_key|. Callers that use either function should migrate to a modern
+// primitive such as X25519 or ECDH with P-256 instead.
+OPENSSL_EXPORT int DH_compute_key_padded(uint8_t *out, const BIGNUM *peers_key,
+                                         DH *dh);
+
+// DH_compute_key_hashed calculates the shared key between |dh| and |peers_key|
+// and hashes it with the given |digest|. If the hash output is less than
+// |max_out_len| bytes then it writes the hash output to |out| and sets
+// |*out_len| to the number of bytes written. Otherwise it signals an error. It
+// returns one on success or zero on error.
+//
+// NOTE: this follows the usual BoringSSL return-value convention, but that's
+// different from |DH_compute_key| and |DH_compute_key_padded|.
+OPENSSL_EXPORT int DH_compute_key_hashed(DH *dh, uint8_t *out, size_t *out_len,
+                                         size_t max_out_len,
+                                         const BIGNUM *peers_key,
+                                         const EVP_MD *digest);
 
 
 // Utility functions.
@@ -225,18 +258,6 @@
 OPENSSL_EXPORT int DH_marshal_parameters(CBB *cbb, const DH *dh);
 
 
-// ex_data functions.
-//
-// See |ex_data.h| for details.
-
-OPENSSL_EXPORT int DH_get_ex_new_index(long argl, void *argp,
-                                       CRYPTO_EX_unused *unused,
-                                       CRYPTO_EX_dup *dup_unused,
-                                       CRYPTO_EX_free *free_func);
-OPENSSL_EXPORT int DH_set_ex_data(DH *d, int idx, void *arg);
-OPENSSL_EXPORT void *DH_get_ex_data(DH *d, int idx);
-
-
 // Deprecated functions.
 
 // DH_generate_parameters behaves like |DH_generate_parameters_ex|, which is
@@ -266,6 +287,28 @@
 // Use |DH_marshal_parameters| instead.
 OPENSSL_EXPORT int i2d_DHparams(const DH *in, unsigned char **outp);
 
+// DH_compute_key behaves like |DH_compute_key_padded| but, contrary to PKCS #3,
+// returns a variable-length shared key with leading zeros removed. It returns
+// the number of bytes written, or a negative number on error. |out| must have
+// |DH_size| bytes of space.
+//
+// WARNING: this differs from the usual BoringSSL return-value convention.
+//
+// Note this function's running time and memory access pattern leaks information
+// about the shared secret. Particularly if |dh| is reused, this may result in
+// side channel attacks such as https://raccoon-attack.com/.
+//
+// |DH_compute_key_padded| is the preferred variant and avoids the above
+// attacks. However, the two functions are not drop-in replacements for each
+// other. Using a different variant than the application expects will result in
+// sporadic key mismatches.
+//
+// Callers that expect a fixed-width secret should use |DH_compute_key_padded|
+// instead. Callers that use either function should migrate to a modern
+// primitive such as X25519 or ECDH with P-256 instead.
+OPENSSL_EXPORT int DH_compute_key(uint8_t *out, const BIGNUM *peers_key,
+                                  DH *dh);
+
 
 struct dh_st {
   BIGNUM *p;
@@ -289,7 +332,6 @@
 
   int flags;
   CRYPTO_refcount_t references;
-  CRYPTO_EX_DATA ex_data;
 };
 
 
diff --git a/deps/boringssl/src/include/openssl/digest.h b/deps/boringssl/src/include/openssl/digest.h
index 7b0ed06..66f1b5d 100644
--- a/deps/boringssl/src/include/openssl/digest.h
+++ b/deps/boringssl/src/include/openssl/digest.h
@@ -84,6 +84,7 @@
 OPENSSL_EXPORT const EVP_MD *EVP_sha384(void);
 OPENSSL_EXPORT const EVP_MD *EVP_sha512(void);
 OPENSSL_EXPORT const EVP_MD *EVP_sha512_256(void);
+OPENSSL_EXPORT const EVP_MD *EVP_blake2b256(void);
 
 // EVP_md5_sha1 is a TLS-specific |EVP_MD| which computes the concatenation of
 // MD5 and SHA-1, as used in TLS 1.1 and below.
@@ -283,6 +284,15 @@
 // EVP_MD_meth_get_flags calls |EVP_MD_flags|.
 OPENSSL_EXPORT uint32_t EVP_MD_meth_get_flags(const EVP_MD *md);
 
+// EVP_MD_CTX_set_flags does nothing.
+OPENSSL_EXPORT void EVP_MD_CTX_set_flags(EVP_MD_CTX *ctx, int flags);
+
+// EVP_MD_CTX_FLAG_NON_FIPS_ALLOW is meaningless. In OpenSSL it permits non-FIPS
+// algorithms in FIPS mode. But BoringSSL FIPS mode doesn't prohibit algorithms
+// (it's up to the caller to use the FIPS module in a fashion compliant with
+// their needs). Thus this exists only to allow code to compile.
+#define EVP_MD_CTX_FLAG_NON_FIPS_ALLOW 0
+
 
 struct evp_md_pctx_ops;
 
diff --git a/deps/boringssl/src/include/openssl/ec.h b/deps/boringssl/src/include/openssl/ec.h
index cfad93e..363c096 100644
--- a/deps/boringssl/src/include/openssl/ec.h
+++ b/deps/boringssl/src/include/openssl/ec.h
@@ -221,6 +221,13 @@
                                                        BIGNUM *x, BIGNUM *y,
                                                        BN_CTX *ctx);
 
+// EC_POINT_get_affine_coordinates is an alias of
+// |EC_POINT_get_affine_coordinates_GFp|.
+OPENSSL_EXPORT int EC_POINT_get_affine_coordinates(const EC_GROUP *group,
+                                                   const EC_POINT *point,
+                                                   BIGNUM *x, BIGNUM *y,
+                                                   BN_CTX *ctx);
+
 // EC_POINT_set_affine_coordinates_GFp sets the value of |point| to be
 // (|x|, |y|). The |ctx| argument may be used if not NULL. It returns one
 // on success or zero on error. It's considered an error if the point is not on
@@ -237,6 +244,14 @@
                                                        const BIGNUM *y,
                                                        BN_CTX *ctx);
 
+// EC_POINT_set_affine_coordinates is an alias of
+// |EC_POINT_set_affine_coordinates_GFp|.
+OPENSSL_EXPORT int EC_POINT_set_affine_coordinates(const EC_GROUP *group,
+                                                   EC_POINT *point,
+                                                   const BIGNUM *x,
+                                                   const BIGNUM *y,
+                                                   BN_CTX *ctx);
+
 // EC_POINT_point2oct serialises |point| into the X9.62 form given by |form|
 // into, at most, |len| bytes at |buf|. It returns the number of bytes written
 // or zero on error if |buf| is non-NULL, else the number of bytes needed. The
diff --git a/deps/boringssl/src/include/openssl/evp.h b/deps/boringssl/src/include/openssl/evp.h
index fe6c8b6..0710792 100644
--- a/deps/boringssl/src/include/openssl/evp.h
+++ b/deps/boringssl/src/include/openssl/evp.h
@@ -219,7 +219,9 @@
 //
 // The caller must check the type of the parsed private key to ensure it is
 // suitable and validate other desired key properties such as RSA modulus size
-// or EC curve.
+// or EC curve. In particular, RSA private key operations scale cubically, so
+// applications accepting RSA private keys from external sources may need to
+// bound key sizes (use |EVP_PKEY_bits| or |RSA_bits|) to avoid a DoS vector.
 //
 // A PrivateKeyInfo ends with an optional set of attributes. These are not
 // processed and so this function will silently ignore any trailing data in the
@@ -714,7 +716,8 @@
 // RSA specific control functions.
 
 // EVP_PKEY_CTX_set_rsa_padding sets the padding type to use. It should be one
-// of the |RSA_*_PADDING| values. Returns one on success or zero on error.
+// of the |RSA_*_PADDING| values. Returns one on success or zero on error. By
+// default, the padding is |RSA_PKCS1_PADDING|.
 OPENSSL_EXPORT int EVP_PKEY_CTX_set_rsa_padding(EVP_PKEY_CTX *ctx, int padding);
 
 // EVP_PKEY_CTX_get_rsa_padding sets |*out_padding| to the current padding
@@ -732,6 +735,8 @@
 // If unsure, use -1.
 //
 // Returns one on success or zero on error.
+//
+// TODO(davidben): The default is currently -2. Switch it to -1.
 OPENSSL_EXPORT int EVP_PKEY_CTX_set_rsa_pss_saltlen(EVP_PKEY_CTX *ctx,
                                                     int salt_len);
 
@@ -756,7 +761,10 @@
                                                       BIGNUM *e);
 
 // EVP_PKEY_CTX_set_rsa_oaep_md sets |md| as the digest used in OAEP padding.
-// Returns one on success or zero on error.
+// Returns one on success or zero on error. If unset, the default is SHA-1.
+// Callers are recommended to overwrite this default.
+//
+// TODO(davidben): Remove the default and require callers specify this.
 OPENSSL_EXPORT int EVP_PKEY_CTX_set_rsa_oaep_md(EVP_PKEY_CTX *ctx,
                                                 const EVP_MD *md);
 
@@ -767,6 +775,10 @@
 
 // EVP_PKEY_CTX_set_rsa_mgf1_md sets |md| as the digest used in MGF1. Returns
 // one on success or zero on error.
+//
+// If unset, the default is the signing hash for |RSA_PKCS1_PSS_PADDING| and the
+// OAEP hash for |RSA_PKCS1_OAEP_PADDING|. Callers are recommended to use this
+// default and not call this function.
 OPENSSL_EXPORT int EVP_PKEY_CTX_set_rsa_mgf1_md(EVP_PKEY_CTX *ctx,
                                                 const EVP_MD *md);
 
@@ -947,6 +959,75 @@
 OPENSSL_EXPORT int EVP_PKEY_CTX_set_rsa_pss_keygen_mgf1_md(EVP_PKEY_CTX *ctx,
                                                            const EVP_MD *md);
 
+// i2d_PUBKEY marshals a public key from |pkey| as a DER-encoded
+// SubjectPublicKeyInfo. If |outp| is not NULL, the result is written to |*outp|
+// and |*outp| is advanced just past the output. It returns the number of bytes
+// in the result, whether written or not, or a negative value on error.
+//
+// Use |EVP_marshal_public_key| instead.
+OPENSSL_EXPORT int i2d_PUBKEY(const EVP_PKEY *pkey, uint8_t **outp);
+
+// d2i_PUBKEY parses a DER-encoded SubjectPublicKeyInfo from |len| bytes at
+// |*inp|. It returns a newly-allocated result, or NULL on error. On success,
+// |*inp| is advanced past the DER structure. If |out| is not NULL, it also
+// frees any existing object pointed to by |*out| and writes the result.
+//
+// Use |EVP_parse_public_key| instead.
+OPENSSL_EXPORT EVP_PKEY *d2i_PUBKEY(EVP_PKEY **out, const uint8_t **inp,
+                                    long len);
+
+// i2d_RSA_PUBKEY marshals |rsa| as a DER-encoded SubjectPublicKeyInfo. If
+// |outp| is not NULL, the result is written to |*outp| and
+// |*outp| is advanced just past the output. It returns the number of bytes in
+// the result, whether written or not, or a negative value on error.
+//
+// Use |EVP_marshal_public_key| instead.
+OPENSSL_EXPORT int i2d_RSA_PUBKEY(const RSA *rsa, uint8_t **outp);
+
+// d2i_RSA_PUBKEY parses an RSA public key as a DER-encoded SubjectPublicKeyInfo
+// from |len| bytes at |*inp|. It returns a newly-allocated result, or NULL on
+// error. On success, |*inp| is advanced past the DER structure. If |out| is not
+// NULL, it also frees any existing object pointed to by |*out| and writes the
+// result.
+//
+// Use |EVP_parse_public_key| instead.
+OPENSSL_EXPORT RSA *d2i_RSA_PUBKEY(RSA **out, const uint8_t **inp, long len);
+
+// i2d_DSA_PUBKEY marshals |dsa| as a DER-encoded SubjectPublicKeyInfo. If
+// |outp| is not NULL, the result is written to |*outp| and |*outp| is advanced
+// just past the output. It returns the number of bytes in the result, whether
+// written or not, or a negative value on error.
+//
+// Use |EVP_marshal_public_key| instead.
+OPENSSL_EXPORT int i2d_DSA_PUBKEY(const DSA *dsa, uint8_t **outp);
+
+// d2i_DSA_PUBKEY parses a DSA public key as a DER-encoded SubjectPublicKeyInfo
+// from |len| bytes at |*inp|. It returns a newly-allocated result, or NULL on
+// error. On success, |*inp| is advanced past the DER structure. If |out| is not
+// NULL, it also frees any existing object pointed to by |*out| and writes the
+// result.
+//
+// Use |EVP_parse_public_key| instead.
+OPENSSL_EXPORT DSA *d2i_DSA_PUBKEY(DSA **out, const uint8_t **inp, long len);
+
+// i2d_EC_PUBKEY marshals |ec_key| as a DER-encoded SubjectPublicKeyInfo. If
+// |outp| is not NULL, the result is written to |*outp| and |*outp| is advanced
+// just past the output. It returns the number of bytes in the result, whether
+// written or not, or a negative value on error.
+//
+// Use |EVP_marshal_public_key| instead.
+OPENSSL_EXPORT int i2d_EC_PUBKEY(const EC_KEY *ec_key, uint8_t **outp);
+
+// d2i_EC_PUBKEY parses an EC public key as a DER-encoded SubjectPublicKeyInfo
+// from |len| bytes at |*inp|. It returns a newly-allocated result, or NULL on
+// error. On success, |*inp| is advanced past the DER structure. If |out| is not
+// NULL, it also frees any existing object pointed to by |*out| and writes the
+// result.
+//
+// Use |EVP_parse_public_key| instead.
+OPENSSL_EXPORT EC_KEY *d2i_EC_PUBKEY(EC_KEY **out, const uint8_t **inp,
+                                     long len);
+
 
 // Preprocessor compatibility section (hidden).
 //
@@ -1046,5 +1127,6 @@
 #define EVP_R_INVALID_PARAMETERS 133
 #define EVP_R_INVALID_PEER_KEY 134
 #define EVP_R_NOT_XOF_OR_INVALID_LENGTH 135
+#define EVP_R_EMPTY_PSK 136
 
 #endif  // OPENSSL_HEADER_EVP_H
diff --git a/deps/boringssl/src/include/openssl/opensslconf.h b/deps/boringssl/src/include/openssl/opensslconf.h
index 3c6ffd8..3f1faf3 100644
--- a/deps/boringssl/src/include/openssl/opensslconf.h
+++ b/deps/boringssl/src/include/openssl/opensslconf.h
@@ -55,6 +55,9 @@
 #define OPENSSL_NO_RMD160
 #define OPENSSL_NO_SCTP
 #define OPENSSL_NO_SEED
+#define OPENSSL_NO_SM2
+#define OPENSSL_NO_SM3
+#define OPENSSL_NO_SM4
 #define OPENSSL_NO_SRP
 #define OPENSSL_NO_SSL2
 #define OPENSSL_NO_SSL3
diff --git a/deps/boringssl/src/include/openssl/pem.h b/deps/boringssl/src/include/openssl/pem.h
index 9c0ff93..f39989e 100644
--- a/deps/boringssl/src/include/openssl/pem.h
+++ b/deps/boringssl/src/include/openssl/pem.h
@@ -62,64 +62,63 @@
 #include <openssl/cipher.h>
 #include <openssl/digest.h>
 #include <openssl/evp.h>
-#include <openssl/stack.h>
 #include <openssl/pkcs7.h>
+#include <openssl/stack.h>
 #include <openssl/x509.h>
 
-/* For compatibility with open-iscsi, which assumes that it can get
- * |OPENSSL_malloc| from pem.h or err.h */
+// For compatibility with open-iscsi, which assumes that it can get
+// |OPENSSL_malloc| from pem.h or err.h
 #include <openssl/crypto.h>
 
-#ifdef  __cplusplus
+#ifdef __cplusplus
 extern "C" {
 #endif
 
 
-#define PEM_BUFSIZE		1024
+#define PEM_BUFSIZE 1024
 
-#define PEM_STRING_X509_OLD	"X509 CERTIFICATE"
-#define PEM_STRING_X509		"CERTIFICATE"
-#define PEM_STRING_X509_PAIR	"CERTIFICATE PAIR"
-#define PEM_STRING_X509_TRUSTED	"TRUSTED CERTIFICATE"
-#define PEM_STRING_X509_REQ_OLD	"NEW CERTIFICATE REQUEST"
-#define PEM_STRING_X509_REQ	"CERTIFICATE REQUEST"
-#define PEM_STRING_X509_CRL	"X509 CRL"
-#define PEM_STRING_EVP_PKEY	"ANY PRIVATE KEY"
-#define PEM_STRING_PUBLIC	"PUBLIC KEY"
-#define PEM_STRING_RSA		"RSA PRIVATE KEY"
-#define PEM_STRING_RSA_PUBLIC	"RSA PUBLIC KEY"
-#define PEM_STRING_DSA		"DSA PRIVATE KEY"
-#define PEM_STRING_DSA_PUBLIC	"DSA PUBLIC KEY"
+#define PEM_STRING_X509_OLD "X509 CERTIFICATE"
+#define PEM_STRING_X509 "CERTIFICATE"
+#define PEM_STRING_X509_PAIR "CERTIFICATE PAIR"
+#define PEM_STRING_X509_TRUSTED "TRUSTED CERTIFICATE"
+#define PEM_STRING_X509_REQ_OLD "NEW CERTIFICATE REQUEST"
+#define PEM_STRING_X509_REQ "CERTIFICATE REQUEST"
+#define PEM_STRING_X509_CRL "X509 CRL"
+#define PEM_STRING_EVP_PKEY "ANY PRIVATE KEY"
+#define PEM_STRING_PUBLIC "PUBLIC KEY"
+#define PEM_STRING_RSA "RSA PRIVATE KEY"
+#define PEM_STRING_RSA_PUBLIC "RSA PUBLIC KEY"
+#define PEM_STRING_DSA "DSA PRIVATE KEY"
+#define PEM_STRING_DSA_PUBLIC "DSA PUBLIC KEY"
 #define PEM_STRING_EC "EC PRIVATE KEY"
-#define PEM_STRING_PKCS7	"PKCS7"
-#define PEM_STRING_PKCS7_SIGNED	"PKCS #7 SIGNED DATA"
-#define PEM_STRING_PKCS8	"ENCRYPTED PRIVATE KEY"
-#define PEM_STRING_PKCS8INF	"PRIVATE KEY"
-#define PEM_STRING_DHPARAMS	"DH PARAMETERS"
-#define PEM_STRING_SSL_SESSION	"SSL SESSION PARAMETERS"
-#define PEM_STRING_DSAPARAMS	"DSA PARAMETERS"
+#define PEM_STRING_PKCS7 "PKCS7"
+#define PEM_STRING_PKCS7_SIGNED "PKCS #7 SIGNED DATA"
+#define PEM_STRING_PKCS8 "ENCRYPTED PRIVATE KEY"
+#define PEM_STRING_PKCS8INF "PRIVATE KEY"
+#define PEM_STRING_DHPARAMS "DH PARAMETERS"
+#define PEM_STRING_SSL_SESSION "SSL SESSION PARAMETERS"
+#define PEM_STRING_DSAPARAMS "DSA PARAMETERS"
 #define PEM_STRING_ECDSA_PUBLIC "ECDSA PUBLIC KEY"
-#define PEM_STRING_ECPRIVATEKEY	"EC PRIVATE KEY"
-#define PEM_STRING_CMS		"CMS"
+#define PEM_STRING_ECPRIVATEKEY "EC PRIVATE KEY"
+#define PEM_STRING_CMS "CMS"
 
-/* enc_type is one off */
-#define PEM_TYPE_ENCRYPTED      10
-#define PEM_TYPE_MIC_ONLY       20
-#define PEM_TYPE_MIC_CLEAR      30
-#define PEM_TYPE_CLEAR		40
+// enc_type is one off
+#define PEM_TYPE_ENCRYPTED 10
+#define PEM_TYPE_MIC_ONLY 20
+#define PEM_TYPE_MIC_CLEAR 30
+#define PEM_TYPE_CLEAR 40
 
-/* These macros make the PEM_read/PEM_write functions easier to maintain and
- * write. Now they are all implemented with either:
- * IMPLEMENT_PEM_rw(...) or IMPLEMENT_PEM_rw_cb(...)
- */
+// These macros make the PEM_read/PEM_write functions easier to maintain and
+// write. Now they are all implemented with either:
+// IMPLEMENT_PEM_rw(...) or IMPLEMENT_PEM_rw_cb(...)
 
 #ifdef OPENSSL_NO_FP_API
 
-#define IMPLEMENT_PEM_read_fp(name, type, str, asn1) /**/
-#define IMPLEMENT_PEM_write_fp(name, type, str, asn1) /**/
-#define IMPLEMENT_PEM_write_fp_const(name, type, str, asn1) /**/
-#define IMPLEMENT_PEM_write_cb_fp(name, type, str, asn1) /**/
-#define IMPLEMENT_PEM_write_cb_fp_const(name, type, str, asn1) /**/
+#define IMPLEMENT_PEM_read_fp(name, type, str, asn1)            //
+#define IMPLEMENT_PEM_write_fp(name, type, str, asn1)           //
+#define IMPLEMENT_PEM_write_fp_const(name, type, str, asn1)     //
+#define IMPLEMENT_PEM_write_cb_fp(name, type, str, asn1)        //
+#define IMPLEMENT_PEM_write_cb_fp_const(name, type, str, asn1)  //
 
 #else
 
@@ -228,133 +227,178 @@
   }
 
 #define IMPLEMENT_PEM_write(name, type, str, asn1) \
-	IMPLEMENT_PEM_write_bio(name, type, str, asn1) \
-	IMPLEMENT_PEM_write_fp(name, type, str, asn1) 
+  IMPLEMENT_PEM_write_bio(name, type, str, asn1)   \
+  IMPLEMENT_PEM_write_fp(name, type, str, asn1)
 
 #define IMPLEMENT_PEM_write_const(name, type, str, asn1) \
-	IMPLEMENT_PEM_write_bio_const(name, type, str, asn1) \
-	IMPLEMENT_PEM_write_fp_const(name, type, str, asn1) 
+  IMPLEMENT_PEM_write_bio_const(name, type, str, asn1)   \
+  IMPLEMENT_PEM_write_fp_const(name, type, str, asn1)
 
 #define IMPLEMENT_PEM_write_cb(name, type, str, asn1) \
-	IMPLEMENT_PEM_write_cb_bio(name, type, str, asn1) \
-	IMPLEMENT_PEM_write_cb_fp(name, type, str, asn1) 
+  IMPLEMENT_PEM_write_cb_bio(name, type, str, asn1)   \
+  IMPLEMENT_PEM_write_cb_fp(name, type, str, asn1)
 
 #define IMPLEMENT_PEM_write_cb_const(name, type, str, asn1) \
-	IMPLEMENT_PEM_write_cb_bio_const(name, type, str, asn1) \
-	IMPLEMENT_PEM_write_cb_fp_const(name, type, str, asn1) 
+  IMPLEMENT_PEM_write_cb_bio_const(name, type, str, asn1)   \
+  IMPLEMENT_PEM_write_cb_fp_const(name, type, str, asn1)
 
 #define IMPLEMENT_PEM_read(name, type, str, asn1) \
-	IMPLEMENT_PEM_read_bio(name, type, str, asn1) \
-	IMPLEMENT_PEM_read_fp(name, type, str, asn1) 
+  IMPLEMENT_PEM_read_bio(name, type, str, asn1)   \
+  IMPLEMENT_PEM_read_fp(name, type, str, asn1)
 
 #define IMPLEMENT_PEM_rw(name, type, str, asn1) \
-	IMPLEMENT_PEM_read(name, type, str, asn1) \
-	IMPLEMENT_PEM_write(name, type, str, asn1)
+  IMPLEMENT_PEM_read(name, type, str, asn1)     \
+  IMPLEMENT_PEM_write(name, type, str, asn1)
 
 #define IMPLEMENT_PEM_rw_const(name, type, str, asn1) \
-	IMPLEMENT_PEM_read(name, type, str, asn1) \
-	IMPLEMENT_PEM_write_const(name, type, str, asn1)
+  IMPLEMENT_PEM_read(name, type, str, asn1)           \
+  IMPLEMENT_PEM_write_const(name, type, str, asn1)
 
 #define IMPLEMENT_PEM_rw_cb(name, type, str, asn1) \
-	IMPLEMENT_PEM_read(name, type, str, asn1) \
-	IMPLEMENT_PEM_write_cb(name, type, str, asn1)
+  IMPLEMENT_PEM_read(name, type, str, asn1)        \
+  IMPLEMENT_PEM_write_cb(name, type, str, asn1)
 
-/* These are the same except they are for the declarations */
+// These are the same except they are for the declarations
 
 #if defined(OPENSSL_NO_FP_API)
 
-#define DECLARE_PEM_read_fp(name, type) /**/
-#define DECLARE_PEM_write_fp(name, type) /**/
-#define DECLARE_PEM_write_cb_fp(name, type) /**/
+#define DECLARE_PEM_read_fp(name, type)      //
+#define DECLARE_PEM_write_fp(name, type)     //
+#define DECLARE_PEM_write_cb_fp(name, type)  //
 
 #else
 
-#define DECLARE_PEM_read_fp(name, type) \
-	OPENSSL_EXPORT type *PEM_read_##name(FILE *fp, type **x, pem_password_cb *cb, void *u);
+#define DECLARE_PEM_read_fp(name, type)                    \
+  OPENSSL_EXPORT type *PEM_read_##name(FILE *fp, type **x, \
+                                       pem_password_cb *cb, void *u);
 
 #define DECLARE_PEM_write_fp(name, type) \
-	OPENSSL_EXPORT int PEM_write_##name(FILE *fp, type *x);
+  OPENSSL_EXPORT int PEM_write_##name(FILE *fp, type *x);
 
 #define DECLARE_PEM_write_fp_const(name, type) \
-	OPENSSL_EXPORT int PEM_write_##name(FILE *fp, const type *x);
+  OPENSSL_EXPORT int PEM_write_##name(FILE *fp, const type *x);
 
-#define DECLARE_PEM_write_cb_fp(name, type) \
-	OPENSSL_EXPORT int PEM_write_##name(FILE *fp, type *x, const EVP_CIPHER *enc, \
-	     unsigned char *kstr, int klen, pem_password_cb *cb, void *u);
+#define DECLARE_PEM_write_cb_fp(name, type)                                    \
+  OPENSSL_EXPORT int PEM_write_##name(                                         \
+      FILE *fp, type *x, const EVP_CIPHER *enc, unsigned char *kstr, int klen, \
+      pem_password_cb *cb, void *u);
 
 #endif
 
-#define DECLARE_PEM_read_bio(name, type) \
-	OPENSSL_EXPORT type *PEM_read_bio_##name(BIO *bp, type **x, pem_password_cb *cb, void *u);
+#define DECLARE_PEM_read_bio(name, type)                      \
+  OPENSSL_EXPORT type *PEM_read_bio_##name(BIO *bp, type **x, \
+                                           pem_password_cb *cb, void *u);
 
 #define DECLARE_PEM_write_bio(name, type) \
-	OPENSSL_EXPORT int PEM_write_bio_##name(BIO *bp, type *x);
+  OPENSSL_EXPORT int PEM_write_bio_##name(BIO *bp, type *x);
 
 #define DECLARE_PEM_write_bio_const(name, type) \
-	OPENSSL_EXPORT int PEM_write_bio_##name(BIO *bp, const type *x);
+  OPENSSL_EXPORT int PEM_write_bio_##name(BIO *bp, const type *x);
 
-#define DECLARE_PEM_write_cb_bio(name, type) \
-	OPENSSL_EXPORT int PEM_write_bio_##name(BIO *bp, type *x, const EVP_CIPHER *enc, \
-	     unsigned char *kstr, int klen, pem_password_cb *cb, void *u);
+#define DECLARE_PEM_write_cb_bio(name, type)                                  \
+  OPENSSL_EXPORT int PEM_write_bio_##name(                                    \
+      BIO *bp, type *x, const EVP_CIPHER *enc, unsigned char *kstr, int klen, \
+      pem_password_cb *cb, void *u);
 
 
 #define DECLARE_PEM_write(name, type) \
-	DECLARE_PEM_write_bio(name, type) \
-	DECLARE_PEM_write_fp(name, type) 
+  DECLARE_PEM_write_bio(name, type)   \
+  DECLARE_PEM_write_fp(name, type)
 
 #define DECLARE_PEM_write_const(name, type) \
-	DECLARE_PEM_write_bio_const(name, type) \
-	DECLARE_PEM_write_fp_const(name, type)
+  DECLARE_PEM_write_bio_const(name, type)   \
+  DECLARE_PEM_write_fp_const(name, type)
 
 #define DECLARE_PEM_write_cb(name, type) \
-	DECLARE_PEM_write_cb_bio(name, type) \
-	DECLARE_PEM_write_cb_fp(name, type) 
+  DECLARE_PEM_write_cb_bio(name, type)   \
+  DECLARE_PEM_write_cb_fp(name, type)
 
 #define DECLARE_PEM_read(name, type) \
-	DECLARE_PEM_read_bio(name, type) \
-	DECLARE_PEM_read_fp(name, type)
+  DECLARE_PEM_read_bio(name, type)   \
+  DECLARE_PEM_read_fp(name, type)
 
 #define DECLARE_PEM_rw(name, type) \
-	DECLARE_PEM_read(name, type) \
-	DECLARE_PEM_write(name, type)
+  DECLARE_PEM_read(name, type)     \
+  DECLARE_PEM_write(name, type)
 
 #define DECLARE_PEM_rw_const(name, type) \
-	DECLARE_PEM_read(name, type) \
-	DECLARE_PEM_write_const(name, type)
+  DECLARE_PEM_read(name, type)           \
+  DECLARE_PEM_write_const(name, type)
 
 #define DECLARE_PEM_rw_cb(name, type) \
-	DECLARE_PEM_read(name, type) \
-	DECLARE_PEM_write_cb(name, type)
+  DECLARE_PEM_read(name, type)        \
+  DECLARE_PEM_write_cb(name, type)
 
-/* "userdata": new with OpenSSL 0.9.4 */
+// "userdata": new with OpenSSL 0.9.4
 typedef int pem_password_cb(char *buf, int size, int rwflag, void *userdata);
 
-OPENSSL_EXPORT int	PEM_get_EVP_CIPHER_INFO(char *header, EVP_CIPHER_INFO *cipher);
-OPENSSL_EXPORT int	PEM_do_header (EVP_CIPHER_INFO *cipher, unsigned char *data,long *len, pem_password_cb *callback,void *u);
+OPENSSL_EXPORT int PEM_get_EVP_CIPHER_INFO(char *header,
+                                           EVP_CIPHER_INFO *cipher);
+OPENSSL_EXPORT int PEM_do_header(EVP_CIPHER_INFO *cipher, unsigned char *data,
+                                 long *len, pem_password_cb *callback, void *u);
 
-OPENSSL_EXPORT int	PEM_read_bio(BIO *bp, char **name, char **header, unsigned char **data,long *len);
-OPENSSL_EXPORT int	PEM_write_bio(BIO *bp,const char *name, const char *hdr, const unsigned char *data, long len);
-OPENSSL_EXPORT int PEM_bytes_read_bio(unsigned char **pdata, long *plen, char **pnm, const char *name, BIO *bp, pem_password_cb *cb, void *u);
-OPENSSL_EXPORT void *	PEM_ASN1_read_bio(d2i_of_void *d2i, const char *name, BIO *bp, void **x, pem_password_cb *cb, void *u);
-OPENSSL_EXPORT int	PEM_ASN1_write_bio(i2d_of_void *i2d,const char *name,BIO *bp, void *x, const EVP_CIPHER *enc,unsigned char *kstr,int klen, pem_password_cb *cb, void *u);
+// PEM_read_bio reads from |bp|, until the next PEM block. If one is found, it
+// returns one and sets |*name|, |*header|, and |*data| to newly-allocated
+// buffers containing the PEM type, the header block, and the decoded data,
+// respectively. |*name| and |*header| are NUL-terminated C strings, while
+// |*data| has |*len| bytes. The caller must release each of |*name|, |*header|,
+// and |*data| with |OPENSSL_free| when done. If no PEM block is found, this
+// function returns zero and pushes |PEM_R_NO_START_LINE| to the error queue. If
+// one is found, but there is an error decoding it, it returns zero and pushes
+// some other error to the error queue.
+OPENSSL_EXPORT int PEM_read_bio(BIO *bp, char **name, char **header,
+                                unsigned char **data, long *len);
 
-OPENSSL_EXPORT STACK_OF(X509_INFO) *	PEM_X509_INFO_read_bio(BIO *bp, STACK_OF(X509_INFO) *sk, pem_password_cb *cb, void *u);
-OPENSSL_EXPORT int	PEM_X509_INFO_write_bio(BIO *bp,X509_INFO *xi, EVP_CIPHER *enc, unsigned char *kstr, int klen, pem_password_cb *cd, void *u);
+// PEM_write_bio writes a PEM block to |bp|, containing |len| bytes from |data|
+// as data. |name| and |hdr| are NUL-terminated C strings containing the PEM
+// type and header block, respectively. This function returns zero on error and
+// the number of bytes written on success.
+OPENSSL_EXPORT int PEM_write_bio(BIO *bp, const char *name, const char *hdr,
+                                 const unsigned char *data, long len);
 
-OPENSSL_EXPORT int	PEM_read(FILE *fp, char **name, char **header, unsigned char **data,long *len);
-OPENSSL_EXPORT int	PEM_write(FILE *fp, const char *name, const char *hdr, const unsigned char *data, long len);
-OPENSSL_EXPORT void *  PEM_ASN1_read(d2i_of_void *d2i, const char *name, FILE *fp, void **x, pem_password_cb *cb, void *u);
-OPENSSL_EXPORT int	PEM_ASN1_write(i2d_of_void *i2d,const char *name,FILE *fp, void *x,const EVP_CIPHER *enc,unsigned char *kstr, int klen,pem_password_cb *callback, void *u);
-OPENSSL_EXPORT STACK_OF(X509_INFO) *	PEM_X509_INFO_read(FILE *fp, STACK_OF(X509_INFO) *sk, pem_password_cb *cb, void *u);
+OPENSSL_EXPORT int PEM_bytes_read_bio(unsigned char **pdata, long *plen,
+                                      char **pnm, const char *name, BIO *bp,
+                                      pem_password_cb *cb, void *u);
+OPENSSL_EXPORT void *PEM_ASN1_read_bio(d2i_of_void *d2i, const char *name,
+                                       BIO *bp, void **x, pem_password_cb *cb,
+                                       void *u);
+OPENSSL_EXPORT int PEM_ASN1_write_bio(i2d_of_void *i2d, const char *name,
+                                      BIO *bp, void *x, const EVP_CIPHER *enc,
+                                      unsigned char *kstr, int klen,
+                                      pem_password_cb *cb, void *u);
 
-/* PEM_def_callback treats |userdata| as a string and copies it into |buf|,
- * assuming its |size| is sufficient. Returns the length of the string, or 0
- * if there is not enough room. If either |buf| or |userdata| is NULL, 0 is
- * returned. Note that this is different from OpenSSL, which prompts for a
- * password. */
-OPENSSL_EXPORT int	PEM_def_callback(char *buf, int size, int rwflag, void *userdata);
-OPENSSL_EXPORT void	PEM_proc_type(char *buf, int type);
-OPENSSL_EXPORT void	PEM_dek_info(char *buf, const char *type, int len, char *str);
+OPENSSL_EXPORT STACK_OF(X509_INFO) *PEM_X509_INFO_read_bio(
+    BIO *bp, STACK_OF(X509_INFO) *sk, pem_password_cb *cb, void *u);
+OPENSSL_EXPORT int PEM_X509_INFO_write_bio(BIO *bp, X509_INFO *xi,
+                                           EVP_CIPHER *enc, unsigned char *kstr,
+                                           int klen, pem_password_cb *cd,
+                                           void *u);
+
+OPENSSL_EXPORT int PEM_read(FILE *fp, char **name, char **header,
+                            unsigned char **data, long *len);
+OPENSSL_EXPORT int PEM_write(FILE *fp, const char *name, const char *hdr,
+                             const unsigned char *data, long len);
+OPENSSL_EXPORT void *PEM_ASN1_read(d2i_of_void *d2i, const char *name, FILE *fp,
+                                   void **x, pem_password_cb *cb, void *u);
+OPENSSL_EXPORT int PEM_ASN1_write(i2d_of_void *i2d, const char *name, FILE *fp,
+                                  void *x, const EVP_CIPHER *enc,
+                                  unsigned char *kstr, int klen,
+                                  pem_password_cb *callback, void *u);
+OPENSSL_EXPORT STACK_OF(X509_INFO) *PEM_X509_INFO_read(FILE *fp,
+                                                       STACK_OF(X509_INFO) *sk,
+                                                       pem_password_cb *cb,
+                                                       void *u);
+
+// PEM_def_callback treats |userdata| as a string and copies it into |buf|,
+// assuming its |size| is sufficient. Returns the length of the string, or 0
+// if there is not enough room. If either |buf| or |userdata| is NULL, 0 is
+// returned. Note that this is different from OpenSSL, which prompts for a
+// password.
+OPENSSL_EXPORT int PEM_def_callback(char *buf, int size, int rwflag,
+                                    void *userdata);
+OPENSSL_EXPORT void PEM_proc_type(char *buf, int type);
+OPENSSL_EXPORT void PEM_dek_info(char *buf, const char *type, int len,
+                                 char *str);
 
 
 DECLARE_PEM_rw(X509, X509)
@@ -397,22 +441,46 @@
 
 DECLARE_PEM_rw(PUBKEY, EVP_PKEY)
 
-OPENSSL_EXPORT int PEM_write_bio_PKCS8PrivateKey_nid(BIO *bp, EVP_PKEY *x, int nid, char *kstr, int klen, pem_password_cb *cb, void *u);
-OPENSSL_EXPORT int PEM_write_bio_PKCS8PrivateKey(BIO *, EVP_PKEY *, const EVP_CIPHER *, char *, int, pem_password_cb *, void *);
-OPENSSL_EXPORT int i2d_PKCS8PrivateKey_bio(BIO *bp, EVP_PKEY *x, const EVP_CIPHER *enc, char *kstr, int klen, pem_password_cb *cb, void *u);
-OPENSSL_EXPORT int i2d_PKCS8PrivateKey_nid_bio(BIO *bp, EVP_PKEY *x, int nid, char *kstr, int klen, pem_password_cb *cb, void *u);
-OPENSSL_EXPORT EVP_PKEY *d2i_PKCS8PrivateKey_bio(BIO *bp, EVP_PKEY **x, pem_password_cb *cb, void *u);
+OPENSSL_EXPORT int PEM_write_bio_PKCS8PrivateKey_nid(BIO *bp, EVP_PKEY *x,
+                                                     int nid, char *kstr,
+                                                     int klen,
+                                                     pem_password_cb *cb,
+                                                     void *u);
+OPENSSL_EXPORT int PEM_write_bio_PKCS8PrivateKey(BIO *, EVP_PKEY *,
+                                                 const EVP_CIPHER *, char *,
+                                                 int, pem_password_cb *,
+                                                 void *);
+OPENSSL_EXPORT int i2d_PKCS8PrivateKey_bio(BIO *bp, EVP_PKEY *x,
+                                           const EVP_CIPHER *enc, char *kstr,
+                                           int klen, pem_password_cb *cb,
+                                           void *u);
+OPENSSL_EXPORT int i2d_PKCS8PrivateKey_nid_bio(BIO *bp, EVP_PKEY *x, int nid,
+                                               char *kstr, int klen,
+                                               pem_password_cb *cb, void *u);
+OPENSSL_EXPORT EVP_PKEY *d2i_PKCS8PrivateKey_bio(BIO *bp, EVP_PKEY **x,
+                                                 pem_password_cb *cb, void *u);
 
-OPENSSL_EXPORT int i2d_PKCS8PrivateKey_fp(FILE *fp, EVP_PKEY *x, const EVP_CIPHER *enc, char *kstr, int klen, pem_password_cb *cb, void *u);
-OPENSSL_EXPORT int i2d_PKCS8PrivateKey_nid_fp(FILE *fp, EVP_PKEY *x, int nid, char *kstr, int klen, pem_password_cb *cb, void *u);
-OPENSSL_EXPORT int PEM_write_PKCS8PrivateKey_nid(FILE *fp, EVP_PKEY *x, int nid, char *kstr, int klen, pem_password_cb *cb, void *u);
+OPENSSL_EXPORT int i2d_PKCS8PrivateKey_fp(FILE *fp, EVP_PKEY *x,
+                                          const EVP_CIPHER *enc, char *kstr,
+                                          int klen, pem_password_cb *cb,
+                                          void *u);
+OPENSSL_EXPORT int i2d_PKCS8PrivateKey_nid_fp(FILE *fp, EVP_PKEY *x, int nid,
+                                              char *kstr, int klen,
+                                              pem_password_cb *cb, void *u);
+OPENSSL_EXPORT int PEM_write_PKCS8PrivateKey_nid(FILE *fp, EVP_PKEY *x, int nid,
+                                                 char *kstr, int klen,
+                                                 pem_password_cb *cb, void *u);
 
-OPENSSL_EXPORT EVP_PKEY *d2i_PKCS8PrivateKey_fp(FILE *fp, EVP_PKEY **x, pem_password_cb *cb, void *u);
+OPENSSL_EXPORT EVP_PKEY *d2i_PKCS8PrivateKey_fp(FILE *fp, EVP_PKEY **x,
+                                                pem_password_cb *cb, void *u);
 
-OPENSSL_EXPORT int PEM_write_PKCS8PrivateKey(FILE *fp,EVP_PKEY *x,const EVP_CIPHER *enc, char *kstr,int klen, pem_password_cb *cd, void *u);
+OPENSSL_EXPORT int PEM_write_PKCS8PrivateKey(FILE *fp, EVP_PKEY *x,
+                                             const EVP_CIPHER *enc, char *kstr,
+                                             int klen, pem_password_cb *cd,
+                                             void *u);
 
 
-#ifdef  __cplusplus
+#ifdef __cplusplus
 }
 #endif
 
@@ -432,4 +500,4 @@
 #define PEM_R_UNSUPPORTED_CIPHER 113
 #define PEM_R_UNSUPPORTED_ENCRYPTION 114
 
-#endif  /* OPENSSL_HEADER_PEM_H */
+#endif  // OPENSSL_HEADER_PEM_H
diff --git a/deps/boringssl/src/include/openssl/rand.h b/deps/boringssl/src/include/openssl/rand.h
index 4847eb7..b07015b 100644
--- a/deps/boringssl/src/include/openssl/rand.h
+++ b/deps/boringssl/src/include/openssl/rand.h
@@ -97,6 +97,9 @@
 // RAND_SSLeay returns a pointer to a dummy |RAND_METHOD|.
 OPENSSL_EXPORT RAND_METHOD *RAND_SSLeay(void);
 
+// RAND_OpenSSL returns a pointer to a dummy |RAND_METHOD|.
+OPENSSL_EXPORT RAND_METHOD *RAND_OpenSSL(void);
+
 // RAND_get_rand_method returns |RAND_SSLeay()|.
 OPENSSL_EXPORT const RAND_METHOD *RAND_get_rand_method(void);
 
diff --git a/deps/boringssl/src/include/openssl/span.h b/deps/boringssl/src/include/openssl/span.h
index 1d732eb..7410bf9 100644
--- a/deps/boringssl/src/include/openssl/span.h
+++ b/deps/boringssl/src/include/openssl/span.h
@@ -21,8 +21,9 @@
 
 extern "C++" {
 
+#include <stdlib.h>
+
 #include <algorithm>
-#include <cstdlib>
 #include <type_traits>
 
 BSSL_NAMESPACE_BEGIN
diff --git a/deps/boringssl/src/include/openssl/ssl.h b/deps/boringssl/src/include/openssl/ssl.h
index 8e11ef2..7ff7e72 100644
--- a/deps/boringssl/src/include/openssl/ssl.h
+++ b/deps/boringssl/src/include/openssl/ssl.h
@@ -267,7 +267,7 @@
 // |SSL_set0_rbio| and |SSL_set0_wbio| instead.
 OPENSSL_EXPORT void SSL_set_bio(SSL *ssl, BIO *rbio, BIO *wbio);
 
-// SSL_set0_rbio configures |ssl| to write to |rbio|. It takes ownership of
+// SSL_set0_rbio configures |ssl| to read from |rbio|. It takes ownership of
 // |rbio|.
 //
 // Note that, although this function and |SSL_set0_wbio| may be called on the
@@ -953,6 +953,18 @@
 OPENSSL_EXPORT size_t
 SSL_get0_peer_verify_algorithms(const SSL *ssl, const uint16_t **out_sigalgs);
 
+// SSL_get0_peer_delegation_algorithms sets |*out_sigalgs| to an array
+// containing the signature algorithms the peer is willing to use with delegated
+// credentials.  It returns the length of the array. If not sent, the empty
+// array is returned.
+//
+// The behavior of this function is undefined except during the callbacks set by
+// |SSL_CTX_set_cert_cb| and |SSL_CTX_set_client_cert_cb| or when the
+// handshake is paused because of them.
+OPENSSL_EXPORT size_t
+SSL_get0_peer_delegation_algorithms(const SSL *ssl,
+                                    const uint16_t **out_sigalgs);
+
 // SSL_certs_clear resets the private key, leaf certificate, and certificate
 // chain of |ssl|.
 OPENSSL_EXPORT void SSL_certs_clear(SSL *ssl);
@@ -1281,8 +1293,8 @@
 // cast to a |uint16_t| to get it.
 OPENSSL_EXPORT uint32_t SSL_CIPHER_get_id(const SSL_CIPHER *cipher);
 
-// SSL_CIPHER_get_value returns |cipher|'s IANA-assigned number.
-OPENSSL_EXPORT uint16_t SSL_CIPHER_get_value(const SSL_CIPHER *cipher);
+// SSL_CIPHER_get_protocol_id returns |cipher|'s IANA-assigned number.
+OPENSSL_EXPORT uint16_t SSL_CIPHER_get_protocol_id(const SSL_CIPHER *cipher);
 
 // SSL_CIPHER_is_aead returns one if |cipher| uses an AEAD cipher.
 OPENSSL_EXPORT int SSL_CIPHER_is_aead(const SSL_CIPHER *cipher);
@@ -1726,9 +1738,9 @@
 // SSL_MAX_MASTER_KEY_LENGTH is the maximum length of a master secret.
 #define SSL_MAX_MASTER_KEY_LENGTH 48
 
-// SSL_SESSION_get_master_key writes up to |max_out| bytes of |session|'s master
-// secret to |out| and returns the number of bytes written. If |max_out| is
-// zero, it returns the size of the master secret.
+// SSL_SESSION_get_master_key writes up to |max_out| bytes of |session|'s secret
+// to |out| and returns the number of bytes written. If |max_out| is zero, it
+// returns the size of the secret.
 OPENSSL_EXPORT size_t SSL_SESSION_get_master_key(const SSL_SESSION *session,
                                                  uint8_t *out, size_t max_out);
 
@@ -2764,15 +2776,58 @@
                                                           int enabled);
 
 
+// Application-layer protocol settings
+//
+// The ALPS extension (draft-vvv-tls-alps) allows exchanging application-layer
+// settings in the TLS handshake for applications negotiated with ALPN. Note
+// that, when ALPS is negotiated, the client and server each advertise their own
+// settings, so there are functions to both configure settings to send and query
+// received settings.
+
+// SSL_add_application_settings configures |ssl| to enable ALPS with ALPN
+// protocol |proto|, sending an ALPS value of |settings|. It returns one on
+// success and zero on error. If |proto| is negotiated via ALPN and the peer
+// supports ALPS, |settings| will be sent to the peer. The peer's ALPS value can
+// be retrieved with |SSL_get0_peer_application_settings|.
+//
+// On the client, this function should be called before the handshake, once for
+// each supported ALPN protocol which uses ALPS. |proto| must be included in the
+// client's ALPN configuration (see |SSL_CTX_set_alpn_protos| and
+// |SSL_set_alpn_protos|). On the server, ALPS can be preconfigured for each
+// protocol as in the client, or configuration can be deferred to the ALPN
+// callback (see |SSL_CTX_set_alpn_select_cb|), in which case only the selected
+// protocol needs to be configured.
+//
+// ALPS can be independently configured from 0-RTT, however changes in protocol
+// settings will fall back to 1-RTT to negotiate the new value, so it is
+// recommended for |settings| to be relatively stable.
+OPENSSL_EXPORT int SSL_add_application_settings(SSL *ssl, const uint8_t *proto,
+                                                size_t proto_len,
+                                                const uint8_t *settings,
+                                                size_t settings_len);
+
+// SSL_get0_peer_application_settings sets |*out_data| and |*out_len| to a
+// buffer containing the peer's ALPS value, or the empty string if ALPS was not
+// negotiated. Note an empty string could also indicate the peer sent an empty
+// settings value. Use |SSL_has_application_settings| to check if ALPS was
+// negotiated. The output buffer is owned by |ssl| and is valid until the next
+// time |ssl| is modified.
+OPENSSL_EXPORT void SSL_get0_peer_application_settings(const SSL *ssl,
+                                                       const uint8_t **out_data,
+                                                       size_t *out_len);
+
+// SSL_has_application_settings returns one if ALPS was negotiated on this
+// connection and zero otherwise.
+OPENSSL_EXPORT int SSL_has_application_settings(const SSL *ssl);
+
+
 // Certificate compression.
 //
-// Certificates in TLS 1.3 can be compressed[1]. BoringSSL supports this as both
-// a client and a server, but does not link against any specific compression
-// libraries in order to keep dependencies to a minimum. Instead, hooks for
-// compression and decompression can be installed in an |SSL_CTX| to enable
-// support.
-//
-// [1] https://tools.ietf.org/html/draft-ietf-tls-certificate-compression-03.
+// Certificates in TLS 1.3 can be compressed (RFC 8879). BoringSSL supports this
+// as both a client and a server, but does not link against any specific
+// compression libraries in order to keep dependencies to a minimum. Instead,
+// hooks for compression and decompression can be installed in an |SSL_CTX| to
+// enable support.
 
 // ssl_cert_compression_func_t is a pointer to a function that performs
 // compression. It must write the compressed representation of |in| to |out|,
@@ -3329,6 +3384,12 @@
 OPENSSL_EXPORT void SSL_get_peer_quic_transport_params(
     const SSL *ssl, const uint8_t **out_params, size_t *out_params_len);
 
+// SSL_set_quic_use_legacy_codepoint configures whether to use the legacy QUIC
+// extension codepoint 0xffa5 as opposed to the official value 57. Call with
+// |use_legacy| set to 1 to use 0xffa5 and call with 0 to use 57. The default
+// value for this is currently 1 but it will change to 0 at a later date.
+OPENSSL_EXPORT void SSL_set_quic_use_legacy_codepoint(SSL *ssl, int use_legacy);
+
 // SSL_set_quic_early_data_context configures a context string in QUIC servers
 // for accepting early data. If a resumption connection offers early data, the
 // server will check if the value matches that of the connection which minted
@@ -3481,8 +3542,10 @@
   ssl_early_data_ticket_age_skew = 12,
   // QUIC parameters differ between this connection and the original.
   ssl_early_data_quic_parameter_mismatch = 13,
+  // The application settings did not match the session.
+  ssl_early_data_alps_mismatch = 14,
   // The value of the largest entry.
-  ssl_early_data_reason_max_value = ssl_early_data_quic_parameter_mismatch,
+  ssl_early_data_reason_max_value = ssl_early_data_alps_mismatch,
 };
 
 // SSL_get_early_data_reason returns details why 0-RTT was accepted or rejected
@@ -3490,6 +3553,26 @@
 OPENSSL_EXPORT enum ssl_early_data_reason_t SSL_get_early_data_reason(
     const SSL *ssl);
 
+// SSL_early_data_reason_string returns a string representation for |reason|, or
+// NULL if |reason| is unknown. This function may be used for logging.
+OPENSSL_EXPORT const char *SSL_early_data_reason_string(
+    enum ssl_early_data_reason_t reason);
+
+
+// Encrypted Client Hello.
+//
+// ECH is a mechanism for encrypting the entire ClientHello message in TLS 1.3.
+// This can prevent observers from seeing cleartext information about the
+// connection, such as the server_name extension.
+//
+// ECH support in BoringSSL is still experimental and under development.
+//
+// See https://tools.ietf.org/html/draft-ietf-tls-esni-09.
+
+// SSL_set_enable_ech_grease configures whether the client may send ECH GREASE
+// as part of this connection.
+OPENSSL_EXPORT void SSL_set_enable_ech_grease(SSL *ssl, int enable);
+
 
 // Alerts.
 //
@@ -3604,11 +3687,13 @@
                                const uint8_t **out_write_iv,
                                size_t *out_iv_len);
 
-// SSL_get_key_block_len returns the length of |ssl|'s key block.
+// SSL_get_key_block_len returns the length of |ssl|'s key block. It is an error
+// to call this function during a handshake.
 OPENSSL_EXPORT size_t SSL_get_key_block_len(const SSL *ssl);
 
 // SSL_generate_key_block generates |out_len| bytes of key material for |ssl|'s
-// current connection state.
+// current connection state. It is an error to call this function during a
+// handshake.
 OPENSSL_EXPORT int SSL_generate_key_block(const SSL *ssl, uint8_t *out,
                                           size_t out_len);
 
@@ -4021,19 +4106,6 @@
 OPENSSL_EXPORT void SSL_CTX_set_false_start_allowed_without_alpn(SSL_CTX *ctx,
                                                                  int allowed);
 
-// SSL_CTX_set_ignore_tls13_downgrade configures whether connections on |ctx|
-// ignore the downgrade signal in the server's random value.
-OPENSSL_EXPORT void SSL_CTX_set_ignore_tls13_downgrade(SSL_CTX *ctx,
-                                                       int ignore);
-
-// SSL_set_ignore_tls13_downgrade configures whether |ssl| ignores the downgrade
-// signal in the server's random value.
-OPENSSL_EXPORT void SSL_set_ignore_tls13_downgrade(SSL *ssl, int ignore);
-
-// SSL_is_tls13_downgrade returns one if the TLS 1.3 anti-downgrade
-// mechanism would have aborted |ssl|'s handshake and zero otherwise.
-OPENSSL_EXPORT int SSL_is_tls13_downgrade(const SSL *ssl);
-
 // SSL_used_hello_retry_request returns one if the TLS 1.3 HelloRetryRequest
 // message has been either sent by the server or received by the client. It
 // returns zero otherwise.
@@ -4686,6 +4758,42 @@
 // |SSL_CTX_set_tlsext_status_cb|'s callback and returns one.
 OPENSSL_EXPORT int SSL_CTX_set_tlsext_status_arg(SSL_CTX *ctx, void *arg);
 
+// The following symbols are compatibility aliases for reason codes used when
+// receiving an alert from the peer. Use the other names instead, which fit the
+// naming convention.
+//
+// TODO(davidben): Fix references to |SSL_R_TLSV1_CERTIFICATE_REQUIRED| and
+// remove the compatibility value. The others come from OpenSSL.
+#define SSL_R_TLSV1_UNSUPPORTED_EXTENSION \
+  SSL_R_TLSV1_ALERT_UNSUPPORTED_EXTENSION
+#define SSL_R_TLSV1_CERTIFICATE_UNOBTAINABLE \
+  SSL_R_TLSV1_ALERT_CERTIFICATE_UNOBTAINABLE
+#define SSL_R_TLSV1_UNRECOGNIZED_NAME SSL_R_TLSV1_ALERT_UNRECOGNIZED_NAME
+#define SSL_R_TLSV1_BAD_CERTIFICATE_STATUS_RESPONSE \
+  SSL_R_TLSV1_ALERT_BAD_CERTIFICATE_STATUS_RESPONSE
+#define SSL_R_TLSV1_BAD_CERTIFICATE_HASH_VALUE \
+  SSL_R_TLSV1_ALERT_BAD_CERTIFICATE_HASH_VALUE
+#define SSL_R_TLSV1_CERTIFICATE_REQUIRED SSL_R_TLSV1_ALERT_CERTIFICATE_REQUIRED
+
+// SSL_CIPHER_get_value calls |SSL_CIPHER_get_protocol_id|.
+//
+// TODO(davidben): |SSL_CIPHER_get_value| was our name for this function, but
+// upstream added it as |SSL_CIPHER_get_protocol_id|. Switch callers to the new
+// name and remove this one.
+OPENSSL_EXPORT uint16_t SSL_CIPHER_get_value(const SSL_CIPHER *cipher);
+
+// SSL_CTX_set_ignore_tls13_downgrade does nothing.
+OPENSSL_EXPORT void SSL_CTX_set_ignore_tls13_downgrade(SSL_CTX *ctx,
+                                                       int ignore);
+
+// SSL_set_ignore_tls13_downgrade does nothing.
+OPENSSL_EXPORT void SSL_set_ignore_tls13_downgrade(SSL *ssl, int ignore);
+
+// SSL_is_tls13_downgrade returns zero. Historically, this function returned
+// whether the TLS 1.3 downgrade signal would have been enforced if not
+// disabled. The TLS 1.3 downgrade signal is now always enforced.
+OPENSSL_EXPORT int SSL_is_tls13_downgrade(const SSL *ssl);
+
 
 // Nodejs compatibility section (hidden).
 //
@@ -5177,6 +5285,10 @@
 #define SSL_R_INCONSISTENT_CLIENT_HELLO 303
 #define SSL_R_CIPHER_MISMATCH_ON_EARLY_DATA 304
 #define SSL_R_QUIC_TRANSPORT_PARAMETERS_MISCONFIGURED 305
+#define SSL_R_UNEXPECTED_COMPATIBILITY_MODE 306
+#define SSL_R_MISSING_ALPN 307
+#define SSL_R_NEGOTIATED_ALPS_WITHOUT_ALPN 308
+#define SSL_R_ALPS_MISMATCH_ON_EARLY_DATA 309
 #define SSL_R_SSLV3_ALERT_CLOSE_NOTIFY 1000
 #define SSL_R_SSLV3_ALERT_UNEXPECTED_MESSAGE 1010
 #define SSL_R_SSLV3_ALERT_BAD_RECORD_MAC 1020
@@ -5202,12 +5314,13 @@
 #define SSL_R_TLSV1_ALERT_INAPPROPRIATE_FALLBACK 1086
 #define SSL_R_TLSV1_ALERT_USER_CANCELLED 1090
 #define SSL_R_TLSV1_ALERT_NO_RENEGOTIATION 1100
-#define SSL_R_TLSV1_UNSUPPORTED_EXTENSION 1110
-#define SSL_R_TLSV1_CERTIFICATE_UNOBTAINABLE 1111
-#define SSL_R_TLSV1_UNRECOGNIZED_NAME 1112
-#define SSL_R_TLSV1_BAD_CERTIFICATE_STATUS_RESPONSE 1113
-#define SSL_R_TLSV1_BAD_CERTIFICATE_HASH_VALUE 1114
-#define SSL_R_TLSV1_UNKNOWN_PSK_IDENTITY 1115
-#define SSL_R_TLSV1_CERTIFICATE_REQUIRED 1116
+#define SSL_R_TLSV1_ALERT_UNSUPPORTED_EXTENSION 1110
+#define SSL_R_TLSV1_ALERT_CERTIFICATE_UNOBTAINABLE 1111
+#define SSL_R_TLSV1_ALERT_UNRECOGNIZED_NAME 1112
+#define SSL_R_TLSV1_ALERT_BAD_CERTIFICATE_STATUS_RESPONSE 1113
+#define SSL_R_TLSV1_ALERT_BAD_CERTIFICATE_HASH_VALUE 1114
+#define SSL_R_TLSV1_ALERT_UNKNOWN_PSK_IDENTITY 1115
+#define SSL_R_TLSV1_ALERT_CERTIFICATE_REQUIRED 1116
+#define SSL_R_TLSV1_ALERT_NO_APPLICATION_PROTOCOL 1120
 
 #endif  // OPENSSL_HEADER_SSL_H
diff --git a/deps/boringssl/src/include/openssl/tls1.h b/deps/boringssl/src/include/openssl/tls1.h
index 64ed762..da79a08 100644
--- a/deps/boringssl/src/include/openssl/tls1.h
+++ b/deps/boringssl/src/include/openssl/tls1.h
@@ -206,13 +206,27 @@
 // ExtensionType value from draft-ietf-tokbind-negotiation-10
 #define TLSEXT_TYPE_token_binding 24
 
-// ExtensionType value from draft-ietf-quic-tls. Note that this collides with
-// TLS-LTS and, based on scans, something else too. Since it's QUIC-only, that
-// shouldn't be a problem in practice.
-#define TLSEXT_TYPE_quic_transport_parameters 0xffa5
+// ExtensionType value from draft-ietf-quic-tls. Drafts 00 through 32 use
+// 0xffa5 which is part of the Private Use section of the registry, and it
+// collides with TLS-LTS and, based on scans, something else too (though this
+// hasn't been a problem in practice since it's QUIC-only). Drafts 33 onward
+// use the value 57 which was officially registered with IANA.
+#define TLSEXT_TYPE_quic_transport_parameters_legacy 0xffa5
+#define TLSEXT_TYPE_quic_transport_parameters_standard 57
 
-// ExtensionType value assigned to
-// https://tools.ietf.org/html/draft-ietf-tls-certificate-compression-03
+// TLSEXT_TYPE_quic_transport_parameters is an alias for
+// |TLSEXT_TYPE_quic_transport_parameters_legacy|. It will switch to
+// |TLSEXT_TYPE_quic_transport_parameters_standard| at a later date.
+//
+// Callers using |SSL_set_quic_use_legacy_codepoint| should use
+// |TLSEXT_TYPE_quic_transport_parameters_legacy| or
+// |TLSEXT_TYPE_quic_transport_parameters_standard| rather than this constant.
+// When the default code point is switched to the standard one, this value will
+// be updated and we will transition callers back to the unsuffixed constant.
+#define TLSEXT_TYPE_quic_transport_parameters \
+  TLSEXT_TYPE_quic_transport_parameters_legacy
+
+// ExtensionType value from RFC8879
 #define TLSEXT_TYPE_cert_compression 27
 
 // ExtensionType value from RFC4507
@@ -232,9 +246,17 @@
 // ExtensionType value from RFC5746
 #define TLSEXT_TYPE_renegotiate 0xff01
 
-// ExtensionType value from draft-ietf-tls-subcerts. This is not an IANA defined
+// ExtensionType value from draft-ietf-tls-subcerts.
+#define TLSEXT_TYPE_delegated_credential 0x22
+
+// ExtensionType value from draft-vvv-tls-alps. This is not an IANA defined
 // extension number.
-#define TLSEXT_TYPE_delegated_credential 0xff02
+#define TLSEXT_TYPE_application_settings 17513
+
+// ExtensionType values from draft-ietf-tls-esni-09. These are not IANA defined
+// extension numbers.
+#define TLSEXT_TYPE_encrypted_client_hello 0xfe09
+#define TLSEXT_TYPE_ech_is_inner 0xda09
 
 // ExtensionType value from RFC6962
 #define TLSEXT_TYPE_certificate_timestamp 18
@@ -268,7 +290,7 @@
 #define TLSEXT_hash_sha384 5
 #define TLSEXT_hash_sha512 6
 
-// From https://tools.ietf.org/html/draft-ietf-tls-certificate-compression-03#section-3
+// From https://www.rfc-editor.org/rfc/rfc8879.html#section-3
 #define TLSEXT_cert_compression_zlib 1
 #define TLSEXT_cert_compression_brotli 2
 
diff --git a/deps/boringssl/src/include/openssl/trust_token.h b/deps/boringssl/src/include/openssl/trust_token.h
index a73a868..d9247f7 100644
--- a/deps/boringssl/src/include/openssl/trust_token.h
+++ b/deps/boringssl/src/include/openssl/trust_token.h
@@ -36,15 +36,18 @@
 //
 // WARNING: This API is unstable and subject to change.
 
-// TRUST_TOKEN_experiment_v0 is an experimental Trust Tokens protocol using
-// PMBTokens and P-521.
-OPENSSL_EXPORT const TRUST_TOKEN_METHOD *TRUST_TOKEN_experiment_v0(void);
-
 // TRUST_TOKEN_experiment_v1 is an experimental Trust Tokens protocol using
-// PMBTokens and P-384. This version is still under developement and should not
-// be used yet.
+// PMBTokens and P-384.
 OPENSSL_EXPORT const TRUST_TOKEN_METHOD *TRUST_TOKEN_experiment_v1(void);
 
+// TRUST_TOKEN_experiment_v2_voprf is an experimental Trust Tokens protocol
+// using VOPRFs and P-384 with up to 6 keys, without RR verification.
+OPENSSL_EXPORT const TRUST_TOKEN_METHOD *TRUST_TOKEN_experiment_v2_voprf(void);
+
+// TRUST_TOKEN_experiment_v2_pmb is an experimental Trust Tokens protocol using
+// PMBTokens and P-384 with up to 3 keys, without RR verification.
+OPENSSL_EXPORT const TRUST_TOKEN_METHOD *TRUST_TOKEN_experiment_v2_pmb(void);
+
 // trust_token_st represents a single-use token for the Trust Token protocol.
 // For the client, this is the token and its corresponding signature. For the
 // issuer, this is the token itself.
@@ -143,21 +146,23 @@
 // |token| and receive a signature over |data| and serializes the request into
 // a newly-allocated buffer, setting |*out| to that buffer and |*out_len| to
 // its length. |time| is the number of seconds since the UNIX epoch and used to
-// verify the validity of the issuer's response. The caller takes ownership of
-// the buffer and must call |OPENSSL_free| when done. It returns one on success
-// or zero on error.
+// verify the validity of the issuer's response in TrustTokenV1 and ignored in
+// other versions. The caller takes ownership of the buffer and must call
+// |OPENSSL_free| when done. It returns one on success or zero on error.
 OPENSSL_EXPORT int TRUST_TOKEN_CLIENT_begin_redemption(
     TRUST_TOKEN_CLIENT *ctx, uint8_t **out, size_t *out_len,
     const TRUST_TOKEN *token, const uint8_t *data, size_t data_len,
     uint64_t time);
 
-// TRUST_TOKEN_CLIENT_finish_redemption consumes |response| from the issuer and
-// verifies the SRR. If valid, it returns one and sets |*out_srr| and
-// |*out_srr_len| (respectively, |*out_sig| and |*out_sig_len|) to a
-// newly-allocated buffer containing the SRR (respectively, the SRR signature).
-// Otherwise, it returns zero.
+// TRUST_TOKEN_CLIENT_finish_redemption consumes |response| from the issuer. In
+// |TRUST_TOKEN_experiment_v1|, it then verifies the SRR and, if valid, sets
+// |*out_rr| and |*out_rr_len| (respectively, |*out_sig| and |*out_sig_len|)
+// to a newly-allocated buffer containing the SRR (respectively, the SRR
+// signature). In other versions, it sets |*out_rr| and |*out_rr_len|
+// to a newly-allocated buffer containing |response| and leaves all validation
+// to the caller. It returns one on success or zero on failure.
 OPENSSL_EXPORT int TRUST_TOKEN_CLIENT_finish_redemption(
-    TRUST_TOKEN_CLIENT *ctx, uint8_t **out_srr, size_t *out_srr_len,
+    TRUST_TOKEN_CLIENT *ctx, uint8_t **out_rr, size_t *out_rr_len,
     uint8_t **out_sig, size_t *out_sig_len, const uint8_t *response,
     size_t response_len);
 
@@ -219,34 +224,48 @@
     uint32_t public_metadata, uint8_t private_metadata, size_t max_issuance);
 
 // TRUST_TOKEN_ISSUER_redeem ingests a |request| for token redemption and
-// verifies the token. If the token is valid, a SRR is produced with a lifetime
+// verifies the token. If the token is valid, a RR is produced with a lifetime
 // of |lifetime| (in seconds), signing over the requested data from the request
 // and the value of the token, storing the result into a newly-allocated buffer
 // and setting |*out| to that buffer and |*out_len| to its length. The extracted
 // |TRUST_TOKEN| is stored into a newly-allocated buffer and stored in
 // |*out_token|. The extracted client data is stored into a newly-allocated
-// buffer and stored in |*out_client_data|. The extracted redemption time is
-// stored in |*out_redemption_time|. The caller takes ownership of each output
-// buffer and must call |OPENSSL_free| when done. It returns one on success or
-// zero on error.
+// buffer and stored in |*out_client_data|. In TrustTokenV1, the extracted
+// redemption time is stored in |*out_redemption_time|. The caller takes
+// ownership of each output buffer and must call |OPENSSL_free| when done. It
+// returns one on success or zero on error.
 //
 // The caller must keep track of all values of |*out_token| seen globally before
 // returning the SRR to the client. If the value has been reused, the caller
 // must discard the SRR and report an error to the caller. Returning an SRR with
 // replayed values allows an attacker to double-spend tokens.
-//
-// The private metadata construction in |TRUST_TOKEN_experiment_v0| does not
-// keep the value secret and should not be used when secrecy is required.
 OPENSSL_EXPORT int TRUST_TOKEN_ISSUER_redeem(
     const TRUST_TOKEN_ISSUER *ctx, uint8_t **out, size_t *out_len,
     TRUST_TOKEN **out_token, uint8_t **out_client_data,
     size_t *out_client_data_len, uint64_t *out_redemption_time,
     const uint8_t *request, size_t request_len, uint64_t lifetime);
 
+// TRUST_TOKEN_ISSUER_redeem_raw ingests a |request| for token redemption and
+// verifies the token. The public metadata is stored in |*out_public|. The
+// private metadata (if any) is stored in |*out_private|. The extracted
+// |TRUST_TOKEN| is stored into a newly-allocated buffer and stored in
+// |*out_token|. The extracted client data is stored into a newly-allocated
+// buffer and stored in |*out_client_data|. The caller takes ownership of each
+// output buffer and must call |OPENSSL_free| when done. It returns one on
+// success or zero on error.
+//
+// The caller must keep track of all values of |*out_token| seen globally before
+// returning a response to the client. If the value has been reused, the caller
+// must report an error to the client. Returning a response with replayed values
+// allows an attacker to double-spend tokens.
+OPENSSL_EXPORT int TRUST_TOKEN_ISSUER_redeem_raw(
+    const TRUST_TOKEN_ISSUER *ctx, uint32_t *out_public, uint8_t *out_private,
+    TRUST_TOKEN **out_token, uint8_t **out_client_data,
+    size_t *out_client_data_len, const uint8_t *request, size_t request_len);
+
 // TRUST_TOKEN_decode_private_metadata decodes |encrypted_bit| using the
 // private metadata key specified by a |key| buffer of length |key_len| and the
 // nonce by a |nonce| buffer of length |nonce_len|. The nonce in
-// |TRUST_TOKEN_experiment_v0| is the client-data field of the SRR. The nonce in
 // |TRUST_TOKEN_experiment_v1| is the token-hash field of the SRR. |*out_value|
 // is set to the decrypted value, either zero or one. It returns one on success
 // and zero on error.
diff --git a/deps/boringssl/src/include/openssl/x509.h b/deps/boringssl/src/include/openssl/x509.h
index 342569c..a75442f 100644
--- a/deps/boringssl/src/include/openssl/x509.h
+++ b/deps/boringssl/src/include/openssl/x509.h
@@ -143,10 +143,10 @@
 
 // we always keep X509_NAMEs in 2 forms.
 struct X509_name_st {
-  STACK_OF(X509_NAME_ENTRY) * entries;
+  STACK_OF(X509_NAME_ENTRY) *entries;
   int modified;  // true if 'bytes' needs to be built
   BUF_MEM *bytes;
-  //	unsigned long hash; Keep the hash around for lookups
+  // unsigned long hash; Keep the hash around for lookups
   unsigned char *canon_enc;
   int canon_enclen;
 } /* X509_NAME */;
@@ -170,7 +170,7 @@
   int single;  // 0 for a set, 1 for a single item (which is wrong)
   union {
     char *ptr;
-    /* 0 */ STACK_OF(ASN1_TYPE) * set;
+    /* 0 */ STACK_OF(ASN1_TYPE) *set;
     /* 1 */ ASN1_TYPE *single;
   } value;
 } /* X509_ATTRIBUTE */;
@@ -185,7 +185,7 @@
   X509_NAME *subject;
   X509_PUBKEY *pubkey;
   //  d=2 hl=2 l=  0 cons: cont: 00
-  STACK_OF(X509_ATTRIBUTE) * attributes;  // [ 0 ]
+  STACK_OF(X509_ATTRIBUTE) *attributes;  // [ 0 ]
 } /* X509_REQ_INFO */;
 
 struct X509_req_st {
@@ -203,9 +203,9 @@
   X509_VAL *validity;
   X509_NAME *subject;
   X509_PUBKEY *key;
-  ASN1_BIT_STRING *issuerUID;             // [ 1 ] optional in v2
-  ASN1_BIT_STRING *subjectUID;            // [ 2 ] optional in v2
-  STACK_OF(X509_EXTENSION) * extensions;  // [ 3 ] optional in v3
+  ASN1_BIT_STRING *issuerUID;            // [ 1 ] optional in v2
+  ASN1_BIT_STRING *subjectUID;           // [ 2 ] optional in v2
+  STACK_OF(X509_EXTENSION) *extensions;  // [ 3 ] optional in v3
   ASN1_ENCODING enc;
 } /* X509_CINF */;
 
@@ -215,11 +215,11 @@
 // the end of the certificate itself
 
 struct x509_cert_aux_st {
-  STACK_OF(ASN1_OBJECT) * trust;   // trusted uses
-  STACK_OF(ASN1_OBJECT) * reject;  // rejected uses
-  ASN1_UTF8STRING *alias;          // "friendly name"
-  ASN1_OCTET_STRING *keyid;        // key id of private key
-  STACK_OF(X509_ALGOR) * other;    // other unspecified info
+  STACK_OF(ASN1_OBJECT) *trust;   // trusted uses
+  STACK_OF(ASN1_OBJECT) *reject;  // rejected uses
+  ASN1_UTF8STRING *alias;         // "friendly name"
+  ASN1_OCTET_STRING *keyid;       // key id of private key
+  STACK_OF(X509_ALGOR) *other;    // other unspecified info
 } /* X509_CERT_AUX */;
 
 DECLARE_STACK_OF(DIST_POINT)
@@ -230,7 +230,6 @@
   X509_ALGOR *sig_alg;
   ASN1_BIT_STRING *signature;
   CRYPTO_refcount_t references;
-  char *name;
   CRYPTO_EX_DATA ex_data;
   // These contain copies of various extension values
   long ex_pathlen;
@@ -242,8 +241,8 @@
   ASN1_OCTET_STRING *skid;
   AUTHORITY_KEYID *akid;
   X509_POLICY_CACHE *policy_cache;
-  STACK_OF(DIST_POINT) * crldp;
-  STACK_OF(GENERAL_NAME) * altname;
+  STACK_OF(DIST_POINT) *crldp;
+  STACK_OF(GENERAL_NAME) *altname;
   NAME_CONSTRAINTS *nc;
   unsigned char sha1_hash[SHA_DIGEST_LENGTH];
   X509_CERT_AUX *aux;
@@ -365,9 +364,9 @@
 struct x509_revoked_st {
   ASN1_INTEGER *serialNumber;
   ASN1_TIME *revocationDate;
-  STACK_OF(X509_EXTENSION) /* optional */ * extensions;
+  STACK_OF(X509_EXTENSION) /* optional */ *extensions;
   // Set up if indirect CRL
-  STACK_OF(GENERAL_NAME) * issuer;
+  STACK_OF(GENERAL_NAME) *issuer;
   // Revocation reason
   int reason;
   int sequence;  // load sequence
@@ -382,8 +381,8 @@
   X509_NAME *issuer;
   ASN1_TIME *lastUpdate;
   ASN1_TIME *nextUpdate;
-  STACK_OF(X509_REVOKED) * revoked;
-  STACK_OF(X509_EXTENSION) /* [0] */ * extensions;
+  STACK_OF(X509_REVOKED) *revoked;
+  STACK_OF(X509_EXTENSION) /* [0] */ *extensions;
   ASN1_ENCODING enc;
 } /* X509_CRL_INFO */;
 
@@ -406,7 +405,7 @@
   ASN1_INTEGER *crl_number;
   ASN1_INTEGER *base_crl_number;
   unsigned char sha1_hash[SHA_DIGEST_LENGTH];
-  STACK_OF(GENERAL_NAMES) * issuers;
+  STACK_OF(GENERAL_NAMES) *issuers;
   const X509_CRL_METHOD *meth;
   void *meth_data;
 } /* X509_CRL */;
@@ -461,21 +460,6 @@
   ASN1_BIT_STRING *signature;
 } /* NETSCAPE_SPKI */;
 
-// PKCS#8 private key info structure
-
-struct pkcs8_priv_key_info_st {
-  int broken;  // Flag for various broken formats
-#define PKCS8_OK 0
-#define PKCS8_NO_OCTET 1
-#define PKCS8_EMBEDDED_PARAM 2
-#define PKCS8_NS_DB 3
-#define PKCS8_NEG_PRIVKEY 4
-  ASN1_INTEGER *version;
-  X509_ALGOR *pkeyalg;
-  ASN1_TYPE *pkey;  // Should be OCTET STRING but some are broken
-  STACK_OF(X509_ATTRIBUTE) * attributes;
-};
-
 #ifdef __cplusplus
 }
 #endif
@@ -486,6 +470,11 @@
 extern "C" {
 #endif
 
+// TODO(davidben): Document remaining functions, reorganize them, and define
+// supported patterns for using |X509| objects in general. In particular, when
+// it is safe to call mutating functions is a little tricky due to various
+// internal caches.
+
 // X509_get_version returns the numerical value of |x509|'s version. That is,
 // it returns zero for X.509v1, one for X.509v2, and two for X.509v3. Unknown
 // versions are rejected by the parser, but a manually-created |X509| object may
@@ -493,16 +482,54 @@
 // version, or -1 on overflow.
 OPENSSL_EXPORT long X509_get_version(const X509 *x509);
 
-// X509_get_notBefore returns |x509|'s notBefore value. Note this function is
-// not const-correct for legacy reasons. Use |X509_get0_notBefore| or
+// X509_get0_serialNumber returns |x509|'s serial number.
+OPENSSL_EXPORT const ASN1_INTEGER *X509_get0_serialNumber(const X509 *x509);
+
+// X509_get0_notBefore returns |x509|'s notBefore time.
+OPENSSL_EXPORT const ASN1_TIME *X509_get0_notBefore(const X509 *x509);
+
+// X509_get0_notAfter returns |x509|'s notAfter time.
+OPENSSL_EXPORT const ASN1_TIME *X509_get0_notAfter(const X509 *x509);
+
+// X509_set1_notBefore sets |x509|'s notBefore time to |tm|. It returns one on
+// success and zero on error.
+OPENSSL_EXPORT int X509_set1_notBefore(X509 *x509, const ASN1_TIME *tm);
+
+// X509_set1_notAfter sets |x509|'s notAfter time to |tm|. It returns one on
+// success and zero on error.
+OPENSSL_EXPORT int X509_set1_notAfter(X509 *x509, const ASN1_TIME *tm);
+
+// X509_getm_notBefore returns a mutable pointer to |x509|'s notBefore time.
+OPENSSL_EXPORT ASN1_TIME *X509_getm_notBefore(X509 *x509);
+
+// X509_getm_notAfter returns a mutable pointer to |x509|'s notAfter time.
+OPENSSL_EXPORT ASN1_TIME *X509_getm_notAfter(X509 *x);
+
+// X509_get_notBefore returns |x509|'s notBefore time. Note this function is not
+// const-correct for legacy reasons. Use |X509_get0_notBefore| or
 // |X509_getm_notBefore| instead.
 OPENSSL_EXPORT ASN1_TIME *X509_get_notBefore(const X509 *x509);
 
-// X509_get_notAfter returns |x509|'s notAfter value. Note this function is not
+// X509_get_notAfter returns |x509|'s notAfter time. Note this function is not
 // const-correct for legacy reasons. Use |X509_get0_notAfter| or
 // |X509_getm_notAfter| instead.
 OPENSSL_EXPORT ASN1_TIME *X509_get_notAfter(const X509 *x509);
 
+// X509_set_notBefore calls |X509_set1_notBefore|. Use |X509_set1_notBefore|
+// instead.
+OPENSSL_EXPORT int X509_set_notBefore(X509 *x509, const ASN1_TIME *tm);
+
+// X509_set_notAfter calls |X509_set1_notAfter|. Use |X509_set1_notAfter|
+// instead.
+OPENSSL_EXPORT int X509_set_notAfter(X509 *x509, const ASN1_TIME *tm);
+
+// X509_get0_uids sets |*out_issuer_uid| and |*out_subject_uid| to non-owning
+// pointers to the issuerUID and subjectUID fields, respectively, of |x509|.
+// Either output pointer may be NULL to skip the field.
+OPENSSL_EXPORT void X509_get0_uids(const X509 *x509,
+                                   const ASN1_BIT_STRING **out_issuer_uid,
+                                   const ASN1_BIT_STRING **out_subject_uid);
+
 // X509_get_cert_info returns |x509|'s TBSCertificate structure. Note this
 // function is not const-correct for legacy reasons.
 //
@@ -514,6 +541,15 @@
 // |X509_get_pubkey| instead.
 #define X509_extract_key(x) X509_get_pubkey(x)
 
+// X509_get_pathlen returns path length constraint from the basic constraints
+// extension in |x509|. (See RFC5280, section 4.2.1.9.) It returns -1 if the
+// constraint is not present, or if some extension in |x509| was invalid.
+//
+// Note that decoding an |X509| object will not check for invalid extensions. To
+// detect the error case, call |X509_get_extensions_flags| and check the
+// |EXFLAG_INVALID| bit.
+OPENSSL_EXPORT long X509_get_pathlen(X509 *x509);
+
 // X509_REQ_get_version returns the numerical value of |req|'s version. That is,
 // it returns zero for a v1 request. If |req| is invalid, it may return another
 // value, or -1 on overflow.
@@ -537,15 +573,29 @@
 // X509_CRL_get0_lastUpdate returns |crl|'s lastUpdate time.
 OPENSSL_EXPORT const ASN1_TIME *X509_CRL_get0_lastUpdate(const X509_CRL *crl);
 
-// X509_CRL_get0_lastUpdate returns |crl|'s nextUpdate time.
+// X509_CRL_get0_nextUpdate returns |crl|'s nextUpdate time, or NULL if |crl|
+// has none.
 OPENSSL_EXPORT const ASN1_TIME *X509_CRL_get0_nextUpdate(const X509_CRL *crl);
 
+// X509_CRL_set1_lastUpdate sets |crl|'s lastUpdate time to |tm|. It returns one
+// on success and zero on error.
+OPENSSL_EXPORT int X509_CRL_set1_lastUpdate(X509_CRL *crl, const ASN1_TIME *tm);
+
+// X509_CRL_set1_nextUpdate sets |crl|'s nextUpdate time to |tm|. It returns one
+// on success and zero on error.
+OPENSSL_EXPORT int X509_CRL_set1_nextUpdate(X509_CRL *crl, const ASN1_TIME *tm);
+
+// The following symbols are deprecated aliases to |X509_CRL_set1_*|.
+#define X509_CRL_set_lastUpdate X509_CRL_set1_lastUpdate
+#define X509_CRL_set_nextUpdate X509_CRL_set1_nextUpdate
+
 // X509_CRL_get_lastUpdate returns a mutable pointer to |crl|'s lastUpdate time.
-// Use |X509_CRL_get0_lastUpdate| or |X509_CRL_set_lastUpdate| instead.
+// Use |X509_CRL_get0_lastUpdate| or |X509_CRL_set1_lastUpdate| instead.
 OPENSSL_EXPORT ASN1_TIME *X509_CRL_get_lastUpdate(X509_CRL *crl);
 
-// X509_CRL_get_nextUpdate returns a mutable pointer to |crl|'s nextUpdate time.
-// Use |X509_CRL_get0_nextUpdate| or |X509_CRL_set_nextUpdate| instead.
+// X509_CRL_get_nextUpdate returns a mutable pointer to |crl|'s nextUpdate time,
+// or NULL if |crl| has none. Use |X509_CRL_get0_nextUpdate| or
+// |X509_CRL_set1_nextUpdate| instead.
 OPENSSL_EXPORT ASN1_TIME *X509_CRL_get_nextUpdate(X509_CRL *crl);
 
 // X509_CRL_get_issuer returns |crl|'s issuer name. Note this function is not
@@ -559,6 +609,10 @@
 // would break existing callers. For now, we match upstream.
 OPENSSL_EXPORT STACK_OF(X509_REVOKED) *X509_CRL_get_REVOKED(X509_CRL *crl);
 
+// X509_CRL_get0_extensions returns |crl|'s extension list.
+OPENSSL_EXPORT const STACK_OF(X509_EXTENSION) *X509_CRL_get0_extensions(
+    const X509_CRL *crl);
+
 // X509_CINF_set_modified marks |cinf| as modified so that changes will be
 // reflected in serializing the structure.
 //
@@ -575,6 +629,17 @@
 // |X509_get0_tbs_sigalg| instead.
 OPENSSL_EXPORT const X509_ALGOR *X509_CINF_get_signature(const X509_CINF *cinf);
 
+// X509_SIG_get0 sets |*out_alg| and |*out_digest| to non-owning pointers to
+// |sig|'s algorithm and digest fields, respectively. Either |out_alg| or
+// |out_digest| may be NULL to skip that field.
+OPENSSL_EXPORT void X509_SIG_get0(const X509_SIG *sig,
+                                  const X509_ALGOR **out_alg,
+                                  const ASN1_OCTET_STRING **out_digest);
+
+// X509_SIG_getm behaves like |X509_SIG_get0| but returns mutable pointers.
+OPENSSL_EXPORT void X509_SIG_getm(X509_SIG *sig, X509_ALGOR **out_alg,
+                                  ASN1_OCTET_STRING **out_digest);
+
 OPENSSL_EXPORT void X509_CRL_set_default_method(const X509_CRL_METHOD *meth);
 OPENSSL_EXPORT X509_CRL_METHOD *X509_CRL_METHOD_new(
     int (*crl_init)(X509_CRL *crl), int (*crl_free)(X509_CRL *crl),
@@ -589,50 +654,158 @@
 // X509_get_X509_PUBKEY returns the public key of |x509|. Note this function is
 // not const-correct for legacy reasons. Callers should not modify the returned
 // object.
-X509_PUBKEY *X509_get_X509_PUBKEY(const X509 *x509);
+OPENSSL_EXPORT X509_PUBKEY *X509_get_X509_PUBKEY(const X509 *x509);
 
-OPENSSL_EXPORT const char *X509_verify_cert_error_string(long n);
+// X509_verify_cert_error_string returns |err| as a human-readable string, where
+// |err| should be one of the |X509_V_*| values. If |err| is unknown, it returns
+// a default description.
+//
+// TODO(davidben): Move this function to x509_vfy.h, with the |X509_V_*|
+// definitions, or fold x509_vfy.h into this header.
+OPENSSL_EXPORT const char *X509_verify_cert_error_string(long err);
 
-#ifndef OPENSSL_NO_EVP
-OPENSSL_EXPORT int X509_verify(X509 *a, EVP_PKEY *r);
+// X509_verify checks that |x509| has a valid signature by |pkey|. It returns
+// one if the signature is valid and zero otherwise. Note this function only
+// checks the signature itself and does not perform a full certificate
+// validation.
+OPENSSL_EXPORT int X509_verify(X509 *x509, EVP_PKEY *pkey);
 
-OPENSSL_EXPORT int X509_REQ_verify(X509_REQ *a, EVP_PKEY *r);
-OPENSSL_EXPORT int X509_CRL_verify(X509_CRL *a, EVP_PKEY *r);
-OPENSSL_EXPORT int NETSCAPE_SPKI_verify(NETSCAPE_SPKI *a, EVP_PKEY *r);
+// X509_REQ_verify checks that |req| has a valid signature by |pkey|. It returns
+// one if the signature is valid and zero otherwise.
+OPENSSL_EXPORT int X509_REQ_verify(X509_REQ *req, EVP_PKEY *pkey);
 
+// X509_CRL_verify checks that |crl| has a valid signature by |pkey|. It returns
+// one if the signature is valid and zero otherwise.
+OPENSSL_EXPORT int X509_CRL_verify(X509_CRL *crl, EVP_PKEY *pkey);
+
+// NETSCAPE_SPKI_verify checks that |spki| has a valid signature by |pkey|. It
+// returns one if the signature is valid and zero otherwise.
+OPENSSL_EXPORT int NETSCAPE_SPKI_verify(NETSCAPE_SPKI *spki, EVP_PKEY *pkey);
+
+// NETSCAPE_SPKI_b64_decode decodes |len| bytes from |str| as a base64-encoded
+// Netscape signed public key and challenge (SPKAC) structure. It returns a
+// newly-allocated |NETSCAPE_SPKI| structure with the result, or NULL on error.
+// If |len| is 0 or negative, the length is calculated with |strlen| and |str|
+// must be a NUL-terminated C string.
 OPENSSL_EXPORT NETSCAPE_SPKI *NETSCAPE_SPKI_b64_decode(const char *str,
                                                        int len);
-OPENSSL_EXPORT char *NETSCAPE_SPKI_b64_encode(NETSCAPE_SPKI *x);
-OPENSSL_EXPORT EVP_PKEY *NETSCAPE_SPKI_get_pubkey(NETSCAPE_SPKI *x);
-OPENSSL_EXPORT int NETSCAPE_SPKI_set_pubkey(NETSCAPE_SPKI *x, EVP_PKEY *pkey);
 
-OPENSSL_EXPORT int NETSCAPE_SPKI_print(BIO *out, NETSCAPE_SPKI *spki);
+// NETSCAPE_SPKI_b64_encode encodes |spki| as a base64-encoded Netscape signed
+// public key and challenge (SPKAC) structure. It returns a newly-allocated
+// NUL-terminated C string with the result, or NULL on error. The caller must
+// release the memory with |OPENSSL_free| when done.
+OPENSSL_EXPORT char *NETSCAPE_SPKI_b64_encode(NETSCAPE_SPKI *spki);
 
-OPENSSL_EXPORT int X509_signature_dump(BIO *bp, const ASN1_STRING *sig,
+// NETSCAPE_SPKI_get_pubkey decodes and returns the public key in |spki| as an
+// |EVP_PKEY|, or NULL on error. The caller takes ownership of the resulting
+// pointer and must call |EVP_PKEY_free| when done.
+OPENSSL_EXPORT EVP_PKEY *NETSCAPE_SPKI_get_pubkey(NETSCAPE_SPKI *spki);
+
+// NETSCAPE_SPKI_set_pubkey sets |spki|'s public key to |pkey|. It returns one
+// on success or zero on error. This function does not take ownership of |pkey|,
+// so the caller may continue to manage its lifetime independently of |spki|.
+OPENSSL_EXPORT int NETSCAPE_SPKI_set_pubkey(NETSCAPE_SPKI *spki,
+                                            EVP_PKEY *pkey);
+
+// X509_signature_dump writes a human-readable representation of |sig| to |bio|,
+// indented with |indent| spaces. It returns one on success and zero on error.
+OPENSSL_EXPORT int X509_signature_dump(BIO *bio, const ASN1_STRING *sig,
                                        int indent);
-OPENSSL_EXPORT int X509_signature_print(BIO *bp, const X509_ALGOR *alg,
+
+// X509_signature_print writes a human-readable representation of |alg| and
+// |sig| to |bio|. It returns one on success and zero on error.
+OPENSSL_EXPORT int X509_signature_print(BIO *bio, const X509_ALGOR *alg,
                                         const ASN1_STRING *sig);
 
-OPENSSL_EXPORT int X509_sign(X509 *x, EVP_PKEY *pkey, const EVP_MD *md);
-OPENSSL_EXPORT int X509_sign_ctx(X509 *x, EVP_MD_CTX *ctx);
-OPENSSL_EXPORT int X509_REQ_sign(X509_REQ *x, EVP_PKEY *pkey, const EVP_MD *md);
-OPENSSL_EXPORT int X509_REQ_sign_ctx(X509_REQ *x, EVP_MD_CTX *ctx);
-OPENSSL_EXPORT int X509_CRL_sign(X509_CRL *x, EVP_PKEY *pkey, const EVP_MD *md);
-OPENSSL_EXPORT int X509_CRL_sign_ctx(X509_CRL *x, EVP_MD_CTX *ctx);
-OPENSSL_EXPORT int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *x, EVP_PKEY *pkey,
+// X509_sign signs |x509| with |pkey| and replaces the signature algorithm and
+// signature fields. It returns one on success and zero on error. This function
+// uses digest algorithm |md|, or |pkey|'s default if NULL. Other signing
+// parameters use |pkey|'s defaults. To customize them, use |X509_sign_ctx|.
+OPENSSL_EXPORT int X509_sign(X509 *x509, EVP_PKEY *pkey, const EVP_MD *md);
+
+// X509_sign_ctx signs |x509| with |ctx| and replaces the signature algorithm
+// and signature fields. It returns one on success and zero on error. The
+// signature algorithm and parameters come from |ctx|, which must have been
+// initialized with |EVP_DigestSignInit|. The caller should configure the
+// corresponding |EVP_PKEY_CTX| before calling this function.
+OPENSSL_EXPORT int X509_sign_ctx(X509 *x509, EVP_MD_CTX *ctx);
+
+// X509_REQ_sign signs |req| with |pkey| and replaces the signature algorithm
+// and signature fields. It returns one on success and zero on error. This
+// function uses digest algorithm |md|, or |pkey|'s default if NULL. Other
+// signing parameters use |pkey|'s defaults. To customize them, use
+// |X509_REQ_sign_ctx|.
+OPENSSL_EXPORT int X509_REQ_sign(X509_REQ *req, EVP_PKEY *pkey,
+                                 const EVP_MD *md);
+
+// X509_REQ_sign_ctx signs |req| with |ctx| and replaces the signature algorithm
+// and signature fields. It returns one on success and zero on error. The
+// signature algorithm and parameters come from |ctx|, which must have been
+// initialized with |EVP_DigestSignInit|. The caller should configure the
+// corresponding |EVP_PKEY_CTX| before calling this function.
+OPENSSL_EXPORT int X509_REQ_sign_ctx(X509_REQ *req, EVP_MD_CTX *ctx);
+
+// X509_CRL_sign signs |crl| with |pkey| and replaces the signature algorithm
+// and signature fields. It returns one on success and zero on error. This
+// function uses digest algorithm |md|, or |pkey|'s default if NULL. Other
+// signing parameters use |pkey|'s defaults. To customize them, use
+// |X509_CRL_sign_ctx|.
+OPENSSL_EXPORT int X509_CRL_sign(X509_CRL *crl, EVP_PKEY *pkey,
+                                 const EVP_MD *md);
+
+// X509_CRL_sign_ctx signs |crl| with |ctx| and replaces the signature algorithm
+// and signature fields. It returns one on success and zero on error. The
+// signature algorithm and parameters come from |ctx|, which must have been
+// initialized with |EVP_DigestSignInit|. The caller should configure the
+// corresponding |EVP_PKEY_CTX| before calling this function.
+OPENSSL_EXPORT int X509_CRL_sign_ctx(X509_CRL *crl, EVP_MD_CTX *ctx);
+
+// NETSCAPE_SPKI_sign signs |spki| with |pkey| and replaces the signature
+// algorithm and signature fields. It returns one on success and zero on error.
+// This function uses digest algorithm |md|, or |pkey|'s default if NULL. Other
+// signing parameters use |pkey|'s defaults.
+OPENSSL_EXPORT int NETSCAPE_SPKI_sign(NETSCAPE_SPKI *spki, EVP_PKEY *pkey,
                                       const EVP_MD *md);
 
-OPENSSL_EXPORT int X509_pubkey_digest(const X509 *data, const EVP_MD *type,
-                                      unsigned char *md, unsigned int *len);
-OPENSSL_EXPORT int X509_digest(const X509 *data, const EVP_MD *type,
-                               unsigned char *md, unsigned int *len);
-OPENSSL_EXPORT int X509_CRL_digest(const X509_CRL *data, const EVP_MD *type,
-                                   unsigned char *md, unsigned int *len);
-OPENSSL_EXPORT int X509_REQ_digest(const X509_REQ *data, const EVP_MD *type,
-                                   unsigned char *md, unsigned int *len);
-OPENSSL_EXPORT int X509_NAME_digest(const X509_NAME *data, const EVP_MD *type,
-                                    unsigned char *md, unsigned int *len);
-#endif
+// X509_pubkey_digest hashes the DER encoding of |x509|'s subjectPublicKeyInfo
+// field with |md| and writes the result to |out|. |EVP_MD_CTX_size| bytes are
+// written, which is at most |EVP_MAX_MD_SIZE|. If |out_len| is not NULL,
+// |*out_len| is set to the number of bytes written. This function returns one
+// on success and zero on error.
+OPENSSL_EXPORT int X509_pubkey_digest(const X509 *x509, const EVP_MD *md,
+                                      uint8_t *out, unsigned *out_len);
+
+// X509_digest hashes |x509|'s DER encoding with |md| and writes the result to
+// |out|. |EVP_MD_CTX_size| bytes are written, which is at most
+// |EVP_MAX_MD_SIZE|. If |out_len| is not NULL, |*out_len| is set to the number
+// of bytes written. This function returns one on success and zero on error.
+// Note this digest covers the entire certificate, not just the signed portion.
+OPENSSL_EXPORT int X509_digest(const X509 *x509, const EVP_MD *md, uint8_t *out,
+                               unsigned *out_len);
+
+// X509_CRL_digest hashes |crl|'s DER encoding with |md| and writes the result
+// to |out|. |EVP_MD_CTX_size| bytes are written, which is at most
+// |EVP_MAX_MD_SIZE|. If |out_len| is not NULL, |*out_len| is set to the number
+// of bytes written. This function returns one on success and zero on error.
+// Note this digest covers the entire CRL, not just the signed portion.
+OPENSSL_EXPORT int X509_CRL_digest(const X509_CRL *crl, const EVP_MD *md,
+                                   uint8_t *out, unsigned *out_len);
+
+// X509_REQ_digest hashes |req|'s DER encoding with |md| and writes the result
+// to |out|. |EVP_MD_CTX_size| bytes are written, which is at most
+// |EVP_MAX_MD_SIZE|. If |out_len| is not NULL, |*out_len| is set to the number
+// of bytes written. This function returns one on success and zero on error.
+// Note this digest covers the entire certificate request, not just the signed
+// portion.
+OPENSSL_EXPORT int X509_REQ_digest(const X509_REQ *req, const EVP_MD *md,
+                                   uint8_t *out, unsigned *out_len);
+
+// X509_NAME_digest hashes |name|'s DER encoding with |md| and writes the result
+// to |out|. |EVP_MD_CTX_size| bytes are written, which is at most
+// |EVP_MAX_MD_SIZE|. If |out_len| is not NULL, |*out_len| is set to the number
+// of bytes written. This function returns one on success and zero on error.
+OPENSSL_EXPORT int X509_NAME_digest(const X509_NAME *name, const EVP_MD *md,
+                                    uint8_t *out, unsigned *out_len);
 
 // X509_parse_from_buffer parses an X.509 structure from |buf| and returns a
 // fresh X509 or NULL on error. There must not be any trailing data in |buf|.
@@ -759,20 +932,6 @@
 
 OPENSSL_EXPORT int X509_PUBKEY_set(X509_PUBKEY **x, EVP_PKEY *pkey);
 OPENSSL_EXPORT EVP_PKEY *X509_PUBKEY_get(X509_PUBKEY *key);
-OPENSSL_EXPORT int i2d_PUBKEY(const EVP_PKEY *a, unsigned char **pp);
-OPENSSL_EXPORT EVP_PKEY *d2i_PUBKEY(EVP_PKEY **a, const unsigned char **pp,
-                                    long length);
-OPENSSL_EXPORT int i2d_RSA_PUBKEY(const RSA *a, unsigned char **pp);
-OPENSSL_EXPORT RSA *d2i_RSA_PUBKEY(RSA **a, const unsigned char **pp,
-                                   long length);
-#ifndef OPENSSL_NO_DSA
-OPENSSL_EXPORT int i2d_DSA_PUBKEY(const DSA *a, unsigned char **pp);
-OPENSSL_EXPORT DSA *d2i_DSA_PUBKEY(DSA **a, const unsigned char **pp,
-                                   long length);
-#endif
-OPENSSL_EXPORT int i2d_EC_PUBKEY(const EC_KEY *a, unsigned char **pp);
-OPENSSL_EXPORT EC_KEY *d2i_EC_PUBKEY(EC_KEY **a, const unsigned char **pp,
-                                     long length);
 
 DECLARE_ASN1_FUNCTIONS(X509_SIG)
 DECLARE_ASN1_FUNCTIONS(X509_REQ_INFO)
@@ -809,14 +968,58 @@
 OPENSSL_EXPORT X509 *d2i_X509_AUX(X509 **a, const unsigned char **pp,
                                   long length);
 
-OPENSSL_EXPORT int i2d_re_X509_tbs(X509 *x, unsigned char **pp);
+// i2d_re_X509_tbs serializes the TBSCertificate portion of |x509|. If |outp| is
+// NULL, nothing is written. Otherwise, if |*outp| is not NULL, the result is
+// written to |*outp|, which must have enough space available, and |*outp| is
+// advanced just past the output. If |outp| is non-NULL and |*outp| is NULL, it
+// sets |*outp| to a newly-allocated buffer containing the result. The caller is
+// responsible for releasing the buffer with |OPENSSL_free|. In all cases, this
+// function returns the number of bytes in the result, whether written or not,
+// or a negative value on error.
+//
+// This function re-encodes the TBSCertificate and may not reflect |x509|'s
+// original encoding. It may be used to manually generate a signature for a new
+// certificate. To verify certificates, use |i2d_X509_tbs| instead.
+OPENSSL_EXPORT int i2d_re_X509_tbs(X509 *x509, unsigned char **outp);
+
+// i2d_X509_tbs serializes the TBSCertificate portion of |x509|. If |outp| is
+// NULL, nothing is written. Otherwise, if |*outp| is not NULL, the result is
+// written to |*outp|, which must have enough space available, and |*outp| is
+// advanced just past the output. If |outp| is non-NULL and |*outp| is NULL, it
+// sets |*outp| to a newly-allocated buffer containing the result. The caller is
+// responsible for releasing the buffer with |OPENSSL_free|. In all cases, this
+// function returns the number of bytes in the result, whether written or not,
+// or a negative value on error.
+//
+// This function preserves the original encoding of the TBSCertificate and may
+// not reflect modifications made to |x509|. It may be used to manually verify
+// the signature of an existing certificate. To generate certificates, use
+// |i2d_re_X509_tbs| instead.
+OPENSSL_EXPORT int i2d_X509_tbs(X509 *x509, unsigned char **outp);
+
+// X509_set1_signature_algo sets |x509|'s signature algorithm to |algo| and
+// returns one on success or zero on error. It updates both the signature field
+// of the TBSCertificate structure, and the signatureAlgorithm field of the
+// Certificate.
+OPENSSL_EXPORT int X509_set1_signature_algo(X509 *x509, const X509_ALGOR *algo);
+
+// X509_set1_signature_value sets |x509|'s signature to a copy of the |sig_len|
+// bytes pointed to by |sig|. It returns one on success and zero on error.
+//
+// Due to a specification error, X.509 certificates store signatures in ASN.1
+// BIT STRINGs, but signature algorithms return byte strings rather than bit
+// strings. This function creates a BIT STRING containing a whole number of
+// bytes, with the bit order matching the DER encoding. This matches the
+// encoding used by all X.509 signature algorithms.
+OPENSSL_EXPORT int X509_set1_signature_value(X509 *x509, const uint8_t *sig,
+                                             size_t sig_len);
 
 OPENSSL_EXPORT void X509_get0_signature(const ASN1_BIT_STRING **psig,
                                         const X509_ALGOR **palg, const X509 *x);
 OPENSSL_EXPORT int X509_get_signature_nid(const X509 *x);
 
-OPENSSL_EXPORT int X509_alias_set1(X509 *x, unsigned char *name, int len);
-OPENSSL_EXPORT int X509_keyid_set1(X509 *x, unsigned char *id, int len);
+OPENSSL_EXPORT int X509_alias_set1(X509 *x, const unsigned char *name, int len);
+OPENSSL_EXPORT int X509_keyid_set1(X509 *x, const unsigned char *id, int len);
 OPENSSL_EXPORT unsigned char *X509_alias_get0(X509 *x, int *len);
 OPENSSL_EXPORT unsigned char *X509_keyid_get0(X509 *x, int *len);
 OPENSSL_EXPORT int (*X509_TRUST_set_default(int (*trust)(int, X509 *,
@@ -844,10 +1047,9 @@
 DECLARE_ASN1_FUNCTIONS(NETSCAPE_SPKI)
 DECLARE_ASN1_FUNCTIONS(NETSCAPE_SPKAC)
 
-#ifndef OPENSSL_NO_EVP
 OPENSSL_EXPORT X509_INFO *X509_INFO_new(void);
 OPENSSL_EXPORT void X509_INFO_free(X509_INFO *a);
-OPENSSL_EXPORT char *X509_NAME_oneline(X509_NAME *a, char *buf, int size);
+OPENSSL_EXPORT char *X509_NAME_oneline(const X509_NAME *a, char *buf, int size);
 
 OPENSSL_EXPORT int ASN1_digest(i2d_of_void *i2d, const EVP_MD *type, char *data,
                                unsigned char *md, unsigned int *len);
@@ -868,25 +1070,19 @@
                                       X509_ALGOR *algor2,
                                       ASN1_BIT_STRING *signature, void *asn,
                                       EVP_MD_CTX *ctx);
-#endif
 
 OPENSSL_EXPORT int X509_set_version(X509 *x, long version);
 OPENSSL_EXPORT int X509_set_serialNumber(X509 *x, ASN1_INTEGER *serial);
 OPENSSL_EXPORT ASN1_INTEGER *X509_get_serialNumber(X509 *x);
 OPENSSL_EXPORT int X509_set_issuer_name(X509 *x, X509_NAME *name);
-OPENSSL_EXPORT X509_NAME *X509_get_issuer_name(X509 *a);
+OPENSSL_EXPORT X509_NAME *X509_get_issuer_name(const X509 *a);
 OPENSSL_EXPORT int X509_set_subject_name(X509 *x, X509_NAME *name);
-OPENSSL_EXPORT X509_NAME *X509_get_subject_name(X509 *a);
-OPENSSL_EXPORT int X509_set_notBefore(X509 *x, const ASN1_TIME *tm);
-OPENSSL_EXPORT const ASN1_TIME *X509_get0_notBefore(const X509 *x);
-OPENSSL_EXPORT ASN1_TIME *X509_getm_notBefore(X509 *x);
-OPENSSL_EXPORT int X509_set_notAfter(X509 *x, const ASN1_TIME *tm);
-OPENSSL_EXPORT const ASN1_TIME *X509_get0_notAfter(const X509 *x);
-OPENSSL_EXPORT ASN1_TIME *X509_getm_notAfter(X509 *x);
+OPENSSL_EXPORT X509_NAME *X509_get_subject_name(const X509 *a);
 OPENSSL_EXPORT int X509_set_pubkey(X509 *x, EVP_PKEY *pkey);
 OPENSSL_EXPORT EVP_PKEY *X509_get_pubkey(X509 *x);
 OPENSSL_EXPORT ASN1_BIT_STRING *X509_get0_pubkey_bitstr(const X509 *x);
-OPENSSL_EXPORT STACK_OF(X509_EXTENSION) * X509_get0_extensions(const X509 *x);
+OPENSSL_EXPORT const STACK_OF(X509_EXTENSION) *X509_get0_extensions(
+    const X509 *x);
 OPENSSL_EXPORT const X509_ALGOR *X509_get0_tbs_sigalg(const X509 *x);
 
 OPENSSL_EXPORT int X509_REQ_set_version(X509_REQ *x, long version);
@@ -901,13 +1097,12 @@
 OPENSSL_EXPORT int X509_REQ_extension_nid(int nid);
 OPENSSL_EXPORT const int *X509_REQ_get_extension_nids(void);
 OPENSSL_EXPORT void X509_REQ_set_extension_nids(const int *nids);
-OPENSSL_EXPORT STACK_OF(X509_EXTENSION) *
-    X509_REQ_get_extensions(X509_REQ *req);
+OPENSSL_EXPORT STACK_OF(X509_EXTENSION) *X509_REQ_get_extensions(X509_REQ *req);
 OPENSSL_EXPORT int X509_REQ_add_extensions_nid(X509_REQ *req,
-                                               STACK_OF(X509_EXTENSION) * exts,
+                                               STACK_OF(X509_EXTENSION) *exts,
                                                int nid);
 OPENSSL_EXPORT int X509_REQ_add_extensions(X509_REQ *req,
-                                           STACK_OF(X509_EXTENSION) * exts);
+                                           STACK_OF(X509_EXTENSION) *exts);
 OPENSSL_EXPORT int X509_REQ_get_attr_count(const X509_REQ *req);
 OPENSSL_EXPORT int X509_REQ_get_attr_by_NID(const X509_REQ *req, int nid,
                                             int lastpos);
@@ -930,8 +1125,6 @@
 
 OPENSSL_EXPORT int X509_CRL_set_version(X509_CRL *x, long version);
 OPENSSL_EXPORT int X509_CRL_set_issuer_name(X509_CRL *x, X509_NAME *name);
-OPENSSL_EXPORT int X509_CRL_set_lastUpdate(X509_CRL *x, const ASN1_TIME *tm);
-OPENSSL_EXPORT int X509_CRL_set_nextUpdate(X509_CRL *x, const ASN1_TIME *tm);
 OPENSSL_EXPORT int X509_CRL_sort(X509_CRL *crl);
 OPENSSL_EXPORT int X509_CRL_up_ref(X509_CRL *crl);
 
@@ -939,16 +1132,59 @@
                                             const ASN1_BIT_STRING **psig,
                                             const X509_ALGOR **palg);
 OPENSSL_EXPORT int X509_CRL_get_signature_nid(const X509_CRL *crl);
-OPENSSL_EXPORT int i2d_re_X509_CRL_tbs(X509_CRL *req, unsigned char **pp);
 
+// i2d_re_X509_CRL_tbs serializes the TBSCertList portion of |crl|. If |outp| is
+// NULL, nothing is written. Otherwise, if |*outp| is not NULL, the result is
+// written to |*outp|, which must have enough space available, and |*outp| is
+// advanced just past the output. If |outp| is non-NULL and |*outp| is NULL, it
+// sets |*outp| to a newly-allocated buffer containing the result. The caller is
+// responsible for releasing the buffer with |OPENSSL_free|. In all cases, this
+// function returns the number of bytes in the result, whether written or not,
+// or a negative value on error.
+//
+// This function re-encodes the TBSCertList and may not reflect |crl|'s original
+// encoding. It may be used to manually generate a signature for a new CRL. To
+// verify CRLs, use |i2d_X509_CRL_tbs| instead.
+OPENSSL_EXPORT int i2d_re_X509_CRL_tbs(X509_CRL *crl, unsigned char **outp);
+
+// i2d_X509_CRL_tbs serializes the TBSCertList portion of |crl|. If |outp| is
+// NULL, nothing is written. Otherwise, if |*outp| is not NULL, the result is
+// written to |*outp|, which must have enough space available, and |*outp| is
+// advanced just past the output. If |outp| is non-NULL and |*outp| is NULL, it
+// sets |*outp| to a newly-allocated buffer containing the result. The caller is
+// responsible for releasing the buffer with |OPENSSL_free|. In all cases, this
+// function returns the number of bytes in the result, whether written or not,
+// or a negative value on error.
+//
+// This function preserves the original encoding of the TBSCertList and may not
+// reflect modifications made to |crl|. It may be used to manually verify the
+// signature of an existing CRL. To generate CRLs, use |i2d_re_X509_CRL_tbs|
+// instead.
+OPENSSL_EXPORT int i2d_X509_CRL_tbs(X509_CRL *crl, unsigned char **outp);
+
+// X509_REVOKED_get0_serialNumber returns the serial number of the certificate
+// revoked by |revoked|.
 OPENSSL_EXPORT const ASN1_INTEGER *X509_REVOKED_get0_serialNumber(
-    const X509_REVOKED *x);
-OPENSSL_EXPORT int X509_REVOKED_set_serialNumber(X509_REVOKED *x,
-                                                 ASN1_INTEGER *serial);
+    const X509_REVOKED *revoked);
+
+// X509_REVOKED_set_serialNumber sets |revoked|'s serial number to |serial|. It
+// returns one on success or zero on error.
+OPENSSL_EXPORT int X509_REVOKED_set_serialNumber(X509_REVOKED *revoked,
+                                                 const ASN1_INTEGER *serial);
+
+// X509_REVOKED_get0_revocationDate returns the revocation time of the
+// certificate revoked by |revoked|.
 OPENSSL_EXPORT const ASN1_TIME *X509_REVOKED_get0_revocationDate(
-    const X509_REVOKED *x);
-OPENSSL_EXPORT int X509_REVOKED_set_revocationDate(X509_REVOKED *r,
-                                                   ASN1_TIME *tm);
+    const X509_REVOKED *revoked);
+
+// X509_REVOKED_set_revocationDate sets |revoked|'s revocation time to |tm|. It
+// returns one on success or zero on error.
+OPENSSL_EXPORT int X509_REVOKED_set_revocationDate(X509_REVOKED *revoked,
+                                                   const ASN1_TIME *tm);
+
+// X509_REVOKED_get0_extensions returns |r|'s extensions.
+OPENSSL_EXPORT const STACK_OF(X509_EXTENSION) *X509_REVOKED_get0_extensions(
+    const X509_REVOKED *r);
 
 OPENSSL_EXPORT X509_CRL *X509_CRL_diff(X509_CRL *base, X509_CRL *newer,
                                        EVP_PKEY *skey, const EVP_MD *md,
@@ -958,11 +1194,11 @@
 
 OPENSSL_EXPORT int X509_check_private_key(X509 *x509, const EVP_PKEY *pkey);
 OPENSSL_EXPORT int X509_chain_check_suiteb(int *perror_depth, X509 *x,
-                                           STACK_OF(X509) * chain,
+                                           STACK_OF(X509) *chain,
                                            unsigned long flags);
 OPENSSL_EXPORT int X509_CRL_check_suiteb(X509_CRL *crl, EVP_PKEY *pk,
                                          unsigned long flags);
-OPENSSL_EXPORT STACK_OF(X509) * X509_chain_up_ref(STACK_OF(X509) * chain);
+OPENSSL_EXPORT STACK_OF(X509) *X509_chain_up_ref(STACK_OF(X509) *chain);
 
 OPENSSL_EXPORT int X509_issuer_and_serial_cmp(const X509 *a, const X509 *b);
 OPENSSL_EXPORT unsigned long X509_issuer_and_serial_hash(X509 *a);
@@ -989,12 +1225,12 @@
 OPENSSL_EXPORT int X509_print_fp(FILE *bp, X509 *x);
 OPENSSL_EXPORT int X509_CRL_print_fp(FILE *bp, X509_CRL *x);
 OPENSSL_EXPORT int X509_REQ_print_fp(FILE *bp, X509_REQ *req);
-OPENSSL_EXPORT int X509_NAME_print_ex_fp(FILE *fp, X509_NAME *nm, int indent,
-                                         unsigned long flags);
+OPENSSL_EXPORT int X509_NAME_print_ex_fp(FILE *fp, const X509_NAME *nm,
+                                         int indent, unsigned long flags);
 #endif
 
-OPENSSL_EXPORT int X509_NAME_print(BIO *bp, X509_NAME *name, int obase);
-OPENSSL_EXPORT int X509_NAME_print_ex(BIO *out, X509_NAME *nm, int indent,
+OPENSSL_EXPORT int X509_NAME_print(BIO *bp, const X509_NAME *name, int obase);
+OPENSSL_EXPORT int X509_NAME_print_ex(BIO *out, const X509_NAME *nm, int indent,
                                       unsigned long flags);
 OPENSSL_EXPORT int X509_print_ex(BIO *bp, X509 *x, unsigned long nmflag,
                                  unsigned long cflag);
@@ -1006,21 +1242,22 @@
                                      unsigned long cflag);
 OPENSSL_EXPORT int X509_REQ_print(BIO *bp, X509_REQ *req);
 
-OPENSSL_EXPORT int X509_NAME_entry_count(X509_NAME *name);
-OPENSSL_EXPORT int X509_NAME_get_text_by_NID(X509_NAME *name, int nid,
+OPENSSL_EXPORT int X509_NAME_entry_count(const X509_NAME *name);
+OPENSSL_EXPORT int X509_NAME_get_text_by_NID(const X509_NAME *name, int nid,
                                              char *buf, int len);
-OPENSSL_EXPORT int X509_NAME_get_text_by_OBJ(X509_NAME *name,
+OPENSSL_EXPORT int X509_NAME_get_text_by_OBJ(const X509_NAME *name,
                                              const ASN1_OBJECT *obj, char *buf,
                                              int len);
 
 // NOTE: you should be passsing -1, not 0 as lastpos.  The functions that use
 // lastpos, search after that position on.
-OPENSSL_EXPORT int X509_NAME_get_index_by_NID(X509_NAME *name, int nid,
+OPENSSL_EXPORT int X509_NAME_get_index_by_NID(const X509_NAME *name, int nid,
                                               int lastpos);
-OPENSSL_EXPORT int X509_NAME_get_index_by_OBJ(X509_NAME *name,
+OPENSSL_EXPORT int X509_NAME_get_index_by_OBJ(const X509_NAME *name,
                                               const ASN1_OBJECT *obj,
                                               int lastpos);
-OPENSSL_EXPORT X509_NAME_ENTRY *X509_NAME_get_entry(X509_NAME *name, int loc);
+OPENSSL_EXPORT X509_NAME_ENTRY *X509_NAME_get_entry(const X509_NAME *name,
+                                                    int loc);
 OPENSSL_EXPORT X509_NAME_ENTRY *X509_NAME_delete_entry(X509_NAME *name,
                                                        int loc);
 OPENSSL_EXPORT int X509_NAME_add_entry(X509_NAME *name, X509_NAME_ENTRY *ne,
@@ -1051,105 +1288,148 @@
 OPENSSL_EXPORT int X509_NAME_ENTRY_set_data(X509_NAME_ENTRY *ne, int type,
                                             const unsigned char *bytes,
                                             int len);
-OPENSSL_EXPORT ASN1_OBJECT *X509_NAME_ENTRY_get_object(X509_NAME_ENTRY *ne);
-OPENSSL_EXPORT ASN1_STRING *X509_NAME_ENTRY_get_data(X509_NAME_ENTRY *ne);
+OPENSSL_EXPORT ASN1_OBJECT *X509_NAME_ENTRY_get_object(
+    const X509_NAME_ENTRY *ne);
+OPENSSL_EXPORT ASN1_STRING *X509_NAME_ENTRY_get_data(const X509_NAME_ENTRY *ne);
 
-OPENSSL_EXPORT int X509v3_get_ext_count(const STACK_OF(X509_EXTENSION) * x);
-OPENSSL_EXPORT int X509v3_get_ext_by_NID(const STACK_OF(X509_EXTENSION) * x,
+OPENSSL_EXPORT int X509v3_get_ext_count(const STACK_OF(X509_EXTENSION) *x);
+OPENSSL_EXPORT int X509v3_get_ext_by_NID(const STACK_OF(X509_EXTENSION) *x,
                                          int nid, int lastpos);
-OPENSSL_EXPORT int X509v3_get_ext_by_OBJ(const STACK_OF(X509_EXTENSION) * x,
+OPENSSL_EXPORT int X509v3_get_ext_by_OBJ(const STACK_OF(X509_EXTENSION) *x,
                                          const ASN1_OBJECT *obj, int lastpos);
-OPENSSL_EXPORT int X509v3_get_ext_by_critical(const STACK_OF(X509_EXTENSION) *
-                                                  x,
+OPENSSL_EXPORT int X509v3_get_ext_by_critical(const STACK_OF(X509_EXTENSION) *x,
                                               int crit, int lastpos);
-OPENSSL_EXPORT X509_EXTENSION *X509v3_get_ext(const STACK_OF(X509_EXTENSION) *
-                                                  x,
+OPENSSL_EXPORT X509_EXTENSION *X509v3_get_ext(const STACK_OF(X509_EXTENSION) *x,
                                               int loc);
-OPENSSL_EXPORT X509_EXTENSION *X509v3_delete_ext(STACK_OF(X509_EXTENSION) * x,
+OPENSSL_EXPORT X509_EXTENSION *X509v3_delete_ext(STACK_OF(X509_EXTENSION) *x,
                                                  int loc);
-OPENSSL_EXPORT STACK_OF(X509_EXTENSION) *
-    X509v3_add_ext(STACK_OF(X509_EXTENSION) * *x, X509_EXTENSION *ex, int loc);
+OPENSSL_EXPORT STACK_OF(X509_EXTENSION) *X509v3_add_ext(
+    STACK_OF(X509_EXTENSION) **x, X509_EXTENSION *ex, int loc);
 
-OPENSSL_EXPORT int X509_get_ext_count(X509 *x);
-OPENSSL_EXPORT int X509_get_ext_by_NID(X509 *x, int nid, int lastpos);
-OPENSSL_EXPORT int X509_get_ext_by_OBJ(X509 *x, ASN1_OBJECT *obj, int lastpos);
-OPENSSL_EXPORT int X509_get_ext_by_critical(X509 *x, int crit, int lastpos);
-OPENSSL_EXPORT X509_EXTENSION *X509_get_ext(X509 *x, int loc);
+OPENSSL_EXPORT int X509_get_ext_count(const X509 *x);
+OPENSSL_EXPORT int X509_get_ext_by_NID(const X509 *x, int nid, int lastpos);
+OPENSSL_EXPORT int X509_get_ext_by_OBJ(const X509 *x, const ASN1_OBJECT *obj,
+                                       int lastpos);
+OPENSSL_EXPORT int X509_get_ext_by_critical(const X509 *x, int crit,
+                                            int lastpos);
+OPENSSL_EXPORT X509_EXTENSION *X509_get_ext(const X509 *x, int loc);
 OPENSSL_EXPORT X509_EXTENSION *X509_delete_ext(X509 *x, int loc);
 OPENSSL_EXPORT int X509_add_ext(X509 *x, X509_EXTENSION *ex, int loc);
-OPENSSL_EXPORT void *X509_get_ext_d2i(X509 *x, int nid, int *crit, int *idx);
+
+// X509_get_ext_d2i behaves like |X509V3_get_d2i| but looks for the extension in
+// |x509|'s extension list.
+//
+// WARNING: This function is difficult to use correctly. See the documentation
+// for |X509V3_get_d2i| for details.
+OPENSSL_EXPORT void *X509_get_ext_d2i(const X509 *x509, int nid,
+                                      int *out_critical, int *out_idx);
+
+// X509_add1_ext_i2d behaves like |X509V3_add1_i2d| but adds the extension to
+// |x|'s extension list.
+//
+// WARNING: This function may return zero or -1 on error. The caller must also
+// ensure |value|'s type matches |nid|. See the documentation for
+// |X509V3_add1_i2d| for details.
 OPENSSL_EXPORT int X509_add1_ext_i2d(X509 *x, int nid, void *value, int crit,
                                      unsigned long flags);
 
-OPENSSL_EXPORT int X509_CRL_get_ext_count(X509_CRL *x);
-OPENSSL_EXPORT int X509_CRL_get_ext_by_NID(X509_CRL *x, int nid, int lastpos);
-OPENSSL_EXPORT int X509_CRL_get_ext_by_OBJ(X509_CRL *x, ASN1_OBJECT *obj,
+OPENSSL_EXPORT int X509_CRL_get_ext_count(const X509_CRL *x);
+OPENSSL_EXPORT int X509_CRL_get_ext_by_NID(const X509_CRL *x, int nid,
                                            int lastpos);
-OPENSSL_EXPORT int X509_CRL_get_ext_by_critical(X509_CRL *x, int crit,
+OPENSSL_EXPORT int X509_CRL_get_ext_by_OBJ(const X509_CRL *x,
+                                           const ASN1_OBJECT *obj, int lastpos);
+OPENSSL_EXPORT int X509_CRL_get_ext_by_critical(const X509_CRL *x, int crit,
                                                 int lastpos);
-OPENSSL_EXPORT X509_EXTENSION *X509_CRL_get_ext(X509_CRL *x, int loc);
+OPENSSL_EXPORT X509_EXTENSION *X509_CRL_get_ext(const X509_CRL *x, int loc);
 OPENSSL_EXPORT X509_EXTENSION *X509_CRL_delete_ext(X509_CRL *x, int loc);
 OPENSSL_EXPORT int X509_CRL_add_ext(X509_CRL *x, X509_EXTENSION *ex, int loc);
-OPENSSL_EXPORT void *X509_CRL_get_ext_d2i(X509_CRL *x, int nid, int *crit,
-                                          int *idx);
+
+// X509_CRL_get_ext_d2i behaves like |X509V3_get_d2i| but looks for the
+// extension in |crl|'s extension list.
+//
+// WARNING: This function is difficult to use correctly. See the documentation
+// for |X509V3_get_d2i| for details.
+OPENSSL_EXPORT void *X509_CRL_get_ext_d2i(const X509_CRL *crl, int nid,
+                                          int *out_critical, int *out_idx);
+
+// X509_CRL_add1_ext_i2d behaves like |X509V3_add1_i2d| but adds the extension
+// to |x|'s extension list.
+//
+// WARNING: This function may return zero or -1 on error. The caller must also
+// ensure |value|'s type matches |nid|. See the documentation for
+// |X509V3_add1_i2d| for details.
 OPENSSL_EXPORT int X509_CRL_add1_ext_i2d(X509_CRL *x, int nid, void *value,
                                          int crit, unsigned long flags);
 
-OPENSSL_EXPORT int X509_REVOKED_get_ext_count(X509_REVOKED *x);
-OPENSSL_EXPORT int X509_REVOKED_get_ext_by_NID(X509_REVOKED *x, int nid,
+OPENSSL_EXPORT int X509_REVOKED_get_ext_count(const X509_REVOKED *x);
+OPENSSL_EXPORT int X509_REVOKED_get_ext_by_NID(const X509_REVOKED *x, int nid,
                                                int lastpos);
-OPENSSL_EXPORT int X509_REVOKED_get_ext_by_OBJ(X509_REVOKED *x,
-                                               ASN1_OBJECT *obj, int lastpos);
-OPENSSL_EXPORT int X509_REVOKED_get_ext_by_critical(X509_REVOKED *x, int crit,
-                                                    int lastpos);
-OPENSSL_EXPORT X509_EXTENSION *X509_REVOKED_get_ext(X509_REVOKED *x, int loc);
+OPENSSL_EXPORT int X509_REVOKED_get_ext_by_OBJ(const X509_REVOKED *x,
+                                               const ASN1_OBJECT *obj,
+                                               int lastpos);
+OPENSSL_EXPORT int X509_REVOKED_get_ext_by_critical(const X509_REVOKED *x,
+                                                    int crit, int lastpos);
+OPENSSL_EXPORT X509_EXTENSION *X509_REVOKED_get_ext(const X509_REVOKED *x,
+                                                    int loc);
 OPENSSL_EXPORT X509_EXTENSION *X509_REVOKED_delete_ext(X509_REVOKED *x,
                                                        int loc);
 OPENSSL_EXPORT int X509_REVOKED_add_ext(X509_REVOKED *x, X509_EXTENSION *ex,
                                         int loc);
-OPENSSL_EXPORT void *X509_REVOKED_get_ext_d2i(X509_REVOKED *x, int nid,
-                                              int *crit, int *idx);
+
+// X509_REVOKED_get_ext_d2i behaves like |X509V3_get_d2i| but looks for the
+// extension in |revoked|'s extension list.
+//
+// WARNING: This function is difficult to use correctly. See the documentation
+// for |X509V3_get_d2i| for details.
+OPENSSL_EXPORT void *X509_REVOKED_get_ext_d2i(const X509_REVOKED *revoked,
+                                              int nid, int *out_critical,
+                                              int *out_idx);
+
+// X509_REVOKED_add1_ext_i2d behaves like |X509V3_add1_i2d| but adds the
+// extension to |x|'s extension list.
+//
+// WARNING: This function may return zero or -1 on error. The caller must also
+// ensure |value|'s type matches |nid|. See the documentation for
+// |X509V3_add1_i2d| for details.
 OPENSSL_EXPORT int X509_REVOKED_add1_ext_i2d(X509_REVOKED *x, int nid,
                                              void *value, int crit,
                                              unsigned long flags);
 
 OPENSSL_EXPORT X509_EXTENSION *X509_EXTENSION_create_by_NID(
-    X509_EXTENSION **ex, int nid, int crit, ASN1_OCTET_STRING *data);
+    X509_EXTENSION **ex, int nid, int crit, const ASN1_OCTET_STRING *data);
 OPENSSL_EXPORT X509_EXTENSION *X509_EXTENSION_create_by_OBJ(
     X509_EXTENSION **ex, const ASN1_OBJECT *obj, int crit,
-    ASN1_OCTET_STRING *data);
+    const ASN1_OCTET_STRING *data);
 OPENSSL_EXPORT int X509_EXTENSION_set_object(X509_EXTENSION *ex,
                                              const ASN1_OBJECT *obj);
 OPENSSL_EXPORT int X509_EXTENSION_set_critical(X509_EXTENSION *ex, int crit);
 OPENSSL_EXPORT int X509_EXTENSION_set_data(X509_EXTENSION *ex,
-                                           ASN1_OCTET_STRING *data);
+                                           const ASN1_OCTET_STRING *data);
 OPENSSL_EXPORT ASN1_OBJECT *X509_EXTENSION_get_object(X509_EXTENSION *ex);
 OPENSSL_EXPORT ASN1_OCTET_STRING *X509_EXTENSION_get_data(X509_EXTENSION *ne);
 OPENSSL_EXPORT int X509_EXTENSION_get_critical(X509_EXTENSION *ex);
 
-OPENSSL_EXPORT int X509at_get_attr_count(const STACK_OF(X509_ATTRIBUTE) * x);
-OPENSSL_EXPORT int X509at_get_attr_by_NID(const STACK_OF(X509_ATTRIBUTE) * x,
+OPENSSL_EXPORT int X509at_get_attr_count(const STACK_OF(X509_ATTRIBUTE) *x);
+OPENSSL_EXPORT int X509at_get_attr_by_NID(const STACK_OF(X509_ATTRIBUTE) *x,
                                           int nid, int lastpos);
-OPENSSL_EXPORT int X509at_get_attr_by_OBJ(const STACK_OF(X509_ATTRIBUTE) * sk,
+OPENSSL_EXPORT int X509at_get_attr_by_OBJ(const STACK_OF(X509_ATTRIBUTE) *sk,
                                           const ASN1_OBJECT *obj, int lastpos);
-OPENSSL_EXPORT X509_ATTRIBUTE *X509at_get_attr(const STACK_OF(X509_ATTRIBUTE) *
-                                                   x,
-                                               int loc);
-OPENSSL_EXPORT X509_ATTRIBUTE *X509at_delete_attr(STACK_OF(X509_ATTRIBUTE) * x,
+OPENSSL_EXPORT X509_ATTRIBUTE *X509at_get_attr(
+    const STACK_OF(X509_ATTRIBUTE) *x, int loc);
+OPENSSL_EXPORT X509_ATTRIBUTE *X509at_delete_attr(STACK_OF(X509_ATTRIBUTE) *x,
                                                   int loc);
-OPENSSL_EXPORT STACK_OF(X509_ATTRIBUTE) *
-    X509at_add1_attr(STACK_OF(X509_ATTRIBUTE) * *x, X509_ATTRIBUTE *attr);
-OPENSSL_EXPORT STACK_OF(X509_ATTRIBUTE) *
-    X509at_add1_attr_by_OBJ(STACK_OF(X509_ATTRIBUTE) * *x,
-                            const ASN1_OBJECT *obj, int type,
-                            const unsigned char *bytes, int len);
-OPENSSL_EXPORT STACK_OF(X509_ATTRIBUTE) *
-    X509at_add1_attr_by_NID(STACK_OF(X509_ATTRIBUTE) * *x, int nid, int type,
-                            const unsigned char *bytes, int len);
-OPENSSL_EXPORT STACK_OF(X509_ATTRIBUTE) *
-    X509at_add1_attr_by_txt(STACK_OF(X509_ATTRIBUTE) * *x, const char *attrname,
-                            int type, const unsigned char *bytes, int len);
-OPENSSL_EXPORT void *X509at_get0_data_by_OBJ(STACK_OF(X509_ATTRIBUTE) * x,
+OPENSSL_EXPORT STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr(
+    STACK_OF(X509_ATTRIBUTE) **x, X509_ATTRIBUTE *attr);
+OPENSSL_EXPORT STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr_by_OBJ(
+    STACK_OF(X509_ATTRIBUTE) **x, const ASN1_OBJECT *obj, int type,
+    const unsigned char *bytes, int len);
+OPENSSL_EXPORT STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr_by_NID(
+    STACK_OF(X509_ATTRIBUTE) **x, int nid, int type, const unsigned char *bytes,
+    int len);
+OPENSSL_EXPORT STACK_OF(X509_ATTRIBUTE) *X509at_add1_attr_by_txt(
+    STACK_OF(X509_ATTRIBUTE) **x, const char *attrname, int type,
+    const unsigned char *bytes, int len);
+OPENSSL_EXPORT void *X509at_get0_data_by_OBJ(STACK_OF(X509_ATTRIBUTE) *x,
                                              ASN1_OBJECT *obj, int lastpos,
                                              int type);
 OPENSSL_EXPORT X509_ATTRIBUTE *X509_ATTRIBUTE_create_by_NID(
@@ -1174,10 +1454,10 @@
 OPENSSL_EXPORT int X509_verify_cert(X509_STORE_CTX *ctx);
 
 // lookup a cert from a X509 STACK
-OPENSSL_EXPORT X509 *X509_find_by_issuer_and_serial(STACK_OF(X509) * sk,
+OPENSSL_EXPORT X509 *X509_find_by_issuer_and_serial(STACK_OF(X509) *sk,
                                                     X509_NAME *name,
                                                     ASN1_INTEGER *serial);
-OPENSSL_EXPORT X509 *X509_find_by_subject(STACK_OF(X509) * sk, X509_NAME *name);
+OPENSSL_EXPORT X509 *X509_find_by_subject(STACK_OF(X509) *sk, X509_NAME *name);
 
 // PKCS#8 utilities
 
@@ -1209,9 +1489,9 @@
                                   int (*ck)(X509_TRUST *, X509 *, int),
                                   char *name, int arg1, void *arg2);
 OPENSSL_EXPORT void X509_TRUST_cleanup(void);
-OPENSSL_EXPORT int X509_TRUST_get_flags(X509_TRUST *xp);
-OPENSSL_EXPORT char *X509_TRUST_get0_name(X509_TRUST *xp);
-OPENSSL_EXPORT int X509_TRUST_get_trust(X509_TRUST *xp);
+OPENSSL_EXPORT int X509_TRUST_get_flags(const X509_TRUST *xp);
+OPENSSL_EXPORT char *X509_TRUST_get0_name(const X509_TRUST *xp);
+OPENSSL_EXPORT int X509_TRUST_get_trust(const X509_TRUST *xp);
 
 
 typedef struct rsa_pss_params_st {
diff --git a/deps/boringssl/src/include/openssl/x509v3.h b/deps/boringssl/src/include/openssl/x509v3.h
index b5db715..2c9ba73 100644
--- a/deps/boringssl/src/include/openssl/x509v3.h
+++ b/deps/boringssl/src/include/openssl/x509v3.h
@@ -8,7 +8,7 @@
  * are met:
  *
  * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer. 
+ *    notice, this list of conditions and the following disclaimer.
  *
  * 2. Redistributions in binary form must reproduce the above copyright
  *    notice, this list of conditions and the following disclaimer in
@@ -57,158 +57,152 @@
 
 #include <openssl/bio.h>
 #include <openssl/conf.h>
-#include <openssl/x509.h>
 #include <openssl/lhash.h>
+#include <openssl/x509.h>
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
 
-/* Legacy X.509 library.
- *
- * This header is part of OpenSSL's X.509 implementation. It is retained for
- * compatibility but otherwise underdocumented and not actively maintained. In
- * the future, a replacement library will be available. Meanwhile, minimize
- * dependencies on this header where possible. */
+// Legacy X.509 library.
+//
+// This header is part of OpenSSL's X.509 implementation. It is retained for
+// compatibility but otherwise underdocumented and not actively maintained. In
+// the future, a replacement library will be available. Meanwhile, minimize
+// dependencies on this header where possible.
 
 
-/* Forward reference */
+// Forward reference
 struct v3_ext_method;
 struct v3_ext_ctx;
 
-/* Useful typedefs */
+// Useful typedefs
 
-typedef void * (*X509V3_EXT_NEW)(void);
+typedef void *(*X509V3_EXT_NEW)(void);
 typedef void (*X509V3_EXT_FREE)(void *);
-typedef void * (*X509V3_EXT_D2I)(void *, const unsigned char ** , long);
+typedef void *(*X509V3_EXT_D2I)(void *, const unsigned char **, long);
 typedef int (*X509V3_EXT_I2D)(void *, unsigned char **);
-typedef STACK_OF(CONF_VALUE) *
-  (*X509V3_EXT_I2V)(const struct v3_ext_method *method, void *ext,
-		    STACK_OF(CONF_VALUE) *extlist);
-typedef void * (*X509V3_EXT_V2I)(const struct v3_ext_method *method,
-				 struct v3_ext_ctx *ctx,
-				 STACK_OF(CONF_VALUE) *values);
-typedef char * (*X509V3_EXT_I2S)(const struct v3_ext_method *method, void *ext);
-typedef void * (*X509V3_EXT_S2I)(const struct v3_ext_method *method,
-				 struct v3_ext_ctx *ctx, const char *str);
+typedef STACK_OF(CONF_VALUE) *(*X509V3_EXT_I2V)(
+    const struct v3_ext_method *method, void *ext,
+    STACK_OF(CONF_VALUE) *extlist);
+typedef void *(*X509V3_EXT_V2I)(const struct v3_ext_method *method,
+                                struct v3_ext_ctx *ctx,
+                                STACK_OF(CONF_VALUE) *values);
+typedef char *(*X509V3_EXT_I2S)(const struct v3_ext_method *method, void *ext);
+typedef void *(*X509V3_EXT_S2I)(const struct v3_ext_method *method,
+                                struct v3_ext_ctx *ctx, const char *str);
 typedef int (*X509V3_EXT_I2R)(const struct v3_ext_method *method, void *ext,
-			      BIO *out, int indent);
-typedef void * (*X509V3_EXT_R2I)(const struct v3_ext_method *method,
-				 struct v3_ext_ctx *ctx, const char *str);
+                              BIO *out, int indent);
+typedef void *(*X509V3_EXT_R2I)(const struct v3_ext_method *method,
+                                struct v3_ext_ctx *ctx, const char *str);
 
-/* V3 extension structure */
+// V3 extension structure
 
 struct v3_ext_method {
-int ext_nid;
-int ext_flags;
-/* If this is set the following four fields are ignored */
-ASN1_ITEM_EXP *it;
-/* Old style ASN1 calls */
-X509V3_EXT_NEW ext_new;
-X509V3_EXT_FREE ext_free;
-X509V3_EXT_D2I d2i;
-X509V3_EXT_I2D i2d;
+  int ext_nid;
+  int ext_flags;
+  // If this is set the following four fields are ignored
+  ASN1_ITEM_EXP *it;
+  // Old style ASN1 calls
+  X509V3_EXT_NEW ext_new;
+  X509V3_EXT_FREE ext_free;
+  X509V3_EXT_D2I d2i;
+  X509V3_EXT_I2D i2d;
 
-/* The following pair is used for string extensions */
-X509V3_EXT_I2S i2s;
-X509V3_EXT_S2I s2i;
+  // The following pair is used for string extensions
+  X509V3_EXT_I2S i2s;
+  X509V3_EXT_S2I s2i;
 
-/* The following pair is used for multi-valued extensions */
-X509V3_EXT_I2V i2v;
-X509V3_EXT_V2I v2i;
+  // The following pair is used for multi-valued extensions
+  X509V3_EXT_I2V i2v;
+  X509V3_EXT_V2I v2i;
 
-/* The following are used for raw extensions */
-X509V3_EXT_I2R i2r;
-X509V3_EXT_R2I r2i;
+  // The following are used for raw extensions
+  X509V3_EXT_I2R i2r;
+  X509V3_EXT_R2I r2i;
 
-void *usr_data;	/* Any extension specific data */
+  void *usr_data;  // Any extension specific data
 };
 
 typedef struct X509V3_CONF_METHOD_st {
-char * (*get_string)(void *db, char *section, char *value);
-STACK_OF(CONF_VALUE) * (*get_section)(void *db, char *section);
-void (*free_string)(void *db, char * string);
-void (*free_section)(void *db, STACK_OF(CONF_VALUE) *section);
+  char *(*get_string)(void *db, const char *section, const char *value);
+  STACK_OF(CONF_VALUE) *(*get_section)(void *db, const char *section);
+  void (*free_string)(void *db, char *string);
+  void (*free_section)(void *db, STACK_OF(CONF_VALUE) *section);
 } X509V3_CONF_METHOD;
 
-/* Context specific info */
+// Context specific info
 struct v3_ext_ctx {
 #define CTX_TEST 0x1
-int flags;
-X509 *issuer_cert;
-X509 *subject_cert;
-X509_REQ *subject_req;
-X509_CRL *crl;
-const X509V3_CONF_METHOD *db_meth;
-void *db;
-/* Maybe more here */
+  int flags;
+  X509 *issuer_cert;
+  X509 *subject_cert;
+  X509_REQ *subject_req;
+  X509_CRL *crl;
+  const X509V3_CONF_METHOD *db_meth;
+  void *db;
+  // Maybe more here
 };
 
 typedef struct v3_ext_method X509V3_EXT_METHOD;
 
 DEFINE_STACK_OF(X509V3_EXT_METHOD)
 
-/* ext_flags values */
-#define X509V3_EXT_DYNAMIC	0x1
-#define X509V3_EXT_CTX_DEP	0x2
-#define X509V3_EXT_MULTILINE	0x4
+// ext_flags values
+#define X509V3_EXT_DYNAMIC 0x1
+#define X509V3_EXT_CTX_DEP 0x2
+#define X509V3_EXT_MULTILINE 0x4
 
 typedef BIT_STRING_BITNAME ENUMERATED_NAMES;
 
 struct BASIC_CONSTRAINTS_st {
-int ca;
-ASN1_INTEGER *pathlen;
+  int ca;
+  ASN1_INTEGER *pathlen;
 };
 
 
-typedef struct PKEY_USAGE_PERIOD_st {
-ASN1_GENERALIZEDTIME *notBefore;
-ASN1_GENERALIZEDTIME *notAfter;
-} PKEY_USAGE_PERIOD;
-
 typedef struct otherName_st {
-ASN1_OBJECT *type_id;
-ASN1_TYPE *value;
+  ASN1_OBJECT *type_id;
+  ASN1_TYPE *value;
 } OTHERNAME;
 
 typedef struct EDIPartyName_st {
-	ASN1_STRING *nameAssigner;
-	ASN1_STRING *partyName;
+  ASN1_STRING *nameAssigner;
+  ASN1_STRING *partyName;
 } EDIPARTYNAME;
 
 typedef struct GENERAL_NAME_st {
+#define GEN_OTHERNAME 0
+#define GEN_EMAIL 1
+#define GEN_DNS 2
+#define GEN_X400 3
+#define GEN_DIRNAME 4
+#define GEN_EDIPARTY 5
+#define GEN_URI 6
+#define GEN_IPADD 7
+#define GEN_RID 8
 
-#define GEN_OTHERNAME	0
-#define GEN_EMAIL	1
-#define GEN_DNS		2
-#define GEN_X400	3
-#define GEN_DIRNAME	4
-#define GEN_EDIPARTY	5
-#define GEN_URI		6
-#define GEN_IPADD	7
-#define GEN_RID		8
+  int type;
+  union {
+    char *ptr;
+    OTHERNAME *otherName;  // otherName
+    ASN1_IA5STRING *rfc822Name;
+    ASN1_IA5STRING *dNSName;
+    ASN1_TYPE *x400Address;
+    X509_NAME *directoryName;
+    EDIPARTYNAME *ediPartyName;
+    ASN1_IA5STRING *uniformResourceIdentifier;
+    ASN1_OCTET_STRING *iPAddress;
+    ASN1_OBJECT *registeredID;
 
-int type;
-union {
-	char *ptr;
-	OTHERNAME *otherName; /* otherName */
-	ASN1_IA5STRING *rfc822Name;
-	ASN1_IA5STRING *dNSName;
-	ASN1_TYPE *x400Address;
-	X509_NAME *directoryName;
-	EDIPARTYNAME *ediPartyName;
-	ASN1_IA5STRING *uniformResourceIdentifier;
-	ASN1_OCTET_STRING *iPAddress;
-	ASN1_OBJECT *registeredID;
-
-	/* Old names */
-	ASN1_OCTET_STRING *ip; /* iPAddress */
-	X509_NAME *dirn;		/* dirn */
-	ASN1_IA5STRING *ia5;/* rfc822Name, dNSName, uniformResourceIdentifier */
-	ASN1_OBJECT *rid; /* registeredID */
-	ASN1_TYPE *other; /* x400Address */
-} d;
+    // Old names
+    ASN1_OCTET_STRING *ip;  // iPAddress
+    X509_NAME *dirn;        // directoryName
+    ASN1_IA5STRING *ia5;    // rfc822Name, dNSName, uniformResourceIdentifier
+    ASN1_OBJECT *rid;       // registeredID
+    ASN1_TYPE *other;       // x400Address
+  } d;
 } GENERAL_NAME;
 
 DEFINE_STACK_OF(GENERAL_NAME)
@@ -219,8 +213,8 @@
 DEFINE_STACK_OF(GENERAL_NAMES)
 
 typedef struct ACCESS_DESCRIPTION_st {
-	ASN1_OBJECT *method;
-	GENERAL_NAME *location;
+  ASN1_OBJECT *method;
+  GENERAL_NAME *location;
 } ACCESS_DESCRIPTION;
 
 DEFINE_STACK_OF(ACCESS_DESCRIPTION)
@@ -231,34 +225,34 @@
 typedef STACK_OF(ASN1_OBJECT) EXTENDED_KEY_USAGE;
 
 typedef struct DIST_POINT_NAME_st {
-int type;
-union {
-	GENERAL_NAMES *fullname;
-	STACK_OF(X509_NAME_ENTRY) *relativename;
-} name;
-/* If relativename then this contains the full distribution point name */
-X509_NAME *dpname;
+  int type;
+  union {
+    GENERAL_NAMES *fullname;
+    STACK_OF(X509_NAME_ENTRY) *relativename;
+  } name;
+  // If relativename then this contains the full distribution point name
+  X509_NAME *dpname;
 } DIST_POINT_NAME;
-/* All existing reasons */
-#define CRLDP_ALL_REASONS	0x807f
+// All existing reasons
+#define CRLDP_ALL_REASONS 0x807f
 
-#define CRL_REASON_NONE				(-1)
-#define CRL_REASON_UNSPECIFIED			0
-#define CRL_REASON_KEY_COMPROMISE		1
-#define CRL_REASON_CA_COMPROMISE		2
-#define CRL_REASON_AFFILIATION_CHANGED		3
-#define CRL_REASON_SUPERSEDED			4
-#define CRL_REASON_CESSATION_OF_OPERATION	5
-#define CRL_REASON_CERTIFICATE_HOLD		6
-#define CRL_REASON_REMOVE_FROM_CRL		8
-#define CRL_REASON_PRIVILEGE_WITHDRAWN		9
-#define CRL_REASON_AA_COMPROMISE		10
+#define CRL_REASON_NONE (-1)
+#define CRL_REASON_UNSPECIFIED 0
+#define CRL_REASON_KEY_COMPROMISE 1
+#define CRL_REASON_CA_COMPROMISE 2
+#define CRL_REASON_AFFILIATION_CHANGED 3
+#define CRL_REASON_SUPERSEDED 4
+#define CRL_REASON_CESSATION_OF_OPERATION 5
+#define CRL_REASON_CERTIFICATE_HOLD 6
+#define CRL_REASON_REMOVE_FROM_CRL 8
+#define CRL_REASON_PRIVILEGE_WITHDRAWN 9
+#define CRL_REASON_AA_COMPROMISE 10
 
 struct DIST_POINT_st {
-DIST_POINT_NAME	*distpoint;
-ASN1_BIT_STRING *reasons;
-GENERAL_NAMES *CRLissuer;
-int dp_reasons;
+  DIST_POINT_NAME *distpoint;
+  ASN1_BIT_STRING *reasons;
+  GENERAL_NAMES *CRLissuer;
+  int dp_reasons;
 };
 
 typedef STACK_OF(DIST_POINT) CRL_DIST_POINTS;
@@ -267,51 +261,36 @@
 DECLARE_ASN1_SET_OF(DIST_POINT)
 
 struct AUTHORITY_KEYID_st {
-ASN1_OCTET_STRING *keyid;
-GENERAL_NAMES *issuer;
-ASN1_INTEGER *serial;
+  ASN1_OCTET_STRING *keyid;
+  GENERAL_NAMES *issuer;
+  ASN1_INTEGER *serial;
 };
 
-/* Strong extranet structures */
-
-typedef struct SXNET_ID_st {
-	ASN1_INTEGER *zone;
-	ASN1_OCTET_STRING *user;
-} SXNETID;
-
-DEFINE_STACK_OF(SXNETID)
-DECLARE_ASN1_SET_OF(SXNETID)
-
-typedef struct SXNET_st {
-	ASN1_INTEGER *version;
-	STACK_OF(SXNETID) *ids;
-} SXNET;
-
 typedef struct NOTICEREF_st {
-	ASN1_STRING *organization;
-	STACK_OF(ASN1_INTEGER) *noticenos;
+  ASN1_STRING *organization;
+  STACK_OF(ASN1_INTEGER) *noticenos;
 } NOTICEREF;
 
 typedef struct USERNOTICE_st {
-	NOTICEREF *noticeref;
-	ASN1_STRING *exptext;
+  NOTICEREF *noticeref;
+  ASN1_STRING *exptext;
 } USERNOTICE;
 
 typedef struct POLICYQUALINFO_st {
-	ASN1_OBJECT *pqualid;
-	union {
-		ASN1_IA5STRING *cpsuri;
-		USERNOTICE *usernotice;
-		ASN1_TYPE *other;
-	} d;
+  ASN1_OBJECT *pqualid;
+  union {
+    ASN1_IA5STRING *cpsuri;
+    USERNOTICE *usernotice;
+    ASN1_TYPE *other;
+  } d;
 } POLICYQUALINFO;
 
 DEFINE_STACK_OF(POLICYQUALINFO)
 DECLARE_ASN1_SET_OF(POLICYQUALINFO)
 
 typedef struct POLICYINFO_st {
-	ASN1_OBJECT *policyid;
-	STACK_OF(POLICYQUALINFO) *qualifiers;
+  ASN1_OBJECT *policyid;
+  STACK_OF(POLICYQUALINFO) *qualifiers;
 } POLICYINFO;
 
 typedef STACK_OF(POLICYINFO) CERTIFICATEPOLICIES;
@@ -320,8 +299,8 @@
 DECLARE_ASN1_SET_OF(POLICYINFO)
 
 typedef struct POLICY_MAPPING_st {
-	ASN1_OBJECT *issuerDomainPolicy;
-	ASN1_OBJECT *subjectDomainPolicy;
+  ASN1_OBJECT *issuerDomainPolicy;
+  ASN1_OBJECT *subjectDomainPolicy;
 } POLICY_MAPPING;
 
 DEFINE_STACK_OF(POLICY_MAPPING)
@@ -329,246 +308,221 @@
 typedef STACK_OF(POLICY_MAPPING) POLICY_MAPPINGS;
 
 typedef struct GENERAL_SUBTREE_st {
-	GENERAL_NAME *base;
-	ASN1_INTEGER *minimum;
-	ASN1_INTEGER *maximum;
+  GENERAL_NAME *base;
+  ASN1_INTEGER *minimum;
+  ASN1_INTEGER *maximum;
 } GENERAL_SUBTREE;
 
 DEFINE_STACK_OF(GENERAL_SUBTREE)
 
 struct NAME_CONSTRAINTS_st {
-	STACK_OF(GENERAL_SUBTREE) *permittedSubtrees;
-	STACK_OF(GENERAL_SUBTREE) *excludedSubtrees;
+  STACK_OF(GENERAL_SUBTREE) *permittedSubtrees;
+  STACK_OF(GENERAL_SUBTREE) *excludedSubtrees;
 };
 
 typedef struct POLICY_CONSTRAINTS_st {
-	ASN1_INTEGER *requireExplicitPolicy;
-	ASN1_INTEGER *inhibitPolicyMapping;
+  ASN1_INTEGER *requireExplicitPolicy;
+  ASN1_INTEGER *inhibitPolicyMapping;
 } POLICY_CONSTRAINTS;
 
-/* Proxy certificate structures, see RFC 3820 */
-typedef struct PROXY_POLICY_st
-	{
-	ASN1_OBJECT *policyLanguage;
-	ASN1_OCTET_STRING *policy;
-	} PROXY_POLICY;
+// Proxy certificate structures, see RFC 3820
+typedef struct PROXY_POLICY_st {
+  ASN1_OBJECT *policyLanguage;
+  ASN1_OCTET_STRING *policy;
+} PROXY_POLICY;
 
-typedef struct PROXY_CERT_INFO_EXTENSION_st
-	{
-	ASN1_INTEGER *pcPathLengthConstraint;
-	PROXY_POLICY *proxyPolicy;
-	} PROXY_CERT_INFO_EXTENSION;
+typedef struct PROXY_CERT_INFO_EXTENSION_st {
+  ASN1_INTEGER *pcPathLengthConstraint;
+  PROXY_POLICY *proxyPolicy;
+} PROXY_CERT_INFO_EXTENSION;
 
 DECLARE_ASN1_FUNCTIONS(PROXY_POLICY)
 DECLARE_ASN1_FUNCTIONS(PROXY_CERT_INFO_EXTENSION)
 
-struct ISSUING_DIST_POINT_st
-	{
-	DIST_POINT_NAME *distpoint;
-	int onlyuser;
-	int onlyCA;
-	ASN1_BIT_STRING *onlysomereasons;
-	int indirectCRL;
-	int onlyattr;
-	};
+struct ISSUING_DIST_POINT_st {
+  DIST_POINT_NAME *distpoint;
+  int onlyuser;
+  int onlyCA;
+  ASN1_BIT_STRING *onlysomereasons;
+  int indirectCRL;
+  int onlyattr;
+};
 
-/* Values in idp_flags field */
-/* IDP present */
-#define	IDP_PRESENT	0x1
-/* IDP values inconsistent */
-#define IDP_INVALID	0x2
-/* onlyuser true */
-#define	IDP_ONLYUSER	0x4
-/* onlyCA true */
-#define	IDP_ONLYCA	0x8
-/* onlyattr true */
-#define IDP_ONLYATTR	0x10
-/* indirectCRL true */
-#define IDP_INDIRECT	0x20
-/* onlysomereasons present */
-#define IDP_REASONS	0x40
+// Values in idp_flags field
+// IDP present
+#define IDP_PRESENT 0x1
+// IDP values inconsistent
+#define IDP_INVALID 0x2
+// onlyuser true
+#define IDP_ONLYUSER 0x4
+// onlyCA true
+#define IDP_ONLYCA 0x8
+// onlyattr true
+#define IDP_ONLYATTR 0x10
+// indirectCRL true
+#define IDP_INDIRECT 0x20
+// onlysomereasons present
+#define IDP_REASONS 0x40
 
-#define X509V3_conf_err(val) ERR_add_error_data(6, "section:", (val)->section, \
-",name:", (val)->name, ",value:", (val)->value);
+#define X509V3_conf_err(val)                                               \
+  ERR_add_error_data(6, "section:", (val)->section, ",name:", (val)->name, \
+                     ",value:", (val)->value);
 
 #define X509V3_set_ctx_test(ctx) \
-			X509V3_set_ctx(ctx, NULL, NULL, NULL, NULL, CTX_TEST)
+  X509V3_set_ctx(ctx, NULL, NULL, NULL, NULL, CTX_TEST)
 #define X509V3_set_ctx_nodb(ctx) (ctx)->db = NULL;
 
-#define EXT_BITSTRING(nid, table) { nid, 0, ASN1_ITEM_ref(ASN1_BIT_STRING), \
-			0,0,0,0, \
-			0,0, \
-			(X509V3_EXT_I2V)i2v_ASN1_BIT_STRING, \
-			(X509V3_EXT_V2I)v2i_ASN1_BIT_STRING, \
-			NULL, NULL, \
-			(void *)(table)}
+#define EXT_BITSTRING(nid, table)                                        \
+  {                                                                      \
+    nid, 0, ASN1_ITEM_ref(ASN1_BIT_STRING), 0, 0, 0, 0, 0, 0,            \
+        (X509V3_EXT_I2V)i2v_ASN1_BIT_STRING,                             \
+        (X509V3_EXT_V2I)v2i_ASN1_BIT_STRING, NULL, NULL, (void *)(table) \
+  }
 
-#define EXT_IA5STRING(nid) { nid, 0, ASN1_ITEM_ref(ASN1_IA5STRING), \
-			0,0,0,0, \
-			(X509V3_EXT_I2S)i2s_ASN1_IA5STRING, \
-			(X509V3_EXT_S2I)s2i_ASN1_IA5STRING, \
-			0,0,0,0, \
-			NULL}
+#define EXT_IA5STRING(nid)                                   \
+  {                                                          \
+    nid, 0, ASN1_ITEM_ref(ASN1_IA5STRING), 0, 0, 0, 0,       \
+        (X509V3_EXT_I2S)i2s_ASN1_IA5STRING,                  \
+        (X509V3_EXT_S2I)s2i_ASN1_IA5STRING, 0, 0, 0, 0, NULL \
+  }
 
-#define EXT_END { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
+#define EXT_END \
+  { -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
 
 
-/* X509_PURPOSE stuff */
+// X509_PURPOSE stuff
 
-#define EXFLAG_BCONS		0x1
-#define EXFLAG_KUSAGE		0x2
-#define EXFLAG_XKUSAGE		0x4
-#define EXFLAG_NSCERT		0x8
+#define EXFLAG_BCONS 0x1
+#define EXFLAG_KUSAGE 0x2
+#define EXFLAG_XKUSAGE 0x4
+#define EXFLAG_NSCERT 0x8
 
-#define EXFLAG_CA		0x10
-/* Really self issued not necessarily self signed */
-#define EXFLAG_SI		0x20
-#define EXFLAG_V1		0x40
-#define EXFLAG_INVALID		0x80
-#define EXFLAG_SET		0x100
-#define EXFLAG_CRITICAL		0x200
-#define EXFLAG_PROXY		0x400
+#define EXFLAG_CA 0x10
+// Really self issued not necessarily self signed
+#define EXFLAG_SI 0x20
+#define EXFLAG_V1 0x40
+#define EXFLAG_INVALID 0x80
+#define EXFLAG_SET 0x100
+#define EXFLAG_CRITICAL 0x200
+#define EXFLAG_PROXY 0x400
 
-#define EXFLAG_INVALID_POLICY	0x800
-#define EXFLAG_FRESHEST		0x1000
-/* Self signed */
-#define EXFLAG_SS		0x2000
+#define EXFLAG_INVALID_POLICY 0x800
+#define EXFLAG_FRESHEST 0x1000
+// Self signed
+#define EXFLAG_SS 0x2000
 
-#define KU_DIGITAL_SIGNATURE	0x0080
-#define KU_NON_REPUDIATION	0x0040
-#define KU_KEY_ENCIPHERMENT	0x0020
-#define KU_DATA_ENCIPHERMENT	0x0010
-#define KU_KEY_AGREEMENT	0x0008
-#define KU_KEY_CERT_SIGN	0x0004
-#define KU_CRL_SIGN		0x0002
-#define KU_ENCIPHER_ONLY	0x0001
-#define KU_DECIPHER_ONLY	0x8000
+#define KU_DIGITAL_SIGNATURE 0x0080
+#define KU_NON_REPUDIATION 0x0040
+#define KU_KEY_ENCIPHERMENT 0x0020
+#define KU_DATA_ENCIPHERMENT 0x0010
+#define KU_KEY_AGREEMENT 0x0008
+#define KU_KEY_CERT_SIGN 0x0004
+#define KU_CRL_SIGN 0x0002
+#define KU_ENCIPHER_ONLY 0x0001
+#define KU_DECIPHER_ONLY 0x8000
 
-#define NS_SSL_CLIENT		0x80
-#define NS_SSL_SERVER		0x40
-#define NS_SMIME		0x20
-#define NS_OBJSIGN		0x10
-#define NS_SSL_CA		0x04
-#define NS_SMIME_CA		0x02
-#define NS_OBJSIGN_CA		0x01
-#define NS_ANY_CA		(NS_SSL_CA|NS_SMIME_CA|NS_OBJSIGN_CA)
+#define NS_SSL_CLIENT 0x80
+#define NS_SSL_SERVER 0x40
+#define NS_SMIME 0x20
+#define NS_OBJSIGN 0x10
+#define NS_SSL_CA 0x04
+#define NS_SMIME_CA 0x02
+#define NS_OBJSIGN_CA 0x01
+#define NS_ANY_CA (NS_SSL_CA | NS_SMIME_CA | NS_OBJSIGN_CA)
 
-#define XKU_SSL_SERVER		0x1	
-#define XKU_SSL_CLIENT		0x2
-#define XKU_SMIME		0x4
-#define XKU_CODE_SIGN		0x8
-#define XKU_SGC			0x10
-#define XKU_OCSP_SIGN		0x20
-#define XKU_TIMESTAMP		0x40
-#define XKU_DVCS		0x80
-#define XKU_ANYEKU		0x100
+#define XKU_SSL_SERVER 0x1
+#define XKU_SSL_CLIENT 0x2
+#define XKU_SMIME 0x4
+#define XKU_CODE_SIGN 0x8
+#define XKU_SGC 0x10
+#define XKU_OCSP_SIGN 0x20
+#define XKU_TIMESTAMP 0x40
+#define XKU_DVCS 0x80
+#define XKU_ANYEKU 0x100
 
-#define X509_PURPOSE_DYNAMIC	0x1
-#define X509_PURPOSE_DYNAMIC_NAME	0x2
+#define X509_PURPOSE_DYNAMIC 0x1
+#define X509_PURPOSE_DYNAMIC_NAME 0x2
 
 typedef struct x509_purpose_st {
-	int purpose;
-	int trust;		/* Default trust ID */
-	int flags;
-	int (*check_purpose)(const struct x509_purpose_st *,
-				const X509 *, int);
-	char *name;
-	char *sname;
-	void *usr_data;
+  int purpose;
+  int trust;  // Default trust ID
+  int flags;
+  int (*check_purpose)(const struct x509_purpose_st *, const X509 *, int);
+  char *name;
+  char *sname;
+  void *usr_data;
 } X509_PURPOSE;
 
-#define X509_PURPOSE_SSL_CLIENT		1
-#define X509_PURPOSE_SSL_SERVER		2
-#define X509_PURPOSE_NS_SSL_SERVER	3
-#define X509_PURPOSE_SMIME_SIGN		4
-#define X509_PURPOSE_SMIME_ENCRYPT	5
-#define X509_PURPOSE_CRL_SIGN		6
-#define X509_PURPOSE_ANY		7
-#define X509_PURPOSE_OCSP_HELPER	8
-#define X509_PURPOSE_TIMESTAMP_SIGN	9
+#define X509_PURPOSE_SSL_CLIENT 1
+#define X509_PURPOSE_SSL_SERVER 2
+#define X509_PURPOSE_NS_SSL_SERVER 3
+#define X509_PURPOSE_SMIME_SIGN 4
+#define X509_PURPOSE_SMIME_ENCRYPT 5
+#define X509_PURPOSE_CRL_SIGN 6
+#define X509_PURPOSE_ANY 7
+#define X509_PURPOSE_OCSP_HELPER 8
+#define X509_PURPOSE_TIMESTAMP_SIGN 9
 
-#define X509_PURPOSE_MIN		1
-#define X509_PURPOSE_MAX		9
-
-/* Flags for X509V3_EXT_print() */
-
-#define X509V3_EXT_UNKNOWN_MASK		(0xfL << 16)
-/* Return error for unknown extensions */
-#define X509V3_EXT_DEFAULT		0
-/* Print error for unknown extensions */
-#define X509V3_EXT_ERROR_UNKNOWN	(1L << 16)
-/* ASN1 parse unknown extensions */
-#define X509V3_EXT_PARSE_UNKNOWN	(2L << 16)
-/* BIO_dump unknown extensions */
-#define X509V3_EXT_DUMP_UNKNOWN		(3L << 16)
-
-/* Flags for X509V3_add1_i2d */
-
-#define X509V3_ADD_OP_MASK		0xfL
-#define X509V3_ADD_DEFAULT		0L
-#define X509V3_ADD_APPEND		1L
-#define X509V3_ADD_REPLACE		2L
-#define X509V3_ADD_REPLACE_EXISTING	3L
-#define X509V3_ADD_KEEP_EXISTING	4L
-#define X509V3_ADD_DELETE		5L
-#define X509V3_ADD_SILENT		0x10
+#define X509_PURPOSE_MIN 1
+#define X509_PURPOSE_MAX 9
 
 DEFINE_STACK_OF(X509_PURPOSE)
 
 DECLARE_ASN1_FUNCTIONS(BASIC_CONSTRAINTS)
 
-DECLARE_ASN1_FUNCTIONS(SXNET)
-DECLARE_ASN1_FUNCTIONS(SXNETID)
-
-int SXNET_add_id_asc(SXNET **psx, char *zone, char *user, int userlen); 
-int SXNET_add_id_ulong(SXNET **psx, unsigned long lzone, char *user, int userlen); 
-int SXNET_add_id_INTEGER(SXNET **psx, ASN1_INTEGER *izone, char *user, int userlen); 
-
-ASN1_OCTET_STRING *SXNET_get_id_asc(SXNET *sx, char *zone);
-ASN1_OCTET_STRING *SXNET_get_id_ulong(SXNET *sx, unsigned long lzone);
-ASN1_OCTET_STRING *SXNET_get_id_INTEGER(SXNET *sx, ASN1_INTEGER *zone);
-
 DECLARE_ASN1_FUNCTIONS(AUTHORITY_KEYID)
 
-DECLARE_ASN1_FUNCTIONS(PKEY_USAGE_PERIOD)
-
 DECLARE_ASN1_FUNCTIONS(GENERAL_NAME)
 OPENSSL_EXPORT GENERAL_NAME *GENERAL_NAME_dup(GENERAL_NAME *a);
-OPENSSL_EXPORT int GENERAL_NAME_cmp(GENERAL_NAME *a, GENERAL_NAME *b);
+
+// GENERAL_NAME_cmp returns zero if |a| and |b| are equal and a non-zero
+// value otherwise. Note this function does not provide a comparison suitable
+// for sorting.
+OPENSSL_EXPORT int GENERAL_NAME_cmp(const GENERAL_NAME *a,
+                                    const GENERAL_NAME *b);
 
 
 
 OPENSSL_EXPORT ASN1_BIT_STRING *v2i_ASN1_BIT_STRING(X509V3_EXT_METHOD *method,
-				X509V3_CTX *ctx, STACK_OF(CONF_VALUE) *nval);
-OPENSSL_EXPORT STACK_OF(CONF_VALUE) *i2v_ASN1_BIT_STRING(X509V3_EXT_METHOD *method,
-				ASN1_BIT_STRING *bits,
-				STACK_OF(CONF_VALUE) *extlist);
+                                                    X509V3_CTX *ctx,
+                                                    STACK_OF(CONF_VALUE) *nval);
+OPENSSL_EXPORT STACK_OF(CONF_VALUE) *i2v_ASN1_BIT_STRING(
+    X509V3_EXT_METHOD *method, ASN1_BIT_STRING *bits,
+    STACK_OF(CONF_VALUE) *extlist);
 
-OPENSSL_EXPORT STACK_OF(CONF_VALUE) *i2v_GENERAL_NAME(X509V3_EXT_METHOD *method, GENERAL_NAME *gen, STACK_OF(CONF_VALUE) *ret);
+OPENSSL_EXPORT STACK_OF(CONF_VALUE) *i2v_GENERAL_NAME(
+    X509V3_EXT_METHOD *method, GENERAL_NAME *gen, STACK_OF(CONF_VALUE) *ret);
 OPENSSL_EXPORT int GENERAL_NAME_print(BIO *out, GENERAL_NAME *gen);
 
 DECLARE_ASN1_FUNCTIONS(GENERAL_NAMES)
 
-OPENSSL_EXPORT STACK_OF(CONF_VALUE) *i2v_GENERAL_NAMES(X509V3_EXT_METHOD *method,
-		GENERAL_NAMES *gen, STACK_OF(CONF_VALUE) *extlist);
+OPENSSL_EXPORT STACK_OF(CONF_VALUE) *i2v_GENERAL_NAMES(
+    X509V3_EXT_METHOD *method, GENERAL_NAMES *gen,
+    STACK_OF(CONF_VALUE) *extlist);
 OPENSSL_EXPORT GENERAL_NAMES *v2i_GENERAL_NAMES(const X509V3_EXT_METHOD *method,
-				 X509V3_CTX *ctx, STACK_OF(CONF_VALUE) *nval);
+                                                X509V3_CTX *ctx,
+                                                STACK_OF(CONF_VALUE) *nval);
 
 DECLARE_ASN1_FUNCTIONS(OTHERNAME)
 DECLARE_ASN1_FUNCTIONS(EDIPARTYNAME)
 OPENSSL_EXPORT int OTHERNAME_cmp(OTHERNAME *a, OTHERNAME *b);
-OPENSSL_EXPORT void GENERAL_NAME_set0_value(GENERAL_NAME *a, int type, void *value);
-OPENSSL_EXPORT void *GENERAL_NAME_get0_value(GENERAL_NAME *a, int *ptype);
+OPENSSL_EXPORT void GENERAL_NAME_set0_value(GENERAL_NAME *a, int type,
+                                            void *value);
+OPENSSL_EXPORT void *GENERAL_NAME_get0_value(const GENERAL_NAME *a, int *ptype);
 OPENSSL_EXPORT int GENERAL_NAME_set0_othername(GENERAL_NAME *gen,
-				ASN1_OBJECT *oid, ASN1_TYPE *value);
-OPENSSL_EXPORT int GENERAL_NAME_get0_otherName(GENERAL_NAME *gen, 
-				ASN1_OBJECT **poid, ASN1_TYPE **pvalue);
+                                               ASN1_OBJECT *oid,
+                                               ASN1_TYPE *value);
+OPENSSL_EXPORT int GENERAL_NAME_get0_otherName(const GENERAL_NAME *gen,
+                                               ASN1_OBJECT **poid,
+                                               ASN1_TYPE **pvalue);
 
-OPENSSL_EXPORT char *i2s_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method, ASN1_OCTET_STRING *ia5);
-OPENSSL_EXPORT ASN1_OCTET_STRING *s2i_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method, X509V3_CTX *ctx, char *str);
+OPENSSL_EXPORT char *i2s_ASN1_OCTET_STRING(X509V3_EXT_METHOD *method,
+                                           const ASN1_OCTET_STRING *ia5);
+OPENSSL_EXPORT ASN1_OCTET_STRING *s2i_ASN1_OCTET_STRING(
+    X509V3_EXT_METHOD *method, X509V3_CTX *ctx, const char *str);
 
 DECLARE_ASN1_FUNCTIONS(EXTENDED_KEY_USAGE)
-OPENSSL_EXPORT int i2a_ACCESS_DESCRIPTION(BIO *bp, ACCESS_DESCRIPTION* a);
+OPENSSL_EXPORT int i2a_ACCESS_DESCRIPTION(BIO *bp, const ACCESS_DESCRIPTION *a);
 
 DECLARE_ASN1_FUNCTIONS(CERTIFICATEPOLICIES)
 DECLARE_ASN1_FUNCTIONS(POLICYINFO)
@@ -581,7 +535,8 @@
 DECLARE_ASN1_FUNCTIONS(DIST_POINT_NAME)
 DECLARE_ASN1_FUNCTIONS(ISSUING_DIST_POINT)
 
-OPENSSL_EXPORT int DIST_POINT_set_dpname(DIST_POINT_NAME *dpn, X509_NAME *iname);
+OPENSSL_EXPORT int DIST_POINT_set_dpname(DIST_POINT_NAME *dpn,
+                                         X509_NAME *iname);
 
 OPENSSL_EXPORT int NAME_CONSTRAINTS_check(X509 *x, NAME_CONSTRAINTS *nc);
 
@@ -602,76 +557,230 @@
 DECLARE_ASN1_ITEM(POLICY_CONSTRAINTS)
 
 OPENSSL_EXPORT GENERAL_NAME *a2i_GENERAL_NAME(GENERAL_NAME *out,
-			       const X509V3_EXT_METHOD *method, X509V3_CTX *ctx,
-			       int gen_type, char *value, int is_nc);
+                                              const X509V3_EXT_METHOD *method,
+                                              X509V3_CTX *ctx, int gen_type,
+                                              const char *value, int is_nc);
 
-OPENSSL_EXPORT GENERAL_NAME *v2i_GENERAL_NAME(const X509V3_EXT_METHOD *method, X509V3_CTX *ctx,
-			       CONF_VALUE *cnf);
-OPENSSL_EXPORT GENERAL_NAME *v2i_GENERAL_NAME_ex(GENERAL_NAME *out,
-				  const X509V3_EXT_METHOD *method,
-				  X509V3_CTX *ctx, CONF_VALUE *cnf, int is_nc);
+OPENSSL_EXPORT GENERAL_NAME *v2i_GENERAL_NAME(const X509V3_EXT_METHOD *method,
+                                              X509V3_CTX *ctx, CONF_VALUE *cnf);
+OPENSSL_EXPORT GENERAL_NAME *v2i_GENERAL_NAME_ex(
+    GENERAL_NAME *out, const X509V3_EXT_METHOD *method, X509V3_CTX *ctx,
+    CONF_VALUE *cnf, int is_nc);
 OPENSSL_EXPORT void X509V3_conf_free(CONF_VALUE *val);
 
 // X509V3_EXT_conf_nid contains the only exposed instance of an LHASH in our
 // public headers. The |conf| pointer must be NULL but cryptography.io wraps
 // this function so we cannot, yet, replace the type with a dummy struct.
-OPENSSL_EXPORT X509_EXTENSION *X509V3_EXT_conf_nid(LHASH_OF(CONF_VALUE) *conf, X509V3_CTX *ctx, int ext_nid, char *value);
+OPENSSL_EXPORT X509_EXTENSION *X509V3_EXT_conf_nid(LHASH_OF(CONF_VALUE) *conf,
+                                                   X509V3_CTX *ctx, int ext_nid,
+                                                   const char *value);
 
-OPENSSL_EXPORT X509_EXTENSION *X509V3_EXT_nconf_nid(CONF *conf, X509V3_CTX *ctx, int ext_nid, char *value);
-OPENSSL_EXPORT X509_EXTENSION *X509V3_EXT_nconf(CONF *conf, X509V3_CTX *ctx, char *name, char *value);
-OPENSSL_EXPORT int X509V3_EXT_add_nconf_sk(CONF *conf, X509V3_CTX *ctx, char *section, STACK_OF(X509_EXTENSION) **sk);
-OPENSSL_EXPORT int X509V3_EXT_add_nconf(CONF *conf, X509V3_CTX *ctx, char *section, X509 *cert);
-OPENSSL_EXPORT int X509V3_EXT_REQ_add_nconf(CONF *conf, X509V3_CTX *ctx, char *section, X509_REQ *req);
-OPENSSL_EXPORT int X509V3_EXT_CRL_add_nconf(CONF *conf, X509V3_CTX *ctx, char *section, X509_CRL *crl);
+OPENSSL_EXPORT X509_EXTENSION *X509V3_EXT_nconf_nid(CONF *conf, X509V3_CTX *ctx,
+                                                    int ext_nid,
+                                                    const char *value);
+OPENSSL_EXPORT X509_EXTENSION *X509V3_EXT_nconf(CONF *conf, X509V3_CTX *ctx,
+                                                const char *name,
+                                                const char *value);
+OPENSSL_EXPORT int X509V3_EXT_add_nconf_sk(CONF *conf, X509V3_CTX *ctx,
+                                           const char *section,
+                                           STACK_OF(X509_EXTENSION) **sk);
+OPENSSL_EXPORT int X509V3_EXT_add_nconf(CONF *conf, X509V3_CTX *ctx,
+                                        const char *section, X509 *cert);
+OPENSSL_EXPORT int X509V3_EXT_REQ_add_nconf(CONF *conf, X509V3_CTX *ctx,
+                                            const char *section, X509_REQ *req);
+OPENSSL_EXPORT int X509V3_EXT_CRL_add_nconf(CONF *conf, X509V3_CTX *ctx,
+                                            const char *section, X509_CRL *crl);
 
-OPENSSL_EXPORT int X509V3_add_value_bool_nf(char *name, int asn1_bool,
-			     STACK_OF(CONF_VALUE) **extlist);
-OPENSSL_EXPORT int X509V3_get_value_bool(CONF_VALUE *value, int *asn1_bool);
-OPENSSL_EXPORT int X509V3_get_value_int(CONF_VALUE *value, ASN1_INTEGER **aint);
+OPENSSL_EXPORT int X509V3_add_value_bool_nf(const char *name, int asn1_bool,
+                                            STACK_OF(CONF_VALUE) **extlist);
+OPENSSL_EXPORT int X509V3_get_value_bool(const CONF_VALUE *value,
+                                         int *asn1_bool);
+OPENSSL_EXPORT int X509V3_get_value_int(const CONF_VALUE *value,
+                                        ASN1_INTEGER **aint);
 OPENSSL_EXPORT void X509V3_set_nconf(X509V3_CTX *ctx, CONF *conf);
 
-OPENSSL_EXPORT char * X509V3_get_string(X509V3_CTX *ctx, char *name, char *section);
-OPENSSL_EXPORT STACK_OF(CONF_VALUE) * X509V3_get_section(X509V3_CTX *ctx, char *section);
+OPENSSL_EXPORT char *X509V3_get_string(X509V3_CTX *ctx, const char *name,
+                                       const char *section);
+OPENSSL_EXPORT STACK_OF(CONF_VALUE) *X509V3_get_section(X509V3_CTX *ctx,
+                                                        const char *section);
 OPENSSL_EXPORT void X509V3_string_free(X509V3_CTX *ctx, char *str);
-OPENSSL_EXPORT void X509V3_section_free( X509V3_CTX *ctx, STACK_OF(CONF_VALUE) *section);
+OPENSSL_EXPORT void X509V3_section_free(X509V3_CTX *ctx,
+                                        STACK_OF(CONF_VALUE) *section);
 OPENSSL_EXPORT void X509V3_set_ctx(X509V3_CTX *ctx, X509 *issuer, X509 *subject,
-				 X509_REQ *req, X509_CRL *crl, int flags);
+                                   X509_REQ *req, X509_CRL *crl, int flags);
 
 OPENSSL_EXPORT int X509V3_add_value(const char *name, const char *value,
-						STACK_OF(CONF_VALUE) **extlist);
-OPENSSL_EXPORT int X509V3_add_value_uchar(const char *name, const unsigned char *value,
-						STACK_OF(CONF_VALUE) **extlist);
+                                    STACK_OF(CONF_VALUE) **extlist);
+OPENSSL_EXPORT int X509V3_add_value_uchar(const char *name,
+                                          const unsigned char *value,
+                                          STACK_OF(CONF_VALUE) **extlist);
 OPENSSL_EXPORT int X509V3_add_value_bool(const char *name, int asn1_bool,
-						STACK_OF(CONF_VALUE) **extlist);
+                                         STACK_OF(CONF_VALUE) **extlist);
 OPENSSL_EXPORT int X509V3_add_value_int(const char *name, ASN1_INTEGER *aint,
-						STACK_OF(CONF_VALUE) **extlist);
-OPENSSL_EXPORT char * i2s_ASN1_INTEGER(X509V3_EXT_METHOD *meth, ASN1_INTEGER *aint);
-OPENSSL_EXPORT ASN1_INTEGER * s2i_ASN1_INTEGER(X509V3_EXT_METHOD *meth, char *value);
-OPENSSL_EXPORT char * i2s_ASN1_ENUMERATED(X509V3_EXT_METHOD *meth, ASN1_ENUMERATED *aint);
-OPENSSL_EXPORT char * i2s_ASN1_ENUMERATED_TABLE(X509V3_EXT_METHOD *meth, ASN1_ENUMERATED *aint);
+                                        STACK_OF(CONF_VALUE) **extlist);
+OPENSSL_EXPORT char *i2s_ASN1_INTEGER(X509V3_EXT_METHOD *meth,
+                                      const ASN1_INTEGER *aint);
+OPENSSL_EXPORT ASN1_INTEGER *s2i_ASN1_INTEGER(X509V3_EXT_METHOD *meth,
+                                              const char *value);
+OPENSSL_EXPORT char *i2s_ASN1_ENUMERATED(X509V3_EXT_METHOD *meth,
+                                         const ASN1_ENUMERATED *aint);
+OPENSSL_EXPORT char *i2s_ASN1_ENUMERATED_TABLE(X509V3_EXT_METHOD *meth,
+                                               const ASN1_ENUMERATED *aint);
 OPENSSL_EXPORT int X509V3_EXT_add(X509V3_EXT_METHOD *ext);
 OPENSSL_EXPORT int X509V3_EXT_add_list(X509V3_EXT_METHOD *extlist);
 OPENSSL_EXPORT int X509V3_EXT_add_alias(int nid_to, int nid_from);
 OPENSSL_EXPORT void X509V3_EXT_cleanup(void);
 
-OPENSSL_EXPORT const X509V3_EXT_METHOD *X509V3_EXT_get(X509_EXTENSION *ext);
+OPENSSL_EXPORT const X509V3_EXT_METHOD *X509V3_EXT_get(
+    const X509_EXTENSION *ext);
 OPENSSL_EXPORT const X509V3_EXT_METHOD *X509V3_EXT_get_nid(int nid);
 OPENSSL_EXPORT int X509V3_add_standard_extensions(void);
 OPENSSL_EXPORT STACK_OF(CONF_VALUE) *X509V3_parse_list(const char *line);
-OPENSSL_EXPORT void *X509V3_EXT_d2i(X509_EXTENSION *ext);
-OPENSSL_EXPORT void *X509V3_get_d2i(STACK_OF(X509_EXTENSION) *x, int nid, int *crit, int *idx);
+
+// X509V3_EXT_d2i decodes |ext| and returns a pointer to a newly-allocated
+// structure, with type dependent on the type of the extension. It returns NULL
+// if |ext| is an unsupported extension or if there was a syntax error in the
+// extension. The caller should cast the return value to the expected type and
+// free the structure when done.
+//
+// WARNING: Casting the return value to the wrong type is a potentially
+// exploitable memory error, so callers must not use this function before
+// checking |ext| is of a known type.
+OPENSSL_EXPORT void *X509V3_EXT_d2i(const X509_EXTENSION *ext);
+
+// X509V3_get_d2i finds and decodes the extension in |extensions| of type |nid|.
+// If found, it decodes it and returns a newly-allocated structure, with type
+// dependent on |nid|. If the extension is not found or on error, it returns
+// NULL. The caller may distinguish these cases using the |out_critical| value.
+//
+// If |out_critical| is not NULL, this function sets |*out_critical| to one if
+// the extension is found and critical, zero if it is found and not critical, -1
+// if it is not found, and -2 if there is an invalid duplicate extension. Note
+// this function may set |*out_critical| to one or zero and still return NULL if
+// the extension is found but has a syntax error.
+//
+// If |out_idx| is not NULL, this function looks for the first occurrence of the
+// extension after |*out_idx|. It then sets |*out_idx| to the index of the
+// extension, or -1 if not found. If |out_idx| is non-NULL, duplicate extensions
+// are not treated as an error. Callers, however, should not rely on this
+// behavior as it may be removed in the future. Duplicate extensions are
+// forbidden in RFC5280.
+//
+// WARNING: This function is difficult to use correctly. Callers should pass a
+// non-NULL |out_critical| and check both the return value and |*out_critical|
+// to handle errors. If the return value is NULL and |*out_critical| is not -1,
+// there was an error. Otherwise, the function succeeded but may return NULL
+// for a missing extension. Callers should pass NULL to |out_idx| so that
+// duplicate extensions are handled correctly.
+//
+// Additionally, casting the return value to the wrong type is a potentially
+// exploitable memory error, so callers must ensure the cast and |nid| match.
+OPENSSL_EXPORT void *X509V3_get_d2i(const STACK_OF(X509_EXTENSION) *extensions,
+                                    int nid, int *out_critical, int *out_idx);
+
+// X509V3_EXT_free casts |ext_data| into the type that corresponds to |nid| and
+// releases memory associated with it. It returns one on success and zero if
+// |nid| is not a known extension.
+//
+// WARNING: Casting |ext_data| to the wrong type is a potentially exploitable
+// memory error, so callers must ensure |ext_data|'s type matches |nid|.
+//
+// TODO(davidben): OpenSSL upstream no longer exposes this function. Remove it?
 OPENSSL_EXPORT int X509V3_EXT_free(int nid, void *ext_data);
 
+// X509V3_EXT_i2d casts |ext_struc| into the type that corresponds to
+// |ext_nid|, serializes it, and returns a newly-allocated |X509_EXTENSION|
+// object containing the serialization, or NULL on error. The |X509_EXTENSION|
+// has OID |ext_nid| and is critical if |crit| is one.
+//
+// WARNING: Casting |ext_struc| to the wrong type is a potentially exploitable
+// memory error, so callers must ensure |ext_struc|'s type matches |ext_nid|.
+OPENSSL_EXPORT X509_EXTENSION *X509V3_EXT_i2d(int ext_nid, int crit,
+                                              void *ext_struc);
 
-OPENSSL_EXPORT X509_EXTENSION *X509V3_EXT_i2d(int ext_nid, int crit, void *ext_struc);
-OPENSSL_EXPORT int X509V3_add1_i2d(STACK_OF(X509_EXTENSION) **x, int nid, void *value, int crit, unsigned long flags);
+// The following constants control the behavior of |X509V3_add1_i2d| and related
+// functions.
 
-OPENSSL_EXPORT void X509V3_EXT_val_prn(BIO *out, STACK_OF(CONF_VALUE) *val, int indent,
-								 int ml);
-OPENSSL_EXPORT int X509V3_EXT_print(BIO *out, X509_EXTENSION *ext, unsigned long flag, int indent);
-OPENSSL_EXPORT int X509V3_EXT_print_fp(FILE *out, X509_EXTENSION *ext, int flag, int indent);
+// X509V3_ADD_OP_MASK can be ANDed with the flags to determine how duplicate
+// extensions are processed.
+#define X509V3_ADD_OP_MASK 0xfL
 
-OPENSSL_EXPORT int X509V3_extensions_print(BIO *out, const char *title, STACK_OF(X509_EXTENSION) *exts, unsigned long flag, int indent);
+// X509V3_ADD_DEFAULT causes the function to fail if the extension was already
+// present.
+#define X509V3_ADD_DEFAULT 0L
+
+// X509V3_ADD_APPEND causes the function to unconditionally append the new
+// extension to the extensions list, even if there is a duplicate.
+#define X509V3_ADD_APPEND 1L
+
+// X509V3_ADD_REPLACE causes the function to replace the existing extension, or
+// append if it is not present.
+#define X509V3_ADD_REPLACE 2L
+
+// X509V3_ADD_REPLACE_EXISTING causes the function to replace the existing
+// extension and fail if it is not present.
+#define X509V3_ADD_REPLACE_EXISTING 3L
+
+// X509V3_ADD_KEEP_EXISTING causes the function to succeed without replacing the
+// extension if already present.
+#define X509V3_ADD_KEEP_EXISTING 4L
+
+// X509V3_ADD_DELETE causes the function to remove the matching extension. No
+// new extension is added. If there is no matching extension, the function
+// fails. The |value| parameter is ignored in this mode.
+#define X509V3_ADD_DELETE 5L
+
+// X509V3_ADD_SILENT may be ORed into one of the values above to indicate the
+// function should not add to the error queue on duplicate or missing extension.
+// The function will continue to return zero in those cases, and it will
+// continue to return -1 and add to the error queue on other errors.
+#define X509V3_ADD_SILENT 0x10
+
+// X509V3_add1_i2d casts |value| to the type that corresponds to |nid|,
+// serializes it, and appends it to the extension list in |*x|. If |*x| is NULL,
+// it will set |*x| to a newly-allocated |STACK_OF(X509_EXTENSION)| as needed.
+// The |crit| parameter determines whether the new extension is critical.
+// |flags| may be some combination of the |X509V3_ADD_*| constants to control
+// the function's behavior on duplicate extension.
+//
+// This function returns one on success, zero if the operation failed due to a
+// missing or duplicate extension, and -1 on other errors.
+//
+// WARNING: Casting |value| to the wrong type is a potentially exploitable
+// memory error, so callers must ensure |value|'s type matches |nid|.
+OPENSSL_EXPORT int X509V3_add1_i2d(STACK_OF(X509_EXTENSION) **x, int nid,
+                                   void *value, int crit, unsigned long flags);
+
+#define X509V3_EXT_UNKNOWN_MASK (0xfL << 16)
+
+// X509V3_EXT_DEFAULT causes unknown extensions or syntax errors to return
+// failure.
+#define X509V3_EXT_DEFAULT 0
+// X509V3_EXT_ERROR_UNKNOWN causes unknown extensions or syntax errors to print
+// as "<Not Supported>" or "<Parse Error>", respectively.
+#define X509V3_EXT_ERROR_UNKNOWN (1L << 16)
+// X509V3_EXT_PARSE_UNKNOWN is deprecated and behaves like
+// |X509V3_EXT_DUMP_UNKNOWN|.
+#define X509V3_EXT_PARSE_UNKNOWN (2L << 16)
+// X509V3_EXT_DUMP_UNKNOWN causes unknown extensions to be displayed as a
+// hexdump.
+#define X509V3_EXT_DUMP_UNKNOWN (3L << 16)
+
+OPENSSL_EXPORT void X509V3_EXT_val_prn(BIO *out, STACK_OF(CONF_VALUE) *val,
+                                       int indent, int ml);
+OPENSSL_EXPORT int X509V3_EXT_print(BIO *out, X509_EXTENSION *ext,
+                                    unsigned long flag, int indent);
+OPENSSL_EXPORT int X509V3_EXT_print_fp(FILE *out, X509_EXTENSION *ext, int flag,
+                                       int indent);
+
+// X509V3_extensions_print prints |title|, followed by a human-readable
+// representation of |exts| to |out|. It returns one on success and zero on
+// error. The output is indented by |indent| spaces. |flag| is one of the
+// |X509V3_EXT_*| constants and controls printing of unknown extensions and
+// syntax errors.
+OPENSSL_EXPORT int X509V3_extensions_print(BIO *out, const char *title,
+                                           const STACK_OF(X509_EXTENSION) *exts,
+                                           unsigned long flag, int indent);
 
 OPENSSL_EXPORT int X509_check_ca(X509 *x);
 OPENSSL_EXPORT int X509_check_purpose(X509 *x, int id, int ca);
@@ -684,68 +793,112 @@
 OPENSSL_EXPORT uint32_t X509_get_key_usage(X509 *x);
 OPENSSL_EXPORT uint32_t X509_get_extended_key_usage(X509 *x);
 
+// X509_get0_subject_key_id returns |x509|'s subject key identifier, if present.
+// (See RFC5280, section 4.2.1.2.) It returns NULL if the extension is not
+// present or if some extension in |x509| was invalid.
+//
+// Note that decoding an |X509| object will not check for invalid extensions. To
+// detect the error case, call |X509_get_extensions_flags| and check the
+// |EXFLAG_INVALID| bit.
+OPENSSL_EXPORT const ASN1_OCTET_STRING *X509_get0_subject_key_id(X509 *x509);
+
+// X509_get0_authority_key_id returns keyIdentifier of |x509|'s authority key
+// identifier, if the extension and field are present. (See RFC5280,
+// section 4.2.1.1.) It returns NULL if the extension is not present, if it is
+// present but lacks a keyIdentifier field, or if some extension in |x509| was
+// invalid.
+//
+// Note that decoding an |X509| object will not check for invalid extensions. To
+// detect the error case, call |X509_get_extensions_flags| and check the
+// |EXFLAG_INVALID| bit.
+OPENSSL_EXPORT const ASN1_OCTET_STRING *X509_get0_authority_key_id(X509 *x509);
+
+// X509_get0_authority_issuer returns the authorityCertIssuer of |x509|'s
+// authority key identifier, if the extension and field are present. (See
+// RFC5280, section 4.2.1.1.) It returns NULL if the extension is not present,
+// if it is present but lacks an authorityCertIssuer field, or if some extension
+// in |x509| was invalid.
+//
+// Note that decoding an |X509| object will not check for invalid extensions. To
+// detect the error case, call |X509_get_extensions_flags| and check the
+// |EXFLAG_INVALID| bit.
+OPENSSL_EXPORT const GENERAL_NAMES *X509_get0_authority_issuer(X509 *x509);
+
+// X509_get0_authority_serial returns the authorityCertSerialNumber of |x509|'s
+// authority key identifier, if the extension and field are present. (See
+// RFC5280, section 4.2.1.1.) It returns NULL if the extension is not present,
+// if it is present but lacks an authorityCertSerialNumber field, or if some
+// extension in |x509| was invalid.
+//
+// Note that decoding an |X509| object will not check for invalid extensions. To
+// detect the error case, call |X509_get_extensions_flags| and check the
+// |EXFLAG_INVALID| bit.
+OPENSSL_EXPORT const ASN1_INTEGER *X509_get0_authority_serial(X509 *x509);
+
 OPENSSL_EXPORT int X509_PURPOSE_get_count(void);
-OPENSSL_EXPORT X509_PURPOSE * X509_PURPOSE_get0(int idx);
+OPENSSL_EXPORT X509_PURPOSE *X509_PURPOSE_get0(int idx);
 OPENSSL_EXPORT int X509_PURPOSE_get_by_sname(char *sname);
 OPENSSL_EXPORT int X509_PURPOSE_get_by_id(int id);
 OPENSSL_EXPORT int X509_PURPOSE_add(int id, int trust, int flags,
-			int (*ck)(const X509_PURPOSE *, const X509 *, int),
-				char *name, char *sname, void *arg);
-OPENSSL_EXPORT char *X509_PURPOSE_get0_name(X509_PURPOSE *xp);
-OPENSSL_EXPORT char *X509_PURPOSE_get0_sname(X509_PURPOSE *xp);
-OPENSSL_EXPORT int X509_PURPOSE_get_trust(X509_PURPOSE *xp);
+                                    int (*ck)(const X509_PURPOSE *,
+                                              const X509 *, int),
+                                    char *name, char *sname, void *arg);
+OPENSSL_EXPORT char *X509_PURPOSE_get0_name(const X509_PURPOSE *xp);
+OPENSSL_EXPORT char *X509_PURPOSE_get0_sname(const X509_PURPOSE *xp);
+OPENSSL_EXPORT int X509_PURPOSE_get_trust(const X509_PURPOSE *xp);
 OPENSSL_EXPORT void X509_PURPOSE_cleanup(void);
-OPENSSL_EXPORT int X509_PURPOSE_get_id(X509_PURPOSE *);
+OPENSSL_EXPORT int X509_PURPOSE_get_id(const X509_PURPOSE *);
 
 OPENSSL_EXPORT STACK_OF(OPENSSL_STRING) *X509_get1_email(X509 *x);
 OPENSSL_EXPORT STACK_OF(OPENSSL_STRING) *X509_REQ_get1_email(X509_REQ *x);
 OPENSSL_EXPORT void X509_email_free(STACK_OF(OPENSSL_STRING) *sk);
 OPENSSL_EXPORT STACK_OF(OPENSSL_STRING) *X509_get1_ocsp(X509 *x);
-/* Flags for X509_check_* functions */
+// Flags for X509_check_* functions
 
-/* Deprecated: this flag does nothing */
-#define X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT	0
-/* Disable wildcard matching for dnsName fields and common name. */
-#define X509_CHECK_FLAG_NO_WILDCARDS	0x2
-/* Wildcards must not match a partial label. */
+// Deprecated: this flag does nothing
+#define X509_CHECK_FLAG_ALWAYS_CHECK_SUBJECT 0
+// Disable wildcard matching for dnsName fields and common name.
+#define X509_CHECK_FLAG_NO_WILDCARDS 0x2
+// Wildcards must not match a partial label.
 #define X509_CHECK_FLAG_NO_PARTIAL_WILDCARDS 0x4
-/* Allow (non-partial) wildcards to match multiple labels. */
+// Allow (non-partial) wildcards to match multiple labels.
 #define X509_CHECK_FLAG_MULTI_LABEL_WILDCARDS 0x8
-/* Constraint verifier subdomain patterns to match a single labels. */
+// Constrain verifier subdomain patterns to match a single label.
 #define X509_CHECK_FLAG_SINGLE_LABEL_SUBDOMAINS 0x10
-/* Skip the subject common name fallback if subjectAltNames is missing. */
+// Skip the subject common name fallback if subjectAltNames is missing.
 #define X509_CHECK_FLAG_NEVER_CHECK_SUBJECT 0x20
-/*
- * Match reference identifiers starting with "." to any sub-domain.
- * This is a non-public flag, turned on implicitly when the subject
- * reference identity is a DNS name.
- */
+//
+// Match reference identifiers starting with "." to any sub-domain.
+// This is a non-public flag, turned on implicitly when the subject
+// reference identity is a DNS name.
 #define _X509_CHECK_FLAG_DOT_SUBDOMAINS 0x8000
 
 OPENSSL_EXPORT int X509_check_host(X509 *x, const char *chk, size_t chklen,
-					unsigned int flags, char **peername);
+                                   unsigned int flags, char **peername);
 OPENSSL_EXPORT int X509_check_email(X509 *x, const char *chk, size_t chklen,
-					unsigned int flags);
-OPENSSL_EXPORT int X509_check_ip(X509 *x, const unsigned char *chk, size_t chklen,
-					unsigned int flags);
-OPENSSL_EXPORT int X509_check_ip_asc(X509 *x, const char *ipasc, unsigned int flags);
+                                    unsigned int flags);
+OPENSSL_EXPORT int X509_check_ip(X509 *x, const unsigned char *chk,
+                                 size_t chklen, unsigned int flags);
+OPENSSL_EXPORT int X509_check_ip_asc(X509 *x, const char *ipasc,
+                                     unsigned int flags);
 
 OPENSSL_EXPORT ASN1_OCTET_STRING *a2i_IPADDRESS(const char *ipasc);
 OPENSSL_EXPORT ASN1_OCTET_STRING *a2i_IPADDRESS_NC(const char *ipasc);
 OPENSSL_EXPORT int a2i_ipadd(unsigned char *ipout, const char *ipasc);
-OPENSSL_EXPORT int X509V3_NAME_from_section(X509_NAME *nm, STACK_OF(CONF_VALUE)*dn_sk,
-						unsigned long chtype);
+OPENSSL_EXPORT int X509V3_NAME_from_section(X509_NAME *nm,
+                                            STACK_OF(CONF_VALUE) *dn_sk,
+                                            unsigned long chtype);
 
-OPENSSL_EXPORT void X509_POLICY_NODE_print(BIO *out, X509_POLICY_NODE *node, int indent);
+OPENSSL_EXPORT void X509_POLICY_NODE_print(BIO *out, X509_POLICY_NODE *node,
+                                           int indent);
 DEFINE_STACK_OF(X509_POLICY_NODE)
 
-/* BEGIN ERROR CODES */
-/* The following lines are auto generated by the script mkerr.pl. Any changes
- * made after this point may be overwritten when the script is next run.
- */
+// BEGIN ERROR CODES
+// The following lines are auto generated by the script mkerr.pl. Any changes
+// made after this point may be overwritten when the script is next run.
 
 
-#ifdef  __cplusplus
+#ifdef __cplusplus
 }
 
 extern "C++" {
@@ -761,7 +914,7 @@
 
 BSSL_NAMESPACE_END
 
-}  /* extern C++ */
+}  // extern C++
 #endif
 
 #define X509V3_R_BAD_IP_ADDRESS 100
diff --git a/deps/boringssl/src/sources.cmake b/deps/boringssl/src/sources.cmake
index a5f2d21..9ac55d5 100644
--- a/deps/boringssl/src/sources.cmake
+++ b/deps/boringssl/src/sources.cmake
@@ -6,12 +6,14 @@
 set(
   CRYPTO_TEST_DATA
 
+  crypto/blake2/blake2b256_tests.txt
   crypto/cipher_extra/test/aes_128_cbc_sha1_tls_implicit_iv_tests.txt
   crypto/cipher_extra/test/aes_128_cbc_sha1_tls_tests.txt
   crypto/cipher_extra/test/aes_128_cbc_sha256_tls_tests.txt
   crypto/cipher_extra/test/aes_128_ccm_bluetooth_tests.txt
   crypto/cipher_extra/test/aes_128_ccm_bluetooth_8_tests.txt
   crypto/cipher_extra/test/aes_128_ctr_hmac_sha256.txt
+  crypto/cipher_extra/test/aes_128_gcm_randnonce_tests.txt
   crypto/cipher_extra/test/aes_128_gcm_siv_tests.txt
   crypto/cipher_extra/test/aes_128_gcm_tests.txt
   crypto/cipher_extra/test/aes_192_gcm_tests.txt
@@ -20,6 +22,7 @@
   crypto/cipher_extra/test/aes_256_cbc_sha256_tls_tests.txt
   crypto/cipher_extra/test/aes_256_cbc_sha384_tls_tests.txt
   crypto/cipher_extra/test/aes_256_ctr_hmac_sha256.txt
+  crypto/cipher_extra/test/aes_256_gcm_randnonce_tests.txt
   crypto/cipher_extra/test/aes_256_gcm_siv_tests.txt
   crypto/cipher_extra/test/aes_256_gcm_tests.txt
   crypto/cipher_extra/test/chacha20_poly1305_tests.txt
@@ -55,8 +58,15 @@
   crypto/fipsmodule/modes/gcm_tests.txt
   crypto/fipsmodule/rand/ctrdrbg_vectors.txt
   crypto/hmac_extra/hmac_tests.txt
+  crypto/hpke/hpke_test_vectors.txt
   crypto/poly1305/poly1305_tests.txt
   crypto/siphash/siphash_tests.txt
+  crypto/x509/test/basic_constraints_ca.pem
+  crypto/x509/test/basic_constraints_ca_pathlen_0.pem
+  crypto/x509/test/basic_constraints_ca_pathlen_1.pem
+  crypto/x509/test/basic_constraints_ca_pathlen_10.pem
+  crypto/x509/test/basic_constraints_leaf.pem
+  crypto/x509/test/basic_constraints_none.pem
   crypto/x509/test/invalid_extension_intermediate.pem
   crypto/x509/test/invalid_extension_intermediate_authority_key_identifier.pem
   crypto/x509/test/invalid_extension_intermediate_basic_constraints.pem
diff --git a/deps/boringssl/src/ssl/bio_ssl.cc b/deps/boringssl/src/ssl/bio_ssl.cc
index 61afee5..a249889 100644
--- a/deps/boringssl/src/ssl/bio_ssl.cc
+++ b/deps/boringssl/src/ssl/bio_ssl.cc
@@ -37,12 +37,12 @@
 
     case SSL_ERROR_WANT_ACCEPT:
       BIO_set_retry_special(bio);
-      bio->retry_reason = BIO_RR_ACCEPT;
+      BIO_set_retry_reason(bio, BIO_RR_ACCEPT);
       break;
 
     case SSL_ERROR_WANT_CONNECT:
       BIO_set_retry_special(bio);
-      bio->retry_reason = BIO_RR_CONNECT;
+      BIO_set_retry_reason(bio, BIO_RR_CONNECT);
       break;
 
     case SSL_ERROR_NONE:
@@ -77,7 +77,7 @@
 
     case SSL_ERROR_WANT_CONNECT:
       BIO_set_retry_special(bio);
-      bio->retry_reason = BIO_RR_CONNECT;
+      BIO_set_retry_reason(bio, BIO_RR_CONNECT);
       break;
 
     case SSL_ERROR_NONE:
@@ -98,6 +98,17 @@
 
   switch (cmd) {
     case BIO_C_SET_SSL:
+      if (ssl != NULL) {
+        // OpenSSL allows reusing an SSL BIO with a different SSL object. We do
+        // not support this.
+        OPENSSL_PUT_ERROR(SSL, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
+        return 0;
+      }
+
+      // Note this differs from upstream OpenSSL, which synchronizes
+      // |bio->next_bio| with |ssl|'s rbio here, and on |BIO_CTRL_PUSH|. We call
+      // into the corresponding |BIO| directly. (We can implement the upstream
+      // behavior if it ends up necessary.)
       bio->shutdown = num;
       bio->ptr = ptr;
       bio->init = 1;
@@ -117,9 +128,11 @@
       return SSL_pending(ssl);
 
     case BIO_CTRL_FLUSH: {
+      BIO *wbio = SSL_get_wbio(ssl);
       BIO_clear_retry_flags(bio);
-      long ret = BIO_ctrl(SSL_get_wbio(ssl), cmd, num, ptr);
-      BIO_copy_next_retry(bio);
+      long ret = BIO_ctrl(wbio, cmd, num, ptr);
+      BIO_set_flags(bio, BIO_get_retry_flags(wbio));
+      BIO_set_retry_reason(bio, BIO_get_retry_reason(wbio));
       return ret;
     }
 
diff --git a/deps/boringssl/src/ssl/handoff.cc b/deps/boringssl/src/ssl/handoff.cc
index fdbac18..16cbdf7 100644
--- a/deps/boringssl/src/ssl/handoff.cc
+++ b/deps/boringssl/src/ssl/handoff.cc
@@ -24,6 +24,8 @@
 constexpr int kHandoffVersion = 0;
 constexpr int kHandbackVersion = 0;
 
+static const unsigned kHandoffTagALPS = CBS_ASN1_CONTEXT_SPECIFIC | 0;
+
 // early_data_t represents the state of early data in a more compact way than
 // the 3 bits used by the implementation.
 enum early_data_t {
@@ -57,6 +59,16 @@
       return false;
     }
   }
+  // ALPS is a draft protocol and may change over time. The handoff structure
+  // contains a [0] IMPLICIT OCTET STRING OPTIONAL, containing a list of u16
+  // ALPS versions that the binary supports. For now we name them by codepoint.
+  // Once ALPS is finalized and past the support horizon, this field can be
+  // removed.
+  CBB alps;
+  if (!CBB_add_asn1(out, &alps, kHandoffTagALPS) ||
+      !CBB_add_u16(&alps, TLSEXT_TYPE_application_settings)) {
+    return false;
+  }
   return CBB_flush(out);
 }
 
@@ -189,6 +201,29 @@
   new_configured_curves.Shrink(idx);
   ssl->config->supported_group_list = std::move(new_configured_curves);
 
+  CBS alps;
+  CBS_init(&alps, nullptr, 0);
+  if (!CBS_get_optional_asn1(in, &alps, /*out_present=*/nullptr,
+                             kHandoffTagALPS)) {
+    return false;
+  }
+  bool supports_alps = false;
+  while (CBS_len(&alps) != 0) {
+    uint16_t id;
+    if (!CBS_get_u16(&alps, &id)) {
+      return false;
+    }
+    // For now, we only support one ALPS code point, so we only need to extract
+    // a boolean signal from the feature list.
+    if (id == TLSEXT_TYPE_application_settings) {
+      supports_alps = true;
+      break;
+    }
+  }
+  if (!supports_alps) {
+    ssl->config->alps_configs.clear();
+  }
+
   return true;
 }
 
@@ -632,7 +667,7 @@
     case handback_after_session_resumption:
       // The write keys are installed after server Finished, but the client
       // keys must wait for ChangeCipherSpec.
-      if (!tls1_configure_aead(ssl, evp_aead_seal, &key_block, session->cipher,
+      if (!tls1_configure_aead(ssl, evp_aead_seal, &key_block, session,
                                write_iv)) {
         return false;
       }
@@ -642,9 +677,9 @@
       break;
     case handback_after_handshake:
       // The handshake is complete, so both keys are installed.
-      if (!tls1_configure_aead(ssl, evp_aead_seal, &key_block, session->cipher,
+      if (!tls1_configure_aead(ssl, evp_aead_seal, &key_block, session,
                                write_iv) ||
-          !tls1_configure_aead(ssl, evp_aead_open, &key_block, session->cipher,
+          !tls1_configure_aead(ssl, evp_aead_open, &key_block, session,
                                read_iv)) {
         return false;
       }
diff --git a/deps/boringssl/src/ssl/handshake.cc b/deps/boringssl/src/ssl/handshake.cc
index 27fc3af..b38f96a 100644
--- a/deps/boringssl/src/ssl/handshake.cc
+++ b/deps/boringssl/src/ssl/handshake.cc
@@ -126,6 +126,8 @@
 
 SSL_HANDSHAKE::SSL_HANDSHAKE(SSL *ssl_arg)
     : ssl(ssl_arg),
+      ech_present(false),
+      ech_is_inner_present(false),
       scts_requested(false),
       needs_psk_binder(false),
       handshake_finalized(false),
@@ -235,13 +237,13 @@
   return hs->transcript.Update(msg.raw);
 }
 
-int ssl_parse_extensions(const CBS *cbs, uint8_t *out_alert,
-                         const SSL_EXTENSION_TYPE *ext_types,
-                         size_t num_ext_types, int ignore_unknown) {
+bool ssl_parse_extensions(const CBS *cbs, uint8_t *out_alert,
+                          Span<const SSL_EXTENSION_TYPE> ext_types,
+                          bool ignore_unknown) {
   // Reset everything.
-  for (size_t i = 0; i < num_ext_types; i++) {
-    *ext_types[i].out_present = 0;
-    CBS_init(ext_types[i].out_data, NULL, 0);
+  for (const SSL_EXTENSION_TYPE &ext_type : ext_types) {
+    *ext_type.out_present = false;
+    CBS_init(ext_type.out_data, nullptr, 0);
   }
 
   CBS copy = *cbs;
@@ -252,38 +254,38 @@
         !CBS_get_u16_length_prefixed(&copy, &data)) {
       OPENSSL_PUT_ERROR(SSL, SSL_R_PARSE_TLSEXT);
       *out_alert = SSL_AD_DECODE_ERROR;
-      return 0;
+      return false;
     }
 
-    const SSL_EXTENSION_TYPE *ext_type = NULL;
-    for (size_t i = 0; i < num_ext_types; i++) {
-      if (type == ext_types[i].type) {
-        ext_type = &ext_types[i];
+    const SSL_EXTENSION_TYPE *found = nullptr;
+    for (const SSL_EXTENSION_TYPE &ext_type : ext_types) {
+      if (type == ext_type.type) {
+        found = &ext_type;
         break;
       }
     }
 
-    if (ext_type == NULL) {
+    if (found == nullptr) {
       if (ignore_unknown) {
         continue;
       }
       OPENSSL_PUT_ERROR(SSL, SSL_R_UNEXPECTED_EXTENSION);
       *out_alert = SSL_AD_UNSUPPORTED_EXTENSION;
-      return 0;
+      return false;
     }
 
     // Duplicate ext_types are forbidden.
-    if (*ext_type->out_present) {
+    if (*found->out_present) {
       OPENSSL_PUT_ERROR(SSL, SSL_R_DUPLICATE_EXTENSION);
       *out_alert = SSL_AD_ILLEGAL_PARAMETER;
-      return 0;
+      return false;
     }
 
-    *ext_type->out_present = 1;
-    *ext_type->out_data = data;
+    *found->out_present = 1;
+    *found->out_data = data;
   }
 
-  return 1;
+  return true;
 }
 
 enum ssl_verify_result_t ssl_verify_peer_cert(SSL_HANDSHAKE *hs) {
@@ -441,7 +443,7 @@
   uint8_t finished[EVP_MAX_MD_SIZE];
   size_t finished_len;
   if (!hs->transcript.GetFinishedMAC(finished, &finished_len,
-                                     SSL_get_session(ssl), !ssl->server) ||
+                                     ssl_handshake_session(hs), !ssl->server) ||
       !ssl_hash_message(hs, msg)) {
     return ssl_hs_error;
   }
@@ -484,7 +486,7 @@
 
 bool ssl_send_finished(SSL_HANDSHAKE *hs) {
   SSL *const ssl = hs->ssl;
-  const SSL_SESSION *session = SSL_get_session(ssl);
+  const SSL_SESSION *session = ssl_handshake_session(hs);
 
   uint8_t finished[EVP_MAX_MD_SIZE];
   size_t finished_len;
@@ -494,9 +496,8 @@
   }
 
   // Log the master secret, if logging is enabled.
-  if (!ssl_log_secret(
-          ssl, "CLIENT_RANDOM",
-          MakeConstSpan(session->master_key, session->master_key_length))) {
+  if (!ssl_log_secret(ssl, "CLIENT_RANDOM",
+                      MakeConstSpan(session->secret, session->secret_length))) {
     return 0;
   }
 
@@ -541,6 +542,13 @@
   return true;
 }
 
+const SSL_SESSION *ssl_handshake_session(const SSL_HANDSHAKE *hs) {
+  if (hs->new_session) {
+    return hs->new_session.get();
+  }
+  return hs->ssl->session.get();
+}
+
 int ssl_run_handshake(SSL_HANDSHAKE *hs, bool *out_early_return) {
   SSL *const ssl = hs->ssl;
   for (;;) {
diff --git a/deps/boringssl/src/ssl/handshake_client.cc b/deps/boringssl/src/ssl/handshake_client.cc
index 670e476..59ef6ec 100644
--- a/deps/boringssl/src/ssl/handshake_client.cc
+++ b/deps/boringssl/src/ssl/handshake_client.cc
@@ -259,7 +259,7 @@
         continue;
       }
       any_enabled = true;
-      if (!CBB_add_u16(&child, ssl_cipher_get_value(cipher))) {
+      if (!CBB_add_u16(&child, SSL_CIPHER_get_protocol_id(cipher))) {
         return false;
       }
     }
@@ -358,8 +358,7 @@
 
   uint8_t alert = SSL_AD_DECODE_ERROR;
   if (!ssl_parse_extensions(&extensions, &alert, ext_types,
-                            OPENSSL_ARRAY_SIZE(ext_types),
-                            1 /* ignore unknown */)) {
+                            /*ignore_unknown=*/true)) {
     ssl_send_alert(ssl, SSL3_AL_FATAL, alert);
     return false;
   }
@@ -460,8 +459,8 @@
   }
 
   if (!tls13_init_early_key_schedule(
-          hs, MakeConstSpan(ssl->session->master_key,
-                            ssl->session->master_key_length)) ||
+          hs,
+          MakeConstSpan(ssl->session->secret, ssl->session->secret_length)) ||
       !tls13_derive_early_secret(hs)) {
     return ssl_hs_error;
   }
@@ -637,12 +636,9 @@
             .subspan(SSL3_RANDOM_SIZE - sizeof(kTLS13DowngradeRandom));
     if (suffix == kTLS12DowngradeRandom || suffix == kTLS13DowngradeRandom ||
         suffix == kJDK11DowngradeRandom) {
-      ssl->s3->tls13_downgrade = true;
-      if (!hs->config->ignore_tls13_downgrade) {
-        OPENSSL_PUT_ERROR(SSL, SSL_R_TLS13_DOWNGRADE);
-        ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_ILLEGAL_PARAMETER);
-        return ssl_hs_error;
-      }
+      OPENSSL_PUT_ERROR(SSL, SSL_R_TLS13_DOWNGRADE);
+      ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_ILLEGAL_PARAMETER);
+      return ssl_hs_error;
     }
   }
 
@@ -1411,9 +1407,9 @@
     return ssl_hs_error;
   }
 
-  hs->new_session->master_key_length =
-      tls1_generate_master_secret(hs, hs->new_session->master_key, pms);
-  if (hs->new_session->master_key_length == 0) {
+  hs->new_session->secret_length =
+      tls1_generate_master_secret(hs, hs->new_session->secret, pms);
+  if (hs->new_session->secret_length == 0) {
     return ssl_hs_error;
   }
   hs->new_session->extended_master_secret = hs->extended_master_secret;
@@ -1551,18 +1547,12 @@
   //
   // Now that TLS 1.3 exists, we would like to avoid similar attacks between
   // TLS 1.2 and TLS 1.3, but there are too many TLS 1.2 deployments to
-  // sacrifice False Start on them. TLS 1.3's downgrade signal fixes this, but
-  // |SSL_CTX_set_ignore_tls13_downgrade| can disable it due to compatibility
-  // issues.
-  //
-  // |SSL_CTX_set_ignore_tls13_downgrade| normally still retains Finished-based
-  // downgrade protection, but False Start bypasses that. Thus, we disable False
-  // Start based on the TLS 1.3 downgrade signal, even if otherwise unenforced.
+  // sacrifice False Start on them. Instead, we rely on the ServerHello.random
+  // downgrade signal, which we unconditionally enforce.
   if (SSL_is_dtls(ssl) ||
       SSL_version(ssl) != TLS1_2_VERSION ||
       hs->new_cipher->algorithm_mkey != SSL_kECDHE ||
-      hs->new_cipher->algorithm_mac != SSL_AEAD ||
-      ssl->s3->tls13_downgrade) {
+      hs->new_cipher->algorithm_mac != SSL_AEAD) {
     return false;
   }
 
diff --git a/deps/boringssl/src/ssl/handshake_server.cc b/deps/boringssl/src/ssl/handshake_server.cc
index 2489428..bc0a0d1 100644
--- a/deps/boringssl/src/ssl/handshake_server.cc
+++ b/deps/boringssl/src/ssl/handshake_server.cc
@@ -644,6 +644,12 @@
     return ssl_hs_error;
   }
 
+  if (hs->ech_present && hs->ech_is_inner_present) {
+    OPENSSL_PUT_ERROR(SSL, SSL_R_UNEXPECTED_EXTENSION);
+    ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_ILLEGAL_PARAMETER);
+    return ssl_hs_error;
+  }
+
   hs->state = state12_select_certificate;
   return ssl_hs_ok;
 }
@@ -908,7 +914,7 @@
       !CBB_add_u8_length_prefixed(&body, &session_id) ||
       !CBB_add_bytes(&session_id, session->session_id,
                      session->session_id_length) ||
-      !CBB_add_u16(&body, ssl_cipher_get_value(hs->new_cipher)) ||
+      !CBB_add_u16(&body, SSL_CIPHER_get_protocol_id(hs->new_cipher)) ||
       !CBB_add_u8(&body, 0 /* no compression */) ||
       !ssl_add_serverhello_tlsext(hs, &body) ||
       !ssl_add_message_cbb(ssl, cbb.get())) {
@@ -1402,14 +1408,13 @@
   }
 
   // Compute the master secret.
-  hs->new_session->master_key_length = tls1_generate_master_secret(
-      hs, hs->new_session->master_key, premaster_secret);
-  if (hs->new_session->master_key_length == 0) {
+  hs->new_session->secret_length = tls1_generate_master_secret(
+      hs, hs->new_session->secret, premaster_secret);
+  if (hs->new_session->secret_length == 0) {
     return ssl_hs_error;
   }
   hs->new_session->extended_master_secret = hs->extended_master_secret;
-  CONSTTIME_DECLASSIFY(hs->new_session->master_key,
-                       hs->new_session->master_key_length);
+  CONSTTIME_DECLASSIFY(hs->new_session->secret, hs->new_session->secret_length);
 
   ssl->method->next_message(ssl);
   hs->state = state12_read_client_certificate_verify;
diff --git a/deps/boringssl/src/ssl/internal.h b/deps/boringssl/src/ssl/internal.h
index 182b02f..b3b7540 100644
--- a/deps/boringssl/src/ssl/internal.h
+++ b/deps/boringssl/src/ssl/internal.h
@@ -345,6 +345,9 @@
     if (new_size > size_) {
       abort();
     }
+    for (size_t i = new_size; i < size_; i++) {
+      data_[i].~T();
+    }
     size_ = new_size;
   }
 
@@ -386,6 +389,11 @@
   T *end() { return array_.data() + size_; }
   const T *cend() const { return array_.data() + size_; }
 
+  void clear() {
+    size_ = 0;
+    array_.Reset();
+  }
+
   // Push adds |elem| at the end of the internal array, growing if necessary. It
   // returns false when allocation fails.
   bool Push(T elem) {
@@ -631,9 +639,6 @@
 bool ssl_create_cipher_list(UniquePtr<SSLCipherPreferenceList> *out_cipher_list,
                             const char *rule_str, bool strict);
 
-// ssl_cipher_get_value returns the cipher suite id of |cipher|.
-uint16_t ssl_cipher_get_value(const SSL_CIPHER *cipher);
-
 // ssl_cipher_auth_mask_for_key returns the mask of cipher |algorithm_auth|
 // values suitable for use with |key| in TLS 1.2 and below.
 uint32_t ssl_cipher_auth_mask_for_key(const EVP_PKEY *key);
@@ -1414,6 +1419,15 @@
                              const SSLMessage &msg, CBS *binders);
 
 
+// Encrypted Client Hello.
+
+// tls13_ech_accept_confirmation computes the server's ECH acceptance signal,
+// writing it to |out|. It returns true on success, and false on failure.
+bool tls13_ech_accept_confirmation(
+    SSL_HANDSHAKE *hs, bssl::Span<uint8_t> out,
+    bssl::Span<const uint8_t> server_hello_ech_conf);
+
+
 // Handshake functions.
 
 enum ssl_hs_wait_t {
@@ -1482,6 +1496,7 @@
   state13_send_half_rtt_ticket,
   state13_read_second_client_flight,
   state13_process_end_of_early_data,
+  state13_read_client_encrypted_extensions,
   state13_read_client_certificate,
   state13_read_client_certificate_verify,
   state13_read_channel_id,
@@ -1632,6 +1647,10 @@
   // cookie is the value of the cookie received from the server, if any.
   Array<uint8_t> cookie;
 
+  // ech_grease contains the bytes of the GREASE ECH extension that was sent in
+  // the first ClientHello.
+  Array<uint8_t> ech_grease;
+
   // key_share_bytes is the value of the previously sent KeyShare extension by
   // the client in TLS 1.3.
   Array<uint8_t> key_share_bytes;
@@ -1650,6 +1669,10 @@
   // advertise this extension to the client.
   Array<uint16_t> peer_supported_group_list;
 
+  // peer_delegated_credential_sigalgs are the signature algorithms the peer
+  // supports with delegated credentials.
+  Array<uint16_t> peer_delegated_credential_sigalgs;
+
   // peer_key is the peer's ECDH key for a TLS 1.2 client.
   Array<uint8_t> peer_key;
 
@@ -1706,6 +1729,14 @@
   // key_block is the record-layer key block for TLS 1.2 and earlier.
   Array<uint8_t> key_block;
 
+  // ech_present, on the server, indicates whether the ClientHello contained an
+  // encrypted_client_hello extension.
+  bool ech_present : 1;
+
+  // ech_is_inner_present, on the server, indicates whether the ClientHello
+  // contained an ech_is_inner extension.
+  bool ech_is_inner_present : 1;
+
   // scts_requested is true if the SCT extension is in the ClientHello.
   bool scts_requested : 1;
 
@@ -1872,7 +1903,8 @@
 bool ssl_ext_key_share_parse_clienthello(SSL_HANDSHAKE *hs, bool *out_found,
                                          Array<uint8_t> *out_secret,
                                          uint8_t *out_alert, CBS *contents);
-bool ssl_ext_key_share_add_serverhello(SSL_HANDSHAKE *hs, CBB *out);
+bool ssl_ext_key_share_add_serverhello(SSL_HANDSHAKE *hs, CBB *out,
+                                       bool dry_run);
 
 bool ssl_ext_pre_shared_key_parse_serverhello(SSL_HANDSHAKE *hs,
                                               uint8_t *out_alert,
@@ -1914,6 +1946,12 @@
 bool ssl_negotiate_alpn(SSL_HANDSHAKE *hs, uint8_t *out_alert,
                         const SSL_CLIENT_HELLO *client_hello);
 
+// ssl_negotiate_alps negotiates the ALPS extension, if applicable. It returns
+// true on successful negotiation or if nothing was negotiated. It returns false
+// and sets |*out_alert| to an alert on error.
+bool ssl_negotiate_alps(SSL_HANDSHAKE *hs, uint8_t *out_alert,
+                        const SSL_CLIENT_HELLO *client_hello);
+
 struct SSL_EXTENSION_TYPE {
   uint16_t type;
   bool *out_present;
@@ -1922,12 +1960,12 @@
 
 // ssl_parse_extensions parses a TLS extensions block out of |cbs| and advances
 // it. It writes the parsed extensions to pointers denoted by |ext_types|. On
-// success, it fills in the |out_present| and |out_data| fields and returns one.
-// Otherwise, it sets |*out_alert| to an alert to send and returns zero. Unknown
-// extensions are rejected unless |ignore_unknown| is 1.
-int ssl_parse_extensions(const CBS *cbs, uint8_t *out_alert,
-                         const SSL_EXTENSION_TYPE *ext_types,
-                         size_t num_ext_types, int ignore_unknown);
+// success, it fills in the |out_present| and |out_data| fields and returns
+// true. Otherwise, it sets |*out_alert| to an alert to send and returns false.
+// Unknown extensions are rejected unless |ignore_unknown| is true.
+bool ssl_parse_extensions(const CBS *cbs, uint8_t *out_alert,
+                          Span<const SSL_EXTENSION_TYPE> ext_types,
+                          bool ignore_unknown);
 
 // ssl_verify_peer_cert verifies the peer certificate for |hs|.
 enum ssl_verify_result_t ssl_verify_peer_cert(SSL_HANDSHAKE *hs);
@@ -1940,6 +1978,11 @@
 bool ssl_send_finished(SSL_HANDSHAKE *hs);
 bool ssl_output_cert_chain(SSL_HANDSHAKE *hs);
 
+// ssl_handshake_session returns the |SSL_SESSION| corresponding to the current
+// handshake. Note, in TLS 1.2 resumptions, this session is immutable.
+const SSL_SESSION *ssl_handshake_session(const SSL_HANDSHAKE *hs);
+
+
 // SSLKEYLOGFILE functions.
 
 // ssl_log_secret logs |secret| with label |label|, if logging is enabled for
@@ -2392,9 +2435,6 @@
   // early_data_accepted is true if early data was accepted by the server.
   bool early_data_accepted : 1;
 
-  // tls13_downgrade is whether the TLS 1.3 anti-downgrade logic fired.
-  bool tls13_downgrade : 1;
-
   // token_binding_negotiated is set if Token Binding was negotiated.
   bool token_binding_negotiated : 1;
 
@@ -2615,6 +2655,12 @@
   unsigned timeout_duration_ms = 0;
 };
 
+// An ALPSConfig is a pair of ALPN protocol and settings value to use with ALPS.
+struct ALPSConfig {
+  Array<uint8_t> protocol;
+  Array<uint8_t> settings;
+};
+
 // SSL_CONFIG contains configuration bits that can be shed after the handshake
 // completes.  Objects of this type are not shared; they are unique to a
 // particular |SSL|.
@@ -2681,6 +2727,10 @@
   // format.
   Array<uint8_t> alpn_client_proto_list;
 
+  // alps_configs contains the list of supported protocols to use with ALPS,
+  // along with their corresponding ALPS values.
+  GrowableArray<ALPSConfig> alps_configs;
+
   // Contains a list of supported Token Binding key parameters.
   Array<uint8_t> token_binding_params;
 
@@ -2701,6 +2751,10 @@
   // verify_mode is a bitmask of |SSL_VERIFY_*| values.
   uint8_t verify_mode = SSL_VERIFY_NONE;
 
+  // ech_grease_enabled controls whether ECH GREASE may be sent in the
+  // ClientHello.
+  bool ech_grease_enabled : 1;
+
   // Enable signed certificate time stamps. Currently client only.
   bool signed_cert_timestamps_enabled : 1;
 
@@ -2733,13 +2787,13 @@
   // should be freed after the handshake completes.
   bool shed_handshake_config : 1;
 
-  // ignore_tls13_downgrade is whether the connection should continue when the
-  // server random signals a downgrade.
-  bool ignore_tls13_downgrade : 1;
-
   // jdk11_workaround is whether to disable TLS 1.3 for JDK 11 clients, as a
   // workaround for https://bugs.openjdk.java.net/browse/JDK-8211806.
   bool jdk11_workaround : 1;
+
+  // QUIC drafts up to and including 32 used a different TLS extension
+  // codepoint to convey QUIC's transport parameters.
+  bool quic_use_legacy_codepoint : 1;
 };
 
 // From RFC 8446, used in determining PSK modes.
@@ -2921,13 +2975,14 @@
 // determined by |direction|) using the keys generated by the TLS KDF. The
 // |key_block_cache| argument is used to store the generated key block, if
 // empty. Otherwise it's assumed that the key block is already contained within
-// it. Returns one on success or zero on error.
-int tls1_configure_aead(SSL *ssl, evp_aead_direction_t direction,
-                        Array<uint8_t> *key_block_cache,
-                        const SSL_CIPHER *cipher,
-                        Span<const uint8_t> iv_override);
+// it. It returns true on success or false on error.
+bool tls1_configure_aead(SSL *ssl, evp_aead_direction_t direction,
+                         Array<uint8_t> *key_block_cache,
+                         const SSL_SESSION *session,
+                         Span<const uint8_t> iv_override);
 
-int tls1_change_cipher_state(SSL_HANDSHAKE *hs, evp_aead_direction_t direction);
+bool tls1_change_cipher_state(SSL_HANDSHAKE *hs,
+                              evp_aead_direction_t direction);
 int tls1_generate_master_secret(SSL_HANDSHAKE *hs, uint8_t *out,
                                 Span<const uint8_t> premaster);
 
@@ -3321,10 +3376,6 @@
   // |SSL_MODE_ENABLE_FALSE_START| is enabled) is allowed without ALPN.
   bool false_start_allowed_without_alpn : 1;
 
-  // ignore_tls13_downgrade is whether a connection should continue when the
-  // server random signals a downgrade.
-  bool ignore_tls13_downgrade:1;
-
   // handoff indicates that a server should stop after receiving the
   // ClientHello and pause the handshake in such a way that |SSL_get_error|
   // returns |SSL_ERROR_HANDOFF|.
@@ -3445,10 +3496,12 @@
   // the peer, or zero if not applicable or unknown.
   uint16_t peer_signature_algorithm = 0;
 
-  // master_key, in TLS 1.2 and below, is the master secret associated with the
-  // session. In TLS 1.3 and up, it is the resumption secret.
-  int master_key_length = 0;
-  uint8_t master_key[SSL_MAX_MASTER_KEY_LENGTH] = {0};
+  // secret, in TLS 1.2 and below, is the master secret associated with the
+  // session. In TLS 1.3 and up, it is the resumption PSK for sessions handed to
+  // the caller, but it stores the resumption secret when stored on |SSL|
+  // objects.
+  int secret_length = 0;
+  uint8_t secret[SSL_MAX_MASTER_KEY_LENGTH] = {0};
 
   // session_id - valid?
   unsigned session_id_length = 0;
@@ -3533,9 +3586,18 @@
 
   // early_alpn is the ALPN protocol from the initial handshake. This is only
   // stored for TLS 1.3 and above in order to enforce ALPN matching for 0-RTT
-  // resumptions.
+  // resumptions. For the current connection's ALPN protocol, see
+  // |alpn_selected| on |SSL3_STATE|.
   bssl::Array<uint8_t> early_alpn;
 
+  // local_application_settings, if |has_application_settings| is true, is the
+  // local ALPS value for this connection.
+  bssl::Array<uint8_t> local_application_settings;
+
+  // peer_application_settings, if |has_application_settings| is true, is the
+  // peer ALPS value for this connection.
+  bssl::Array<uint8_t> peer_application_settings;
+
   // extended_master_secret is whether the master secret in this session was
   // generated using EMS and thus isn't vulnerable to the Triple Handshake
   // attack.
@@ -3556,6 +3618,10 @@
   // is_quic indicates whether this session was created using QUIC.
   bool is_quic : 1;
 
+  // has_application_settings indicates whether ALPS was negotiated in this
+  // session.
+  bool has_application_settings : 1;
+
   // quic_early_data_context is used to determine whether early data must be
   // rejected when performing a QUIC handshake.
   bssl::Array<uint8_t> quic_early_data_context;
diff --git a/deps/boringssl/src/ssl/s3_lib.cc b/deps/boringssl/src/ssl/s3_lib.cc
index ee35604..3e12492 100644
--- a/deps/boringssl/src/ssl/s3_lib.cc
+++ b/deps/boringssl/src/ssl/s3_lib.cc
@@ -177,7 +177,6 @@
       key_update_pending(false),
       wpend_pending(false),
       early_data_accepted(false),
-      tls13_downgrade(false),
       token_binding_negotiated(false),
       alert_dispatch(false),
       renegotiate_pending(false),
diff --git a/deps/boringssl/src/ssl/ssl_asn1.cc b/deps/boringssl/src/ssl/ssl_asn1.cc
index e6274f1..27bc310 100644
--- a/deps/boringssl/src/ssl/ssl_asn1.cc
+++ b/deps/boringssl/src/ssl/ssl_asn1.cc
@@ -105,7 +105,7 @@
 //     sslVersion                  INTEGER,      -- protocol version number
 //     cipher                      OCTET STRING, -- two bytes long
 //     sessionID                   OCTET STRING,
-//     masterKey                   OCTET STRING,
+//     secret                      OCTET STRING,
 //     time                    [1] INTEGER, -- seconds since UNIX epoch
 //     timeout                 [2] INTEGER, -- in seconds
 //     peer                    [3] Certificate OPTIONAL,
@@ -131,6 +131,10 @@
 //     earlyALPN               [26] OCTET STRING OPTIONAL,
 //     isQuic                  [27] BOOLEAN OPTIONAL,
 //     quicEarlyDataHash       [28] OCTET STRING OPTIONAL,
+//     localALPS               [29] OCTET STRING OPTIONAL,
+//     peerALPS                [30] OCTET STRING OPTIONAL,
+//     -- Either both or none of localALPS and peerALPS must be present. If both
+//     -- are present, earlyALPN must be present and non-empty.
 // }
 //
 // Note: historically this serialization has included other optional
@@ -194,6 +198,10 @@
     CBS_ASN1_CONSTRUCTED | CBS_ASN1_CONTEXT_SPECIFIC | 27;
 static const unsigned kQuicEarlyDataContextTag =
     CBS_ASN1_CONSTRUCTED | CBS_ASN1_CONTEXT_SPECIFIC | 28;
+static const unsigned kLocalALPSTag =
+    CBS_ASN1_CONSTRUCTED | CBS_ASN1_CONTEXT_SPECIFIC | 29;
+static const unsigned kPeerALPSTag =
+    CBS_ASN1_CONSTRUCTED | CBS_ASN1_CONTEXT_SPECIFIC | 30;
 
 static int SSL_SESSION_to_bytes_full(const SSL_SESSION *in, CBB *cbb,
                                      int for_ticket) {
@@ -210,8 +218,7 @@
       // The session ID is irrelevant for a session ticket.
       !CBB_add_asn1_octet_string(&session, in->session_id,
                                  for_ticket ? 0 : in->session_id_length) ||
-      !CBB_add_asn1_octet_string(&session, in->master_key,
-                                 in->master_key_length) ||
+      !CBB_add_asn1_octet_string(&session, in->secret, in->secret_length) ||
       !CBB_add_asn1(&session, &child, kTimeTag) ||
       !CBB_add_asn1_uint64(&child, in->time) ||
       !CBB_add_asn1(&session, &child, kTimeoutTag) ||
@@ -411,6 +418,19 @@
     }
   }
 
+  if (in->has_application_settings) {
+    if (!CBB_add_asn1(&session, &child, kLocalALPSTag) ||
+        !CBB_add_asn1_octet_string(&child,
+                                   in->local_application_settings.data(),
+                                   in->local_application_settings.size()) ||
+        !CBB_add_asn1(&session, &child, kPeerALPSTag) ||
+        !CBB_add_asn1_octet_string(&child, in->peer_application_settings.data(),
+                                   in->peer_application_settings.size())) {
+      OPENSSL_PUT_ERROR(SSL, ERR_R_MALLOC_FAILURE);
+      return 0;
+    }
+  }
+
   return CBB_flush(cbb);
 }
 
@@ -572,18 +592,18 @@
     return nullptr;
   }
 
-  CBS session_id, master_key;
+  CBS session_id, secret;
   if (!CBS_get_asn1(&session, &session_id, CBS_ASN1_OCTETSTRING) ||
       CBS_len(&session_id) > SSL3_MAX_SSL_SESSION_ID_LENGTH ||
-      !CBS_get_asn1(&session, &master_key, CBS_ASN1_OCTETSTRING) ||
-      CBS_len(&master_key) > SSL_MAX_MASTER_KEY_LENGTH) {
+      !CBS_get_asn1(&session, &secret, CBS_ASN1_OCTETSTRING) ||
+      CBS_len(&secret) > SSL_MAX_MASTER_KEY_LENGTH) {
     OPENSSL_PUT_ERROR(SSL, SSL_R_INVALID_SSL_SESSION);
     return nullptr;
   }
   OPENSSL_memcpy(ret->session_id, CBS_data(&session_id), CBS_len(&session_id));
   ret->session_id_length = CBS_len(&session_id);
-  OPENSSL_memcpy(ret->master_key, CBS_data(&master_key), CBS_len(&master_key));
-  ret->master_key_length = CBS_len(&master_key);
+  OPENSSL_memcpy(ret->secret, CBS_data(&secret), CBS_len(&secret));
+  ret->secret_length = CBS_len(&secret);
 
   CBS child;
   uint64_t timeout;
@@ -753,13 +773,33 @@
       !CBS_get_optional_asn1_bool(&session, &is_quic, kIsQuicTag,
                                   /*default_value=*/false) ||
       !SSL_SESSION_parse_octet_string(&session, &ret->quic_early_data_context,
-                                      kQuicEarlyDataContextTag) ||
+                                      kQuicEarlyDataContextTag)) {
+    OPENSSL_PUT_ERROR(SSL, SSL_R_INVALID_SSL_SESSION);
+    return nullptr;
+  }
+
+  CBS settings;
+  int has_local_alps, has_peer_alps;
+  if (!CBS_get_optional_asn1_octet_string(&session, &settings, &has_local_alps,
+                                          kLocalALPSTag) ||
+      !ret->local_application_settings.CopyFrom(settings) ||
+      !CBS_get_optional_asn1_octet_string(&session, &settings, &has_peer_alps,
+                                          kPeerALPSTag) ||
+      !ret->peer_application_settings.CopyFrom(settings) ||
       CBS_len(&session) != 0) {
     OPENSSL_PUT_ERROR(SSL, SSL_R_INVALID_SSL_SESSION);
     return nullptr;
   }
   ret->is_quic = is_quic;
 
+  // The two ALPS values and ALPN must be consistent.
+  if (has_local_alps != has_peer_alps ||
+      (has_local_alps && ret->early_alpn.empty())) {
+    OPENSSL_PUT_ERROR(SSL, SSL_R_INVALID_SSL_SESSION);
+    return nullptr;
+  }
+  ret->has_application_settings = has_local_alps;
+
   if (!x509_method->session_cache_objects(ret.get())) {
     OPENSSL_PUT_ERROR(SSL, SSL_R_INVALID_SSL_SESSION);
     return nullptr;
diff --git a/deps/boringssl/src/ssl/ssl_cert.cc b/deps/boringssl/src/ssl/ssl_cert.cc
index 6bac3a9..c64303a 100644
--- a/deps/boringssl/src/ssl/ssl_cert.cc
+++ b/deps/boringssl/src/ssl/ssl_cert.cc
@@ -821,16 +821,13 @@
   }
 
   // Check that the DC signature algorithm is supported by the peer.
-  Span<const uint16_t> peer_sigalgs = tls1_get_peer_verify_algorithms(hs);
-  bool sigalg_found = false;
+  Span<const uint16_t> peer_sigalgs = hs->peer_delegated_credential_sigalgs;
   for (uint16_t peer_sigalg : peer_sigalgs) {
     if (dc->expected_cert_verify_algorithm == peer_sigalg) {
-      sigalg_found = true;
-      break;
+      return true;
     }
   }
-
-  return sigalg_found;
+  return false;
 }
 
 bool ssl_signing_with_dc(const SSL_HANDSHAKE *hs) {
diff --git a/deps/boringssl/src/ssl/ssl_cipher.cc b/deps/boringssl/src/ssl/ssl_cipher.cc
index c421292..4f5049c 100644
--- a/deps/boringssl/src/ssl/ssl_cipher.cc
+++ b/deps/boringssl/src/ssl/ssl_cipher.cc
@@ -1279,14 +1279,6 @@
   return true;
 }
 
-uint16_t ssl_cipher_get_value(const SSL_CIPHER *cipher) {
-  uint32_t id = cipher->id;
-  // All OpenSSL cipher IDs are prefaced with 0x03. Historically this referred
-  // to SSLv2 vs SSLv3.
-  assert((id & 0xff000000) == 0x03000000);
-  return id & 0xffff;
-}
-
 uint32_t ssl_cipher_auth_mask_for_key(const EVP_PKEY *key) {
   switch (EVP_PKEY_id(key)) {
     case EVP_PKEY_RSA:
@@ -1376,10 +1368,17 @@
 
 uint32_t SSL_CIPHER_get_id(const SSL_CIPHER *cipher) { return cipher->id; }
 
-uint16_t SSL_CIPHER_get_value(const SSL_CIPHER *cipher) {
+uint16_t SSL_CIPHER_get_protocol_id(const SSL_CIPHER *cipher) {
+  // All OpenSSL cipher IDs are prefaced with 0x03. Historically this referred
+  // to SSLv2 vs SSLv3.
+  assert((cipher->id & 0xff000000) == 0x03000000);
   return static_cast<uint16_t>(cipher->id);
 }
 
+uint16_t SSL_CIPHER_get_value(const SSL_CIPHER *cipher) {
+  return SSL_CIPHER_get_protocol_id(cipher);
+}
+
 int SSL_CIPHER_is_aead(const SSL_CIPHER *cipher) {
   return (cipher->algorithm_mac & SSL_AEAD) != 0;
 }
diff --git a/deps/boringssl/src/ssl/ssl_lib.cc b/deps/boringssl/src/ssl/ssl_lib.cc
index 90c265e..7c7bbbf 100644
--- a/deps/boringssl/src/ssl/ssl_lib.cc
+++ b/deps/boringssl/src/ssl/ssl_lib.cc
@@ -565,7 +565,6 @@
       grease_enabled(false),
       allow_unknown_alpn_protos(false),
       false_start_allowed_without_alpn(false),
-      ignore_tls13_downgrade(false),
       handoff(false),
       enable_early_data(false) {
   CRYPTO_MUTEX_init(&lock);
@@ -711,7 +710,6 @@
       ctx->signed_cert_timestamps_enabled;
   ssl->config->ocsp_stapling_enabled = ctx->ocsp_stapling_enabled;
   ssl->config->handoff = ctx->handoff;
-  ssl->config->ignore_tls13_downgrade = ctx->ignore_tls13_downgrade;
   ssl->quic_method = ctx->quic_method;
 
   if (!ssl->method->ssl_new(ssl.get()) ||
@@ -724,6 +722,7 @@
 
 SSL_CONFIG::SSL_CONFIG(SSL *ssl_arg)
     : ssl(ssl_arg),
+      ech_grease_enabled(false),
       signed_cert_timestamps_enabled(false),
       ocsp_stapling_enabled(false),
       channel_id_enabled(false),
@@ -731,8 +730,8 @@
       retain_only_sha256_of_client_certs(false),
       handoff(false),
       shed_handshake_config(false),
-      ignore_tls13_downgrade(false),
-      jdk11_workaround(false) {
+      jdk11_workaround(false),
+      quic_use_legacy_codepoint(true) {
   assert(ssl);
 }
 
@@ -1294,6 +1293,43 @@
   return ssl->s3->early_data_reason;
 }
 
+const char *SSL_early_data_reason_string(enum ssl_early_data_reason_t reason) {
+  switch (reason) {
+    case ssl_early_data_unknown:
+      return "unknown";
+    case ssl_early_data_disabled:
+      return "disabled";
+    case ssl_early_data_accepted:
+      return "accepted";
+    case ssl_early_data_protocol_version:
+      return "protocol_version";
+    case ssl_early_data_peer_declined:
+      return "peer_declined";
+    case ssl_early_data_no_session_offered:
+      return "no_session_offered";
+    case ssl_early_data_session_not_resumed:
+      return "session_not_resumed";
+    case ssl_early_data_unsupported_for_session:
+      return "unsupported_for_session";
+    case ssl_early_data_hello_retry_request:
+      return "hello_retry_request";
+    case ssl_early_data_alpn_mismatch:
+      return "alpn_mismatch";
+    case ssl_early_data_channel_id:
+      return "channel_id";
+    case ssl_early_data_token_binding:
+      return "token_binding";
+    case ssl_early_data_ticket_age_skew:
+      return "ticket_age_skew";
+    case ssl_early_data_quic_parameter_mismatch:
+      return "quic_parameter_mismatch";
+    case ssl_early_data_alps_mismatch:
+      return "alps_mismatch";
+  }
+
+  return nullptr;
+}
+
 static int bio_retry_reason_to_error(int reason) {
   switch (reason) {
     case BIO_RR_CONNECT:
@@ -1432,6 +1468,13 @@
   }
 }
 
+void SSL_set_enable_ech_grease(SSL *ssl, int enable) {
+  if (!ssl->config) {
+    return;
+  }
+  ssl->config->ech_grease_enabled = !!enable;
+}
+
 uint32_t SSL_CTX_set_options(SSL_CTX *ctx, uint32_t options) {
   ctx->options |= options;
   return ctx->options;
@@ -2241,6 +2284,36 @@
   ctx->allow_unknown_alpn_protos = !!enabled;
 }
 
+int SSL_add_application_settings(SSL *ssl, const uint8_t *proto,
+                                 size_t proto_len, const uint8_t *settings,
+                                 size_t settings_len) {
+  if (!ssl->config) {
+    return 0;
+  }
+  ALPSConfig config;
+  if (!config.protocol.CopyFrom(MakeConstSpan(proto, proto_len)) ||
+      !config.settings.CopyFrom(MakeConstSpan(settings, settings_len)) ||
+      !ssl->config->alps_configs.Push(std::move(config))) {
+    return 0;
+  }
+  return 1;
+}
+
+void SSL_get0_peer_application_settings(const SSL *ssl,
+                                        const uint8_t **out_data,
+                                        size_t *out_len) {
+  const SSL_SESSION *session = SSL_get_session(ssl);
+  Span<const uint8_t> settings =
+      session ? session->peer_application_settings : Span<const uint8_t>();
+  *out_data = settings.data();
+  *out_len = settings.size();
+}
+
+int SSL_has_application_settings(const SSL *ssl) {
+  const SSL_SESSION *session = SSL_get_session(ssl);
+  return session && session->has_application_settings;
+}
+
 int SSL_CTX_add_cert_compression_alg(SSL_CTX *ctx, uint16_t alg_id,
                                      ssl_cert_compression_func_t compress,
                                      ssl_cert_decompression_func_t decompress) {
@@ -2360,6 +2433,16 @@
   return sigalgs.size();
 }
 
+size_t SSL_get0_peer_delegation_algorithms(const SSL *ssl,
+                                           const uint16_t **out_sigalgs){
+  Span<const uint16_t> sigalgs;
+  if (ssl->s3->hs != nullptr) {
+    sigalgs = ssl->s3->hs->peer_delegated_credential_sigalgs;
+  }
+  *out_sigalgs = sigalgs.data();
+  return sigalgs.size();
+}
+
 EVP_PKEY *SSL_get_privatekey(const SSL *ssl) {
   if (!ssl->config) {
     assert(ssl->config);
@@ -2852,22 +2935,15 @@
   ctx->false_start_allowed_without_alpn = !!allowed;
 }
 
-int SSL_is_tls13_downgrade(const SSL *ssl) { return ssl->s3->tls13_downgrade; }
+int SSL_is_tls13_downgrade(const SSL *ssl) { return 0; }
 
 int SSL_used_hello_retry_request(const SSL *ssl) {
   return ssl->s3->used_hello_retry_request;
 }
 
-void SSL_CTX_set_ignore_tls13_downgrade(SSL_CTX *ctx, int ignore) {
-  ctx->ignore_tls13_downgrade = !!ignore;
-}
+void SSL_CTX_set_ignore_tls13_downgrade(SSL_CTX *ctx, int ignore) {}
 
-void SSL_set_ignore_tls13_downgrade(SSL *ssl, int ignore) {
-  if (!ssl->config) {
-    return;
-  }
-  ssl->config->ignore_tls13_downgrade = !!ignore;
-}
+void SSL_set_ignore_tls13_downgrade(SSL *ssl, int ignore) {}
 
 void SSL_set_shed_handshake_config(SSL *ssl, int enable) {
   if (!ssl->config) {
@@ -2883,6 +2959,13 @@
   ssl->config->jdk11_workaround = !!enable;
 }
 
+void SSL_set_quic_use_legacy_codepoint(SSL *ssl, int use_legacy) {
+  if (!ssl->config) {
+    return;
+  }
+  ssl->config->quic_use_legacy_codepoint = !!use_legacy;
+}
+
 int SSL_clear(SSL *ssl) {
   if (!ssl->config) {
     return 0;  // SSL_clear may not be used after shedding config.
diff --git a/deps/boringssl/src/ssl/ssl_session.cc b/deps/boringssl/src/ssl/ssl_session.cc
index 4c6d045..91b2fff 100644
--- a/deps/boringssl/src/ssl/ssl_session.cc
+++ b/deps/boringssl/src/ssl/ssl_session.cc
@@ -202,9 +202,8 @@
   OPENSSL_memcpy(new_session->sid_ctx, session->sid_ctx, session->sid_ctx_length);
 
   // Copy the key material.
-  new_session->master_key_length = session->master_key_length;
-  OPENSSL_memcpy(new_session->master_key, session->master_key,
-         session->master_key_length);
+  new_session->secret_length = session->secret_length;
+  OPENSSL_memcpy(new_session->secret, session->secret, session->secret_length);
   new_session->cipher = session->cipher;
 
   // Copy authentication state.
@@ -264,13 +263,15 @@
     new_session->ticket_age_add = session->ticket_age_add;
     new_session->ticket_max_early_data = session->ticket_max_early_data;
     new_session->extended_master_secret = session->extended_master_secret;
+    new_session->has_application_settings = session->has_application_settings;
 
-    if (!new_session->early_alpn.CopyFrom(session->early_alpn)) {
-      return nullptr;
-    }
-
-    if (!new_session->quic_early_data_context.CopyFrom(
-            session->quic_early_data_context)) {
+    if (!new_session->early_alpn.CopyFrom(session->early_alpn) ||
+        !new_session->quic_early_data_context.CopyFrom(
+            session->quic_early_data_context) ||
+        !new_session->local_application_settings.CopyFrom(
+            session->local_application_settings) ||
+        !new_session->peer_application_settings.CopyFrom(
+            session->peer_application_settings)) {
       return nullptr;
     }
   }
@@ -364,12 +365,6 @@
   session->is_server = is_server;
   session->ssl_version = ssl->version;
   session->is_quic = ssl->quic_method != nullptr;
-  if (is_server && ssl->enable_early_data && session->is_quic) {
-    if (!session->quic_early_data_context.CopyFrom(
-            hs->config->quic_early_data_context)) {
-      return 0;
-    }
-  }
 
   // Fill in the time from the |SSL_CTX|'s clock.
   struct OPENSSL_timeval now;
@@ -870,7 +865,8 @@
       not_resumable(false),
       ticket_age_add_valid(false),
       is_server(false),
-      is_quic(false) {
+      is_quic(false),
+      has_application_settings(false) {
   CRYPTO_new_ex_data(&ex_data);
   time = ::time(nullptr);
 }
@@ -966,14 +962,14 @@
 
 size_t SSL_SESSION_get_master_key(const SSL_SESSION *session, uint8_t *out,
                                   size_t max_out) {
-  // TODO(davidben): Fix master_key_length's type and remove these casts.
+  // TODO(davidben): Fix secret_length's type and remove these casts.
   if (max_out == 0) {
-    return (size_t)session->master_key_length;
+    return (size_t)session->secret_length;
   }
-  if (max_out > (size_t)session->master_key_length) {
-    max_out = (size_t)session->master_key_length;
+  if (max_out > (size_t)session->secret_length) {
+    max_out = (size_t)session->secret_length;
   }
-  OPENSSL_memcpy(out, session->master_key, max_out);
+  OPENSSL_memcpy(out, session->secret, max_out);
   return max_out;
 }
 
diff --git a/deps/boringssl/src/ssl/ssl_test.cc b/deps/boringssl/src/ssl/ssl_test.cc
index 3c2d852..637f4d5 100644
--- a/deps/boringssl/src/ssl/ssl_test.cc
+++ b/deps/boringssl/src/ssl/ssl_test.cc
@@ -754,7 +754,7 @@
     "NusdVm/K2rxzY5Dkf3s+Iss9B+1fOHSc4wNQTqGvmO5h8oQ/Eg==";
 
 // kBadSessionExtraField is a custom serialized SSL_SESSION generated by replacing
-// the final (optional) element of |kCustomSession| with tag number 30.
+// the final (optional) element of |kCustomSession| with tag number 99.
 static const char kBadSessionExtraField[] =
     "MIIBdgIBAQICAwMEAsAvBCAG5Q1ndq4Yfmbeo1zwLkNRKmCXGdNgWvGT3cskV0yQ"
     "kAQwJlrlzkAWBOWiLj/jJ76D7l+UXoizP2KI2C7I2FccqMmIfFmmkUy32nIJ0mZH"
@@ -763,7 +763,7 @@
     "LwjcDTpsuh3qXEaZ992r1N38VDcyS6P7I6HBYN9BsNHM362zZnY27GpTw+Kwd751"
     "CLoXFPoaMOe57dbBpXoro6Pd3BTbf/Tzr88K06yEOTDKPNj3+inbMaVigtK4PLyP"
     "q+Topyzvx9USFgRvyuoxn0Hgb+R0A3j6SLRuyOdAi4gv7Y5oliynrSIEIAYGBgYG"
-    "BgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGrgMEAQevAwQBBL4DBAEF";
+    "BgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGBgYGrgMEAQevAwQBBOMDBAEF";
 
 // kBadSessionVersion is a custom serialized SSL_SESSION generated by replacing
 // the version of |kCustomSession| with 2.
@@ -5070,6 +5070,22 @@
     SSL_CTX_set_max_proto_version(server_ctx_.get(), TLS1_3_VERSION);
     SSL_CTX_set_min_proto_version(client_ctx_.get(), TLS1_3_VERSION);
     SSL_CTX_set_max_proto_version(client_ctx_.get(), TLS1_3_VERSION);
+
+    static const uint8_t kALPNProtos[] = {0x03, 'f', 'o', 'o'};
+    ASSERT_EQ(SSL_CTX_set_alpn_protos(client_ctx_.get(), kALPNProtos,
+                                      sizeof(kALPNProtos)),
+              0);
+    SSL_CTX_set_alpn_select_cb(
+        server_ctx_.get(),
+        [](SSL *ssl, const uint8_t **out, uint8_t *out_len, const uint8_t *in,
+           unsigned in_len, void *arg) -> int {
+          return SSL_select_next_proto(
+                     const_cast<uint8_t **>(out), out_len, in, in_len,
+                     kALPNProtos, sizeof(kALPNProtos)) == OPENSSL_NPN_NEGOTIATED
+                     ? SSL_TLSEXT_ERR_OK
+                     : SSL_TLSEXT_ERR_NOACK;
+        },
+        nullptr);
   }
 
   static MockQUICTransport *TransportFromSSL(const SSL *ssl) {
@@ -5418,6 +5434,46 @@
   EXPECT_FALSE(SSL_in_early_data(server_.get()));
   EXPECT_TRUE(SSL_early_data_accepted(client_.get()));
   EXPECT_TRUE(SSL_early_data_accepted(server_.get()));
+
+  // Finish handling post-handshake messages after the first 0-RTT resumption.
+  EXPECT_TRUE(ProvideHandshakeData(client_.get()));
+  EXPECT_TRUE(SSL_process_quic_post_handshake(client_.get()));
+
+  // Perform a second 0-RTT resumption attempt, and confirm that 0-RTT is
+  // accepted again.
+  ASSERT_TRUE(CreateClientAndServer());
+  SSL_set_session(client_.get(), g_last_session.get());
+
+  // The client handshake should return immediately into the early data state.
+  ASSERT_EQ(SSL_do_handshake(client_.get()), 1);
+  EXPECT_TRUE(SSL_in_early_data(client_.get()));
+  // The transport should have keys for sending 0-RTT data.
+  EXPECT_TRUE(transport_->client()->HasWriteSecret(ssl_encryption_early_data));
+
+  // The server will consume the ClientHello and also enter the early data
+  // state.
+  ASSERT_TRUE(ProvideHandshakeData(server_.get()));
+  ASSERT_EQ(SSL_do_handshake(server_.get()), 1);
+  EXPECT_TRUE(SSL_in_early_data(server_.get()));
+  EXPECT_TRUE(transport_->SecretsMatch(ssl_encryption_early_data));
+  // At this point, the server has half-RTT write keys, but it cannot access
+  // 1-RTT read keys until client Finished.
+  EXPECT_TRUE(transport_->server()->HasWriteSecret(ssl_encryption_application));
+  EXPECT_FALSE(transport_->server()->HasReadSecret(ssl_encryption_application));
+
+  // Finish up the client and server handshakes.
+  ASSERT_TRUE(CompleteHandshakesForQUIC());
+
+  // Both sides can now exchange 1-RTT data.
+  ExpectHandshakeSuccess();
+  EXPECT_TRUE(SSL_session_reused(client_.get()));
+  EXPECT_TRUE(SSL_session_reused(server_.get()));
+  EXPECT_FALSE(SSL_in_early_data(client_.get()));
+  EXPECT_FALSE(SSL_in_early_data(server_.get()));
+  EXPECT_TRUE(SSL_early_data_accepted(client_.get()));
+  EXPECT_TRUE(SSL_early_data_accepted(server_.get()));
+  EXPECT_EQ(SSL_get_early_data_reason(client_.get()), ssl_early_data_accepted);
+  EXPECT_EQ(SSL_get_early_data_reason(server_.get()), ssl_early_data_accepted);
 }
 
 TEST_F(QUICMethodTest, ZeroRTTRejectMismatchedParameters) {
@@ -6017,6 +6073,107 @@
   ASSERT_TRUE(RunQUICHandshakesAndExpectError(ExpectedError::kClientError));
 }
 
+TEST_F(QUICMethodTest, QuicLegacyCodepointEnabled) {
+  const SSL_QUIC_METHOD quic_method = DefaultQUICMethod();
+  ASSERT_TRUE(SSL_CTX_set_quic_method(client_ctx_.get(), &quic_method));
+  ASSERT_TRUE(SSL_CTX_set_quic_method(server_ctx_.get(), &quic_method));
+
+  ASSERT_TRUE(CreateClientAndServer());
+  uint8_t kClientParams[] = {1, 2, 3, 4};
+  uint8_t kServerParams[] = {5, 6, 7};
+  SSL_set_quic_use_legacy_codepoint(client_.get(), 1);
+  SSL_set_quic_use_legacy_codepoint(server_.get(), 1);
+  ASSERT_TRUE(SSL_set_quic_transport_params(client_.get(), kClientParams,
+                                            sizeof(kClientParams)));
+  ASSERT_TRUE(SSL_set_quic_transport_params(server_.get(), kServerParams,
+                                            sizeof(kServerParams)));
+
+  ASSERT_TRUE(CompleteHandshakesForQUIC());
+  ExpectReceivedTransportParamsEqual(client_.get(), kServerParams);
+  ExpectReceivedTransportParamsEqual(server_.get(), kClientParams);
+}
+
+TEST_F(QUICMethodTest, QuicLegacyCodepointDisabled) {
+  const SSL_QUIC_METHOD quic_method = DefaultQUICMethod();
+  ASSERT_TRUE(SSL_CTX_set_quic_method(client_ctx_.get(), &quic_method));
+  ASSERT_TRUE(SSL_CTX_set_quic_method(server_ctx_.get(), &quic_method));
+
+  ASSERT_TRUE(CreateClientAndServer());
+  uint8_t kClientParams[] = {1, 2, 3, 4};
+  uint8_t kServerParams[] = {5, 6, 7};
+  SSL_set_quic_use_legacy_codepoint(client_.get(), 0);
+  SSL_set_quic_use_legacy_codepoint(server_.get(), 0);
+  ASSERT_TRUE(SSL_set_quic_transport_params(client_.get(), kClientParams,
+                                            sizeof(kClientParams)));
+  ASSERT_TRUE(SSL_set_quic_transport_params(server_.get(), kServerParams,
+                                            sizeof(kServerParams)));
+
+  ASSERT_TRUE(CompleteHandshakesForQUIC());
+  ExpectReceivedTransportParamsEqual(client_.get(), kServerParams);
+  ExpectReceivedTransportParamsEqual(server_.get(), kClientParams);
+}
+
+TEST_F(QUICMethodTest, QuicLegacyCodepointClientOnly) {
+  const SSL_QUIC_METHOD quic_method = DefaultQUICMethod();
+  ASSERT_TRUE(SSL_CTX_set_quic_method(client_ctx_.get(), &quic_method));
+  ASSERT_TRUE(SSL_CTX_set_quic_method(server_ctx_.get(), &quic_method));
+
+  ASSERT_TRUE(CreateClientAndServer());
+  uint8_t kClientParams[] = {1, 2, 3, 4};
+  uint8_t kServerParams[] = {5, 6, 7};
+  SSL_set_quic_use_legacy_codepoint(client_.get(), 1);
+  SSL_set_quic_use_legacy_codepoint(server_.get(), 0);
+  ASSERT_TRUE(SSL_set_quic_transport_params(client_.get(), kClientParams,
+                                            sizeof(kClientParams)));
+  ASSERT_TRUE(SSL_set_quic_transport_params(server_.get(), kServerParams,
+                                            sizeof(kServerParams)));
+
+  ASSERT_TRUE(RunQUICHandshakesAndExpectError(ExpectedError::kServerError));
+}
+
+TEST_F(QUICMethodTest, QuicLegacyCodepointServerOnly) {
+  const SSL_QUIC_METHOD quic_method = DefaultQUICMethod();
+  ASSERT_TRUE(SSL_CTX_set_quic_method(client_ctx_.get(), &quic_method));
+  ASSERT_TRUE(SSL_CTX_set_quic_method(server_ctx_.get(), &quic_method));
+
+  ASSERT_TRUE(CreateClientAndServer());
+  uint8_t kClientParams[] = {1, 2, 3, 4};
+  uint8_t kServerParams[] = {5, 6, 7};
+  SSL_set_quic_use_legacy_codepoint(client_.get(), 0);
+  SSL_set_quic_use_legacy_codepoint(server_.get(), 1);
+  ASSERT_TRUE(SSL_set_quic_transport_params(client_.get(), kClientParams,
+                                            sizeof(kClientParams)));
+  ASSERT_TRUE(SSL_set_quic_transport_params(server_.get(), kServerParams,
+                                            sizeof(kServerParams)));
+
+  ASSERT_TRUE(RunQUICHandshakesAndExpectError(ExpectedError::kServerError));
+}
+
+// Test that the default QUIC code point is consistent with
+// |TLSEXT_TYPE_quic_transport_parameters|. This test ensures we remember to
+// update the two values together.
+TEST_F(QUICMethodTest, QuicCodePointDefault) {
+  const SSL_QUIC_METHOD quic_method = DefaultQUICMethod();
+  ASSERT_TRUE(SSL_CTX_set_quic_method(client_ctx_.get(), &quic_method));
+  ASSERT_TRUE(SSL_CTX_set_quic_method(server_ctx_.get(), &quic_method));
+  SSL_CTX_set_select_certificate_cb(
+      server_ctx_.get(),
+      [](const SSL_CLIENT_HELLO *client_hello) -> ssl_select_cert_result_t {
+        const uint8_t *data;
+        size_t len;
+        if (!SSL_early_callback_ctx_extension_get(
+                client_hello, TLSEXT_TYPE_quic_transport_parameters, &data,
+                &len)) {
+          ADD_FAILURE() << "Could not find quic_transport_parameters extension";
+          return ssl_select_cert_error;
+        }
+        return ssl_select_cert_success;
+      });
+
+  ASSERT_TRUE(CreateClientAndServer());
+  ASSERT_TRUE(CompleteHandshakesForQUIC());
+}
+
 extern "C" {
 int BORINGSSL_enum_c_type_test(void);
 }
@@ -6473,5 +6630,64 @@
                                                     sizeof(kTicket)));
 }
 
+TEST(SSLTest, BIO) {
+  bssl::UniquePtr<SSL_CTX> client_ctx(SSL_CTX_new(TLS_method()));
+  bssl::UniquePtr<SSL_CTX> server_ctx(SSL_CTX_new(TLS_method()));
+  ASSERT_TRUE(client_ctx);
+  ASSERT_TRUE(server_ctx);
+
+  bssl::UniquePtr<X509> cert = GetTestCertificate();
+  bssl::UniquePtr<EVP_PKEY> key = GetTestKey();
+  ASSERT_TRUE(cert);
+  ASSERT_TRUE(key);
+  ASSERT_TRUE(SSL_CTX_use_certificate(server_ctx.get(), cert.get()));
+  ASSERT_TRUE(SSL_CTX_use_PrivateKey(server_ctx.get(), key.get()));
+
+  for (bool take_ownership : {true, false}) {
+    // For simplicity, get the handshake out of the way first.
+    bssl::UniquePtr<SSL> client, server;
+    ASSERT_TRUE(ConnectClientAndServer(&client, &server, client_ctx.get(),
+                                       server_ctx.get()));
+
+    // Wrap |client| in an SSL BIO.
+    bssl::UniquePtr<BIO> client_bio(BIO_new(BIO_f_ssl()));
+    ASSERT_TRUE(client_bio);
+    ASSERT_EQ(1, BIO_set_ssl(client_bio.get(), client.get(), take_ownership));
+    if (take_ownership) {
+      client.release();
+    }
+
+    // Flushing the BIO should not crash.
+    EXPECT_EQ(1, BIO_flush(client_bio.get()));
+
+    // Exchange some data.
+    EXPECT_EQ(5, BIO_write(client_bio.get(), "hello", 5));
+    uint8_t buf[5];
+    ASSERT_EQ(5, SSL_read(server.get(), buf, sizeof(buf)));
+    EXPECT_EQ(Bytes("hello"), Bytes(buf));
+
+    EXPECT_EQ(5, SSL_write(server.get(), "world", 5));
+    ASSERT_EQ(5, BIO_read(client_bio.get(), buf, sizeof(buf)));
+    EXPECT_EQ(Bytes("world"), Bytes(buf));
+
+    // |BIO_should_read| should work.
+    EXPECT_EQ(-1, BIO_read(client_bio.get(), buf, sizeof(buf)));
+    EXPECT_TRUE(BIO_should_read(client_bio.get()));
+
+    // Writing data should eventually exceed the buffer size and fail, reporting
+    // |BIO_should_write|.
+    int ret;
+    for (int i = 0; i < 1024; i++) {
+      std::vector<uint8_t> buffer(1024);
+      ret = BIO_write(client_bio.get(), buffer.data(), buffer.size());
+      if (ret <= 0) {
+        break;
+      }
+    }
+    EXPECT_EQ(-1, ret);
+    EXPECT_TRUE(BIO_should_write(client_bio.get()));
+  }
+}
+
 }  // namespace
 BSSL_NAMESPACE_END
diff --git a/deps/boringssl/src/ssl/ssl_transcript.cc b/deps/boringssl/src/ssl/ssl_transcript.cc
index c1cef2b..0bc13b9 100644
--- a/deps/boringssl/src/ssl/ssl_transcript.cc
+++ b/deps/boringssl/src/ssl/ssl_transcript.cc
@@ -265,8 +265,8 @@
 
   static const size_t kFinishedLen = 12;
   if (!tls1_prf(Digest(), MakeSpan(out, kFinishedLen),
-                MakeConstSpan(session->master_key, session->master_key_length),
-                label, MakeConstSpan(digest, digest_len), {})) {
+                MakeConstSpan(session->secret, session->secret_length), label,
+                MakeConstSpan(digest, digest_len), {})) {
     return false;
   }
 
diff --git a/deps/boringssl/src/ssl/t1_enc.cc b/deps/boringssl/src/ssl/t1_enc.cc
index 73b6544..c8db457 100644
--- a/deps/boringssl/src/ssl/t1_enc.cc
+++ b/deps/boringssl/src/ssl/t1_enc.cc
@@ -189,21 +189,35 @@
   return true;
 }
 
-int tls1_configure_aead(SSL *ssl, evp_aead_direction_t direction,
-                        Array<uint8_t> *key_block_cache,
-                        const SSL_CIPHER *cipher,
-                        Span<const uint8_t> iv_override) {
+static bool generate_key_block(const SSL *ssl, Span<uint8_t> out,
+                               const SSL_SESSION *session) {
+  auto secret = MakeConstSpan(session->secret, session->secret_length);
+  static const char kLabel[] = "key expansion";
+  auto label = MakeConstSpan(kLabel, sizeof(kLabel) - 1);
+
+  const EVP_MD *digest = ssl_session_get_digest(session);
+  // Note this function assumes that |session|'s key material corresponds to
+  // |ssl->s3->client_random| and |ssl->s3->server_random|.
+  return tls1_prf(digest, out, secret, label, ssl->s3->server_random,
+                  ssl->s3->client_random);
+}
+
+bool tls1_configure_aead(SSL *ssl, evp_aead_direction_t direction,
+                         Array<uint8_t> *key_block_cache,
+                         const SSL_SESSION *session,
+                         Span<const uint8_t> iv_override) {
   size_t mac_secret_len, key_len, iv_len;
-  if (!get_key_block_lengths(ssl, &mac_secret_len, &key_len, &iv_len, cipher)) {
-    return 0;
+  if (!get_key_block_lengths(ssl, &mac_secret_len, &key_len, &iv_len,
+                             session->cipher)) {
+    return false;
   }
 
   // Ensure that |key_block_cache| is set up.
   const size_t key_block_size = 2 * (mac_secret_len + key_len + iv_len);
   if (key_block_cache->empty()) {
     if (!key_block_cache->Init(key_block_size) ||
-        !SSL_generate_key_block(ssl, key_block_cache->data(), key_block_size)) {
-      return 0;
+        !generate_key_block(ssl, MakeSpan(*key_block_cache), session)) {
+      return false;
     }
   }
   assert(key_block_cache->size() == key_block_size);
@@ -224,15 +238,16 @@
 
   if (!iv_override.empty()) {
     if (iv_override.size() != iv_len) {
-      return 0;
+      return false;
     }
     iv = iv_override;
   }
 
-  UniquePtr<SSLAEADContext> aead_ctx = SSLAEADContext::Create(
-      direction, ssl->version, SSL_is_dtls(ssl), cipher, key, mac_secret, iv);
+  UniquePtr<SSLAEADContext> aead_ctx =
+      SSLAEADContext::Create(direction, ssl->version, SSL_is_dtls(ssl),
+                             session->cipher, key, mac_secret, iv);
   if (!aead_ctx) {
-    return 0;
+    return false;
   }
 
   if (direction == evp_aead_open) {
@@ -246,10 +261,10 @@
                                       /*secret_for_quic=*/{});
 }
 
-int tls1_change_cipher_state(SSL_HANDSHAKE *hs,
-                             evp_aead_direction_t direction) {
+bool tls1_change_cipher_state(SSL_HANDSHAKE *hs,
+                              evp_aead_direction_t direction) {
   return tls1_configure_aead(hs->ssl, direction, &hs->key_block,
-                             hs->new_cipher, {});
+                             ssl_handshake_session(hs), {});
 }
 
 int tls1_generate_master_secret(SSL_HANDSHAKE *hs, uint8_t *out,
@@ -286,6 +301,11 @@
 using namespace bssl;
 
 size_t SSL_get_key_block_len(const SSL *ssl) {
+  // See |SSL_generate_key_block|.
+  if (SSL_in_init(ssl)) {
+    return 0;
+  }
+
   size_t mac_secret_len, key_len, fixed_iv_len;
   if (!get_key_block_lengths(ssl, &mac_secret_len, &key_len, &fixed_iv_len,
                              SSL_get_current_cipher(ssl))) {
@@ -297,16 +317,16 @@
 }
 
 int SSL_generate_key_block(const SSL *ssl, uint8_t *out, size_t out_len) {
-  const SSL_SESSION *session = SSL_get_session(ssl);
-  auto out_span = MakeSpan(out, out_len);
-  auto master_key =
-      MakeConstSpan(session->master_key, session->master_key_length);
-  static const char kLabel[] = "key expansion";
-  auto label = MakeConstSpan(kLabel, sizeof(kLabel) - 1);
+  // Which cipher state to use is ambiguous during a handshake. In particular,
+  // there are points where read and write states are from different epochs.
+  // During a handshake, before ChangeCipherSpec, the encryption states may not
+  // match |ssl->s3->client_random| and |ssl->s3->server_random|.
+  if (SSL_in_init(ssl)) {
+    OPENSSL_PUT_ERROR(SSL, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
+    return 0;
+  }
 
-  const EVP_MD *digest = ssl_session_get_digest(session);
-  return tls1_prf(digest, out_span, master_key, label, ssl->s3->server_random,
-                  ssl->s3->client_random);
+  return generate_key_block(ssl, MakeSpan(out, out_len), SSL_get_session(ssl));
 }
 
 int SSL_export_keying_material(SSL *ssl, uint8_t *out, size_t out_len,
@@ -358,8 +378,7 @@
 
   const SSL_SESSION *session = SSL_get_session(ssl);
   const EVP_MD *digest = ssl_session_get_digest(session);
-  return tls1_prf(
-      digest, MakeSpan(out, out_len),
-      MakeConstSpan(session->master_key, session->master_key_length),
-      MakeConstSpan(label, label_len), seed, {});
+  return tls1_prf(digest, MakeSpan(out, out_len),
+                  MakeConstSpan(session->secret, session->secret_length),
+                  MakeConstSpan(label, label_len), seed, {});
 }
diff --git a/deps/boringssl/src/ssl/t1_lib.cc b/deps/boringssl/src/ssl/t1_lib.cc
index dad0fcf..342c170 100644
--- a/deps/boringssl/src/ssl/t1_lib.cc
+++ b/deps/boringssl/src/ssl/t1_lib.cc
@@ -113,10 +113,13 @@
 #include <stdlib.h>
 #include <string.h>
 
+#include <algorithm>
 #include <utility>
 
+#include <openssl/aead.h>
 #include <openssl/bytestring.h>
 #include <openssl/chacha.h>
+#include <openssl/curve25519.h>
 #include <openssl/digest.h>
 #include <openssl/err.h>
 #include <openssl/evp.h>
@@ -125,13 +128,15 @@
 #include <openssl/nid.h>
 #include <openssl/rand.h>
 
-#include "internal.h"
+#include "../crypto/hpke/internal.h"
 #include "../crypto/internal.h"
+#include "internal.h"
 
 
 BSSL_NAMESPACE_BEGIN
 
 static bool ssl_check_clienthello_tlsext(SSL_HANDSHAKE *hs);
+static bool ssl_check_serverhello_tlsext(SSL_HANDSHAKE *hs);
 
 static int compare_uint16_t(const void *p1, const void *p2) {
   uint16_t u1 = *((const uint16_t *)p1);
@@ -512,7 +517,7 @@
 };
 
 static bool forbid_parse_serverhello(SSL_HANDSHAKE *hs, uint8_t *out_alert,
-                                    CBS *contents) {
+                                     CBS *contents) {
   if (contents != NULL) {
     // Servers MUST NOT send this extension.
     *out_alert = SSL_AD_UNSUPPORTED_EXTENSION;
@@ -524,7 +529,7 @@
 }
 
 static bool ignore_parse_clienthello(SSL_HANDSHAKE *hs, uint8_t *out_alert,
-                                    CBS *contents) {
+                                     CBS *contents) {
   // This extension from the client is handled elsewhere.
   return true;
 }
@@ -586,6 +591,182 @@
 }
 
 
+// Encrypted Client Hello (ECH)
+//
+// https://tools.ietf.org/html/draft-ietf-tls-esni-09
+
+// random_size returns a random value between |min| and |max|, inclusive.
+static size_t random_size(size_t min, size_t max) {
+  assert(min < max);
+  size_t value;
+  RAND_bytes(reinterpret_cast<uint8_t *>(&value), sizeof(value));
+  return value % (max - min + 1) + min;
+}
+
+static bool ext_ech_add_clienthello_grease(SSL_HANDSHAKE *hs, CBB *out) {
+  // If we are responding to the server's HelloRetryRequest, we repeat the bytes
+  // of the first ECH GREASE extension.
+  if (hs->ssl->s3->used_hello_retry_request) {
+    CBB ech_body;
+    if (!CBB_add_u16(out, TLSEXT_TYPE_encrypted_client_hello) ||
+        !CBB_add_u16_length_prefixed(out, &ech_body) ||
+        !CBB_add_bytes(&ech_body, hs->ech_grease.data(),
+                       hs->ech_grease.size()) ||
+        !CBB_flush(out)) {
+      return false;
+    }
+    return true;
+  }
+
+  constexpr uint16_t kdf_id = EVP_HPKE_HKDF_SHA256;
+  const uint16_t aead_id = EVP_has_aes_hardware()
+                               ? EVP_HPKE_AEAD_AES_GCM_128
+                               : EVP_HPKE_AEAD_CHACHA20POLY1305;
+  const EVP_AEAD *aead = EVP_HPKE_get_aead(aead_id);
+  assert(aead != nullptr);
+
+  uint8_t ech_config_id[8];
+  RAND_bytes(ech_config_id, sizeof(ech_config_id));
+
+  uint8_t ech_enc[X25519_PUBLIC_VALUE_LEN];
+  uint8_t private_key_unused[X25519_PRIVATE_KEY_LEN];
+  X25519_keypair(ech_enc, private_key_unused);
+
+  // To determine a plausible length for the payload, we first estimate the size
+  // of a typical EncodedClientHelloInner, with an expected use of
+  // outer_extensions. To limit the size, we only consider initial ClientHellos
+  // that do not offer resumption.
+  //
+  //   Field/Extension                           Size
+  // ---------------------------------------------------------------------
+  //   version                                      2
+  //   random                                      32
+  //   legacy_session_id                            1
+  //      - Has a U8 length prefix, but body is
+  //        always empty string in inner CH.
+  //   cipher_suites                                2  (length prefix)
+  //      - Only includes TLS 1.3 ciphers (3).      6
+  //      - Maybe also include a GREASE suite.      2
+  //   legacy_compression_methods                   2  (length prefix)
+  //      - Always has "null" compression method.   1
+  //   extensions:                                  2  (length prefix)
+  //      - encrypted_client_hello (empty).         4  (id + length prefix)
+  //      - supported_versions.                     4  (id + length prefix)
+  //        - U8 length prefix                      1
+  //        - U16 protocol version (TLS 1.3)        2
+  //      - outer_extensions.                       4  (id + length prefix)
+  //        - U8 length prefix                      1
+  //        - N extension IDs (2 bytes each):
+  //          - key_share                           2
+  //          - sigalgs                             2
+  //          - sct                                 2
+  //          - alpn                                2
+  //          - supported_groups.                   2
+  //          - status_request.                     2
+  //          - psk_key_exchange_modes.             2
+  //          - compress_certificate.               2
+  //
+  // The server_name extension has an overhead of 9 bytes, plus up to an
+  // estimated 100 bytes of hostname. Rounding up to a multiple of 32 yields a
+  // range of 96 to 192. Note that this estimate does not fully capture
+  // optional extensions like GREASE, but the rounding gives some leeway.
+
+  uint8_t payload[EVP_AEAD_MAX_OVERHEAD + 192];
+  const size_t payload_len =
+      EVP_AEAD_max_overhead(aead) + 32 * random_size(96 / 32, 192 / 32);
+  assert(payload_len <= sizeof(payload));
+  RAND_bytes(payload, payload_len);
+
+  // Inside the TLS extension contents, write a serialized ClientEncryptedCH.
+  CBB ech_body, config_id_cbb, enc_cbb, payload_cbb;
+  if (!CBB_add_u16(out, TLSEXT_TYPE_encrypted_client_hello) ||
+      !CBB_add_u16_length_prefixed(out, &ech_body) ||
+      !CBB_add_u16(&ech_body, kdf_id) ||  //
+      !CBB_add_u16(&ech_body, aead_id) ||
+      !CBB_add_u8_length_prefixed(&ech_body, &config_id_cbb) ||
+      !CBB_add_bytes(&config_id_cbb, ech_config_id, sizeof(ech_config_id)) ||
+      !CBB_add_u16_length_prefixed(&ech_body, &enc_cbb) ||
+      !CBB_add_bytes(&enc_cbb, ech_enc, OPENSSL_ARRAY_SIZE(ech_enc)) ||
+      !CBB_add_u16_length_prefixed(&ech_body, &payload_cbb) ||
+      !CBB_add_bytes(&payload_cbb, payload, payload_len) ||  //
+      !CBB_flush(&ech_body)) {
+    return false;
+  }
+  // Save the bytes of the newly-generated extension in case the server sends
+  // a HelloRetryRequest.
+  if (!hs->ech_grease.CopyFrom(
+          MakeConstSpan(CBB_data(&ech_body), CBB_len(&ech_body)))) {
+    return false;
+  }
+  return CBB_flush(out);
+}
+
+static bool ext_ech_add_clienthello(SSL_HANDSHAKE *hs, CBB *out) {
+  if (hs->max_version < TLS1_3_VERSION) {
+    return true;
+  }
+  if (hs->config->ech_grease_enabled) {
+    return ext_ech_add_clienthello_grease(hs, out);
+  }
+  // Nothing to do, since we don't yet implement the non-GREASE parts of ECH.
+  return true;
+}
+
+static bool ext_ech_parse_serverhello(SSL_HANDSHAKE *hs, uint8_t *out_alert,
+                                      CBS *contents) {
+  if (contents == NULL) {
+    return true;
+  }
+
+  // If the client only sent GREASE, we must check the extension syntactically.
+  CBS ech_configs;
+  if (!CBS_get_u16_length_prefixed(contents, &ech_configs) ||
+      CBS_len(&ech_configs) == 0 ||  //
+      CBS_len(contents) > 0) {
+    *out_alert = SSL_AD_DECODE_ERROR;
+    return false;
+  }
+  while (CBS_len(&ech_configs) > 0) {
+    // Do a top-level parse of the ECHConfig, stopping before ECHConfigContents.
+    uint16_t version;
+    CBS ech_config_contents;
+    if (!CBS_get_u16(&ech_configs, &version) ||
+        !CBS_get_u16_length_prefixed(&ech_configs, &ech_config_contents)) {
+      *out_alert = SSL_AD_DECODE_ERROR;
+      return false;
+    }
+  }
+  return true;
+}
+
+static bool ext_ech_parse_clienthello(SSL_HANDSHAKE *hs, uint8_t *out_alert,
+                                      CBS *contents) {
+  if (contents != nullptr) {
+    hs->ech_present = true;
+    return true;
+  }
+  return true;
+}
+
+static bool ext_ech_is_inner_add_clienthello(SSL_HANDSHAKE *hs, CBB *out) {
+  return true;
+}
+
+static bool ext_ech_is_inner_parse_clienthello(SSL_HANDSHAKE *hs,
+                                               uint8_t *out_alert,
+                                               CBS *contents) {
+  if (contents == nullptr) {
+    return true;
+  }
+  if (CBS_len(contents) > 0) {
+    *out_alert = SSL_AD_ILLEGAL_PARAMETER;
+    return false;
+  }
+  hs->ech_is_inner_present = true;
+  return true;
+}
+
+
 // Renegotiation indication.
 //
 // https://tools.ietf.org/html/rfc5746
@@ -1245,6 +1426,12 @@
 
 static bool ext_alpn_add_clienthello(SSL_HANDSHAKE *hs, CBB *out) {
   SSL *const ssl = hs->ssl;
+  if (hs->config->alpn_client_proto_list.empty() && ssl->quic_method) {
+    // ALPN MUST be used with QUIC.
+    OPENSSL_PUT_ERROR(SSL, SSL_R_MISSING_ALPN);
+    return false;
+  }
+
   if (hs->config->alpn_client_proto_list.empty() ||
       ssl->s3->initial_handshake_complete) {
     return true;
@@ -1267,6 +1454,12 @@
                                        CBS *contents) {
   SSL *const ssl = hs->ssl;
   if (contents == NULL) {
+    if (ssl->quic_method) {
+      // ALPN is required when QUIC is used.
+      OPENSSL_PUT_ERROR(SSL, SSL_R_MISSING_ALPN);
+      *out_alert = SSL_AD_NO_APPLICATION_PROTOCOL;
+      return false;
+    }
     return true;
   }
 
@@ -1342,6 +1535,12 @@
       !ssl_client_hello_get_extension(
           client_hello, &contents,
           TLSEXT_TYPE_application_layer_protocol_negotiation)) {
+    if (ssl->quic_method) {
+      // ALPN is required when QUIC is used.
+      OPENSSL_PUT_ERROR(SSL, SSL_R_MISSING_ALPN);
+      *out_alert = SSL_AD_NO_APPLICATION_PROTOCOL;
+      return false;
+    }
     // Ignore ALPN if not configured or no extension was supplied.
     return true;
   }
@@ -1362,7 +1561,6 @@
   CBS protocol_name_list_copy = protocol_name_list;
   while (CBS_len(&protocol_name_list_copy) > 0) {
     CBS protocol_name;
-
     if (!CBS_get_u8_length_prefixed(&protocol_name_list_copy, &protocol_name) ||
         // Empty protocol names are forbidden.
         CBS_len(&protocol_name) == 0) {
@@ -1388,6 +1586,11 @@
       *out_alert = SSL_AD_INTERNAL_ERROR;
       return false;
     }
+  } else if (ssl->quic_method) {
+    // ALPN is required when QUIC is used.
+    OPENSSL_PUT_ERROR(SSL, SSL_R_MISSING_ALPN);
+    *out_alert = SSL_AD_NO_APPLICATION_PROTOCOL;
+    return false;
   }
 
   return true;
@@ -1923,6 +2126,21 @@
 //
 // https://tools.ietf.org/html/rfc8446#section-4.2.10
 
+// ssl_get_local_application_settings looks up the configured ALPS value for
+// |protocol|. If found, it sets |*out_settings| to the value and returns true.
+// Otherwise, it returns false.
+static bool ssl_get_local_application_settings(
+    const SSL_HANDSHAKE *hs, Span<const uint8_t> *out_settings,
+    Span<const uint8_t> protocol) {
+  for (const ALPSConfig &config : hs->config->alps_configs) {
+    if (protocol == config.protocol) {
+      *out_settings = config.settings;
+      return true;
+    }
+  }
+  return false;
+}
+
 static bool ext_early_data_add_clienthello(SSL_HANDSHAKE *hs, CBB *out) {
   SSL *const ssl = hs->ssl;
   // The second ClientHello never offers early data, and we must have already
@@ -1955,13 +2173,25 @@
     return true;
   }
 
-  // In case ALPN preferences changed since this session was established, avoid
-  // reporting a confusing value in |SSL_get0_alpn_selected| and sending early
-  // data we know will be rejected.
-  if (!ssl->session->early_alpn.empty() &&
-      !ssl_is_alpn_protocol_allowed(hs, ssl->session->early_alpn)) {
-    ssl->s3->early_data_reason = ssl_early_data_alpn_mismatch;
-    return true;
+  if (!ssl->session->early_alpn.empty()) {
+    if (!ssl_is_alpn_protocol_allowed(hs, ssl->session->early_alpn)) {
+      // Avoid reporting a confusing value in |SSL_get0_alpn_selected|.
+      ssl->s3->early_data_reason = ssl_early_data_alpn_mismatch;
+      return true;
+    }
+
+    // If the previous connection negotiated ALPS, only offer 0-RTT when the
+    // local settings are consistent with what we'd offer for this
+    // connection.
+    if (ssl->session->has_application_settings) {
+      Span<const uint8_t> settings;
+      if (!ssl_get_local_application_settings(hs, &settings,
+                                              ssl->session->early_alpn) ||
+          settings != ssl->session->local_application_settings) {
+        ssl->s3->early_data_reason = ssl_early_data_alps_mismatch;
+        return true;
+      }
+    }
   }
 
   // |early_data_reason| will be filled in later when the server responds.
@@ -2235,7 +2465,8 @@
   return true;
 }
 
-bool ssl_ext_key_share_add_serverhello(SSL_HANDSHAKE *hs, CBB *out) {
+bool ssl_ext_key_share_add_serverhello(SSL_HANDSHAKE *hs, CBB *out,
+                                       bool dry_run) {
   uint16_t group_id;
   CBB kse_bytes, public_key;
   if (!tls1_get_shared_group(hs, &group_id) ||
@@ -2248,10 +2479,10 @@
       !CBB_flush(out)) {
     return false;
   }
-
-  hs->ecdh_public_key.Reset();
-
-  hs->new_session->group_id = group_id;
+  if (!dry_run) {
+    hs->ecdh_public_key.Reset();
+    hs->new_session->group_id = group_id;
+  }
   return true;
 }
 
@@ -2545,8 +2776,8 @@
 
 // QUIC Transport Parameters
 
-static bool ext_quic_transport_params_add_clienthello(SSL_HANDSHAKE *hs,
-                                                      CBB *out) {
+static bool ext_quic_transport_params_add_clienthello_impl(
+    SSL_HANDSHAKE *hs, CBB *out, bool use_legacy_codepoint) {
   if (hs->config->quic_transport_params.empty() && !hs->ssl->quic_method) {
     return true;
   }
@@ -2558,9 +2789,18 @@
     return false;
   }
   assert(hs->min_version > TLS1_2_VERSION);
+  if (use_legacy_codepoint != hs->config->quic_use_legacy_codepoint) {
+    // Do nothing, we'll send the other codepoint.
+    return true;
+  }
+
+  uint16_t extension_type = TLSEXT_TYPE_quic_transport_parameters_standard;
+  if (hs->config->quic_use_legacy_codepoint) {
+    extension_type = TLSEXT_TYPE_quic_transport_parameters_legacy;
+  }
 
   CBB contents;
-  if (!CBB_add_u16(out, TLSEXT_TYPE_quic_transport_parameters) ||
+  if (!CBB_add_u16(out, extension_type) ||
       !CBB_add_u16_length_prefixed(out, &contents) ||
       !CBB_add_bytes(&contents, hs->config->quic_transport_params.data(),
                      hs->config->quic_transport_params.size()) ||
@@ -2570,31 +2810,57 @@
   return true;
 }
 
-static bool ext_quic_transport_params_parse_serverhello(SSL_HANDSHAKE *hs,
-                                                        uint8_t *out_alert,
-                                                        CBS *contents) {
+static bool ext_quic_transport_params_add_clienthello(SSL_HANDSHAKE *hs,
+                                                      CBB *out) {
+  return ext_quic_transport_params_add_clienthello_impl(
+      hs, out, /*use_legacy_codepoint=*/false);
+}
+
+static bool ext_quic_transport_params_add_clienthello_legacy(SSL_HANDSHAKE *hs,
+                                                             CBB *out) {
+  return ext_quic_transport_params_add_clienthello_impl(
+      hs, out, /*use_legacy_codepoint=*/true);
+}
+
+static bool ext_quic_transport_params_parse_serverhello_impl(
+    SSL_HANDSHAKE *hs, uint8_t *out_alert, CBS *contents,
+    bool used_legacy_codepoint) {
   SSL *const ssl = hs->ssl;
   if (contents == nullptr) {
+    if (used_legacy_codepoint != hs->config->quic_use_legacy_codepoint) {
+      // Silently ignore because we expect the other QUIC codepoint.
+      return true;
+    }
     if (!ssl->quic_method) {
       return true;
     }
-    assert(ssl->quic_method);
     *out_alert = SSL_AD_MISSING_EXTENSION;
     return false;
   }
-  if (!ssl->quic_method) {
-    *out_alert = SSL_AD_UNSUPPORTED_EXTENSION;
-    return false;
-  }
-  // QUIC requires TLS 1.3.
+  // The extensions parser will check for unsolicited extensions before
+  // calling the callback.
+  assert(ssl->quic_method != nullptr);
   assert(ssl_protocol_version(ssl) == TLS1_3_VERSION);
-
+  assert(used_legacy_codepoint == hs->config->quic_use_legacy_codepoint);
   return ssl->s3->peer_quic_transport_params.CopyFrom(*contents);
 }
 
-static bool ext_quic_transport_params_parse_clienthello(SSL_HANDSHAKE *hs,
+static bool ext_quic_transport_params_parse_serverhello(SSL_HANDSHAKE *hs,
                                                         uint8_t *out_alert,
                                                         CBS *contents) {
+  return ext_quic_transport_params_parse_serverhello_impl(
+      hs, out_alert, contents, /*used_legacy_codepoint=*/false);
+}
+
+static bool ext_quic_transport_params_parse_serverhello_legacy(
+    SSL_HANDSHAKE *hs, uint8_t *out_alert, CBS *contents) {
+  return ext_quic_transport_params_parse_serverhello_impl(
+      hs, out_alert, contents, /*used_legacy_codepoint=*/true);
+}
+
+static bool ext_quic_transport_params_parse_clienthello_impl(
+    SSL_HANDSHAKE *hs, uint8_t *out_alert, CBS *contents,
+    bool used_legacy_codepoint) {
   SSL *const ssl = hs->ssl;
   if (!contents) {
     if (!ssl->quic_method) {
@@ -2605,29 +2871,72 @@
       // for QUIC.
       OPENSSL_PUT_ERROR(SSL, SSL_R_QUIC_TRANSPORT_PARAMETERS_MISCONFIGURED);
       *out_alert = SSL_AD_INTERNAL_ERROR;
+      return false;
+    }
+    if (used_legacy_codepoint != hs->config->quic_use_legacy_codepoint) {
+      // Silently ignore because we expect the other QUIC codepoint.
+      return true;
     }
     *out_alert = SSL_AD_MISSING_EXTENSION;
     return false;
   }
   if (!ssl->quic_method) {
+    if (used_legacy_codepoint) {
+      // Ignore the legacy private-use codepoint because that could be sent
+      // to mean something other than QUIC transport parameters.
+      return true;
+    }
+    // Fail if we received the codepoint registered with IANA for QUIC
+    // because that is not allowed outside of QUIC.
     *out_alert = SSL_AD_UNSUPPORTED_EXTENSION;
     return false;
   }
   assert(ssl_protocol_version(ssl) == TLS1_3_VERSION);
+  if (used_legacy_codepoint != hs->config->quic_use_legacy_codepoint) {
+    // Silently ignore because we expect the other QUIC codepoint.
+    return true;
+  }
   return ssl->s3->peer_quic_transport_params.CopyFrom(*contents);
 }
 
-static bool ext_quic_transport_params_add_serverhello(SSL_HANDSHAKE *hs,
-                                                      CBB *out) {
+static bool ext_quic_transport_params_parse_clienthello(SSL_HANDSHAKE *hs,
+                                                        uint8_t *out_alert,
+                                                        CBS *contents) {
+  return ext_quic_transport_params_parse_clienthello_impl(
+      hs, out_alert, contents, /*used_legacy_codepoint=*/false);
+}
+
+static bool ext_quic_transport_params_parse_clienthello_legacy(
+    SSL_HANDSHAKE *hs, uint8_t *out_alert, CBS *contents) {
+  return ext_quic_transport_params_parse_clienthello_impl(
+      hs, out_alert, contents, /*used_legacy_codepoint=*/true);
+}
+
+static bool ext_quic_transport_params_add_serverhello_impl(
+    SSL_HANDSHAKE *hs, CBB *out, bool use_legacy_codepoint) {
+  if (hs->ssl->quic_method == nullptr && use_legacy_codepoint) {
+    // Ignore the legacy private-use codepoint because that could be sent
+    // to mean something other than QUIC transport parameters.
+    return true;
+  }
   assert(hs->ssl->quic_method != nullptr);
   if (hs->config->quic_transport_params.empty()) {
     // Transport parameters must be set when using QUIC.
     OPENSSL_PUT_ERROR(SSL, SSL_R_QUIC_TRANSPORT_PARAMETERS_MISCONFIGURED);
     return false;
   }
+  if (use_legacy_codepoint != hs->config->quic_use_legacy_codepoint) {
+    // Do nothing, we'll send the other codepoint.
+    return true;
+  }
+
+  uint16_t extension_type = TLSEXT_TYPE_quic_transport_parameters_standard;
+  if (hs->config->quic_use_legacy_codepoint) {
+    extension_type = TLSEXT_TYPE_quic_transport_parameters_legacy;
+  }
 
   CBB contents;
-  if (!CBB_add_u16(out, TLSEXT_TYPE_quic_transport_parameters) ||
+  if (!CBB_add_u16(out, extension_type) ||
       !CBB_add_u16_length_prefixed(out, &contents) ||
       !CBB_add_bytes(&contents, hs->config->quic_transport_params.data(),
                      hs->config->quic_transport_params.size()) ||
@@ -2638,6 +2947,18 @@
   return true;
 }
 
+static bool ext_quic_transport_params_add_serverhello(SSL_HANDSHAKE *hs,
+                                                      CBB *out) {
+  return ext_quic_transport_params_add_serverhello_impl(
+      hs, out, /*use_legacy_codepoint=*/false);
+}
+
+static bool ext_quic_transport_params_add_serverhello_legacy(SSL_HANDSHAKE *hs,
+                                                             CBB *out) {
+  return ext_quic_transport_params_add_serverhello_impl(
+      hs, out, /*use_legacy_codepoint=*/true);
+}
+
 // Delegated credentials.
 //
 // https://tools.ietf.org/html/draft-ietf-tls-subcerts
@@ -2650,20 +2971,22 @@
 static bool ext_delegated_credential_parse_clienthello(SSL_HANDSHAKE *hs,
                                                        uint8_t *out_alert,
                                                        CBS *contents) {
-  assert(TLSEXT_TYPE_delegated_credential == 0xff02);
-  // TODO: Check that the extension is empty.
-  //
-  // As of draft-03, the client sends an empty extension in order indicate
-  // support for delegated credentials. This could change, however, since the
-  // spec is not yet finalized. This assertion is here to remind us to enforce
-  // this check once the extension ID is assigned.
-
   if (contents == nullptr || ssl_protocol_version(hs->ssl) < TLS1_3_VERSION) {
     // Don't use delegated credentials unless we're negotiating TLS 1.3 or
     // higher.
     return true;
   }
 
+  // The contents of the extension are the signature algorithms the client will
+  // accept for a delegated credential.
+  CBS sigalg_list;
+  if (!CBS_get_u16_length_prefixed(contents, &sigalg_list) ||
+      CBS_len(&sigalg_list) == 0 ||
+      CBS_len(contents) != 0 ||
+      !parse_u16_array(&sigalg_list, &hs->peer_delegated_credential_sigalgs)) {
+    return false;
+  }
+
   hs->delegated_credential_requested = true;
   return true;
 }
@@ -2772,6 +3095,144 @@
   return true;
 }
 
+// Application-level Protocol Settings
+//
+// https://tools.ietf.org/html/draft-vvv-tls-alps-01
+
+static bool ext_alps_add_clienthello(SSL_HANDSHAKE *hs, CBB *out) {
+  SSL *const ssl = hs->ssl;
+  if (// ALPS requires TLS 1.3.
+      hs->max_version < TLS1_3_VERSION ||
+      // Do not offer ALPS without ALPN.
+      hs->config->alpn_client_proto_list.empty() ||
+      // Do not offer ALPS if not configured.
+      hs->config->alps_configs.empty() ||
+      // Do not offer ALPS on renegotiation handshakes.
+      ssl->s3->initial_handshake_complete) {
+    return true;
+  }
+
+  CBB contents, proto_list, proto;
+  if (!CBB_add_u16(out, TLSEXT_TYPE_application_settings) ||
+      !CBB_add_u16_length_prefixed(out, &contents) ||
+      !CBB_add_u16_length_prefixed(&contents, &proto_list)) {
+    return false;
+  }
+
+  for (const ALPSConfig &config : hs->config->alps_configs) {
+    if (!CBB_add_u8_length_prefixed(&proto_list, &proto) ||
+        !CBB_add_bytes(&proto, config.protocol.data(),
+                       config.protocol.size())) {
+      return false;
+    }
+  }
+
+  return CBB_flush(out);
+}
+
+static bool ext_alps_parse_serverhello(SSL_HANDSHAKE *hs, uint8_t *out_alert,
+                                       CBS *contents) {
+  SSL *const ssl = hs->ssl;
+  if (contents == nullptr) {
+    return true;
+  }
+
+  assert(!ssl->s3->initial_handshake_complete);
+  assert(!hs->config->alpn_client_proto_list.empty());
+  assert(!hs->config->alps_configs.empty());
+
+  // ALPS requires TLS 1.3.
+  if (ssl_protocol_version(ssl) < TLS1_3_VERSION) {
+    *out_alert = SSL_AD_UNSUPPORTED_EXTENSION;
+    OPENSSL_PUT_ERROR(SSL, SSL_R_UNEXPECTED_EXTENSION);
+    return false;
+  }
+
+  // Note extension callbacks may run in any order, so we defer checking
+  // consistency with ALPN to |ssl_check_serverhello_tlsext|.
+  if (!hs->new_session->peer_application_settings.CopyFrom(*contents)) {
+    *out_alert = SSL_AD_INTERNAL_ERROR;
+    return false;
+  }
+
+  hs->new_session->has_application_settings = true;
+  return true;
+}
+
+static bool ext_alps_add_serverhello(SSL_HANDSHAKE *hs, CBB *out) {
+  SSL *const ssl = hs->ssl;
+  // If early data is accepted, we omit the ALPS extension. It is implicitly
+  // carried over from the previous connection.
+  if (hs->new_session == nullptr ||
+      !hs->new_session->has_application_settings ||
+      ssl->s3->early_data_accepted) {
+    return true;
+  }
+
+  CBB contents;
+  if (!CBB_add_u16(out, TLSEXT_TYPE_application_settings) ||
+      !CBB_add_u16_length_prefixed(out, &contents) ||
+      !CBB_add_bytes(&contents,
+                     hs->new_session->local_application_settings.data(),
+                     hs->new_session->local_application_settings.size()) ||
+      !CBB_flush(out)) {
+    return false;
+  }
+
+  return true;
+}
+
+bool ssl_negotiate_alps(SSL_HANDSHAKE *hs, uint8_t *out_alert,
+                        const SSL_CLIENT_HELLO *client_hello) {
+  SSL *const ssl = hs->ssl;
+  if (ssl->s3->alpn_selected.empty()) {
+    return true;
+  }
+
+  // If we negotiate ALPN over TLS 1.3, try to negotiate ALPS.
+  CBS alps_contents;
+  Span<const uint8_t> settings;
+  if (ssl_protocol_version(ssl) >= TLS1_3_VERSION &&
+      ssl_get_local_application_settings(hs, &settings,
+                                         ssl->s3->alpn_selected) &&
+      ssl_client_hello_get_extension(client_hello, &alps_contents,
+                                     TLSEXT_TYPE_application_settings)) {
+    // Check if the client supports ALPS with the selected ALPN.
+    bool found = false;
+    CBS alps_list;
+    if (!CBS_get_u16_length_prefixed(&alps_contents, &alps_list) ||
+        CBS_len(&alps_contents) != 0 ||
+        CBS_len(&alps_list) == 0) {
+      OPENSSL_PUT_ERROR(SSL, SSL_R_DECODE_ERROR);
+      *out_alert = SSL_AD_DECODE_ERROR;
+      return false;
+    }
+    while (CBS_len(&alps_list) > 0) {
+      CBS protocol_name;
+      if (!CBS_get_u8_length_prefixed(&alps_list, &protocol_name) ||
+          // Empty protocol names are forbidden.
+          CBS_len(&protocol_name) == 0) {
+        OPENSSL_PUT_ERROR(SSL, SSL_R_DECODE_ERROR);
+        *out_alert = SSL_AD_DECODE_ERROR;
+        return false;
+      }
+      if (protocol_name == MakeConstSpan(ssl->s3->alpn_selected)) {
+        found = true;
+      }
+    }
+
+    // Negotiate ALPS if the client also supports ALPS for this protocol.
+    if (found) {
+      hs->new_session->has_application_settings = true;
+      if (!hs->new_session->local_application_settings.CopyFrom(settings)) {
+        *out_alert = SSL_AD_INTERNAL_ERROR;
+        return false;
+      }
+    }
+  }
+
+  return true;
+}
 
 // kExtensions contains all the supported extensions.
 static const struct tls_extension kExtensions[] = {
@@ -2784,6 +3245,22 @@
     ext_sni_add_serverhello,
   },
   {
+    TLSEXT_TYPE_encrypted_client_hello,
+    NULL,
+    ext_ech_add_clienthello,
+    ext_ech_parse_serverhello,
+    ext_ech_parse_clienthello,
+    dont_add_serverhello,
+  },
+  {
+    TLSEXT_TYPE_ech_is_inner,
+    NULL,
+    ext_ech_is_inner_add_clienthello,
+    forbid_parse_serverhello,
+    ext_ech_is_inner_parse_clienthello,
+    dont_add_serverhello,
+  },
+  {
     TLSEXT_TYPE_extended_master_secret,
     NULL,
     ext_ems_add_clienthello,
@@ -2922,7 +3399,7 @@
     dont_add_serverhello,
   },
   {
-    TLSEXT_TYPE_quic_transport_parameters,
+    TLSEXT_TYPE_quic_transport_parameters_standard,
     NULL,
     ext_quic_transport_params_add_clienthello,
     ext_quic_transport_params_parse_serverhello,
@@ -2930,6 +3407,14 @@
     ext_quic_transport_params_add_serverhello,
   },
   {
+    TLSEXT_TYPE_quic_transport_parameters_legacy,
+    NULL,
+    ext_quic_transport_params_add_clienthello_legacy,
+    ext_quic_transport_params_parse_serverhello_legacy,
+    ext_quic_transport_params_parse_clienthello_legacy,
+    ext_quic_transport_params_add_serverhello_legacy,
+  },
+  {
     TLSEXT_TYPE_token_binding,
     NULL,
     ext_token_binding_add_clienthello,
@@ -2953,6 +3438,15 @@
     ext_delegated_credential_parse_clienthello,
     dont_add_serverhello,
   },
+  {
+    TLSEXT_TYPE_application_settings,
+    NULL,
+    ext_alps_add_clienthello,
+    ext_alps_parse_serverhello,
+    // ALPS is negotiated late in |ssl_negotiate_alps|.
+    ignore_parse_clienthello,
+    ext_alps_add_serverhello,
+  },
 };
 
 #define kNumExtensions (sizeof(kExtensions) / sizeof(struct tls_extension))
@@ -3047,7 +3541,7 @@
     last_was_empty = false;
   }
 
-  if (!SSL_is_dtls(ssl)) {
+  if (!SSL_is_dtls(ssl) && !ssl->quic_method) {
     size_t psk_extension_len = ext_pre_shared_key_clienthello_length(hs);
     header_len += 2 + CBB_len(&extensions) + psk_extension_len;
     size_t padding_len = 0;
@@ -3345,6 +3839,36 @@
   }
 }
 
+static bool ssl_check_serverhello_tlsext(SSL_HANDSHAKE *hs) {
+  SSL *const ssl = hs->ssl;
+  // ALPS and ALPN have a dependency between each other, so we defer checking
+  // consistency to after the callbacks run.
+  if (hs->new_session != nullptr && hs->new_session->has_application_settings) {
+    // ALPN must be negotiated.
+    if (ssl->s3->alpn_selected.empty()) {
+      OPENSSL_PUT_ERROR(SSL, SSL_R_NEGOTIATED_ALPS_WITHOUT_ALPN);
+      ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_ILLEGAL_PARAMETER);
+      return false;
+    }
+
+    // The negotiated protocol must be one of the ones we advertised for ALPS.
+    Span<const uint8_t> settings;
+    if (!ssl_get_local_application_settings(hs, &settings,
+                                            ssl->s3->alpn_selected)) {
+      OPENSSL_PUT_ERROR(SSL, SSL_R_INVALID_ALPN_PROTOCOL);
+      ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_ILLEGAL_PARAMETER);
+      return false;
+    }
+
+    if (!hs->new_session->local_application_settings.CopyFrom(settings)) {
+      ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
+      return false;
+    }
+  }
+
+  return true;
+}
+
 bool ssl_parse_serverhello_tlsext(SSL_HANDSHAKE *hs, CBS *cbs) {
   SSL *const ssl = hs->ssl;
   int alert = SSL_AD_DECODE_ERROR;
@@ -3353,6 +3877,10 @@
     return false;
   }
 
+  if (!ssl_check_serverhello_tlsext(hs)) {
+    return false;
+  }
+
   return true;
 }
 
diff --git a/deps/boringssl/src/ssl/test/bssl_shim.cc b/deps/boringssl/src/ssl/test/bssl_shim.cc
index d8652ea..31c0a01 100644
--- a/deps/boringssl/src/ssl/test/bssl_shim.cc
+++ b/deps/boringssl/src/ssl/test/bssl_shim.cc
@@ -396,41 +396,6 @@
   return true;
 }
 
-static const char *EarlyDataReasonToString(ssl_early_data_reason_t reason) {
-  switch (reason) {
-    case ssl_early_data_unknown:
-      return "unknown";
-    case ssl_early_data_disabled:
-      return "disabled";
-    case ssl_early_data_accepted:
-      return "accepted";
-    case ssl_early_data_protocol_version:
-      return "protocol_version";
-    case ssl_early_data_peer_declined:
-      return "peer_declined";
-    case ssl_early_data_no_session_offered:
-      return "no_session_offered";
-    case ssl_early_data_session_not_resumed:
-      return "session_not_resumed";
-    case ssl_early_data_unsupported_for_session:
-      return "unsupported_for_session";
-    case ssl_early_data_hello_retry_request:
-      return "hello_retry_request";
-    case ssl_early_data_alpn_mismatch:
-      return "alpn_mismatch";
-    case ssl_early_data_channel_id:
-      return "channel_id";
-    case ssl_early_data_token_binding:
-      return "token_binding";
-    case ssl_early_data_ticket_age_skew:
-      return "ticket_age_skew";
-    case ssl_early_data_quic_parameter_mismatch:
-      return "quic_parameter_mismatch";
-  }
-
-  abort();
-}
-
 // CheckHandshakeProperties checks, immediately after |ssl| completes its
 // initial handshake (or False Starts), whether all the properties are
 // consistent with the test configuration and invariants.
@@ -524,16 +489,38 @@
     }
   }
 
-  if (!config->is_server) {
-    const uint8_t *alpn_proto;
-    unsigned alpn_proto_len;
-    SSL_get0_alpn_selected(ssl, &alpn_proto, &alpn_proto_len);
-    if (alpn_proto_len != config->expect_alpn.size() ||
-        OPENSSL_memcmp(alpn_proto, config->expect_alpn.data(),
-                       alpn_proto_len) != 0) {
-      fprintf(stderr, "negotiated alpn proto mismatch\n");
-      return false;
-    }
+  // On the server, the protocol selected in the ALPN callback must be echoed
+  // out of |SSL_get0_alpn_selected|. On the client, it should report what the
+  // test expected.
+  const std::string &expect_alpn =
+      config->is_server ? config->select_alpn : config->expect_alpn;
+  const uint8_t *alpn_proto;
+  unsigned alpn_proto_len;
+  SSL_get0_alpn_selected(ssl, &alpn_proto, &alpn_proto_len);
+  if (alpn_proto_len != expect_alpn.size() ||
+      OPENSSL_memcmp(alpn_proto, expect_alpn.data(), alpn_proto_len) != 0) {
+    fprintf(stderr, "negotiated alpn proto mismatch\n");
+    return false;
+  }
+
+  if (SSL_has_application_settings(ssl) !=
+      (config->expect_peer_application_settings ? 1 : 0)) {
+    fprintf(stderr,
+            "connection %s application settings, but expected the opposite\n",
+            SSL_has_application_settings(ssl) ? "has" : "does not have");
+    return false;
+  }
+  std::string expect_settings = config->expect_peer_application_settings
+                                    ? *config->expect_peer_application_settings
+                                    : "";
+  const uint8_t *peer_settings;
+  size_t peer_settings_len;
+  SSL_get0_peer_application_settings(ssl, &peer_settings, &peer_settings_len);
+  if (expect_settings !=
+      std::string(reinterpret_cast<const char *>(peer_settings),
+                  peer_settings_len)) {
+    fprintf(stderr, "peer application settings mismatch\n");
+    return false;
   }
 
   if (!config->expect_quic_transport_params.empty() && expect_handshake_done) {
@@ -611,8 +598,7 @@
     }
   }
 
-  uint16_t cipher_id =
-      static_cast<uint16_t>(SSL_CIPHER_get_id(SSL_get_current_cipher(ssl)));
+  uint16_t cipher_id = SSL_CIPHER_get_protocol_id(SSL_get_current_cipher(ssl));
   if (config->expect_cipher_aes != 0 &&
       EVP_has_aes_hardware() &&
       static_cast<uint16_t>(config->expect_cipher_aes) != cipher_id) {
@@ -629,6 +615,13 @@
     return false;
   }
 
+  if (config->expect_cipher != 0 &&
+      static_cast<uint16_t>(config->expect_cipher) != cipher_id) {
+    fprintf(stderr, "Cipher ID was %04x, wanted %04x\n", cipher_id,
+            static_cast<uint16_t>(config->expect_cipher));
+    return false;
+  }
+
   // The early data status is only applicable after the handshake is confirmed.
   if (!SSL_in_early_data(ssl)) {
     if ((config->expect_accept_early_data && !SSL_early_data_accepted(ssl)) ||
@@ -640,7 +633,7 @@
     }
 
     const char *early_data_reason =
-        EarlyDataReasonToString(SSL_get_early_data_reason(ssl));
+        SSL_early_data_reason_string(SSL_get_early_data_reason(ssl));
     if (!config->expect_early_data_reason.empty() &&
         config->expect_early_data_reason != early_data_reason) {
       fprintf(stderr, "Early data reason was \"%s\", expected \"%s\"\n",
@@ -668,12 +661,6 @@
     return false;
   }
 
-  if (config->expect_tls13_downgrade != !!SSL_is_tls13_downgrade(ssl)) {
-    fprintf(stderr, "Got %s downgrade signal, but wanted the opposite.\n",
-            SSL_is_tls13_downgrade(ssl) ? "" : "no ");
-    return false;
-  }
-
   if (config->expect_delegated_credential_used !=
       !!SSL_delegated_credential_used(ssl)) {
     fprintf(stderr,
@@ -734,7 +721,9 @@
     BIO_push(packeted.get(), bio.release());
     bio = std::move(packeted);
   }
-  if (config->async) {
+  if (config->async && !config->is_quic) {
+    // Note async tests only affect callbacks in QUIC. The IO path does not
+    // behave differently when synchronous or asynchronous in our QUIC APIs.
     bssl::UniquePtr<BIO> async_scoped =
         config->is_dtls ? AsyncBioCreateDatagram() : AsyncBioCreate();
     if (!async_scoped) {
@@ -802,6 +791,12 @@
   }
 
   if (!ret) {
+    // Print the |SSL_get_error| code. Otherwise, some failures are silent and
+    // hard to debug.
+    int ssl_err = SSL_get_error(ssl.get(), -1);
+    if (ssl_err != SSL_ERROR_NONE) {
+      fprintf(stderr, "SSL error: %s\n", SSL_error_description(ssl_err));
+    }
     return false;
   }
 
@@ -975,6 +970,11 @@
         fprintf(stderr, "-read-with-unfinished-write requires -async.\n");
         return false;
       }
+      if (config->is_quic) {
+        fprintf(stderr,
+                "-read-with-unfinished-write is incompatible with QUIC.\n");
+        return false;
+      }
 
       // Let only one byte of the record through.
       AsyncBioAllowWrite(GetTestState(ssl)->async_bio, 1);
@@ -1198,6 +1198,16 @@
     return 0;
   }
 
+  if (initial_config.wait_for_debugger) {
+#if defined(OPENSSL_WINDOWS)
+    fprintf(stderr, "-wait-for-debugger is not supported on Windows.\n");
+    return 1;
+#else
+    // The debugger will resume the process.
+    raise(SIGSTOP);
+#endif
+  }
+
   bssl::UniquePtr<SSL_CTX> ssl_ctx;
 
   bssl::UniquePtr<SSL_SESSION> session;
diff --git a/deps/boringssl/src/ssl/test/handshaker.cc b/deps/boringssl/src/ssl/test/handshaker.cc
index a6bf643..72d6b2f 100644
--- a/deps/boringssl/src/ssl/test/handshaker.cc
+++ b/deps/boringssl/src/ssl/test/handshaker.cc
@@ -127,13 +127,21 @@
   return ret;
 }
 
+int SignalError() {
+  const char msg = kControlMsgError;
+  if (write_eintr(kFdControl, &msg, 1) != 1) {
+    return 2;
+  }
+  return 1;
+}
+
 }  // namespace
 
 int main(int argc, char **argv) {
   TestConfig initial_config, resume_config, retry_config;
   if (!ParseConfig(argc - 1, argv + 1, &initial_config, &resume_config,
                    &retry_config)) {
-    return 2;
+    return SignalError();
   }
   const TestConfig *config = initial_config.handshaker_resume
       ? &resume_config : &initial_config;
@@ -160,11 +168,7 @@
   Span<uint8_t> handoff(buf.get(), len);
   if (!Handshaker(config, kFdProxyToHandshaker, kFdHandshakerToProxy, handoff,
                   kFdControl)) {
-    char msg = kControlMsgError;
-    if (write_eintr(kFdControl, &msg, 1) != 1) {
-      return 3;
-    }
-    return 1;
+    return SignalError();
   }
   return 0;
 }
diff --git a/deps/boringssl/src/ssl/test/mock_quic_transport.cc b/deps/boringssl/src/ssl/test/mock_quic_transport.cc
index 0929432..4b8bc30 100644
--- a/deps/boringssl/src/ssl/test/mock_quic_transport.cc
+++ b/deps/boringssl/src/ssl/test/mock_quic_transport.cc
@@ -19,14 +19,6 @@
 #include <cstring>
 #include <limits>
 
-namespace {
-
-const uint8_t kTagHandshake = 'H';
-const uint8_t kTagApplication = 'A';
-const uint8_t kTagAlert = 'L';
-
-}  // namespace
-
 MockQuicTransport::MockQuicTransport(bssl::UniquePtr<BIO> bio, SSL *ssl)
     : bio_(std::move(bio)),
       read_levels_(ssl_encryption_application + 1),
@@ -38,7 +30,7 @@
                                       const uint8_t *secret,
                                       size_t secret_len) {
   // TODO(davidben): Assert the various encryption secret invariants.
-  read_levels_[level].cipher = SSL_CIPHER_get_value(cipher);
+  read_levels_[level].cipher = SSL_CIPHER_get_protocol_id(cipher);
   read_levels_[level].secret.assign(secret, secret + secret_len);
   return true;
 }
@@ -48,7 +40,7 @@
                                        const uint8_t *secret,
                                        size_t secret_len) {
   // TODO(davidben): Assert the various encryption secret invariants.
-  write_levels_[level].cipher = SSL_CIPHER_get_value(cipher);
+  write_levels_[level].cipher = SSL_CIPHER_get_protocol_id(cipher);
   write_levels_[level].secret.assign(secret, secret + secret_len);
   return true;
 }
@@ -73,50 +65,100 @@
   return true;
 }
 
+const char *LevelToString(ssl_encryption_level_t level) {
+  switch (level) {
+    case ssl_encryption_initial:
+      return "initial";
+    case ssl_encryption_early_data:
+      return "early_data";
+    case ssl_encryption_handshake:
+      return "handshake";
+    case ssl_encryption_application:
+      return "application";
+  }
+  return "";
+}
+
 }  // namespace
 
-bool MockQuicTransport::ReadHeader(uint8_t *out_tag, size_t *out_len) {
-  uint8_t header[7];
-  if (!ReadAll(bio_.get(), header)) {
-    return false;
-  }
-  *out_tag = header[0];
-  uint16_t cipher_suite = header[1] << 8 | header[2];
-  size_t remaining_bytes =
-      header[3] << 24 | header[4] << 16 | header[5] << 8 | header[6];
-
-  enum ssl_encryption_level_t level = SSL_quic_read_level(ssl_);
-  if (*out_tag == kTagApplication) {
-    if (SSL_in_early_data(ssl_)) {
-      level = ssl_encryption_early_data;
-    } else {
-      level = ssl_encryption_application;
+bool MockQuicTransport::ReadHeader(uint8_t *out_type,
+                                   enum ssl_encryption_level_t *out_level,
+                                   size_t *out_len) {
+  for (;;) {
+    uint8_t header[8];
+    if (!ReadAll(bio_.get(), header)) {
+      // TODO(davidben): Distinguish between errors and EOF. See
+      // ReadApplicationData.
+      return false;
     }
+
+    CBS cbs;
+    uint8_t level_id;
+    uint16_t cipher_suite;
+    uint32_t remaining_bytes;
+    CBS_init(&cbs, header, sizeof(header));
+    if (!CBS_get_u8(&cbs, out_type) ||
+        !CBS_get_u8(&cbs, &level_id) ||
+        !CBS_get_u16(&cbs, &cipher_suite) ||
+        !CBS_get_u32(&cbs, &remaining_bytes) ||
+        level_id >= read_levels_.size()) {
+      fprintf(stderr, "Error parsing record header.\n");
+      return false;
+    }
+
+    auto level = static_cast<ssl_encryption_level_t>(level_id);
+    // Non-initial levels must be configured before use.
+    uint16_t expect_cipher = read_levels_[level].cipher;
+    if (expect_cipher == 0 && level != ssl_encryption_initial) {
+      if (level == ssl_encryption_early_data) {
+        // If we receive early data records without any early data keys, skip
+        // the record. This means early data was rejected.
+        std::vector<uint8_t> discard(remaining_bytes);
+        if (!ReadAll(bio_.get(), bssl::MakeSpan(discard))) {
+          return false;
+        }
+        continue;
+      }
+      fprintf(stderr,
+              "Got record at level %s, but keys were not configured.\n",
+              LevelToString(level));
+      return false;
+    }
+    if (cipher_suite != expect_cipher) {
+      fprintf(stderr, "Got cipher suite 0x%04x at level %s, wanted 0x%04x.\n",
+              cipher_suite, LevelToString(level), expect_cipher);
+      return false;
+    }
+    const std::vector<uint8_t> &secret = read_levels_[level].secret;
+    std::vector<uint8_t> read_secret(secret.size());
+    if (remaining_bytes < secret.size()) {
+      fprintf(stderr, "Record at level %s too small.\n", LevelToString(level));
+      return false;
+    }
+    remaining_bytes -= secret.size();
+    if (!ReadAll(bio_.get(), bssl::MakeSpan(read_secret))) {
+      fprintf(stderr, "Error reading record secret.\n");
+      return false;
+    }
+    if (read_secret != secret) {
+      fprintf(stderr, "Encryption secret at level %s did not match.\n",
+              LevelToString(level));
+      return false;
+    }
+    *out_level = level;
+    *out_len = remaining_bytes;
+    return true;
   }
-  if (cipher_suite != read_levels_[level].cipher) {
-    return false;
-  }
-  const std::vector<uint8_t> &secret = read_levels_[level].secret;
-  std::vector<uint8_t> read_secret(secret.size());
-  if (remaining_bytes < secret.size()) {
-    return false;
-  }
-  remaining_bytes -= secret.size();
-  if (!ReadAll(bio_.get(), bssl::MakeSpan(read_secret)) ||
-      read_secret != secret) {
-    return false;
-  }
-  *out_len = remaining_bytes;
-  return true;
 }
 
 bool MockQuicTransport::ReadHandshake() {
-  uint8_t tag;
+  uint8_t type;
+  ssl_encryption_level_t level;
   size_t len;
-  if (!ReadHeader(&tag, &len)) {
+  if (!ReadHeader(&type, &level, &len)) {
     return false;
   }
-  if (tag != kTagHandshake) {
+  if (type != SSL3_RT_HANDSHAKE) {
     return false;
   }
 
@@ -124,8 +166,7 @@
   if (!ReadAll(bio_.get(), bssl::MakeSpan(buf))) {
     return false;
   }
-  return SSL_provide_quic_data(ssl_, SSL_quic_read_level(ssl_), buf.data(),
-                               buf.size());
+  return SSL_provide_quic_data(ssl_, level, buf.data(), buf.size());
 }
 
 int MockQuicTransport::ReadApplicationData(uint8_t *out, size_t max_out) {
@@ -143,18 +184,19 @@
     return len;
   }
 
-  uint8_t tag = 0;
+  uint8_t type = 0;
+  ssl_encryption_level_t level;
   size_t len;
   while (true) {
-    if (!ReadHeader(&tag, &len)) {
+    if (!ReadHeader(&type, &level, &len)) {
       // Assume that a failure to read the header means there's no more to read,
       // not an error reading.
       return 0;
     }
-    if (tag == kTagApplication) {
+    if (type == SSL3_RT_APPLICATION_DATA) {
       break;
     }
-    if (tag != kTagHandshake) {
+    if (type != SSL3_RT_HANDSHAKE) {
       return -1;
     }
 
@@ -162,8 +204,7 @@
     if (!ReadAll(bio_.get(), bssl::MakeSpan(buf))) {
       return -1;
     }
-    if (SSL_provide_quic_data(ssl_, SSL_quic_read_level(ssl_), buf.data(),
-                              buf.size()) != 1) {
+    if (SSL_provide_quic_data(ssl_, level, buf.data(), buf.size()) != 1) {
       return -1;
     }
     if (SSL_in_init(ssl_)) {
@@ -198,19 +239,20 @@
 }
 
 bool MockQuicTransport::WriteRecord(enum ssl_encryption_level_t level,
-                                    uint8_t tag, const uint8_t *data,
+                                    uint8_t type, const uint8_t *data,
                                     size_t len) {
   uint16_t cipher_suite = write_levels_[level].cipher;
   const std::vector<uint8_t> &secret = write_levels_[level].secret;
   size_t tlv_len = secret.size() + len;
-  uint8_t header[7];
-  header[0] = tag;
-  header[1] = (cipher_suite >> 8) & 0xff;
-  header[2] = cipher_suite & 0xff;
-  header[3] = (tlv_len >> 24) & 0xff;
-  header[4] = (tlv_len >> 16) & 0xff;
-  header[5] = (tlv_len >> 8) & 0xff;
-  header[6] = tlv_len & 0xff;
+  uint8_t header[8];
+  header[0] = type;
+  header[1] = level;
+  header[2] = (cipher_suite >> 8) & 0xff;
+  header[3] = cipher_suite & 0xff;
+  header[4] = (tlv_len >> 24) & 0xff;
+  header[5] = (tlv_len >> 16) & 0xff;
+  header[6] = (tlv_len >> 8) & 0xff;
+  header[7] = tlv_len & 0xff;
   return BIO_write_all(bio_.get(), header, sizeof(header)) &&
          BIO_write_all(bio_.get(), secret.data(), secret.size()) &&
          BIO_write_all(bio_.get(), data, len);
@@ -218,7 +260,7 @@
 
 bool MockQuicTransport::WriteHandshakeData(enum ssl_encryption_level_t level,
                                            const uint8_t *data, size_t len) {
-  return WriteRecord(level, kTagHandshake, data, len);
+  return WriteRecord(level, SSL3_RT_HANDSHAKE, data, len);
 }
 
 bool MockQuicTransport::WriteApplicationData(const uint8_t *in, size_t len) {
@@ -226,7 +268,7 @@
   if (SSL_in_early_data(ssl_) && !SSL_is_server(ssl_)) {
     level = ssl_encryption_early_data;
   }
-  return WriteRecord(level, kTagApplication, in, len);
+  return WriteRecord(level, SSL3_RT_APPLICATION_DATA, in, len);
 }
 
 bool MockQuicTransport::Flush() { return BIO_flush(bio_.get()); }
@@ -234,5 +276,5 @@
 bool MockQuicTransport::SendAlert(enum ssl_encryption_level_t level,
                                   uint8_t alert) {
   uint8_t alert_msg[] = {2, alert};
-  return WriteRecord(level, kTagAlert, alert_msg, sizeof(alert_msg));
+  return WriteRecord(level, SSL3_RT_ALERT, alert_msg, sizeof(alert_msg));
 }
diff --git a/deps/boringssl/src/ssl/test/mock_quic_transport.h b/deps/boringssl/src/ssl/test/mock_quic_transport.h
index a56652d..2cd7437 100644
--- a/deps/boringssl/src/ssl/test/mock_quic_transport.h
+++ b/deps/boringssl/src/ssl/test/mock_quic_transport.h
@@ -44,16 +44,17 @@
  private:
   // Reads a record header from |bio_| and returns whether the record was read
   // successfully. As part of reading the header, this function checks that the
-  // cipher suite and secret in the header are correct. On success, the tag
-  // indicating the TLS record type is put in  |*out_tag|, the length of the TLS
-  // record is put in |*out_len|, and the next thing to be read from |bio_| is
-  // |*out_len| bytes of the TLS record.
-  bool ReadHeader(uint8_t *out_tag, size_t *out_len);
+  // cipher suite and secret in the header are correct. On success, the TLS
+  // record type is put in |*out_type|, the encryption level is put in
+  // |*out_level|, the length of the TLS record is put in |*out_len|, and the
+  // next thing to be read from |bio_| is |*out_len| bytes of the TLS record.
+  bool ReadHeader(uint8_t *out_type, enum ssl_encryption_level_t *out_level,
+                  size_t *out_len);
 
   // Writes a MockQuicTransport record to |bio_| at encryption level |level|
-  // with record type |tag| and a TLS record payload of length |len| from
+  // with record type |type| and a TLS record payload of length |len| from
   // |data|.
-  bool WriteRecord(enum ssl_encryption_level_t level, uint8_t tag,
+  bool WriteRecord(enum ssl_encryption_level_t level, uint8_t type,
                    const uint8_t *data, size_t len);
 
   bssl::UniquePtr<BIO> bio_;
diff --git a/deps/boringssl/src/ssl/test/runner/curve25519/const_amd64.h b/deps/boringssl/src/ssl/test/runner/curve25519/const_amd64.h
deleted file mode 100644
index 80ad222..0000000
--- a/deps/boringssl/src/ssl/test/runner/curve25519/const_amd64.h
+++ /dev/null
@@ -1,8 +0,0 @@
-// Copyright 2012 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
-
-// This code was translated into a form compatible with 6a from the public
-// domain sources in SUPERCOP: http://bench.cr.yp.to/supercop.html
-
-#define REDMASK51     0x0007FFFFFFFFFFFF
diff --git a/deps/boringssl/src/ssl/test/test_config.cc b/deps/boringssl/src/ssl/test/test_config.cc
index d4b71b4..c1d215b 100644
--- a/deps/boringssl/src/ssl/test/test_config.cc
+++ b/deps/boringssl/src/ssl/test/test_config.cc
@@ -55,6 +55,7 @@
     {"-dtls", &TestConfig::is_dtls},
     {"-quic", &TestConfig::is_quic},
     {"-fallback-scsv", &TestConfig::fallback_scsv},
+    {"-enable-ech-grease", &TestConfig::enable_ech_grease},
     {"-require-any-client-certificate",
      &TestConfig::require_any_client_certificate},
     {"-false-start", &TestConfig::false_start},
@@ -73,6 +74,7 @@
     {"-expect-session-miss", &TestConfig::expect_session_miss},
     {"-decline-alpn", &TestConfig::decline_alpn},
     {"-select-empty-alpn", &TestConfig::select_empty_alpn},
+    {"-defer-alps", &TestConfig::defer_alps},
     {"-expect-extended-master-secret",
      &TestConfig::expect_extended_master_secret},
     {"-enable-ocsp-stapling", &TestConfig::enable_ocsp_stapling},
@@ -131,8 +133,6 @@
     {"-use-custom-verify-callback", &TestConfig::use_custom_verify_callback},
     {"-allow-false-start-without-alpn",
      &TestConfig::allow_false_start_without_alpn},
-    {"-ignore-tls13-downgrade", &TestConfig::ignore_tls13_downgrade},
-    {"-expect-tls13-downgrade", &TestConfig::expect_tls13_downgrade},
     {"-handoff", &TestConfig::handoff},
     {"-use-ocsp-callback", &TestConfig::use_ocsp_callback},
     {"-set-ocsp-in-callback", &TestConfig::set_ocsp_in_callback},
@@ -152,6 +152,7 @@
      &TestConfig::expect_delegated_credential_used},
     {"-expect-hrr", &TestConfig::expect_hrr},
     {"-expect-no-hrr", &TestConfig::expect_no_hrr},
+    {"-wait-for-debugger", &TestConfig::wait_for_debugger},
 };
 
 const Flag<std::string> kStringFlags[] = {
@@ -182,6 +183,14 @@
     {"-handshaker-path", &TestConfig::handshaker_path},
     {"-delegated-credential", &TestConfig::delegated_credential},
     {"-expect-early-data-reason", &TestConfig::expect_early_data_reason},
+    {"-quic-early-data-context", &TestConfig::quic_early_data_context},
+};
+
+// TODO(davidben): When we can depend on C++17 or Abseil, switch this to
+// std::optional or absl::optional.
+const Flag<std::unique_ptr<std::string>> kOptionalStringFlags[] = {
+    {"-expect-peer-application-settings",
+     &TestConfig::expect_peer_application_settings},
 };
 
 const Flag<std::string> kBase64Flags[] = {
@@ -216,10 +225,12 @@
     {"-max-cert-list", &TestConfig::max_cert_list},
     {"-expect-cipher-aes", &TestConfig::expect_cipher_aes},
     {"-expect-cipher-no-aes", &TestConfig::expect_cipher_no_aes},
+    {"-expect-cipher", &TestConfig::expect_cipher},
     {"-resumption-delay", &TestConfig::resumption_delay},
     {"-max-send-fragment", &TestConfig::max_send_fragment},
     {"-read-size", &TestConfig::read_size},
     {"-expect-ticket-age-skew", &TestConfig::expect_ticket_age_skew},
+    {"-quic-use-legacy-codepoint", &TestConfig::quic_use_legacy_codepoint},
 };
 
 const Flag<std::vector<int>> kIntVectorFlags[] = {
@@ -229,6 +240,11 @@
     {"-curves", &TestConfig::curves},
 };
 
+const Flag<std::vector<std::pair<std::string, std::string>>>
+    kStringPairVectorFlags[] = {
+        {"-application-settings", &TestConfig::application_settings},
+};
+
 bool ParseFlag(char *flag, int argc, char **argv, int *i,
                bool skip, TestConfig *out_config) {
   bool *bool_field = FindField(out_config, kBoolFlags, flag);
@@ -252,6 +268,20 @@
     return true;
   }
 
+  std::unique_ptr<std::string> *optional_string_field =
+      FindField(out_config, kOptionalStringFlags, flag);
+  if (optional_string_field != NULL) {
+    *i = *i + 1;
+    if (*i >= argc) {
+      fprintf(stderr, "Missing parameter.\n");
+      return false;
+    }
+    if (!skip) {
+      optional_string_field->reset(new std::string(argv[*i]));
+    }
+    return true;
+  }
+
   std::string *base64_field = FindField(out_config, kBase64Flags, flag);
   if (base64_field != NULL) {
     *i = *i + 1;
@@ -307,6 +337,28 @@
     return true;
   }
 
+  std::vector<std::pair<std::string, std::string>> *string_pair_vector_field =
+      FindField(out_config, kStringPairVectorFlags, flag);
+  if (string_pair_vector_field) {
+    *i = *i + 1;
+    if (*i >= argc) {
+      fprintf(stderr, "Missing parameter.\n");
+      return false;
+    }
+    const char *comma = strchr(argv[*i], ',');
+    if (!comma) {
+      fprintf(stderr,
+              "Parameter should be a pair of comma-separated strings.\n");
+      return false;
+    }
+    // Each instance of the flag adds to the list.
+    if (!skip) {
+      string_pair_vector_field->push_back(std::make_pair(
+          std::string(argv[*i], comma - argv[*i]), std::string(comma + 1)));
+    }
+    return true;
+  }
+
   fprintf(stderr, "Unknown argument: %s.\n", flag);
   return false;
 }
@@ -626,6 +678,19 @@
     exit(1);
   }
 
+  if (config->defer_alps) {
+    for (const auto &pair : config->application_settings) {
+      if (!SSL_add_application_settings(
+              ssl, reinterpret_cast<const uint8_t *>(pair.first.data()),
+              pair.first.size(),
+              reinterpret_cast<const uint8_t *>(pair.second.data()),
+              pair.second.size())) {
+        fprintf(stderr, "error configuring ALPS.\n");
+        exit(1);
+      }
+    }
+  }
+
   assert(config->select_alpn.empty() || !config->select_empty_alpn);
   *out = (const uint8_t *)config->select_alpn.data();
   *outlen = config->select_alpn.size();
@@ -1278,10 +1343,6 @@
     SSL_CTX_set_false_start_allowed_without_alpn(ssl_ctx.get(), 1);
   }
 
-  if (ignore_tls13_downgrade) {
-    SSL_CTX_set_ignore_tls13_downgrade(ssl_ctx.get(), 1);
-  }
-
   if (use_ocsp_callback) {
     SSL_CTX_set_tlsext_status_cb(ssl_ctx.get(), LegacyOCSPCallback);
   }
@@ -1532,6 +1593,9 @@
   if (!expect_channel_id.empty() || enable_channel_id) {
     SSL_set_tls_channel_id_enabled(ssl.get(), 1);
   }
+  if (enable_ech_grease) {
+    SSL_set_enable_ech_grease(ssl.get(), 1);
+  }
   if (!send_channel_id.empty()) {
     SSL_set_tls_channel_id_enabled(ssl.get(), 1);
     if (!async) {
@@ -1553,10 +1617,22 @@
     return nullptr;
   }
   if (!advertise_alpn.empty() &&
-      SSL_set_alpn_protos(ssl.get(), (const uint8_t *)advertise_alpn.data(),
-                          advertise_alpn.size()) != 0) {
+      SSL_set_alpn_protos(
+          ssl.get(), reinterpret_cast<const uint8_t *>(advertise_alpn.data()),
+          advertise_alpn.size()) != 0) {
     return nullptr;
   }
+  if (!defer_alps) {
+    for (const auto &pair : application_settings) {
+      if (!SSL_add_application_settings(
+              ssl.get(), reinterpret_cast<const uint8_t *>(pair.first.data()),
+              pair.first.size(),
+              reinterpret_cast<const uint8_t *>(pair.second.data()),
+              pair.second.size())) {
+        return nullptr;
+      }
+    }
+  }
   if (!psk.empty()) {
     SSL_set_psk_client_callback(ssl.get(), PskClientCallback);
     SSL_set_psk_server_callback(ssl.get(), PskServerCallback);
@@ -1662,6 +1738,9 @@
   if (max_send_fragment > 0) {
     SSL_set_max_send_fragment(ssl.get(), max_send_fragment);
   }
+  if (quic_use_legacy_codepoint != -1) {
+    SSL_set_quic_use_legacy_codepoint(ssl.get(), quic_use_legacy_codepoint);
+  }
   if (!quic_transport_params.empty()) {
     if (!SSL_set_quic_transport_params(
             ssl.get(),
@@ -1723,5 +1802,13 @@
     }
   }
 
+  if (!quic_early_data_context.empty() &&
+      !SSL_set_quic_early_data_context(
+          ssl.get(),
+          reinterpret_cast<const uint8_t *>(quic_early_data_context.data()),
+          quic_early_data_context.size())) {
+    return nullptr;
+  }
+
   return ssl;
 }
diff --git a/deps/boringssl/src/ssl/test/test_config.h b/deps/boringssl/src/ssl/test/test_config.h
index 0974a16..7d77994 100644
--- a/deps/boringssl/src/ssl/test/test_config.h
+++ b/deps/boringssl/src/ssl/test/test_config.h
@@ -16,6 +16,7 @@
 #define HEADER_TEST_CONFIG
 
 #include <string>
+#include <utility>
 #include <vector>
 
 #include <openssl/base.h>
@@ -38,6 +39,7 @@
   std::string key_file;
   std::string cert_file;
   std::string expect_server_name;
+  bool enable_ech_grease = false;
   std::string expect_certificate_types;
   bool require_any_client_certificate = false;
   std::string advertise_npn;
@@ -67,8 +69,13 @@
   std::string select_alpn;
   bool decline_alpn = false;
   bool select_empty_alpn = false;
+  bool defer_alps = false;
+  std::vector<std::pair<std::string, std::string>> application_settings;
+  std::unique_ptr<std::string> expect_peer_application_settings;
   std::string quic_transport_params;
   std::string expect_quic_transport_params;
+  // Set quic_use_legacy_codepoint to 0 or 1 to configure, -1 uses default.
+  int quic_use_legacy_codepoint = -1;
   bool expect_session_miss = false;
   bool expect_extended_master_secret = false;
   std::string psk;
@@ -137,6 +144,7 @@
   bool use_exporter_between_reads = false;
   int expect_cipher_aes = 0;
   int expect_cipher_no_aes = 0;
+  int expect_cipher = 0;
   std::string expect_peer_cert_file;
   int resumption_delay = 0;
   bool retain_only_sha256_client_cert = false;
@@ -155,8 +163,6 @@
   bool use_custom_verify_callback = false;
   std::string expect_msg_callback;
   bool allow_false_start_without_alpn = false;
-  bool ignore_tls13_downgrade = false;
-  bool expect_tls13_downgrade = false;
   bool handoff = false;
   bool use_ocsp_callback = false;
   bool set_ocsp_in_callback = false;
@@ -177,6 +183,8 @@
   std::string expect_early_data_reason;
   bool expect_hrr = false;
   bool expect_no_hrr = false;
+  bool wait_for_debugger = false;
+  std::string quic_early_data_context;
 
   int argc;
   char **argv;
diff --git a/deps/boringssl/src/ssl/tls13_both.cc b/deps/boringssl/src/ssl/tls13_both.cc
index 93e2f6a..c6bc2b1 100644
--- a/deps/boringssl/src/ssl/tls13_both.cc
+++ b/deps/boringssl/src/ssl/tls13_both.cc
@@ -244,8 +244,7 @@
 
     uint8_t alert = SSL_AD_DECODE_ERROR;
     if (!ssl_parse_extensions(&extensions, &alert, ext_types,
-                              OPENSSL_ARRAY_SIZE(ext_types),
-                              0 /* reject unknown */)) {
+                              /*ignore_unknown=*/false)) {
       ssl_send_alert(ssl, SSL3_AL_FATAL, alert);
       return false;
     }
diff --git a/deps/boringssl/src/ssl/tls13_client.cc b/deps/boringssl/src/ssl/tls13_client.cc
index cb379b0..496ae01 100644
--- a/deps/boringssl/src/ssl/tls13_client.cc
+++ b/deps/boringssl/src/ssl/tls13_client.cc
@@ -44,6 +44,7 @@
   state_server_certificate_reverify,
   state_read_server_finished,
   state_send_end_of_early_data,
+  state_send_client_encrypted_extensions,
   state_send_client_certificate,
   state_send_client_certificate_verify,
   state_complete_second_flight,
@@ -172,8 +173,7 @@
 
   uint8_t alert = SSL_AD_DECODE_ERROR;
   if (!ssl_parse_extensions(&extensions, &alert, ext_types,
-                            OPENSSL_ARRAY_SIZE(ext_types),
-                            0 /* reject unknown */)) {
+                            /*ignore_unknown=*/false)) {
     ssl_send_alert(ssl, SSL3_AL_FATAL, alert);
     return ssl_hs_error;
   }
@@ -338,8 +338,7 @@
 
   uint8_t alert = SSL_AD_DECODE_ERROR;
   if (!ssl_parse_extensions(&extensions, &alert, ext_types,
-                            OPENSSL_ARRAY_SIZE(ext_types),
-                            0 /* reject unknown */)) {
+                            /*ignore_unknown=*/false)) {
     ssl_send_alert(ssl, SSL3_AL_FATAL, alert);
     return ssl_hs_error;
   }
@@ -415,8 +414,8 @@
   // Set up the key schedule and incorporate the PSK into the running secret.
   if (ssl->s3->session_reused) {
     if (!tls13_init_key_schedule(
-            hs, MakeConstSpan(hs->new_session->master_key,
-                              hs->new_session->master_key_length))) {
+            hs, MakeConstSpan(hs->new_session->secret,
+                              hs->new_session->secret_length))) {
       return ssl_hs_error;
     }
   } else if (!tls13_init_key_schedule(hs, MakeConstSpan(kZeroes, hash_len))) {
@@ -489,12 +488,6 @@
     return ssl_hs_error;
   }
 
-  // Store the negotiated ALPN in the session.
-  if (!hs->new_session->early_alpn.CopyFrom(ssl->s3->alpn_selected)) {
-    ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
-    return ssl_hs_error;
-  }
-
   if (ssl->s3->early_data_accepted) {
     if (hs->early_session->cipher != hs->new_session->cipher) {
       OPENSSL_PUT_ERROR(SSL, SSL_R_CIPHER_MISMATCH_ON_EARLY_DATA);
@@ -507,11 +500,29 @@
       ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_ILLEGAL_PARAMETER);
       return ssl_hs_error;
     }
-    if (ssl->s3->channel_id_valid || ssl->s3->token_binding_negotiated) {
+    // Channel ID and Token Binding are incompatible with 0-RTT. The ALPS
+    // extension should be negotiated implicitly.
+    if (ssl->s3->channel_id_valid || ssl->s3->token_binding_negotiated ||
+        hs->new_session->has_application_settings) {
       OPENSSL_PUT_ERROR(SSL, SSL_R_UNEXPECTED_EXTENSION_ON_EARLY_DATA);
       ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_ILLEGAL_PARAMETER);
       return ssl_hs_error;
     }
+    hs->new_session->has_application_settings =
+        hs->early_session->has_application_settings;
+    if (!hs->new_session->local_application_settings.CopyFrom(
+            hs->early_session->local_application_settings) ||
+        !hs->new_session->peer_application_settings.CopyFrom(
+            hs->early_session->peer_application_settings)) {
+      ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
+      return ssl_hs_error;
+    }
+  }
+
+  // Store the negotiated ALPN in the session.
+  if (!hs->new_session->early_alpn.CopyFrom(ssl->s3->alpn_selected)) {
+    ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
+    return ssl_hs_error;
   }
 
   if (!ssl_hash_message(hs, msg)) {
@@ -568,8 +579,7 @@
       !CBS_get_u16_length_prefixed(&body, &extensions) ||
       CBS_len(&body) != 0 ||
       !ssl_parse_extensions(&extensions, &alert, ext_types,
-                            OPENSSL_ARRAY_SIZE(ext_types),
-                            1 /* accept unknown */) ||
+                            /*ignore_unknown=*/true) ||
       (have_ca && CBS_len(&ca) == 0) ||
       !have_sigalgs ||
       !CBS_get_u16_length_prefixed(&sigalgs,
@@ -629,8 +639,7 @@
   return ssl_hs_ok;
 }
 
-static enum ssl_hs_wait_t do_read_server_certificate_verify(
-    SSL_HANDSHAKE *hs) {
+static enum ssl_hs_wait_t do_read_server_certificate_verify(SSL_HANDSHAKE *hs) {
   SSL *const ssl = hs->ssl;
   SSLMessage msg;
   if (!ssl->method->get_message(ssl, &msg)) {
@@ -657,8 +666,7 @@
   return ssl_hs_ok;
 }
 
-static enum ssl_hs_wait_t do_server_certificate_reverify(
-    SSL_HANDSHAKE *hs) {
+static enum ssl_hs_wait_t do_server_certificate_reverify(SSL_HANDSHAKE *hs) {
   switch (ssl_reverify_peer_cert(hs, /*send_alert=*/true)) {
     case ssl_verify_ok:
       break;
@@ -721,6 +729,32 @@
     }
   }
 
+  hs->tls13_state = state_send_client_encrypted_extensions;
+  return ssl_hs_ok;
+}
+
+static enum ssl_hs_wait_t do_send_client_encrypted_extensions(
+    SSL_HANDSHAKE *hs) {
+  SSL *const ssl = hs->ssl;
+  // For now, only one extension uses client EncryptedExtensions. This function
+  // may be generalized if others use it in the future.
+  if (hs->new_session->has_application_settings &&
+      !ssl->s3->early_data_accepted) {
+    ScopedCBB cbb;
+    CBB body, extensions, extension;
+    if (!ssl->method->init_message(ssl, cbb.get(), &body,
+                                   SSL3_MT_ENCRYPTED_EXTENSIONS) ||
+        !CBB_add_u16_length_prefixed(&body, &extensions) ||
+        !CBB_add_u16(&extensions, TLSEXT_TYPE_application_settings) ||
+        !CBB_add_u16_length_prefixed(&extensions, &extension) ||
+        !CBB_add_bytes(&extension,
+                       hs->new_session->local_application_settings.data(),
+                       hs->new_session->local_application_settings.size()) ||
+        !ssl_add_message_cbb(ssl, cbb.get())) {
+      return ssl_hs_error;
+    }
+  }
+
   hs->tls13_state = state_send_client_certificate;
   return ssl_hs_ok;
 }
@@ -863,6 +897,9 @@
       case state_send_client_certificate:
         ret = do_send_client_certificate(hs);
         break;
+      case state_send_client_encrypted_extensions:
+        ret = do_send_client_encrypted_extensions(hs);
+        break;
       case state_send_client_certificate_verify:
         ret = do_send_client_certificate_verify(hs);
         break;
@@ -910,6 +947,8 @@
       return "TLS 1.3 client read_server_finished";
     case state_send_end_of_early_data:
       return "TLS 1.3 client send_end_of_early_data";
+    case state_send_client_encrypted_extensions:
+      return "TLS 1.3 client send_client_encrypted_extensions";
     case state_send_client_certificate:
       return "TLS 1.3 client send_client_certificate";
     case state_send_client_certificate_verify:
@@ -989,8 +1028,7 @@
 
   uint8_t alert = SSL_AD_DECODE_ERROR;
   if (!ssl_parse_extensions(&extensions, &alert, ext_types,
-                            OPENSSL_ARRAY_SIZE(ext_types),
-                            1 /* ignore unknown */)) {
+                            /*ignore_unknown=*/true)) {
     ssl_send_alert(ssl, SSL3_AL_FATAL, alert);
     return nullptr;
   }
diff --git a/deps/boringssl/src/ssl/tls13_enc.cc b/deps/boringssl/src/ssl/tls13_enc.cc
index 69a5578..cda53ec 100644
--- a/deps/boringssl/src/ssl/tls13_enc.cc
+++ b/deps/boringssl/src/ssl/tls13_enc.cc
@@ -303,10 +303,9 @@
     OPENSSL_PUT_ERROR(SSL, ERR_R_INTERNAL_ERROR);
     return false;
   }
-  hs->new_session->master_key_length = hs->transcript.DigestLen();
+  hs->new_session->secret_length = hs->transcript.DigestLen();
   return derive_secret(
-      hs,
-      MakeSpan(hs->new_session->master_key, hs->new_session->master_key_length),
+      hs, MakeSpan(hs->new_session->secret, hs->new_session->secret_length),
       label_to_span(kTLS13LabelResumption));
 }
 
@@ -354,8 +353,8 @@
   const EVP_MD *digest = ssl_session_get_digest(session);
   // The session initially stores the resumption_master_secret, which we
   // override with the PSK.
-  auto session_key = MakeSpan(session->master_key, session->master_key_length);
-  return hkdf_expand_label(session_key, digest, session_key,
+  auto session_secret = MakeSpan(session->secret, session->secret_length);
+  return hkdf_expand_label(session_secret, digest, session_secret,
                            label_to_span(kTLS13LabelResumptionPSK), nonce);
 }
 
@@ -460,11 +459,10 @@
   if (!hash_transcript_and_truncated_client_hello(
           hs, context, &context_len, digest, msg,
           1 /* length prefix */ + hash_len) ||
-      !tls13_psk_binder(verify_data, &verify_data_len,
-                        ssl->session->ssl_version, digest,
-                        MakeConstSpan(ssl->session->master_key,
-                                      ssl->session->master_key_length),
-                        MakeConstSpan(context, context_len)) ||
+      !tls13_psk_binder(
+          verify_data, &verify_data_len, ssl->session->ssl_version, digest,
+          MakeConstSpan(ssl->session->secret, ssl->session->secret_length),
+          MakeConstSpan(context, context_len)) ||
       verify_data_len != hash_len) {
     OPENSSL_PUT_ERROR(SSL, ERR_R_INTERNAL_ERROR);
     return false;
@@ -485,11 +483,10 @@
   if (!hash_transcript_and_truncated_client_hello(hs, context, &context_len,
                                                   hs->transcript.Digest(),
                                                   msg.raw, CBS_len(binders)) ||
-      !tls13_psk_binder(
-          verify_data, &verify_data_len, hs->ssl->version,
-          hs->transcript.Digest(),
-          MakeConstSpan(session->master_key, session->master_key_length),
-          MakeConstSpan(context, context_len)) ||
+      !tls13_psk_binder(verify_data, &verify_data_len, hs->ssl->version,
+                        hs->transcript.Digest(),
+                        MakeConstSpan(session->secret, session->secret_length),
+                        MakeConstSpan(context, context_len)) ||
       // We only consider the first PSK, so compare against the first binder.
       !CBS_get_u8_length_prefixed(binders, &binder)) {
     OPENSSL_PUT_ERROR(SSL, ERR_R_INTERNAL_ERROR);
@@ -510,4 +507,40 @@
   return true;
 }
 
+bool tls13_ech_accept_confirmation(
+    SSL_HANDSHAKE *hs, bssl::Span<uint8_t> out,
+    bssl::Span<const uint8_t> server_hello_ech_conf) {
+  // Compute the hash of the transcript concatenated with
+  // |server_hello_ech_conf| without modifying |hs->transcript|.
+  uint8_t context_hash[EVP_MAX_MD_SIZE];
+  unsigned context_hash_len;
+  ScopedEVP_MD_CTX ctx;
+  if (!hs->transcript.CopyToHashContext(ctx.get(), hs->transcript.Digest()) ||
+      !EVP_DigestUpdate(ctx.get(), server_hello_ech_conf.data(),
+                        server_hello_ech_conf.size()) ||
+      !EVP_DigestFinal_ex(ctx.get(), context_hash, &context_hash_len)) {
+    return false;
+  }
+
+  // Per draft-ietf-tls-esni-09, accept_confirmation is computed with
+  // Derive-Secret, which derives a secret of size Hash.length. That value is
+  // then truncated to the first 8 bytes. Note this differs from deriving an
+  // 8-byte secret because the target length is included in the derivation.
+  uint8_t accept_confirmation_buf[EVP_MAX_MD_SIZE];
+  bssl::Span<uint8_t> accept_confirmation =
+      MakeSpan(accept_confirmation_buf, hs->transcript.DigestLen());
+  if (!hkdf_expand_label(accept_confirmation, hs->transcript.Digest(),
+                         hs->secret(), label_to_span("ech accept confirmation"),
+                         MakeConstSpan(context_hash, context_hash_len))) {
+    return false;
+  }
+
+  if (out.size() > accept_confirmation.size()) {
+    OPENSSL_PUT_ERROR(SSL, ERR_R_INTERNAL_ERROR);
+    return false;
+  }
+  OPENSSL_memcpy(out.data(), accept_confirmation.data(), out.size());
+  return true;
+}
+
 BSSL_NAMESPACE_END
diff --git a/deps/boringssl/src/ssl/tls13_server.cc b/deps/boringssl/src/ssl/tls13_server.cc
index 33f821e..6e9cd07 100644
--- a/deps/boringssl/src/ssl/tls13_server.cc
+++ b/deps/boringssl/src/ssl/tls13_server.cc
@@ -196,6 +196,11 @@
     return ssl_hs_error;
   }
 
+  if (ssl->quic_method != nullptr && client_hello.session_id_len > 0) {
+    OPENSSL_PUT_ERROR(SSL, SSL_R_UNEXPECTED_COMPATIBILITY_MODE);
+    ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_ILLEGAL_PARAMETER);
+    return ssl_hs_error;
+  }
   OPENSSL_memcpy(hs->session_id, client_hello.session_id,
                  client_hello.session_id_len);
   hs->session_id_len = client_hello.session_id_len;
@@ -349,13 +354,6 @@
                          &offered_ticket, msg, &client_hello)) {
     case ssl_ticket_aead_ignore_ticket:
       assert(!session);
-      if (!ssl->enable_early_data) {
-        ssl->s3->early_data_reason = ssl_early_data_disabled;
-      } else if (!offered_ticket) {
-        ssl->s3->early_data_reason = ssl_early_data_no_session_offered;
-      } else {
-        ssl->s3->early_data_reason = ssl_early_data_session_not_resumed;
-      }
       if (!ssl_get_new_session(hs, 1 /* server */)) {
         ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
         return ssl_hs_error;
@@ -372,35 +370,6 @@
         return ssl_hs_error;
       }
 
-      // |ssl_session_is_resumable| forbids cross-cipher resumptions even if the
-      // PRF hashes match.
-      assert(hs->new_cipher == session->cipher);
-
-      if (!ssl->enable_early_data) {
-        ssl->s3->early_data_reason = ssl_early_data_disabled;
-      } else if (session->ticket_max_early_data == 0) {
-        ssl->s3->early_data_reason = ssl_early_data_unsupported_for_session;
-      } else if (!hs->early_data_offered) {
-        ssl->s3->early_data_reason = ssl_early_data_peer_declined;
-      } else if (ssl->s3->channel_id_valid) {
-          // Channel ID is incompatible with 0-RTT.
-        ssl->s3->early_data_reason = ssl_early_data_channel_id;
-      } else if (ssl->s3->token_binding_negotiated) {
-          // Token Binding is incompatible with 0-RTT.
-        ssl->s3->early_data_reason = ssl_early_data_token_binding;
-      } else if (MakeConstSpan(ssl->s3->alpn_selected) != session->early_alpn) {
-        // The negotiated ALPN must match the one in the ticket.
-        ssl->s3->early_data_reason = ssl_early_data_alpn_mismatch;
-      } else if (ssl->s3->ticket_age_skew < -kMaxTicketAgeSkewSeconds ||
-                 kMaxTicketAgeSkewSeconds < ssl->s3->ticket_age_skew) {
-        ssl->s3->early_data_reason = ssl_early_data_ticket_age_skew;
-      } else if (!quic_ticket_compatible(session.get(), hs->config)) {
-        ssl->s3->early_data_reason = ssl_early_data_quic_parameter_mismatch;
-      } else {
-        ssl->s3->early_data_reason = ssl_early_data_accepted;
-        ssl->s3->early_data_accepted = true;
-      }
-
       ssl->s3->session_reused = true;
 
       // Resumption incorporates fresh key material, so refresh the timeout.
@@ -417,15 +386,83 @@
       return ssl_hs_pending_ticket;
   }
 
+  // Negotiate ALPS now, after ALPN is negotiated and |hs->new_session| is
+  // initialized.
+  if (!ssl_negotiate_alps(hs, &alert, &client_hello)) {
+    ssl_send_alert(ssl, SSL3_AL_FATAL, alert);
+    return ssl_hs_error;
+  }
+
+  // Determine if we're negotiating 0-RTT.
+  if (!ssl->enable_early_data) {
+    ssl->s3->early_data_reason = ssl_early_data_disabled;
+  } else if (!offered_ticket) {
+    ssl->s3->early_data_reason = ssl_early_data_no_session_offered;
+  } else if (!session) {
+    ssl->s3->early_data_reason = ssl_early_data_session_not_resumed;
+  } else if (session->ticket_max_early_data == 0) {
+    ssl->s3->early_data_reason = ssl_early_data_unsupported_for_session;
+  } else if (!hs->early_data_offered) {
+    ssl->s3->early_data_reason = ssl_early_data_peer_declined;
+  } else if (ssl->s3->channel_id_valid) {
+    // Channel ID is incompatible with 0-RTT.
+    ssl->s3->early_data_reason = ssl_early_data_channel_id;
+  } else if (ssl->s3->token_binding_negotiated) {
+    // Token Binding is incompatible with 0-RTT.
+    ssl->s3->early_data_reason = ssl_early_data_token_binding;
+  } else if (MakeConstSpan(ssl->s3->alpn_selected) != session->early_alpn) {
+    // The negotiated ALPN must match the one in the ticket.
+    ssl->s3->early_data_reason = ssl_early_data_alpn_mismatch;
+  } else if (hs->new_session->has_application_settings !=
+                 session->has_application_settings ||
+             MakeConstSpan(hs->new_session->local_application_settings) !=
+                 session->local_application_settings) {
+    ssl->s3->early_data_reason = ssl_early_data_alps_mismatch;
+  } else if (ssl->s3->ticket_age_skew < -kMaxTicketAgeSkewSeconds ||
+             kMaxTicketAgeSkewSeconds < ssl->s3->ticket_age_skew) {
+    ssl->s3->early_data_reason = ssl_early_data_ticket_age_skew;
+  } else if (!quic_ticket_compatible(session.get(), hs->config)) {
+    ssl->s3->early_data_reason = ssl_early_data_quic_parameter_mismatch;
+  } else {
+    // |ssl_session_is_resumable| forbids cross-cipher resumptions even if the
+    // PRF hashes match.
+    assert(hs->new_cipher == session->cipher);
+
+    ssl->s3->early_data_reason = ssl_early_data_accepted;
+    ssl->s3->early_data_accepted = true;
+  }
+
   // Record connection properties in the new session.
   hs->new_session->cipher = hs->new_cipher;
 
-  // Store the initial negotiated ALPN in the session.
+  // Store the ALPN and ALPS values in the session for 0-RTT. Note the peer
+  // applications settings are not generally known until client
+  // EncryptedExtensions.
   if (!hs->new_session->early_alpn.CopyFrom(ssl->s3->alpn_selected)) {
     ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
     return ssl_hs_error;
   }
 
+  // The peer applications settings are usually received later, in
+  // EncryptedExtensions. But, in 0-RTT handshakes, we carry over the
+  // values from |session|. Do this now, before |session| is discarded.
+  if (ssl->s3->early_data_accepted &&
+      hs->new_session->has_application_settings &&
+      !hs->new_session->peer_application_settings.CopyFrom(
+          session->peer_application_settings)) {
+    ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
+    return ssl_hs_error;
+  }
+
+  // Copy the QUIC early data context to the session.
+  if (ssl->enable_early_data && ssl->quic_method) {
+    if (!hs->new_session->quic_early_data_context.CopyFrom(
+            hs->config->quic_early_data_context)) {
+      ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
+      return ssl_hs_error;
+    }
+  }
+
   if (ssl->ctx->dos_protection_cb != NULL &&
       ssl->ctx->dos_protection_cb(&client_hello) == 0) {
     // Connection rejected for DOS reasons.
@@ -440,8 +477,8 @@
   // Set up the key schedule and incorporate the PSK into the running secret.
   if (ssl->s3->session_reused) {
     if (!tls13_init_key_schedule(
-            hs, MakeConstSpan(hs->new_session->master_key,
-                              hs->new_session->master_key_length))) {
+            hs, MakeConstSpan(hs->new_session->secret,
+                              hs->new_session->secret_length))) {
       return ssl_hs_error;
     }
   } else if (!tls13_init_key_schedule(hs, MakeConstSpan(kZeroes, hash_len))) {
@@ -496,7 +533,7 @@
       !CBB_add_bytes(&body, kHelloRetryRequest, SSL3_RANDOM_SIZE) ||
       !CBB_add_u8_length_prefixed(&body, &session_id) ||
       !CBB_add_bytes(&session_id, hs->session_id, hs->session_id_len) ||
-      !CBB_add_u16(&body, ssl_cipher_get_value(hs->new_cipher)) ||
+      !CBB_add_u16(&body, SSL_CIPHER_get_protocol_id(hs->new_cipher)) ||
       !CBB_add_u8(&body, 0 /* no compression */) ||
       !tls1_get_shared_group(hs, &group_id) ||
       !CBB_add_u16_length_prefixed(&body, &extensions) ||
@@ -599,20 +636,58 @@
 static enum ssl_hs_wait_t do_send_server_hello(SSL_HANDSHAKE *hs) {
   SSL *const ssl = hs->ssl;
 
+  Span<uint8_t> random(ssl->s3->server_random);
+  RAND_bytes(random.data(), random.size());
+
+  // If the ClientHello has an ech_is_inner extension, we must be the ECH
+  // backend server. In response to ech_is_inner, we will overwrite part of the
+  // ServerHello.random with the ECH acceptance confirmation.
+  if (hs->ech_is_inner_present) {
+    // Construct the ServerHelloECHConf message, which is the same as
+    // ServerHello, except the last 8 bytes of its random field are zeroed out.
+    Span<uint8_t> random_suffix = random.subspan(24);
+    OPENSSL_memset(random_suffix.data(), 0, random_suffix.size());
+
+    ScopedCBB cbb;
+    CBB body, extensions, session_id;
+    if (!ssl->method->init_message(ssl, cbb.get(), &body,
+                                   SSL3_MT_SERVER_HELLO) ||
+        !CBB_add_u16(&body, TLS1_2_VERSION) ||
+        !CBB_add_bytes(&body, random.data(), random.size()) ||
+        !CBB_add_u8_length_prefixed(&body, &session_id) ||
+        !CBB_add_bytes(&session_id, hs->session_id, hs->session_id_len) ||
+        !CBB_add_u16(&body, SSL_CIPHER_get_protocol_id(hs->new_cipher)) ||
+        !CBB_add_u8(&body, 0) ||
+        !CBB_add_u16_length_prefixed(&body, &extensions) ||
+        !ssl_ext_pre_shared_key_add_serverhello(hs, &extensions) ||
+        !ssl_ext_key_share_add_serverhello(hs, &extensions, /*dry_run=*/true) ||
+        !ssl_ext_supported_versions_add_serverhello(hs, &extensions) ||
+        !CBB_flush(cbb.get())) {
+      return ssl_hs_error;
+    }
+
+    // Note that |cbb| includes the message type and length fields, but not the
+    // record layer header.
+    if (!tls13_ech_accept_confirmation(
+            hs, random_suffix,
+            bssl::MakeConstSpan(CBB_data(cbb.get()), CBB_len(cbb.get())))) {
+      return ssl_hs_error;
+    }
+  }
+
   // Send a ServerHello.
   ScopedCBB cbb;
   CBB body, extensions, session_id;
   if (!ssl->method->init_message(ssl, cbb.get(), &body, SSL3_MT_SERVER_HELLO) ||
       !CBB_add_u16(&body, TLS1_2_VERSION) ||
-      !RAND_bytes(ssl->s3->server_random, sizeof(ssl->s3->server_random)) ||
-      !CBB_add_bytes(&body, ssl->s3->server_random, SSL3_RANDOM_SIZE) ||
+      !CBB_add_bytes(&body, random.data(), random.size()) ||
       !CBB_add_u8_length_prefixed(&body, &session_id) ||
       !CBB_add_bytes(&session_id, hs->session_id, hs->session_id_len) ||
-      !CBB_add_u16(&body, ssl_cipher_get_value(hs->new_cipher)) ||
+      !CBB_add_u16(&body, SSL_CIPHER_get_protocol_id(hs->new_cipher)) ||
       !CBB_add_u8(&body, 0) ||
       !CBB_add_u16_length_prefixed(&body, &extensions) ||
       !ssl_ext_pre_shared_key_add_serverhello(hs, &extensions) ||
-      !ssl_ext_key_share_add_serverhello(hs, &extensions) ||
+      !ssl_ext_key_share_add_serverhello(hs, &extensions, /*dry_run=*/false) ||
       !ssl_ext_supported_versions_add_serverhello(hs, &extensions) ||
       !ssl_add_message_cbb(ssl, cbb.get())) {
     return ssl_hs_error;
@@ -807,7 +882,7 @@
                                hs->client_handshake_secret())) {
       return ssl_hs_error;
     }
-    hs->tls13_state = state13_read_client_certificate;
+    hs->tls13_state = state13_process_end_of_early_data;
     return ssl->s3->early_data_accepted ? ssl_hs_early_return : ssl_hs_ok;
   }
 
@@ -818,28 +893,90 @@
 
 static enum ssl_hs_wait_t do_process_end_of_early_data(SSL_HANDSHAKE *hs) {
   SSL *const ssl = hs->ssl;
-  // If early data was not accepted, the EndOfEarlyData will be in the discarded
-  // early data.
-  if (hs->ssl->s3->early_data_accepted) {
+  // In protocols that use EndOfEarlyData, we must consume the extra message and
+  // switch to client_handshake_secret after the early return.
+  if (ssl->quic_method == nullptr) {
+    // If early data was not accepted, the EndOfEarlyData will be in the
+    // discarded early data.
+    if (hs->ssl->s3->early_data_accepted) {
+      SSLMessage msg;
+      if (!ssl->method->get_message(ssl, &msg)) {
+        return ssl_hs_read_message;
+      }
+      if (!ssl_check_message_type(ssl, msg, SSL3_MT_END_OF_EARLY_DATA)) {
+        return ssl_hs_error;
+      }
+      if (CBS_len(&msg.body) != 0) {
+        ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_DECODE_ERROR);
+        OPENSSL_PUT_ERROR(SSL, SSL_R_DECODE_ERROR);
+        return ssl_hs_error;
+      }
+      ssl->method->next_message(ssl);
+    }
+    if (!tls13_set_traffic_key(ssl, ssl_encryption_handshake, evp_aead_open,
+                               hs->new_session.get(),
+                               hs->client_handshake_secret())) {
+      return ssl_hs_error;
+    }
+  }
+  hs->tls13_state = state13_read_client_encrypted_extensions;
+  return ssl_hs_ok;
+}
+
+static enum ssl_hs_wait_t do_read_client_encrypted_extensions(
+    SSL_HANDSHAKE *hs) {
+  SSL *const ssl = hs->ssl;
+  // For now, only one extension uses client EncryptedExtensions. This function
+  // may be generalized if others use it in the future.
+  if (hs->new_session->has_application_settings &&
+      !ssl->s3->early_data_accepted) {
     SSLMessage msg;
     if (!ssl->method->get_message(ssl, &msg)) {
       return ssl_hs_read_message;
     }
-    if (!ssl_check_message_type(ssl, msg, SSL3_MT_END_OF_EARLY_DATA)) {
+    if (!ssl_check_message_type(ssl, msg, SSL3_MT_ENCRYPTED_EXTENSIONS)) {
       return ssl_hs_error;
     }
-    if (CBS_len(&msg.body) != 0) {
-      ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_DECODE_ERROR);
+
+    CBS body = msg.body, extensions;
+    if (!CBS_get_u16_length_prefixed(&body, &extensions) ||
+        CBS_len(&body) != 0) {
       OPENSSL_PUT_ERROR(SSL, SSL_R_DECODE_ERROR);
+      ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_DECODE_ERROR);
       return ssl_hs_error;
     }
+
+    // Parse out the extensions.
+    bool have_application_settings = false;
+    CBS application_settings;
+    SSL_EXTENSION_TYPE ext_types[] = {{TLSEXT_TYPE_application_settings,
+                                       &have_application_settings,
+                                       &application_settings}};
+    uint8_t alert = SSL_AD_DECODE_ERROR;
+    if (!ssl_parse_extensions(&extensions, &alert, ext_types,
+                              /*ignore_unknown=*/false)) {
+      ssl_send_alert(ssl, SSL3_AL_FATAL, alert);
+      return ssl_hs_error;
+    }
+
+    if (!have_application_settings) {
+      OPENSSL_PUT_ERROR(SSL, SSL_R_MISSING_EXTENSION);
+      ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_MISSING_EXTENSION);
+      return ssl_hs_error;
+    }
+
+    // Note that, if 0-RTT was accepted, these values will already have been
+    // initialized earlier.
+    if (!hs->new_session->peer_application_settings.CopyFrom(
+            application_settings) ||
+        !ssl_hash_message(hs, msg)) {
+      ssl_send_alert(ssl, SSL3_AL_FATAL, SSL_AD_INTERNAL_ERROR);
+      return ssl_hs_error;
+    }
+
     ssl->method->next_message(ssl);
   }
-  if (!tls13_set_traffic_key(ssl, ssl_encryption_handshake, evp_aead_open,
-                             hs->new_session.get(),
-                             hs->client_handshake_secret())) {
-    return ssl_hs_error;
-  }
+
   hs->tls13_state = state13_read_client_certificate;
   return ssl_hs_ok;
 }
@@ -878,8 +1015,7 @@
   return ssl_hs_ok;
 }
 
-static enum ssl_hs_wait_t do_read_client_certificate_verify(
-    SSL_HANDSHAKE *hs) {
+static enum ssl_hs_wait_t do_read_client_certificate_verify(SSL_HANDSHAKE *hs) {
   SSL *const ssl = hs->ssl;
   if (sk_CRYPTO_BUFFER_num(hs->new_session->certs.get()) == 0) {
     // Skip this state.
@@ -1023,6 +1159,9 @@
       case state13_process_end_of_early_data:
         ret = do_process_end_of_early_data(hs);
         break;
+      case state13_read_client_encrypted_extensions:
+        ret = do_read_client_encrypted_extensions(hs);
+        break;
       case state13_read_client_certificate:
         ret = do_read_client_certificate(hs);
         break;
@@ -1079,6 +1218,8 @@
       return "TLS 1.3 server read_second_client_flight";
     case state13_process_end_of_early_data:
       return "TLS 1.3 server process_end_of_early_data";
+    case state13_read_client_encrypted_extensions:
+      return "TLS 1.3 server read_client_encrypted_extensions";
     case state13_read_client_certificate:
       return "TLS 1.3 server read_client_certificate";
     case state13_read_client_certificate_verify:
diff --git a/deps/boringssl/src/ssl/tls_record.cc b/deps/boringssl/src/ssl/tls_record.cc
index 464c5c5..acff1ad 100644
--- a/deps/boringssl/src/ssl/tls_record.cc
+++ b/deps/boringssl/src/ssl/tls_record.cc
@@ -447,13 +447,15 @@
     // TLS 1.3 adds an extra byte for encrypted record type.
     extra_in_len = 1;
   }
-  if (type == SSL3_RT_APPLICATION_DATA &&  // clang-format off
+  // clang-format off
+  if (type == SSL3_RT_APPLICATION_DATA &&
       in_len > 1 &&
       ssl_needs_record_splitting(ssl)) {
     // With record splitting enabled, the first byte gets sealed into a separate
     // record which is written into the prefix.
     in_len -= 1;
   }
+  // clang-format on
   return ssl->s3->aead_write_ctx->SuffixLen(out_suffix_len, in_len, extra_in_len);
 }
 
@@ -465,8 +467,8 @@
 // |tls_seal_scatter_record| implements TLS 1.0 CBC 1/n-1 record splitting and
 // may write two records concatenated.
 static bool tls_seal_scatter_record(SSL *ssl, uint8_t *out_prefix, uint8_t *out,
-                                   uint8_t *out_suffix, uint8_t type,
-                                   const uint8_t *in, size_t in_len) {
+                                    uint8_t *out_suffix, uint8_t type,
+                                    const uint8_t *in, size_t in_len) {
   if (type == SSL3_RT_APPLICATION_DATA && in_len > 1 &&
       ssl_needs_record_splitting(ssl)) {
     assert(ssl->s3->aead_write_ctx->ExplicitNonceLen() == 0);
diff --git a/deps/boringssl/src/third_party/googletest/CMakeLists.txt b/deps/boringssl/src/third_party/googletest/CMakeLists.txt
index c8a221d..1379afb 100644
--- a/deps/boringssl/src/third_party/googletest/CMakeLists.txt
+++ b/deps/boringssl/src/third_party/googletest/CMakeLists.txt
@@ -1,4 +1,7 @@
 ########################################################################
+# Note: CMake support is community-based. The maintainers do not use CMake
+# internally.
+#
 # CMake build script for Google Test.
 #
 # To run the tests for Google Test itself on Linux, use 'make test' or
@@ -50,7 +53,7 @@
   cmake_policy(SET CMP0048 NEW)
   project(gtest VERSION ${GOOGLETEST_VERSION} LANGUAGES CXX C)
 endif()
-cmake_minimum_required(VERSION 2.6.4)
+cmake_minimum_required(VERSION 2.8.12)
 
 if (POLICY CMP0063) # Visibility
   cmake_policy(SET CMP0063 NEW)
@@ -97,7 +100,7 @@
   set(generated_dir "${CMAKE_CURRENT_BINARY_DIR}/generated" CACHE INTERNAL "")
   set(cmake_files_install_dir "${CMAKE_INSTALL_LIBDIR}/cmake/${cmake_package_name}")
   set(version_file "${generated_dir}/${cmake_package_name}ConfigVersion.cmake")
-  write_basic_package_version_file(${version_file} COMPATIBILITY AnyNewerVersion)
+  write_basic_package_version_file(${version_file} VERSION ${GOOGLETEST_VERSION} COMPATIBILITY AnyNewerVersion)
   install(EXPORT ${targets_export_name}
     NAMESPACE ${cmake_package_name}::
     DESTINATION ${cmake_files_install_dir})
@@ -123,7 +126,9 @@
 # are used for other targets, to ensure that gtest can be compiled by a user
 # aggressive about warnings.
 cxx_library(gtest "${cxx_strict}" src/gtest-all.cc)
+set_target_properties(gtest PROPERTIES VERSION ${GOOGLETEST_VERSION})
 cxx_library(gtest_main "${cxx_strict}" src/gtest_main.cc)
+set_target_properties(gtest_main PROPERTIES VERSION ${GOOGLETEST_VERSION})
 # If the CMake version supports it, attach header directory information
 # to the targets for when we are part of a parent build (ie being pulled
 # in via add_subdirectory() rather than being a standalone build).
@@ -179,20 +184,6 @@
   # 'make test' or ctest.
   enable_testing()
 
-  if (WIN32)
-    file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/$<CONFIG>/RunTest.ps1"
-         CONTENT
-"$project_bin = \"${CMAKE_BINARY_DIR}/bin/$<CONFIG>\"
-$env:Path = \"$project_bin;$env:Path\"
-& $args")
-  elseif (MINGW)
-    file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/RunTest.ps1"
-         CONTENT
-"$project_bin = (cygpath --windows ${CMAKE_BINARY_DIR}/bin)
-$env:Path = \"$project_bin;$env:Path\"
-& $args")
-  endif()
-
   ############################################################
   # C++ tests built with standard compiler flags.
 
@@ -263,6 +254,7 @@
   cxx_executable(googletest-break-on-failure-unittest_ test gtest)
   py_test(googletest-break-on-failure-unittest)
 
+  py_test(gtest_skip_check_output_test)
   py_test(gtest_skip_environment_check_output_test)
 
   # Visual Studio .NET 2003 does not support STL with exceptions disabled.
@@ -314,6 +306,9 @@
   cxx_executable(googletest-uninitialized-test_ test gtest)
   py_test(googletest-uninitialized-test)
 
+  cxx_executable(gtest_list_output_unittest_ test gtest)
+  py_test(gtest_list_output_unittest)
+
   cxx_executable(gtest_xml_outfile1_test_ test gtest_main)
   cxx_executable(gtest_xml_outfile2_test_ test gtest_main)
   py_test(gtest_xml_outfiles_test)
diff --git a/deps/boringssl/src/third_party/googletest/codegear/gtest_all.cc b/deps/boringssl/src/third_party/googletest/codegear/gtest_all.cc
deleted file mode 100644
index 121b2d8..0000000
--- a/deps/boringssl/src/third_party/googletest/codegear/gtest_all.cc
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright 2009, Google Inc.

-// All rights reserved.

-//

-// Redistribution and use in source and binary forms, with or without

-// modification, are permitted provided that the following conditions are

-// met:

-//

-//     * Redistributions of source code must retain the above copyright

-// notice, this list of conditions and the following disclaimer.

-//     * Redistributions in binary form must reproduce the above

-// copyright notice, this list of conditions and the following disclaimer

-// in the documentation and/or other materials provided with the

-// distribution.

-//     * Neither the name of Google Inc. nor the names of its

-// contributors may be used to endorse or promote products derived from

-// this software without specific prior written permission.

-//

-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-//

-// Author: Josh Kelley (joshkel@gmail.com)

-//

-// Google C++ Testing Framework (Google Test)

-//

-// C++Builder's IDE cannot build a static library from files with hyphens

-// in their name.  See http://qc.codegear.com/wc/qcmain.aspx?d=70977 .

-// This file serves as a workaround.

-

-#include "src/gtest-all.cc"

diff --git a/deps/boringssl/src/third_party/googletest/codegear/gtest_link.cc b/deps/boringssl/src/third_party/googletest/codegear/gtest_link.cc
deleted file mode 100644
index 918eccd..0000000
--- a/deps/boringssl/src/third_party/googletest/codegear/gtest_link.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-// Copyright 2009, Google Inc.

-// All rights reserved.

-//

-// Redistribution and use in source and binary forms, with or without

-// modification, are permitted provided that the following conditions are

-// met:

-//

-//     * Redistributions of source code must retain the above copyright

-// notice, this list of conditions and the following disclaimer.

-//     * Redistributions in binary form must reproduce the above

-// copyright notice, this list of conditions and the following disclaimer

-// in the documentation and/or other materials provided with the

-// distribution.

-//     * Neither the name of Google Inc. nor the names of its

-// contributors may be used to endorse or promote products derived from

-// this software without specific prior written permission.

-//

-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

-//

-// Author: Josh Kelley (joshkel@gmail.com)

-//

-// Google C++ Testing Framework (Google Test)

-//

-// Links gtest.lib and gtest_main.lib into the current project in C++Builder.

-// This means that these libraries can't be renamed, but it's the only way to

-// ensure that Debug versus Release test builds are linked against the

-// appropriate Debug or Release build of the libraries.

-

-#pragma link "gtest.lib"

-#pragma link "gtest_main.lib"

diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-death-test.h b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-death-test.h
index 0eb5b27..2bd41cf 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-death-test.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-death-test.h
@@ -190,11 +190,10 @@
 class GTEST_API_ ExitedWithCode {
  public:
   explicit ExitedWithCode(int exit_code);
+  ExitedWithCode(const ExitedWithCode&) = default;
+  void operator=(const ExitedWithCode& other) = delete;
   bool operator()(int exit_status) const;
  private:
-  // No implementation - assignment is unsupported.
-  void operator=(const ExitedWithCode& other);
-
   const int exit_code_;
 };
 
@@ -276,20 +275,20 @@
 // This macro is used for implementing macros such as
 // EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where
 // death tests are not supported. Those macros must compile on such systems
-// iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on
-// systems that support death tests. This allows one to write such a macro
-// on a system that does not support death tests and be sure that it will
-// compile on a death-test supporting system. It is exposed publicly so that
-// systems that have death-tests with stricter requirements than
-// GTEST_HAS_DEATH_TEST can write their own equivalent of
-// EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED.
+// if and only if EXPECT_DEATH and ASSERT_DEATH compile with the same parameters
+// on systems that support death tests. This allows one to write such a macro on
+// a system that does not support death tests and be sure that it will compile
+// on a death-test supporting system. It is exposed publicly so that systems
+// that have death-tests with stricter requirements than GTEST_HAS_DEATH_TEST
+// can write their own equivalent of EXPECT_DEATH_IF_SUPPORTED and
+// ASSERT_DEATH_IF_SUPPORTED.
 //
 // Parameters:
 //   statement -  A statement that a macro such as EXPECT_DEATH would test
 //                for program termination. This macro has to make sure this
 //                statement is compiled but not executed, to ensure that
 //                EXPECT_DEATH_IF_SUPPORTED compiles with a certain
-//                parameter iff EXPECT_DEATH compiles with it.
+//                parameter if and only if EXPECT_DEATH compiles with it.
 //   regex     -  A regex that a macro such as EXPECT_DEATH would use to test
 //                the output of statement.  This parameter has to be
 //                compiled but not evaluated by this macro, to ensure that
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-matchers.h b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-matchers.h
index 6e73ba1..04cc63d 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-matchers.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-matchers.h
@@ -42,6 +42,7 @@
 #include <memory>
 #include <ostream>
 #include <string>
+#include <type_traits>
 
 #include "gtest/gtest-printers.h"
 #include "gtest/internal/gtest-internal.h"
@@ -95,8 +96,8 @@
   // Returns the underlying ostream.
   ::std::ostream* stream() { return stream_; }
 
-  // Returns true iff the listener is interested in an explanation of
-  // the match result.  A matcher's MatchAndExplain() method can use
+  // Returns true if and only if the listener is interested in an explanation
+  // of the match result.  A matcher's MatchAndExplain() method can use
   // this information to avoid generating the explanation when no one
   // intends to hear it.
   bool IsInterested() const { return stream_ != nullptr; }
@@ -140,8 +141,8 @@
 template <typename T>
 class MatcherInterface : public MatcherDescriberInterface {
  public:
-  // Returns true iff the matcher matches x; also explains the match
-  // result to 'listener' if necessary (see the next paragraph), in
+  // Returns true if and only if the matcher matches x; also explains the
+  // match result to 'listener' if necessary (see the next paragraph), in
   // the form of a non-restrictive relative clause ("which ...",
   // "whose ...", etc) that describes x.  For example, the
   // MatchAndExplain() method of the Pointee(...) matcher should
@@ -257,13 +258,13 @@
 template <typename T>
 class MatcherBase {
  public:
-  // Returns true iff the matcher matches x; also explains the match
-  // result to 'listener'.
+  // Returns true if and only if the matcher matches x; also explains the
+  // match result to 'listener'.
   bool MatchAndExplain(const T& x, MatchResultListener* listener) const {
     return impl_->MatchAndExplain(x, listener);
   }
 
-  // Returns true iff this matcher matches x.
+  // Returns true if and only if this matcher matches x.
   bool Matches(const T& x) const {
     DummyMatchResultListener dummy;
     return MatchAndExplain(x, &dummy);
@@ -299,8 +300,8 @@
   template <typename U>
   explicit MatcherBase(
       const MatcherInterface<U>* impl,
-      typename internal::EnableIf<
-          !internal::IsSame<U, const U&>::value>::type* = nullptr)
+      typename std::enable_if<!std::is_same<U, const U&>::value>::type* =
+          nullptr)
       : impl_(new internal::MatcherInterfaceAdapter<U>(impl)) {}
 
   MatcherBase(const MatcherBase&) = default;
@@ -333,9 +334,10 @@
       : internal::MatcherBase<T>(impl) {}
 
   template <typename U>
-  explicit Matcher(const MatcherInterface<U>* impl,
-                   typename internal::EnableIf<
-                       !internal::IsSame<U, const U&>::value>::type* = nullptr)
+  explicit Matcher(
+      const MatcherInterface<U>* impl,
+      typename std::enable_if<!std::is_same<U, const U&>::value>::type* =
+          nullptr)
       : internal::MatcherBase<T>(impl) {}
 
   // Implicit constructor here allows people to write
@@ -382,18 +384,18 @@
   Matcher(const char* s);  // NOLINT
 };
 
-#if GTEST_HAS_ABSL
+#if GTEST_INTERNAL_HAS_STRING_VIEW
 // The following two specializations allow the user to write str
 // instead of Eq(str) and "foo" instead of Eq("foo") when a absl::string_view
 // matcher is expected.
 template <>
-class GTEST_API_ Matcher<const absl::string_view&>
-    : public internal::MatcherBase<const absl::string_view&> {
+class GTEST_API_ Matcher<const internal::StringView&>
+    : public internal::MatcherBase<const internal::StringView&> {
  public:
   Matcher() {}
 
-  explicit Matcher(const MatcherInterface<const absl::string_view&>* impl)
-      : internal::MatcherBase<const absl::string_view&>(impl) {}
+  explicit Matcher(const MatcherInterface<const internal::StringView&>* impl)
+      : internal::MatcherBase<const internal::StringView&>(impl) {}
 
   // Allows the user to write str instead of Eq(str) sometimes, where
   // str is a std::string object.
@@ -402,20 +404,20 @@
   // Allows the user to write "foo" instead of Eq("foo") sometimes.
   Matcher(const char* s);  // NOLINT
 
-  // Allows the user to pass absl::string_views directly.
-  Matcher(absl::string_view s);  // NOLINT
+  // Allows the user to pass absl::string_views or std::string_views directly.
+  Matcher(internal::StringView s);  // NOLINT
 };
 
 template <>
-class GTEST_API_ Matcher<absl::string_view>
-    : public internal::MatcherBase<absl::string_view> {
+class GTEST_API_ Matcher<internal::StringView>
+    : public internal::MatcherBase<internal::StringView> {
  public:
   Matcher() {}
 
-  explicit Matcher(const MatcherInterface<const absl::string_view&>* impl)
-      : internal::MatcherBase<absl::string_view>(impl) {}
-  explicit Matcher(const MatcherInterface<absl::string_view>* impl)
-      : internal::MatcherBase<absl::string_view>(impl) {}
+  explicit Matcher(const MatcherInterface<const internal::StringView&>* impl)
+      : internal::MatcherBase<internal::StringView>(impl) {}
+  explicit Matcher(const MatcherInterface<internal::StringView>* impl)
+      : internal::MatcherBase<internal::StringView>(impl) {}
 
   // Allows the user to write str instead of Eq(str) sometimes, where
   // str is a std::string object.
@@ -424,10 +426,10 @@
   // Allows the user to write "foo" instead of Eq("foo") sometimes.
   Matcher(const char* s);  // NOLINT
 
-  // Allows the user to pass absl::string_views directly.
-  Matcher(absl::string_view s);  // NOLINT
+  // Allows the user to pass absl::string_views or std::string_views directly.
+  Matcher(internal::StringView s);  // NOLINT
 };
-#endif  // GTEST_HAS_ABSL
+#endif  // GTEST_INTERNAL_HAS_STRING_VIEW
 
 // Prints a matcher in a human-readable format.
 template <typename T>
@@ -472,13 +474,13 @@
    public:
     explicit MonomorphicImpl(const Impl& impl) : impl_(impl) {}
 
-    virtual void DescribeTo(::std::ostream* os) const { impl_.DescribeTo(os); }
+    void DescribeTo(::std::ostream* os) const override { impl_.DescribeTo(os); }
 
-    virtual void DescribeNegationTo(::std::ostream* os) const {
+    void DescribeNegationTo(::std::ostream* os) const override {
       impl_.DescribeNegationTo(os);
     }
 
-    virtual bool MatchAndExplain(T x, MatchResultListener* listener) const {
+    bool MatchAndExplain(T x, MatchResultListener* listener) const override {
       return impl_.MatchAndExplain(x, listener);
     }
 
@@ -610,6 +612,10 @@
   static const char* NegatedDesc() { return "isn't >="; }
 };
 
+template <typename T, typename = typename std::enable_if<
+                          std::is_constructible<std::string, T>::value>::type>
+using StringLike = T;
+
 // Implements polymorphic matchers MatchesRegex(regex) and
 // ContainsRegex(regex), which can be used as a Matcher<T> as long as
 // T can be converted to a string.
@@ -618,12 +624,12 @@
   MatchesRegexMatcher(const RE* regex, bool full_match)
       : regex_(regex), full_match_(full_match) {}
 
-#if GTEST_HAS_ABSL
-  bool MatchAndExplain(const absl::string_view& s,
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+  bool MatchAndExplain(const internal::StringView& s,
                        MatchResultListener* listener) const {
     return MatchAndExplain(std::string(s), listener);
   }
-#endif  // GTEST_HAS_ABSL
+#endif  // GTEST_INTERNAL_HAS_STRING_VIEW
 
   // Accepts pointer types, particularly:
   //   const char*
@@ -670,9 +676,10 @@
     const internal::RE* regex) {
   return MakePolymorphicMatcher(internal::MatchesRegexMatcher(regex, true));
 }
-inline PolymorphicMatcher<internal::MatchesRegexMatcher> MatchesRegex(
-    const std::string& regex) {
-  return MatchesRegex(new internal::RE(regex));
+template <typename T = std::string>
+PolymorphicMatcher<internal::MatchesRegexMatcher> MatchesRegex(
+    const internal::StringLike<T>& regex) {
+  return MatchesRegex(new internal::RE(std::string(regex)));
 }
 
 // Matches a string that contains regular expression 'regex'.
@@ -681,9 +688,10 @@
     const internal::RE* regex) {
   return MakePolymorphicMatcher(internal::MatchesRegexMatcher(regex, false));
 }
-inline PolymorphicMatcher<internal::MatchesRegexMatcher> ContainsRegex(
-    const std::string& regex) {
-  return ContainsRegex(new internal::RE(regex));
+template <typename T = std::string>
+PolymorphicMatcher<internal::MatchesRegexMatcher> ContainsRegex(
+    const internal::StringLike<T>& regex) {
+  return ContainsRegex(new internal::RE(std::string(regex)));
 }
 
 // Creates a polymorphic matcher that matches anything equal to x.
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-message.h b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-message.h
index 4a80e11..2189923 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-message.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-message.h
@@ -49,6 +49,7 @@
 
 #include <limits>
 #include <memory>
+#include <sstream>
 
 #include "gtest/internal/gtest-port.h"
 
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-param-test.h b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-param-test.h
index d7c9dd8..9a60b76 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-param-test.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-param-test.h
@@ -174,6 +174,7 @@
 
 #endif  // 0
 
+#include <iterator>
 #include <utility>
 
 #include "gtest/internal/gtest-internal.h"
@@ -292,10 +293,9 @@
 //
 template <typename ForwardIterator>
 internal::ParamGenerator<
-  typename ::testing::internal::IteratorTraits<ForwardIterator>::value_type>
+    typename std::iterator_traits<ForwardIterator>::value_type>
 ValuesIn(ForwardIterator begin, ForwardIterator end) {
-  typedef typename ::testing::internal::IteratorTraits<ForwardIterator>
-      ::value_type ParamType;
+  typedef typename std::iterator_traits<ForwardIterator>::value_type ParamType;
   return internal::ParamGenerator<ParamType>(
       new internal::ValuesInIteratorRangeGenerator<ParamType>(begin, end));
 }
@@ -416,19 +416,20 @@
       : public test_suite_name {                                               \
    public:                                                                     \
     GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() {}                    \
-    virtual void TestBody();                                                   \
+    void TestBody() override;                                                  \
                                                                                \
    private:                                                                    \
     static int AddToRegistry() {                                               \
       ::testing::UnitTest::GetInstance()                                       \
           ->parameterized_test_registry()                                      \
           .GetTestSuitePatternHolder<test_suite_name>(                         \
-              #test_suite_name,                                                \
+              GTEST_STRINGIFY_(test_suite_name),                               \
               ::testing::internal::CodeLocation(__FILE__, __LINE__))           \
           ->AddTestPattern(                                                    \
               GTEST_STRINGIFY_(test_suite_name), GTEST_STRINGIFY_(test_name),  \
               new ::testing::internal::TestMetaFactory<GTEST_TEST_CLASS_NAME_( \
-                  test_suite_name, test_name)>());                             \
+                  test_suite_name, test_name)>(),                              \
+              ::testing::internal::CodeLocation(__FILE__, __LINE__));          \
       return 0;                                                                \
     }                                                                          \
     static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_;               \
@@ -483,13 +484,21 @@
           ::testing::UnitTest::GetInstance()                                  \
               ->parameterized_test_registry()                                 \
               .GetTestSuitePatternHolder<test_suite_name>(                    \
-                  #test_suite_name,                                           \
+                  GTEST_STRINGIFY_(test_suite_name),                          \
                   ::testing::internal::CodeLocation(__FILE__, __LINE__))      \
               ->AddTestSuiteInstantiation(                                    \
-                  #prefix, &gtest_##prefix##test_suite_name##_EvalGenerator_, \
+                  GTEST_STRINGIFY_(prefix),                                   \
+                  &gtest_##prefix##test_suite_name##_EvalGenerator_,          \
                   &gtest_##prefix##test_suite_name##_EvalGenerateName_,       \
                   __FILE__, __LINE__)
 
+
+// Allow Marking a Parameterized test class as not needing to be instantiated.
+#define GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(T)                   \
+  namespace gtest_do_not_use_outside_namespace_scope {}                   \
+  static const ::testing::internal::MarkAsIgnored gtest_allow_ignore_##T( \
+      GTEST_STRINGIFY_(T))
+
 // Legacy API is deprecated but still available
 #ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 #define INSTANTIATE_TEST_CASE_P                                            \
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-printers.h b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-printers.h
index 97cdb2a..463f0af 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-printers.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-printers.h
@@ -111,60 +111,120 @@
 #include "gtest/internal/gtest-internal.h"
 #include "gtest/internal/gtest-port.h"
 
-#if GTEST_HAS_ABSL
-#include "absl/strings/string_view.h"
-#include "absl/types/optional.h"
-#include "absl/types/variant.h"
-#endif  // GTEST_HAS_ABSL
-
 namespace testing {
 
-// Definitions in the 'internal' and 'internal2' name spaces are
-// subject to change without notice.  DO NOT USE THEM IN USER CODE!
-namespace internal2 {
+// Definitions in the internal* namespaces are subject to change without notice.
+// DO NOT USE THEM IN USER CODE!
+namespace internal {
 
-// Prints the given number of bytes in the given object to the given
-// ostream.
-GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
-                                     size_t count,
-                                     ::std::ostream* os);
+template <typename T>
+void UniversalPrint(const T& value, ::std::ostream* os);
 
-// For selecting which printer to use when a given type has neither <<
-// nor PrintTo().
-enum TypeKind {
-  kProtobuf,              // a protobuf type
-  kConvertibleToInteger,  // a type implicitly convertible to BiggestInt
-                          // (e.g. a named or unnamed enum type)
-#if GTEST_HAS_ABSL
-  kConvertibleToStringView,  // a type implicitly convertible to
-                             // absl::string_view
-#endif
-  kOtherType  // anything else
-};
+// Used to print an STL-style container when the user doesn't define
+// a PrintTo() for it.
+struct ContainerPrinter {
+  template <typename T,
+            typename = typename std::enable_if<
+                (sizeof(IsContainerTest<T>(0)) == sizeof(IsContainer)) &&
+                !IsRecursiveContainer<T>::value>::type>
+  static void PrintValue(const T& container, std::ostream* os) {
+    const size_t kMaxCount = 32;  // The maximum number of elements to print.
+    *os << '{';
+    size_t count = 0;
+    for (auto&& elem : container) {
+      if (count > 0) {
+        *os << ',';
+        if (count == kMaxCount) {  // Enough has been printed.
+          *os << " ...";
+          break;
+        }
+      }
+      *os << ' ';
+      // We cannot call PrintTo(elem, os) here as PrintTo() doesn't
+      // handle `elem` being a native array.
+      internal::UniversalPrint(elem, os);
+      ++count;
+    }
 
-// TypeWithoutFormatter<T, kTypeKind>::PrintValue(value, os) is called
-// by the universal printer to print a value of type T when neither
-// operator<< nor PrintTo() is defined for T, where kTypeKind is the
-// "kind" of T as defined by enum TypeKind.
-template <typename T, TypeKind kTypeKind>
-class TypeWithoutFormatter {
- public:
-  // This default version is called when kTypeKind is kOtherType.
-  static void PrintValue(const T& value, ::std::ostream* os) {
-    PrintBytesInObjectTo(static_cast<const unsigned char*>(
-                             reinterpret_cast<const void*>(&value)),
-                         sizeof(value), os);
+    if (count > 0) {
+      *os << ' ';
+    }
+    *os << '}';
   }
 };
 
-// We print a protobuf using its ShortDebugString() when the string
-// doesn't exceed this many characters; otherwise we print it using
-// DebugString() for better readability.
-const size_t kProtobufOneLinerMaxLength = 50;
+// Used to print a pointer that is neither a char pointer nor a member
+// pointer, when the user doesn't define PrintTo() for it.  (A member
+// variable pointer or member function pointer doesn't really point to
+// a location in the address space.  Their representation is
+// implementation-defined.  Therefore they will be printed as raw
+// bytes.)
+struct FunctionPointerPrinter {
+  template <typename T, typename = typename std::enable_if<
+                            std::is_function<T>::value>::type>
+  static void PrintValue(T* p, ::std::ostream* os) {
+    if (p == nullptr) {
+      *os << "NULL";
+    } else {
+      // T is a function type, so '*os << p' doesn't do what we want
+      // (it just prints p as bool).  We want to print p as a const
+      // void*.
+      *os << reinterpret_cast<const void*>(p);
+    }
+  }
+};
 
-template <typename T>
-class TypeWithoutFormatter<T, kProtobuf> {
- public:
+struct PointerPrinter {
+  template <typename T>
+  static void PrintValue(T* p, ::std::ostream* os) {
+    if (p == nullptr) {
+      *os << "NULL";
+    } else {
+      // T is not a function type.  We just call << to print p,
+      // relying on ADL to pick up user-defined << for their pointer
+      // types, if any.
+      *os << p;
+    }
+  }
+};
+
+namespace internal_stream_operator_without_lexical_name_lookup {
+
+// The presence of an operator<< here will terminate lexical scope lookup
+// straight away (even though it cannot be a match because of its argument
+// types). Thus, the two operator<< calls in StreamPrinter will find only ADL
+// candidates.
+struct LookupBlocker {};
+void operator<<(LookupBlocker, LookupBlocker);
+
+struct StreamPrinter {
+  template <typename T,
+            // Don't accept member pointers here. We'd print them via implicit
+            // conversion to bool, which isn't useful.
+            typename = typename std::enable_if<
+                !std::is_member_pointer<T>::value>::type,
+            // Only accept types for which we can find a streaming operator via
+            // ADL (possibly involving implicit conversions).
+            typename = decltype(std::declval<std::ostream&>()
+                                << std::declval<const T&>())>
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    // Call streaming operator found by ADL, possibly with implicit conversions
+    // of the arguments.
+    *os << value;
+  }
+};
+
+}  // namespace internal_stream_operator_without_lexical_name_lookup
+
+struct ProtobufPrinter {
+  // We print a protobuf using its ShortDebugString() when the string
+  // doesn't exceed this many characters; otherwise we print it using
+  // DebugString() for better readability.
+  static const size_t kProtobufOneLinerMaxLength = 50;
+
+  template <typename T,
+            typename = typename std::enable_if<
+                internal::HasDebugStringAndShortDebugString<T>::value>::type>
   static void PrintValue(const T& value, ::std::ostream* os) {
     std::string pretty_str = value.ShortDebugString();
     if (pretty_str.length() > kProtobufOneLinerMaxLength) {
@@ -174,9 +234,7 @@
   }
 };
 
-template <typename T>
-class TypeWithoutFormatter<T, kConvertibleToInteger> {
- public:
+struct ConvertibleToIntegerPrinter {
   // Since T has no << operator or PrintTo() but can be implicitly
   // converted to BiggestInt, we print it as a BiggestInt.
   //
@@ -184,113 +242,65 @@
   // case printing it as an integer is the desired behavior.  In case
   // T is not an enum, printing it as an integer is the best we can do
   // given that it has no user-defined printer.
-  static void PrintValue(const T& value, ::std::ostream* os) {
-    const internal::BiggestInt kBigInt = value;
-    *os << kBigInt;
+  static void PrintValue(internal::BiggestInt value, ::std::ostream* os) {
+    *os << value;
   }
 };
 
-#if GTEST_HAS_ABSL
-template <typename T>
-class TypeWithoutFormatter<T, kConvertibleToStringView> {
- public:
-  // Since T has neither operator<< nor PrintTo() but can be implicitly
-  // converted to absl::string_view, we print it as a absl::string_view.
-  //
-  // Note: the implementation is further below, as it depends on
-  // internal::PrintTo symbol which is defined later in the file.
-  static void PrintValue(const T& value, ::std::ostream* os);
+struct ConvertibleToStringViewPrinter {
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+  static void PrintValue(internal::StringView value, ::std::ostream* os) {
+    internal::UniversalPrint(value, os);
+  }
+#endif
 };
-#endif
 
-// Prints the given value to the given ostream.  If the value is a
-// protocol message, its debug string is printed; if it's an enum or
-// of a type implicitly convertible to BiggestInt, it's printed as an
-// integer; otherwise the bytes in the value are printed.  This is
-// what UniversalPrinter<T>::Print() does when it knows nothing about
-// type T and T has neither << operator nor PrintTo().
-//
-// A user can override this behavior for a class type Foo by defining
-// a << operator in the namespace where Foo is defined.
-//
-// We put this operator in namespace 'internal2' instead of 'internal'
-// to simplify the implementation, as much code in 'internal' needs to
-// use << in STL, which would conflict with our own << were it defined
-// in 'internal'.
-//
-// Note that this operator<< takes a generic std::basic_ostream<Char,
-// CharTraits> type instead of the more restricted std::ostream.  If
-// we define it to take an std::ostream instead, we'll get an
-// "ambiguous overloads" compiler error when trying to print a type
-// Foo that supports streaming to std::basic_ostream<Char,
-// CharTraits>, as the compiler cannot tell whether
-// operator<<(std::ostream&, const T&) or
-// operator<<(std::basic_stream<Char, CharTraits>, const Foo&) is more
-// specific.
-template <typename Char, typename CharTraits, typename T>
-::std::basic_ostream<Char, CharTraits>& operator<<(
-    ::std::basic_ostream<Char, CharTraits>& os, const T& x) {
-  TypeWithoutFormatter<T, (internal::IsAProtocolMessage<T>::value
-                               ? kProtobuf
-                               : std::is_convertible<
-                                     const T&, internal::BiggestInt>::value
-                                     ? kConvertibleToInteger
-                                     :
-#if GTEST_HAS_ABSL
-                                     std::is_convertible<
-                                         const T&, absl::string_view>::value
-                                         ? kConvertibleToStringView
-                                         :
-#endif
-                                         kOtherType)>::PrintValue(x, &os);
-  return os;
-}
 
-}  // namespace internal2
-}  // namespace testing
+// Prints the given number of bytes in the given object to the given
+// ostream.
+GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes,
+                                     size_t count,
+                                     ::std::ostream* os);
+struct FallbackPrinter {
+  template <typename T>
+  static void PrintValue(const T& value, ::std::ostream* os) {
+    PrintBytesInObjectTo(
+        static_cast<const unsigned char*>(
+            reinterpret_cast<const void*>(std::addressof(value))),
+        sizeof(value), os);
+  }
+};
 
-// This namespace MUST NOT BE NESTED IN ::testing, or the name look-up
-// magic needed for implementing UniversalPrinter won't work.
-namespace testing_internal {
+// Try every printer in order and return the first one that works.
+template <typename T, typename E, typename Printer, typename... Printers>
+struct FindFirstPrinter : FindFirstPrinter<T, E, Printers...> {};
 
-// Used to print a value that is not an STL-style container when the
-// user doesn't define PrintTo() for it.
+template <typename T, typename Printer, typename... Printers>
+struct FindFirstPrinter<
+    T, decltype(Printer::PrintValue(std::declval<const T&>(), nullptr)),
+    Printer, Printers...> {
+  using type = Printer;
+};
+
+// Select the best printer in the following order:
+//  - Print containers (they have begin/end/etc).
+//  - Print function pointers.
+//  - Print object pointers.
+//  - Use the stream operator, if available.
+//  - Print protocol buffers.
+//  - Print types convertible to BiggestInt.
+//  - Print types convertible to StringView, if available.
+//  - Fall back to printing the raw bytes of the object.
 template <typename T>
-void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) {
-  // With the following statement, during unqualified name lookup,
-  // testing::internal2::operator<< appears as if it was declared in
-  // the nearest enclosing namespace that contains both
-  // ::testing_internal and ::testing::internal2, i.e. the global
-  // namespace.  For more details, refer to the C++ Standard section
-  // 7.3.4-1 [namespace.udir].  This allows us to fall back onto
-  // testing::internal2::operator<< in case T doesn't come with a <<
-  // operator.
-  //
-  // We cannot write 'using ::testing::internal2::operator<<;', which
-  // gcc 3.3 fails to compile due to a compiler bug.
-  using namespace ::testing::internal2;  // NOLINT
-
-  // Assuming T is defined in namespace foo, in the next statement,
-  // the compiler will consider all of:
-  //
-  //   1. foo::operator<< (thanks to Koenig look-up),
-  //   2. ::operator<< (as the current namespace is enclosed in ::),
-  //   3. testing::internal2::operator<< (thanks to the using statement above).
-  //
-  // The operator<< whose type matches T best will be picked.
-  //
-  // We deliberately allow #2 to be a candidate, as sometimes it's
-  // impossible to define #1 (e.g. when foo is ::std, defining
-  // anything in it is undefined behavior unless you are a compiler
-  // vendor.).
-  *os << value;
+void PrintWithFallback(const T& value, ::std::ostream* os) {
+  using Printer = typename FindFirstPrinter<
+      T, void, ContainerPrinter, FunctionPointerPrinter, PointerPrinter,
+      internal_stream_operator_without_lexical_name_lookup::StreamPrinter,
+      ProtobufPrinter, ConvertibleToIntegerPrinter,
+      ConvertibleToStringViewPrinter, FallbackPrinter>::type;
+  Printer::PrintValue(value, os);
 }
 
-}  // namespace testing_internal
-
-namespace testing {
-namespace internal {
-
 // FormatForComparison<ToPrint, OtherOperand>::Format(value) formats a
 // value of type ToPrint that is an operand of a comparison assertion
 // (e.g. ASSERT_EQ).  OtherOperand is the type of the other operand in
@@ -339,6 +349,14 @@
 GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char);
 GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t);
 GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t);
+#ifdef __cpp_char8_t
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char8_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char8_t);
+#endif
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char16_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char16_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char32_t);
+GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char32_t);
 
 #undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_
 
@@ -356,6 +374,14 @@
 
 GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string);
 GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string);
+#ifdef __cpp_char8_t
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char8_t, ::std::u8string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char8_t, ::std::u8string);
+#endif
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char16_t, ::std::u16string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char16_t, ::std::u16string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char32_t, ::std::u32string);
+GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char32_t, ::std::u32string);
 
 #if GTEST_HAS_STD_WSTRING
 GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring);
@@ -388,85 +414,6 @@
 template <typename T>
 class UniversalPrinter;
 
-template <typename T>
-void UniversalPrint(const T& value, ::std::ostream* os);
-
-enum DefaultPrinterType {
-  kPrintContainer,
-  kPrintPointer,
-  kPrintFunctionPointer,
-  kPrintOther,
-};
-template <DefaultPrinterType type> struct WrapPrinterType {};
-
-// Used to print an STL-style container when the user doesn't define
-// a PrintTo() for it.
-template <typename C>
-void DefaultPrintTo(WrapPrinterType<kPrintContainer> /* dummy */,
-                    const C& container, ::std::ostream* os) {
-  const size_t kMaxCount = 32;  // The maximum number of elements to print.
-  *os << '{';
-  size_t count = 0;
-  for (typename C::const_iterator it = container.begin();
-       it != container.end(); ++it, ++count) {
-    if (count > 0) {
-      *os << ',';
-      if (count == kMaxCount) {  // Enough has been printed.
-        *os << " ...";
-        break;
-      }
-    }
-    *os << ' ';
-    // We cannot call PrintTo(*it, os) here as PrintTo() doesn't
-    // handle *it being a native array.
-    internal::UniversalPrint(*it, os);
-  }
-
-  if (count > 0) {
-    *os << ' ';
-  }
-  *os << '}';
-}
-
-// Used to print a pointer that is neither a char pointer nor a member
-// pointer, when the user doesn't define PrintTo() for it.  (A member
-// variable pointer or member function pointer doesn't really point to
-// a location in the address space.  Their representation is
-// implementation-defined.  Therefore they will be printed as raw
-// bytes.)
-template <typename T>
-void DefaultPrintTo(WrapPrinterType<kPrintPointer> /* dummy */,
-                    T* p, ::std::ostream* os) {
-  if (p == nullptr) {
-    *os << "NULL";
-  } else {
-    // T is not a function type.  We just call << to print p,
-    // relying on ADL to pick up user-defined << for their pointer
-    // types, if any.
-    *os << p;
-  }
-}
-template <typename T>
-void DefaultPrintTo(WrapPrinterType<kPrintFunctionPointer> /* dummy */,
-                    T* p, ::std::ostream* os) {
-  if (p == nullptr) {
-    *os << "NULL";
-  } else {
-    // T is a function type, so '*os << p' doesn't do what we want
-    // (it just prints p as bool).  We want to print p as a const
-    // void*.
-    *os << reinterpret_cast<const void*>(p);
-  }
-}
-
-// Used to print a non-container, non-pointer value when the user
-// doesn't define PrintTo() for it.
-template <typename T>
-void DefaultPrintTo(WrapPrinterType<kPrintOther> /* dummy */,
-                    const T& value, ::std::ostream* os) {
-  ::testing_internal::DefaultPrintNonContainerTo(value, os);
-}
-
 // Prints the given value using the << operator if it has one;
 // otherwise prints the bytes in it.  This is what
 // UniversalPrinter<T>::Print() does when PrintTo() is not specialized
@@ -480,36 +427,7 @@
 // wants).
 template <typename T>
 void PrintTo(const T& value, ::std::ostream* os) {
-  // DefaultPrintTo() is overloaded.  The type of its first argument
-  // determines which version will be picked.
-  //
-  // Note that we check for container types here, prior to we check
-  // for protocol message types in our operator<<.  The rationale is:
-  //
-  // For protocol messages, we want to give people a chance to
-  // override Google Mock's format by defining a PrintTo() or
-  // operator<<.  For STL containers, other formats can be
-  // incompatible with Google Mock's format for the container
-  // elements; therefore we check for container types here to ensure
-  // that our format is used.
-  //
-  // Note that MSVC and clang-cl do allow an implicit conversion from
-  // pointer-to-function to pointer-to-object, but clang-cl warns on it.
-  // So don't use ImplicitlyConvertible if it can be helped since it will
-  // cause this warning, and use a separate overload of DefaultPrintTo for
-  // function pointers so that the `*os << p` in the object pointer overload
-  // doesn't cause that warning either.
-  DefaultPrintTo(
-      WrapPrinterType <
-                  (sizeof(IsContainerTest<T>(0)) == sizeof(IsContainer)) &&
-              !IsRecursiveContainer<T>::value
-          ? kPrintContainer
-          : !std::is_pointer<T>::value
-                ? kPrintOther
-                : std::is_function<typename std::remove_pointer<T>::type>::value
-                      ? kPrintFunctionPointer
-                      : kPrintPointer > (),
-      value, os);
+  internal::PrintWithFallback(value, os);
 }
 
 // The following list of PrintTo() overloads tells
@@ -540,6 +458,16 @@
 // is implemented as an unsigned type.
 GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os);
 
+GTEST_API_ void PrintTo(char32_t c, ::std::ostream* os);
+inline void PrintTo(char16_t c, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<char32_t>(c), os);
+}
+#ifdef __cpp_char8_t
+inline void PrintTo(char8_t c, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<char32_t>(c), os);
+}
+#endif
+
 // Overloads for C strings.
 GTEST_API_ void PrintTo(const char* s, ::std::ostream* os);
 inline void PrintTo(char* s, ::std::ostream* os) {
@@ -560,6 +488,26 @@
 inline void PrintTo(unsigned char* s, ::std::ostream* os) {
   PrintTo(ImplicitCast_<const void*>(s), os);
 }
+#ifdef __cpp_char8_t
+inline void PrintTo(const char8_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(char8_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+#endif
+inline void PrintTo(const char16_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(char16_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(const char32_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
+inline void PrintTo(char32_t* s, ::std::ostream* os) {
+  PrintTo(ImplicitCast_<const void*>(s), os);
+}
 
 // MSVC can be configured to define wchar_t as a typedef of unsigned
 // short.  It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native
@@ -602,12 +550,12 @@
 }
 #endif  // GTEST_HAS_STD_WSTRING
 
-#if GTEST_HAS_ABSL
-// Overload for absl::string_view.
-inline void PrintTo(absl::string_view sp, ::std::ostream* os) {
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+// Overload for internal::StringView.
+inline void PrintTo(internal::StringView sp, ::std::ostream* os) {
   PrintTo(::std::string(sp), os);
 }
-#endif  // GTEST_HAS_ABSL
+#endif  // GTEST_INTERNAL_HAS_STRING_VIEW
 
 inline void PrintTo(std::nullptr_t, ::std::ostream* os) { *os << "(nullptr)"; }
 
@@ -681,14 +629,42 @@
   GTEST_DISABLE_MSC_WARNINGS_POP_()
 };
 
-#if GTEST_HAS_ABSL
+#if GTEST_INTERNAL_HAS_ANY
 
-// Printer for absl::optional
+// Printer for std::any / absl::any
+
+template <>
+class UniversalPrinter<Any> {
+ public:
+  static void Print(const Any& value, ::std::ostream* os) {
+    if (value.has_value()) {
+      *os << "value of type " << GetTypeName(value);
+    } else {
+      *os << "no value";
+    }
+  }
+
+ private:
+  static std::string GetTypeName(const Any& value) {
+#if GTEST_HAS_RTTI
+    return internal::GetTypeName(value.type());
+#else
+    static_cast<void>(value);  // possibly unused
+    return "<unknown_type>";
+#endif  // GTEST_HAS_RTTI
+  }
+};
+
+#endif  // GTEST_INTERNAL_HAS_ANY
+
+#if GTEST_INTERNAL_HAS_OPTIONAL
+
+// Printer for std::optional / absl::optional
 
 template <typename T>
-class UniversalPrinter<::absl::optional<T>> {
+class UniversalPrinter<Optional<T>> {
  public:
-  static void Print(const ::absl::optional<T>& value, ::std::ostream* os) {
+  static void Print(const Optional<T>& value, ::std::ostream* os) {
     *os << '(';
     if (!value) {
       *os << "nullopt";
@@ -699,14 +675,22 @@
   }
 };
 
-// Printer for absl::variant
+#endif  // GTEST_INTERNAL_HAS_OPTIONAL
+
+#if GTEST_INTERNAL_HAS_VARIANT
+
+// Printer for std::variant / absl::variant
 
 template <typename... T>
-class UniversalPrinter<::absl::variant<T...>> {
+class UniversalPrinter<Variant<T...>> {
  public:
-  static void Print(const ::absl::variant<T...>& value, ::std::ostream* os) {
+  static void Print(const Variant<T...>& value, ::std::ostream* os) {
     *os << '(';
-    absl::visit(Visitor{os}, value);
+#if GTEST_HAS_ABSL
+    absl::visit(Visitor{os, value.index()}, value);
+#else
+    std::visit(Visitor{os, value.index()}, value);
+#endif  // GTEST_HAS_ABSL
     *os << ')';
   }
 
@@ -714,14 +698,16 @@
   struct Visitor {
     template <typename U>
     void operator()(const U& u) const {
-      *os << "'" << GetTypeName<U>() << "' with value ";
+      *os << "'" << GetTypeName<U>() << "(index = " << index
+          << ")' with value ";
       UniversalPrint(u, os);
     }
     ::std::ostream* os;
+    std::size_t index;
   };
 };
 
-#endif  // GTEST_HAS_ABSL
+#endif  // GTEST_INTERNAL_HAS_VARIANT
 
 // UniversalPrintArray(begin, len, os) prints an array of 'len'
 // elements, starting at address 'begin'.
@@ -900,16 +886,6 @@
 
 }  // namespace internal
 
-#if GTEST_HAS_ABSL
-namespace internal2 {
-template <typename T>
-void TypeWithoutFormatter<T, kConvertibleToStringView>::PrintValue(
-    const T& value, ::std::ostream* os) {
-  internal::PrintTo(absl::string_view(value), os);
-}
-}  // namespace internal2
-#endif
-
 template <typename T>
 ::std::string PrintToString(const T& value) {
   ::std::stringstream ss;
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-test-part.h b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-test-part.h
index 1e1cb09..05a7985 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-test-part.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-test-part.h
@@ -87,19 +87,19 @@
   // Gets the message associated with the test part.
   const char* message() const { return message_.c_str(); }
 
-  // Returns true iff the test part was skipped.
+  // Returns true if and only if the test part was skipped.
   bool skipped() const { return type_ == kSkip; }
 
-  // Returns true iff the test part passed.
+  // Returns true if and only if the test part passed.
   bool passed() const { return type_ == kSuccess; }
 
-  // Returns true iff the test part non-fatally failed.
+  // Returns true if and only if the test part non-fatally failed.
   bool nonfatally_failed() const { return type_ == kNonFatalFailure; }
 
-  // Returns true iff the test part fatally failed.
+  // Returns true if and only if the test part fatally failed.
   bool fatally_failed() const { return type_ == kFatalFailure; }
 
-  // Returns true iff the test part failed.
+  // Returns true if and only if the test part failed.
   bool failed() const { return fatally_failed() || nonfatally_failed(); }
 
  private:
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-typed-test.h b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-typed-test.h
index b3319f6..3ffa50b 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/gtest-typed-test.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/gtest-typed-test.h
@@ -27,7 +27,6 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-
 // GOOGLETEST_CM0001 DO NOT DELETE
 
 #ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
@@ -65,9 +64,9 @@
 // Then, use TYPED_TEST() instead of TEST_F() to define as many typed
 // tests for this test suite as you want.
 TYPED_TEST(FooTest, DoesBlah) {
-  // Inside a test, refer to TypeParam to get the type parameter.
-  // Since we are inside a derived class template, C++ requires use to
-  // visit the members of FooTest via 'this'.
+  // Inside a test, refer to the special name TypeParam to get the type
+  // parameter.  Since we are inside a derived class template, C++ requires
+  // us to visit the members of FooTest via 'this'.
   TypeParam n = this->value_;
 
   // To visit static members of the fixture, add the TestFixture::
@@ -170,6 +169,7 @@
 
 #endif  // 0
 
+#include "gtest/internal/gtest-internal.h"
 #include "gtest/internal/gtest-port.h"
 #include "gtest/internal/gtest-type-util.h"
 
@@ -188,27 +188,25 @@
 #define GTEST_NAME_GENERATOR_(TestSuiteName) \
   gtest_type_params_##TestSuiteName##_NameGenerator
 
-// The 'Types' template argument below must have spaces around it
-// since some compilers may choke on '>>' when passing a template
-// instance (e.g. Types<int>)
-#define TYPED_TEST_SUITE(CaseName, Types, ...)                           \
-  typedef ::testing::internal::TypeList<Types>::type GTEST_TYPE_PARAMS_( \
-      CaseName);                                                         \
-  typedef ::testing::internal::NameGeneratorSelector<__VA_ARGS__>::type  \
+#define TYPED_TEST_SUITE(CaseName, Types, ...)                          \
+  typedef ::testing::internal::GenerateTypeList<Types>::type            \
+      GTEST_TYPE_PARAMS_(CaseName);                                     \
+  typedef ::testing::internal::NameGeneratorSelector<__VA_ARGS__>::type \
       GTEST_NAME_GENERATOR_(CaseName)
 
-# define TYPED_TEST(CaseName, TestName)                                       \
+#define TYPED_TEST(CaseName, TestName)                                        \
+  static_assert(sizeof(GTEST_STRINGIFY_(TestName)) > 1,                       \
+                "test-name must not be empty");                               \
   template <typename gtest_TypeParam_>                                        \
   class GTEST_TEST_CLASS_NAME_(CaseName, TestName)                            \
       : public CaseName<gtest_TypeParam_> {                                   \
    private:                                                                   \
     typedef CaseName<gtest_TypeParam_> TestFixture;                           \
     typedef gtest_TypeParam_ TypeParam;                                       \
-    virtual void TestBody();                                                  \
+    void TestBody() override;                                                 \
   };                                                                          \
   static bool gtest_##CaseName##_##TestName##_registered_                     \
-        GTEST_ATTRIBUTE_UNUSED_ =                                             \
-      ::testing::internal::TypeParameterizedTest<                             \
+      GTEST_ATTRIBUTE_UNUSED_ = ::testing::internal::TypeParameterizedTest<   \
           CaseName,                                                           \
           ::testing::internal::TemplateSel<GTEST_TEST_CLASS_NAME_(CaseName,   \
                                                                   TestName)>, \
@@ -216,7 +214,8 @@
               CaseName)>::Register("",                                        \
                                    ::testing::internal::CodeLocation(         \
                                        __FILE__, __LINE__),                   \
-                                   #CaseName, #TestName, 0,                   \
+                                   GTEST_STRINGIFY_(CaseName),                \
+                                   GTEST_STRINGIFY_(TestName), 0,             \
                                    ::testing::internal::GenerateNames<        \
                                        GTEST_NAME_GENERATOR_(CaseName),       \
                                        GTEST_TYPE_PARAMS_(CaseName)>());      \
@@ -279,24 +278,26 @@
      private:                                                         \
       typedef SuiteName<gtest_TypeParam_> TestFixture;                \
       typedef gtest_TypeParam_ TypeParam;                             \
-      virtual void TestBody();                                        \
+      void TestBody() override;                                       \
     };                                                                \
     static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
         GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).AddTestName(       \
-            __FILE__, __LINE__, #SuiteName, #TestName);               \
+            __FILE__, __LINE__, GTEST_STRINGIFY_(SuiteName),          \
+            GTEST_STRINGIFY_(TestName));                              \
   }                                                                   \
   template <typename gtest_TypeParam_>                                \
   void GTEST_SUITE_NAMESPACE_(                                        \
       SuiteName)::TestName<gtest_TypeParam_>::TestBody()
 
-#define REGISTER_TYPED_TEST_SUITE_P(SuiteName, ...)                            \
-  namespace GTEST_SUITE_NAMESPACE_(SuiteName) {                                \
-    typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \
-  }                                                                            \
-  static const char* const GTEST_REGISTERED_TEST_NAMES_(                       \
-      SuiteName) GTEST_ATTRIBUTE_UNUSED_ =                                     \
-      GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).VerifyRegisteredTestNames(    \
-          __FILE__, __LINE__, #__VA_ARGS__)
+// Note: this won't work correctly if the trailing arguments are macros.
+#define REGISTER_TYPED_TEST_SUITE_P(SuiteName, ...)                         \
+  namespace GTEST_SUITE_NAMESPACE_(SuiteName) {                             \
+    typedef ::testing::internal::Templates<__VA_ARGS__> gtest_AllTests_;    \
+  }                                                                         \
+  static const char* const GTEST_REGISTERED_TEST_NAMES_(                    \
+      SuiteName) GTEST_ATTRIBUTE_UNUSED_ =                                  \
+      GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName).VerifyRegisteredTestNames( \
+          GTEST_STRINGIFY_(SuiteName), __FILE__, __LINE__, #__VA_ARGS__)
 
 // Legacy API is deprecated but still available
 #ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
@@ -306,22 +307,22 @@
   REGISTER_TYPED_TEST_SUITE_P
 #endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
-// The 'Types' template argument below must have spaces around it
-// since some compilers may choke on '>>' when passing a template
-// instance (e.g. Types<int>)
 #define INSTANTIATE_TYPED_TEST_SUITE_P(Prefix, SuiteName, Types, ...)       \
+  static_assert(sizeof(GTEST_STRINGIFY_(Prefix)) > 1,                       \
+                "test-suit-prefix must not be empty");                      \
   static bool gtest_##Prefix##_##SuiteName GTEST_ATTRIBUTE_UNUSED_ =        \
       ::testing::internal::TypeParameterizedTestSuite<                      \
           SuiteName, GTEST_SUITE_NAMESPACE_(SuiteName)::gtest_AllTests_,    \
-          ::testing::internal::TypeList<Types>::type>::                     \
-          Register(#Prefix,                                                 \
+          ::testing::internal::GenerateTypeList<Types>::type>::             \
+          Register(GTEST_STRINGIFY_(Prefix),                                \
                    ::testing::internal::CodeLocation(__FILE__, __LINE__),   \
-                   &GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName), #SuiteName, \
+                   &GTEST_TYPED_TEST_SUITE_P_STATE_(SuiteName),             \
+                   GTEST_STRINGIFY_(SuiteName),                             \
                    GTEST_REGISTERED_TEST_NAMES_(SuiteName),                 \
                    ::testing::internal::GenerateNames<                      \
                        ::testing::internal::NameGeneratorSelector<          \
                            __VA_ARGS__>::type,                              \
-                       ::testing::internal::TypeList<Types>::type>())
+                       ::testing::internal::GenerateTypeList<Types>::type>())
 
 // Legacy API is deprecated but still available
 #ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/gtest.h b/deps/boringssl/src/third_party/googletest/include/gtest/gtest.h
index cedef93..b3d4041 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/gtest.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/gtest.h
@@ -101,6 +101,10 @@
 // to let Google Test decide.
 GTEST_DECLARE_string_(color);
 
+// This flag controls whether the test runner should continue execution past
+// first failure.
+GTEST_DECLARE_bool_(fail_fast);
+
 // This flag sets up the filter to select by name using a glob pattern
 // the tests to run. If the filter is not given all tests are executed.
 GTEST_DECLARE_string_(filter);
@@ -117,6 +121,9 @@
 // in addition to its normal textual output.
 GTEST_DECLARE_string_(output);
 
+// This flag controls whether Google Test prints only test failures.
+GTEST_DECLARE_bool_(brief);
+
 // This flags control whether Google Test prints the elapsed time for each
 // test.
 GTEST_DECLARE_bool_(print_time);
@@ -177,6 +184,7 @@
 class UnitTestImpl* GetUnitTestImpl();
 void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
                                     const std::string& message);
+std::set<std::string>* GetIgnoredParameterizedTestSuites();
 
 }  // namespace internal
 
@@ -278,7 +286,11 @@
   // Used in EXPECT_TRUE/FALSE(assertion_result).
   AssertionResult(const AssertionResult& other);
 
-#if defined(_MSC_VER) && _MSC_VER < 1910
+// C4800 is a level 3 warning in Visual Studio 2015 and earlier.
+// This warning is not emitted in Visual Studio 2017.
+// This warning is off by default starting in Visual Studio 2019 but can be
+// enabled with command-line options.
+#if defined(_MSC_VER) && (_MSC_VER < 1910 || _MSC_VER >= 1920)
   GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 /* forcing value to bool */)
 #endif
 
@@ -292,13 +304,13 @@
   template <typename T>
   explicit AssertionResult(
       const T& success,
-      typename internal::EnableIf<
+      typename std::enable_if<
           !std::is_convertible<T, AssertionResult>::value>::type*
       /*enabler*/
       = nullptr)
       : success_(success) {}
 
-#if defined(_MSC_VER) && _MSC_VER < 1910
+#if defined(_MSC_VER) && (_MSC_VER < 1910 || _MSC_VER >= 1920)
   GTEST_DISABLE_MSC_WARNINGS_POP_()
 #endif
 
@@ -308,7 +320,7 @@
     return *this;
   }
 
-  // Returns true iff the assertion succeeded.
+  // Returns true if and only if the assertion succeeded.
   operator bool() const { return success_; }  // NOLINT
 
   // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
@@ -406,38 +418,39 @@
   // The d'tor is virtual as we intend to inherit from Test.
   virtual ~Test();
 
-  // Sets up the stuff shared by all tests in this test case.
+  // Sets up the stuff shared by all tests in this test suite.
   //
   // Google Test will call Foo::SetUpTestSuite() before running the first
-  // test in test case Foo.  Hence a sub-class can define its own
+  // test in test suite Foo.  Hence a sub-class can define its own
   // SetUpTestSuite() method to shadow the one defined in the super
   // class.
   static void SetUpTestSuite() {}
 
-  // Tears down the stuff shared by all tests in this test case.
+  // Tears down the stuff shared by all tests in this test suite.
   //
   // Google Test will call Foo::TearDownTestSuite() after running the last
-  // test in test case Foo.  Hence a sub-class can define its own
+  // test in test suite Foo.  Hence a sub-class can define its own
   // TearDownTestSuite() method to shadow the one defined in the super
   // class.
   static void TearDownTestSuite() {}
 
-  // Legacy API is deprecated but still available
+  // Legacy API is deprecated but still available. Use SetUpTestSuite and
+  // TearDownTestSuite instead.
 #ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
   static void TearDownTestCase() {}
   static void SetUpTestCase() {}
 #endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
-  // Returns true iff the current test has a fatal failure.
+  // Returns true if and only if the current test has a fatal failure.
   static bool HasFatalFailure();
 
-  // Returns true iff the current test has a non-fatal failure.
+  // Returns true if and only if the current test has a non-fatal failure.
   static bool HasNonfatalFailure();
 
-  // Returns true iff the current test was skipped.
+  // Returns true if and only if the current test was skipped.
   static bool IsSkipped();
 
-  // Returns true iff the current test has a (either fatal or
+  // Returns true if and only if the current test has a (either fatal or
   // non-fatal) failure.
   static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); }
 
@@ -468,8 +481,8 @@
   virtual void TearDown();
 
  private:
-  // Returns true iff the current test has the same fixture class as
-  // the first test in the current test suite.
+  // Returns true if and only if the current test has the same fixture class
+  // as the first test in the current test suite.
   static bool HasSameFixtureClass();
 
   // Runs the test after the test fixture has been set up.
@@ -570,24 +583,28 @@
   // Returns the number of the test properties.
   int test_property_count() const;
 
-  // Returns true iff the test passed (i.e. no test part failed).
+  // Returns true if and only if the test passed (i.e. no test part failed).
   bool Passed() const { return !Skipped() && !Failed(); }
 
-  // Returns true iff the test was skipped.
+  // Returns true if and only if the test was skipped.
   bool Skipped() const;
 
-  // Returns true iff the test failed.
+  // Returns true if and only if the test failed.
   bool Failed() const;
 
-  // Returns true iff the test fatally failed.
+  // Returns true if and only if the test fatally failed.
   bool HasFatalFailure() const;
 
-  // Returns true iff the test has a non-fatal failure.
+  // Returns true if and only if the test has a non-fatal failure.
   bool HasNonfatalFailure() const;
 
   // Returns the elapsed time, in milliseconds.
   TimeInMillis elapsed_time() const { return elapsed_time_; }
 
+  // Gets the time of the test case start, in ms from the start of the
+  // UNIX epoch.
+  TimeInMillis start_timestamp() const { return start_timestamp_; }
+
   // Returns the i-th test part result among all the results. i can range from 0
   // to total_part_count() - 1. If i is not in that range, aborts the program.
   const TestPartResult& GetTestPartResult(int i) const;
@@ -618,6 +635,9 @@
     return test_properties_;
   }
 
+  // Sets the start time.
+  void set_start_timestamp(TimeInMillis start) { start_timestamp_ = start; }
+
   // Sets the elapsed time.
   void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; }
 
@@ -661,6 +681,8 @@
   std::vector<TestProperty> test_properties_;
   // Running count of death tests.
   int death_test_count_;
+  // The start time, in milliseconds since UNIX Epoch.
+  TimeInMillis start_timestamp_;
   // The elapsed time, in milliseconds.
   TimeInMillis elapsed_time_;
 
@@ -737,7 +759,7 @@
   // contains the character 'A' or starts with "Foo.".
   bool should_run() const { return should_run_; }
 
-  // Returns true iff this test will appear in the XML report.
+  // Returns true if and only if this test will appear in the XML report.
   bool is_reportable() const {
     // The XML report includes tests matching the filter, excluding those
     // run in other shards.
@@ -781,6 +803,9 @@
   // deletes it.
   void Run();
 
+  // Skips the test and records the result for this object.
+  void Skip();
+
   static void ClearTestResult(TestInfo* test_info) {
     test_info->result_.Clear();
   }
@@ -795,12 +820,12 @@
   // value-parameterized test.
   const std::unique_ptr<const ::std::string> value_param_;
   internal::CodeLocation location_;
-  const internal::TypeId fixture_class_id_;   // ID of the test fixture class
-  bool should_run_;                 // True iff this test should run
-  bool is_disabled_;                // True iff this test is disabled
-  bool matches_filter_;             // True if this test matches the
-                                    // user-specified filter.
-  bool is_in_another_shard_;        // Will be run in another shard.
+  const internal::TypeId fixture_class_id_;  // ID of the test fixture class
+  bool should_run_;           // True if and only if this test should run
+  bool is_disabled_;          // True if and only if this test is disabled
+  bool matches_filter_;       // True if this test matches the
+                              // user-specified filter.
+  bool is_in_another_shard_;  // Will be run in another shard.
   internal::TestFactoryBase* const factory_;  // The factory that creates
                                               // the test object
 
@@ -872,15 +897,21 @@
   // Gets the number of all tests in this test suite.
   int total_test_count() const;
 
-  // Returns true iff the test suite passed.
+  // Returns true if and only if the test suite passed.
   bool Passed() const { return !Failed(); }
 
-  // Returns true iff the test suite failed.
-  bool Failed() const { return failed_test_count() > 0; }
+  // Returns true if and only if the test suite failed.
+  bool Failed() const {
+    return failed_test_count() > 0 || ad_hoc_test_result().Failed();
+  }
 
   // Returns the elapsed time, in milliseconds.
   TimeInMillis elapsed_time() const { return elapsed_time_; }
 
+  // Gets the time of the test suite start, in ms from the start of the
+  // UNIX epoch.
+  TimeInMillis start_timestamp() const { return start_timestamp_; }
+
   // Returns the i-th test among all the tests. i can range from 0 to
   // total_test_count() - 1. If i is not in that range, returns NULL.
   const TestInfo* GetTestInfo(int i) const;
@@ -923,6 +954,9 @@
   // Runs every test in this TestSuite.
   void Run();
 
+  // Skips the execution of tests under this TestSuite.
+  void Skip();
+
   // Runs SetUpTestSuite() for this TestSuite.  This wrapper is needed
   // for catching exceptions thrown from SetUpTestSuite().
   void RunSetUpTestSuite() {
@@ -939,33 +973,33 @@
     }
   }
 
-  // Returns true iff test passed.
+  // Returns true if and only if test passed.
   static bool TestPassed(const TestInfo* test_info) {
     return test_info->should_run() && test_info->result()->Passed();
   }
 
-  // Returns true iff test skipped.
+  // Returns true if and only if test skipped.
   static bool TestSkipped(const TestInfo* test_info) {
     return test_info->should_run() && test_info->result()->Skipped();
   }
 
-  // Returns true iff test failed.
+  // Returns true if and only if test failed.
   static bool TestFailed(const TestInfo* test_info) {
     return test_info->should_run() && test_info->result()->Failed();
   }
 
-  // Returns true iff the test is disabled and will be reported in the XML
-  // report.
+  // Returns true if and only if the test is disabled and will be reported in
+  // the XML report.
   static bool TestReportableDisabled(const TestInfo* test_info) {
     return test_info->is_reportable() && test_info->is_disabled_;
   }
 
-  // Returns true iff test is disabled.
+  // Returns true if and only if test is disabled.
   static bool TestDisabled(const TestInfo* test_info) {
     return test_info->is_disabled_;
   }
 
-  // Returns true iff this test will appear in the XML report.
+  // Returns true if and only if this test will appear in the XML report.
   static bool TestReportable(const TestInfo* test_info) {
     return test_info->is_reportable();
   }
@@ -997,8 +1031,10 @@
   internal::SetUpTestSuiteFunc set_up_tc_;
   // Pointer to the function that tears down the test suite.
   internal::TearDownTestSuiteFunc tear_down_tc_;
-  // True iff any test in this test suite should run.
+  // True if and only if any test in this test suite should run.
   bool should_run_;
+  // The start time, in milliseconds since UNIX Epoch.
+  TimeInMillis start_timestamp_;
   // Elapsed time, in milliseconds.
   TimeInMillis elapsed_time_;
   // Holds test properties recorded during execution of SetUpTestSuite and
@@ -1297,7 +1333,7 @@
   int failed_test_case_count() const;
   int total_test_case_count() const;
   int test_case_to_run_count() const;
-#endif  //  EMOVE_LEGACY_TEST_CASEAPI
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
   // Gets the number of successful tests.
   int successful_test_count() const;
@@ -1330,11 +1366,12 @@
   // Gets the elapsed time, in milliseconds.
   TimeInMillis elapsed_time() const;
 
-  // Returns true iff the unit test passed (i.e. all test suites passed).
+  // Returns true if and only if the unit test passed (i.e. all test suites
+  // passed).
   bool Passed() const;
 
-  // Returns true iff the unit test failed (i.e. some test suite failed
-  // or something outside of all tests failed).
+  // Returns true if and only if the unit test failed (i.e. some test suite
+  // failed or something outside of all tests failed).
   bool Failed() const;
 
   // Gets the i-th test suite among all the test suites. i can range from 0 to
@@ -1400,6 +1437,7 @@
   friend class internal::StreamingListenerTest;
   friend class internal::UnitTestRecordPropertyTestHelper;
   friend Environment* AddGlobalTestEnvironment(Environment* env);
+  friend std::set<std::string>* internal::GetIgnoredParameterizedTestSuites();
   friend internal::UnitTestImpl* internal::GetUnitTestImpl();
   friend void internal::ReportFailureInUnknownLocation(
       TestPartResult::Type result_type,
@@ -1783,12 +1821,6 @@
   GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper);
 };
 
-enum GTestColor { COLOR_DEFAULT, COLOR_RED, COLOR_GREEN, COLOR_YELLOW };
-
-GTEST_API_ GTEST_ATTRIBUTE_PRINTF_(2, 3) void ColoredPrintf(GTestColor color,
-                                                            const char* fmt,
-                                                            ...);
-
 }  // namespace internal
 
 // The pure interface class that all value-parameterized tests inherit from.
@@ -1869,7 +1901,7 @@
 // Skips test in runtime.
 // Skipping test aborts current function.
 // Skipped tests are neither successful nor failed.
-#define GTEST_SKIP() GTEST_SKIP_("Skipped")
+#define GTEST_SKIP() GTEST_SKIP_("")
 
 // ADD_FAILURE unconditionally adds a failure to the current test.
 // SUCCEED generates a success - it doesn't automatically make the
@@ -1900,6 +1932,11 @@
 // Generates a fatal failure with a generic message.
 #define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed")
 
+// Like GTEST_FAIL(), but at the given source file location.
+#define GTEST_FAIL_AT(file, line)         \
+  GTEST_MESSAGE_AT_(file, line, "Failed", \
+                    ::testing::TestPartResult::kFatalFailure)
+
 // Define this macro to 1 to omit the definition of FAIL(), which is a
 // generic name and clashes with some other libraries.
 #if !GTEST_DONT_DEFINE_FAIL
@@ -2241,10 +2278,9 @@
   ::testing::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\
     __FILE__, __LINE__, (message))
 
-
 // Compile-time assertion for type equality.
-// StaticAssertTypeEq<type1, type2>() compiles iff type1 and type2 are
-// the same type.  The value it returns is not interesting.
+// StaticAssertTypeEq<type1, type2>() compiles if and only if type1 and type2
+// are the same type.  The value it returns is not interesting.
 //
 // Instead of making StaticAssertTypeEq a class template, we make it a
 // function template that invokes a helper class template.  This
@@ -2273,8 +2309,8 @@
 //
 // to cause a compiler error.
 template <typename T1, typename T2>
-bool StaticAssertTypeEq() {
-  (void)internal::StaticAssertTypeEqHelper<T1, T2>();
+constexpr bool StaticAssertTypeEq() noexcept {
+  static_assert(std::is_same<T1, T2>::value, "T1 and T2 are not the same type");
   return true;
 }
 
@@ -2340,9 +2376,11 @@
 //   }
 //
 // GOOGLETEST_CM0011 DO NOT DELETE
+#if !GTEST_DONT_DEFINE_TEST
 #define TEST_F(test_fixture, test_name)\
   GTEST_TEST_(test_fixture, test_name, test_fixture, \
               ::testing::internal::GetTypeId<test_fixture>())
+#endif  // !GTEST_DONT_DEFINE_TEST
 
 // Returns a path to temporary directory.
 // Tries to determine an appropriate directory for the platform.
@@ -2427,8 +2465,8 @@
   return internal::MakeAndRegisterTestInfo(
       test_suite_name, test_name, type_param, value_param,
       internal::CodeLocation(file, line), internal::GetTypeId<TestT>(),
-      internal::SuiteApiResolver<TestT>::GetSetUpCaseOrSuite(),
-      internal::SuiteApiResolver<TestT>::GetTearDownCaseOrSuite(),
+      internal::SuiteApiResolver<TestT>::GetSetUpCaseOrSuite(file, line),
+      internal::SuiteApiResolver<TestT>::GetTearDownCaseOrSuite(file, line),
       new FactoryImpl{std::move(factory)});
 }
 
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-filepath.h b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-filepath.h
index ae38d95..c11b101 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-filepath.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-filepath.h
@@ -110,7 +110,7 @@
                                          const FilePath& base_name,
                                          const char* extension);
 
-  // Returns true iff the path is "".
+  // Returns true if and only if the path is "".
   bool IsEmpty() const { return pathname_.empty(); }
 
   // If input name has a trailing separator character, removes it and returns
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-internal.h b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-internal.h
index d16586c..8dc74bb 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-internal.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-internal.h
@@ -53,6 +53,7 @@
 #include <ctype.h>
 #include <float.h>
 #include <string.h>
+#include <cstdint>
 #include <iomanip>
 #include <limits>
 #include <map>
@@ -78,9 +79,20 @@
 #define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar
 
 // Stringifies its argument.
-#define GTEST_STRINGIFY_(name) #name
+// Work around a bug in Visual Studio which doesn't accept code like this:
+//
+//   #define GTEST_STRINGIFY_(name) #name
+//   #define MACRO(a, b, c) ... GTEST_STRINGIFY_(a) ...
+//   MACRO(, x, y)
+//
+// Complaining about the argument to GTEST_STRINGIFY_ being empty.
+// This is allowed by the spec.
+#define GTEST_STRINGIFY_HELPER_(name, ...) #name
+#define GTEST_STRINGIFY_(...) GTEST_STRINGIFY_HELPER_(__VA_ARGS__, )
 
-namespace proto2 { class Message; }
+namespace proto2 {
+class MessageLite;
+}
 
 namespace testing {
 
@@ -189,7 +201,7 @@
 //   expected_value:      "5"
 //   actual_value:        "6"
 //
-// The ignoring_case parameter is true iff the assertion is a
+// The ignoring_case parameter is true if and only if the assertion is a
 // *_STRCASEEQ*.  When it's true, the string " (ignoring case)" will
 // be inserted into the message.
 GTEST_API_ AssertionResult EqFailure(const char* expected_expression,
@@ -275,7 +287,7 @@
   //
   // See the following article for more details on ULP:
   // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/
-  static const size_t kMaxUlps = 4;
+  static const uint32_t kMaxUlps = 4;
 
   // Constructs a FloatingPoint from a raw floating-point number.
   //
@@ -318,15 +330,15 @@
   // Returns the sign bit of this number.
   Bits sign_bit() const { return kSignBitMask & u_.bits_; }
 
-  // Returns true iff this is NAN (not a number).
+  // Returns true if and only if this is NAN (not a number).
   bool is_nan() const {
     // It's a NAN if the exponent bits are all ones and the fraction
     // bits are not entirely zeros.
     return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0);
   }
 
-  // Returns true iff this number is at most kMaxUlps ULP's away from
-  // rhs.  In particular, this function:
+  // Returns true if and only if this number is at most kMaxUlps ULP's away
+  // from rhs.  In particular, this function:
   //
   //   - returns false if either number is (or both are) NAN.
   //   - treats really large numbers as almost equal to infinity.
@@ -506,7 +518,9 @@
   using Test =
       typename std::conditional<sizeof(T) != 0, ::testing::Test, void>::type;
 
-  static SetUpTearDownSuiteFuncType GetSetUpCaseOrSuite() {
+  static SetUpTearDownSuiteFuncType GetSetUpCaseOrSuite(const char* filename,
+                                                        int line_num) {
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
     SetUpTearDownSuiteFuncType test_case_fp =
         GetNotDefaultOrNull(&T::SetUpTestCase, &Test::SetUpTestCase);
     SetUpTearDownSuiteFuncType test_suite_fp =
@@ -514,12 +528,20 @@
 
     GTEST_CHECK_(!test_case_fp || !test_suite_fp)
         << "Test can not provide both SetUpTestSuite and SetUpTestCase, please "
-           "make sure there is only one present ";
+           "make sure there is only one present at "
+        << filename << ":" << line_num;
 
     return test_case_fp != nullptr ? test_case_fp : test_suite_fp;
+#else
+    (void)(filename);
+    (void)(line_num);
+    return &T::SetUpTestSuite;
+#endif
   }
 
-  static SetUpTearDownSuiteFuncType GetTearDownCaseOrSuite() {
+  static SetUpTearDownSuiteFuncType GetTearDownCaseOrSuite(const char* filename,
+                                                           int line_num) {
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
     SetUpTearDownSuiteFuncType test_case_fp =
         GetNotDefaultOrNull(&T::TearDownTestCase, &Test::TearDownTestCase);
     SetUpTearDownSuiteFuncType test_suite_fp =
@@ -527,9 +549,15 @@
 
     GTEST_CHECK_(!test_case_fp || !test_suite_fp)
         << "Test can not provide both TearDownTestSuite and TearDownTestCase,"
-           " please make sure there is only one present ";
+           " please make sure there is only one present at"
+        << filename << ":" << line_num;
 
     return test_case_fp != nullptr ? test_case_fp : test_suite_fp;
+#else
+    (void)(filename);
+    (void)(line_num);
+    return &T::TearDownTestSuite;
+#endif
   }
 };
 
@@ -538,11 +566,11 @@
 //
 // Arguments:
 //
-//   test_suite_name:   name of the test suite
+//   test_suite_name:  name of the test suite
 //   name:             name of the test
-//   type_param        the name of the test's type parameter, or NULL if
+//   type_param:       the name of the test's type parameter, or NULL if
 //                     this is not a typed or a type-parameterized test.
-//   value_param       text representation of the test's value parameter,
+//   value_param:      text representation of the test's value parameter,
 //                     or NULL if this is not a type-parameterized test.
 //   code_location:    code location where the test is defined
 //   fixture_class_id: ID of the test fixture class
@@ -603,8 +631,9 @@
   // Verifies that registered_tests match the test names in
   // defined_test_names_; returns registered_tests if successful, or
   // aborts the program otherwise.
-  const char* VerifyRegisteredTestNames(
-      const char* file, int line, const char* registered_tests);
+  const char* VerifyRegisteredTestNames(const char* test_suite_name,
+                                        const char* file, int line,
+                                        const char* registered_tests);
 
  private:
   typedef ::std::map<std::string, CodeLocation> RegisteredTestsMap;
@@ -658,7 +687,7 @@
 };
 
 template <typename NameGenerator>
-void GenerateNamesRecursively(Types0, std::vector<std::string>*, int) {}
+void GenerateNamesRecursively(internal::None, std::vector<std::string>*, int) {}
 
 template <typename NameGenerator, typename Types>
 void GenerateNamesRecursively(Types, std::vector<std::string>* result, int i) {
@@ -700,14 +729,16 @@
     // list.
     MakeAndRegisterTestInfo(
         (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name +
-         "/" + type_names[index])
+         "/" + type_names[static_cast<size_t>(index)])
             .c_str(),
         StripTrailingSpaces(GetPrefixUntilComma(test_names)).c_str(),
         GetTypeName<Type>().c_str(),
         nullptr,  // No value parameter.
         code_location, GetTypeId<FixtureClass>(),
-        SuiteApiResolver<TestClass>::GetSetUpCaseOrSuite(),
-        SuiteApiResolver<TestClass>::GetTearDownCaseOrSuite(),
+        SuiteApiResolver<TestClass>::GetSetUpCaseOrSuite(
+            code_location.file.c_str(), code_location.line),
+        SuiteApiResolver<TestClass>::GetTearDownCaseOrSuite(
+            code_location.file.c_str(), code_location.line),
         new TestFactoryImpl<TestClass>);
 
     // Next, recurses (at compile time) with the tail of the type list.
@@ -723,7 +754,7 @@
 
 // The base case for the compile time recursion.
 template <GTEST_TEMPLATE_ Fixture, class TestSel>
-class TypeParameterizedTest<Fixture, TestSel, Types0> {
+class TypeParameterizedTest<Fixture, TestSel, internal::None> {
  public:
   static bool Register(const char* /*prefix*/, const CodeLocation&,
                        const char* /*case_name*/, const char* /*test_names*/,
@@ -734,6 +765,11 @@
   }
 };
 
+GTEST_API_ void RegisterTypeParameterizedTestSuite(const char* test_suite_name,
+                                                   CodeLocation code_location);
+GTEST_API_ void RegisterTypeParameterizedTestSuiteInstantiation(
+    const char* case_name);
+
 // TypeParameterizedTestSuite<Fixture, Tests, Types>::Register()
 // registers *all combinations* of 'Tests' and 'Types' with Google
 // Test.  The return value is insignificant - we just need to return
@@ -746,6 +782,7 @@
                        const char* test_names,
                        const std::vector<std::string>& type_names =
                            GenerateNames<DefaultNameGenerator, Types>()) {
+    RegisterTypeParameterizedTestSuiteInstantiation(case_name);
     std::string test_name = StripTrailingSpaces(
         GetPrefixUntilComma(test_names));
     if (!state->TestExists(test_name)) {
@@ -775,7 +812,7 @@
 
 // The base case for the compile time recursion.
 template <GTEST_TEMPLATE_ Fixture, typename Types>
-class TypeParameterizedTestSuite<Fixture, Templates0, Types> {
+class TypeParameterizedTestSuite<Fixture, internal::None, Types> {
  public:
   static bool Register(const char* /*prefix*/, const CodeLocation&,
                        const TypedTestSuitePState* /*state*/,
@@ -819,6 +856,16 @@
   const char* value;
 };
 
+// Helper for declaring a std::string within an 'if' statement
+// in pre-C++17 build environments.
+struct TrueWithString {
+  TrueWithString() = default;
+  explicit TrueWithString(const char* str) : value(str) {}
+  explicit TrueWithString(const std::string& str) : value(str) {}
+  explicit operator bool() const { return true; }
+  std::string value;
+};
+
 // A simple Linear Congruential Generator for generating random
 // numbers with a uniform distribution.  Unlike rand() and srand(), it
 // doesn't use global state (and therefore can't interfere with user
@@ -826,76 +873,54 @@
 // but it's good enough for our purposes.
 class GTEST_API_ Random {
  public:
-  static const UInt32 kMaxRange = 1u << 31;
+  static const uint32_t kMaxRange = 1u << 31;
 
-  explicit Random(UInt32 seed) : state_(seed) {}
+  explicit Random(uint32_t seed) : state_(seed) {}
 
-  void Reseed(UInt32 seed) { state_ = seed; }
+  void Reseed(uint32_t seed) { state_ = seed; }
 
   // Generates a random number from [0, range).  Crashes if 'range' is
   // 0 or greater than kMaxRange.
-  UInt32 Generate(UInt32 range);
+  uint32_t Generate(uint32_t range);
 
  private:
-  UInt32 state_;
+  uint32_t state_;
   GTEST_DISALLOW_COPY_AND_ASSIGN_(Random);
 };
 
-// Defining a variable of type CompileAssertTypesEqual<T1, T2> will cause a
-// compiler error iff T1 and T2 are different types.
-template <typename T1, typename T2>
-struct CompileAssertTypesEqual;
-
-template <typename T>
-struct CompileAssertTypesEqual<T, T> {
-};
-
-// Removes the reference from a type if it is a reference type,
-// otherwise leaves it unchanged.  This is the same as
-// tr1::remove_reference, which is not widely available yet.
-template <typename T>
-struct RemoveReference { typedef T type; };  // NOLINT
-template <typename T>
-struct RemoveReference<T&> { typedef T type; };  // NOLINT
-
-// A handy wrapper around RemoveReference that works when the argument
-// T depends on template parameters.
-#define GTEST_REMOVE_REFERENCE_(T) \
-    typename ::testing::internal::RemoveReference<T>::type
-
-// Removes const from a type if it is a const type, otherwise leaves
-// it unchanged.  This is the same as tr1::remove_const, which is not
-// widely available yet.
-template <typename T>
-struct RemoveConst { typedef T type; };  // NOLINT
-template <typename T>
-struct RemoveConst<const T> { typedef T type; };  // NOLINT
-
-// MSVC 8.0, Sun C++, and IBM XL C++ have a bug which causes the above
-// definition to fail to remove the const in 'const int[3]' and 'const
-// char[3][4]'.  The following specialization works around the bug.
-template <typename T, size_t N>
-struct RemoveConst<const T[N]> {
-  typedef typename RemoveConst<T>::type type[N];
-};
-
-// A handy wrapper around RemoveConst that works when the argument
-// T depends on template parameters.
-#define GTEST_REMOVE_CONST_(T) \
-    typename ::testing::internal::RemoveConst<T>::type
-
 // Turns const U&, U&, const U, and U all into U.
 #define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \
-    GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T))
+  typename std::remove_const<typename std::remove_reference<T>::type>::type
 
-// IsAProtocolMessage<T>::value is a compile-time bool constant that's
-// true iff T is type proto2::Message or a subclass of it.
+// HasDebugStringAndShortDebugString<T>::value is a compile-time bool constant
+// that's true if and only if T has methods DebugString() and ShortDebugString()
+// that return std::string.
 template <typename T>
-struct IsAProtocolMessage
-    : public bool_constant<
-  std::is_convertible<const T*, const ::proto2::Message*>::value> {
+class HasDebugStringAndShortDebugString {
+ private:
+  template <typename C>
+  static constexpr auto CheckDebugString(C*) -> typename std::is_same<
+      std::string, decltype(std::declval<const C>().DebugString())>::type;
+  template <typename>
+  static constexpr std::false_type CheckDebugString(...);
+
+  template <typename C>
+  static constexpr auto CheckShortDebugString(C*) -> typename std::is_same<
+      std::string, decltype(std::declval<const C>().ShortDebugString())>::type;
+  template <typename>
+  static constexpr std::false_type CheckShortDebugString(...);
+
+  using HasDebugStringType = decltype(CheckDebugString<T>(nullptr));
+  using HasShortDebugStringType = decltype(CheckShortDebugString<T>(nullptr));
+
+ public:
+  static constexpr bool value =
+      HasDebugStringType::value && HasShortDebugStringType::value;
 };
 
+template <typename T>
+constexpr bool HasDebugStringAndShortDebugString<T>::value;
+
 // When the compiler sees expression IsContainerTest<C>(0), if C is an
 // STL-style container class, the first overload of IsContainerTest
 // will be viable (since both C::iterator* and C::const_iterator* are
@@ -961,7 +986,7 @@
 struct IsRecursiveContainerImpl;
 
 template <typename C>
-struct IsRecursiveContainerImpl<C, false> : public false_type {};
+struct IsRecursiveContainerImpl<C, false> : public std::false_type {};
 
 // Since the IsRecursiveContainerImpl depends on the IsContainerTest we need to
 // obey the same inconsistencies as the IsContainerTest, namely check if
@@ -971,9 +996,9 @@
 struct IsRecursiveContainerImpl<C, true> {
   using value_type = decltype(*std::declval<typename C::const_iterator>());
   using type =
-      is_same<typename std::remove_const<
-                  typename std::remove_reference<value_type>::type>::type,
-              C>;
+      std::is_same<typename std::remove_const<
+                       typename std::remove_reference<value_type>::type>::type,
+                   C>;
 };
 
 // IsRecursiveContainer<Type> is a unary compile-time predicate that
@@ -985,13 +1010,6 @@
 template <typename C>
 struct IsRecursiveContainer : public IsRecursiveContainerImpl<C>::type {};
 
-// EnableIf<condition>::type is void when 'Cond' is true, and
-// undefined when 'Cond' is false.  To use SFINAE to make a function
-// overload only apply when a particular expression is true, add
-// "typename EnableIf<expression>::type* = 0" as the last parameter.
-template<bool> struct EnableIf;
-template<> struct EnableIf<true> { typedef void type; };  // NOLINT
-
 // Utilities for native arrays.
 
 // ArrayEq() compares two k-dimensional native arrays using the
@@ -1114,10 +1132,9 @@
   }
 
  private:
-  enum {
-    kCheckTypeIsNotConstOrAReference = StaticAssertTypeEqHelper<
-        Element, GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>::value
-  };
+  static_assert(!std::is_const<Element>::value, "Type must not be const");
+  static_assert(!std::is_reference<Element>::value,
+                "Type must not be a reference");
 
   // Initializes this object with a copy of the input.
   void InitCopy(const Element* array, size_t a_size) {
@@ -1138,8 +1155,6 @@
   const Element* array_;
   size_t size_;
   void (NativeArray::*clone_)(const Element*, size_t);
-
-  GTEST_DISALLOW_ASSIGN_(NativeArray);
 };
 
 // Backport of std::index_sequence.
@@ -1163,32 +1178,44 @@
 // Backport of std::make_index_sequence.
 // It uses O(ln(N)) instantiation depth.
 template <size_t N>
-struct MakeIndexSequence
-    : DoubleSequence<N % 2 == 1, typename MakeIndexSequence<N / 2>::type,
+struct MakeIndexSequenceImpl
+    : DoubleSequence<N % 2 == 1, typename MakeIndexSequenceImpl<N / 2>::type,
                      N / 2>::type {};
 
 template <>
-struct MakeIndexSequence<0> : IndexSequence<> {};
+struct MakeIndexSequenceImpl<0> : IndexSequence<> {};
 
-// FIXME: This implementation of ElemFromList is O(1) in instantiation depth,
-// but it is O(N^2) in total instantiations. Not sure if this is the best
-// tradeoff, as it will make it somewhat slow to compile.
-template <typename T, size_t, size_t>
-struct ElemFromListImpl {};
+template <size_t N>
+using MakeIndexSequence = typename MakeIndexSequenceImpl<N>::type;
 
-template <typename T, size_t I>
-struct ElemFromListImpl<T, I, I> {
-  using type = T;
+template <typename... T>
+using IndexSequenceFor = typename MakeIndexSequence<sizeof...(T)>::type;
+
+template <size_t>
+struct Ignore {
+  Ignore(...);  // NOLINT
 };
 
-// Get the Nth element from T...
-// It uses O(1) instantiation depth.
-template <size_t N, typename I, typename... T>
-struct ElemFromList;
+template <typename>
+struct ElemFromListImpl;
+template <size_t... I>
+struct ElemFromListImpl<IndexSequence<I...>> {
+  // We make Ignore a template to solve a problem with MSVC.
+  // A non-template Ignore would work fine with `decltype(Ignore(I))...`, but
+  // MSVC doesn't understand how to deal with that pack expansion.
+  // Use `0 * I` to have a single instantiation of Ignore.
+  template <typename R>
+  static R Apply(Ignore<0 * I>..., R (*)(), ...);
+};
 
-template <size_t N, size_t... I, typename... T>
-struct ElemFromList<N, IndexSequence<I...>, T...>
-    : ElemFromListImpl<T, N, I>... {};
+template <size_t N, typename... T>
+struct ElemFromList {
+  using type =
+      decltype(ElemFromListImpl<typename MakeIndexSequence<N>::type>::Apply(
+          static_cast<T (*)()>(nullptr)...));
+};
+
+struct FlatTupleConstructTag {};
 
 template <typename... T>
 class FlatTuple;
@@ -1198,11 +1225,11 @@
 
 template <typename... T, size_t I>
 struct FlatTupleElemBase<FlatTuple<T...>, I> {
-  using value_type =
-      typename ElemFromList<I, typename MakeIndexSequence<sizeof...(T)>::type,
-                            T...>::type;
+  using value_type = typename ElemFromList<I, T...>::type;
   FlatTupleElemBase() = default;
-  explicit FlatTupleElemBase(value_type t) : value(std::move(t)) {}
+  template <typename Arg>
+  explicit FlatTupleElemBase(FlatTupleConstructTag, Arg&& t)
+      : value(std::forward<Arg>(t)) {}
   value_type value;
 };
 
@@ -1214,13 +1241,35 @@
     : FlatTupleElemBase<FlatTuple<T...>, Idx>... {
   using Indices = IndexSequence<Idx...>;
   FlatTupleBase() = default;
-  explicit FlatTupleBase(T... t)
-      : FlatTupleElemBase<FlatTuple<T...>, Idx>(std::move(t))... {}
+  template <typename... Args>
+  explicit FlatTupleBase(FlatTupleConstructTag, Args&&... args)
+      : FlatTupleElemBase<FlatTuple<T...>, Idx>(FlatTupleConstructTag{},
+                                                std::forward<Args>(args))... {}
+
+  template <size_t I>
+  const typename ElemFromList<I, T...>::type& Get() const {
+    return FlatTupleElemBase<FlatTuple<T...>, I>::value;
+  }
+
+  template <size_t I>
+  typename ElemFromList<I, T...>::type& Get() {
+    return FlatTupleElemBase<FlatTuple<T...>, I>::value;
+  }
+
+  template <typename F>
+  auto Apply(F&& f) -> decltype(std::forward<F>(f)(this->Get<Idx>()...)) {
+    return std::forward<F>(f)(Get<Idx>()...);
+  }
+
+  template <typename F>
+  auto Apply(F&& f) const -> decltype(std::forward<F>(f)(this->Get<Idx>()...)) {
+    return std::forward<F>(f)(Get<Idx>()...);
+  }
 };
 
 // Analog to std::tuple but with different tradeoffs.
 // This class minimizes the template instantiation depth, thus allowing more
-// elements that std::tuple would. std::tuple has been seen to require an
+// elements than std::tuple would. std::tuple has been seen to require an
 // instantiation depth of more than 10x the number of elements in some
 // implementations.
 // FlatTuple and ElemFromList are not recursive and have a fixed depth
@@ -1231,21 +1280,17 @@
 class FlatTuple
     : private FlatTupleBase<FlatTuple<T...>,
                             typename MakeIndexSequence<sizeof...(T)>::type> {
-  using Indices = typename FlatTuple::FlatTupleBase::Indices;
+  using Indices = typename FlatTupleBase<
+      FlatTuple<T...>, typename MakeIndexSequence<sizeof...(T)>::type>::Indices;
 
  public:
   FlatTuple() = default;
-  explicit FlatTuple(T... t) : FlatTuple::FlatTupleBase(std::move(t)...) {}
+  template <typename... Args>
+  explicit FlatTuple(FlatTupleConstructTag tag, Args&&... args)
+      : FlatTuple::FlatTupleBase(tag, std::forward<Args>(args)...) {}
 
-  template <size_t I>
-  const typename ElemFromList<I, Indices, T...>::type& Get() const {
-    return static_cast<const FlatTupleElemBase<FlatTuple, I>*>(this)->value;
-  }
-
-  template <size_t I>
-  typename ElemFromList<I, Indices, T...>::type& Get() {
-    return static_cast<FlatTupleElemBase<FlatTuple, I>*>(this)->value;
-  }
+  using FlatTuple::FlatTupleBase::Apply;
+  using FlatTuple::FlatTupleBase::Get;
 };
 
 // Utility functions to be called with static_assert to induce deprecation
@@ -1278,6 +1323,22 @@
 }  // namespace internal
 }  // namespace testing
 
+namespace std {
+// Some standard library implementations use `struct tuple_size` and some use
+// `class tuple_size`. Clang warns about the mismatch.
+// https://reviews.llvm.org/D55466
+#ifdef __clang__
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmismatched-tags"
+#endif
+template <typename... Ts>
+struct tuple_size<testing::internal::FlatTuple<Ts...>>
+    : std::integral_constant<size_t, sizeof...(Ts)> {};
+#ifdef __clang__
+#pragma clang diagnostic pop
+#endif
+}  // namespace std
+
 #define GTEST_MESSAGE_AT_(file, line, message, result_type) \
   ::testing::internal::AssertHelper(result_type, file, line, message) \
     = ::testing::Message()
@@ -1300,48 +1361,122 @@
 // Suppress MSVC warning 4072 (unreachable code) for the code following
 // statement if it returns or throws (or doesn't return or throw in some
 // situations).
+// NOTE: The trailing "else" in this expansion is required; it prevents a
+// following top-level "else" from attaching to our "if".
 #define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \
-  if (::testing::internal::AlwaysTrue()) { statement; }
+  if (::testing::internal::AlwaysTrue()) {                        \
+    statement;                                                    \
+  } else                     /* NOLINT */                         \
+    static_assert(true, "")  // User must have a semicolon after expansion.
 
-#define GTEST_TEST_THROW_(statement, expected_exception, fail) \
-  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-  if (::testing::internal::ConstCharPtr gtest_msg = "") { \
-    bool gtest_caught_expected = false; \
-    try { \
-      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
-    } \
-    catch (expected_exception const&) { \
-      gtest_caught_expected = true; \
-    } \
-    catch (...) { \
-      gtest_msg.value = \
-          "Expected: " #statement " throws an exception of type " \
-          #expected_exception ".\n  Actual: it throws a different type."; \
-      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
-    } \
-    if (!gtest_caught_expected) { \
-      gtest_msg.value = \
-          "Expected: " #statement " throws an exception of type " \
-          #expected_exception ".\n  Actual: it throws nothing."; \
-      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
-    } \
-  } else \
-    GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__): \
-      fail(gtest_msg.value)
+#if GTEST_HAS_EXCEPTIONS
+
+namespace testing {
+namespace internal {
+
+class NeverThrown {
+ public:
+  const char* what() const noexcept {
+    return "this exception should never be thrown";
+  }
+};
+
+}  // namespace internal
+}  // namespace testing
+
+#if GTEST_HAS_RTTI
+
+#define GTEST_EXCEPTION_TYPE_(e) ::testing::internal::GetTypeName(typeid(e))
+
+#else  // GTEST_HAS_RTTI
+
+#define GTEST_EXCEPTION_TYPE_(e) \
+  std::string { "an std::exception-derived error" }
+
+#endif  // GTEST_HAS_RTTI
+
+#define GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception)   \
+  catch (typename std::conditional<                                            \
+         std::is_same<typename std::remove_cv<typename std::remove_reference<  \
+                          expected_exception>::type>::type,                    \
+                      std::exception>::value,                                  \
+         const ::testing::internal::NeverThrown&, const std::exception&>::type \
+             e) {                                                              \
+    gtest_msg.value = "Expected: " #statement                                  \
+                      " throws an exception of type " #expected_exception      \
+                      ".\n  Actual: it throws ";                               \
+    gtest_msg.value += GTEST_EXCEPTION_TYPE_(e);                               \
+    gtest_msg.value += " with description \"";                                 \
+    gtest_msg.value += e.what();                                               \
+    gtest_msg.value += "\".";                                                  \
+    goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__);                \
+  }
+
+#else  // GTEST_HAS_EXCEPTIONS
+
+#define GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception)
+
+#endif  // GTEST_HAS_EXCEPTIONS
+
+#define GTEST_TEST_THROW_(statement, expected_exception, fail)              \
+  GTEST_AMBIGUOUS_ELSE_BLOCKER_                                             \
+  if (::testing::internal::TrueWithString gtest_msg{}) {                    \
+    bool gtest_caught_expected = false;                                     \
+    try {                                                                   \
+      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement);            \
+    } catch (expected_exception const&) {                                   \
+      gtest_caught_expected = true;                                         \
+    }                                                                       \
+    GTEST_TEST_THROW_CATCH_STD_EXCEPTION_(statement, expected_exception)    \
+    catch (...) {                                                           \
+      gtest_msg.value = "Expected: " #statement                             \
+                        " throws an exception of type " #expected_exception \
+                        ".\n  Actual: it throws a different type.";         \
+      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__);           \
+    }                                                                       \
+    if (!gtest_caught_expected) {                                           \
+      gtest_msg.value = "Expected: " #statement                             \
+                        " throws an exception of type " #expected_exception \
+                        ".\n  Actual: it throws nothing.";                  \
+      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__);           \
+    }                                                                       \
+  } else /*NOLINT*/                                                         \
+    GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__)                   \
+        : fail(gtest_msg.value.c_str())
+
+#if GTEST_HAS_EXCEPTIONS
+
+#define GTEST_TEST_NO_THROW_CATCH_STD_EXCEPTION_()                \
+  catch (std::exception const& e) {                               \
+    gtest_msg.value = "it throws ";                               \
+    gtest_msg.value += GTEST_EXCEPTION_TYPE_(e);                  \
+    gtest_msg.value += " with description \"";                    \
+    gtest_msg.value += e.what();                                  \
+    gtest_msg.value += "\".";                                     \
+    goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
+  }
+
+#else  // GTEST_HAS_EXCEPTIONS
+
+#define GTEST_TEST_NO_THROW_CATCH_STD_EXCEPTION_()
+
+#endif  // GTEST_HAS_EXCEPTIONS
 
 #define GTEST_TEST_NO_THROW_(statement, fail) \
   GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
-  if (::testing::internal::AlwaysTrue()) { \
+  if (::testing::internal::TrueWithString gtest_msg{}) { \
     try { \
       GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
     } \
+    GTEST_TEST_NO_THROW_CATCH_STD_EXCEPTION_() \
     catch (...) { \
+      gtest_msg.value = "it throws."; \
       goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
     } \
   } else \
     GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \
-      fail("Expected: " #statement " doesn't throw an exception.\n" \
-           "  Actual: it throws.")
+      fail(("Expected: " #statement " doesn't throw an exception.\n" \
+            "  Actual: " + gtest_msg.value).c_str())
 
 #define GTEST_TEST_ANY_THROW_(statement, fail) \
   GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
@@ -1394,16 +1529,23 @@
 
 // Helper macro for defining tests.
 #define GTEST_TEST_(test_suite_name, test_name, parent_class, parent_id)      \
+  static_assert(sizeof(GTEST_STRINGIFY_(test_suite_name)) > 1,                \
+                "test_suite_name must not be empty");                         \
+  static_assert(sizeof(GTEST_STRINGIFY_(test_name)) > 1,                      \
+                "test_name must not be empty");                               \
   class GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)                    \
       : public parent_class {                                                 \
    public:                                                                    \
-    GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() {}                   \
-                                                                              \
-   private:                                                                   \
-    virtual void TestBody();                                                  \
-    static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;     \
+    GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() = default;           \
+    ~GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)() override = default; \
     GTEST_DISALLOW_COPY_AND_ASSIGN_(GTEST_TEST_CLASS_NAME_(test_suite_name,   \
                                                            test_name));       \
+    GTEST_DISALLOW_MOVE_AND_ASSIGN_(GTEST_TEST_CLASS_NAME_(test_suite_name,   \
+                                                           test_name));       \
+                                                                              \
+   private:                                                                   \
+    void TestBody() override;                                                 \
+    static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;     \
   };                                                                          \
                                                                               \
   ::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_suite_name,          \
@@ -1412,9 +1554,9 @@
           #test_suite_name, #test_name, nullptr, nullptr,                     \
           ::testing::internal::CodeLocation(__FILE__, __LINE__), (parent_id), \
           ::testing::internal::SuiteApiResolver<                              \
-              parent_class>::GetSetUpCaseOrSuite(),                           \
+              parent_class>::GetSetUpCaseOrSuite(__FILE__, __LINE__),         \
           ::testing::internal::SuiteApiResolver<                              \
-              parent_class>::GetTearDownCaseOrSuite(),                        \
+              parent_class>::GetTearDownCaseOrSuite(__FILE__, __LINE__),      \
           new ::testing::internal::TestFactoryImpl<GTEST_TEST_CLASS_NAME_(    \
               test_suite_name, test_name)>);                                  \
   void GTEST_TEST_CLASS_NAME_(test_suite_name, test_name)::TestBody()
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-param-util.h b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-param-util.h
index 3ed7d22..d12bd55 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-param-util.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-param-util.h
@@ -42,12 +42,14 @@
 #include <memory>
 #include <set>
 #include <tuple>
+#include <type_traits>
 #include <utility>
 #include <vector>
 
 #include "gtest/internal/gtest-internal.h"
 #include "gtest/internal/gtest-port.h"
 #include "gtest/gtest-printers.h"
+#include "gtest/gtest-test-part.h"
 
 namespace testing {
 // Input to a parameterized test name generator, describing a test parameter.
@@ -457,7 +459,7 @@
 
   // Base part of test suite name for display purposes.
   virtual const std::string& GetTestSuiteName() const = 0;
-  // Test case id to verify identity.
+  // Test suite id to verify identity.
   virtual TypeId GetTestSuiteTypeId() const = 0;
   // UnitTest class invokes this method to register tests in this
   // test suite right before running them in RUN_ALL_TESTS macro.
@@ -474,6 +476,17 @@
 
 // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
 //
+// Report the name of a test suite as safe to ignore
+// as the side effect of construction of this type.
+struct MarkAsIgnored {
+  explicit MarkAsIgnored(const char* test_suite);
+};
+
+GTEST_API_ void InsertSyntheticTestCase(const std::string& name,
+                                        CodeLocation location, bool has_test_p);
+
+// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
+//
 // ParameterizedTestSuiteInfo accumulates tests obtained from TEST_P
 // macro invocations for a particular test suite and generators
 // obtained from INSTANTIATE_TEST_SUITE_P macro invocations for that
@@ -494,11 +507,11 @@
                                       CodeLocation code_location)
       : test_suite_name_(name), code_location_(code_location) {}
 
-  // Test case base name for display purposes.
+  // Test suite base name for display purposes.
   const std::string& GetTestSuiteName() const override {
     return test_suite_name_;
   }
-  // Test case id to verify identity.
+  // Test suite id to verify identity.
   TypeId GetTestSuiteTypeId() const override { return GetTypeId<TestSuite>(); }
   // TEST_P macro uses AddTestPattern() to record information
   // about a single test in a LocalTestInfo structure.
@@ -507,9 +520,10 @@
   // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is
   // test suite base name and DoBar is test base name.
   void AddTestPattern(const char* test_suite_name, const char* test_base_name,
-                      TestMetaFactoryBase<ParamType>* meta_factory) {
-    tests_.push_back(std::shared_ptr<TestInfo>(
-        new TestInfo(test_suite_name, test_base_name, meta_factory)));
+                      TestMetaFactoryBase<ParamType>* meta_factory,
+                      CodeLocation code_location) {
+    tests_.push_back(std::shared_ptr<TestInfo>(new TestInfo(
+        test_suite_name, test_base_name, meta_factory, code_location)));
   }
   // INSTANTIATE_TEST_SUITE_P macro uses AddGenerator() to record information
   // about a generator.
@@ -522,11 +536,13 @@
     return 0;  // Return value used only to run this method in namespace scope.
   }
   // UnitTest class invokes this method to register tests in this test suite
-  // test suites right before running tests in RUN_ALL_TESTS macro.
+  // right before running tests in RUN_ALL_TESTS macro.
   // This method should not be called more than once on any single
   // instance of a ParameterizedTestSuiteInfoBase derived class.
   // UnitTest has a guard to prevent from calling this method more than once.
   void RegisterTests() override {
+    bool generated_instantiations = false;
+
     for (typename TestInfoContainer::iterator test_it = tests_.begin();
          test_it != tests_.end(); ++test_it) {
       std::shared_ptr<TestInfo> test_info = *test_it;
@@ -549,6 +565,8 @@
         for (typename ParamGenerator<ParamType>::iterator param_it =
                  generator.begin();
              param_it != generator.end(); ++param_it, ++i) {
+          generated_instantiations = true;
+
           Message test_name_stream;
 
           std::string param_name = name_func(
@@ -565,18 +583,27 @@
 
           test_param_names.insert(param_name);
 
-          test_name_stream << test_info->test_base_name << "/" << param_name;
+          if (!test_info->test_base_name.empty()) {
+            test_name_stream << test_info->test_base_name << "/";
+          }
+          test_name_stream << param_name;
           MakeAndRegisterTestInfo(
               test_suite_name.c_str(), test_name_stream.GetString().c_str(),
               nullptr,  // No type parameter.
-              PrintToString(*param_it).c_str(), code_location_,
+              PrintToString(*param_it).c_str(), test_info->code_location,
               GetTestSuiteTypeId(),
-              SuiteApiResolver<TestSuite>::GetSetUpCaseOrSuite(),
-              SuiteApiResolver<TestSuite>::GetTearDownCaseOrSuite(),
+              SuiteApiResolver<TestSuite>::GetSetUpCaseOrSuite(file, line),
+              SuiteApiResolver<TestSuite>::GetTearDownCaseOrSuite(file, line),
               test_info->test_meta_factory->CreateTestFactory(*param_it));
         }  // for param_it
       }  // for gen_it
     }  // for test_it
+
+    if (!generated_instantiations) {
+      // There are no generators, or they all generate nothing ...
+      InsertSyntheticTestCase(GetTestSuiteName(), code_location_,
+                              !tests_.empty());
+    }
   }    // RegisterTests
 
  private:
@@ -584,14 +611,17 @@
   // with TEST_P macro.
   struct TestInfo {
     TestInfo(const char* a_test_suite_base_name, const char* a_test_base_name,
-             TestMetaFactoryBase<ParamType>* a_test_meta_factory)
+             TestMetaFactoryBase<ParamType>* a_test_meta_factory,
+             CodeLocation a_code_location)
         : test_suite_base_name(a_test_suite_base_name),
           test_base_name(a_test_base_name),
-          test_meta_factory(a_test_meta_factory) {}
+          test_meta_factory(a_test_meta_factory),
+          code_location(a_code_location) {}
 
     const std::string test_suite_base_name;
     const std::string test_base_name;
     const std::unique_ptr<TestMetaFactoryBase<ParamType> > test_meta_factory;
+    const CodeLocation code_location;
   };
   using TestInfoContainer = ::std::vector<std::shared_ptr<TestInfo> >;
   // Records data received from INSTANTIATE_TEST_SUITE_P macros:
@@ -714,6 +744,34 @@
   GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestSuiteRegistry);
 };
 
+// Keep track of which type-parameterized test suites are defined and
+// where, as well as which are instantiated. This allows subsequently
+// identifying suites that are defined but never used.
+class TypeParameterizedTestSuiteRegistry {
+ public:
+  // Add a suite definition
+  void RegisterTestSuite(const char* test_suite_name,
+                         CodeLocation code_location);
+
+  // Add an instantiation of a suite.
+  void RegisterInstantiation(const char* test_suite_name);
+
+  // For each suite reported as defined but not reported as instantiated,
+  // emit a test that reports that fact (configurably, as an error).
+  void CheckForInstantiations();
+
+ private:
+  struct TypeParameterizedTestSuiteInfo {
+    explicit TypeParameterizedTestSuiteInfo(CodeLocation c)
+        : code_location(c), instantiated(false) {}
+
+    CodeLocation code_location;
+    bool instantiated;
+  };
+
+  std::map<std::string, TypeParameterizedTestSuiteInfo> suites_;
+};
+
 }  // namespace internal
 
 // Forward declarations of ValuesIn(), which is implemented in
@@ -725,10 +783,15 @@
 namespace internal {
 // Used in the Values() function to provide polymorphic capabilities.
 
+#ifdef _MSC_VER
+#pragma warning(push)
+#pragma warning(disable : 4100)
+#endif
+
 template <typename... Ts>
 class ValueArray {
  public:
-  ValueArray(Ts... v) : v_{std::move(v)...} {}
+  explicit ValueArray(Ts... v) : v_(FlatTupleConstructTag{}, std::move(v)...) {}
 
   template <typename T>
   operator ParamGenerator<T>() const {  // NOLINT
@@ -744,6 +807,10 @@
   FlatTuple<Ts...> v_;
 };
 
+#ifdef _MSC_VER
+#pragma warning(pop)
+#endif
+
 template <typename... T>
 class CartesianProductGenerator
     : public ParamGeneratorInterface<::std::tuple<T...>> {
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-port-arch.h b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-port-arch.h
index 779872d..813bf2c 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-port-arch.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-port-arch.h
@@ -68,6 +68,7 @@
 # define GTEST_OS_OS2 1
 #elif defined __APPLE__
 # define GTEST_OS_MAC 1
+# include <TargetConditionals.h>
 # if TARGET_OS_IPHONE
 #  define GTEST_OS_IOS 1
 # endif
@@ -100,6 +101,12 @@
 # define GTEST_OS_OPENBSD 1
 #elif defined __QNX__
 # define GTEST_OS_QNX 1
+#elif defined(__HAIKU__)
+#define GTEST_OS_HAIKU 1
+#elif defined ESP8266
+#define GTEST_OS_ESP8266 1
+#elif defined ESP32
+#define GTEST_OS_ESP32 1
 #endif  // __CYGWIN__
 
 #endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-port.h b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-port.h
index 4f80c2e..6b66362 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-port.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-port.h
@@ -117,6 +117,7 @@
 //   GTEST_OS_FREEBSD  - FreeBSD
 //   GTEST_OS_FUCHSIA  - Fuchsia
 //   GTEST_OS_GNU_KFREEBSD - GNU/kFreeBSD
+//   GTEST_OS_HAIKU    - Haiku
 //   GTEST_OS_HPUX     - HP-UX
 //   GTEST_OS_LINUX    - Linux
 //     GTEST_OS_LINUX_ANDROID - Google Android
@@ -136,7 +137,7 @@
 //     GTEST_OS_WINDOWS_RT       - Windows Store App/WinRT
 //   GTEST_OS_ZOS      - z/OS
 //
-// Among the platforms, Cygwin, Linux, Max OS X, and Windows have the
+// Among the platforms, Cygwin, Linux, Mac OS X, and Windows have the
 // most stable support.  Since core members of the Google Test project
 // don't have access to other platforms, support for them may be less
 // stable.  If you notice any problems on your platform, please notify
@@ -189,23 +190,32 @@
 //   GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
 //   GTEST_ATTRIBUTE_UNUSED_  - declares that a class' instances or a
 //                              variable don't have to be used.
-//   GTEST_DISALLOW_ASSIGN_   - disables operator=.
+//   GTEST_DISALLOW_ASSIGN_   - disables copy operator=.
 //   GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=.
+//   GTEST_DISALLOW_MOVE_ASSIGN_   - disables move operator=.
+//   GTEST_DISALLOW_MOVE_AND_ASSIGN_ - disables move ctor and operator=.
 //   GTEST_MUST_USE_RESULT_   - declares that a function's result must be used.
 //   GTEST_INTENTIONAL_CONST_COND_PUSH_ - start code section where MSVC C4127 is
 //                                        suppressed (constant conditional).
 //   GTEST_INTENTIONAL_CONST_COND_POP_  - finish code section where MSVC C4127
 //                                        is suppressed.
+//   GTEST_INTERNAL_HAS_ANY - for enabling UniversalPrinter<std::any> or
+//                            UniversalPrinter<absl::any> specializations.
+//   GTEST_INTERNAL_HAS_OPTIONAL - for enabling
+//                                 UniversalPrinter<std::optional> or
+//                                 UniversalPrinter<absl::optional>
+//                                 specializations.
+//   GTEST_INTERNAL_HAS_STRING_VIEW - for enabling Matcher<std::string_view> or
+//                                    Matcher<absl::string_view>
+//                                    specializations.
+//   GTEST_INTERNAL_HAS_VARIANT - for enabling UniversalPrinter<std::variant> or
+//                                UniversalPrinter<absl::variant>
+//                                specializations.
 //
 // Synchronization:
 //   Mutex, MutexLock, ThreadLocal, GetThreadCount()
 //                            - synchronization primitives.
 //
-// Template meta programming:
-//   IteratorTraits - partial implementation of std::iterator_traits, which
-//                    is not available in libCstd when compiled with Sun C++.
-//
-//
 // Regular expressions:
 //   RE             - a simple regular expression class using the POSIX
 //                    Extended Regular Expression syntax on UNIX-like platforms
@@ -227,8 +237,7 @@
 //
 // Integer types:
 //   TypeWithSize   - maps an integer to a int type.
-//   Int32, UInt32, Int64, UInt64, TimeInMillis
-//                  - integers of known sizes.
+//   TimeInMillis   - an integer type of known size.
 //   BiggestInt     - the biggest signed integer type.
 //
 // Command-line utilities:
@@ -239,7 +248,7 @@
 // Environment variable utilities:
 //   GetEnv()             - gets the value of an environment variable.
 //   BoolFromGTestEnv()   - parses a bool environment variable.
-//   Int32FromGTestEnv()  - parses an Int32 environment variable.
+//   Int32FromGTestEnv()  - parses an int32_t environment variable.
 //   StringFromGTestEnv() - parses a string environment variable.
 //
 // Deprecation warnings:
@@ -252,7 +261,10 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <memory>
+
+#include <cerrno>
+#include <cstdint>
+#include <limits>
 #include <type_traits>
 
 #ifndef _WIN32_WCE
@@ -265,16 +277,15 @@
 # include <TargetConditionals.h>
 #endif
 
-#include <algorithm>  // NOLINT
-#include <iostream>   // NOLINT
-#include <sstream>    // NOLINT
-#include <string>     // NOLINT
+#include <iostream>  // NOLINT
+#include <locale>
+#include <memory>
+#include <string>  // NOLINT
 #include <tuple>
-#include <utility>
 #include <vector>  // NOLINT
 
-#include "gtest/internal/gtest-port-arch.h"
 #include "gtest/internal/custom/gtest-port.h"
+#include "gtest/internal/gtest-port-arch.h"
 
 #if !defined(GTEST_DEV_EMAIL_)
 # define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com"
@@ -361,7 +372,8 @@
 #  include <android/api-level.h>  // NOLINT
 #endif
 
-// Defines this to true iff Google Test can use POSIX regular expressions.
+// Defines this to true if and only if Google Test can use POSIX regular
+// expressions.
 #ifndef GTEST_HAS_POSIX_RE
 # if GTEST_OS_LINUX_ANDROID
 // On Android, <regex.h> is only available starting with Gingerbread.
@@ -402,7 +414,7 @@
 // The user didn't tell us whether exceptions are enabled, so we need
 // to figure it out.
 # if defined(_MSC_VER) && defined(_CPPUNWIND)
-// MSVC defines _CPPUNWIND to 1 iff exceptions are enabled.
+// MSVC defines _CPPUNWIND to 1 if and only if exceptions are enabled.
 #  define GTEST_HAS_EXCEPTIONS 1
 # elif defined(__BORLANDC__)
 // C++Builder's implementation of the STL uses the _HAS_EXCEPTIONS
@@ -413,16 +425,17 @@
 #  endif  // _HAS_EXCEPTIONS
 #  define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS
 # elif defined(__clang__)
-// clang defines __EXCEPTIONS iff exceptions are enabled before clang 220714,
-// but iff cleanups are enabled after that. In Obj-C++ files, there can be
-// cleanups for ObjC exceptions which also need cleanups, even if C++ exceptions
-// are disabled. clang has __has_feature(cxx_exceptions) which checks for C++
-// exceptions starting at clang r206352, but which checked for cleanups prior to
-// that. To reliably check for C++ exception availability with clang, check for
+// clang defines __EXCEPTIONS if and only if exceptions are enabled before clang
+// 220714, but if and only if cleanups are enabled after that. In Obj-C++ files,
+// there can be cleanups for ObjC exceptions which also need cleanups, even if
+// C++ exceptions are disabled. clang has __has_feature(cxx_exceptions) which
+// checks for C++ exceptions starting at clang r206352, but which checked for
+// cleanups prior to that. To reliably check for C++ exception availability with
+// clang, check for
 // __EXCEPTIONS && __has_feature(cxx_exceptions).
 #  define GTEST_HAS_EXCEPTIONS (__EXCEPTIONS && __has_feature(cxx_exceptions))
 # elif defined(__GNUC__) && __EXCEPTIONS
-// gcc defines __EXCEPTIONS to 1 iff exceptions are enabled.
+// gcc defines __EXCEPTIONS to 1 if and only if exceptions are enabled.
 #  define GTEST_HAS_EXCEPTIONS 1
 # elif defined(__SUNPRO_CC)
 // Sun Pro CC supports exceptions.  However, there is no compile-time way of
@@ -430,7 +443,7 @@
 // they are enabled unless the user tells us otherwise.
 #  define GTEST_HAS_EXCEPTIONS 1
 # elif defined(__IBMCPP__) && __EXCEPTIONS
-// xlC defines __EXCEPTIONS to 1 iff exceptions are enabled.
+// xlC defines __EXCEPTIONS to 1 if and only if exceptions are enabled.
 #  define GTEST_HAS_EXCEPTIONS 1
 # elif defined(__HP_aCC)
 // Exception handling is in effect by default in HP aCC compiler. It has to
@@ -443,23 +456,15 @@
 # endif  // defined(_MSC_VER) || defined(__BORLANDC__)
 #endif  // GTEST_HAS_EXCEPTIONS
 
-#if !defined(GTEST_HAS_STD_STRING)
-// Even though we don't use this macro any longer, we keep it in case
-// some clients still depend on it.
-# define GTEST_HAS_STD_STRING 1
-#elif !GTEST_HAS_STD_STRING
-// The user told us that ::std::string isn't available.
-# error "::std::string isn't available."
-#endif  // !defined(GTEST_HAS_STD_STRING)
-
 #ifndef GTEST_HAS_STD_WSTRING
 // The user didn't tell us whether ::std::wstring is available, so we need
 // to figure it out.
 // Cygwin 1.7 and below doesn't support ::std::wstring.
 // Solaris' libc++ doesn't support it either.  Android has
 // no support for it at least as recent as Froyo (2.2).
-# define GTEST_HAS_STD_WSTRING \
-    (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS))
+#define GTEST_HAS_STD_WSTRING                                         \
+  (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \
+     GTEST_OS_HAIKU || GTEST_OS_ESP32 || GTEST_OS_ESP8266))
 
 #endif  // GTEST_HAS_STD_WSTRING
 
@@ -470,13 +475,14 @@
 
 # ifdef _MSC_VER
 
-#  ifdef _CPPRTTI  // MSVC defines this macro iff RTTI is enabled.
+#ifdef _CPPRTTI  // MSVC defines this macro if and only if RTTI is enabled.
 #   define GTEST_HAS_RTTI 1
 #  else
 #   define GTEST_HAS_RTTI 0
 #  endif
 
-// Starting with version 4.3.2, gcc defines __GXX_RTTI iff RTTI is enabled.
+// Starting with version 4.3.2, gcc defines __GXX_RTTI if and only if RTTI is
+// enabled.
 # elif defined(__GNUC__)
 
 #  ifdef __GXX_RTTI
@@ -533,10 +539,11 @@
 //
 // To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0
 // to your compiler flags.
-#define GTEST_HAS_PTHREAD                                             \
-  (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX || GTEST_OS_QNX || \
+#define GTEST_HAS_PTHREAD                                                      \
+  (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX || GTEST_OS_QNX ||          \
    GTEST_OS_FREEBSD || GTEST_OS_NACL || GTEST_OS_NETBSD || GTEST_OS_FUCHSIA || \
-   GTEST_OS_DRAGONFLY || GTEST_OS_GNU_KFREEBSD || GTEST_OS_OPENBSD)
+   GTEST_OS_DRAGONFLY || GTEST_OS_GNU_KFREEBSD || GTEST_OS_OPENBSD ||          \
+   GTEST_OS_HAIKU)
 #endif  // GTEST_HAS_PTHREAD
 
 #if GTEST_HAS_PTHREAD
@@ -581,7 +588,8 @@
 #ifndef GTEST_HAS_STREAM_REDIRECTION
 // By default, we assume that stream redirection is supported on all
 // platforms except known mobile ones.
-# if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
+    GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266
 #  define GTEST_HAS_STREAM_REDIRECTION 0
 # else
 #  define GTEST_HAS_STREAM_REDIRECTION 1
@@ -590,13 +598,12 @@
 
 // Determines whether to support death tests.
 // pops up a dialog window that cannot be suppressed programmatically.
-#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS ||   \
-     (GTEST_OS_MAC && !GTEST_OS_IOS) ||                         \
-     (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER) ||                  \
-     GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX || \
-     GTEST_OS_OPENBSD || GTEST_OS_QNX || GTEST_OS_FREEBSD || \
-     GTEST_OS_NETBSD || GTEST_OS_FUCHSIA || GTEST_OS_DRAGONFLY || \
-     GTEST_OS_GNU_KFREEBSD)
+#if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS ||             \
+     (GTEST_OS_MAC && !GTEST_OS_IOS) ||                                   \
+     (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER) || GTEST_OS_WINDOWS_MINGW ||  \
+     GTEST_OS_AIX || GTEST_OS_HPUX || GTEST_OS_OPENBSD || GTEST_OS_QNX || \
+     GTEST_OS_FREEBSD || GTEST_OS_NETBSD || GTEST_OS_FUCHSIA ||           \
+     GTEST_OS_DRAGONFLY || GTEST_OS_GNU_KFREEBSD || GTEST_OS_HAIKU)
 # define GTEST_HAS_DEATH_TEST 1
 #endif
 
@@ -676,16 +683,27 @@
 #endif
 
 
-// A macro to disallow operator=
+// A macro to disallow copy operator=
 // This should be used in the private: declarations for a class.
 #define GTEST_DISALLOW_ASSIGN_(type) \
-  void operator=(type const &) = delete
+  type& operator=(type const &) = delete
 
 // A macro to disallow copy constructor and operator=
 // This should be used in the private: declarations for a class.
 #define GTEST_DISALLOW_COPY_AND_ASSIGN_(type) \
-  type(type const &) = delete; \
-  GTEST_DISALLOW_ASSIGN_(type)
+  type(type const&) = delete;                 \
+  type& operator=(type const&) = delete
+
+// A macro to disallow move operator=
+// This should be used in the private: declarations for a class.
+#define GTEST_DISALLOW_MOVE_ASSIGN_(type) \
+  type& operator=(type &&) noexcept = delete
+
+// A macro to disallow move constructor and operator=
+// This should be used in the private: declarations for a class.
+#define GTEST_DISALLOW_MOVE_AND_ASSIGN_(type) \
+  type(type&&) noexcept = delete;             \
+  type& operator=(type&&) noexcept = delete
 
 // Tell the compiler to warn about unused return values for functions declared
 // with this macro.  The macro should be used on function declarations
@@ -856,30 +874,6 @@
 // expression is false, compiler will issue an error containing this identifier.
 #define GTEST_COMPILE_ASSERT_(expr, msg) static_assert(expr, #msg)
 
-// StaticAssertTypeEqHelper is used by StaticAssertTypeEq defined in gtest.h.
-//
-// This template is declared, but intentionally undefined.
-template <typename T1, typename T2>
-struct StaticAssertTypeEqHelper;
-
-template <typename T>
-struct StaticAssertTypeEqHelper<T, T> {
-  enum { value = true };
-};
-
-// Same as std::is_same<>.
-template <typename T, typename U>
-struct IsSame {
-  enum { value = false };
-};
-template <typename T>
-struct IsSame<T, T> {
-  enum { value = true };
-};
-
-// Evaluates to the number of elements in 'array'.
-#define GTEST_ARRAY_SIZE_(array) (sizeof(array) / sizeof(array[0]))
-
 // A helper for suppressing warnings on constant condition.  It just
 // returns 'condition'.
 GTEST_API_ bool IsTrue(bool condition);
@@ -907,9 +901,9 @@
   // Returns the string representation of the regex.
   const char* pattern() const { return pattern_; }
 
-  // FullMatch(str, re) returns true iff regular expression re matches
-  // the entire str.
-  // PartialMatch(str, re) returns true iff regular expression re
+  // FullMatch(str, re) returns true if and only if regular expression re
+  // matches the entire str.
+  // PartialMatch(str, re) returns true if and only if regular expression re
   // matches a substring of str (including str itself).
   static bool FullMatch(const ::std::string& str, const RE& re) {
     return FullMatch(str.c_str(), re);
@@ -936,8 +930,6 @@
   const char* full_pattern_;  // For FullMatch();
 
 # endif
-
-  GTEST_DISALLOW_ASSIGN_(RE);
 };
 
 #endif  // GTEST_USES_PCRE
@@ -1027,19 +1019,6 @@
     GTEST_LOG_(FATAL) << #posix_call << "failed with error " \
                       << gtest_error
 
-// Adds reference to a type if it is not a reference type,
-// otherwise leaves it unchanged.  This is the same as
-// tr1::add_reference, which is not widely available yet.
-template <typename T>
-struct AddReference { typedef T& type; };  // NOLINT
-template <typename T>
-struct AddReference<T&> { typedef T& type; };  // NOLINT
-
-// A handy wrapper around AddReference that works when the argument T
-// depends on template parameters.
-#define GTEST_ADD_REFERENCE_(T) \
-    typename ::testing::internal::AddReference<T>::type
-
 // Transforms "T" into "const T&" according to standard reference collapsing
 // rules (this is only needed as a backport for C++98 compilers that do not
 // support reference collapsing). Specifically, it transforms:
@@ -1264,7 +1243,8 @@
   void Reset(Handle handle);
 
  private:
-  // Returns true iff the handle is a valid handle object that can be closed.
+  // Returns true if and only if the handle is a valid handle object that can be
+  // closed.
   bool IsCloseable() const;
 
   Handle handle_;
@@ -1366,7 +1346,8 @@
   // When non-NULL, used to block execution until the controller thread
   // notifies.
   Notification* const thread_can_start_;
-  bool finished_;  // true iff we know that the thread function has finished.
+  bool finished_;  // true if and only if we know that the thread function has
+                   // finished.
   pthread_t thread_;  // The native thread object.
 
   GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam);
@@ -1631,7 +1612,7 @@
   class DefaultValueHolderFactory : public ValueHolderFactory {
    public:
     DefaultValueHolderFactory() {}
-    virtual ValueHolder* MakeNewHolder() const { return new ValueHolder(); }
+    ValueHolder* MakeNewHolder() const override { return new ValueHolder(); }
 
    private:
     GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory);
@@ -1640,7 +1621,7 @@
   class InstanceValueHolderFactory : public ValueHolderFactory {
    public:
     explicit InstanceValueHolderFactory(const T& value) : value_(value) {}
-    virtual ValueHolder* MakeNewHolder() const {
+    ValueHolder* MakeNewHolder() const override {
       return new ValueHolder(value_);
     }
 
@@ -1840,7 +1821,7 @@
   class DefaultValueHolderFactory : public ValueHolderFactory {
    public:
     DefaultValueHolderFactory() {}
-    virtual ValueHolder* MakeNewHolder() const { return new ValueHolder(); }
+    ValueHolder* MakeNewHolder() const override { return new ValueHolder(); }
 
    private:
     GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory);
@@ -1849,7 +1830,7 @@
   class InstanceValueHolderFactory : public ValueHolderFactory {
    public:
     explicit InstanceValueHolderFactory(const T& value) : value_(value) {}
-    virtual ValueHolder* MakeNewHolder() const {
+    ValueHolder* MakeNewHolder() const override {
       return new ValueHolder(value_);
     }
 
@@ -1919,47 +1900,12 @@
 // we cannot detect it.
 GTEST_API_ size_t GetThreadCount();
 
-template <bool bool_value>
-struct bool_constant {
-  typedef bool_constant<bool_value> type;
-  static const bool value = bool_value;
-};
-template <bool bool_value> const bool bool_constant<bool_value>::value;
-
-typedef bool_constant<false> false_type;
-typedef bool_constant<true> true_type;
-
-template <typename T, typename U>
-struct is_same : public false_type {};
-
-template <typename T>
-struct is_same<T, T> : public true_type {};
-
-template <typename Iterator>
-struct IteratorTraits {
-  typedef typename Iterator::value_type value_type;
-};
-
-
-template <typename T>
-struct IteratorTraits<T*> {
-  typedef T value_type;
-};
-
-template <typename T>
-struct IteratorTraits<const T*> {
-  typedef T value_type;
-};
-
 #if GTEST_OS_WINDOWS
 # define GTEST_PATH_SEP_ "\\"
 # define GTEST_HAS_ALT_PATH_SEP_ 1
-// The biggest signed integer type the compiler supports.
-typedef __int64 BiggestInt;
 #else
 # define GTEST_PATH_SEP_ "/"
 # define GTEST_HAS_ALT_PATH_SEP_ 0
-typedef long long BiggestInt;  // NOLINT
 #endif  // GTEST_OS_WINDOWS
 
 // Utilities for char.
@@ -2024,16 +1970,16 @@
 typedef struct _stat StatStruct;
 
 # ifdef __BORLANDC__
-inline int IsATTY(int fd) { return isatty(fd); }
+inline int DoIsATTY(int fd) { return isatty(fd); }
 inline int StrCaseCmp(const char* s1, const char* s2) {
   return stricmp(s1, s2);
 }
 inline char* StrDup(const char* src) { return strdup(src); }
 # else  // !__BORLANDC__
 #  if GTEST_OS_WINDOWS_MOBILE
-inline int IsATTY(int /* fd */) { return 0; }
+inline int DoIsATTY(int /* fd */) { return 0; }
 #  else
-inline int IsATTY(int fd) { return _isatty(fd); }
+inline int DoIsATTY(int fd) { return _isatty(fd); }
 #  endif  // GTEST_OS_WINDOWS_MOBILE
 inline int StrCaseCmp(const char* s1, const char* s2) {
   return _stricmp(s1, s2);
@@ -2054,12 +2000,28 @@
 }
 # endif  // GTEST_OS_WINDOWS_MOBILE
 
+#elif GTEST_OS_ESP8266
+typedef struct stat StatStruct;
+
+inline int FileNo(FILE* file) { return fileno(file); }
+inline int DoIsATTY(int fd) { return isatty(fd); }
+inline int Stat(const char* path, StatStruct* buf) {
+  // stat function not implemented on ESP8266
+  return 0;
+}
+inline int StrCaseCmp(const char* s1, const char* s2) {
+  return strcasecmp(s1, s2);
+}
+inline char* StrDup(const char* src) { return strdup(src); }
+inline int RmDir(const char* dir) { return rmdir(dir); }
+inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); }
+
 #else
 
 typedef struct stat StatStruct;
 
 inline int FileNo(FILE* file) { return fileno(file); }
-inline int IsATTY(int fd) { return isatty(fd); }
+inline int DoIsATTY(int fd) { return isatty(fd); }
 inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); }
 inline int StrCaseCmp(const char* s1, const char* s2) {
   return strcasecmp(s1, s2);
@@ -2070,14 +2032,21 @@
 
 #endif  // GTEST_OS_WINDOWS
 
+inline int IsATTY(int fd) {
+  // DoIsATTY might change errno (for example ENOTTY in case you redirect stdout
+  // to a file on Linux), which is unexpected, so save the previous value, and
+  // restore it after the call.
+  int savedErrno = errno;
+  int isAttyValue = DoIsATTY(fd);
+  errno = savedErrno;
+
+  return isAttyValue;
+}
+
 // Functions deprecated by MSVC 8.0.
 
 GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
 
-inline const char* StrNCpy(char* dest, const char* src, size_t n) {
-  return strncpy(dest, src, n);
-}
-
 // ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and
 // StrError() aren't needed on Windows CE at this time and thus not
 // defined there.
@@ -2086,7 +2055,15 @@
 inline int ChDir(const char* dir) { return chdir(dir); }
 #endif
 inline FILE* FOpen(const char* path, const char* mode) {
+#if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
+  struct wchar_codecvt : public std::codecvt<wchar_t, char, std::mbstate_t> {};
+  std::wstring_convert<wchar_codecvt> converter;
+  std::wstring wide_path = converter.from_bytes(path);
+  std::wstring wide_mode = converter.from_bytes(mode);
+  return _wfopen(wide_path.c_str(), wide_mode.c_str());
+#else  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
   return fopen(path, mode);
+#endif  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW
 }
 #if !GTEST_OS_WINDOWS_MOBILE
 inline FILE *FReopen(const char* path, const char* mode, FILE* stream) {
@@ -2106,8 +2083,9 @@
 inline const char* StrError(int errnum) { return strerror(errnum); }
 #endif
 inline const char* GetEnv(const char* name) {
-#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT
-  // We are on Windows CE, which has no environment variables.
+#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
+    GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266
+  // We are on an embedded platform, which has no environment variables.
   static_cast<void>(name);  // To prevent 'unused argument' warning.
   return nullptr;
 #elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
@@ -2149,15 +2127,13 @@
 # define GTEST_SNPRINTF_ snprintf
 #endif
 
-// The maximum number a BiggestInt can represent.  This definition
-// works no matter BiggestInt is represented in one's complement or
-// two's complement.
+// The biggest signed integer type the compiler supports.
 //
-// We cannot rely on numeric_limits in STL, as __int64 and long long
-// are not part of standard C++ and numeric_limits doesn't need to be
-// defined for them.
-const BiggestInt kMaxBiggestInt =
-    ~(static_cast<BiggestInt>(1) << (8*sizeof(BiggestInt) - 1));
+// long long is guaranteed to be at least 64-bits in C++11.
+using BiggestInt = long long;  // NOLINT
+
+// The maximum number a BiggestInt can represent.
+constexpr BiggestInt kMaxBiggestInt = (std::numeric_limits<BiggestInt>::max)();
 
 // This template class serves as a compile-time function from size to
 // type.  It maps a size in bytes to a primitive type with that
@@ -2182,40 +2158,27 @@
  public:
   // This prevents the user from using TypeWithSize<N> with incorrect
   // values of N.
-  typedef void UInt;
+  using UInt = void;
 };
 
 // The specialization for size 4.
 template <>
 class TypeWithSize<4> {
  public:
-  // unsigned int has size 4 in both gcc and MSVC.
-  //
-  // As base/basictypes.h doesn't compile on Windows, we cannot use
-  // uint32, uint64, and etc here.
-  typedef int Int;
-  typedef unsigned int UInt;
+  using Int = std::int32_t;
+  using UInt = std::uint32_t;
 };
 
 // The specialization for size 8.
 template <>
 class TypeWithSize<8> {
  public:
-#if GTEST_OS_WINDOWS
-  typedef __int64 Int;
-  typedef unsigned __int64 UInt;
-#else
-  typedef long long Int;  // NOLINT
-  typedef unsigned long long UInt;  // NOLINT
-#endif  // GTEST_OS_WINDOWS
+  using Int = std::int64_t;
+  using UInt = std::uint64_t;
 };
 
 // Integer types of known sizes.
-typedef TypeWithSize<4>::Int Int32;
-typedef TypeWithSize<4>::UInt UInt32;
-typedef TypeWithSize<8>::Int Int64;
-typedef TypeWithSize<8>::UInt UInt64;
-typedef TypeWithSize<8>::Int TimeInMillis;  // Represents time in milliseconds.
+using TimeInMillis = int64_t;  // Represents time in milliseconds.
 
 // Utilities for command line flags and environment variables.
 
@@ -2234,7 +2197,7 @@
 // Macros for declaring flags.
 # define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name)
 # define GTEST_DECLARE_int32_(name) \
-    GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name)
+    GTEST_API_ extern std::int32_t GTEST_FLAG(name)
 # define GTEST_DECLARE_string_(name) \
     GTEST_API_ extern ::std::string GTEST_FLAG(name)
 
@@ -2242,7 +2205,7 @@
 # define GTEST_DEFINE_bool_(name, default_val, doc) \
     GTEST_API_ bool GTEST_FLAG(name) = (default_val)
 # define GTEST_DEFINE_int32_(name, default_val, doc) \
-    GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val)
+    GTEST_API_ std::int32_t GTEST_FLAG(name) = (default_val)
 # define GTEST_DEFINE_string_(name, default_val, doc) \
     GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val)
 
@@ -2257,12 +2220,13 @@
 // Parses 'str' for a 32-bit signed integer.  If successful, writes the result
 // to *value and returns true; otherwise leaves *value unchanged and returns
 // false.
-bool ParseInt32(const Message& src_text, const char* str, Int32* value);
+GTEST_API_ bool ParseInt32(const Message& src_text, const char* str,
+                           int32_t* value);
 
-// Parses a bool/Int32/string from the environment variable
+// Parses a bool/int32_t/string from the environment variable
 // corresponding to the given Google Test flag.
 bool BoolFromGTestEnv(const char* flag, bool default_val);
-GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val);
+GTEST_API_ int32_t Int32FromGTestEnv(const char* flag, int32_t default_val);
 std::string OutputFlagAlsoCheckEnvVar();
 const char* StringFromGTestEnv(const char* flag, const char* default_val);
 
@@ -2289,4 +2253,119 @@
 
 #endif  // !defined(GTEST_INTERNAL_DEPRECATED)
 
+#if GTEST_HAS_ABSL
+// Always use absl::any for UniversalPrinter<> specializations if googletest
+// is built with absl support.
+#define GTEST_INTERNAL_HAS_ANY 1
+#include "absl/types/any.h"
+namespace testing {
+namespace internal {
+using Any = ::absl::any;
+}  // namespace internal
+}  // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<any>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::any for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_ANY 1
+#include <any>
+namespace testing {
+namespace internal {
+using Any = ::std::any;
+}  // namespace internal
+}  // namespace testing
+// The case where absl is configured NOT to alias std::any is not
+// supported.
+#endif  // __has_include(<any>) && __cplusplus >= 201703L
+#endif  // __has_include
+#endif  // GTEST_HAS_ABSL
+
+#if GTEST_HAS_ABSL
+// Always use absl::optional for UniversalPrinter<> specializations if
+// googletest is built with absl support.
+#define GTEST_INTERNAL_HAS_OPTIONAL 1
+#include "absl/types/optional.h"
+namespace testing {
+namespace internal {
+template <typename T>
+using Optional = ::absl::optional<T>;
+}  // namespace internal
+}  // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<optional>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::optional for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_OPTIONAL 1
+#include <optional>
+namespace testing {
+namespace internal {
+template <typename T>
+using Optional = ::std::optional<T>;
+}  // namespace internal
+}  // namespace testing
+// The case where absl is configured NOT to alias std::optional is not
+// supported.
+#endif  // __has_include(<optional>) && __cplusplus >= 201703L
+#endif  // __has_include
+#endif  // GTEST_HAS_ABSL
+
+#if GTEST_HAS_ABSL
+// Always use absl::string_view for Matcher<> specializations if googletest
+// is built with absl support.
+# define GTEST_INTERNAL_HAS_STRING_VIEW 1
+#include "absl/strings/string_view.h"
+namespace testing {
+namespace internal {
+using StringView = ::absl::string_view;
+}  // namespace internal
+}  // namespace testing
+#else
+# ifdef __has_include
+#   if __has_include(<string_view>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::string_view for Matcher<>
+// specializations.
+#   define GTEST_INTERNAL_HAS_STRING_VIEW 1
+#include <string_view>
+namespace testing {
+namespace internal {
+using StringView = ::std::string_view;
+}  // namespace internal
+}  // namespace testing
+// The case where absl is configured NOT to alias std::string_view is not
+// supported.
+#  endif  // __has_include(<string_view>) && __cplusplus >= 201703L
+# endif  // __has_include
+#endif  // GTEST_HAS_ABSL
+
+#if GTEST_HAS_ABSL
+// Always use absl::variant for UniversalPrinter<> specializations if googletest
+// is built with absl support.
+#define GTEST_INTERNAL_HAS_VARIANT 1
+#include "absl/types/variant.h"
+namespace testing {
+namespace internal {
+template <typename... T>
+using Variant = ::absl::variant<T...>;
+}  // namespace internal
+}  // namespace testing
+#else
+#ifdef __has_include
+#if __has_include(<variant>) && __cplusplus >= 201703L
+// Otherwise for C++17 and higher use std::variant for UniversalPrinter<>
+// specializations.
+#define GTEST_INTERNAL_HAS_VARIANT 1
+#include <variant>
+namespace testing {
+namespace internal {
+template <typename... T>
+using Variant = ::std::variant<T...>;
+}  // namespace internal
+}  // namespace testing
+// The case where absl is configured NOT to alias std::variant is not supported.
+#endif  // __has_include(<variant>) && __cplusplus >= 201703L
+#endif  // __has_include
+#endif  // GTEST_HAS_ABSL
+
 #endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-string.h b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-string.h
index 4c9b626..323a36e 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-string.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-string.h
@@ -47,6 +47,7 @@
 #endif
 
 #include <string.h>
+#include <cstdint>
 #include <string>
 
 #include "gtest/internal/gtest-port.h"
@@ -94,7 +95,8 @@
   static const char* Utf16ToAnsi(LPCWSTR utf16_str);
 #endif
 
-  // Compares two C strings.  Returns true iff they have the same content.
+  // Compares two C strings.  Returns true if and only if they have the same
+  // content.
   //
   // Unlike strcmp(), this function can handle NULL argument(s).  A
   // NULL C string is considered different to any non-NULL C string,
@@ -107,16 +109,16 @@
   // returned.
   static std::string ShowWideCString(const wchar_t* wide_c_str);
 
-  // Compares two wide C strings.  Returns true iff they have the same
-  // content.
+  // Compares two wide C strings.  Returns true if and only if they have the
+  // same content.
   //
   // Unlike wcscmp(), this function can handle NULL argument(s).  A
   // NULL C string is considered different to any non-NULL C string,
   // including the empty string.
   static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs);
 
-  // Compares two C strings, ignoring case.  Returns true iff they
-  // have the same content.
+  // Compares two C strings, ignoring case.  Returns true if and only if
+  // they have the same content.
   //
   // Unlike strcasecmp(), this function can handle NULL argument(s).
   // A NULL C string is considered different to any non-NULL C string,
@@ -124,8 +126,8 @@
   static bool CaseInsensitiveCStringEquals(const char* lhs,
                                            const char* rhs);
 
-  // Compares two wide C strings, ignoring case.  Returns true iff they
-  // have the same content.
+  // Compares two wide C strings, ignoring case.  Returns true if and only if
+  // they have the same content.
   //
   // Unlike wcscasecmp(), this function can handle NULL argument(s).
   // A NULL C string is considered different to any non-NULL wide C string,
@@ -139,17 +141,23 @@
   static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
                                                const wchar_t* rhs);
 
-  // Returns true iff the given string ends with the given suffix, ignoring
-  // case. Any string is considered to end with an empty suffix.
+  // Returns true if and only if the given string ends with the given suffix,
+  // ignoring case. Any string is considered to end with an empty suffix.
   static bool EndsWithCaseInsensitive(
       const std::string& str, const std::string& suffix);
 
   // Formats an int value as "%02d".
   static std::string FormatIntWidth2(int value);  // "%02d" for width == 2
 
+  // Formats an int value to given width with leading zeros.
+  static std::string FormatIntWidthN(int value, int width);
+
   // Formats an int value as "%X".
   static std::string FormatHexInt(int value);
 
+  // Formats a uint32_t value as "%X".
+  static std::string FormatHexUInt32(uint32_t value);
+
   // Formats a byte as "%02X".
   static std::string FormatByte(unsigned char value);
 
diff --git a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-type-util.h b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-type-util.h
index 4cd1cf3..c3326f2 100644
--- a/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-type-util.h
+++ b/deps/boringssl/src/third_party/googletest/include/gtest/internal/gtest-type-util.h
@@ -1,7 +1,3 @@
-// This file was GENERATED by command:
-//     pump.py gtest-type-util.h.pump
-// DO NOT EDIT BY HAND!!!
-
 // Copyright 2008 Google Inc.
 // All Rights Reserved.
 //
@@ -32,12 +28,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 // Type utilities needed for implementing typed and type-parameterized
-// tests.  This file is generated by a SCRIPT.  DO NOT EDIT BY HAND!
-//
-// Currently we support at most 50 types in a list, and at most 50
-// type-parameterized tests in one type-parameterized test suite.
-// Please contact googletestframework@googlegroups.com if you need
-// more.
+// tests.
 
 // GOOGLETEST_CM0001 DO NOT DELETE
 
@@ -73,1568 +64,45 @@
   return s;
 }
 
-// GetTypeName<T>() returns a human-readable name of type T.
-// NB: This function is also used in Google Mock, so don't move it inside of
-// the typed-test-only section below.
-template <typename T>
-std::string GetTypeName() {
-# if GTEST_HAS_RTTI
-
-  const char* const name = typeid(T).name();
-#  if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
+#if GTEST_HAS_RTTI
+// GetTypeName(const std::type_info&) returns a human-readable name of `type`.
+inline std::string GetTypeName(const std::type_info& type) {
+  const char* const name = type.name();
+#if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
   int status = 0;
   // gcc's implementation of typeid(T).name() mangles the type name,
   // so we have to demangle it.
-#   if GTEST_HAS_CXXABI_H_
+#if GTEST_HAS_CXXABI_H_
   using abi::__cxa_demangle;
-#   endif  // GTEST_HAS_CXXABI_H_
+#endif  // GTEST_HAS_CXXABI_H_
   char* const readable_name = __cxa_demangle(name, nullptr, nullptr, &status);
   const std::string name_str(status == 0 ? readable_name : name);
   free(readable_name);
   return CanonicalizeForStdLibVersioning(name_str);
-#  else
+#else
   return name;
-#  endif  // GTEST_HAS_CXXABI_H_ || __HP_aCC
+#endif  // GTEST_HAS_CXXABI_H_ || __HP_aCC
+}
+#endif  // GTEST_HAS_RTTI
 
-# else
-
+// GetTypeName<T>() returns a human-readable name of type T if and only if
+// RTTI is enabled, otherwise it returns a dummy type name.
+// NB: This function is also used in Google Mock, so don't move it inside of
+// the typed-test-only section below.
+template <typename T>
+std::string GetTypeName() {
+#if GTEST_HAS_RTTI
+  return GetTypeName(typeid(T));
+#else
   return "<type>";
-
-# endif  // GTEST_HAS_RTTI
+#endif  // GTEST_HAS_RTTI
 }
 
 #if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
 
-// AssertyTypeEq<T1, T2>::type is defined iff T1 and T2 are the same
-// type.  This can be used as a compile-time assertion to ensure that
-// two types are equal.
-
-template <typename T1, typename T2>
-struct AssertTypeEq;
-
-template <typename T>
-struct AssertTypeEq<T, T> {
-  typedef bool type;
-};
-
-// A unique type used as the default value for the arguments of class
-// template Types.  This allows us to simulate variadic templates
-// (e.g. Types<int>, Type<int, double>, and etc), which C++ doesn't
-// support directly.
+// A unique type indicating an empty node
 struct None {};
 
-// The following family of struct and struct templates are used to
-// represent type lists.  In particular, TypesN<T1, T2, ..., TN>
-// represents a type list with N types (T1, T2, ..., and TN) in it.
-// Except for Types0, every struct in the family has two member types:
-// Head for the first type in the list, and Tail for the rest of the
-// list.
-
-// The empty type list.
-struct Types0 {};
-
-// Type lists of length 1, 2, 3, and so on.
-
-template <typename T1>
-struct Types1 {
-  typedef T1 Head;
-  typedef Types0 Tail;
-};
-template <typename T1, typename T2>
-struct Types2 {
-  typedef T1 Head;
-  typedef Types1<T2> Tail;
-};
-
-template <typename T1, typename T2, typename T3>
-struct Types3 {
-  typedef T1 Head;
-  typedef Types2<T2, T3> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4>
-struct Types4 {
-  typedef T1 Head;
-  typedef Types3<T2, T3, T4> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-struct Types5 {
-  typedef T1 Head;
-  typedef Types4<T2, T3, T4, T5> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6>
-struct Types6 {
-  typedef T1 Head;
-  typedef Types5<T2, T3, T4, T5, T6> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7>
-struct Types7 {
-  typedef T1 Head;
-  typedef Types6<T2, T3, T4, T5, T6, T7> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8>
-struct Types8 {
-  typedef T1 Head;
-  typedef Types7<T2, T3, T4, T5, T6, T7, T8> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9>
-struct Types9 {
-  typedef T1 Head;
-  typedef Types8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10>
-struct Types10 {
-  typedef T1 Head;
-  typedef Types9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11>
-struct Types11 {
-  typedef T1 Head;
-  typedef Types10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12>
-struct Types12 {
-  typedef T1 Head;
-  typedef Types11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13>
-struct Types13 {
-  typedef T1 Head;
-  typedef Types12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14>
-struct Types14 {
-  typedef T1 Head;
-  typedef Types13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15>
-struct Types15 {
-  typedef T1 Head;
-  typedef Types14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16>
-struct Types16 {
-  typedef T1 Head;
-  typedef Types15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17>
-struct Types17 {
-  typedef T1 Head;
-  typedef Types16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18>
-struct Types18 {
-  typedef T1 Head;
-  typedef Types17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19>
-struct Types19 {
-  typedef T1 Head;
-  typedef Types18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20>
-struct Types20 {
-  typedef T1 Head;
-  typedef Types19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21>
-struct Types21 {
-  typedef T1 Head;
-  typedef Types20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22>
-struct Types22 {
-  typedef T1 Head;
-  typedef Types21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23>
-struct Types23 {
-  typedef T1 Head;
-  typedef Types22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24>
-struct Types24 {
-  typedef T1 Head;
-  typedef Types23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25>
-struct Types25 {
-  typedef T1 Head;
-  typedef Types24<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26>
-struct Types26 {
-  typedef T1 Head;
-  typedef Types25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27>
-struct Types27 {
-  typedef T1 Head;
-  typedef Types26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28>
-struct Types28 {
-  typedef T1 Head;
-  typedef Types27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29>
-struct Types29 {
-  typedef T1 Head;
-  typedef Types28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30>
-struct Types30 {
-  typedef T1 Head;
-  typedef Types29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31>
-struct Types31 {
-  typedef T1 Head;
-  typedef Types30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32>
-struct Types32 {
-  typedef T1 Head;
-  typedef Types31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33>
-struct Types33 {
-  typedef T1 Head;
-  typedef Types32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34>
-struct Types34 {
-  typedef T1 Head;
-  typedef Types33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35>
-struct Types35 {
-  typedef T1 Head;
-  typedef Types34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36>
-struct Types36 {
-  typedef T1 Head;
-  typedef Types35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37>
-struct Types37 {
-  typedef T1 Head;
-  typedef Types36<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38>
-struct Types38 {
-  typedef T1 Head;
-  typedef Types37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39>
-struct Types39 {
-  typedef T1 Head;
-  typedef Types38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40>
-struct Types40 {
-  typedef T1 Head;
-  typedef Types39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41>
-struct Types41 {
-  typedef T1 Head;
-  typedef Types40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42>
-struct Types42 {
-  typedef T1 Head;
-  typedef Types41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43>
-struct Types43 {
-  typedef T1 Head;
-  typedef Types42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44>
-struct Types44 {
-  typedef T1 Head;
-  typedef Types43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45>
-struct Types45 {
-  typedef T1 Head;
-  typedef Types44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46>
-struct Types46 {
-  typedef T1 Head;
-  typedef Types45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45, T46> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47>
-struct Types47 {
-  typedef T1 Head;
-  typedef Types46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45, T46, T47> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48>
-struct Types48 {
-  typedef T1 Head;
-  typedef Types47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45, T46, T47, T48> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49>
-struct Types49 {
-  typedef T1 Head;
-  typedef Types48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45, T46, T47, T48, T49> Tail;
-};
-
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49, typename T50>
-struct Types50 {
-  typedef T1 Head;
-  typedef Types49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-      T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-      T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-      T44, T45, T46, T47, T48, T49, T50> Tail;
-};
-
-
-}  // namespace internal
-
-// We don't want to require the users to write TypesN<...> directly,
-// as that would require them to count the length.  Types<...> is much
-// easier to write, but generates horrible messages when there is a
-// compiler error, as gcc insists on printing out each template
-// argument, even if it has the default value (this means Types<int>
-// will appear as Types<int, None, None, ..., None> in the compiler
-// errors).
-//
-// Our solution is to combine the best part of the two approaches: a
-// user would write Types<T1, ..., TN>, and Google Test will translate
-// that to TypesN<T1, ..., TN> internally to make error messages
-// readable.  The translation is done by the 'type' member of the
-// Types template.
-template <typename T1 = internal::None, typename T2 = internal::None,
-    typename T3 = internal::None, typename T4 = internal::None,
-    typename T5 = internal::None, typename T6 = internal::None,
-    typename T7 = internal::None, typename T8 = internal::None,
-    typename T9 = internal::None, typename T10 = internal::None,
-    typename T11 = internal::None, typename T12 = internal::None,
-    typename T13 = internal::None, typename T14 = internal::None,
-    typename T15 = internal::None, typename T16 = internal::None,
-    typename T17 = internal::None, typename T18 = internal::None,
-    typename T19 = internal::None, typename T20 = internal::None,
-    typename T21 = internal::None, typename T22 = internal::None,
-    typename T23 = internal::None, typename T24 = internal::None,
-    typename T25 = internal::None, typename T26 = internal::None,
-    typename T27 = internal::None, typename T28 = internal::None,
-    typename T29 = internal::None, typename T30 = internal::None,
-    typename T31 = internal::None, typename T32 = internal::None,
-    typename T33 = internal::None, typename T34 = internal::None,
-    typename T35 = internal::None, typename T36 = internal::None,
-    typename T37 = internal::None, typename T38 = internal::None,
-    typename T39 = internal::None, typename T40 = internal::None,
-    typename T41 = internal::None, typename T42 = internal::None,
-    typename T43 = internal::None, typename T44 = internal::None,
-    typename T45 = internal::None, typename T46 = internal::None,
-    typename T47 = internal::None, typename T48 = internal::None,
-    typename T49 = internal::None, typename T50 = internal::None>
-struct Types {
-  typedef internal::Types50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50> type;
-};
-
-template <>
-struct Types<internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types0 type;
-};
-template <typename T1>
-struct Types<T1, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types1<T1> type;
-};
-template <typename T1, typename T2>
-struct Types<T1, T2, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types2<T1, T2> type;
-};
-template <typename T1, typename T2, typename T3>
-struct Types<T1, T2, T3, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types3<T1, T2, T3> type;
-};
-template <typename T1, typename T2, typename T3, typename T4>
-struct Types<T1, T2, T3, T4, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types4<T1, T2, T3, T4> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5>
-struct Types<T1, T2, T3, T4, T5, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types5<T1, T2, T3, T4, T5> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6>
-struct Types<T1, T2, T3, T4, T5, T6, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types6<T1, T2, T3, T4, T5, T6> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7>
-struct Types<T1, T2, T3, T4, T5, T6, T7, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types7<T1, T2, T3, T4, T5, T6, T7> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types8<T1, T2, T3, T4, T5, T6, T7, T8> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11,
-      T12> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25,
-      T26> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39,
-      T40> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, internal::None,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None, internal::None> {
-  typedef internal::Types43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None, internal::None> {
-  typedef internal::Types44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
-    internal::None, internal::None, internal::None, internal::None,
-    internal::None> {
-  typedef internal::Types45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
-    T46, internal::None, internal::None, internal::None, internal::None> {
-  typedef internal::Types46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
-    T46, T47, internal::None, internal::None, internal::None> {
-  typedef internal::Types47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46, T47> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
-    T46, T47, T48, internal::None, internal::None> {
-  typedef internal::Types48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46, T47, T48> type;
-};
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49>
-struct Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14, T15,
-    T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29, T30,
-    T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44, T45,
-    T46, T47, T48, T49, internal::None> {
-  typedef internal::Types49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46, T47, T48, T49> type;
-};
-
-namespace internal {
-
 # define GTEST_TEMPLATE_ template <typename T> class
 
 // The template "selector" struct TemplateSel<Tmpl> is used to
@@ -1656,1692 +124,64 @@
 # define GTEST_BIND_(TmplSel, T) \
   TmplSel::template Bind<T>::type
 
-// A unique struct template used as the default value for the
-// arguments of class template Templates.  This allows us to simulate
-// variadic templates (e.g. Templates<int>, Templates<int, double>,
-// and etc), which C++ doesn't support directly.
-template <typename T>
-struct NoneT {};
-
-// The following family of struct and struct templates are used to
-// represent template lists.  In particular, TemplatesN<T1, T2, ...,
-// TN> represents a list of N templates (T1, T2, ..., and TN).  Except
-// for Templates0, every struct in the family has two member types:
-// Head for the selector of the first template in the list, and Tail
-// for the rest of the list.
-
-// The empty template list.
-struct Templates0 {};
-
-// Template lists of length 1, 2, 3, and so on.
-
-template <GTEST_TEMPLATE_ T1>
-struct Templates1 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates0 Tail;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
-struct Templates2 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates1<T2> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
-struct Templates3 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates2<T2, T3> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4>
-struct Templates4 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates3<T2, T3, T4> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
-struct Templates5 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates4<T2, T3, T4, T5> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
-struct Templates6 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates5<T2, T3, T4, T5, T6> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7>
-struct Templates7 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates6<T2, T3, T4, T5, T6, T7> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
-struct Templates8 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates7<T2, T3, T4, T5, T6, T7, T8> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
-struct Templates9 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates8<T2, T3, T4, T5, T6, T7, T8, T9> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10>
-struct Templates10 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates9<T2, T3, T4, T5, T6, T7, T8, T9, T10> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
-struct Templates11 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates10<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
-struct Templates12 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates11<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13>
-struct Templates13 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates12<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
-struct Templates14 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates13<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
-struct Templates15 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates14<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16>
-struct Templates16 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates15<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
-struct Templates17 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates16<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
-struct Templates18 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates17<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19>
-struct Templates19 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates18<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
-struct Templates20 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates19<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
-struct Templates21 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates20<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22>
-struct Templates22 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates21<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
-struct Templates23 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates22<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
-struct Templates24 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates23<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25>
-struct Templates25 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates24<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
-struct Templates26 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates25<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
-struct Templates27 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates26<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28>
-struct Templates28 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates27<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
-struct Templates29 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates28<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
-struct Templates30 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates29<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31>
-struct Templates31 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates30<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
-struct Templates32 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates31<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
-struct Templates33 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates32<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34>
-struct Templates34 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates33<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
-struct Templates35 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates34<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
-struct Templates36 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates35<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37>
-struct Templates37 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates36<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
-struct Templates38 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates37<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
-struct Templates39 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates38<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40>
-struct Templates40 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates39<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
-struct Templates41 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates40<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
-struct Templates42 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates41<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43>
-struct Templates43 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates42<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
-struct Templates44 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates43<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
-struct Templates45 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates44<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46>
-struct Templates46 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates45<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45, T46> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
-struct Templates47 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates46<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45, T46, T47> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
-struct Templates48 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates47<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45, T46, T47, T48> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
-    GTEST_TEMPLATE_ T49>
-struct Templates49 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates48<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45, T46, T47, T48, T49> Tail;
-};
-
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
-    GTEST_TEMPLATE_ T49, GTEST_TEMPLATE_ T50>
-struct Templates50 {
-  typedef TemplateSel<T1> Head;
-  typedef Templates49<T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-      T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-      T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42,
-      T43, T44, T45, T46, T47, T48, T49, T50> Tail;
-};
-
-
-// We don't want to require the users to write TemplatesN<...> directly,
-// as that would require them to count the length.  Templates<...> is much
-// easier to write, but generates horrible messages when there is a
-// compiler error, as gcc insists on printing out each template
-// argument, even if it has the default value (this means Templates<list>
-// will appear as Templates<list, NoneT, NoneT, ..., NoneT> in the compiler
-// errors).
-//
-// Our solution is to combine the best part of the two approaches: a
-// user would write Templates<T1, ..., TN>, and Google Test will translate
-// that to TemplatesN<T1, ..., TN> internally to make error messages
-// readable.  The translation is done by the 'type' member of the
-// Templates template.
-template <GTEST_TEMPLATE_ T1 = NoneT, GTEST_TEMPLATE_ T2 = NoneT,
-    GTEST_TEMPLATE_ T3 = NoneT, GTEST_TEMPLATE_ T4 = NoneT,
-    GTEST_TEMPLATE_ T5 = NoneT, GTEST_TEMPLATE_ T6 = NoneT,
-    GTEST_TEMPLATE_ T7 = NoneT, GTEST_TEMPLATE_ T8 = NoneT,
-    GTEST_TEMPLATE_ T9 = NoneT, GTEST_TEMPLATE_ T10 = NoneT,
-    GTEST_TEMPLATE_ T11 = NoneT, GTEST_TEMPLATE_ T12 = NoneT,
-    GTEST_TEMPLATE_ T13 = NoneT, GTEST_TEMPLATE_ T14 = NoneT,
-    GTEST_TEMPLATE_ T15 = NoneT, GTEST_TEMPLATE_ T16 = NoneT,
-    GTEST_TEMPLATE_ T17 = NoneT, GTEST_TEMPLATE_ T18 = NoneT,
-    GTEST_TEMPLATE_ T19 = NoneT, GTEST_TEMPLATE_ T20 = NoneT,
-    GTEST_TEMPLATE_ T21 = NoneT, GTEST_TEMPLATE_ T22 = NoneT,
-    GTEST_TEMPLATE_ T23 = NoneT, GTEST_TEMPLATE_ T24 = NoneT,
-    GTEST_TEMPLATE_ T25 = NoneT, GTEST_TEMPLATE_ T26 = NoneT,
-    GTEST_TEMPLATE_ T27 = NoneT, GTEST_TEMPLATE_ T28 = NoneT,
-    GTEST_TEMPLATE_ T29 = NoneT, GTEST_TEMPLATE_ T30 = NoneT,
-    GTEST_TEMPLATE_ T31 = NoneT, GTEST_TEMPLATE_ T32 = NoneT,
-    GTEST_TEMPLATE_ T33 = NoneT, GTEST_TEMPLATE_ T34 = NoneT,
-    GTEST_TEMPLATE_ T35 = NoneT, GTEST_TEMPLATE_ T36 = NoneT,
-    GTEST_TEMPLATE_ T37 = NoneT, GTEST_TEMPLATE_ T38 = NoneT,
-    GTEST_TEMPLATE_ T39 = NoneT, GTEST_TEMPLATE_ T40 = NoneT,
-    GTEST_TEMPLATE_ T41 = NoneT, GTEST_TEMPLATE_ T42 = NoneT,
-    GTEST_TEMPLATE_ T43 = NoneT, GTEST_TEMPLATE_ T44 = NoneT,
-    GTEST_TEMPLATE_ T45 = NoneT, GTEST_TEMPLATE_ T46 = NoneT,
-    GTEST_TEMPLATE_ T47 = NoneT, GTEST_TEMPLATE_ T48 = NoneT,
-    GTEST_TEMPLATE_ T49 = NoneT, GTEST_TEMPLATE_ T50 = NoneT>
+template <GTEST_TEMPLATE_ Head_, GTEST_TEMPLATE_... Tail_>
 struct Templates {
-  typedef Templates50<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45, T46, T47, T48, T49, T50> type;
+  using Head = TemplateSel<Head_>;
+  using Tail = Templates<Tail_...>;
 };
 
-template <>
-struct Templates<NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT> {
-  typedef Templates0 type;
-};
-template <GTEST_TEMPLATE_ T1>
-struct Templates<T1, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT> {
-  typedef Templates1<T1> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2>
-struct Templates<T1, T2, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT> {
-  typedef Templates2<T1, T2> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3>
-struct Templates<T1, T2, T3, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates3<T1, T2, T3> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4>
-struct Templates<T1, T2, T3, T4, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates4<T1, T2, T3, T4> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5>
-struct Templates<T1, T2, T3, T4, T5, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates5<T1, T2, T3, T4, T5> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6>
-struct Templates<T1, T2, T3, T4, T5, T6, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates6<T1, T2, T3, T4, T5, T6> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates7<T1, T2, T3, T4, T5, T6, T7> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates8<T1, T2, T3, T4, T5, T6, T7, T8> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates9<T1, T2, T3, T4, T5, T6, T7, T8, T9> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates10<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates11<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates12<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates13<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates14<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates15<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates16<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates17<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates18<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates19<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates20<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates21<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT> {
-  typedef Templates22<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT> {
-  typedef Templates23<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT> {
-  typedef Templates24<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT> {
-  typedef Templates25<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT> {
-  typedef Templates26<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT> {
-  typedef Templates27<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT> {
-  typedef Templates28<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT> {
-  typedef Templates29<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates30<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates31<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates32<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates33<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates34<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates35<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates36<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, NoneT, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates37<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, NoneT, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates38<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates39<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, NoneT, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates40<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, NoneT, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates41<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, NoneT,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates42<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates43<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    NoneT, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates44<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    T45, NoneT, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates45<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    T45, T46, NoneT, NoneT, NoneT, NoneT> {
-  typedef Templates46<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45, T46> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    T45, T46, T47, NoneT, NoneT, NoneT> {
-  typedef Templates47<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45, T46, T47> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    T45, T46, T47, T48, NoneT, NoneT> {
-  typedef Templates48<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45, T46, T47, T48> type;
-};
-template <GTEST_TEMPLATE_ T1, GTEST_TEMPLATE_ T2, GTEST_TEMPLATE_ T3,
-    GTEST_TEMPLATE_ T4, GTEST_TEMPLATE_ T5, GTEST_TEMPLATE_ T6,
-    GTEST_TEMPLATE_ T7, GTEST_TEMPLATE_ T8, GTEST_TEMPLATE_ T9,
-    GTEST_TEMPLATE_ T10, GTEST_TEMPLATE_ T11, GTEST_TEMPLATE_ T12,
-    GTEST_TEMPLATE_ T13, GTEST_TEMPLATE_ T14, GTEST_TEMPLATE_ T15,
-    GTEST_TEMPLATE_ T16, GTEST_TEMPLATE_ T17, GTEST_TEMPLATE_ T18,
-    GTEST_TEMPLATE_ T19, GTEST_TEMPLATE_ T20, GTEST_TEMPLATE_ T21,
-    GTEST_TEMPLATE_ T22, GTEST_TEMPLATE_ T23, GTEST_TEMPLATE_ T24,
-    GTEST_TEMPLATE_ T25, GTEST_TEMPLATE_ T26, GTEST_TEMPLATE_ T27,
-    GTEST_TEMPLATE_ T28, GTEST_TEMPLATE_ T29, GTEST_TEMPLATE_ T30,
-    GTEST_TEMPLATE_ T31, GTEST_TEMPLATE_ T32, GTEST_TEMPLATE_ T33,
-    GTEST_TEMPLATE_ T34, GTEST_TEMPLATE_ T35, GTEST_TEMPLATE_ T36,
-    GTEST_TEMPLATE_ T37, GTEST_TEMPLATE_ T38, GTEST_TEMPLATE_ T39,
-    GTEST_TEMPLATE_ T40, GTEST_TEMPLATE_ T41, GTEST_TEMPLATE_ T42,
-    GTEST_TEMPLATE_ T43, GTEST_TEMPLATE_ T44, GTEST_TEMPLATE_ T45,
-    GTEST_TEMPLATE_ T46, GTEST_TEMPLATE_ T47, GTEST_TEMPLATE_ T48,
-    GTEST_TEMPLATE_ T49>
-struct Templates<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13, T14,
-    T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28, T29,
-    T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43, T44,
-    T45, T46, T47, T48, T49, NoneT> {
-  typedef Templates49<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-      T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27,
-      T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41,
-      T42, T43, T44, T45, T46, T47, T48, T49> type;
+template <GTEST_TEMPLATE_ Head_>
+struct Templates<Head_> {
+  using Head = TemplateSel<Head_>;
+  using Tail = None;
 };
 
-// The TypeList template makes it possible to use either a single type
-// or a Types<...> list in TYPED_TEST_SUITE() and
-// INSTANTIATE_TYPED_TEST_SUITE_P().
+// Tuple-like type lists
+template <typename Head_, typename... Tail_>
+struct Types {
+  using Head = Head_;
+  using Tail = Types<Tail_...>;
+};
 
+template <typename Head_>
+struct Types<Head_> {
+  using Head = Head_;
+  using Tail = None;
+};
+
+// Helper metafunctions to tell apart a single type from types
+// generated by ::testing::Types
+template <typename... Ts>
+struct ProxyTypeList {
+  using type = Types<Ts...>;
+};
+
+template <typename>
+struct is_proxy_type_list : std::false_type {};
+
+template <typename... Ts>
+struct is_proxy_type_list<ProxyTypeList<Ts...>> : std::true_type {};
+
+// Generator which conditionally creates type lists.
+// It recognizes if a requested type list should be created
+// and prevents creating a new type list nested within another one.
 template <typename T>
-struct TypeList {
-  typedef Types1<T> type;
-};
+struct GenerateTypeList {
+ private:
+  using proxy = typename std::conditional<is_proxy_type_list<T>::value, T,
+                                          ProxyTypeList<T>>::type;
 
-template <typename T1, typename T2, typename T3, typename T4, typename T5,
-    typename T6, typename T7, typename T8, typename T9, typename T10,
-    typename T11, typename T12, typename T13, typename T14, typename T15,
-    typename T16, typename T17, typename T18, typename T19, typename T20,
-    typename T21, typename T22, typename T23, typename T24, typename T25,
-    typename T26, typename T27, typename T28, typename T29, typename T30,
-    typename T31, typename T32, typename T33, typename T34, typename T35,
-    typename T36, typename T37, typename T38, typename T39, typename T40,
-    typename T41, typename T42, typename T43, typename T44, typename T45,
-    typename T46, typename T47, typename T48, typename T49, typename T50>
-struct TypeList<Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12, T13,
-    T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26, T27, T28,
-    T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40, T41, T42, T43,
-    T44, T45, T46, T47, T48, T49, T50> > {
-  typedef typename Types<T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12,
-      T13, T14, T15, T16, T17, T18, T19, T20, T21, T22, T23, T24, T25, T26,
-      T27, T28, T29, T30, T31, T32, T33, T34, T35, T36, T37, T38, T39, T40,
-      T41, T42, T43, T44, T45, T46, T47, T48, T49, T50>::type type;
+ public:
+  using type = typename proxy::type;
 };
 
 #endif  // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P
 
 }  // namespace internal
+
+template <typename... Ts>
+using Types = internal::ProxyTypeList<Ts...>;
+
 }  // namespace testing
 
 #endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_
diff --git a/deps/boringssl/src/third_party/googletest/samples/prime_tables.h b/deps/boringssl/src/third_party/googletest/samples/prime_tables.h
index 119545a..34002f3 100644
--- a/deps/boringssl/src/third_party/googletest/samples/prime_tables.h
+++ b/deps/boringssl/src/third_party/googletest/samples/prime_tables.h
@@ -43,7 +43,7 @@
  public:
   virtual ~PrimeTable() {}
 
-  // Returns true iff n is a prime number.
+  // Returns true if and only if n is a prime number.
   virtual bool IsPrime(int n) const = 0;
 
   // Returns the smallest prime number greater than p; or returns -1
@@ -66,11 +66,11 @@
   }
 
   int GetNextPrime(int p) const override {
-    for (int n = p + 1; n > 0; n++) {
+    if (p < 0) return -1;
+
+    for (int n = p + 1;; n++) {
       if (IsPrime(n)) return n;
     }
-
-    return -1;
   }
 };
 
diff --git a/deps/boringssl/src/third_party/googletest/samples/sample1.cc b/deps/boringssl/src/third_party/googletest/samples/sample1.cc
index 13cec1d..1d42759 100644
--- a/deps/boringssl/src/third_party/googletest/samples/sample1.cc
+++ b/deps/boringssl/src/third_party/googletest/samples/sample1.cc
@@ -41,7 +41,7 @@
   return result;
 }
 
-// Returns true iff n is a prime number.
+// Returns true if and only if n is a prime number.
 bool IsPrime(int n) {
   // Trivial case 1: small numbers
   if (n <= 1) return false;
diff --git a/deps/boringssl/src/third_party/googletest/samples/sample1.h b/deps/boringssl/src/third_party/googletest/samples/sample1.h
index 2c3e9f0..12e49de 100644
--- a/deps/boringssl/src/third_party/googletest/samples/sample1.h
+++ b/deps/boringssl/src/third_party/googletest/samples/sample1.h
@@ -35,7 +35,7 @@
 // Returns n! (the factorial of n).  For negative n, n! is defined to be 1.
 int Factorial(int n);
 
-// Returns true iff n is a prime number.
+// Returns true if and only if n is a prime number.
 bool IsPrime(int n);
 
 #endif  // GTEST_SAMPLES_SAMPLE1_H_
diff --git a/deps/boringssl/src/third_party/googletest/samples/sample9_unittest.cc b/deps/boringssl/src/third_party/googletest/samples/sample9_unittest.cc
index c0d8ff2..e502d08 100644
--- a/deps/boringssl/src/third_party/googletest/samples/sample9_unittest.cc
+++ b/deps/boringssl/src/third_party/googletest/samples/sample9_unittest.cc
@@ -135,10 +135,10 @@
   // This is an example of using the UnitTest reflection API to inspect test
   // results. Here we discount failures from the tests we expected to fail.
   int unexpectedly_failed_tests = 0;
-  for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
-    const TestCase& test_case = *unit_test.GetTestCase(i);
-    for (int j = 0; j < test_case.total_test_count(); ++j) {
-      const TestInfo& test_info = *test_case.GetTestInfo(j);
+  for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+    const testing::TestSuite& test_suite = *unit_test.GetTestSuite(i);
+    for (int j = 0; j < test_suite.total_test_count(); ++j) {
+      const TestInfo& test_info = *test_suite.GetTestInfo(j);
       // Counts failed tests that were not meant to fail (those without
       // 'Fails' in the name).
       if (test_info.result()->Failed() &&
diff --git a/deps/boringssl/src/third_party/googletest/src/gtest-death-test.cc b/deps/boringssl/src/third_party/googletest/src/gtest-death-test.cc
index a78ab21..ecc47d1 100644
--- a/deps/boringssl/src/third_party/googletest/src/gtest-death-test.cc
+++ b/deps/boringssl/src/third_party/googletest/src/gtest-death-test.cc
@@ -32,6 +32,7 @@
 
 #include "gtest/gtest-death-test.h"
 
+#include <functional>
 #include <utility>
 
 #include "gtest/internal/gtest-port.h"
@@ -68,6 +69,7 @@
 #  include <lib/fdio/fd.h>
 #  include <lib/fdio/io.h>
 #  include <lib/fdio/spawn.h>
+#  include <lib/zx/channel.h>
 #  include <lib/zx/port.h>
 #  include <lib/zx/process.h>
 #  include <lib/zx/socket.h>
@@ -121,8 +123,8 @@
     "Indicates the file, line number, temporal index of "
     "the single death test to run, and a file descriptor to "
     "which a success code may be sent, all separated by "
-    "the '|' characters.  This flag is specified if and only if the current "
-    "process is a sub-process launched for running a thread-safe "
+    "the '|' characters.  This flag is specified if and only if the "
+    "current process is a sub-process launched for running a thread-safe "
     "death test.  FOR INTERNAL USE ONLY.");
 }  // namespace internal
 
@@ -562,8 +564,8 @@
 //   status_ok: true if exit_status is acceptable in the context of
 //              this particular death test, which fails if it is false
 //
-// Returns true iff all of the above conditions are met.  Otherwise, the
-// first failing condition, in the order given above, is the one that is
+// Returns true if and only if all of the above conditions are met.  Otherwise,
+// the first failing condition, in the order given above, is the one that is
 // reported. Also sets the last death test message string.
 bool DeathTestImpl::Passed(bool status_ok) {
   if (!spawned())
@@ -831,7 +833,7 @@
   std::string captured_stderr_;
 
   zx::process child_process_;
-  zx::port port_;
+  zx::channel exception_channel_;
   zx::socket stderr_socket_;
 };
 
@@ -876,43 +878,52 @@
 int FuchsiaDeathTest::Wait() {
   const int kProcessKey = 0;
   const int kSocketKey = 1;
+  const int kExceptionKey = 2;
 
   if (!spawned())
     return 0;
 
-  // Register to wait for the child process to terminate.
+  // Create a port to wait for socket/task/exception events.
   zx_status_t status_zx;
-  status_zx = child_process_.wait_async(
-      port_, kProcessKey, ZX_PROCESS_TERMINATED, ZX_WAIT_ASYNC_ONCE);
+  zx::port port;
+  status_zx = zx::port::create(0, &port);
   GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+  // Register to wait for the child process to terminate.
+  status_zx = child_process_.wait_async(
+      port, kProcessKey, ZX_PROCESS_TERMINATED, 0);
+  GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
   // Register to wait for the socket to be readable or closed.
   status_zx = stderr_socket_.wait_async(
-      port_, kSocketKey, ZX_SOCKET_READABLE | ZX_SOCKET_PEER_CLOSED,
-      ZX_WAIT_ASYNC_REPEATING);
+      port, kSocketKey, ZX_SOCKET_READABLE | ZX_SOCKET_PEER_CLOSED, 0);
+  GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+
+  // Register to wait for an exception.
+  status_zx = exception_channel_.wait_async(
+      port, kExceptionKey, ZX_CHANNEL_READABLE, 0);
   GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
 
   bool process_terminated = false;
   bool socket_closed = false;
   do {
     zx_port_packet_t packet = {};
-    status_zx = port_.wait(zx::time::infinite(), &packet);
+    status_zx = port.wait(zx::time::infinite(), &packet);
     GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
 
-    if (packet.key == kProcessKey) {
-      if (ZX_PKT_IS_EXCEPTION(packet.type)) {
-        // Process encountered an exception. Kill it directly rather than
-        // letting other handlers process the event. We will get a second
-        // kProcessKey event when the process actually terminates.
-        status_zx = child_process_.kill();
-        GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
-      } else {
-        // Process terminated.
-        GTEST_DEATH_TEST_CHECK_(ZX_PKT_IS_SIGNAL_ONE(packet.type));
-        GTEST_DEATH_TEST_CHECK_(packet.signal.observed & ZX_PROCESS_TERMINATED);
-        process_terminated = true;
-      }
+    if (packet.key == kExceptionKey) {
+      // Process encountered an exception. Kill it directly rather than
+      // letting other handlers process the event. We will get a kProcessKey
+      // event when the process actually terminates.
+      status_zx = child_process_.kill();
+      GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
+    } else if (packet.key == kProcessKey) {
+      // Process terminated.
+      GTEST_DEATH_TEST_CHECK_(ZX_PKT_IS_SIGNAL_ONE(packet.type));
+      GTEST_DEATH_TEST_CHECK_(packet.signal.observed & ZX_PROCESS_TERMINATED);
+      process_terminated = true;
     } else if (packet.key == kSocketKey) {
-      GTEST_DEATH_TEST_CHECK_(ZX_PKT_IS_SIGNAL_REP(packet.type));
+      GTEST_DEATH_TEST_CHECK_(ZX_PKT_IS_SIGNAL_ONE(packet.type));
       if (packet.signal.observed & ZX_SOCKET_READABLE) {
         // Read data from the socket.
         constexpr size_t kBufferSize = 1024;
@@ -929,6 +940,9 @@
           socket_closed = true;
         } else {
           GTEST_DEATH_TEST_CHECK_(status_zx == ZX_ERR_SHOULD_WAIT);
+          status_zx = stderr_socket_.wait_async(
+              port, kSocketKey, ZX_SOCKET_READABLE | ZX_SOCKET_PEER_CLOSED, 0);
+          GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
         }
       } else {
         GTEST_DEATH_TEST_CHECK_(packet.signal.observed & ZX_SOCKET_PEER_CLOSED);
@@ -989,8 +1003,8 @@
   zx_status_t status;
   zx_handle_t child_pipe_handle;
   int child_pipe_fd;
-  status = fdio_pipe_half2(&child_pipe_fd, &child_pipe_handle);
-  GTEST_DEATH_TEST_CHECK_(status != ZX_OK);
+  status = fdio_pipe_half(&child_pipe_fd, &child_pipe_handle);
+  GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
   set_read_fd(child_pipe_fd);
 
   // Set the pipe handle for the child.
@@ -1029,12 +1043,11 @@
       child_job, ZX_JOB_POL_RELATIVE, ZX_JOB_POL_BASIC, &policy, 1);
   GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
 
-  // Create an exception port and attach it to the |child_job|, to allow
+  // Create an exception channel attached to the |child_job|, to allow
   // us to suppress the system default exception handler from firing.
-  status = zx::port::create(0, &port_);
-  GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
-  status = zx_task_bind_exception_port(
-      child_job, port_.get(), 0 /* key */, 0 /*options */);
+  status =
+      zx_task_create_exception_channel(
+          child_job, 0, exception_channel_.reset_and_get_address());
   GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
 
   // Spawn the child process.
@@ -1211,21 +1224,9 @@
   int close_fd;       // File descriptor to close; the read end of a pipe
 };
 
-#  if GTEST_OS_MAC
-inline char** GetEnviron() {
-  // When Google Test is built as a framework on MacOS X, the environ variable
-  // is unavailable. Apple's documentation (man environ) recommends using
-  // _NSGetEnviron() instead.
-  return *_NSGetEnviron();
-}
-#  else
-// Some POSIX platforms expect you to declare environ. extern "C" makes
-// it reside in the global namespace.
+#  if GTEST_OS_QNX
 extern "C" char** environ;
-inline char** GetEnviron() { return environ; }
-#  endif  // GTEST_OS_MAC
-
-#  if !GTEST_OS_QNX
+#  else  // GTEST_OS_QNX
 // The main function for a threadsafe-style death test child process.
 // This function is called in a clone()-ed process and thus must avoid
 // any potentially unsafe operations like malloc or libc functions.
@@ -1245,18 +1246,18 @@
     return EXIT_FAILURE;
   }
 
-  // We can safely call execve() as it's a direct system call.  We
+  // We can safely call execv() as it's almost a direct system call. We
   // cannot use execvp() as it's a libc function and thus potentially
-  // unsafe.  Since execve() doesn't search the PATH, the user must
+  // unsafe.  Since execv() doesn't search the PATH, the user must
   // invoke the test program via a valid path that contains at least
   // one path separator.
-  execve(args->argv[0], args->argv, GetEnviron());
-  DeathTestAbort(std::string("execve(") + args->argv[0] + ", ...) in " +
+  execv(args->argv[0], args->argv);
+  DeathTestAbort(std::string("execv(") + args->argv[0] + ", ...) in " +
                  original_dir + " failed: " +
                  GetLastErrnoDescription());
   return EXIT_FAILURE;
 }
-#  endif  // !GTEST_OS_QNX
+#  endif  // GTEST_OS_QNX
 
 #  if GTEST_HAS_CLONE
 // Two utility routines that together determine the direction the stack
@@ -1270,19 +1271,24 @@
 // correct answer.
 static void StackLowerThanAddress(const void* ptr,
                                   bool* result) GTEST_NO_INLINE_;
+// Make sure sanitizers do not tamper with the stack here.
+// Ideally, we want to use `__builtin_frame_address` instead of a local variable
+// address with sanitizer disabled, but it does not work when the
+// compiler optimizes the stack frame out, which happens on PowerPC targets.
 // HWAddressSanitizer add a random tag to the MSB of the local variable address,
 // making comparison result unpredictable.
+GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
 GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
 static void StackLowerThanAddress(const void* ptr, bool* result) {
-  int dummy;
-  *result = (&dummy < ptr);
+  int dummy = 0;
+  *result = std::less<const void*>()(&dummy, ptr);
 }
 
 // Make sure AddressSanitizer does not tamper with the stack here.
 GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_
 GTEST_ATTRIBUTE_NO_SANITIZE_HWADDRESS_
 static bool StackGrowsDown() {
-  int dummy;
+  int dummy = 0;
   bool result;
   StackLowerThanAddress(&dummy, &result);
   return result;
@@ -1325,8 +1331,7 @@
                                         fd_flags | FD_CLOEXEC));
   struct inheritance inherit = {0};
   // spawn is a system call.
-  child_pid =
-      spawn(args.argv[0], 0, nullptr, &inherit, args.argv, GetEnviron());
+  child_pid = spawn(args.argv[0], 0, nullptr, &inherit, args.argv, environ);
   // Restores the current working directory.
   GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1);
   GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd));
@@ -1350,7 +1355,7 @@
 
   if (!use_fork) {
     static const bool stack_grows_down = StackGrowsDown();
-    const size_t stack_size = getpagesize();
+    const auto stack_size = static_cast<size_t>(getpagesize() * 2);
     // MMAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead.
     void* const stack = mmap(nullptr, stack_size, PROT_READ | PROT_WRITE,
                              MAP_ANON | MAP_PRIVATE, -1, 0);
@@ -1366,8 +1371,9 @@
     void* const stack_top =
         static_cast<char*>(stack) +
             (stack_grows_down ? stack_size - kMaxStackAlignment : 0);
-    GTEST_DEATH_TEST_CHECK_(stack_size > kMaxStackAlignment &&
-        reinterpret_cast<intptr_t>(stack_top) % kMaxStackAlignment == 0);
+    GTEST_DEATH_TEST_CHECK_(
+        static_cast<size_t>(stack_size) > kMaxStackAlignment &&
+        reinterpret_cast<uintptr_t>(stack_top) % kMaxStackAlignment == 0);
 
     child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args);
 
diff --git a/deps/boringssl/src/third_party/googletest/src/gtest-filepath.cc b/deps/boringssl/src/third_party/googletest/src/gtest-filepath.cc
index 499ec58..af29768 100644
--- a/deps/boringssl/src/third_party/googletest/src/gtest-filepath.cc
+++ b/deps/boringssl/src/third_party/googletest/src/gtest-filepath.cc
@@ -93,8 +93,8 @@
 // Returns the current working directory, or "" if unsuccessful.
 FilePath FilePath::GetCurrentDir() {
 #if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || \
-    GTEST_OS_WINDOWS_RT || ARDUINO
-  // Windows CE and Arduino don't have a current directory, so we just return
+    GTEST_OS_WINDOWS_RT || GTEST_OS_ESP8266 || GTEST_OS_ESP32
+  // These platforms do not have a current directory, so we just return
   // something reasonable.
   return FilePath(kCurrentDirectoryString);
 #elif GTEST_OS_WINDOWS
@@ -163,7 +163,7 @@
   const char* const last_sep = FindLastPathSeparator();
   std::string dir;
   if (last_sep) {
-    dir = std::string(c_str(), last_sep + 1 - c_str());
+    dir = std::string(c_str(), static_cast<size_t>(last_sep + 1 - c_str()));
   } else {
     dir = kCurrentDirectoryString;
   }
@@ -323,6 +323,9 @@
   delete [] unicode;
 #elif GTEST_OS_WINDOWS
   int result = _mkdir(pathname_.c_str());
+#elif GTEST_OS_ESP8266
+  // do nothing
+  int result = 0;
 #else
   int result = mkdir(pathname_.c_str(), 0777);
 #endif  // GTEST_OS_WINDOWS_MOBILE
@@ -346,33 +349,19 @@
 // For example, "bar///foo" becomes "bar/foo". Does not eliminate other
 // redundancies that might be in a pathname involving "." or "..".
 void FilePath::Normalize() {
-  if (pathname_.c_str() == nullptr) {
-    pathname_ = "";
-    return;
-  }
-  const char* src = pathname_.c_str();
-  char* const dest = new char[pathname_.length() + 1];
-  char* dest_ptr = dest;
-  memset(dest_ptr, 0, pathname_.length() + 1);
+  auto out = pathname_.begin();
 
-  while (*src != '\0') {
-    *dest_ptr = *src;
-    if (!IsPathSeparator(*src)) {
-      src++;
+  for (const char character : pathname_) {
+    if (!IsPathSeparator(character)) {
+      *(out++) = character;
+    } else if (out == pathname_.begin() || *std::prev(out) != kPathSeparator) {
+      *(out++) = kPathSeparator;
     } else {
-#if GTEST_HAS_ALT_PATH_SEP_
-      if (*dest_ptr == kAlternatePathSeparator) {
-        *dest_ptr = kPathSeparator;
-      }
-#endif
-      while (IsPathSeparator(*src))
-        src++;
+      continue;
     }
-    dest_ptr++;
   }
-  *dest_ptr = '\0';
-  pathname_ = dest;
-  delete[] dest;
+
+  pathname_.erase(out, pathname_.end());
 }
 
 }  // namespace internal
diff --git a/deps/boringssl/src/third_party/googletest/src/gtest-internal-inl.h b/deps/boringssl/src/third_party/googletest/src/gtest-internal-inl.h
index 29dc682..38306c8 100644
--- a/deps/boringssl/src/third_party/googletest/src/gtest-internal-inl.h
+++ b/deps/boringssl/src/third_party/googletest/src/gtest-internal-inl.h
@@ -42,6 +42,7 @@
 #include <string.h>  // For memmove.
 
 #include <algorithm>
+#include <cstdint>
 #include <memory>
 #include <string>
 #include <vector>
@@ -83,9 +84,11 @@
 const char kBreakOnFailureFlag[] = "break_on_failure";
 const char kCatchExceptionsFlag[] = "catch_exceptions";
 const char kColorFlag[] = "color";
+const char kFailFast[] = "fail_fast";
 const char kFilterFlag[] = "filter";
 const char kListTestsFlag[] = "list_tests";
 const char kOutputFlag[] = "output";
+const char kBriefFlag[] = "brief";
 const char kPrintTimeFlag[] = "print_time";
 const char kPrintUTF8Flag[] = "print_utf8";
 const char kRandomSeedFlag[] = "random_seed";
@@ -99,14 +102,14 @@
 // A valid random seed must be in [1, kMaxRandomSeed].
 const int kMaxRandomSeed = 99999;
 
-// g_help_flag is true iff the --help flag or an equivalent form is
-// specified on the command line.
+// g_help_flag is true if and only if the --help flag or an equivalent form
+// is specified on the command line.
 GTEST_API_ extern bool g_help_flag;
 
 // Returns the current time in milliseconds.
 GTEST_API_ TimeInMillis GetTimeInMillis();
 
-// Returns true iff Google Test should use colors in the output.
+// Returns true if and only if Google Test should use colors in the output.
 GTEST_API_ bool ShouldUseColor(bool stdout_is_tty);
 
 // Formats the given time in milliseconds as seconds.
@@ -123,11 +126,11 @@
 // On success, stores the value of the flag in *value, and returns
 // true.  On failure, returns false without changing *value.
 GTEST_API_ bool ParseInt32Flag(
-    const char* str, const char* flag, Int32* value);
+    const char* str, const char* flag, int32_t* value);
 
 // Returns a random seed in range [1, kMaxRandomSeed] based on the
 // given --gtest_random_seed flag value.
-inline int GetRandomSeedFromFlag(Int32 random_seed_flag) {
+inline int GetRandomSeedFromFlag(int32_t random_seed_flag) {
   const unsigned int raw_seed = (random_seed_flag == 0) ?
       static_cast<unsigned int>(GetTimeInMillis()) :
       static_cast<unsigned int>(random_seed_flag);
@@ -163,10 +166,12 @@
     color_ = GTEST_FLAG(color);
     death_test_style_ = GTEST_FLAG(death_test_style);
     death_test_use_fork_ = GTEST_FLAG(death_test_use_fork);
+    fail_fast_ = GTEST_FLAG(fail_fast);
     filter_ = GTEST_FLAG(filter);
     internal_run_death_test_ = GTEST_FLAG(internal_run_death_test);
     list_tests_ = GTEST_FLAG(list_tests);
     output_ = GTEST_FLAG(output);
+    brief_ = GTEST_FLAG(brief);
     print_time_ = GTEST_FLAG(print_time);
     print_utf8_ = GTEST_FLAG(print_utf8);
     random_seed_ = GTEST_FLAG(random_seed);
@@ -186,9 +191,11 @@
     GTEST_FLAG(death_test_style) = death_test_style_;
     GTEST_FLAG(death_test_use_fork) = death_test_use_fork_;
     GTEST_FLAG(filter) = filter_;
+    GTEST_FLAG(fail_fast) = fail_fast_;
     GTEST_FLAG(internal_run_death_test) = internal_run_death_test_;
     GTEST_FLAG(list_tests) = list_tests_;
     GTEST_FLAG(output) = output_;
+    GTEST_FLAG(brief) = brief_;
     GTEST_FLAG(print_time) = print_time_;
     GTEST_FLAG(print_utf8) = print_utf8_;
     GTEST_FLAG(random_seed) = random_seed_;
@@ -207,16 +214,18 @@
   std::string color_;
   std::string death_test_style_;
   bool death_test_use_fork_;
+  bool fail_fast_;
   std::string filter_;
   std::string internal_run_death_test_;
   bool list_tests_;
   std::string output_;
+  bool brief_;
   bool print_time_;
   bool print_utf8_;
-  internal::Int32 random_seed_;
-  internal::Int32 repeat_;
+  int32_t random_seed_;
+  int32_t repeat_;
   bool shuffle_;
-  internal::Int32 stack_trace_depth_;
+  int32_t stack_trace_depth_;
   std::string stream_result_to_;
   bool throw_on_failure_;
 } GTEST_ATTRIBUTE_UNUSED_;
@@ -227,7 +236,7 @@
 // If the code_point is not a valid Unicode code point
 // (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
 // to "(Invalid Unicode 0xXXXXXXXX)".
-GTEST_API_ std::string CodePointToUtf8(UInt32 code_point);
+GTEST_API_ std::string CodePointToUtf8(uint32_t code_point);
 
 // Converts a wide string to a narrow string in UTF-8 encoding.
 // The wide string is assumed to have the following encoding:
@@ -260,14 +269,14 @@
                             const char* shard_index_str,
                             bool in_subprocess_for_death_test);
 
-// Parses the environment variable var as an Int32. If it is unset,
-// returns default_val. If it is not an Int32, prints an error and
+// Parses the environment variable var as a 32-bit integer. If it is unset,
+// returns default_val. If it is not a 32-bit integer, prints an error and
 // and aborts.
-GTEST_API_ Int32 Int32FromEnvOrDie(const char* env_var, Int32 default_val);
+GTEST_API_ int32_t Int32FromEnvOrDie(const char* env_var, int32_t default_val);
 
 // Given the total number of shards, the shard index, and the test id,
-// returns true iff the test should be run on this shard. The test id is
-// some arbitrary but unique non-negative integer assigned to each test
+// returns true if and only if the test should be run on this shard. The test id
+// is some arbitrary but unique non-negative integer assigned to each test
 // method. Assumes that 0 <= shard_index < total_shards.
 GTEST_API_ bool ShouldRunTestOnShard(
     int total_shards, int shard_index, int test_id);
@@ -298,7 +307,8 @@
 // in range [0, v.size()).
 template <typename E>
 inline E GetElementOr(const std::vector<E>& v, int i, E default_value) {
-  return (i < 0 || i >= static_cast<int>(v.size())) ? default_value : v[i];
+  return (i < 0 || i >= static_cast<int>(v.size())) ? default_value
+                                                    : v[static_cast<size_t>(i)];
 }
 
 // Performs an in-place shuffle of a range of the vector's elements.
@@ -320,8 +330,11 @@
   // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle
   for (int range_width = end - begin; range_width >= 2; range_width--) {
     const int last_in_range = begin + range_width - 1;
-    const int selected = begin + random->Generate(range_width);
-    std::swap((*v)[selected], (*v)[last_in_range]);
+    const int selected =
+        begin +
+        static_cast<int>(random->Generate(static_cast<uint32_t>(range_width)));
+    std::swap((*v)[static_cast<size_t>(selected)],
+              (*v)[static_cast<size_t>(last_in_range)]);
   }
 }
 
@@ -348,7 +361,7 @@
   // TestPropertyKeyIs has NO default constructor.
   explicit TestPropertyKeyIs(const std::string& key) : key_(key) {}
 
-  // Returns true iff the test name of test property matches on key_.
+  // Returns true if and only if the test name of test property matches on key_.
   bool operator()(const TestProperty& test_property) const {
     return test_property.key() == key_;
   }
@@ -381,15 +394,15 @@
 
   // Functions for processing the gtest_filter flag.
 
-  // Returns true iff the wildcard pattern matches the string.  The
-  // first ':' or '\0' character in pattern marks the end of it.
+  // Returns true if and only if the wildcard pattern matches the string.
+  // The first ':' or '\0' character in pattern marks the end of it.
   //
   // This recursive algorithm isn't very efficient, but is clear and
   // works well enough for matching test names, which are short.
   static bool PatternMatchesString(const char *pattern, const char *str);
 
-  // Returns true iff the user-specified filter matches the test suite
-  // name and the test name.
+  // Returns true if and only if the user-specified filter matches the test
+  // suite name and the test name.
   static bool FilterMatchesTest(const std::string& test_suite_name,
                                 const std::string& test_name);
 
@@ -573,11 +586,12 @@
   // Gets the elapsed time, in milliseconds.
   TimeInMillis elapsed_time() const { return elapsed_time_; }
 
-  // Returns true iff the unit test passed (i.e. all test suites passed).
+  // Returns true if and only if the unit test passed (i.e. all test suites
+  // passed).
   bool Passed() const { return !Failed(); }
 
-  // Returns true iff the unit test failed (i.e. some test suite failed
-  // or something outside of all tests failed).
+  // Returns true if and only if the unit test failed (i.e. some test suite
+  // failed or something outside of all tests failed).
   bool Failed() const {
     return failed_test_suite_count() > 0 || ad_hoc_test_result()->Failed();
   }
@@ -586,7 +600,7 @@
   // total_test_suite_count() - 1. If i is not in that range, returns NULL.
   const TestSuite* GetTestSuite(int i) const {
     const int index = GetElementOr(test_suite_indices_, i, -1);
-    return index < 0 ? nullptr : test_suites_[i];
+    return index < 0 ? nullptr : test_suites_[static_cast<size_t>(i)];
   }
 
   //  Legacy API is deprecated but still available
@@ -598,7 +612,7 @@
   // total_test_suite_count() - 1. If i is not in that range, returns NULL.
   TestSuite* GetMutableSuiteCase(int i) {
     const int index = GetElementOr(test_suite_indices_, i, -1);
-    return index < 0 ? nullptr : test_suites_[index];
+    return index < 0 ? nullptr : test_suites_[static_cast<size_t>(index)];
   }
 
   // Provides access to the event listener list.
@@ -641,10 +655,10 @@
   // Arguments:
   //
   //   test_suite_name: name of the test suite
-  //   type_param:     the name of the test's type parameter, or NULL if
-  //                   this is not a typed or a type-parameterized test.
-  //   set_up_tc:      pointer to the function that sets up the test suite
-  //   tear_down_tc:   pointer to the function that tears down the test suite
+  //   type_param:      the name of the test's type parameter, or NULL if
+  //                    this is not a typed or a type-parameterized test.
+  //   set_up_tc:       pointer to the function that sets up the test suite
+  //   tear_down_tc:    pointer to the function that tears down the test suite
   TestSuite* GetTestSuite(const char* test_suite_name, const char* type_param,
                           internal::SetUpTestSuiteFunc set_up_tc,
                           internal::TearDownTestSuiteFunc tear_down_tc);
@@ -668,6 +682,7 @@
   void AddTestInfo(internal::SetUpTestSuiteFunc set_up_tc,
                    internal::TearDownTestSuiteFunc tear_down_tc,
                    TestInfo* test_info) {
+#if GTEST_HAS_DEATH_TEST
     // In order to support thread-safe death tests, we need to
     // remember the original working directory when the test program
     // was first invoked.  We cannot do this in RUN_ALL_TESTS(), as
@@ -680,6 +695,7 @@
       GTEST_CHECK_(!original_working_dir_.IsEmpty())
           << "Failed to get the current working directory.";
     }
+#endif  // GTEST_HAS_DEATH_TEST
 
     GetTestSuite(test_info->test_suite_name(), test_info->type_param(),
                  set_up_tc, tear_down_tc)
@@ -692,6 +708,17 @@
     return parameterized_test_registry_;
   }
 
+  std::set<std::string>* ignored_parameterized_test_suites() {
+    return &ignored_parameterized_test_suites_;
+  }
+
+  // Returns TypeParameterizedTestSuiteRegistry object used to keep track of
+  // type-parameterized tests and instantiations of them.
+  internal::TypeParameterizedTestSuiteRegistry&
+  type_parameterized_test_registry() {
+    return type_parameterized_test_registry_;
+  }
+
   // Sets the TestSuite object for the test that's currently running.
   void set_current_test_suite(TestSuite* a_current_test_suite) {
     current_test_suite_ = a_current_test_suite;
@@ -868,6 +895,12 @@
   // ParameterizedTestRegistry object used to register value-parameterized
   // tests.
   internal::ParameterizedTestSuiteRegistry parameterized_test_registry_;
+  internal::TypeParameterizedTestSuiteRegistry
+      type_parameterized_test_registry_;
+
+  // The set holding the name of parameterized
+  // test suites that may go uninstantiated.
+  std::set<std::string> ignored_parameterized_test_suites_;
 
   // Indicates whether RegisterParameterizedTests() has been called already.
   bool parameterized_tests_registered_;
@@ -907,7 +940,7 @@
   // desired.
   OsStackTraceGetterInterface* os_stack_trace_getter_;
 
-  // True iff PostFlagParsingInit() has been called.
+  // True if and only if PostFlagParsingInit() has been called.
   bool post_flag_parse_init_performed_;
 
   // The random number seed used at the beginning of the test run.
@@ -994,20 +1027,9 @@
   char* end;
   // BiggestConvertible is the largest integer type that system-provided
   // string-to-number conversion routines can return.
+  using BiggestConvertible = unsigned long long;  // NOLINT
 
-# if GTEST_OS_WINDOWS && !defined(__GNUC__)
-
-  // MSVC and C++ Builder define __int64 instead of the standard long long.
-  typedef unsigned __int64 BiggestConvertible;
-  const BiggestConvertible parsed = _strtoui64(str.c_str(), &end, 10);
-
-# else
-
-  typedef unsigned long long BiggestConvertible;  // NOLINT
-  const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10);
-
-# endif  // GTEST_OS_WINDOWS && !defined(__GNUC__)
-
+  const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10);  // NOLINT
   const bool parse_success = *end == '\0' && errno == 0;
 
   GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed));
@@ -1083,8 +1105,8 @@
       GTEST_CHECK_(sockfd_ != -1)
           << "Send() can be called only when there is a connection.";
 
-      const int len = static_cast<int>(message.length());
-      if (write(sockfd_, message.c_str(), len) != len) {
+      const auto len = static_cast<size_t>(message.length());
+      if (write(sockfd_, message.c_str(), len) != static_cast<ssize_t>(len)) {
         GTEST_LOG_(WARNING)
             << "stream_result_to: failed to stream to "
             << host_name_ << ":" << port_num_;
diff --git a/deps/boringssl/src/third_party/googletest/src/gtest-matchers.cc b/deps/boringssl/src/third_party/googletest/src/gtest-matchers.cc
index 7d2fb68..65104eb 100644
--- a/deps/boringssl/src/third_party/googletest/src/gtest-matchers.cc
+++ b/deps/boringssl/src/third_party/googletest/src/gtest-matchers.cc
@@ -58,40 +58,40 @@
 // s.
 Matcher<std::string>::Matcher(const char* s) { *this = Eq(std::string(s)); }
 
-#if GTEST_HAS_ABSL
-// Constructs a matcher that matches a const absl::string_view& whose value is
+#if GTEST_INTERNAL_HAS_STRING_VIEW
+// Constructs a matcher that matches a const StringView& whose value is
 // equal to s.
-Matcher<const absl::string_view&>::Matcher(const std::string& s) {
+Matcher<const internal::StringView&>::Matcher(const std::string& s) {
   *this = Eq(s);
 }
 
-// Constructs a matcher that matches a const absl::string_view& whose value is
+// Constructs a matcher that matches a const StringView& whose value is
 // equal to s.
-Matcher<const absl::string_view&>::Matcher(const char* s) {
+Matcher<const internal::StringView&>::Matcher(const char* s) {
   *this = Eq(std::string(s));
 }
 
-// Constructs a matcher that matches a const absl::string_view& whose value is
+// Constructs a matcher that matches a const StringView& whose value is
 // equal to s.
-Matcher<const absl::string_view&>::Matcher(absl::string_view s) {
+Matcher<const internal::StringView&>::Matcher(internal::StringView s) {
   *this = Eq(std::string(s));
 }
 
-// Constructs a matcher that matches a absl::string_view whose value is equal to
+// Constructs a matcher that matches a StringView whose value is equal to
 // s.
-Matcher<absl::string_view>::Matcher(const std::string& s) { *this = Eq(s); }
+Matcher<internal::StringView>::Matcher(const std::string& s) { *this = Eq(s); }
 
-// Constructs a matcher that matches a absl::string_view whose value is equal to
+// Constructs a matcher that matches a StringView whose value is equal to
 // s.
-Matcher<absl::string_view>::Matcher(const char* s) {
+Matcher<internal::StringView>::Matcher(const char* s) {
   *this = Eq(std::string(s));
 }
 
-// Constructs a matcher that matches a absl::string_view whose value is equal to
+// Constructs a matcher that matches a StringView whose value is equal to
 // s.
-Matcher<absl::string_view>::Matcher(absl::string_view s) {
+Matcher<internal::StringView>::Matcher(internal::StringView s) {
   *this = Eq(std::string(s));
 }
-#endif  // GTEST_HAS_ABSL
+#endif  // GTEST_INTERNAL_HAS_STRING_VIEW
 
 }  // namespace testing
diff --git a/deps/boringssl/src/third_party/googletest/src/gtest-port.cc b/deps/boringssl/src/third_party/googletest/src/gtest-port.cc
index 8f7cd1c..3f39f71 100644
--- a/deps/boringssl/src/third_party/googletest/src/gtest-port.cc
+++ b/deps/boringssl/src/third_party/googletest/src/gtest-port.cc
@@ -34,6 +34,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <cstdint>
 #include <fstream>
 #include <memory>
 
@@ -117,7 +118,7 @@
 size_t GetThreadCount() {
   const std::string filename =
       (Message() << "/proc/" << getpid() << "/stat").GetString();
-  return ReadProcFileField<int>(filename, 19);
+  return ReadProcFileField<size_t>(filename, 19);
 }
 
 #elif GTEST_OS_MAC
@@ -175,7 +176,7 @@
   if (sysctl(mib, miblen, &info, &size, NULL, 0)) {
     return 0;
   }
-  return KP_NLWP(info);
+  return static_cast<size_t>(KP_NLWP(info));
 }
 #elif GTEST_OS_OPENBSD
 
@@ -197,7 +198,8 @@
   if (sysctl(mib, miblen, NULL, &size, NULL, 0)) {
     return 0;
   }
-  mib[5] = size / mib[4];
+
+  mib[5] = static_cast<int>(size / static_cast<size_t>(mib[4]));
 
   // populate array of structs
   struct kinfo_proc info[mib[5]];
@@ -206,8 +208,8 @@
   }
 
   // exclude empty members
-  int nthreads = 0;
-  for (int i = 0; i < size / mib[4]; i++) {
+  size_t nthreads = 0;
+  for (size_t i = 0; i < size / static_cast<size_t>(mib[4]); i++) {
     if (info[i].p_tid != -1)
       nthreads++;
   }
@@ -279,7 +281,7 @@
 #if GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS
 
 void SleepMilliseconds(int n) {
-  ::Sleep(n);
+  ::Sleep(static_cast<DWORD>(n));
 }
 
 AutoHandle::AutoHandle()
@@ -536,6 +538,9 @@
   // Returns a value that can be used to identify the thread from other threads.
   static ThreadLocalValueHolderBase* GetValueOnCurrentThread(
       const ThreadLocalBase* thread_local_instance) {
+#ifdef _MSC_VER
+    MemoryIsNotDeallocated memory_is_not_deallocated;
+#endif  // _MSC_VER
     DWORD current_thread = ::GetCurrentThreadId();
     MutexLock lock(&mutex_);
     ThreadIdToThreadLocals* const thread_to_thread_locals =
@@ -715,7 +720,7 @@
   free(const_cast<char*>(pattern_));
 }
 
-// Returns true iff regular expression re matches the entire str.
+// Returns true if and only if regular expression re matches the entire str.
 bool RE::FullMatch(const char* str, const RE& re) {
   if (!re.is_valid_) return false;
 
@@ -723,8 +728,8 @@
   return regexec(&re.full_regex_, str, 1, &match, 0) == 0;
 }
 
-// Returns true iff regular expression re matches a substring of str
-// (including str itself).
+// Returns true if and only if regular expression re matches a substring of
+// str (including str itself).
 bool RE::PartialMatch(const char* str, const RE& re) {
   if (!re.is_valid_) return false;
 
@@ -764,14 +769,14 @@
 
 #elif GTEST_USES_SIMPLE_RE
 
-// Returns true iff ch appears anywhere in str (excluding the
+// Returns true if and only if ch appears anywhere in str (excluding the
 // terminating '\0' character).
 bool IsInSet(char ch, const char* str) {
   return ch != '\0' && strchr(str, ch) != nullptr;
 }
 
-// Returns true iff ch belongs to the given classification.  Unlike
-// similar functions in <ctype.h>, these aren't affected by the
+// Returns true if and only if ch belongs to the given classification.
+// Unlike similar functions in <ctype.h>, these aren't affected by the
 // current locale.
 bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; }
 bool IsAsciiPunct(char ch) {
@@ -784,13 +789,13 @@
       ('0' <= ch && ch <= '9') || ch == '_';
 }
 
-// Returns true iff "\\c" is a supported escape sequence.
+// Returns true if and only if "\\c" is a supported escape sequence.
 bool IsValidEscape(char c) {
   return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW"));
 }
 
-// Returns true iff the given atom (specified by escaped and pattern)
-// matches ch.  The result is undefined if the atom is invalid.
+// Returns true if and only if the given atom (specified by escaped and
+// pattern) matches ch.  The result is undefined if the atom is invalid.
 bool AtomMatchesChar(bool escaped, char pattern_char, char ch) {
   if (escaped) {  // "\\p" where p is pattern_char.
     switch (pattern_char) {
@@ -828,7 +833,7 @@
 
   bool is_valid = true;
 
-  // True iff ?, *, or + can follow the previous atom.
+  // True if and only if ?, *, or + can follow the previous atom.
   bool prev_repeatable = false;
   for (int i = 0; regex[i]; i++) {
     if (regex[i] == '\\') {  // An escape sequence
@@ -904,8 +909,8 @@
   return false;
 }
 
-// Returns true iff regex matches a prefix of str.  regex must be a
-// valid simple regular expression and not start with "^", or the
+// Returns true if and only if regex matches a prefix of str. regex must
+// be a valid simple regular expression and not start with "^", or the
 // result is undefined.
 bool MatchRegexAtHead(const char* regex, const char* str) {
   if (*regex == '\0')  // An empty regex matches a prefix of anything.
@@ -935,8 +940,8 @@
   }
 }
 
-// Returns true iff regex matches any substring of str.  regex must be
-// a valid simple regular expression, or the result is undefined.
+// Returns true if and only if regex matches any substring of str.  regex must
+// be a valid simple regular expression, or the result is undefined.
 //
 // The algorithm is recursive, but the recursion depth doesn't exceed
 // the regex length, so we won't need to worry about running out of
@@ -964,13 +969,13 @@
   free(const_cast<char*>(full_pattern_));
 }
 
-// Returns true iff regular expression re matches the entire str.
+// Returns true if and only if regular expression re matches the entire str.
 bool RE::FullMatch(const char* str, const RE& re) {
   return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str);
 }
 
-// Returns true iff regular expression re matches a substring of str
-// (including str itself).
+// Returns true if and only if regular expression re matches a substring of
+// str (including str itself).
 bool RE::PartialMatch(const char* str, const RE& re) {
   return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str);
 }
@@ -1102,17 +1107,19 @@
     // code as part of a regular standalone executable, which doesn't
     // run in a Dalvik process (e.g. when running it through 'adb shell').
     //
-    // The location /sdcard is directly accessible from native code
-    // and is the only location (unofficially) supported by the Android
-    // team. It's generally a symlink to the real SD Card mount point
-    // which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or
-    // other OEM-customized locations. Never rely on these, and always
-    // use /sdcard.
-    char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX";
+    // The location /data/local/tmp is directly accessible from native code.
+    // '/sdcard' and other variants cannot be relied on, as they are not
+    // guaranteed to be mounted, or may have a delay in mounting.
+    char name_template[] = "/data/local/tmp/gtest_captured_stream.XXXXXX";
 #  else
     char name_template[] = "/tmp/captured_stream.XXXXXX";
 #  endif  // GTEST_OS_LINUX_ANDROID
     const int captured_fd = mkstemp(name_template);
+    if (captured_fd == -1) {
+      GTEST_LOG_(WARNING)
+          << "Failed to create tmp file " << name_template
+          << " for test; does the test have access to the /tmp directory?";
+    }
     filename_ = name_template;
 # endif  // GTEST_OS_WINDOWS
     fflush(nullptr);
@@ -1134,6 +1141,10 @@
     }
 
     FILE* const file = posix::FOpen(filename_.c_str(), "r");
+    if (file == nullptr) {
+      GTEST_LOG_(FATAL) << "Failed to open tmp file " << filename_
+                        << " for capturing stream.";
+    }
     const std::string content = ReadEntireFile(file);
     posix::FClose(file);
     return content;
@@ -1280,7 +1291,7 @@
 // Parses 'str' for a 32-bit signed integer.  If successful, writes
 // the result to *value and returns true; otherwise leaves *value
 // unchanged and returns false.
-bool ParseInt32(const Message& src_text, const char* str, Int32* value) {
+bool ParseInt32(const Message& src_text, const char* str, int32_t* value) {
   // Parses the environment variable as a decimal integer.
   char* end = nullptr;
   const long long_value = strtol(str, &end, 10);  // NOLINT
@@ -1297,13 +1308,13 @@
     return false;
   }
 
-  // Is the parsed value in the range of an Int32?
-  const Int32 result = static_cast<Int32>(long_value);
+  // Is the parsed value in the range of an int32_t?
+  const auto result = static_cast<int32_t>(long_value);
   if (long_value == LONG_MAX || long_value == LONG_MIN ||
       // The parsed value overflows as a long.  (strtol() returns
       // LONG_MAX or LONG_MIN when the input overflows.)
       result != long_value
-      // The parsed value overflows as an Int32.
+      // The parsed value overflows as an int32_t.
       ) {
     Message msg;
     msg << "WARNING: " << src_text
@@ -1321,7 +1332,7 @@
 // Reads and returns the Boolean environment variable corresponding to
 // the given flag; if it's not set, returns default_value.
 //
-// The value is considered true iff it's not "0".
+// The value is considered true if and only if it's not "0".
 bool BoolFromGTestEnv(const char* flag, bool default_value) {
 #if defined(GTEST_GET_BOOL_FROM_ENV_)
   return GTEST_GET_BOOL_FROM_ENV_(flag, default_value);
@@ -1336,7 +1347,7 @@
 // Reads and returns a 32-bit integer stored in the environment
 // variable corresponding to the given flag; if it isn't set or
 // doesn't represent a valid 32-bit integer, returns default_value.
-Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) {
+int32_t Int32FromGTestEnv(const char* flag, int32_t default_value) {
 #if defined(GTEST_GET_INT32_FROM_ENV_)
   return GTEST_GET_INT32_FROM_ENV_(flag, default_value);
 #else
@@ -1347,7 +1358,7 @@
     return default_value;
   }
 
-  Int32 result = default_value;
+  int32_t result = default_value;
   if (!ParseInt32(Message() << "Environment variable " << env_var,
                   string_value, &result)) {
     printf("The default value %s is used.\n",
diff --git a/deps/boringssl/src/third_party/googletest/src/gtest-printers.cc b/deps/boringssl/src/third_party/googletest/src/gtest-printers.cc
index 40a8817..20ce1b8 100644
--- a/deps/boringssl/src/third_party/googletest/src/gtest-printers.cc
+++ b/deps/boringssl/src/third_party/googletest/src/gtest-printers.cc
@@ -44,6 +44,7 @@
 #include "gtest/gtest-printers.h"
 #include <stdio.h>
 #include <cctype>
+#include <cstdint>
 #include <cwchar>
 #include <ostream>  // NOLINT
 #include <string>
@@ -104,7 +105,7 @@
 
 }  // namespace
 
-namespace internal2 {
+namespace internal {
 
 // Delegates to PrintBytesInObjectToImpl() to print the bytes in the
 // given object.  The delegation simplifies the implementation, which
@@ -116,10 +117,6 @@
   PrintBytesInObjectToImpl(obj_bytes, count, os);
 }
 
-}  // namespace internal2
-
-namespace internal {
-
 // Depending on the value of a char (or wchar_t), we print it in one
 // of three formats:
 //   - as is if it's a printable ASCII (e.g. 'a', '2', ' '),
@@ -144,7 +141,8 @@
 // which is the type of c.
 template <typename UnsignedChar, typename Char>
 static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) {
-  switch (static_cast<wchar_t>(c)) {
+  wchar_t w_c = static_cast<wchar_t>(c);
+  switch (w_c) {
     case L'\0':
       *os << "\\0";
       break;
@@ -176,7 +174,7 @@
       *os << "\\v";
       break;
     default:
-      if (IsPrintableAscii(c)) {
+      if (IsPrintableAscii(w_c)) {
         *os << static_cast<char>(c);
         return kAsIs;
       } else {
@@ -236,7 +234,7 @@
   if (format == kHexEscape || (1 <= c && c <= 9)) {
     // Do nothing.
   } else {
-    *os << ", 0x" << String::FormatHexInt(static_cast<UnsignedChar>(c));
+    *os << ", 0x" << String::FormatHexInt(static_cast<int>(c));
   }
   *os << ")";
 }
@@ -254,6 +252,11 @@
   PrintCharAndCodeTo<wchar_t>(wc, os);
 }
 
+void PrintTo(char32_t c, ::std::ostream* os) {
+  *os << std::hex << "U+" << std::uppercase << std::setfill('0') << std::setw(4)
+      << static_cast<uint32_t>(c);
+}
+
 // Prints the given array of characters to the ostream.  CharType must be either
 // char or wchar_t.
 // The array starts at begin, the length is len, it may include '\0' characters
diff --git a/deps/boringssl/src/third_party/googletest/src/gtest-test-part.cc b/deps/boringssl/src/third_party/googletest/src/gtest-test-part.cc
index 515b308..a938683 100644
--- a/deps/boringssl/src/third_party/googletest/src/gtest-test-part.cc
+++ b/deps/boringssl/src/third_party/googletest/src/gtest-test-part.cc
@@ -31,6 +31,8 @@
 // The Google C++ Testing and Mocking Framework (Google Test)
 
 #include "gtest/gtest-test-part.h"
+
+#include "gtest/internal/gtest-port.h"
 #include "src/gtest-internal-inl.h"
 
 namespace testing {
@@ -46,7 +48,9 @@
 
 // Prints a TestPartResult object.
 std::ostream& operator<<(std::ostream& os, const TestPartResult& result) {
-  return os << result.file_name() << ":" << result.line_number() << ": "
+  return os << internal::FormatFileLocation(result.file_name(),
+                                            result.line_number())
+            << " "
             << (result.type() == TestPartResult::kSuccess
                     ? "Success"
                     : result.type() == TestPartResult::kSkip
@@ -70,7 +74,7 @@
     internal::posix::Abort();
   }
 
-  return array_[index];
+  return array_[static_cast<size_t>(index)];
 }
 
 // Returns the number of TestPartResult objects in the array.
diff --git a/deps/boringssl/src/third_party/googletest/src/gtest-typed-test.cc b/deps/boringssl/src/third_party/googletest/src/gtest-typed-test.cc
index 8677caf..722c7b1 100644
--- a/deps/boringssl/src/third_party/googletest/src/gtest-typed-test.cc
+++ b/deps/boringssl/src/third_party/googletest/src/gtest-typed-test.cc
@@ -58,7 +58,10 @@
 // registered_tests_; returns registered_tests if successful, or
 // aborts the program otherwise.
 const char* TypedTestSuitePState::VerifyRegisteredTestNames(
-    const char* file, int line, const char* registered_tests) {
+    const char* test_suite_name, const char* file, int line,
+    const char* registered_tests) {
+  RegisterTypeParameterizedTestSuite(test_suite_name, CodeLocation(file, line));
+
   typedef RegisteredTestsMap::const_iterator RegisteredTestIter;
   registered_ = true;
 
@@ -75,17 +78,7 @@
       continue;
     }
 
-    bool found = false;
-    for (RegisteredTestIter it = registered_tests_.begin();
-         it != registered_tests_.end();
-         ++it) {
-      if (name == it->first) {
-        found = true;
-        break;
-      }
-    }
-
-    if (found) {
+    if (registered_tests_.count(name) != 0) {
       tests.insert(name);
     } else {
       errors << "No test named " << name
diff --git a/deps/boringssl/src/third_party/googletest/src/gtest.cc b/deps/boringssl/src/third_party/googletest/src/gtest.cc
index d4d3d03..f134a6f 100644
--- a/deps/boringssl/src/third_party/googletest/src/gtest.cc
+++ b/deps/boringssl/src/third_party/googletest/src/gtest.cc
@@ -35,7 +35,6 @@
 #include "gtest/gtest-spi.h"
 
 #include <ctype.h>
-#include <math.h>
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -44,6 +43,9 @@
 #include <wctype.h>
 
 #include <algorithm>
+#include <chrono>  // NOLINT
+#include <cmath>
+#include <cstdint>
 #include <iomanip>
 #include <limits>
 #include <list>
@@ -54,8 +56,6 @@
 
 #if GTEST_OS_LINUX
 
-# define GTEST_HAS_GETTIMEOFDAY_ 1
-
 # include <fcntl.h>  // NOLINT
 # include <limits.h>  // NOLINT
 # include <sched.h>  // NOLINT
@@ -67,7 +67,6 @@
 # include <string>
 
 #elif GTEST_OS_ZOS
-# define GTEST_HAS_GETTIMEOFDAY_ 1
 # include <sys/time.h>  // NOLINT
 
 // On z/OS we additionally need strings.h for strcasecmp.
@@ -80,27 +79,24 @@
 
 #elif GTEST_OS_WINDOWS  // We are on Windows proper.
 
+# include <windows.h>  // NOLINT
+# undef min
+
+#ifdef _MSC_VER
+# include <crtdbg.h>  // NOLINT
+#endif
+
 # include <io.h>  // NOLINT
 # include <sys/timeb.h>  // NOLINT
 # include <sys/types.h>  // NOLINT
 # include <sys/stat.h>  // NOLINT
 
 # if GTEST_OS_WINDOWS_MINGW
-// MinGW has gettimeofday() but not _ftime64().
-#  define GTEST_HAS_GETTIMEOFDAY_ 1
 #  include <sys/time.h>  // NOLINT
 # endif  // GTEST_OS_WINDOWS_MINGW
 
-// cpplint thinks that the header is already included, so we want to
-// silence it.
-# include <windows.h>  // NOLINT
-# undef min
-
 #else
 
-// Assume other platforms have gettimeofday().
-# define GTEST_HAS_GETTIMEOFDAY_ 1
-
 // cpplint thinks that the header is already included, so we want to
 // silence it.
 # include <sys/time.h>  // NOLINT
@@ -177,8 +173,8 @@
 // stack trace.
 const char kStackTraceMarker[] = "\nStack trace:\n";
 
-// g_help_flag is true iff the --help flag or an equivalent form is
-// specified on the command line.
+// g_help_flag is true if and only if the --help flag or an equivalent form
+// is specified on the command line.
 bool g_help_flag = false;
 
 // Utilty function to Open File for Writing
@@ -209,21 +205,35 @@
   return kUniversalFilter;
 }
 
+// Bazel passes in the argument to '--test_runner_fail_fast' via the
+// TESTBRIDGE_TEST_RUNNER_FAIL_FAST environment variable.
+static bool GetDefaultFailFast() {
+  const char* const testbridge_test_runner_fail_fast =
+      internal::posix::GetEnv("TESTBRIDGE_TEST_RUNNER_FAIL_FAST");
+  if (testbridge_test_runner_fail_fast != nullptr) {
+    return strcmp(testbridge_test_runner_fail_fast, "1") == 0;
+  }
+  return false;
+}
+
+GTEST_DEFINE_bool_(
+    fail_fast, internal::BoolFromGTestEnv("fail_fast", GetDefaultFailFast()),
+    "True if and only if a test failure should stop further test execution.");
+
 GTEST_DEFINE_bool_(
     also_run_disabled_tests,
     internal::BoolFromGTestEnv("also_run_disabled_tests", false),
     "Run disabled tests too, in addition to the tests normally being run.");
 
 GTEST_DEFINE_bool_(
-    break_on_failure,
-    internal::BoolFromGTestEnv("break_on_failure", false),
-    "True iff a failed assertion should be a debugger break-point.");
+    break_on_failure, internal::BoolFromGTestEnv("break_on_failure", false),
+    "True if and only if a failed assertion should be a debugger "
+    "break-point.");
 
-GTEST_DEFINE_bool_(
-    catch_exceptions,
-    internal::BoolFromGTestEnv("catch_exceptions", true),
-    "True iff " GTEST_NAME_
-    " should catch exceptions and treat them as test failures.");
+GTEST_DEFINE_bool_(catch_exceptions,
+                   internal::BoolFromGTestEnv("catch_exceptions", true),
+                   "True if and only if " GTEST_NAME_
+                   " should catch exceptions and treat them as test failures.");
 
 GTEST_DEFINE_string_(
     color,
@@ -271,16 +281,16 @@
     "digits.");
 
 GTEST_DEFINE_bool_(
-    print_time,
-    internal::BoolFromGTestEnv("print_time", true),
-    "True iff " GTEST_NAME_
-    " should display elapsed time in text output.");
+    brief, internal::BoolFromGTestEnv("brief", false),
+    "True if only test failures should be displayed in text output.");
 
-GTEST_DEFINE_bool_(
-    print_utf8,
-    internal::BoolFromGTestEnv("print_utf8", true),
-    "True iff " GTEST_NAME_
-    " prints UTF8 characters as text.");
+GTEST_DEFINE_bool_(print_time, internal::BoolFromGTestEnv("print_time", true),
+                   "True if and only if " GTEST_NAME_
+                   " should display elapsed time in text output.");
+
+GTEST_DEFINE_bool_(print_utf8, internal::BoolFromGTestEnv("print_utf8", true),
+                   "True if and only if " GTEST_NAME_
+                   " prints UTF8 characters as text.");
 
 GTEST_DEFINE_int32_(
     random_seed,
@@ -294,16 +304,14 @@
     "How many times to repeat each test.  Specify a negative number "
     "for repeating forever.  Useful for shaking out flaky tests.");
 
-GTEST_DEFINE_bool_(
-    show_internal_stack_frames, false,
-    "True iff " GTEST_NAME_ " should include internal stack frames when "
-    "printing test failure stack traces.");
+GTEST_DEFINE_bool_(show_internal_stack_frames, false,
+                   "True if and only if " GTEST_NAME_
+                   " should include internal stack frames when "
+                   "printing test failure stack traces.");
 
-GTEST_DEFINE_bool_(
-    shuffle,
-    internal::BoolFromGTestEnv("shuffle", false),
-    "True iff " GTEST_NAME_
-    " should randomize tests' order on every run.");
+GTEST_DEFINE_bool_(shuffle, internal::BoolFromGTestEnv("shuffle", false),
+                   "True if and only if " GTEST_NAME_
+                   " should randomize tests' order on every run.");
 
 GTEST_DEFINE_int32_(
     stack_trace_depth,
@@ -337,10 +345,10 @@
 // Generates a random number from [0, range), using a Linear
 // Congruential Generator (LCG).  Crashes if 'range' is 0 or greater
 // than kMaxRange.
-UInt32 Random::Generate(UInt32 range) {
+uint32_t Random::Generate(uint32_t range) {
   // These constants are the same as are used in glibc's rand(3).
   // Use wider types than necessary to prevent unsigned overflow diagnostics.
-  state_ = static_cast<UInt32>(1103515245ULL*state_ + 12345U) % kMaxRange;
+  state_ = static_cast<uint32_t>(1103515245ULL*state_ + 12345U) % kMaxRange;
 
   GTEST_CHECK_(range > 0)
       << "Cannot generate a number in the range [0, 0).";
@@ -354,7 +362,7 @@
   return state_ % range;
 }
 
-// GTestIsInitialized() returns true iff the user has initialized
+// GTestIsInitialized() returns true if and only if the user has initialized
 // Google Test.  Useful for catching the user mistake of not initializing
 // Google Test before calling RUN_ALL_TESTS().
 static bool GTestIsInitialized() { return GetArgvs().size() > 0; }
@@ -371,18 +379,18 @@
   return sum;
 }
 
-// Returns true iff the test suite passed.
+// Returns true if and only if the test suite passed.
 static bool TestSuitePassed(const TestSuite* test_suite) {
   return test_suite->should_run() && test_suite->Passed();
 }
 
-// Returns true iff the test suite failed.
+// Returns true if and only if the test suite failed.
 static bool TestSuiteFailed(const TestSuite* test_suite) {
   return test_suite->should_run() && test_suite->Failed();
 }
 
-// Returns true iff test_suite contains at least one test that should
-// run.
+// Returns true if and only if test_suite contains at least one test that
+// should run.
 static bool ShouldRunTestSuite(const TestSuite* test_suite) {
   return test_suite->should_run();
 }
@@ -410,6 +418,162 @@
                       );  // NOLINT
 }
 
+namespace {
+
+// When TEST_P is found without a matching INSTANTIATE_TEST_SUITE_P
+// to create test cases for it, a synthetic test case is
+// inserted to report either an error or a log message.
+//
+// This configuration bit will likely be removed at some point.
+constexpr bool kErrorOnUninstantiatedParameterizedTest = true;
+constexpr bool kErrorOnUninstantiatedTypeParameterizedTest = true;
+
+// A test that fails at a given file/line location with a given message.
+class FailureTest : public Test {
+ public:
+  explicit FailureTest(const CodeLocation& loc, std::string error_message,
+                       bool as_error)
+      : loc_(loc),
+        error_message_(std::move(error_message)),
+        as_error_(as_error) {}
+
+  void TestBody() override {
+    if (as_error_) {
+      AssertHelper(TestPartResult::kNonFatalFailure, loc_.file.c_str(),
+                   loc_.line, "") = Message() << error_message_;
+    } else {
+      std::cout << error_message_ << std::endl;
+    }
+  }
+
+ private:
+  const CodeLocation loc_;
+  const std::string error_message_;
+  const bool as_error_;
+};
+
+
+}  // namespace
+
+std::set<std::string>* GetIgnoredParameterizedTestSuites() {
+  return UnitTest::GetInstance()->impl()->ignored_parameterized_test_suites();
+}
+
+// Adds the given test suite to the set of suites allowed to go uninstantiated.
+MarkAsIgnored::MarkAsIgnored(const char* test_suite) {
+  GetIgnoredParameterizedTestSuites()->insert(test_suite);
+}
+
+// If this parameterized test suite has no instantiations (and that
+// has not been marked as okay), emit a test case reporting that.
+void InsertSyntheticTestCase(const std::string& name, CodeLocation location,
+                             bool has_test_p) {
+  const auto& ignored = *GetIgnoredParameterizedTestSuites();
+  if (ignored.find(name) != ignored.end()) return;
+
+  const char kMissingInstantiation[] =  //
+      " is defined via TEST_P, but never instantiated. None of the test cases "
+      "will run. Either no INSTANTIATE_TEST_SUITE_P is provided or the only "
+      "ones provided expand to nothing."
+      "\n\n"
+      "Ideally, TEST_P definitions should only ever be included as part of "
+      "binaries that intend to use them. (As opposed to, for example, being "
+      "placed in a library that may be linked in to get other utilities.)";
+
+  const char kMissingTestCase[] =  //
+      " is instantiated via INSTANTIATE_TEST_SUITE_P, but no tests are "
+      "defined via TEST_P . No test cases will run."
+      "\n\n"
+      "Ideally, INSTANTIATE_TEST_SUITE_P should only ever be invoked from "
+      "code that always depend on code that provides TEST_P. Failing to do "
+      "so is often an indication of dead code, e.g. the last TEST_P was "
+      "removed but the rest got left behind.";
+
+  std::string message =
+      "Parameterized test suite " + name +
+      (has_test_p ? kMissingInstantiation : kMissingTestCase) +
+      "\n\n"
+      "To suppress this error for this test suite, insert the following line "
+      "(in a non-header) in the namespace it is defined in:"
+      "\n\n"
+      "GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(" + name + ");";
+
+  std::string full_name = "UninstantiatedParameterizedTestSuite<" + name + ">";
+  RegisterTest(  //
+      "GoogleTestVerification", full_name.c_str(),
+      nullptr,  // No type parameter.
+      nullptr,  // No value parameter.
+      location.file.c_str(), location.line, [message, location] {
+        return new FailureTest(location, message,
+                               kErrorOnUninstantiatedParameterizedTest);
+      });
+}
+
+void RegisterTypeParameterizedTestSuite(const char* test_suite_name,
+                                        CodeLocation code_location) {
+  GetUnitTestImpl()->type_parameterized_test_registry().RegisterTestSuite(
+      test_suite_name, code_location);
+}
+
+void RegisterTypeParameterizedTestSuiteInstantiation(const char* case_name) {
+  GetUnitTestImpl()
+      ->type_parameterized_test_registry()
+      .RegisterInstantiation(case_name);
+}
+
+void TypeParameterizedTestSuiteRegistry::RegisterTestSuite(
+    const char* test_suite_name, CodeLocation code_location) {
+  suites_.emplace(std::string(test_suite_name),
+                 TypeParameterizedTestSuiteInfo(code_location));
+}
+
+void TypeParameterizedTestSuiteRegistry::RegisterInstantiation(
+        const char* test_suite_name) {
+  auto it = suites_.find(std::string(test_suite_name));
+  if (it != suites_.end()) {
+    it->second.instantiated = true;
+  } else {
+    GTEST_LOG_(ERROR) << "Unknown type parameterized test suit '"
+                      << test_suite_name << "'";
+  }
+}
+
+void TypeParameterizedTestSuiteRegistry::CheckForInstantiations() {
+  const auto& ignored = *GetIgnoredParameterizedTestSuites();
+  for (const auto& testcase : suites_) {
+    if (testcase.second.instantiated) continue;
+    if (ignored.find(testcase.first) != ignored.end()) continue;
+
+    std::string message =
+        "Type parameterized test suite " + testcase.first +
+        " is defined via REGISTER_TYPED_TEST_SUITE_P, but never instantiated "
+        "via INSTANTIATE_TYPED_TEST_SUITE_P. None of the test cases will run."
+        "\n\n"
+        "Ideally, TYPED_TEST_P definitions should only ever be included as "
+        "part of binaries that intend to use them. (As opposed to, for "
+        "example, being placed in a library that may be linked in to get other "
+        "utilities.)"
+        "\n\n"
+        "To suppress this error for this test suite, insert the following line "
+        "(in a non-header) in the namespace it is defined in:"
+        "\n\n"
+        "GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(" +
+        testcase.first + ");";
+
+    std::string full_name =
+        "UninstantiatedTypeParameterizedTestSuite<" + testcase.first + ">";
+    RegisterTest(  //
+        "GoogleTestVerification", full_name.c_str(),
+        nullptr,  // No type parameter.
+        nullptr,  // No value parameter.
+        testcase.second.code_location.file.c_str(),
+        testcase.second.code_location.line, [message, testcase] {
+          return new FailureTest(testcase.second.code_location, message,
+                                 kErrorOnUninstantiatedTypeParameterizedTest);
+        });
+  }
+}
+
 // A copy of all command line arguments.  Set by InitGoogleTest().
 static ::std::vector<std::string> g_argvs;
 
@@ -446,7 +610,8 @@
   const char* const colon = strchr(gtest_output_flag, ':');
   return (colon == nullptr)
              ? std::string(gtest_output_flag)
-             : std::string(gtest_output_flag, colon - gtest_output_flag);
+             : std::string(gtest_output_flag,
+                           static_cast<size_t>(colon - gtest_output_flag));
 }
 
 // Returns the name of the requested output file, or the default if none
@@ -481,8 +646,8 @@
   return result.string();
 }
 
-// Returns true iff the wildcard pattern matches the string.  The
-// first ':' or '\0' character in pattern marks the end of it.
+// Returns true if and only if the wildcard pattern matches the string.
+// The first ':' or '\0' character in pattern marks the end of it.
 //
 // This recursive algorithm isn't very efficient, but is clear and
 // works well enough for matching test names, which are short.
@@ -524,8 +689,8 @@
   }
 }
 
-// Returns true iff the user-specified filter matches the test suite
-// name and the test name.
+// Returns true if and only if the user-specified filter matches the test
+// suite name and the test name.
 bool UnitTestOptions::FilterMatchesTest(const std::string& test_suite_name,
                                         const std::string& test_name) {
   const std::string& full_name = test_suite_name + "." + test_name.c_str();
@@ -833,42 +998,10 @@
 
 // Returns the current time in milliseconds.
 TimeInMillis GetTimeInMillis() {
-#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__)
-  // Difference between 1970-01-01 and 1601-01-01 in milliseconds.
-  // http://analogous.blogspot.com/2005/04/epoch.html
-  const TimeInMillis kJavaEpochToWinFileTimeDelta =
-    static_cast<TimeInMillis>(116444736UL) * 100000UL;
-  const DWORD kTenthMicrosInMilliSecond = 10000;
-
-  SYSTEMTIME now_systime;
-  FILETIME now_filetime;
-  ULARGE_INTEGER now_int64;
-  GetSystemTime(&now_systime);
-  if (SystemTimeToFileTime(&now_systime, &now_filetime)) {
-    now_int64.LowPart = now_filetime.dwLowDateTime;
-    now_int64.HighPart = now_filetime.dwHighDateTime;
-    now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) -
-      kJavaEpochToWinFileTimeDelta;
-    return now_int64.QuadPart;
-  }
-  return 0;
-#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_
-  __timeb64 now;
-
-  // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996
-  // (deprecated function) there.
-  GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
-  _ftime64(&now);
-  GTEST_DISABLE_MSC_DEPRECATED_POP_()
-
-  return static_cast<TimeInMillis>(now.time) * 1000 + now.millitm;
-#elif GTEST_HAS_GETTIMEOFDAY_
-  struct timeval now;
-  gettimeofday(&now, nullptr);
-  return static_cast<TimeInMillis>(now.tv_sec) * 1000 + now.tv_usec / 1000;
-#else
-# error "Don't know how to get the current time on your system."
-#endif
+  return std::chrono::duration_cast<std::chrono::milliseconds>(
+             std::chrono::system_clock::now() -
+             std::chrono::system_clock::from_time_t(0))
+      .count();
 }
 
 // Utilities
@@ -909,7 +1042,8 @@
 
 #endif  // GTEST_OS_WINDOWS_MOBILE
 
-// Compares two C strings.  Returns true iff they have the same content.
+// Compares two C strings.  Returns true if and only if they have the same
+// content.
 //
 // Unlike strcmp(), this function can handle NULL argument(s).  A NULL
 // C string is considered different to any non-NULL C string,
@@ -1240,9 +1374,10 @@
     for (; edit_i < edits.size(); ++edit_i) {
       if (n_suffix >= context) {
         // Continue only if the next hunk is very close.
-        std::vector<EditType>::const_iterator it = edits.begin() + edit_i;
+        auto it = edits.begin() + static_cast<int>(edit_i);
         while (it != edits.end() && *it == kMatch) ++it;
-        if (it == edits.end() || (it - edits.begin()) - edit_i >= context) {
+        if (it == edits.end() ||
+            static_cast<size_t>(it - edits.begin()) - edit_i >= context) {
           // There is no next edit or it is too far away.
           break;
         }
@@ -1318,7 +1453,7 @@
 //   lhs_value:      "5"
 //   rhs_value:      "6"
 //
-// The ignoring_case parameter is true iff the assertion is a
+// The ignoring_case parameter is true if and only if the assertion is a
 // *_STRCASEEQ*.  When it's true, the string "Ignoring case" will
 // be inserted into the message.
 AssertionResult EqFailure(const char* lhs_expression,
@@ -1381,6 +1516,31 @@
   const double diff = fabs(val1 - val2);
   if (diff <= abs_error) return AssertionSuccess();
 
+  // Find the value which is closest to zero.
+  const double min_abs = std::min(fabs(val1), fabs(val2));
+  // Find the distance to the next double from that value.
+  const double epsilon =
+      nextafter(min_abs, std::numeric_limits<double>::infinity()) - min_abs;
+  // Detect the case where abs_error is so small that EXPECT_NEAR is
+  // effectively the same as EXPECT_EQUAL, and give an informative error
+  // message so that the situation can be more easily understood without
+  // requiring exotic floating-point knowledge.
+  // Don't do an epsilon check if abs_error is zero because that implies
+  // that an equality check was actually intended.
+  if (!(std::isnan)(val1) && !(std::isnan)(val2) && abs_error > 0 &&
+      abs_error < epsilon) {
+    return AssertionFailure()
+           << "The difference between " << expr1 << " and " << expr2 << " is "
+           << diff << ", where\n"
+           << expr1 << " evaluates to " << val1 << ",\n"
+           << expr2 << " evaluates to " << val2 << ".\nThe abs_error parameter "
+           << abs_error_expr << " evaluates to " << abs_error
+           << " which is smaller than the minimum distance between doubles for "
+              "numbers of this magnitude which is "
+           << epsilon
+           << ", thus making this EXPECT_NEAR check equivalent to "
+              "EXPECT_EQUAL. Consider using EXPECT_DOUBLE_EQ instead.";
+  }
   return AssertionFailure()
       << "The difference between " << expr1 << " and " << expr2
       << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n"
@@ -1561,9 +1721,9 @@
 
 // Helper functions for implementing IsSubString() and IsNotSubstring().
 
-// This group of overloaded functions return true iff needle is a
-// substring of haystack.  NULL is considered a substring of itself
-// only.
+// This group of overloaded functions returns true if and only if needle
+// is a substring of haystack.  NULL is considered a substring of
+// itself only.
 
 bool IsSubstringPred(const char* needle, const char* haystack) {
   if (needle == nullptr || haystack == nullptr) return needle == haystack;
@@ -1689,7 +1849,7 @@
   char error_text[kBufSize] = { '\0' };
   DWORD message_length = ::FormatMessageA(kFlags,
                                           0,   // no source, we're asking system
-                                          hr,  // the error
+                                          static_cast<DWORD>(hr),  // the error
                                           0,   // no line width restrictions
                                           error_text,  // output buffer
                                           kBufSize,    // buf size
@@ -1739,35 +1899,35 @@
 //  17 - 21 bits       11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
 
 // The maximum code-point a one-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint1 = (static_cast<UInt32>(1) <<  7) - 1;
+constexpr uint32_t kMaxCodePoint1 = (static_cast<uint32_t>(1) <<  7) - 1;
 
 // The maximum code-point a two-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint2 = (static_cast<UInt32>(1) << (5 + 6)) - 1;
+constexpr uint32_t kMaxCodePoint2 = (static_cast<uint32_t>(1) << (5 + 6)) - 1;
 
 // The maximum code-point a three-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint3 = (static_cast<UInt32>(1) << (4 + 2*6)) - 1;
+constexpr uint32_t kMaxCodePoint3 = (static_cast<uint32_t>(1) << (4 + 2*6)) - 1;
 
 // The maximum code-point a four-byte UTF-8 sequence can represent.
-const UInt32 kMaxCodePoint4 = (static_cast<UInt32>(1) << (3 + 3*6)) - 1;
+constexpr uint32_t kMaxCodePoint4 = (static_cast<uint32_t>(1) << (3 + 3*6)) - 1;
 
 // Chops off the n lowest bits from a bit pattern.  Returns the n
 // lowest bits.  As a side effect, the original bit pattern will be
 // shifted to the right by n bits.
-inline UInt32 ChopLowBits(UInt32* bits, int n) {
-  const UInt32 low_bits = *bits & ((static_cast<UInt32>(1) << n) - 1);
+inline uint32_t ChopLowBits(uint32_t* bits, int n) {
+  const uint32_t low_bits = *bits & ((static_cast<uint32_t>(1) << n) - 1);
   *bits >>= n;
   return low_bits;
 }
 
 // Converts a Unicode code point to a narrow string in UTF-8 encoding.
-// code_point parameter is of type UInt32 because wchar_t may not be
+// code_point parameter is of type uint32_t because wchar_t may not be
 // wide enough to contain a code point.
 // If the code_point is not a valid Unicode code point
 // (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
 // to "(Invalid Unicode 0xXXXXXXXX)".
-std::string CodePointToUtf8(UInt32 code_point) {
+std::string CodePointToUtf8(uint32_t code_point) {
   if (code_point > kMaxCodePoint4) {
-    return "(Invalid Unicode 0x" + String::FormatHexInt(code_point) + ")";
+    return "(Invalid Unicode 0x" + String::FormatHexUInt32(code_point) + ")";
   }
 
   char str[5];  // Big enough for the largest valid code point.
@@ -1806,14 +1966,17 @@
 }
 
 // Creates a Unicode code point from UTF16 surrogate pair.
-inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first,
-                                                    wchar_t second) {
-  const UInt32 mask = (1 << 10) - 1;
-  return (sizeof(wchar_t) == 2) ?
-      (((first & mask) << 10) | (second & mask)) + 0x10000 :
-      // This function should not be called when the condition is
-      // false, but we provide a sensible default in case it is.
-      static_cast<UInt32>(first);
+inline uint32_t CreateCodePointFromUtf16SurrogatePair(wchar_t first,
+                                                      wchar_t second) {
+  const auto first_u = static_cast<uint32_t>(first);
+  const auto second_u = static_cast<uint32_t>(second);
+  const uint32_t mask = (1 << 10) - 1;
+  return (sizeof(wchar_t) == 2)
+             ? (((first_u & mask) << 10) | (second_u & mask)) + 0x10000
+             :
+             // This function should not be called when the condition is
+             // false, but we provide a sensible default in case it is.
+             first_u;
 }
 
 // Converts a wide string to a narrow string in UTF-8 encoding.
@@ -1835,7 +1998,7 @@
 
   ::std::stringstream stream;
   for (int i = 0; i < num_chars; ++i) {
-    UInt32 unicode_code_point;
+    uint32_t unicode_code_point;
 
     if (str[i] == L'\0') {
       break;
@@ -1844,7 +2007,7 @@
                                                                  str[i + 1]);
       i++;
     } else {
-      unicode_code_point = static_cast<UInt32>(str[i]);
+      unicode_code_point = static_cast<uint32_t>(str[i]);
     }
 
     stream << CodePointToUtf8(unicode_code_point);
@@ -1860,8 +2023,8 @@
   return internal::WideStringToUtf8(wide_c_str, -1);
 }
 
-// Compares two wide C strings.  Returns true iff they have the same
-// content.
+// Compares two wide C strings.  Returns true if and only if they have the
+// same content.
 //
 // Unlike wcscmp(), this function can handle NULL argument(s).  A NULL
 // C string is considered different to any non-NULL C string,
@@ -1905,7 +2068,7 @@
                             << " vs " << PrintToString(s2);
 }
 
-// Compares two C strings, ignoring case.  Returns true iff they have
+// Compares two C strings, ignoring case.  Returns true if and only if they have
 // the same content.
 //
 // Unlike strcasecmp(), this function can handle NULL argument(s).  A
@@ -1917,18 +2080,18 @@
   return posix::StrCaseCmp(lhs, rhs) == 0;
 }
 
-  // Compares two wide C strings, ignoring case.  Returns true iff they
-  // have the same content.
-  //
-  // Unlike wcscasecmp(), this function can handle NULL argument(s).
-  // A NULL C string is considered different to any non-NULL wide C string,
-  // including the empty string.
-  // NB: The implementations on different platforms slightly differ.
-  // On windows, this method uses _wcsicmp which compares according to LC_CTYPE
-  // environment variable. On GNU platform this method uses wcscasecmp
-  // which compares according to LC_CTYPE category of the current locale.
-  // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
-  // current locale.
+// Compares two wide C strings, ignoring case.  Returns true if and only if they
+// have the same content.
+//
+// Unlike wcscasecmp(), this function can handle NULL argument(s).
+// A NULL C string is considered different to any non-NULL wide C string,
+// including the empty string.
+// NB: The implementations on different platforms slightly differ.
+// On windows, this method uses _wcsicmp which compares according to LC_CTYPE
+// environment variable. On GNU platform this method uses wcscasecmp
+// which compares according to LC_CTYPE category of the current locale.
+// On MacOS X, it uses towlower, which also uses LC_CTYPE category of the
+// current locale.
 bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs,
                                               const wchar_t* rhs) {
   if (lhs == nullptr) return rhs == nullptr;
@@ -1944,14 +2107,14 @@
   // Other unknown OSes may not define it either.
   wint_t left, right;
   do {
-    left = towlower(*lhs++);
-    right = towlower(*rhs++);
+    left = towlower(static_cast<wint_t>(*lhs++));
+    right = towlower(static_cast<wint_t>(*rhs++));
   } while (left && left == right);
   return left == right;
 #endif  // OS selector
 }
 
-// Returns true iff str ends with the given suffix, ignoring case.
+// Returns true if and only if str ends with the given suffix, ignoring case.
 // Any string is considered to end with an empty suffix.
 bool String::EndsWithCaseInsensitive(
     const std::string& str, const std::string& suffix) {
@@ -1964,16 +2127,26 @@
 
 // Formats an int value as "%02d".
 std::string String::FormatIntWidth2(int value) {
+  return FormatIntWidthN(value, 2);
+}
+
+// Formats an int value to given width with leading zeros.
+std::string String::FormatIntWidthN(int value, int width) {
   std::stringstream ss;
-  ss << std::setfill('0') << std::setw(2) << value;
+  ss << std::setfill('0') << std::setw(width) << value;
+  return ss.str();
+}
+
+// Formats a uint32_t value as "%X".
+std::string String::FormatHexUInt32(uint32_t value) {
+  std::stringstream ss;
+  ss << std::hex << std::uppercase << value;
   return ss.str();
 }
 
 // Formats an int value as "%X".
 std::string String::FormatHexInt(int value) {
-  std::stringstream ss;
-  ss << std::hex << std::uppercase << value;
-  return ss.str();
+  return FormatHexUInt32(static_cast<uint32_t>(value));
 }
 
 // Formats a byte as "%02X".
@@ -1992,7 +2165,7 @@
   const char* const end = start + str.length();
 
   std::string result;
-  result.reserve(2 * (end - start));
+  result.reserve(static_cast<size_t>(2 * (end - start)));
   for (const char* ch = start; ch != end; ++ch) {
     if (*ch == '\0') {
       result += "\\0";  // Replaces NUL with "\\0";
@@ -2012,7 +2185,9 @@
   if (user_msg_string.empty()) {
     return gtest_msg;
   }
-
+  if (gtest_msg.empty()) {
+    return user_msg_string;
+  }
   return gtest_msg + "\n" + user_msg_string;
 }
 
@@ -2022,9 +2197,7 @@
 
 // Creates an empty TestResult.
 TestResult::TestResult()
-    : death_test_count_(0),
-      elapsed_time_(0) {
-}
+    : death_test_count_(0), start_timestamp_(0), elapsed_time_(0) {}
 
 // D'tor.
 TestResult::~TestResult() {
@@ -2036,7 +2209,7 @@
 const TestPartResult& TestResult::GetTestPartResult(int i) const {
   if (i < 0 || i >= total_part_count())
     internal::posix::Abort();
-  return test_part_results_.at(i);
+  return test_part_results_.at(static_cast<size_t>(i));
 }
 
 // Returns the i-th test property. i can range from 0 to
@@ -2045,7 +2218,7 @@
 const TestProperty& TestResult::GetTestProperty(int i) const {
   if (i < 0 || i >= test_property_count())
     internal::posix::Abort();
-  return test_properties_.at(i);
+  return test_properties_.at(static_cast<size_t>(i));
 }
 
 // Clears the test part results.
@@ -2093,13 +2266,8 @@
 // The list of reserved attributes used in the <testsuite> element of XML
 // output.
 static const char* const kReservedTestSuiteAttributes[] = {
-  "disabled",
-  "errors",
-  "failures",
-  "name",
-  "tests",
-  "time"
-};
+    "disabled", "errors", "failures",  "name",
+    "tests",    "time",   "timestamp", "skipped"};
 
 // The list of reserved attributes used in the <testcase> element of XML output.
 static const char* const kReservedTestCaseAttributes[] = {
@@ -2107,12 +2275,12 @@
     "value_param", "file", "line"};
 
 // Use a slightly different set for allowed output to ensure existing tests can
-// still RecordProperty("result")
+// still RecordProperty("result") or RecordProperty("timestamp")
 static const char* const kReservedOutputTestCaseAttributes[] = {
-    "classname",   "name", "status", "time",  "type_param",
-    "value_param", "file", "line", "result"};
+    "classname",   "name", "status", "time",   "type_param",
+    "value_param", "file", "line",   "result", "timestamp"};
 
-template <int kSize>
+template <size_t kSize>
 std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
   return std::vector<std::string>(array, array + kSize);
 }
@@ -2196,12 +2364,12 @@
   return result.skipped();
 }
 
-// Returns true iff the test was skipped.
+// Returns true if and only if the test was skipped.
 bool TestResult::Skipped() const {
   return !Failed() && CountIf(test_part_results_, TestPartSkipped) > 0;
 }
 
-// Returns true iff the test failed.
+// Returns true if and only if the test failed.
 bool TestResult::Failed() const {
   for (int i = 0; i < total_part_count(); ++i) {
     if (GetTestPartResult(i).failed())
@@ -2210,22 +2378,22 @@
   return false;
 }
 
-// Returns true iff the test part fatally failed.
+// Returns true if and only if the test part fatally failed.
 static bool TestPartFatallyFailed(const TestPartResult& result) {
   return result.fatally_failed();
 }
 
-// Returns true iff the test fatally failed.
+// Returns true if and only if the test fatally failed.
 bool TestResult::HasFatalFailure() const {
   return CountIf(test_part_results_, TestPartFatallyFailed) > 0;
 }
 
-// Returns true iff the test part non-fatally failed.
+// Returns true if and only if the test part non-fatally failed.
 static bool TestPartNonfatallyFailed(const TestPartResult& result) {
   return result.nonfatally_failed();
 }
 
-// Returns true iff the test has a non-fatal failure.
+// Returns true if and only if the test has a non-fatal failure.
 bool TestResult::HasNonfatalFailure() const {
   return CountIf(test_part_results_, TestPartNonfatallyFailed) > 0;
 }
@@ -2521,18 +2689,18 @@
       this, &Test::TearDown, "TearDown()");
 }
 
-// Returns true iff the current test has a fatal failure.
+// Returns true if and only if the current test has a fatal failure.
 bool Test::HasFatalFailure() {
   return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure();
 }
 
-// Returns true iff the current test has a non-fatal failure.
+// Returns true if and only if the current test has a non-fatal failure.
 bool Test::HasNonfatalFailure() {
   return internal::GetUnitTestImpl()->current_test_result()->
       HasNonfatalFailure();
 }
 
-// Returns true iff the current test was skipped.
+// Returns true if and only if the current test was skipped.
 bool Test::IsSkipped() {
   return internal::GetUnitTestImpl()->current_test_result()->Skipped();
 }
@@ -2556,6 +2724,7 @@
       should_run_(false),
       is_disabled_(false),
       matches_filter_(false),
+      is_in_another_shard_(false),
       factory_(factory),
       result_() {}
 
@@ -2569,7 +2738,7 @@
 //
 // Arguments:
 //
-//   test_suite_name:   name of the test suite
+//   test_suite_name:  name of the test suite
 //   name:             name of the test
 //   type_param:       the name of the test's type parameter, or NULL if
 //                     this is not a typed or a type-parameterized test.
@@ -2631,7 +2800,7 @@
   explicit TestNameIs(const char* name)
       : name_(name) {}
 
-  // Returns true iff the test name of test_info matches name_.
+  // Returns true if and only if the test name of test_info matches name_.
   bool operator()(const TestInfo * test_info) const {
     return test_info && test_info->name() == name_;
   }
@@ -2650,6 +2819,7 @@
 void UnitTestImpl::RegisterParameterizedTests() {
   if (!parameterized_tests_registered_) {
     parameterized_test_registry_.RegisterTests();
+    type_parameterized_test_registry_.CheckForInstantiations();
     parameterized_tests_registered_ = true;
   }
 }
@@ -2695,6 +2865,7 @@
         test, &Test::DeleteSelf_, "the test fixture's destructor");
   }
 
+  result_.set_start_timestamp(start);
   result_.set_elapsed_time(internal::GetTimeInMillis() - start);
 
   // Notifies the unit test event listener that a test has just finished.
@@ -2705,6 +2876,28 @@
   impl->set_current_test_info(nullptr);
 }
 
+// Skips this test and records a skipped test result for this object.
+void TestInfo::Skip() {
+  if (!should_run_) return;
+
+  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+  impl->set_current_test_info(this);
+
+  TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+
+  // Notifies the unit test event listeners that a test is about to start.
+  repeater->OnTestStart(*this);
+
+  const TestPartResult test_part_result =
+      TestPartResult(TestPartResult::kSkip, this->file(), this->line(), "");
+  impl->GetTestPartResultReporterForCurrentThread()->ReportTestPartResult(
+      test_part_result);
+
+  // Notifies the unit test event listener that a test has just finished.
+  repeater->OnTestEnd(*this);
+  impl->set_current_test_info(nullptr);
+}
+
 // class TestSuite
 
 // Gets the number of successful tests in this test suite.
@@ -2751,7 +2944,7 @@
 //
 // Arguments:
 //
-//   name:         name of the test suite
+//   a_name:       name of the test suite
 //   a_type_param: the name of the test suite's type parameter, or NULL if
 //                 this is not a typed or a type-parameterized test suite.
 //   set_up_tc:    pointer to the function that sets up the test suite
@@ -2764,6 +2957,7 @@
       set_up_tc_(set_up_tc),
       tear_down_tc_(tear_down_tc),
       should_run_(false),
+      start_timestamp_(0),
       elapsed_time_(0) {}
 
 // Destructor of TestSuite.
@@ -2776,14 +2970,14 @@
 // total_test_count() - 1. If i is not in that range, returns NULL.
 const TestInfo* TestSuite::GetTestInfo(int i) const {
   const int index = GetElementOr(test_indices_, i, -1);
-  return index < 0 ? nullptr : test_info_list_[index];
+  return index < 0 ? nullptr : test_info_list_[static_cast<size_t>(index)];
 }
 
 // Returns the i-th test among all the tests. i can range from 0 to
 // total_test_count() - 1. If i is not in that range, returns NULL.
 TestInfo* TestSuite::GetMutableTestInfo(int i) {
   const int index = GetElementOr(test_indices_, i, -1);
-  return index < 0 ? nullptr : test_info_list_[index];
+  return index < 0 ? nullptr : test_info_list_[static_cast<size_t>(index)];
 }
 
 // Adds a test to this test suite.  Will delete the test upon
@@ -2805,19 +2999,25 @@
   // Call both legacy and the new API
   repeater->OnTestSuiteStart(*this);
 //  Legacy API is deprecated but still available
-#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
   repeater->OnTestCaseStart(*this);
-#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
   impl->os_stack_trace_getter()->UponLeavingGTest();
   internal::HandleExceptionsInMethodIfSupported(
       this, &TestSuite::RunSetUpTestSuite, "SetUpTestSuite()");
 
-  const internal::TimeInMillis start = internal::GetTimeInMillis();
+  start_timestamp_ = internal::GetTimeInMillis();
   for (int i = 0; i < total_test_count(); i++) {
     GetMutableTestInfo(i)->Run();
+    if (GTEST_FLAG(fail_fast) && GetMutableTestInfo(i)->result()->Failed()) {
+      for (int j = i + 1; j < total_test_count(); j++) {
+        GetMutableTestInfo(j)->Skip();
+      }
+      break;
+    }
   }
-  elapsed_time_ = internal::GetTimeInMillis() - start;
+  elapsed_time_ = internal::GetTimeInMillis() - start_timestamp_;
 
   impl->os_stack_trace_getter()->UponLeavingGTest();
   internal::HandleExceptionsInMethodIfSupported(
@@ -2826,9 +3026,39 @@
   // Call both legacy and the new API
   repeater->OnTestSuiteEnd(*this);
 //  Legacy API is deprecated but still available
-#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
   repeater->OnTestCaseEnd(*this);
-#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  impl->set_current_test_suite(nullptr);
+}
+
+// Skips all tests under this TestSuite.
+void TestSuite::Skip() {
+  if (!should_run_) return;
+
+  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
+  impl->set_current_test_suite(this);
+
+  TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater();
+
+  // Call both legacy and the new API
+  repeater->OnTestSuiteStart(*this);
+//  Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  repeater->OnTestCaseStart(*this);
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  for (int i = 0; i < total_test_count(); i++) {
+    GetMutableTestInfo(i)->Skip();
+  }
+
+  // Call both legacy and the new API
+  repeater->OnTestSuiteEnd(*this);
+  // Legacy API is deprecated but still available
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  repeater->OnTestCaseEnd(*this);
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
   impl->set_current_test_suite(nullptr);
 }
@@ -2880,7 +3110,7 @@
 static const char * TestPartResultTypeToString(TestPartResult::Type type) {
   switch (type) {
     case TestPartResult::kSkip:
-      return "Skipped";
+      return "Skipped\n";
     case TestPartResult::kSuccess:
       return "Success";
 
@@ -2897,6 +3127,9 @@
 }
 
 namespace internal {
+namespace {
+enum class GTestColor { kDefault, kRed, kGreen, kYellow };
+}  // namespace
 
 // Prints a TestPartResult to an std::string.
 static std::string PrintTestPartResultToString(
@@ -2934,9 +3167,12 @@
 // Returns the character attribute for the given color.
 static WORD GetColorAttribute(GTestColor color) {
   switch (color) {
-    case COLOR_RED:    return FOREGROUND_RED;
-    case COLOR_GREEN:  return FOREGROUND_GREEN;
-    case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
+    case GTestColor::kRed:
+      return FOREGROUND_RED;
+    case GTestColor::kGreen:
+      return FOREGROUND_GREEN;
+    case GTestColor::kYellow:
+      return FOREGROUND_RED | FOREGROUND_GREEN;
     default:           return 0;
   }
 }
@@ -2974,13 +3210,16 @@
 
 #else
 
-// Returns the ANSI color code for the given color.  COLOR_DEFAULT is
+// Returns the ANSI color code for the given color. GTestColor::kDefault is
 // an invalid input.
 static const char* GetAnsiColorCode(GTestColor color) {
   switch (color) {
-    case COLOR_RED:     return "1";
-    case COLOR_GREEN:   return "2";
-    case COLOR_YELLOW:  return "3";
+    case GTestColor::kRed:
+      return "1";
+    case GTestColor::kGreen:
+      return "2";
+    case GTestColor::kYellow:
+      return "3";
     default:
       return nullptr;
   }
@@ -2988,7 +3227,7 @@
 
 #endif  // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE
 
-// Returns true iff Google Test should use colors in the output.
+// Returns true if and only if Google Test should use colors in the output.
 bool ShouldUseColor(bool stdout_is_tty) {
   const char* const gtest_color = GTEST_FLAG(color).c_str();
 
@@ -3029,17 +3268,19 @@
 // cannot simply emit special characters and have the terminal change colors.
 // This routine must actually emit the characters rather than return a string
 // that would be colored when printed, as can be done on Linux.
-void ColoredPrintf(GTestColor color, const char* fmt, ...) {
+
+GTEST_ATTRIBUTE_PRINTF_(2, 3)
+static void ColoredPrintf(GTestColor color, const char *fmt, ...) {
   va_list args;
   va_start(args, fmt);
 
 #if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_ZOS || GTEST_OS_IOS || \
-    GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT
+    GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT || defined(ESP_PLATFORM)
   const bool use_color = AlwaysFalse();
 #else
   static const bool in_color_mode =
       ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0);
-  const bool use_color = in_color_mode && (color != COLOR_DEFAULT);
+  const bool use_color = in_color_mode && (color != GTestColor::kDefault);
 #endif  // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_ZOS
 
   if (!use_color) {
@@ -3113,11 +3354,22 @@
   void OnTestIterationStart(const UnitTest& unit_test, int iteration) override;
   void OnEnvironmentsSetUpStart(const UnitTest& unit_test) override;
   void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {}
-  void OnTestCaseStart(const TestSuite& test_suite) override;
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseStart(const TestCase& test_case) override;
+#else
+  void OnTestSuiteStart(const TestSuite& test_suite) override;
+#endif  // OnTestCaseStart
+
   void OnTestStart(const TestInfo& test_info) override;
+
   void OnTestPartResult(const TestPartResult& result) override;
   void OnTestEnd(const TestInfo& test_info) override;
-  void OnTestCaseEnd(const TestSuite& test_suite) override;
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseEnd(const TestCase& test_case) override;
+#else
+  void OnTestSuiteEnd(const TestSuite& test_suite) override;
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
   void OnEnvironmentsTearDownStart(const UnitTest& unit_test) override;
   void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) override {}
   void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
@@ -3125,6 +3377,7 @@
 
  private:
   static void PrintFailedTests(const UnitTest& unit_test);
+  static void PrintFailedTestSuites(const UnitTest& unit_test);
   static void PrintSkippedTests(const UnitTest& unit_test);
 };
 
@@ -3139,25 +3392,24 @@
   // Prints the filter if it's not *.  This reminds the user that some
   // tests may be skipped.
   if (!String::CStringEquals(filter, kUniversalFilter)) {
-    ColoredPrintf(COLOR_YELLOW,
-                  "Note: %s filter = %s\n", GTEST_NAME_, filter);
+    ColoredPrintf(GTestColor::kYellow, "Note: %s filter = %s\n", GTEST_NAME_,
+                  filter);
   }
 
   if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) {
-    const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
-    ColoredPrintf(COLOR_YELLOW,
-                  "Note: This is test shard %d of %s.\n",
+    const int32_t shard_index = Int32FromEnvOrDie(kTestShardIndex, -1);
+    ColoredPrintf(GTestColor::kYellow, "Note: This is test shard %d of %s.\n",
                   static_cast<int>(shard_index) + 1,
                   internal::posix::GetEnv(kTestTotalShards));
   }
 
   if (GTEST_FLAG(shuffle)) {
-    ColoredPrintf(COLOR_YELLOW,
+    ColoredPrintf(GTestColor::kYellow,
                   "Note: Randomizing tests' orders with a seed of %d .\n",
                   unit_test.random_seed());
   }
 
-  ColoredPrintf(COLOR_GREEN,  "[==========] ");
+  ColoredPrintf(GTestColor::kGreen, "[==========] ");
   printf("Running %s from %s.\n",
          FormatTestCount(unit_test.test_to_run_count()).c_str(),
          FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
@@ -3166,15 +3418,30 @@
 
 void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart(
     const UnitTest& /*unit_test*/) {
-  ColoredPrintf(COLOR_GREEN,  "[----------] ");
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
   printf("Global test environment set-up.\n");
   fflush(stdout);
 }
 
-void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestSuite& test_suite) {
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) {
+  const std::string counts =
+      FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
+  printf("%s from %s", counts.c_str(), test_case.name());
+  if (test_case.type_param() == nullptr) {
+    printf("\n");
+  } else {
+    printf(", where %s = %s\n", kTypeParamLabel, test_case.type_param());
+  }
+  fflush(stdout);
+}
+#else
+void PrettyUnitTestResultPrinter::OnTestSuiteStart(
+    const TestSuite& test_suite) {
   const std::string counts =
       FormatCountableNoun(test_suite.test_to_run_count(), "test", "tests");
-  ColoredPrintf(COLOR_GREEN, "[----------] ");
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
   printf("%s from %s", counts.c_str(), test_suite.name());
   if (test_suite.type_param() == nullptr) {
     printf("\n");
@@ -3183,9 +3450,10 @@
   }
   fflush(stdout);
 }
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
 void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) {
-  ColoredPrintf(COLOR_GREEN,  "[ RUN      ] ");
+  ColoredPrintf(GTestColor::kGreen, "[ RUN      ] ");
   PrintTestName(test_info.test_suite_name(), test_info.name());
   printf("\n");
   fflush(stdout);
@@ -3195,9 +3463,7 @@
 void PrettyUnitTestResultPrinter::OnTestPartResult(
     const TestPartResult& result) {
   switch (result.type()) {
-    // If the test part succeeded, or was skipped,
-    // we don't need to do anything.
-    case TestPartResult::kSkip:
+    // If the test part succeeded, we don't need to do anything.
     case TestPartResult::kSuccess:
       return;
     default:
@@ -3210,11 +3476,11 @@
 
 void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
   if (test_info.result()->Passed()) {
-    ColoredPrintf(COLOR_GREEN, "[       OK ] ");
+    ColoredPrintf(GTestColor::kGreen, "[       OK ] ");
   } else if (test_info.result()->Skipped()) {
-    ColoredPrintf(COLOR_GREEN, "[  SKIPPED ] ");
+    ColoredPrintf(GTestColor::kGreen, "[  SKIPPED ] ");
   } else {
-    ColoredPrintf(COLOR_RED, "[  FAILED  ] ");
+    ColoredPrintf(GTestColor::kRed, "[  FAILED  ] ");
   }
   PrintTestName(test_info.test_suite_name(), test_info.name());
   if (test_info.result()->Failed())
@@ -3229,20 +3495,33 @@
   fflush(stdout);
 }
 
-void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestSuite& test_suite) {
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) {
+  if (!GTEST_FLAG(print_time)) return;
+
+  const std::string counts =
+      FormatCountableNoun(test_case.test_to_run_count(), "test", "tests");
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
+  printf("%s from %s (%s ms total)\n\n", counts.c_str(), test_case.name(),
+         internal::StreamableToString(test_case.elapsed_time()).c_str());
+  fflush(stdout);
+}
+#else
+void PrettyUnitTestResultPrinter::OnTestSuiteEnd(const TestSuite& test_suite) {
   if (!GTEST_FLAG(print_time)) return;
 
   const std::string counts =
       FormatCountableNoun(test_suite.test_to_run_count(), "test", "tests");
-  ColoredPrintf(COLOR_GREEN, "[----------] ");
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
   printf("%s from %s (%s ms total)\n\n", counts.c_str(), test_suite.name(),
          internal::StreamableToString(test_suite.elapsed_time()).c_str());
   fflush(stdout);
 }
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
 void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart(
     const UnitTest& /*unit_test*/) {
-  ColoredPrintf(COLOR_GREEN,  "[----------] ");
+  ColoredPrintf(GTestColor::kGreen, "[----------] ");
   printf("Global test environment tear-down\n");
   fflush(stdout);
 }
@@ -3250,9 +3529,8 @@
 // Internal helper for printing the list of failed tests.
 void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) {
   const int failed_test_count = unit_test.failed_test_count();
-  if (failed_test_count == 0) {
-    return;
-  }
+  ColoredPrintf(GTestColor::kRed, "[  FAILED  ] ");
+  printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str());
 
   for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
     const TestSuite& test_suite = *unit_test.GetTestSuite(i);
@@ -3264,12 +3542,36 @@
       if (!test_info.should_run() || !test_info.result()->Failed()) {
         continue;
       }
-      ColoredPrintf(COLOR_RED, "[  FAILED  ] ");
+      ColoredPrintf(GTestColor::kRed, "[  FAILED  ] ");
       printf("%s.%s", test_suite.name(), test_info.name());
       PrintFullTestCommentIfPresent(test_info);
       printf("\n");
     }
   }
+  printf("\n%2d FAILED %s\n", failed_test_count,
+         failed_test_count == 1 ? "TEST" : "TESTS");
+}
+
+// Internal helper for printing the list of test suite failures not covered by
+// PrintFailedTests.
+void PrettyUnitTestResultPrinter::PrintFailedTestSuites(
+    const UnitTest& unit_test) {
+  int suite_failure_count = 0;
+  for (int i = 0; i < unit_test.total_test_suite_count(); ++i) {
+    const TestSuite& test_suite = *unit_test.GetTestSuite(i);
+    if (!test_suite.should_run()) {
+      continue;
+    }
+    if (test_suite.ad_hoc_test_result().Failed()) {
+      ColoredPrintf(GTestColor::kRed, "[  FAILED  ] ");
+      printf("%s: SetUpTestSuite or TearDownTestSuite\n", test_suite.name());
+      ++suite_failure_count;
+    }
+  }
+  if (suite_failure_count > 0) {
+    printf("\n%2d FAILED TEST %s\n", suite_failure_count,
+           suite_failure_count == 1 ? "SUITE" : "SUITES");
+  }
 }
 
 // Internal helper for printing the list of skipped tests.
@@ -3289,7 +3591,7 @@
       if (!test_info.should_run() || !test_info.result()->Skipped()) {
         continue;
       }
-      ColoredPrintf(COLOR_GREEN, "[  SKIPPED ] ");
+      ColoredPrintf(GTestColor::kGreen, "[  SKIPPED ] ");
       printf("%s.%s", test_suite.name(), test_info.name());
       printf("\n");
     }
@@ -3298,7 +3600,7 @@
 
 void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
                                                      int /*iteration*/) {
-  ColoredPrintf(COLOR_GREEN,  "[==========] ");
+  ColoredPrintf(GTestColor::kGreen, "[==========] ");
   printf("%s from %s ran.",
          FormatTestCount(unit_test.test_to_run_count()).c_str(),
          FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
@@ -3307,35 +3609,28 @@
            internal::StreamableToString(unit_test.elapsed_time()).c_str());
   }
   printf("\n");
-  ColoredPrintf(COLOR_GREEN,  "[  PASSED  ] ");
+  ColoredPrintf(GTestColor::kGreen, "[  PASSED  ] ");
   printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str());
 
   const int skipped_test_count = unit_test.skipped_test_count();
   if (skipped_test_count > 0) {
-    ColoredPrintf(COLOR_GREEN, "[  SKIPPED ] ");
+    ColoredPrintf(GTestColor::kGreen, "[  SKIPPED ] ");
     printf("%s, listed below:\n", FormatTestCount(skipped_test_count).c_str());
     PrintSkippedTests(unit_test);
   }
 
-  int num_failures = unit_test.failed_test_count();
   if (!unit_test.Passed()) {
-    const int failed_test_count = unit_test.failed_test_count();
-    ColoredPrintf(COLOR_RED,  "[  FAILED  ] ");
-    printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str());
     PrintFailedTests(unit_test);
-    printf("\n%2d FAILED %s\n", num_failures,
-                        num_failures == 1 ? "TEST" : "TESTS");
+    PrintFailedTestSuites(unit_test);
   }
 
   int num_disabled = unit_test.reportable_disabled_test_count();
   if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) {
-    if (!num_failures) {
+    if (unit_test.Passed()) {
       printf("\n");  // Add a spacer if no FAILURE banner is displayed.
     }
-    ColoredPrintf(COLOR_YELLOW,
-                  "  YOU HAVE %d DISABLED %s\n\n",
-                  num_disabled,
-                  num_disabled == 1 ? "TEST" : "TESTS");
+    ColoredPrintf(GTestColor::kYellow, "  YOU HAVE %d DISABLED %s\n\n",
+                  num_disabled, num_disabled == 1 ? "TEST" : "TESTS");
   }
   // Ensure that Google Test output is printed before, e.g., heapchecker output.
   fflush(stdout);
@@ -3343,6 +3638,110 @@
 
 // End PrettyUnitTestResultPrinter
 
+// This class implements the TestEventListener interface.
+//
+// Class BriefUnitTestResultPrinter is copyable.
+class BriefUnitTestResultPrinter : public TestEventListener {
+ public:
+  BriefUnitTestResultPrinter() {}
+  static void PrintTestName(const char* test_suite, const char* test) {
+    printf("%s.%s", test_suite, test);
+  }
+
+  // The following methods override what's in the TestEventListener class.
+  void OnTestProgramStart(const UnitTest& /*unit_test*/) override {}
+  void OnTestIterationStart(const UnitTest& /*unit_test*/,
+                            int /*iteration*/) override {}
+  void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) override {}
+  void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {}
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseStart(const TestCase& /*test_case*/) override {}
+#else
+  void OnTestSuiteStart(const TestSuite& /*test_suite*/) override {}
+#endif  // OnTestCaseStart
+
+  void OnTestStart(const TestInfo& /*test_info*/) override {}
+
+  void OnTestPartResult(const TestPartResult& result) override;
+  void OnTestEnd(const TestInfo& test_info) override;
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseEnd(const TestCase& /*test_case*/) override {}
+#else
+  void OnTestSuiteEnd(const TestSuite& /*test_suite*/) override {}
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) override {}
+  void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) override {}
+  void OnTestIterationEnd(const UnitTest& unit_test, int iteration) override;
+  void OnTestProgramEnd(const UnitTest& /*unit_test*/) override {}
+};
+
+// Called after an assertion failure.
+void BriefUnitTestResultPrinter::OnTestPartResult(
+    const TestPartResult& result) {
+  switch (result.type()) {
+    // If the test part succeeded, we don't need to do anything.
+    case TestPartResult::kSuccess:
+      return;
+    default:
+      // Print failure message from the assertion
+      // (e.g. expected this and got that).
+      PrintTestPartResult(result);
+      fflush(stdout);
+  }
+}
+
+void BriefUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) {
+  if (test_info.result()->Failed()) {
+    ColoredPrintf(GTestColor::kRed, "[  FAILED  ] ");
+    PrintTestName(test_info.test_suite_name(), test_info.name());
+    PrintFullTestCommentIfPresent(test_info);
+
+    if (GTEST_FLAG(print_time)) {
+      printf(" (%s ms)\n",
+             internal::StreamableToString(test_info.result()->elapsed_time())
+                 .c_str());
+    } else {
+      printf("\n");
+    }
+    fflush(stdout);
+  }
+}
+
+void BriefUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test,
+                                                    int /*iteration*/) {
+  ColoredPrintf(GTestColor::kGreen, "[==========] ");
+  printf("%s from %s ran.",
+         FormatTestCount(unit_test.test_to_run_count()).c_str(),
+         FormatTestSuiteCount(unit_test.test_suite_to_run_count()).c_str());
+  if (GTEST_FLAG(print_time)) {
+    printf(" (%s ms total)",
+           internal::StreamableToString(unit_test.elapsed_time()).c_str());
+  }
+  printf("\n");
+  ColoredPrintf(GTestColor::kGreen, "[  PASSED  ] ");
+  printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str());
+
+  const int skipped_test_count = unit_test.skipped_test_count();
+  if (skipped_test_count > 0) {
+    ColoredPrintf(GTestColor::kGreen, "[  SKIPPED ] ");
+    printf("%s.\n", FormatTestCount(skipped_test_count).c_str());
+  }
+
+  int num_disabled = unit_test.reportable_disabled_test_count();
+  if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) {
+    if (unit_test.Passed()) {
+      printf("\n");  // Add a spacer if no FAILURE banner is displayed.
+    }
+    ColoredPrintf(GTestColor::kYellow, "  YOU HAVE %d DISABLED %s\n\n",
+                  num_disabled, num_disabled == 1 ? "TEST" : "TESTS");
+  }
+  // Ensure that Google Test output is printed before, e.g., heapchecker output.
+  fflush(stdout);
+}
+
+// End BriefUnitTestResultPrinter
+
 // class TestEventRepeater
 //
 // This class forwards events to other event listeners.
@@ -3363,17 +3762,17 @@
   void OnEnvironmentsSetUpStart(const UnitTest& unit_test) override;
   void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) override;
 //  Legacy API is deprecated but still available
-#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
   void OnTestCaseStart(const TestSuite& parameter) override;
-#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
   void OnTestSuiteStart(const TestSuite& parameter) override;
   void OnTestStart(const TestInfo& test_info) override;
   void OnTestPartResult(const TestPartResult& result) override;
   void OnTestEnd(const TestInfo& test_info) override;
 //  Legacy API is deprecated but still available
-#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI
-  void OnTestCaseEnd(const TestSuite& parameter) override;
-#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+  void OnTestCaseEnd(const TestCase& parameter) override;
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
   void OnTestSuiteEnd(const TestSuite& parameter) override;
   void OnEnvironmentsTearDownStart(const UnitTest& unit_test) override;
   void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) override;
@@ -3401,7 +3800,7 @@
 TestEventListener* TestEventRepeater::Release(TestEventListener *listener) {
   for (size_t i = 0; i < listeners_.size(); ++i) {
     if (listeners_[i] == listener) {
-      listeners_.erase(listeners_.begin() + i);
+      listeners_.erase(listeners_.begin() + static_cast<int>(i));
       return listener;
     }
   }
@@ -3421,14 +3820,14 @@
 }
 // This defines a member that forwards the call to all listeners in reverse
 // order.
-#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \
-void TestEventRepeater::Name(const Type& parameter) { \
-  if (forwarding_enabled_) { \
-    for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) { \
-      listeners_[i]->Name(parameter); \
-    } \
-  } \
-}
+#define GTEST_REVERSE_REPEATER_METHOD_(Name, Type)      \
+  void TestEventRepeater::Name(const Type& parameter) { \
+    if (forwarding_enabled_) {                          \
+      for (size_t i = listeners_.size(); i != 0; i--) { \
+        listeners_[i - 1]->Name(parameter);             \
+      }                                                 \
+    }                                                   \
+  }
 
 GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest)
 GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest)
@@ -3465,8 +3864,8 @@
 void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test,
                                            int iteration) {
   if (forwarding_enabled_) {
-    for (int i = static_cast<int>(listeners_.size()) - 1; i >= 0; i--) {
-      listeners_[i]->OnTestIterationEnd(unit_test, iteration);
+    for (size_t i = listeners_.size(); i > 0; i--) {
+      listeners_[i - 1]->OnTestIterationEnd(unit_test, iteration);
     }
   }
 }
@@ -3695,13 +4094,14 @@
   struct tm time_struct;
   if (!PortableLocaltime(static_cast<time_t>(ms / 1000), &time_struct))
     return "";
-  // YYYY-MM-DDThh:mm:ss
+  // YYYY-MM-DDThh:mm:ss.sss
   return StreamableToString(time_struct.tm_year + 1900) + "-" +
       String::FormatIntWidth2(time_struct.tm_mon + 1) + "-" +
       String::FormatIntWidth2(time_struct.tm_mday) + "T" +
       String::FormatIntWidth2(time_struct.tm_hour) + ":" +
       String::FormatIntWidth2(time_struct.tm_min) + ":" +
-      String::FormatIntWidth2(time_struct.tm_sec);
+      String::FormatIntWidth2(time_struct.tm_sec) + "." +
+      String::FormatIntWidthN(static_cast<int>(ms % 1000), 3);
 }
 
 // Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
@@ -3778,13 +4178,17 @@
                          : "suppressed");
   OutputXmlAttribute(stream, kTestsuite, "time",
                      FormatTimeInMillisAsSeconds(result.elapsed_time()));
+  OutputXmlAttribute(
+      stream, kTestsuite, "timestamp",
+      FormatEpochTimeInMillisAsIso8601(result.start_timestamp()));
   OutputXmlAttribute(stream, kTestsuite, "classname", test_suite_name);
 
   int failures = 0;
+  int skips = 0;
   for (int i = 0; i < result.total_part_count(); ++i) {
     const TestPartResult& part = result.GetTestPartResult(i);
     if (part.failed()) {
-      if (++failures == 1) {
+      if (++failures == 1 && skips == 0) {
         *stream << ">\n";
       }
       const std::string location =
@@ -3792,18 +4196,31 @@
                                                           part.line_number());
       const std::string summary = location + "\n" + part.summary();
       *stream << "      <failure message=\""
-              << EscapeXmlAttribute(summary.c_str())
+              << EscapeXmlAttribute(summary)
               << "\" type=\"\">";
       const std::string detail = location + "\n" + part.message();
       OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
       *stream << "</failure>\n";
+    } else if (part.skipped()) {
+      if (++skips == 1 && failures == 0) {
+        *stream << ">\n";
+      }
+      const std::string location =
+          internal::FormatCompilerIndependentFileLocation(part.file_name(),
+                                                          part.line_number());
+      const std::string summary = location + "\n" + part.summary();
+      *stream << "      <skipped message=\""
+              << EscapeXmlAttribute(summary.c_str()) << "\">";
+      const std::string detail = location + "\n" + part.message();
+      OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
+      *stream << "</skipped>\n";
     }
   }
 
-  if (failures == 0 && result.test_property_count() == 0) {
+  if (failures == 0 && skips == 0 && result.test_property_count() == 0) {
     *stream << " />\n";
   } else {
-    if (failures == 0) {
+    if (failures == 0 && skips == 0) {
       *stream << ">\n";
     }
     OutputXmlTestProperties(stream, result);
@@ -3825,9 +4242,16 @@
     OutputXmlAttribute(
         stream, kTestsuite, "disabled",
         StreamableToString(test_suite.reportable_disabled_test_count()));
+    OutputXmlAttribute(stream, kTestsuite, "skipped",
+                       StreamableToString(test_suite.skipped_test_count()));
+
     OutputXmlAttribute(stream, kTestsuite, "errors", "0");
+
     OutputXmlAttribute(stream, kTestsuite, "time",
                        FormatTimeInMillisAsSeconds(test_suite.elapsed_time()));
+    OutputXmlAttribute(
+        stream, kTestsuite, "timestamp",
+        FormatEpochTimeInMillisAsIso8601(test_suite.start_timestamp()));
     *stream << TestPropertiesAsXmlAttributes(test_suite.ad_hoc_test_result());
   }
   *stream << ">\n";
@@ -3854,11 +4278,11 @@
       stream, kTestsuites, "disabled",
       StreamableToString(unit_test.reportable_disabled_test_count()));
   OutputXmlAttribute(stream, kTestsuites, "errors", "0");
+  OutputXmlAttribute(stream, kTestsuites, "time",
+                     FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));
   OutputXmlAttribute(
       stream, kTestsuites, "timestamp",
       FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()));
-  OutputXmlAttribute(stream, kTestsuites, "time",
-                     FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));
 
   if (GTEST_FLAG(shuffle)) {
     OutputXmlAttribute(stream, kTestsuites, "random_seed",
@@ -4069,7 +4493,7 @@
       String::FormatIntWidth2(time_struct.tm_sec) + "Z";
 }
 
-static inline std::string Indent(int width) {
+static inline std::string Indent(size_t width) {
   return std::string(width, ' ');
 }
 
@@ -4146,6 +4570,9 @@
                     ? (result.Skipped() ? "SKIPPED" : "COMPLETED")
                     : "SUPPRESSED",
                 kIndent);
+  OutputJsonKey(stream, kTestsuite, "timestamp",
+                FormatEpochTimeInMillisAsRFC3339(result.start_timestamp()),
+                kIndent);
   OutputJsonKey(stream, kTestsuite, "time",
                 FormatTimeInMillisAsDuration(result.elapsed_time()), kIndent);
   OutputJsonKey(stream, kTestsuite, "classname", test_suite_name, kIndent,
@@ -4192,6 +4619,10 @@
     OutputJsonKey(stream, kTestsuite, "disabled",
                   test_suite.reportable_disabled_test_count(), kIndent);
     OutputJsonKey(stream, kTestsuite, "errors", 0, kIndent);
+    OutputJsonKey(
+        stream, kTestsuite, "timestamp",
+        FormatEpochTimeInMillisAsRFC3339(test_suite.start_timestamp()),
+        kIndent);
     OutputJsonKey(stream, kTestsuite, "time",
                   FormatTimeInMillisAsDuration(test_suite.elapsed_time()),
                   kIndent, false);
@@ -4457,6 +4888,7 @@
   }
 
   ~ScopedPrematureExitFile() {
+#if !defined GTEST_OS_ESP8266
     if (!premature_exit_filepath_.empty()) {
       int retval = remove(premature_exit_filepath_.c_str());
       if (retval) {
@@ -4465,6 +4897,7 @@
                           << retval;
       }
     }
+#endif
   }
 
  private:
@@ -4652,11 +5085,12 @@
   return impl()->elapsed_time();
 }
 
-// Returns true iff the unit test passed (i.e. all test suites passed).
+// Returns true if and only if the unit test passed (i.e. all test suites
+// passed).
 bool UnitTest::Passed() const { return impl()->Passed(); }
 
-// Returns true iff the unit test failed (i.e. some test suite failed
-// or something outside of all tests failed).
+// Returns true if and only if the unit test failed (i.e. some test suite
+// failed or something outside of all tests failed).
 bool UnitTest::Failed() const { return impl()->Failed(); }
 
 // Gets the i-th test suite among all the test suites. i can range from 0 to
@@ -4726,8 +5160,7 @@
   if (impl_->gtest_trace_stack().size() > 0) {
     msg << "\n" << GTEST_NAME_ << " trace:";
 
-    for (int i = static_cast<int>(impl_->gtest_trace_stack().size());
-         i > 0; --i) {
+    for (size_t i = impl_->gtest_trace_stack().size(); i > 0; --i) {
       const internal::TraceInfo& trace = impl_->gtest_trace_stack()[i - 1];
       msg << "\n" << internal::FormatFileLocation(trace.file, trace.line)
           << " " << trace.message;
@@ -4857,6 +5290,16 @@
       _set_abort_behavior(
           0x0,                                    // Clear the following flags:
           _WRITE_ABORT_MSG | _CALL_REPORTFAULT);  // pop-up window, core dump.
+
+    // In debug mode, the Windows CRT can crash with an assertion over invalid
+    // input (e.g. passing an invalid file descriptor).  The default handling
+    // for these assertions is to pop up a dialog and wait for user input.
+    // Instead ask the CRT to dump such assertions to stderr non-interactively.
+    if (!IsDebuggerPresent()) {
+      (void)_CrtSetReportMode(_CRT_ASSERT,
+                              _CRTDBG_MODE_FILE | _CRTDBG_MODE_DEBUG);
+      (void)_CrtSetReportFile(_CRT_ASSERT, _CRTDBG_FILE_STDERR);
+    }
 # endif
   }
 #endif  // GTEST_OS_WINDOWS
@@ -5068,6 +5511,10 @@
     // to shut down the default XML output before invoking RUN_ALL_TESTS.
     ConfigureXmlOutput();
 
+    if (GTEST_FLAG(brief)) {
+      listeners()->SetDefaultResultPrinter(new BriefUnitTestResultPrinter);
+    }
+
 #if GTEST_CAN_STREAM_RESULTS_
     // Configures listeners for streaming test results to the specified server.
     ConfigureStreamingOutput();
@@ -5095,7 +5542,7 @@
   // Constructor.
   explicit TestSuiteNameIs(const std::string& name) : name_(name) {}
 
-  // Returns true iff the name of test_suite matches name_.
+  // Returns true if and only if the name of test_suite matches name_.
   bool operator()(const TestSuite* test_suite) const {
     return test_suite != nullptr &&
            strcmp(test_suite->name(), name_.c_str()) == 0;
@@ -5113,10 +5560,10 @@
 // Arguments:
 //
 //   test_suite_name: name of the test suite
-//   type_param:     the name of the test suite's type parameter, or NULL if
-//                   this is not a typed or a type-parameterized test suite.
-//   set_up_tc:      pointer to the function that sets up the test suite
-//   tear_down_tc:   pointer to the function that tears down the test suite
+//   type_param:      the name of the test suite's type parameter, or NULL if
+//                    this is not a typed or a type-parameterized test suite.
+//   set_up_tc:       pointer to the function that sets up the test suite
+//   tear_down_tc:    pointer to the function that tears down the test suite
 TestSuite* UnitTestImpl::GetTestSuite(
     const char* test_suite_name, const char* type_param,
     internal::SetUpTestSuiteFunc set_up_tc,
@@ -5166,7 +5613,8 @@
 // All other functions called from RunAllTests() may safely assume that
 // parameterized tests are ready to be counted and run.
 bool UnitTestImpl::RunAllTests() {
-  // True iff Google Test is initialized before RUN_ALL_TESTS() is called.
+  // True if and only if Google Test is initialized before RUN_ALL_TESTS() is
+  // called.
   const bool gtest_is_initialized_before_run_all_tests = GTestIsInitialized();
 
   // Do not run any test if the --help flag was specified.
@@ -5182,7 +5630,7 @@
   // protocol.
   internal::WriteToShardStatusFileIfNeeded();
 
-  // True iff we are in a subprocess for running a thread-safe-style
+  // True if and only if we are in a subprocess for running a thread-safe-style
   // death test.
   bool in_subprocess_for_death_test = false;
 
@@ -5215,7 +5663,7 @@
   random_seed_ = GTEST_FLAG(shuffle) ?
       GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0;
 
-  // True iff at least one test has failed.
+  // True if and only if at least one test has failed.
   bool failed = false;
 
   TestEventListener* repeater = listeners()->repeater();
@@ -5227,8 +5675,8 @@
   // when we are inside the subprocess of a death test.
   const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat);
   // Repeats forever if the repeat count is negative.
-  const bool forever = repeat < 0;
-  for (int i = 0; forever || i != repeat; i++) {
+  const bool gtest_repeat_forever = repeat < 0;
+  for (int i = 0; gtest_repeat_forever || i != repeat; i++) {
     // We want to preserve failures generated by ad-hoc test
     // assertions executed before RUN_ALL_TESTS().
     ClearNonAdHocTestResult();
@@ -5237,7 +5685,7 @@
 
     // Shuffles test suites and tests if requested.
     if (has_tests_to_run && GTEST_FLAG(shuffle)) {
-      random()->Reseed(random_seed_);
+      random()->Reseed(static_cast<uint32_t>(random_seed_));
       // This should be done before calling OnTestIterationStart(),
       // such that a test event listener can see the actual test order
       // in the event.
@@ -5274,6 +5722,13 @@
         for (int test_index = 0; test_index < total_test_suite_count();
              test_index++) {
           GetMutableSuiteCase(test_index)->Run();
+          if (GTEST_FLAG(fail_fast) &&
+              GetMutableSuiteCase(test_index)->Failed()) {
+            for (int j = test_index + 1; j < total_test_suite_count(); j++) {
+              GetMutableSuiteCase(j)->Skip();
+            }
+            break;
+          }
         }
       }
 
@@ -5312,14 +5767,14 @@
 
   if (!gtest_is_initialized_before_run_all_tests) {
     ColoredPrintf(
-        COLOR_RED,
+        GTestColor::kRed,
         "\nIMPORTANT NOTICE - DO NOT IGNORE:\n"
         "This test program did NOT call " GTEST_INIT_GOOGLE_TEST_NAME_
         "() before calling RUN_ALL_TESTS(). This is INVALID. Soon " GTEST_NAME_
         " will start to enforce the valid usage. "
         "Please fix it ASAP, or IT WILL START TO FAIL.\n");  // NOLINT
 #if GTEST_FOR_GOOGLE_
-    ColoredPrintf(COLOR_RED,
+    ColoredPrintf(GTestColor::kRed,
                   "For more details, see http://wiki/Main/ValidGUnitMain.\n");
 #endif  // GTEST_FOR_GOOGLE_
   }
@@ -5336,7 +5791,7 @@
   if (test_shard_file != nullptr) {
     FILE* const file = posix::FOpen(test_shard_file, "w");
     if (file == nullptr) {
-      ColoredPrintf(COLOR_RED,
+      ColoredPrintf(GTestColor::kRed,
                     "Could not write to the test shard status file \"%s\" "
                     "specified by the %s environment variable.\n",
                     test_shard_file, kTestShardStatusFile);
@@ -5360,8 +5815,8 @@
     return false;
   }
 
-  const Int32 total_shards = Int32FromEnvOrDie(total_shards_env, -1);
-  const Int32 shard_index = Int32FromEnvOrDie(shard_index_env, -1);
+  const int32_t total_shards = Int32FromEnvOrDie(total_shards_env, -1);
+  const int32_t shard_index = Int32FromEnvOrDie(shard_index_env, -1);
 
   if (total_shards == -1 && shard_index == -1) {
     return false;
@@ -5370,7 +5825,7 @@
       << "Invalid environment variables: you have "
       << kTestShardIndex << " = " << shard_index
       << ", but have left " << kTestTotalShards << " unset.\n";
-    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+    ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
     fflush(stdout);
     exit(EXIT_FAILURE);
   } else if (total_shards != -1 && shard_index == -1) {
@@ -5378,7 +5833,7 @@
       << "Invalid environment variables: you have "
       << kTestTotalShards << " = " << total_shards
       << ", but have left " << kTestShardIndex << " unset.\n";
-    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+    ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
     fflush(stdout);
     exit(EXIT_FAILURE);
   } else if (shard_index < 0 || shard_index >= total_shards) {
@@ -5387,7 +5842,7 @@
       << kTestShardIndex << " < " << kTestTotalShards
       << ", but you have " << kTestShardIndex << "=" << shard_index
       << ", " << kTestTotalShards << "=" << total_shards << ".\n";
-    ColoredPrintf(COLOR_RED, "%s", msg.GetString().c_str());
+    ColoredPrintf(GTestColor::kRed, "%s", msg.GetString().c_str());
     fflush(stdout);
     exit(EXIT_FAILURE);
   }
@@ -5398,13 +5853,13 @@
 // Parses the environment variable var as an Int32. If it is unset,
 // returns default_val. If it is not an Int32, prints an error
 // and aborts.
-Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) {
+int32_t Int32FromEnvOrDie(const char* var, int32_t default_val) {
   const char* str_val = posix::GetEnv(var);
   if (str_val == nullptr) {
     return default_val;
   }
 
-  Int32 result;
+  int32_t result;
   if (!ParseInt32(Message() << "The value of environment variable " << var,
                   str_val, &result)) {
     exit(EXIT_FAILURE);
@@ -5413,8 +5868,8 @@
 }
 
 // Given the total number of shards, the shard index, and the test id,
-// returns true iff the test should be run on this shard. The test id is
-// some arbitrary but unique non-negative integer assigned to each test
+// returns true if and only if the test should be run on this shard. The test id
+// is some arbitrary but unique non-negative integer assigned to each test
 // method. Assumes that 0 <= shard_index < total_shards.
 bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) {
   return (test_id % total_shards) == shard_index;
@@ -5428,9 +5883,9 @@
 // https://github.com/google/googletest/blob/master/googletest/docs/advanced.md
 // . Returns the number of tests that should run.
 int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) {
-  const Int32 total_shards = shard_tests == HONOR_SHARDING_PROTOCOL ?
+  const int32_t total_shards = shard_tests == HONOR_SHARDING_PROTOCOL ?
       Int32FromEnvOrDie(kTestTotalShards, -1) : -1;
-  const Int32 shard_index = shard_tests == HONOR_SHARDING_PROTOCOL ?
+  const int32_t shard_index = shard_tests == HONOR_SHARDING_PROTOCOL ?
       Int32FromEnvOrDie(kTestShardIndex, -1) : -1;
 
   // num_runnable_tests are the number of tests that will
@@ -5719,12 +6174,11 @@
   return true;
 }
 
-// Parses a string for an Int32 flag, in the form of
-// "--flag=value".
+// Parses a string for an int32_t flag, in the form of "--flag=value".
 //
 // On success, stores the value of the flag in *value, and returns
 // true.  On failure, returns false without changing *value.
-bool ParseInt32Flag(const char* str, const char* flag, Int32* value) {
+bool ParseInt32Flag(const char* str, const char* flag, int32_t* value) {
   // Gets the value of the flag as a string.
   const char* const value_str = ParseFlagValue(str, flag, false);
 
@@ -5736,8 +6190,7 @@
                     value_str, value);
 }
 
-// Parses a string for a string flag, in the form of
-// "--flag=value".
+// Parses a string for a string flag, in the form of "--flag=value".
 //
 // On success, stores the value of the flag in *value, and returns
 // true.  On failure, returns false without changing *value.
@@ -5779,7 +6232,7 @@
 //   @D    changes to the default terminal text color.
 //
 static void PrintColorEncoded(const char* str) {
-  GTestColor color = COLOR_DEFAULT;  // The current color.
+  GTestColor color = GTestColor::kDefault;  // The current color.
 
   // Conceptually, we split the string into segments divided by escape
   // sequences.  Then we print one segment at a time.  At the end of
@@ -5799,13 +6252,13 @@
     if (ch == '@') {
       ColoredPrintf(color, "@");
     } else if (ch == 'D') {
-      color = COLOR_DEFAULT;
+      color = GTestColor::kDefault;
     } else if (ch == 'R') {
-      color = COLOR_RED;
+      color = GTestColor::kRed;
     } else if (ch == 'G') {
-      color = COLOR_GREEN;
+      color = GTestColor::kGreen;
     } else if (ch == 'Y') {
-      color = COLOR_YELLOW;
+      color = GTestColor::kYellow;
     } else {
       --str;
     }
@@ -5813,98 +6266,126 @@
 }
 
 static const char kColorEncodedHelpMessage[] =
-"This program contains tests written using " GTEST_NAME_ ". You can use the\n"
-"following command line flags to control its behavior:\n"
-"\n"
-"Test Selection:\n"
-"  @G--" GTEST_FLAG_PREFIX_ "list_tests@D\n"
-"      List the names of all tests instead of running them. The name of\n"
-"      TEST(Foo, Bar) is \"Foo.Bar\".\n"
-"  @G--" GTEST_FLAG_PREFIX_ "filter=@YPOSTIVE_PATTERNS"
+    "This program contains tests written using " GTEST_NAME_
+    ". You can use the\n"
+    "following command line flags to control its behavior:\n"
+    "\n"
+    "Test Selection:\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "list_tests@D\n"
+    "      List the names of all tests instead of running them. The name of\n"
+    "      TEST(Foo, Bar) is \"Foo.Bar\".\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "filter=@YPOSITIVE_PATTERNS"
     "[@G-@YNEGATIVE_PATTERNS]@D\n"
-"      Run only the tests whose name matches one of the positive patterns but\n"
-"      none of the negative patterns. '?' matches any single character; '*'\n"
-"      matches any substring; ':' separates two patterns.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n"
-"      Run all disabled tests too.\n"
-"\n"
-"Test Execution:\n"
-"  @G--" GTEST_FLAG_PREFIX_ "repeat=@Y[COUNT]@D\n"
-"      Run the tests repeatedly; use a negative count to repeat forever.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "shuffle@D\n"
-"      Randomize tests' orders on every iteration.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "random_seed=@Y[NUMBER]@D\n"
-"      Random number seed to use for shuffling test orders (between 1 and\n"
-"      99999, or 0 to use a seed based on the current time).\n"
-"\n"
-"Test Output:\n"
-"  @G--" GTEST_FLAG_PREFIX_ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n"
-"      Enable/disable colored output. The default is @Gauto@D.\n"
-"  -@G-" GTEST_FLAG_PREFIX_ "print_time=0@D\n"
-"      Don't print the elapsed time of each test.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "output=@Y(@Gjson@Y|@Gxml@Y)[@G:@YDIRECTORY_PATH@G"
-    GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n"
-"      Generate a JSON or XML report in the given directory or with the given\n"
-"      file name. @YFILE_PATH@D defaults to @Gtest_detail.xml@D.\n"
+    "      Run only the tests whose name matches one of the positive patterns "
+    "but\n"
+    "      none of the negative patterns. '?' matches any single character; "
+    "'*'\n"
+    "      matches any substring; ':' separates two patterns.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "also_run_disabled_tests@D\n"
+    "      Run all disabled tests too.\n"
+    "\n"
+    "Test Execution:\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "repeat=@Y[COUNT]@D\n"
+    "      Run the tests repeatedly; use a negative count to repeat forever.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "shuffle@D\n"
+    "      Randomize tests' orders on every iteration.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "random_seed=@Y[NUMBER]@D\n"
+    "      Random number seed to use for shuffling test orders (between 1 and\n"
+    "      99999, or 0 to use a seed based on the current time).\n"
+    "\n"
+    "Test Output:\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n"
+    "      Enable/disable colored output. The default is @Gauto@D.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "brief=1@D\n"
+    "      Only print test failures.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "print_time=0@D\n"
+    "      Don't print the elapsed time of each test.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "output=@Y(@Gjson@Y|@Gxml@Y)[@G:@YDIRECTORY_PATH@G" GTEST_PATH_SEP_
+    "@Y|@G:@YFILE_PATH]@D\n"
+    "      Generate a JSON or XML report in the given directory or with the "
+    "given\n"
+    "      file name. @YFILE_PATH@D defaults to @Gtest_detail.xml@D.\n"
 # if GTEST_CAN_STREAM_RESULTS_
-"  @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n"
-"      Stream test results to the given server.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "stream_result_to=@YHOST@G:@YPORT@D\n"
+    "      Stream test results to the given server.\n"
 # endif  // GTEST_CAN_STREAM_RESULTS_
-"\n"
-"Assertion Behavior:\n"
+    "\n"
+    "Assertion Behavior:\n"
 # if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
-"  @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n"
-"      Set the default death test style.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n"
+    "      Set the default death test style.\n"
 # endif  // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS
-"  @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n"
-"      Turn assertion failures into debugger break-points.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n"
-"      Turn assertion failures into C++ exceptions for use by an external\n"
-"      test framework.\n"
-"  @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n"
-"      Do not report exceptions as test failures. Instead, allow them\n"
-"      to crash the program or throw a pop-up (on Windows).\n"
-"\n"
-"Except for @G--" GTEST_FLAG_PREFIX_ "list_tests@D, you can alternatively set "
+    "  @G--" GTEST_FLAG_PREFIX_
+    "break_on_failure@D\n"
+    "      Turn assertion failures into debugger break-points.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "throw_on_failure@D\n"
+    "      Turn assertion failures into C++ exceptions for use by an external\n"
+    "      test framework.\n"
+    "  @G--" GTEST_FLAG_PREFIX_
+    "catch_exceptions=0@D\n"
+    "      Do not report exceptions as test failures. Instead, allow them\n"
+    "      to crash the program or throw a pop-up (on Windows).\n"
+    "\n"
+    "Except for @G--" GTEST_FLAG_PREFIX_
+    "list_tests@D, you can alternatively set "
     "the corresponding\n"
-"environment variable of a flag (all letters in upper-case). For example, to\n"
-"disable colored text output, you can either specify @G--" GTEST_FLAG_PREFIX_
+    "environment variable of a flag (all letters in upper-case). For example, "
+    "to\n"
+    "disable colored text output, you can either specify "
+    "@G--" GTEST_FLAG_PREFIX_
     "color=no@D or set\n"
-"the @G" GTEST_FLAG_PREFIX_UPPER_ "COLOR@D environment variable to @Gno@D.\n"
-"\n"
-"For more information, please read the " GTEST_NAME_ " documentation at\n"
-"@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_ "\n"
-"(not one in your own code or tests), please report it to\n"
-"@G<" GTEST_DEV_EMAIL_ ">@D.\n";
+    "the @G" GTEST_FLAG_PREFIX_UPPER_
+    "COLOR@D environment variable to @Gno@D.\n"
+    "\n"
+    "For more information, please read the " GTEST_NAME_
+    " documentation at\n"
+    "@G" GTEST_PROJECT_URL_ "@D. If you find a bug in " GTEST_NAME_
+    "\n"
+    "(not one in your own code or tests), please report it to\n"
+    "@G<" GTEST_DEV_EMAIL_ ">@D.\n";
 
 static bool ParseGoogleTestFlag(const char* const arg) {
   return ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag,
                        &GTEST_FLAG(also_run_disabled_tests)) ||
-      ParseBoolFlag(arg, kBreakOnFailureFlag,
-                    &GTEST_FLAG(break_on_failure)) ||
-      ParseBoolFlag(arg, kCatchExceptionsFlag,
-                    &GTEST_FLAG(catch_exceptions)) ||
-      ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) ||
-      ParseStringFlag(arg, kDeathTestStyleFlag,
-                      &GTEST_FLAG(death_test_style)) ||
-      ParseBoolFlag(arg, kDeathTestUseFork,
-                    &GTEST_FLAG(death_test_use_fork)) ||
-      ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
-      ParseStringFlag(arg, kInternalRunDeathTestFlag,
-                      &GTEST_FLAG(internal_run_death_test)) ||
-      ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
-      ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
-      ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
-      ParseBoolFlag(arg, kPrintUTF8Flag, &GTEST_FLAG(print_utf8)) ||
-      ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
-      ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
-      ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
-      ParseInt32Flag(arg, kStackTraceDepthFlag,
-                     &GTEST_FLAG(stack_trace_depth)) ||
-      ParseStringFlag(arg, kStreamResultToFlag,
-                      &GTEST_FLAG(stream_result_to)) ||
-      ParseBoolFlag(arg, kThrowOnFailureFlag,
-                    &GTEST_FLAG(throw_on_failure));
+         ParseBoolFlag(arg, kBreakOnFailureFlag,
+                       &GTEST_FLAG(break_on_failure)) ||
+         ParseBoolFlag(arg, kCatchExceptionsFlag,
+                       &GTEST_FLAG(catch_exceptions)) ||
+         ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) ||
+         ParseStringFlag(arg, kDeathTestStyleFlag,
+                         &GTEST_FLAG(death_test_style)) ||
+         ParseBoolFlag(arg, kDeathTestUseFork,
+                       &GTEST_FLAG(death_test_use_fork)) ||
+         ParseBoolFlag(arg, kFailFast, &GTEST_FLAG(fail_fast)) ||
+         ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) ||
+         ParseStringFlag(arg, kInternalRunDeathTestFlag,
+                         &GTEST_FLAG(internal_run_death_test)) ||
+         ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) ||
+         ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) ||
+         ParseBoolFlag(arg, kBriefFlag, &GTEST_FLAG(brief)) ||
+         ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) ||
+         ParseBoolFlag(arg, kPrintUTF8Flag, &GTEST_FLAG(print_utf8)) ||
+         ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) ||
+         ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) ||
+         ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) ||
+         ParseInt32Flag(arg, kStackTraceDepthFlag,
+                        &GTEST_FLAG(stack_trace_depth)) ||
+         ParseStringFlag(arg, kStreamResultToFlag,
+                         &GTEST_FLAG(stream_result_to)) ||
+         ParseBoolFlag(arg, kThrowOnFailureFlag, &GTEST_FLAG(throw_on_failure));
 }
 
 #if GTEST_USE_OWN_FLAGFILE_FLAG_
@@ -5987,7 +6468,7 @@
 void ParseGoogleTestFlagsOnly(int* argc, char** argv) {
   ParseGoogleTestFlagsOnlyImpl(argc, argv);
 
-  // Fix the value of *_NSGetArgc() on macOS, but iff
+  // Fix the value of *_NSGetArgc() on macOS, but if and only if
   // *_NSGetArgv() == argv
   // Only applicable to char** version of argv
 #if GTEST_OS_MAC
@@ -6074,20 +6555,31 @@
 std::string TempDir() {
 #if defined(GTEST_CUSTOM_TEMPDIR_FUNCTION_)
   return GTEST_CUSTOM_TEMPDIR_FUNCTION_();
-#endif
-
-#if GTEST_OS_WINDOWS_MOBILE
+#elif GTEST_OS_WINDOWS_MOBILE
   return "\\temp\\";
 #elif GTEST_OS_WINDOWS
   const char* temp_dir = internal::posix::GetEnv("TEMP");
-  if (temp_dir == nullptr || temp_dir[0] == '\0')
+  if (temp_dir == nullptr || temp_dir[0] == '\0') {
     return "\\temp\\";
-  else if (temp_dir[strlen(temp_dir) - 1] == '\\')
+  } else if (temp_dir[strlen(temp_dir) - 1] == '\\') {
     return temp_dir;
-  else
+  } else {
     return std::string(temp_dir) + "\\";
+  }
 #elif GTEST_OS_LINUX_ANDROID
-  return "/sdcard/";
+  const char* temp_dir = internal::posix::GetEnv("TEST_TMPDIR");
+  if (temp_dir == nullptr || temp_dir[0] == '\0') {
+    return "/data/local/tmp/";
+  } else {
+    return temp_dir;
+  }
+#elif GTEST_OS_LINUX
+  const char* temp_dir = internal::posix::GetEnv("TEST_TMPDIR");
+  if (temp_dir == nullptr || temp_dir[0] == '\0') {
+    return "/tmp/";
+  } else {
+    return temp_dir;
+  }
 #else
   return "/tmp/";
 #endif  // GTEST_OS_WINDOWS_MOBILE
diff --git a/deps/boringssl/src/third_party/googletest/src/gtest_main.cc b/deps/boringssl/src/third_party/googletest/src/gtest_main.cc
index f6e1dd9..46b27c3 100644
--- a/deps/boringssl/src/third_party/googletest/src/gtest_main.cc
+++ b/deps/boringssl/src/third_party/googletest/src/gtest_main.cc
@@ -30,13 +30,20 @@
 #include <cstdio>
 #include "gtest/gtest.h"
 
-#ifdef ARDUINO
+#if GTEST_OS_ESP8266 || GTEST_OS_ESP32
+#if GTEST_OS_ESP8266
+extern "C" {
+#endif
 void setup() {
   testing::InitGoogleTest();
 }
 
 void loop() { RUN_ALL_TESTS(); }
 
+#if GTEST_OS_ESP8266
+}
+#endif
+
 #else
 
 GTEST_API_ int main(int argc, char **argv) {
diff --git a/deps/boringssl/src/third_party/googletest/test/googletest-death-test-test.cc b/deps/boringssl/src/third_party/googletest/test/googletest-death-test-test.cc
index b864541..c0b3d1f 100644
--- a/deps/boringssl/src/third_party/googletest/test/googletest-death-test-test.cc
+++ b/deps/boringssl/src/third_party/googletest/test/googletest-death-test-test.cc
@@ -41,7 +41,9 @@
 #if GTEST_HAS_DEATH_TEST
 
 # if GTEST_OS_WINDOWS
+#  include <fcntl.h>           // For O_BINARY
 #  include <direct.h>          // For chdir().
+#  include <io.h>
 # else
 #  include <unistd.h>
 #  include <sys/wait.h>        // For waitpid.
@@ -139,7 +141,7 @@
       DieInside("MemberFunction");
   }
 
-  // True iff MemberFunction() should die.
+  // True if and only if MemberFunction() should die.
   bool should_die_;
   const FilePath original_dir_;
 };
@@ -156,7 +158,7 @@
   }
 
  private:
-  // True iff MemberFunction() should die.
+  // True if and only if MemberFunction() should die.
   bool should_die_;
 };
 
@@ -202,6 +204,26 @@
   return 12;
 }
 
+# if GTEST_OS_WINDOWS
+
+// Death in dbg due to Windows CRT assertion failure, not opt.
+int DieInCRTDebugElse12(int* sideeffect) {
+  if (sideeffect) *sideeffect = 12;
+
+  // Create an invalid fd by closing a valid one
+  int fdpipe[2];
+  EXPECT_EQ(_pipe(fdpipe, 256, O_BINARY), 0);
+  EXPECT_EQ(_close(fdpipe[0]), 0);
+  EXPECT_EQ(_close(fdpipe[1]), 0);
+
+  // _dup() should crash in debug mode
+  EXPECT_EQ(_dup(fdpipe[0]), -1);
+
+  return 12;
+}
+
+#endif  // GTEST_OS_WINDOWS
+
 # if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
 
 // Tests the ExitedWithCode predicate.
@@ -276,6 +298,13 @@
 
 # endif  // GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA
 
+// The following code intentionally tests a suboptimal syntax.
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdangling-else"
+#pragma GCC diagnostic ignored "-Wempty-body"
+#pragma GCC diagnostic ignored "-Wpragmas"
+#endif
 // Tests that the death test macros expand to code which may or may not
 // be followed by operator<<, and that in either case the complete text
 // comprises only a single C++ statement.
@@ -299,6 +328,9 @@
   else
     EXPECT_DEATH(_exit(1), "") << 1 << 2 << 3;
 }
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
 
 # if GTEST_USES_PCRE
 
@@ -369,17 +401,19 @@
 
 // Sets SIGPROF action and ITIMER_PROF timer (interval: 1ms).
 void SetSigprofActionAndTimer() {
-  struct itimerval timer;
-  timer.it_interval.tv_sec = 0;
-  timer.it_interval.tv_usec = 1;
-  timer.it_value = timer.it_interval;
-  ASSERT_EQ(0, setitimer(ITIMER_PROF, &timer, nullptr));
   struct sigaction signal_action;
   memset(&signal_action, 0, sizeof(signal_action));
   sigemptyset(&signal_action.sa_mask);
   signal_action.sa_sigaction = SigprofAction;
   signal_action.sa_flags = SA_RESTART | SA_SIGINFO;
   ASSERT_EQ(0, sigaction(SIGPROF, &signal_action, nullptr));
+  // timer comes second, to avoid SIGPROF premature delivery, as suggested at
+  // https://www.gnu.org/software/libc/manual/html_node/Setting-an-Alarm.html
+  struct itimerval timer;
+  timer.it_interval.tv_sec = 0;
+  timer.it_interval.tv_usec = 1;
+  timer.it_value = timer.it_interval;
+  ASSERT_EQ(0, setitimer(ITIMER_PROF, &timer, nullptr));
 }
 
 // Disables ITIMER_PROF timer and ignores SIGPROF signal.
@@ -551,8 +585,8 @@
   }, "died but not with expected error");
 }
 
-// On exit, *aborted will be true iff the EXPECT_DEATH() statement
-// aborted the function.
+// On exit, *aborted will be true if and only if the EXPECT_DEATH()
+// statement aborted the function.
 void ExpectDeathTestHelper(bool* aborted) {
   *aborted = true;
   EXPECT_DEATH(DieIf(false), "DieIf");  // This assertion should fail.
@@ -632,6 +666,40 @@
 # endif
 }
 
+# if GTEST_OS_WINDOWS
+
+// Tests that EXPECT_DEBUG_DEATH works as expected when in debug mode
+// the Windows CRT crashes the process with an assertion failure.
+// 1. Asserts on death.
+// 2. Has no side effect (doesn't pop up a window or wait for user input).
+//
+// And in opt mode, it:
+// 1.  Has side effects but does not assert.
+TEST_F(TestForDeathTest, CRTDebugDeath) {
+  int sideeffect = 0;
+
+  // Put the regex in a local variable to make sure we don't get an "unused"
+  // warning in opt mode.
+  const char* regex = "dup.* : Assertion failed";
+
+  EXPECT_DEBUG_DEATH(DieInCRTDebugElse12(&sideeffect), regex)
+      << "Must accept a streamed message";
+
+# ifdef NDEBUG
+
+  // Checks that the assignment occurs in opt mode (sideeffect).
+  EXPECT_EQ(12, sideeffect);
+
+# else
+
+  // Checks that the assignment does not occur in dbg mode (no sideeffect).
+  EXPECT_EQ(0, sideeffect);
+
+# endif
+}
+
+# endif  // GTEST_OS_WINDOWS
+
 // Tests that ASSERT_DEBUG_DEATH works as expected, that is, you can stream a
 // message to it, and in debug mode it:
 // 1. Asserts on death.
@@ -884,10 +952,12 @@
   int AssumeRoleCalls() const { return assume_role_calls_; }
   int WaitCalls() const { return wait_calls_; }
   size_t PassedCalls() const { return passed_args_.size(); }
-  bool PassedArgument(int n) const { return passed_args_[n]; }
+  bool PassedArgument(int n) const {
+    return passed_args_[static_cast<size_t>(n)];
+  }
   size_t AbortCalls() const { return abort_args_.size(); }
   DeathTest::AbortReason AbortArgument(int n) const {
-    return abort_args_[n];
+    return abort_args_[static_cast<size_t>(n)];
   }
   bool TestDeleted() const { return test_deleted_; }
 
@@ -1316,7 +1386,11 @@
 TEST(MatcherDeathTest, DoesNotBreakBareRegexMatching) {
   // googletest tests this, of course; here we ensure that including googlemock
   // has not broken it.
+#if GTEST_USES_POSIX_RE
   EXPECT_DEATH(DieWithMessage("O, I die, Horatio."), "I d[aeiou]e");
+#else
+  EXPECT_DEATH(DieWithMessage("O, I die, Horatio."), "I di?e");
+#endif
 }
 
 TEST(MatcherDeathTest, MonomorphicMatcherMatches) {
@@ -1404,6 +1478,13 @@
 
 namespace {
 
+// The following code intentionally tests a suboptimal syntax.
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdangling-else"
+#pragma GCC diagnostic ignored "-Wempty-body"
+#pragma GCC diagnostic ignored "-Wpragmas"
+#endif
 // Tests that the death test macros expand to code which may or may not
 // be followed by operator<<, and that in either case the complete text
 // comprises only a single C++ statement.
@@ -1429,6 +1510,9 @@
   else
     EXPECT_DEATH_IF_SUPPORTED(_exit(1), "") << 1 << 2 << 3;
 }
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
 
 // Tests that conditional death test macros expand to code which interacts
 // well with switch statements.
diff --git a/deps/boringssl/src/third_party/googletest/test/googletest-death-test_ex_test.cc b/deps/boringssl/src/third_party/googletest/test/googletest-death-test_ex_test.cc
index 7ea5b94..7219680 100644
--- a/deps/boringssl/src/third_party/googletest/test/googletest-death-test_ex_test.cc
+++ b/deps/boringssl/src/third_party/googletest/test/googletest-death-test_ex_test.cc
@@ -59,7 +59,7 @@
 
 class TestException : public std::exception {
  public:
-  const char* what() const throw() override { return "exceptional message"; }
+  const char* what() const noexcept override { return "exceptional message"; }
 };
 
 TEST(CxxExceptionDeathTest, PrintsMessageForStdExceptions) {
diff --git a/deps/boringssl/src/third_party/googletest/test/googletest-env-var-test_.cc b/deps/boringssl/src/third_party/googletest/test/googletest-env-var-test_.cc
index fd2aa82..52f9586 100644
--- a/deps/boringssl/src/third_party/googletest/test/googletest-env-var-test_.cc
+++ b/deps/boringssl/src/third_party/googletest/test/googletest-env-var-test_.cc
@@ -72,6 +72,11 @@
     return;
   }
 
+  if (strcmp(flag, "fail_fast") == 0) {
+    cout << GTEST_FLAG(fail_fast);
+    return;
+  }
+
   if (strcmp(flag, "filter") == 0) {
     cout << GTEST_FLAG(filter);
     return;
@@ -82,6 +87,11 @@
     return;
   }
 
+  if (strcmp(flag, "brief") == 0) {
+    cout << GTEST_FLAG(brief);
+    return;
+  }
+
   if (strcmp(flag, "print_time") == 0) {
     cout << GTEST_FLAG(print_time);
     return;
diff --git a/deps/boringssl/src/third_party/googletest/test/googletest-listener-test.cc b/deps/boringssl/src/third_party/googletest/test/googletest-listener-test.cc
index f50faaf..10457af 100644
--- a/deps/boringssl/src/third_party/googletest/test/googletest-listener-test.cc
+++ b/deps/boringssl/src/third_party/googletest/test/googletest-listener-test.cc
@@ -35,6 +35,7 @@
 #include <vector>
 
 #include "gtest/gtest.h"
+#include "gtest/internal/custom/gtest.h"
 
 using ::testing::AddGlobalTestEnvironment;
 using ::testing::Environment;
@@ -76,10 +77,11 @@
   void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) override {
     g_events->push_back(GetFullMethodName("OnEnvironmentsSetUpEnd"));
   }
-
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
   void OnTestCaseStart(const TestCase& /*test_case*/) override {
     g_events->push_back(GetFullMethodName("OnTestCaseStart"));
   }
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
   void OnTestStart(const TestInfo& /*test_info*/) override {
     g_events->push_back(GetFullMethodName("OnTestStart"));
@@ -93,9 +95,11 @@
     g_events->push_back(GetFullMethodName("OnTestEnd"));
   }
 
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
   void OnTestCaseEnd(const TestCase& /*test_case*/) override {
     g_events->push_back(GetFullMethodName("OnTestCaseEnd"));
   }
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
   void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) override {
     g_events->push_back(GetFullMethodName("OnEnvironmentsTearDownStart"));
@@ -283,6 +287,9 @@
   ::testing::GTEST_FLAG(repeat) = 2;
   int ret_val = RUN_ALL_TESTS();
 
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
+
+  // The deprecated OnTestSuiteStart/OnTestCaseStart events are included
   const char* const expected_events[] = {"1st.OnTestProgramStart",
                                          "2nd.OnTestProgramStart",
                                          "3rd.OnTestProgramStart",
@@ -393,6 +400,110 @@
                                          "3rd.OnTestProgramEnd",
                                          "2nd.OnTestProgramEnd",
                                          "1st.OnTestProgramEnd"};
+#else
+  const char* const expected_events[] = {"1st.OnTestProgramStart",
+                                         "2nd.OnTestProgramStart",
+                                         "3rd.OnTestProgramStart",
+                                         "1st.OnTestIterationStart(0)",
+                                         "2nd.OnTestIterationStart(0)",
+                                         "3rd.OnTestIterationStart(0)",
+                                         "1st.OnEnvironmentsSetUpStart",
+                                         "2nd.OnEnvironmentsSetUpStart",
+                                         "3rd.OnEnvironmentsSetUpStart",
+                                         "Environment::SetUp",
+                                         "3rd.OnEnvironmentsSetUpEnd",
+                                         "2nd.OnEnvironmentsSetUpEnd",
+                                         "1st.OnEnvironmentsSetUpEnd",
+                                         "3rd.OnTestSuiteStart",
+                                         "ListenerTest::SetUpTestSuite",
+                                         "1st.OnTestStart",
+                                         "2nd.OnTestStart",
+                                         "3rd.OnTestStart",
+                                         "ListenerTest::SetUp",
+                                         "ListenerTest::* Test Body",
+                                         "1st.OnTestPartResult",
+                                         "2nd.OnTestPartResult",
+                                         "3rd.OnTestPartResult",
+                                         "ListenerTest::TearDown",
+                                         "3rd.OnTestEnd",
+                                         "2nd.OnTestEnd",
+                                         "1st.OnTestEnd",
+                                         "1st.OnTestStart",
+                                         "2nd.OnTestStart",
+                                         "3rd.OnTestStart",
+                                         "ListenerTest::SetUp",
+                                         "ListenerTest::* Test Body",
+                                         "1st.OnTestPartResult",
+                                         "2nd.OnTestPartResult",
+                                         "3rd.OnTestPartResult",
+                                         "ListenerTest::TearDown",
+                                         "3rd.OnTestEnd",
+                                         "2nd.OnTestEnd",
+                                         "1st.OnTestEnd",
+                                         "ListenerTest::TearDownTestSuite",
+                                         "3rd.OnTestSuiteEnd",
+                                         "1st.OnEnvironmentsTearDownStart",
+                                         "2nd.OnEnvironmentsTearDownStart",
+                                         "3rd.OnEnvironmentsTearDownStart",
+                                         "Environment::TearDown",
+                                         "3rd.OnEnvironmentsTearDownEnd",
+                                         "2nd.OnEnvironmentsTearDownEnd",
+                                         "1st.OnEnvironmentsTearDownEnd",
+                                         "3rd.OnTestIterationEnd(0)",
+                                         "2nd.OnTestIterationEnd(0)",
+                                         "1st.OnTestIterationEnd(0)",
+                                         "1st.OnTestIterationStart(1)",
+                                         "2nd.OnTestIterationStart(1)",
+                                         "3rd.OnTestIterationStart(1)",
+                                         "1st.OnEnvironmentsSetUpStart",
+                                         "2nd.OnEnvironmentsSetUpStart",
+                                         "3rd.OnEnvironmentsSetUpStart",
+                                         "Environment::SetUp",
+                                         "3rd.OnEnvironmentsSetUpEnd",
+                                         "2nd.OnEnvironmentsSetUpEnd",
+                                         "1st.OnEnvironmentsSetUpEnd",
+                                         "3rd.OnTestSuiteStart",
+                                         "ListenerTest::SetUpTestSuite",
+                                         "1st.OnTestStart",
+                                         "2nd.OnTestStart",
+                                         "3rd.OnTestStart",
+                                         "ListenerTest::SetUp",
+                                         "ListenerTest::* Test Body",
+                                         "1st.OnTestPartResult",
+                                         "2nd.OnTestPartResult",
+                                         "3rd.OnTestPartResult",
+                                         "ListenerTest::TearDown",
+                                         "3rd.OnTestEnd",
+                                         "2nd.OnTestEnd",
+                                         "1st.OnTestEnd",
+                                         "1st.OnTestStart",
+                                         "2nd.OnTestStart",
+                                         "3rd.OnTestStart",
+                                         "ListenerTest::SetUp",
+                                         "ListenerTest::* Test Body",
+                                         "1st.OnTestPartResult",
+                                         "2nd.OnTestPartResult",
+                                         "3rd.OnTestPartResult",
+                                         "ListenerTest::TearDown",
+                                         "3rd.OnTestEnd",
+                                         "2nd.OnTestEnd",
+                                         "1st.OnTestEnd",
+                                         "ListenerTest::TearDownTestSuite",
+                                         "3rd.OnTestSuiteEnd",
+                                         "1st.OnEnvironmentsTearDownStart",
+                                         "2nd.OnEnvironmentsTearDownStart",
+                                         "3rd.OnEnvironmentsTearDownStart",
+                                         "Environment::TearDown",
+                                         "3rd.OnEnvironmentsTearDownEnd",
+                                         "2nd.OnEnvironmentsTearDownEnd",
+                                         "1st.OnEnvironmentsTearDownEnd",
+                                         "3rd.OnTestIterationEnd(1)",
+                                         "2nd.OnTestIterationEnd(1)",
+                                         "1st.OnTestIterationEnd(1)",
+                                         "3rd.OnTestProgramEnd",
+                                         "2nd.OnTestProgramEnd",
+                                         "1st.OnTestProgramEnd"};
+#endif  // GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
   VerifyResults(events,
                 expected_events,
diff --git a/deps/boringssl/src/third_party/googletest/test/googletest-output-test_.cc b/deps/boringssl/src/third_party/googletest/test/googletest-output-test_.cc
index c6ce59e..b32b8f3 100644
--- a/deps/boringssl/src/third_party/googletest/test/googletest-output-test_.cc
+++ b/deps/boringssl/src/third_party/googletest/test/googletest-output-test_.cc
@@ -96,6 +96,14 @@
                          FailingParamTest,
                          testing::Values(2));
 
+// Tests that an empty value for the test suite basename yields just
+// the test name without any prior /
+class EmptyBasenameParamInst : public testing::TestWithParam<int> {};
+
+TEST_P(EmptyBasenameParamInst, Passes) { EXPECT_EQ(1, GetParam()); }
+
+INSTANTIATE_TEST_SUITE_P(, EmptyBasenameParamInst, testing::Values(1));
+
 static const char kGoldenString[] = "\"Line\0 1\"\nLine 2";
 
 TEST(NonfatalFailureTest, EscapesStringOperands) {
@@ -461,66 +469,13 @@
 }
 
 TEST(AddFailureAtTest, MessageContainsSpecifiedFileAndLineNumber) {
-  ADD_FAILURE_AT("foo.cc", 42) << "Expected failure in foo.cc";
+  ADD_FAILURE_AT("foo.cc", 42) << "Expected nonfatal failure in foo.cc";
 }
 
-#if GTEST_IS_THREADSAFE
-
-// A unary function that may die.
-void DieIf(bool should_die) {
-  GTEST_CHECK_(!should_die) << " - death inside DieIf().";
+TEST(GtestFailAtTest, MessageContainsSpecifiedFileAndLineNumber) {
+  GTEST_FAIL_AT("foo.cc", 42) << "Expected fatal failure in foo.cc";
 }
 
-// Tests running death tests in a multi-threaded context.
-
-// Used for coordination between the main and the spawn thread.
-struct SpawnThreadNotifications {
-  SpawnThreadNotifications() {}
-
-  Notification spawn_thread_started;
-  Notification spawn_thread_ok_to_terminate;
-
- private:
-  GTEST_DISALLOW_COPY_AND_ASSIGN_(SpawnThreadNotifications);
-};
-
-// The function to be executed in the thread spawn by the
-// MultipleThreads test (below).
-static void ThreadRoutine(SpawnThreadNotifications* notifications) {
-  // Signals the main thread that this thread has started.
-  notifications->spawn_thread_started.Notify();
-
-  // Waits for permission to finish from the main thread.
-  notifications->spawn_thread_ok_to_terminate.WaitForNotification();
-}
-
-// This is a death-test test, but it's not named with a DeathTest
-// suffix.  It starts threads which might interfere with later
-// death tests, so it must run after all other death tests.
-class DeathTestAndMultiThreadsTest : public testing::Test {
- protected:
-  // Starts a thread and waits for it to begin.
-  void SetUp() override {
-    thread_.reset(new ThreadWithParam<SpawnThreadNotifications*>(
-        &ThreadRoutine, &notifications_, nullptr));
-    notifications_.spawn_thread_started.WaitForNotification();
-  }
-  // Tells the thread to finish, and reaps it.
-  // Depending on the version of the thread library in use,
-  // a manager thread might still be left running that will interfere
-  // with later death tests.  This is unfortunate, but this class
-  // cleans up after itself as best it can.
-  void TearDown() override {
-    notifications_.spawn_thread_ok_to_terminate.Notify();
-  }
-
- private:
-  SpawnThreadNotifications notifications_;
-  std::unique_ptr<ThreadWithParam<SpawnThreadNotifications*> > thread_;
-};
-
-#endif  // GTEST_IS_THREADSAFE
-
 // The MixedUpTestSuiteTest test case verifies that Google Test will fail a
 // test if it uses a different fixture class than what other tests in
 // the same test case use.  It deliberately contains two fixture
@@ -778,6 +733,17 @@
                          testing::Values(std::string("a")),
                          ParamNameFunc);
 
+// The case where a suite has INSTANTIATE_TEST_SUITE_P but not TEST_P.
+using NoTests = ParamTest;
+INSTANTIATE_TEST_SUITE_P(ThisIsOdd, NoTests, ::testing::Values("Hello"));
+
+// fails under kErrorOnUninstantiatedParameterizedTest=true
+class DetectNotInstantiatedTest : public testing::TestWithParam<int> {};
+TEST_P(DetectNotInstantiatedTest, Used) { }
+
+// This would make the test failure from the above go away.
+// INSTANTIATE_TEST_SUITE_P(Fix, DetectNotInstantiatedTest, testing::Values(1));
+
 // This #ifdef block tests the output of typed tests.
 #if GTEST_HAS_TYPED_TEST
 
@@ -804,9 +770,9 @@
  public:
   template <typename T>
   static std::string GetName(int i) {
-    if (testing::internal::IsSame<T, char>::value)
+    if (std::is_same<T, char>::value)
       return std::string("char") + ::testing::PrintToString(i);
-    if (testing::internal::IsSame<T, int>::value)
+    if (std::is_same<T, int>::value)
       return std::string("int") + ::testing::PrintToString(i);
   }
 };
@@ -845,10 +811,10 @@
  public:
   template <typename T>
   static std::string GetName(int i) {
-    if (testing::internal::IsSame<T, unsigned char>::value) {
+    if (std::is_same<T, unsigned char>::value) {
       return std::string("unsignedChar") + ::testing::PrintToString(i);
     }
-    if (testing::internal::IsSame<T, unsigned int>::value) {
+    if (std::is_same<T, unsigned int>::value) {
       return std::string("unsignedInt") + ::testing::PrintToString(i);
     }
   }
@@ -857,6 +823,21 @@
 INSTANTIATE_TYPED_TEST_SUITE_P(UnsignedCustomName, TypedTestP, UnsignedTypes,
                               TypedTestPNames);
 
+template <typename T>
+class DetectNotInstantiatedTypesTest : public testing::Test {};
+TYPED_TEST_SUITE_P(DetectNotInstantiatedTypesTest);
+TYPED_TEST_P(DetectNotInstantiatedTypesTest, Used) {
+  TypeParam instantiate;
+  (void)instantiate;
+}
+REGISTER_TYPED_TEST_SUITE_P(DetectNotInstantiatedTypesTest, Used);
+
+// kErrorOnUninstantiatedTypeParameterizedTest=true would make the above fail.
+// Adding the following would make that test failure go away.
+//
+// typedef ::testing::Types<char, int, unsigned int> MyTypes;
+// INSTANTIATE_TYPED_TEST_SUITE_P(All, DetectNotInstantiatedTypesTest, MyTypes);
+
 #endif  // GTEST_HAS_TYPED_TEST_P
 
 #if GTEST_HAS_DEATH_TEST
diff --git a/deps/boringssl/src/third_party/googletest/test/googletest-param-test-test.cc b/deps/boringssl/src/third_party/googletest/test/googletest-param-test-test.cc
index 6c187df..c852220 100644
--- a/deps/boringssl/src/third_party/googletest/test/googletest-param-test-test.cc
+++ b/deps/boringssl/src/third_party/googletest/test/googletest-param-test-test.cc
@@ -37,6 +37,7 @@
 # include <algorithm>
 # include <iostream>
 # include <list>
+# include <set>
 # include <sstream>
 # include <string>
 # include <vector>
@@ -489,16 +490,17 @@
 class NonDefaultConstructAssignString {
  public:
   NonDefaultConstructAssignString(const std::string& s) : str_(s) {}
+  NonDefaultConstructAssignString() = delete;
+  NonDefaultConstructAssignString(const NonDefaultConstructAssignString&) =
+      default;
+  NonDefaultConstructAssignString& operator=(
+      const NonDefaultConstructAssignString&) = delete;
+  ~NonDefaultConstructAssignString() = default;
 
   const std::string& str() const { return str_; }
 
  private:
   std::string str_;
-
-  // Not default constructible
-  NonDefaultConstructAssignString();
-  // Not assignable
-  void operator=(const NonDefaultConstructAssignString&);
 };
 
 TEST(CombineTest, NonDefaultConstructAssign) {
@@ -802,7 +804,7 @@
      ::testing::UnitTest::GetInstance()->current_test_info();
 
   EXPECT_STREQ("FortyTwo/MacroNamingTest", test_info->test_suite_name());
-  EXPECT_STREQ("FooSomeTestName", test_info->name());
+  EXPECT_STREQ("FooSomeTestName/0", test_info->name());
 }
 
 INSTANTIATE_TEST_SUITE_P(FortyTwo, MacroNamingTest, Values(42));
@@ -819,6 +821,36 @@
   EXPECT_STREQ("FooSomeTestName", test_info->name());
 }
 
+TEST(MacroNameing, LookupNames) {
+  std::set<std::string> know_suite_names, know_test_names;
+
+  auto ins = testing::UnitTest::GetInstance();
+  int ts = 0;
+  while (const testing::TestSuite* suite = ins->GetTestSuite(ts++)) {
+    know_suite_names.insert(suite->name());
+
+    int ti = 0;
+    while (const testing::TestInfo* info = suite->GetTestInfo(ti++)) {
+      know_test_names.insert(std::string(suite->name()) + "." + info->name());
+    }
+  }
+
+  // Check that the expected form of the test suite name actually exists.
+  EXPECT_NE(  //
+      know_suite_names.find("FortyTwo/MacroNamingTest"),
+      know_suite_names.end());
+  EXPECT_NE(
+      know_suite_names.find("MacroNamingTestNonParametrized"),
+      know_suite_names.end());
+  // Check that the expected form of the test name actually exists.
+  EXPECT_NE(  //
+      know_test_names.find("FortyTwo/MacroNamingTest.FooSomeTestName/0"),
+      know_test_names.end());
+  EXPECT_NE(
+      know_test_names.find("MacroNamingTestNonParametrized.FooSomeTestName"),
+      know_test_names.end());
+}
+
 // Tests that user supplied custom parameter names are working correctly.
 // Runs the test with a builtin helper method which uses PrintToString,
 // as well as a custom function and custom functor to ensure all possible
@@ -1037,6 +1069,38 @@
 INSTANTIATE_TEST_SUITE_P(MyEnumTests, MyEnumTest,
                          ::testing::Values(ENUM1, ENUM2, 0));
 
+namespace works_here {
+// Never used and never instantiated; this should work.
+class NotUsedTest : public testing::TestWithParam<int> {};
+
+///////
+// Never used and never instantiated; this should work.
+template <typename T>
+class NotUsedTypeTest : public testing::Test {};
+TYPED_TEST_SUITE_P(NotUsedTypeTest);
+
+// Used but not instantiated; this would fail, but...
+class NotInstantiatedTest : public testing::TestWithParam<int> {};
+// ... we mark it as allowed.
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(NotInstantiatedTest);
+
+TEST_P(NotInstantiatedTest, Used) { }
+
+using OtherName = NotInstantiatedTest;
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(OtherName);
+TEST_P(OtherName, Used) { }
+
+// Used but not instantiated; this would fail, but...
+template <typename T>
+class NotInstantiatedTypeTest : public testing::Test {};
+TYPED_TEST_SUITE_P(NotInstantiatedTypeTest);
+// ... we mark it as allowed.
+GTEST_ALLOW_UNINSTANTIATED_PARAMETERIZED_TEST(NotInstantiatedTypeTest);
+
+TYPED_TEST_P(NotInstantiatedTypeTest, Used) { }
+REGISTER_TYPED_TEST_SUITE_P(NotInstantiatedTypeTest, Used);
+}  // namespace works_here
+
 int main(int argc, char **argv) {
   // Used in TestGenerationTest test suite.
   AddGlobalTestEnvironment(TestGenerationTest::Environment::Instance());
diff --git a/deps/boringssl/src/third_party/googletest/test/googletest-port-test.cc b/deps/boringssl/src/third_party/googletest/test/googletest-port-test.cc
index 6cb791e..4a87df0 100644
--- a/deps/boringssl/src/third_party/googletest/test/googletest-port-test.cc
+++ b/deps/boringssl/src/third_party/googletest/test/googletest-port-test.cc
@@ -90,10 +90,10 @@
 
 class Base {
  public:
-  // Copy constructor and assignment operator do exactly what we need, so we
-  // use them.
   Base() : member_(0) {}
   explicit Base(int n) : member_(n) {}
+  Base(const Base&) = default;
+  Base& operator=(const Base&) = default;
   virtual ~Base() {}
   int member() { return member_; }
 
@@ -201,24 +201,13 @@
   EXPECT_TRUE(converted);
 }
 
-TEST(IteratorTraitsTest, WorksForSTLContainerIterators) {
-  StaticAssertTypeEq<int,
-      IteratorTraits< ::std::vector<int>::const_iterator>::value_type>();
-  StaticAssertTypeEq<bool,
-      IteratorTraits< ::std::list<bool>::iterator>::value_type>();
-}
-
-TEST(IteratorTraitsTest, WorksForPointerToNonConst) {
-  StaticAssertTypeEq<char, IteratorTraits<char*>::value_type>();
-  StaticAssertTypeEq<const void*, IteratorTraits<const void**>::value_type>();
-}
-
-TEST(IteratorTraitsTest, WorksForPointerToConst) {
-  StaticAssertTypeEq<char, IteratorTraits<const char*>::value_type>();
-  StaticAssertTypeEq<const void*,
-      IteratorTraits<const void* const*>::value_type>();
-}
-
+// The following code intentionally tests a suboptimal syntax.
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdangling-else"
+#pragma GCC diagnostic ignored "-Wempty-body"
+#pragma GCC diagnostic ignored "-Wpragmas"
+#endif
 TEST(GtestCheckSyntaxTest, BehavesLikeASingleStatement) {
   if (AlwaysFalse())
     GTEST_CHECK_(false) << "This should never be executed; "
@@ -234,6 +223,9 @@
   else
     GTEST_CHECK_(true) << "";
 }
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
 
 TEST(GtestCheckSyntaxTest, WorksWithSwitch) {
   switch (0) {
@@ -1048,7 +1040,7 @@
           pthread_mutex_init(&memory_barrier_mutex, nullptr));
       GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&memory_barrier_mutex));
 
-      SleepMilliseconds(random_.Generate(30));
+      SleepMilliseconds(static_cast<int>(random_.Generate(30)));
 
       GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&memory_barrier_mutex));
       GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&memory_barrier_mutex));
@@ -1056,7 +1048,7 @@
       // On Windows, performing an interlocked access puts up a memory barrier.
       volatile LONG dummy = 0;
       ::InterlockedIncrement(&dummy);
-      SleepMilliseconds(random_.Generate(30));
+      SleepMilliseconds(static_cast<int>(random_.Generate(30)));
       ::InterlockedIncrement(&dummy);
 #else
 # error "Memory barrier not implemented on this platform."
@@ -1198,8 +1190,6 @@
     return DestructorCall::List().size() - 1;
   }
   const size_t index_;
-
-  GTEST_DISALLOW_ASSIGN_(DestructorTracker);
 };
 
 typedef ThreadLocal<DestructorTracker>* ThreadParam;
diff --git a/deps/boringssl/src/third_party/googletest/test/googletest-printers-test.cc b/deps/boringssl/src/third_party/googletest/test/googletest-printers-test.cc
index d896c24..bf1e0b5 100644
--- a/deps/boringssl/src/third_party/googletest/test/googletest-printers-test.cc
+++ b/deps/boringssl/src/third_party/googletest/test/googletest-printers-test.cc
@@ -33,11 +33,12 @@
 // This file tests the universal value printer.
 
 #include <ctype.h>
-#include <limits.h>
 #include <string.h>
 #include <algorithm>
+#include <cstdint>
 #include <deque>
 #include <forward_list>
+#include <limits>
 #include <list>
 #include <map>
 #include <set>
@@ -89,6 +90,18 @@
   operator ::testing::internal::BiggestInt() const { return 42; }
 };
 
+// A parent class with two child classes. The parent and one of the kids have
+// stream operators.
+class ParentClass {};
+class ChildClassWithStreamOperator : public ParentClass {};
+class ChildClassWithoutStreamOperator : public ParentClass {};
+static void operator<<(std::ostream& os, const ParentClass&) {
+  os << "ParentClass";
+}
+static void operator<<(std::ostream& os, const ChildClassWithStreamOperator&) {
+  os << "ChildClassWithStreamOperator";
+}
+
 // A user-defined unprintable class template in the global namespace.
 template <typename T>
 class UnprintableTemplateInGlobal {
@@ -176,6 +189,17 @@
   return os << "StreamableTemplateInFoo: " << x.value();
 }
 
+// A user-defined streamable type in a user namespace whose operator<< is
+// templated on the type of the output stream.
+struct TemplatedStreamableInFoo {};
+
+template <typename OutputStream>
+OutputStream& operator<<(OutputStream& os,
+                         const TemplatedStreamableInFoo& /*ts*/) {
+  os << "TemplatedStreamableInFoo";
+  return os;
+}
+
 // A user-defined streamable but recursivly-defined container type in
 // a user namespace, it mimics therefore std::filesystem::path or
 // boost::filesystem::path.
@@ -219,7 +243,6 @@
 using ::testing::internal::FormatForComparisonFailureMessage;
 using ::testing::internal::ImplicitCast_;
 using ::testing::internal::NativeArray;
-using ::testing::internal::RE;
 using ::testing::internal::RelationToSourceReference;
 using ::testing::internal::Strings;
 using ::testing::internal::UniversalPrint;
@@ -310,6 +333,20 @@
             Print(static_cast<unsigned char>('b')));
 }
 
+TEST(PrintCharTest, Char16) {
+  EXPECT_EQ("U+0041", Print(u'A'));
+}
+
+TEST(PrintCharTest, Char32) {
+  EXPECT_EQ("U+0041", Print(U'A'));
+}
+
+#ifdef __cpp_char8_t
+TEST(PrintCharTest, Char8) {
+  EXPECT_EQ("U+0041", Print(u8'A'));
+}
+#endif
+
 // Tests printing other simple, built-in types.
 
 // bool.
@@ -340,23 +377,39 @@
   EXPECT_EQ("L'\\xC74D' (51021)", Print(static_cast<wchar_t>(0xC74D)));
 }
 
-// Test that Int64 provides more storage than wchar_t.
+// Test that int64_t provides more storage than wchar_t.
 TEST(PrintTypeSizeTest, Wchar_t) {
-  EXPECT_LT(sizeof(wchar_t), sizeof(testing::internal::Int64));
+  EXPECT_LT(sizeof(wchar_t), sizeof(int64_t));
 }
 
 // Various integer types.
 TEST(PrintBuiltInTypeTest, Integer) {
   EXPECT_EQ("'\\xFF' (255)", Print(static_cast<unsigned char>(255)));  // uint8
   EXPECT_EQ("'\\x80' (-128)", Print(static_cast<signed char>(-128)));  // int8
-  EXPECT_EQ("65535", Print(USHRT_MAX));  // uint16
-  EXPECT_EQ("-32768", Print(SHRT_MIN));  // int16
-  EXPECT_EQ("4294967295", Print(UINT_MAX));  // uint32
-  EXPECT_EQ("-2147483648", Print(INT_MIN));  // int32
+  EXPECT_EQ("65535", Print(std::numeric_limits<uint16_t>::max()));  // uint16
+  EXPECT_EQ("-32768", Print(std::numeric_limits<int16_t>::min()));  // int16
+  EXPECT_EQ("4294967295",
+            Print(std::numeric_limits<uint32_t>::max()));  // uint32
+  EXPECT_EQ("-2147483648",
+            Print(std::numeric_limits<int32_t>::min()));  // int32
   EXPECT_EQ("18446744073709551615",
-            Print(static_cast<testing::internal::UInt64>(-1)));  // uint64
+            Print(std::numeric_limits<uint64_t>::max()));  // uint64
   EXPECT_EQ("-9223372036854775808",
-            Print(static_cast<testing::internal::Int64>(1) << 63));  // int64
+            Print(std::numeric_limits<int64_t>::min()));  // int64
+#ifdef __cpp_char8_t
+  EXPECT_EQ("U+0000",
+            Print(std::numeric_limits<char8_t>::min()));  // char8_t
+  EXPECT_EQ("U+00FF",
+            Print(std::numeric_limits<char8_t>::max()));  // char8_t
+#endif
+  EXPECT_EQ("U+0000",
+            Print(std::numeric_limits<char16_t>::min()));  // char16_t
+  EXPECT_EQ("U+FFFF",
+            Print(std::numeric_limits<char16_t>::max()));  // char16_t
+  EXPECT_EQ("U+0000",
+            Print(std::numeric_limits<char32_t>::min()));  // char32_t
+  EXPECT_EQ("U+FFFFFFFF",
+            Print(std::numeric_limits<char32_t>::max()));  // char32_t
 }
 
 // Size types.
@@ -483,6 +536,56 @@
   EXPECT_EQ("NULL", Print(p));
 }
 
+#ifdef __cpp_char8_t
+// char8_t*
+TEST(PrintCharPointerTest, Char8) {
+  char8_t* p = reinterpret_cast<char8_t*>(0x1234);
+  EXPECT_EQ(PrintPointer(p), Print(p));
+  p = nullptr;
+  EXPECT_EQ("NULL", Print(p));
+}
+
+// const char8_t*
+TEST(PrintCharPointerTest, ConstChar8) {
+  const char8_t* p = reinterpret_cast<const char8_t*>(0x1234);
+  EXPECT_EQ(PrintPointer(p), Print(p));
+  p = nullptr;
+  EXPECT_EQ("NULL", Print(p));
+}
+#endif
+
+// char16_t*
+TEST(PrintCharPointerTest, Char16) {
+  char16_t* p = reinterpret_cast<char16_t*>(0x1234);
+  EXPECT_EQ(PrintPointer(p), Print(p));
+  p = nullptr;
+  EXPECT_EQ("NULL", Print(p));
+}
+
+// const char16_t*
+TEST(PrintCharPointerTest, ConstChar16) {
+  const char16_t* p = reinterpret_cast<const char16_t*>(0x1234);
+  EXPECT_EQ(PrintPointer(p), Print(p));
+  p = nullptr;
+  EXPECT_EQ("NULL", Print(p));
+}
+
+// char32_t*
+TEST(PrintCharPointerTest, Char32) {
+  char32_t* p = reinterpret_cast<char32_t*>(0x1234);
+  EXPECT_EQ(PrintPointer(p), Print(p));
+  p = nullptr;
+  EXPECT_EQ("NULL", Print(p));
+}
+
+// const char32_t*
+TEST(PrintCharPointerTest, ConstChar32) {
+  const char32_t* p = reinterpret_cast<const char32_t*>(0x1234);
+  EXPECT_EQ(PrintPointer(p), Print(p));
+  p = nullptr;
+  EXPECT_EQ("NULL", Print(p));
+}
+
 // Tests printing pointers to simple, built-in types.
 
 // bool*.
@@ -641,6 +744,45 @@
   EXPECT_EQ("L\"\\0Hi\"", PrintArrayHelper(a));
 }
 
+#ifdef __cpp_char8_t
+// char8_t array.
+TEST(PrintArrayTest, Char8Array) {
+  const char8_t a[] = u8"Hello, world!";
+  EXPECT_EQ(
+      "{ U+0048, U+0065, U+006C, U+006C, U+006F, U+002C, U+0020, U+0077, "
+      "U+006F, U+0072, U+006C, U+0064, U+0021, U+0000 }",
+      PrintArrayHelper(a));
+}
+#endif
+
+// char16_t array.
+#ifdef _MSC_VER
+// TODO(b/173029407): Figure out why this doesn't work under MSVC.
+TEST(PrintArrayTest, DISABLED_Char16Array) {
+#else
+TEST(PrintArrayTest, Char16Array) {
+#endif
+  const char16_t a[] = u"Hello, 世界";
+  EXPECT_EQ(
+      "{ U+0048, U+0065, U+006C, U+006C, U+006F, U+002C, U+0020, U+4E16, "
+      "U+754C, U+0000 }",
+      PrintArrayHelper(a));
+}
+
+// char32_t array.
+#ifdef _MSC_VER
+// TODO(b/173029407): Figure out why this doesn't work under MSVC.
+TEST(PrintArrayTest, DISABLED_Char32Array) {
+#else
+TEST(PrintArrayTest, Char32Array) {
+#endif
+  const char32_t a[] = U"Hello, 世界";
+  EXPECT_EQ(
+      "{ U+0048, U+0065, U+006C, U+006C, U+006F, U+002C, U+0020, U+4E16, "
+      "U+754C, U+0000 }",
+      PrintArrayHelper(a));
+}
+
 // Array of objects.
 TEST(PrintArrayTest, ObjectArray) {
   std::string a[3] = {"Hi", "Hello", "Ni hao"};
@@ -700,6 +842,45 @@
 }
 #endif  // GTEST_HAS_STD_WSTRING
 
+#ifdef __cpp_char8_t
+TEST(PrintStringTest, U8String) {
+  std::u8string str = u8"Hello, world!";
+  EXPECT_EQ(str, str);  // Verify EXPECT_EQ compiles with this type.
+  EXPECT_EQ(
+      "{ U+0048, U+0065, U+006C, U+006C, U+006F, U+002C, U+0020, U+0077, "
+      "U+006F, U+0072, U+006C, U+0064, U+0021 }",
+      Print(str));
+}
+#endif
+
+#ifdef _MSC_VER
+// TODO(b/173029407): Figure out why this doesn't work under MSVC.
+TEST(PrintStringTest, DISABLED_U16String) {
+#else
+TEST(PrintStringTest, U16String) {
+#endif
+  std::u16string str = u"Hello, 世界";
+  EXPECT_EQ(str, str);  // Verify EXPECT_EQ compiles with this type.
+  EXPECT_EQ(
+      "{ U+0048, U+0065, U+006C, U+006C, U+006F, U+002C, U+0020, U+4E16, "
+      "U+754C }",
+      Print(str));
+}
+
+#ifdef _MSC_VER
+// TODO(b/173029407): Figure out why this doesn't work under MSVC.
+TEST(PrintStringTest, DISABLED_U32String) {
+#else
+TEST(PrintStringTest, U32String) {
+#endif
+  std::u32string str = U"Hello, 世界";
+  EXPECT_EQ(str, str);  // Verify EXPECT_EQ compiles with this type.
+  EXPECT_EQ(
+      "{ U+0048, U+0065, U+006C, U+006C, U+006F, U+002C, U+0020, U+4E16, "
+      "U+754C }",
+      Print(str));
+}
+
 // Tests printing types that support generic streaming (i.e. streaming
 // to std::basic_ostream<Char, CharTraits> for any valid Char and
 // CharTraits types).
@@ -758,22 +939,22 @@
   EXPECT_EQ("AllowsGenericStreamingAndImplicitConversionTemplate", Print(a));
 }
 
-#if GTEST_HAS_ABSL
+#if GTEST_INTERNAL_HAS_STRING_VIEW
 
-// Tests printing ::absl::string_view.
+// Tests printing internal::StringView.
 
 TEST(PrintStringViewTest, SimpleStringView) {
-  const ::absl::string_view sp = "Hello";
+  const internal::StringView sp = "Hello";
   EXPECT_EQ("\"Hello\"", Print(sp));
 }
 
 TEST(PrintStringViewTest, UnprintableCharacters) {
   const char str[] = "NUL (\0) and \r\t";
-  const ::absl::string_view sp(str, sizeof(str) - 1);
+  const internal::StringView sp(str, sizeof(str) - 1);
   EXPECT_EQ("\"NUL (\\0) and \\r\\t\"", Print(sp));
 }
 
-#endif  // GTEST_HAS_ABSL
+#endif  // GTEST_INTERNAL_HAS_STRING_VIEW
 
 // Tests printing STL containers.
 
@@ -978,9 +1159,8 @@
   EXPECT_EQ("(false, 2, 3, 4)", Print(t4));
 
   const char* const str = "8";
-  ::std::tuple<bool, char, short, testing::internal::Int32,  // NOLINT
-               testing::internal::Int64, float, double, const char*, void*,
-               std::string>
+  ::std::tuple<bool, char, short, int32_t, int64_t, float, double,  // NOLINT
+               const char*, void*, std::string>
       t10(false, 'a', static_cast<short>(3), 4, 5, 1.5F, -2.5, str,  // NOLINT
           nullptr, "10");
   EXPECT_EQ("(false, 'a' (97, 0x61), 3, 4, 5, 1.5, -2.5, " + PrintPointer(str) +
@@ -1064,6 +1244,20 @@
             Print(::foo::StreamableTemplateInFoo<int>()));
 }
 
+TEST(PrintStreamableTypeTest, TypeInUserNamespaceWithTemplatedStreamOperator) {
+  EXPECT_EQ("TemplatedStreamableInFoo",
+            Print(::foo::TemplatedStreamableInFoo()));
+}
+
+TEST(PrintStreamableTypeTest, SubclassUsesSuperclassStreamOperator) {
+  ParentClass parent;
+  ChildClassWithStreamOperator child_stream;
+  ChildClassWithoutStreamOperator child_no_stream;
+  EXPECT_EQ("ParentClass", Print(parent));
+  EXPECT_EQ("ChildClassWithStreamOperator", Print(child_stream));
+  EXPECT_EQ("ParentClass", Print(child_no_stream));
+}
+
 // Tests printing a user-defined recursive container type that has a <<
 // operator.
 TEST(PrintStreamableTypeTest, PathLikeInUserNamespace) {
@@ -1530,32 +1724,124 @@
   EXPECT_EQ("\"a\"", result[1]);
 }
 
-#if GTEST_HAS_ABSL
+#if GTEST_INTERNAL_HAS_ANY
+class PrintAnyTest : public ::testing::Test {
+ protected:
+  template <typename T>
+  static std::string ExpectedTypeName() {
+#if GTEST_HAS_RTTI
+    return internal::GetTypeName<T>();
+#else
+    return "<unknown_type>";
+#endif  // GTEST_HAS_RTTI
+  }
+};
 
+TEST_F(PrintAnyTest, Empty) {
+  internal::Any any;
+  EXPECT_EQ("no value", PrintToString(any));
+}
+
+TEST_F(PrintAnyTest, NonEmpty) {
+  internal::Any any;
+  constexpr int val1 = 10;
+  const std::string val2 = "content";
+
+  any = val1;
+  EXPECT_EQ("value of type " + ExpectedTypeName<int>(), PrintToString(any));
+
+  any = val2;
+  EXPECT_EQ("value of type " + ExpectedTypeName<std::string>(),
+            PrintToString(any));
+}
+#endif  // GTEST_INTERNAL_HAS_ANY
+
+#if GTEST_INTERNAL_HAS_OPTIONAL
 TEST(PrintOptionalTest, Basic) {
-  absl::optional<int> value;
+  internal::Optional<int> value;
   EXPECT_EQ("(nullopt)", PrintToString(value));
   value = {7};
   EXPECT_EQ("(7)", PrintToString(value));
-  EXPECT_EQ("(1.1)", PrintToString(absl::optional<double>{1.1}));
-  EXPECT_EQ("(\"A\")", PrintToString(absl::optional<std::string>{"A"}));
+  EXPECT_EQ("(1.1)", PrintToString(internal::Optional<double>{1.1}));
+  EXPECT_EQ("(\"A\")", PrintToString(internal::Optional<std::string>{"A"}));
 }
+#endif  // GTEST_INTERNAL_HAS_OPTIONAL
 
+#if GTEST_INTERNAL_HAS_VARIANT
 struct NonPrintable {
   unsigned char contents = 17;
 };
 
 TEST(PrintOneofTest, Basic) {
-  using Type = absl::variant<int, StreamableInGlobal, NonPrintable>;
-  EXPECT_EQ("('int' with value 7)", PrintToString(Type(7)));
-  EXPECT_EQ("('StreamableInGlobal' with value StreamableInGlobal)",
+  using Type = internal::Variant<int, StreamableInGlobal, NonPrintable>;
+  EXPECT_EQ("('int(index = 0)' with value 7)", PrintToString(Type(7)));
+  EXPECT_EQ("('StreamableInGlobal(index = 1)' with value StreamableInGlobal)",
             PrintToString(Type(StreamableInGlobal{})));
   EXPECT_EQ(
-      "('testing::gtest_printers_test::NonPrintable' with value 1-byte object "
-      "<11>)",
+      "('testing::gtest_printers_test::NonPrintable(index = 2)' with value "
+      "1-byte object <11>)",
       PrintToString(Type(NonPrintable{})));
 }
-#endif  // GTEST_HAS_ABSL
+#endif  // GTEST_INTERNAL_HAS_VARIANT
+namespace {
+class string_ref;
+
+/**
+ * This is a synthetic pointer to a fixed size string.
+ */
+class string_ptr {
+ public:
+  string_ptr(const char* data, size_t size) : data_(data), size_(size) {}
+
+  string_ptr& operator++() noexcept {
+    data_ += size_;
+    return *this;
+  }
+
+  string_ref operator*() const noexcept;
+
+ private:
+  const char* data_;
+  size_t size_;
+};
+
+/**
+ * This is a synthetic reference of a fixed size string.
+ */
+class string_ref {
+ public:
+  string_ref(const char* data, size_t size) : data_(data), size_(size) {}
+
+  string_ptr operator&() const noexcept { return {data_, size_}; }  // NOLINT
+
+  bool operator==(const char* s) const noexcept {
+    if (size_ > 0 && data_[size_ - 1] != 0) {
+      return std::string(data_, size_) == std::string(s);
+    } else {
+      return std::string(data_) == std::string(s);
+    }
+  }
+
+ private:
+  const char* data_;
+  size_t size_;
+};
+
+string_ref string_ptr::operator*() const noexcept { return {data_, size_}; }
+
+TEST(string_ref, compare) {
+  const char* s = "alex\0davidjohn\0";
+  string_ptr ptr(s, 5);
+  EXPECT_EQ(*ptr, "alex");
+  EXPECT_TRUE(*ptr == "alex");
+  ++ptr;
+  EXPECT_EQ(*ptr, "david");
+  EXPECT_TRUE(*ptr == "david");
+  ++ptr;
+  EXPECT_EQ(*ptr, "john");
+}
+
+}  // namespace
 
 }  // namespace gtest_printers_test
 }  // namespace testing
diff --git a/deps/boringssl/src/third_party/googletest/test/googletest-shuffle-test_.cc b/deps/boringssl/src/third_party/googletest/test/googletest-shuffle-test_.cc
index c1fc106..4505663 100644
--- a/deps/boringssl/src/third_party/googletest/test/googletest-shuffle-test_.cc
+++ b/deps/boringssl/src/third_party/googletest/test/googletest-shuffle-test_.cc
@@ -82,7 +82,7 @@
   }
 
   void OnTestStart(const TestInfo& test_info) override {
-    printf("%s.%s\n", test_info.test_case_name(), test_info.name());
+    printf("%s.%s\n", test_info.test_suite_name(), test_info.name());
   }
 };
 
diff --git a/deps/boringssl/src/third_party/googletest/test/googletest-test2_test.cc b/deps/boringssl/src/third_party/googletest/test/googletest-test2_test.cc
deleted file mode 100644
index 2e425da..0000000
--- a/deps/boringssl/src/third_party/googletest/test/googletest-test2_test.cc
+++ /dev/null
@@ -1,61 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-//
-// Tests for Google Test itself.  This verifies that the basic constructs of
-// Google Test work.
-
-#include "gtest/gtest.h"
-#include "googletest-param-test-test.h"
-
-using ::testing::Values;
-using ::testing::internal::ParamGenerator;
-
-// Tests that generators defined in a different translation unit
-// are functional. The test using extern_gen_2 is defined
-// in googletest-param-test-test.cc.
-ParamGenerator<int> extern_gen_2 = Values(33);
-
-// Tests that a parameterized test case can be defined in one translation unit
-// and instantiated in another. The test is defined in
-// googletest-param-test-test.cc and ExternalInstantiationTest fixture class is
-// defined in gtest-param-test_test.h.
-INSTANTIATE_TEST_SUITE_P(MultiplesOf33,
-                         ExternalInstantiationTest,
-                         Values(33, 66));
-
-// Tests that a parameterized test case can be instantiated
-// in multiple translation units. Another instantiation is defined
-// in googletest-param-test-test.cc and
-// InstantiationInMultipleTranslationUnitsTest fixture is defined in
-// gtest-param-test_test.h
-INSTANTIATE_TEST_SUITE_P(Sequence2,
-                         InstantiationInMultipleTranslationUnitsTest,
-                         Values(42*3, 42*4, 42*5));
-
diff --git a/deps/boringssl/src/third_party/googletest/test/gtest-typed-test_test.cc b/deps/boringssl/src/third_party/googletest/test/gtest-typed-test_test.cc
index f1ca937..de1db0c 100644
--- a/deps/boringssl/src/third_party/googletest/test/gtest-typed-test_test.cc
+++ b/deps/boringssl/src/third_party/googletest/test/gtest-typed-test_test.cc
@@ -31,6 +31,7 @@
 #include "test/gtest-typed-test_test.h"
 
 #include <set>
+#include <type_traits>
 #include <vector>
 
 #include "gtest/gtest.h"
@@ -177,10 +178,10 @@
  public:
   template <typename T>
   static std::string GetName(int i) {
-    if (testing::internal::IsSame<T, char>::value) {
+    if (std::is_same<T, char>::value) {
       return std::string("char") + ::testing::PrintToString(i);
     }
-    if (testing::internal::IsSame<T, int>::value) {
+    if (std::is_same<T, int>::value) {
       return std::string("int") + ::testing::PrintToString(i);
     }
   }
@@ -189,16 +190,16 @@
 TYPED_TEST_SUITE(TypedTestWithNames, TwoTypes, TypedTestNames);
 
 TYPED_TEST(TypedTestWithNames, TestSuiteName) {
-  if (testing::internal::IsSame<TypeParam, char>::value) {
+  if (std::is_same<TypeParam, char>::value) {
     EXPECT_STREQ(::testing::UnitTest::GetInstance()
                      ->current_test_info()
-                     ->test_case_name(),
+                     ->test_suite_name(),
                  "TypedTestWithNames/char0");
   }
-  if (testing::internal::IsSame<TypeParam, int>::value) {
+  if (std::is_same<TypeParam, int>::value) {
     EXPECT_STREQ(::testing::UnitTest::GetInstance()
                      ->current_test_info()
-                     ->test_case_name(),
+                     ->test_suite_name(),
                  "TypedTestWithNames/int1");
   }
 }
@@ -227,7 +228,7 @@
 TEST_F(TypedTestSuitePStateTest, SucceedsForMatchingList) {
   const char* tests = "A, B, C";
   EXPECT_EQ(tests,
-            state_.VerifyRegisteredTestNames("foo.cc", 1, tests));
+            state_.VerifyRegisteredTestNames("Suite", "foo.cc", 1, tests));
 }
 
 // Makes sure that the order of the tests and spaces around the names
@@ -235,33 +236,33 @@
 TEST_F(TypedTestSuitePStateTest, IgnoresOrderAndSpaces) {
   const char* tests = "A,C,   B";
   EXPECT_EQ(tests,
-            state_.VerifyRegisteredTestNames("foo.cc", 1, tests));
+            state_.VerifyRegisteredTestNames("Suite", "foo.cc", 1, tests));
 }
 
 using TypedTestSuitePStateDeathTest = TypedTestSuitePStateTest;
 
 TEST_F(TypedTestSuitePStateDeathTest, DetectsDuplicates) {
   EXPECT_DEATH_IF_SUPPORTED(
-      state_.VerifyRegisteredTestNames("foo.cc", 1, "A, B, A, C"),
+      state_.VerifyRegisteredTestNames("Suite", "foo.cc", 1, "A, B, A, C"),
       "foo\\.cc.1.?: Test A is listed more than once\\.");
 }
 
 TEST_F(TypedTestSuitePStateDeathTest, DetectsExtraTest) {
   EXPECT_DEATH_IF_SUPPORTED(
-      state_.VerifyRegisteredTestNames("foo.cc", 1, "A, B, C, D"),
+      state_.VerifyRegisteredTestNames("Suite", "foo.cc", 1, "A, B, C, D"),
       "foo\\.cc.1.?: No test named D can be found in this test suite\\.");
 }
 
 TEST_F(TypedTestSuitePStateDeathTest, DetectsMissedTest) {
   EXPECT_DEATH_IF_SUPPORTED(
-      state_.VerifyRegisteredTestNames("foo.cc", 1, "A, C"),
+      state_.VerifyRegisteredTestNames("Suite", "foo.cc", 1, "A, C"),
       "foo\\.cc.1.?: You forgot to list test B\\.");
 }
 
 // Tests that defining a test for a parameterized test case generates
 // a run-time error if the test case has been registered.
 TEST_F(TypedTestSuitePStateDeathTest, DetectsTestAfterRegistration) {
-  state_.VerifyRegisteredTestNames("foo.cc", 1, "A, B, C");
+  state_.VerifyRegisteredTestNames("Suite", "foo.cc", 1, "A, B, C");
   EXPECT_DEATH_IF_SUPPORTED(
       state_.AddTestName("foo.cc", 2, "FooTest", "D"),
       "foo\\.cc.2.?: Test D must be defined before REGISTER_TYPED_TEST_SUITE_P"
@@ -311,16 +312,16 @@
 TYPED_TEST_SUITE_P(TypeParametrizedTestWithNames);
 
 TYPED_TEST_P(TypeParametrizedTestWithNames, TestSuiteName) {
-  if (testing::internal::IsSame<TypeParam, char>::value) {
+  if (std::is_same<TypeParam, char>::value) {
     EXPECT_STREQ(::testing::UnitTest::GetInstance()
                      ->current_test_info()
-                     ->test_case_name(),
+                     ->test_suite_name(),
                  "CustomName/TypeParametrizedTestWithNames/parChar0");
   }
-  if (testing::internal::IsSame<TypeParam, int>::value) {
+  if (std::is_same<TypeParam, int>::value) {
     EXPECT_STREQ(::testing::UnitTest::GetInstance()
                      ->current_test_info()
-                     ->test_case_name(),
+                     ->test_suite_name(),
                  "CustomName/TypeParametrizedTestWithNames/parInt1");
   }
 }
@@ -331,10 +332,10 @@
  public:
   template <typename T>
   static std::string GetName(int i) {
-    if (testing::internal::IsSame<T, char>::value) {
+    if (std::is_same<T, char>::value) {
       return std::string("parChar") + ::testing::PrintToString(i);
     }
-    if (testing::internal::IsSame<T, int>::value) {
+    if (std::is_same<T, int>::value) {
       return std::string("parInt") + ::testing::PrintToString(i);
     }
   }
diff --git a/deps/boringssl/src/third_party/googletest/test/gtest-unittest-api_test.cc b/deps/boringssl/src/third_party/googletest/test/gtest-unittest-api_test.cc
index 480a41f..25a8afb 100644
--- a/deps/boringssl/src/third_party/googletest/test/gtest-unittest-api_test.cc
+++ b/deps/boringssl/src/third_party/googletest/test/gtest-unittest-api_test.cc
@@ -55,8 +55,8 @@
   // name.  The caller is responsible for deleting the array.
   static TestSuite const** GetSortedTestSuites() {
     UnitTest& unit_test = *UnitTest::GetInstance();
-    auto const** const test_suites =
-        new const TestSuite*[unit_test.total_test_suite_count()];
+    auto const** const test_suites = new const TestSuite*[static_cast<size_t>(
+      unit_test.total_test_suite_count())];
 
     for (int i = 0; i < unit_test.total_test_suite_count(); ++i)
       test_suites[i] = unit_test.GetTestSuite(i);
@@ -83,8 +83,8 @@
   // sorted by the test name.  The caller is responsible for deleting the
   // array.
   static TestInfo const** GetSortedTests(const TestSuite* test_suite) {
-    TestInfo const** const tests =
-        new const TestInfo*[test_suite->total_test_count()];
+    TestInfo const** const tests = new const TestInfo*[static_cast<size_t>(
+      test_suite->total_test_count())];
 
     for (int i = 0; i < test_suite->total_test_count(); ++i)
       tests[i] = test_suite->GetTestInfo(i);
@@ -188,7 +188,7 @@
   ASSERT_TRUE(test_suite != nullptr);
 
   EXPECT_STREQ("TestSuiteWithCommentTest/0", test_suite->name());
-  EXPECT_STREQ(GetTypeName<int>().c_str(), test_suite->type_param());
+  EXPECT_STREQ(GetTypeName<Types<int>>().c_str(), test_suite->type_param());
   EXPECT_TRUE(test_suite->should_run());
   EXPECT_EQ(0, test_suite->disabled_test_count());
   EXPECT_EQ(1, test_suite->test_to_run_count());
@@ -199,7 +199,7 @@
   EXPECT_STREQ("Dummy", tests[0]->name());
   EXPECT_STREQ("TestSuiteWithCommentTest/0", tests[0]->test_suite_name());
   EXPECT_TRUE(IsNull(tests[0]->value_param()));
-  EXPECT_STREQ(GetTypeName<int>().c_str(), tests[0]->type_param());
+  EXPECT_STREQ(GetTypeName<Types<int>>().c_str(), tests[0]->type_param());
   EXPECT_TRUE(tests[0]->should_run());
 
   delete[] tests;
@@ -265,7 +265,8 @@
 
 #if GTEST_HAS_TYPED_TEST
     EXPECT_STREQ("TestSuiteWithCommentTest/0", test_suites[2]->name());
-    EXPECT_STREQ(GetTypeName<int>().c_str(), test_suites[2]->type_param());
+    EXPECT_STREQ(GetTypeName<Types<int>>().c_str(),
+                 test_suites[2]->type_param());
     EXPECT_TRUE(test_suites[2]->should_run());
     EXPECT_EQ(0, test_suites[2]->disabled_test_count());
     ASSERT_EQ(1, test_suites[2]->total_test_count());
@@ -317,7 +318,7 @@
     EXPECT_STREQ("Dummy", tests[0]->name());
     EXPECT_STREQ("TestSuiteWithCommentTest/0", tests[0]->test_suite_name());
     EXPECT_TRUE(IsNull(tests[0]->value_param()));
-    EXPECT_STREQ(GetTypeName<int>().c_str(), tests[0]->type_param());
+    EXPECT_STREQ(GetTypeName<Types<int>>().c_str(), tests[0]->type_param());
     EXPECT_TRUE(tests[0]->should_run());
     EXPECT_TRUE(tests[0]->result()->Passed());
     EXPECT_EQ(0, tests[0]->result()->test_property_count());
diff --git a/deps/boringssl/src/third_party/googletest/test/gtest_environment_test.cc b/deps/boringssl/src/third_party/googletest/test/gtest_environment_test.cc
index fea542a..064bfc5 100644
--- a/deps/boringssl/src/third_party/googletest/test/gtest_environment_test.cc
+++ b/deps/boringssl/src/third_party/googletest/test/gtest_environment_test.cc
@@ -116,7 +116,7 @@
   }
 }
 
-// Runs the tests.  Return true iff successful.
+// Runs the tests.  Return true if and only if successful.
 //
 // The 'failure' parameter specifies the type of failure that should
 // be generated by the global set-up.
diff --git a/deps/boringssl/src/third_party/googletest/test/gtest_list_output_unittest_.cc b/deps/boringssl/src/third_party/googletest/test/gtest_list_output_unittest_.cc
index b1c7b4d..2eea3eb 100644
--- a/deps/boringssl/src/third_party/googletest/test/gtest_list_output_unittest_.cc
+++ b/deps/boringssl/src/third_party/googletest/test/gtest_list_output_unittest_.cc
@@ -44,6 +44,36 @@
 
 TEST(FooTest, Test2) {}
 
+class FooTestFixture : public ::testing::Test {};
+TEST_F(FooTestFixture, Test3) {}
+TEST_F(FooTestFixture, Test4) {}
+
+class ValueParamTest : public ::testing::TestWithParam<int> {};
+TEST_P(ValueParamTest, Test5) {}
+TEST_P(ValueParamTest, Test6) {}
+INSTANTIATE_TEST_SUITE_P(ValueParam, ValueParamTest, ::testing::Values(33, 42));
+
+#if GTEST_HAS_TYPED_TEST
+template <typename T>
+class TypedTest : public ::testing::Test {};
+typedef testing::Types<int, bool> TypedTestTypes;
+TYPED_TEST_SUITE(TypedTest, TypedTestTypes);
+TYPED_TEST(TypedTest, Test7) {}
+TYPED_TEST(TypedTest, Test8) {}
+#endif
+
+#if GTEST_HAS_TYPED_TEST_P
+template <typename T>
+class TypeParameterizedTestSuite : public ::testing::Test {};
+TYPED_TEST_SUITE_P(TypeParameterizedTestSuite);
+TYPED_TEST_P(TypeParameterizedTestSuite, Test9) {}
+TYPED_TEST_P(TypeParameterizedTestSuite, Test10) {}
+REGISTER_TYPED_TEST_SUITE_P(TypeParameterizedTestSuite, Test9, Test10);
+typedef testing::Types<int, bool> TypeParameterizedTestSuiteTypes;  // NOLINT
+INSTANTIATE_TYPED_TEST_SUITE_P(Single, TypeParameterizedTestSuite,
+                               TypeParameterizedTestSuiteTypes);
+#endif
+
 int main(int argc, char **argv) {
   ::testing::InitGoogleTest(&argc, argv);
 
diff --git a/deps/boringssl/src/third_party/googletest/test/gtest_pred_impl_unittest.cc b/deps/boringssl/src/third_party/googletest/test/gtest_pred_impl_unittest.cc
index 049ef98..bbef994 100644
--- a/deps/boringssl/src/third_party/googletest/test/gtest_pred_impl_unittest.cc
+++ b/deps/boringssl/src/third_party/googletest/test/gtest_pred_impl_unittest.cc
@@ -27,7 +27,7 @@
 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-// This file is AUTOMATICALLY GENERATED on 01/02/2019 by command
+// This file is AUTOMATICALLY GENERATED on 11/05/2019 by command
 // 'gen_gtest_pred_impl.py 5'.  DO NOT EDIT BY HAND!
 
 // Regression test for gtest_pred_impl.h
@@ -78,9 +78,8 @@
   return v1 > 0;
 }
 
-// The following two functions are needed to circumvent a bug in
-// gcc 2.95.3, which sometimes has problem with the above template
-// function.
+// The following two functions are needed because a compiler doesn't have
+// a context yet to know which template function must be instantiated.
 bool PredFunction1Int(int v1) {
   return v1 > 0;
 }
@@ -144,10 +143,10 @@
     }
   }
 
-  // true iff the test function is expected to run to finish.
+  // true if and only if the test function is expected to run to finish.
   static bool expected_to_finish_;
 
-  // true iff the test function did run to finish.
+  // true if and only if the test function did run to finish.
   static bool finished_;
 
   static int n1_;
@@ -465,9 +464,8 @@
   return v1 + v2 > 0;
 }
 
-// The following two functions are needed to circumvent a bug in
-// gcc 2.95.3, which sometimes has problem with the above template
-// function.
+// The following two functions are needed because a compiler doesn't have
+// a context yet to know which template function must be instantiated.
 bool PredFunction2Int(int v1, int v2) {
   return v1 + v2 > 0;
 }
@@ -539,10 +537,10 @@
     }
   }
 
-  // true iff the test function is expected to run to finish.
+  // true if and only if the test function is expected to run to finish.
   static bool expected_to_finish_;
 
-  // true iff the test function did run to finish.
+  // true if and only if the test function did run to finish.
   static bool finished_;
 
   static int n1_;
@@ -894,9 +892,8 @@
   return v1 + v2 + v3 > 0;
 }
 
-// The following two functions are needed to circumvent a bug in
-// gcc 2.95.3, which sometimes has problem with the above template
-// function.
+// The following two functions are needed because a compiler doesn't have
+// a context yet to know which template function must be instantiated.
 bool PredFunction3Int(int v1, int v2, int v3) {
   return v1 + v2 + v3 > 0;
 }
@@ -976,10 +973,10 @@
     }
   }
 
-  // true iff the test function is expected to run to finish.
+  // true if and only if the test function is expected to run to finish.
   static bool expected_to_finish_;
 
-  // true iff the test function did run to finish.
+  // true if and only if the test function did run to finish.
   static bool finished_;
 
   static int n1_;
@@ -1365,9 +1362,8 @@
   return v1 + v2 + v3 + v4 > 0;
 }
 
-// The following two functions are needed to circumvent a bug in
-// gcc 2.95.3, which sometimes has problem with the above template
-// function.
+// The following two functions are needed because a compiler doesn't have
+// a context yet to know which template function must be instantiated.
 bool PredFunction4Int(int v1, int v2, int v3, int v4) {
   return v1 + v2 + v3 + v4 > 0;
 }
@@ -1455,10 +1451,10 @@
     }
   }
 
-  // true iff the test function is expected to run to finish.
+  // true if and only if the test function is expected to run to finish.
   static bool expected_to_finish_;
 
-  // true iff the test function did run to finish.
+  // true if and only if the test function did run to finish.
   static bool finished_;
 
   static int n1_;
@@ -1878,9 +1874,8 @@
   return v1 + v2 + v3 + v4 + v5 > 0;
 }
 
-// The following two functions are needed to circumvent a bug in
-// gcc 2.95.3, which sometimes has problem with the above template
-// function.
+// The following two functions are needed because a compiler doesn't have
+// a context yet to know which template function must be instantiated.
 bool PredFunction5Int(int v1, int v2, int v3, int v4, int v5) {
   return v1 + v2 + v3 + v4 + v5 > 0;
 }
@@ -1976,10 +1971,10 @@
     }
   }
 
-  // true iff the test function is expected to run to finish.
+  // true if and only if the test function is expected to run to finish.
   static bool expected_to_finish_;
 
-  // true iff the test function did run to finish.
+  // true if and only if the test function did run to finish.
   static bool finished_;
 
   static int n1_;
diff --git a/deps/boringssl/src/third_party/googletest/test/gtest_premature_exit_test.cc b/deps/boringssl/src/third_party/googletest/test/gtest_premature_exit_test.cc
index 0920a97..1d1187e 100644
--- a/deps/boringssl/src/third_party/googletest/test/gtest_premature_exit_test.cc
+++ b/deps/boringssl/src/third_party/googletest/test/gtest_premature_exit_test.cc
@@ -45,7 +45,7 @@
 
 class PrematureExitTest : public Test {
  public:
-  // Returns true iff the given file exists.
+  // Returns true if and only if the given file exists.
   static bool FileExists(const char* filepath) {
     StatStruct stat;
     return Stat(filepath, &stat) == 0;
@@ -61,7 +61,7 @@
     }
   }
 
-  // Returns true iff the premature-exit file exists.
+  // Returns true if and only if the premature-exit file exists.
   bool PrematureExitFileExists() const {
     return FileExists(premature_exit_file_path_);
   }
diff --git a/deps/boringssl/src/third_party/googletest/test/gtest_skip_test.cc b/deps/boringssl/src/third_party/googletest/test/gtest_skip_test.cc
index 717e105..4a23004 100644
--- a/deps/boringssl/src/third_party/googletest/test/gtest_skip_test.cc
+++ b/deps/boringssl/src/third_party/googletest/test/gtest_skip_test.cc
@@ -35,7 +35,7 @@
 using ::testing::Test;
 
 TEST(SkipTest, DoesSkip) {
-  GTEST_SKIP();
+  GTEST_SKIP() << "skipping single test";
   EXPECT_EQ(0, 1);
 }
 
diff --git a/deps/boringssl/src/third_party/googletest/test/gtest_unittest.cc b/deps/boringssl/src/third_party/googletest/test/gtest_unittest.cc
index 9c1827d..ac0f179 100644
--- a/deps/boringssl/src/third_party/googletest/test/gtest_unittest.cc
+++ b/deps/boringssl/src/third_party/googletest/test/gtest_unittest.cc
@@ -37,21 +37,22 @@
 // code once "gtest.h" has been #included.
 // Do not move it after other gtest #includes.
 TEST(CommandLineFlagsTest, CanBeAccessedInCodeOnceGTestHIsIncluded) {
-  bool dummy = testing::GTEST_FLAG(also_run_disabled_tests)
-      || testing::GTEST_FLAG(break_on_failure)
-      || testing::GTEST_FLAG(catch_exceptions)
-      || testing::GTEST_FLAG(color) != "unknown"
-      || testing::GTEST_FLAG(filter) != "unknown"
-      || testing::GTEST_FLAG(list_tests)
-      || testing::GTEST_FLAG(output) != "unknown"
-      || testing::GTEST_FLAG(print_time)
-      || testing::GTEST_FLAG(random_seed)
-      || testing::GTEST_FLAG(repeat) > 0
-      || testing::GTEST_FLAG(show_internal_stack_frames)
-      || testing::GTEST_FLAG(shuffle)
-      || testing::GTEST_FLAG(stack_trace_depth) > 0
-      || testing::GTEST_FLAG(stream_result_to) != "unknown"
-      || testing::GTEST_FLAG(throw_on_failure);
+  bool dummy = testing::GTEST_FLAG(also_run_disabled_tests) ||
+               testing::GTEST_FLAG(break_on_failure) ||
+               testing::GTEST_FLAG(catch_exceptions) ||
+               testing::GTEST_FLAG(color) != "unknown" ||
+               testing::GTEST_FLAG(fail_fast) ||
+               testing::GTEST_FLAG(filter) != "unknown" ||
+               testing::GTEST_FLAG(list_tests) ||
+               testing::GTEST_FLAG(output) != "unknown" ||
+               testing::GTEST_FLAG(brief) || testing::GTEST_FLAG(print_time) ||
+               testing::GTEST_FLAG(random_seed) ||
+               testing::GTEST_FLAG(repeat) > 0 ||
+               testing::GTEST_FLAG(show_internal_stack_frames) ||
+               testing::GTEST_FLAG(shuffle) ||
+               testing::GTEST_FLAG(stack_trace_depth) > 0 ||
+               testing::GTEST_FLAG(stream_result_to) != "unknown" ||
+               testing::GTEST_FLAG(throw_on_failure);
   EXPECT_TRUE(dummy || !dummy);  // Suppresses warning that dummy is unused.
 }
 
@@ -60,10 +61,13 @@
 #include <string.h>
 #include <time.h>
 
+#include <cstdint>
 #include <map>
-#include <vector>
 #include <ostream>
+#include <string>
+#include <type_traits>
 #include <unordered_set>
+#include <vector>
 
 #include "gtest/gtest-spi.h"
 #include "src/gtest-internal-inl.h"
@@ -200,9 +204,11 @@
 using testing::GTEST_FLAG(catch_exceptions);
 using testing::GTEST_FLAG(color);
 using testing::GTEST_FLAG(death_test_use_fork);
+using testing::GTEST_FLAG(fail_fast);
 using testing::GTEST_FLAG(filter);
 using testing::GTEST_FLAG(list_tests);
 using testing::GTEST_FLAG(output);
+using testing::GTEST_FLAG(brief);
 using testing::GTEST_FLAG(print_time);
 using testing::GTEST_FLAG(random_seed);
 using testing::GTEST_FLAG(repeat);
@@ -213,27 +219,26 @@
 using testing::GTEST_FLAG(throw_on_failure);
 using testing::IsNotSubstring;
 using testing::IsSubstring;
+using testing::kMaxStackTraceDepth;
 using testing::Message;
 using testing::ScopedFakeTestPartResultReporter;
 using testing::StaticAssertTypeEq;
 using testing::Test;
-using testing::TestCase;
 using testing::TestEventListeners;
 using testing::TestInfo;
 using testing::TestPartResult;
 using testing::TestPartResultArray;
 using testing::TestProperty;
 using testing::TestResult;
+using testing::TestSuite;
 using testing::TimeInMillis;
 using testing::UnitTest;
-using testing::internal::AddReference;
 using testing::internal::AlwaysFalse;
 using testing::internal::AlwaysTrue;
 using testing::internal::AppendUserMessage;
 using testing::internal::ArrayAwareFind;
 using testing::internal::ArrayEq;
 using testing::internal::CodePointToUtf8;
-using testing::internal::CompileAssertTypesEqual;
 using testing::internal::CopyArray;
 using testing::internal::CountIf;
 using testing::internal::EqFailure;
@@ -241,7 +246,6 @@
 using testing::internal::ForEach;
 using testing::internal::FormatEpochTimeInMillisAsIso8601;
 using testing::internal::FormatTimeInMillisAsSeconds;
-using testing::internal::GTestFlagSaver;
 using testing::internal::GetCurrentOsStackTraceExceptTop;
 using testing::internal::GetElementOr;
 using testing::internal::GetNextRandomSeed;
@@ -250,20 +254,20 @@
 using testing::internal::GetTimeInMillis;
 using testing::internal::GetTypeId;
 using testing::internal::GetUnitTestImpl;
-using testing::internal::Int32;
+using testing::internal::GTestFlagSaver;
+using testing::internal::HasDebugStringAndShortDebugString;
 using testing::internal::Int32FromEnvOrDie;
-using testing::internal::IsAProtocolMessage;
 using testing::internal::IsContainer;
 using testing::internal::IsContainerTest;
 using testing::internal::IsNotContainer;
+using testing::internal::kMaxRandomSeed;
+using testing::internal::kTestTypeIdInGoogleTest;
 using testing::internal::NativeArray;
 using testing::internal::OsStackTraceGetter;
 using testing::internal::OsStackTraceGetterInterface;
 using testing::internal::ParseInt32Flag;
 using testing::internal::RelationToSourceCopy;
 using testing::internal::RelationToSourceReference;
-using testing::internal::RemoveConst;
-using testing::internal::RemoveReference;
 using testing::internal::ShouldRunTestOnShard;
 using testing::internal::ShouldShard;
 using testing::internal::ShouldUseColor;
@@ -274,15 +278,11 @@
 using testing::internal::String;
 using testing::internal::TestEventListenersAccessor;
 using testing::internal::TestResultAccessor;
-using testing::internal::UInt32;
 using testing::internal::UnitTestImpl;
 using testing::internal::WideStringToUtf8;
 using testing::internal::edit_distance::CalculateOptimalEdits;
 using testing::internal::edit_distance::CreateUnifiedDiff;
 using testing::internal::edit_distance::EditType;
-using testing::internal::kMaxRandomSeed;
-using testing::internal::kTestTypeIdInGoogleTest;
-using testing::kMaxStackTraceDepth;
 
 #if GTEST_HAS_STREAM_REDIRECTION
 using testing::internal::CaptureStdout;
@@ -486,28 +486,28 @@
 const TimeInMillis FormatEpochTimeInMillisAsIso8601Test::kMillisPerSec;
 
 TEST_F(FormatEpochTimeInMillisAsIso8601Test, PrintsTwoDigitSegments) {
-  EXPECT_EQ("2011-10-31T18:52:42",
+  EXPECT_EQ("2011-10-31T18:52:42.000",
             FormatEpochTimeInMillisAsIso8601(1320087162 * kMillisPerSec));
 }
 
-TEST_F(FormatEpochTimeInMillisAsIso8601Test, MillisecondsDoNotAffectResult) {
+TEST_F(FormatEpochTimeInMillisAsIso8601Test, IncludesMillisecondsAfterDot) {
   EXPECT_EQ(
-      "2011-10-31T18:52:42",
+      "2011-10-31T18:52:42.234",
       FormatEpochTimeInMillisAsIso8601(1320087162 * kMillisPerSec + 234));
 }
 
 TEST_F(FormatEpochTimeInMillisAsIso8601Test, PrintsLeadingZeroes) {
-  EXPECT_EQ("2011-09-03T05:07:02",
+  EXPECT_EQ("2011-09-03T05:07:02.000",
             FormatEpochTimeInMillisAsIso8601(1315026422 * kMillisPerSec));
 }
 
 TEST_F(FormatEpochTimeInMillisAsIso8601Test, Prints24HourTime) {
-  EXPECT_EQ("2011-09-28T17:08:22",
+  EXPECT_EQ("2011-09-28T17:08:22.000",
             FormatEpochTimeInMillisAsIso8601(1317229702 * kMillisPerSec));
 }
 
 TEST_F(FormatEpochTimeInMillisAsIso8601Test, PrintsEpochStart) {
-  EXPECT_EQ("1970-01-01T00:00:00", FormatEpochTimeInMillisAsIso8601(0));
+  EXPECT_EQ("1970-01-01T00:00:00.000", FormatEpochTimeInMillisAsIso8601(0));
 }
 
 # ifdef __BORLANDC__
@@ -791,7 +791,7 @@
 }
 
 TEST(RandomTest, GeneratesNumbersWithinRange) {
-  const UInt32 kRange = 10000;
+  constexpr uint32_t kRange = 10000;
   testing::internal::Random random(12345);
   for (int i = 0; i < 10; i++) {
     EXPECT_LT(random.Generate(kRange), kRange) << " for iteration " << i;
@@ -804,10 +804,10 @@
 }
 
 TEST(RandomTest, RepeatsWhenReseeded) {
-  const int kSeed = 123;
-  const int kArraySize = 10;
-  const UInt32 kRange = 10000;
-  UInt32 values[kArraySize];
+  constexpr int kSeed = 123;
+  constexpr int kArraySize = 10;
+  constexpr uint32_t kRange = 10000;
+  uint32_t values[kArraySize];
 
   testing::internal::Random random(kSeed);
   for (int i = 0; i < kArraySize; i++) {
@@ -899,23 +899,23 @@
 
 class VectorShuffleTest : public Test {
  protected:
-  static const int kVectorSize = 20;
+  static const size_t kVectorSize = 20;
 
   VectorShuffleTest() : random_(1) {
-    for (int i = 0; i < kVectorSize; i++) {
+    for (int i = 0; i < static_cast<int>(kVectorSize); i++) {
       vector_.push_back(i);
     }
   }
 
   static bool VectorIsCorrupt(const TestingVector& vector) {
-    if (kVectorSize != static_cast<int>(vector.size())) {
+    if (kVectorSize != vector.size()) {
       return true;
     }
 
     bool found_in_vector[kVectorSize] = { false };
     for (size_t i = 0; i < vector.size(); i++) {
       const int e = vector[i];
-      if (e < 0 || e >= kVectorSize || found_in_vector[e]) {
+      if (e < 0 || e >= static_cast<int>(kVectorSize) || found_in_vector[e]) {
         return true;
       }
       found_in_vector[e] = true;
@@ -932,7 +932,7 @@
 
   static bool RangeIsShuffled(const TestingVector& vector, int begin, int end) {
     for (int i = begin; i < end; i++) {
-      if (i != vector[i]) {
+      if (i != vector[static_cast<size_t>(i)]) {
         return true;
       }
     }
@@ -956,7 +956,7 @@
   TestingVector vector_;
 };  // class VectorShuffleTest
 
-const int VectorShuffleTest::kVectorSize;
+const size_t VectorShuffleTest::kVectorSize;
 
 TEST_F(VectorShuffleTest, HandlesEmptyRange) {
   // Tests an empty range at the beginning...
@@ -1008,7 +1008,7 @@
   // Tests the first and last elements in particular to ensure that
   // there are no off-by-one problems in our shuffle algorithm.
   EXPECT_NE(0, vector_[0]);
-  EXPECT_NE(kVectorSize - 1, vector_[kVectorSize - 1]);
+  EXPECT_NE(static_cast<int>(kVectorSize - 1), vector_[kVectorSize - 1]);
 }
 
 TEST_F(VectorShuffleTest, ShufflesStartOfVector) {
@@ -1018,7 +1018,8 @@
 
   ASSERT_PRED1(VectorIsNotCorrupt, vector_);
   EXPECT_PRED3(RangeIsShuffled, vector_, 0, kRangeSize);
-  EXPECT_PRED3(RangeIsUnshuffled, vector_, kRangeSize, kVectorSize);
+  EXPECT_PRED3(RangeIsUnshuffled, vector_, kRangeSize,
+               static_cast<int>(kVectorSize));
 }
 
 TEST_F(VectorShuffleTest, ShufflesEndOfVector) {
@@ -1027,23 +1028,25 @@
 
   ASSERT_PRED1(VectorIsNotCorrupt, vector_);
   EXPECT_PRED3(RangeIsUnshuffled, vector_, 0, kRangeSize);
-  EXPECT_PRED3(RangeIsShuffled, vector_, kRangeSize, kVectorSize);
+  EXPECT_PRED3(RangeIsShuffled, vector_, kRangeSize,
+               static_cast<int>(kVectorSize));
 }
 
 TEST_F(VectorShuffleTest, ShufflesMiddleOfVector) {
-  int kRangeSize = kVectorSize/3;
+  const int kRangeSize = static_cast<int>(kVectorSize) / 3;
   ShuffleRange(&random_, kRangeSize, 2*kRangeSize, &vector_);
 
   ASSERT_PRED1(VectorIsNotCorrupt, vector_);
   EXPECT_PRED3(RangeIsUnshuffled, vector_, 0, kRangeSize);
   EXPECT_PRED3(RangeIsShuffled, vector_, kRangeSize, 2*kRangeSize);
-  EXPECT_PRED3(RangeIsUnshuffled, vector_, 2*kRangeSize, kVectorSize);
+  EXPECT_PRED3(RangeIsUnshuffled, vector_, 2 * kRangeSize,
+               static_cast<int>(kVectorSize));
 }
 
 TEST_F(VectorShuffleTest, ShufflesRepeatably) {
   TestingVector vector2;
-  for (int i = 0; i < kVectorSize; i++) {
-    vector2.push_back(i);
+  for (size_t i = 0; i < kVectorSize; i++) {
+    vector2.push_back(static_cast<int>(i));
   }
 
   random_.Reseed(1234);
@@ -1054,7 +1057,7 @@
   ASSERT_PRED1(VectorIsNotCorrupt, vector_);
   ASSERT_PRED1(VectorIsNotCorrupt, vector2);
 
-  for (int i = 0; i < kVectorSize; i++) {
+  for (size_t i = 0; i < kVectorSize; i++) {
     EXPECT_EQ(vector_[i], vector2[i]) << " where i is " << i;
   }
 }
@@ -1599,9 +1602,11 @@
     GTEST_FLAG(catch_exceptions) = false;
     GTEST_FLAG(death_test_use_fork) = false;
     GTEST_FLAG(color) = "auto";
+    GTEST_FLAG(fail_fast) = false;
     GTEST_FLAG(filter) = "";
     GTEST_FLAG(list_tests) = false;
     GTEST_FLAG(output) = "";
+    GTEST_FLAG(brief) = false;
     GTEST_FLAG(print_time) = true;
     GTEST_FLAG(random_seed) = 0;
     GTEST_FLAG(repeat) = 1;
@@ -1626,9 +1631,11 @@
     EXPECT_FALSE(GTEST_FLAG(catch_exceptions));
     EXPECT_STREQ("auto", GTEST_FLAG(color).c_str());
     EXPECT_FALSE(GTEST_FLAG(death_test_use_fork));
+    EXPECT_FALSE(GTEST_FLAG(fail_fast));
     EXPECT_STREQ("", GTEST_FLAG(filter).c_str());
     EXPECT_FALSE(GTEST_FLAG(list_tests));
     EXPECT_STREQ("", GTEST_FLAG(output).c_str());
+    EXPECT_FALSE(GTEST_FLAG(brief));
     EXPECT_TRUE(GTEST_FLAG(print_time));
     EXPECT_EQ(0, GTEST_FLAG(random_seed));
     EXPECT_EQ(1, GTEST_FLAG(repeat));
@@ -1642,9 +1649,11 @@
     GTEST_FLAG(catch_exceptions) = true;
     GTEST_FLAG(color) = "no";
     GTEST_FLAG(death_test_use_fork) = true;
+    GTEST_FLAG(fail_fast) = true;
     GTEST_FLAG(filter) = "abc";
     GTEST_FLAG(list_tests) = true;
     GTEST_FLAG(output) = "xml:foo.xml";
+    GTEST_FLAG(brief) = true;
     GTEST_FLAG(print_time) = false;
     GTEST_FLAG(random_seed) = 1;
     GTEST_FLAG(repeat) = 100;
@@ -1772,7 +1781,7 @@
 // Tests that ParseInt32Flag() returns false and doesn't change the
 // output value when the flag has wrong format
 TEST(ParseInt32FlagTest, ReturnsFalseForInvalidFlag) {
-  Int32 value = 123;
+  int32_t value = 123;
   EXPECT_FALSE(ParseInt32Flag("--a=100", "b", &value));
   EXPECT_EQ(123, value);
 
@@ -1785,7 +1794,7 @@
 TEST(ParseInt32FlagTest, ReturnsDefaultWhenValueOverflows) {
   printf("(expecting 2 warnings)\n");
 
-  Int32 value = 123;
+  int32_t value = 123;
   EXPECT_FALSE(ParseInt32Flag("--abc=12345678987654321", "abc", &value));
   EXPECT_EQ(123, value);
 
@@ -1799,7 +1808,7 @@
 TEST(ParseInt32FlagTest, ReturnsDefaultWhenValueIsInvalid) {
   printf("(expecting 2 warnings)\n");
 
-  Int32 value = 123;
+  int32_t value = 123;
   EXPECT_FALSE(ParseInt32Flag("--abc=A1", "abc", &value));
   EXPECT_EQ(123, value);
 
@@ -1811,7 +1820,7 @@
 // returns true when the flag represents a valid decimal integer in
 // the range of an Int32.
 TEST(ParseInt32FlagTest, ParsesAndReturnsValidValue) {
-  Int32 value = 123;
+  int32_t value = 123;
   EXPECT_TRUE(ParseInt32Flag("--" GTEST_FLAG_PREFIX_ "abc=456", "abc", &value));
   EXPECT_EQ(456, value);
 
@@ -1834,7 +1843,7 @@
 #endif  // !GTEST_OS_WINDOWS_MOBILE
 
 // Tests that Int32FromEnvOrDie() aborts with an error message
-// if the variable is not an Int32.
+// if the variable is not an int32_t.
 TEST(Int32FromEnvOrDieDeathTest, AbortsOnFailure) {
   SetEnv(GTEST_FLAG_PREFIX_UPPER_ "VAR", "xxx");
   EXPECT_DEATH_IF_SUPPORTED(
@@ -1843,7 +1852,7 @@
 }
 
 // Tests that Int32FromEnvOrDie() aborts with an error message
-// if the variable cannot be represented by an Int32.
+// if the variable cannot be represented by an int32_t.
 TEST(Int32FromEnvOrDieDeathTest, AbortsOnInt32Overflow) {
   SetEnv(GTEST_FLAG_PREFIX_UPPER_ "VAR", "1234567891234567891234");
   EXPECT_DEATH_IF_SUPPORTED(
@@ -2013,10 +2022,11 @@
 
 void ExpectNonFatalFailureRecordingPropertyWithReservedKeyForCurrentTestSuite(
     const char* key) {
-  const TestCase* test_case = UnitTest::GetInstance()->current_test_case();
-  ASSERT_TRUE(test_case != nullptr);
+  const testing::TestSuite* test_suite =
+      UnitTest::GetInstance()->current_test_suite();
+  ASSERT_TRUE(test_suite != nullptr);
   ExpectNonFatalFailureRecordingPropertyWithReservedKey(
-      test_case->ad_hoc_test_result(), key);
+      test_suite->ad_hoc_test_result(), key);
 }
 
 void ExpectNonFatalFailureRecordingPropertyWithReservedKeyOutsideOfTestSuite(
@@ -2046,8 +2056,10 @@
         "time");
 
     Test::RecordProperty("test_case_key_1", "1");
+
     const testing::TestSuite* test_suite =
-        UnitTest::GetInstance()->current_test_case();
+        UnitTest::GetInstance()->current_test_suite();
+
     ASSERT_TRUE(test_suite != nullptr);
 
     ASSERT_EQ(1, test_suite->ad_hoc_test_result().test_property_count());
@@ -2164,12 +2176,12 @@
 
 // First, some predicates and predicate-formatters needed by the tests.
 
-// Returns true iff the argument is an even number.
+// Returns true if and only if the argument is an even number.
 bool IsEven(int n) {
   return (n % 2) == 0;
 }
 
-// A functor that returns true iff the argument is an even number.
+// A functor that returns true if and only if the argument is an even number.
 struct IsEvenFunctor {
   bool operator()(int n) { return IsEven(n); }
 };
@@ -2213,13 +2225,13 @@
   }
 };
 
-// Returns true iff the sum of the arguments is an even number.
+// Returns true if and only if the sum of the arguments is an even number.
 bool SumIsEven2(int n1, int n2) {
   return IsEven(n1 + n2);
 }
 
-// A functor that returns true iff the sum of the arguments is an even
-// number.
+// A functor that returns true if and only if the sum of the arguments is an
+// even number.
 struct SumIsEven3Functor {
   bool operator()(int n1, int n2, int n3) {
     return IsEven(n1 + n2 + n3);
@@ -2756,7 +2768,7 @@
   typedef typename Floating::Bits Bits;
 
   void SetUp() override {
-    const size_t max_ulps = Floating::kMaxUlps;
+    const uint32_t max_ulps = Floating::kMaxUlps;
 
     // The bits that represent 0.0.
     const Bits zero_bits = Floating(0).bits();
@@ -2921,22 +2933,18 @@
 TEST_F(FloatTest, EXPECT_NEAR) {
   EXPECT_NEAR(-1.0f, -1.1f, 0.2f);
   EXPECT_NEAR(2.0f, 3.0f, 1.0f);
-  EXPECT_NONFATAL_FAILURE(EXPECT_NEAR(1.0f,1.5f, 0.25f),  // NOLINT
+  EXPECT_NONFATAL_FAILURE(EXPECT_NEAR(1.0f, 1.5f, 0.25f),  // NOLINT
                           "The difference between 1.0f and 1.5f is 0.5, "
                           "which exceeds 0.25f");
-  // To work around a bug in gcc 2.95.0, there is intentionally no
-  // space after the first comma in the previous line.
 }
 
 // Tests ASSERT_NEAR.
 TEST_F(FloatTest, ASSERT_NEAR) {
   ASSERT_NEAR(-1.0f, -1.1f, 0.2f);
   ASSERT_NEAR(2.0f, 3.0f, 1.0f);
-  EXPECT_FATAL_FAILURE(ASSERT_NEAR(1.0f,1.5f, 0.25f),  // NOLINT
+  EXPECT_FATAL_FAILURE(ASSERT_NEAR(1.0f, 1.5f, 0.25f),  // NOLINT
                        "The difference between 1.0f and 1.5f is 0.5, "
                        "which exceeds 0.25f");
-  // To work around a bug in gcc 2.95.0, there is intentionally no
-  // space after the first comma in the previous line.
 }
 
 // Tests the cases where FloatLE() should succeed.
@@ -3077,8 +3085,13 @@
   EXPECT_NONFATAL_FAILURE(EXPECT_NEAR(1.0, 1.5, 0.25),  // NOLINT
                           "The difference between 1.0 and 1.5 is 0.5, "
                           "which exceeds 0.25");
-  // To work around a bug in gcc 2.95.0, there is intentionally no
-  // space after the first comma in the previous statement.
+  // At this magnitude adjacent doubles are 512.0 apart, so this triggers a
+  // slightly different failure reporting path.
+  EXPECT_NONFATAL_FAILURE(
+      EXPECT_NEAR(4.2934311416234112e+18, 4.2934311416234107e+18, 1.0),
+      "The abs_error parameter 1.0 evaluates to 1 which is smaller than the "
+      "minimum distance between doubles for numbers of this magnitude which is "
+      "512");
 }
 
 // Tests ASSERT_NEAR.
@@ -3088,8 +3101,6 @@
   EXPECT_FATAL_FAILURE(ASSERT_NEAR(1.0, 1.5, 0.25),  // NOLINT
                        "The difference between 1.0 and 1.5 is 0.5, "
                        "which exceeds 0.25");
-  // To work around a bug in gcc 2.95.0, there is intentionally no
-  // space after the first comma in the previous statement.
 }
 
 // Tests the cases where DoubleLE() should succeed.
@@ -3342,9 +3353,26 @@
 
 #if GTEST_HAS_EXCEPTIONS
 
+#if GTEST_HAS_RTTI
+
+#ifdef _MSC_VER
+#define ERROR_DESC "class std::runtime_error"
+#else
+#define ERROR_DESC "std::runtime_error"
+#endif
+
+#else  // GTEST_HAS_RTTI
+
+#define ERROR_DESC "an std::exception-derived error"
+
+#endif  // GTEST_HAS_RTTI
+
 void ThrowAnInteger() {
   throw 1;
 }
+void ThrowRuntimeError(const char* what) {
+  throw std::runtime_error(what);
+}
 
 // Tests that assertion arguments are evaluated exactly once.
 TEST_F(SingleEvaluationTest, ExceptionTests) {
@@ -3362,31 +3390,38 @@
   }, bool), "throws a different type");
   EXPECT_EQ(2, a_);
 
+  // failed EXPECT_THROW, throws runtime error
+  EXPECT_NONFATAL_FAILURE(EXPECT_THROW({  // NOLINT
+    a_++;
+    ThrowRuntimeError("A description");
+  }, bool), "throws " ERROR_DESC " with description \"A description\"");
+  EXPECT_EQ(3, a_);
+
   // failed EXPECT_THROW, throws nothing
   EXPECT_NONFATAL_FAILURE(EXPECT_THROW(a_++, bool), "throws nothing");
-  EXPECT_EQ(3, a_);
+  EXPECT_EQ(4, a_);
 
   // successful EXPECT_NO_THROW
   EXPECT_NO_THROW(a_++);
-  EXPECT_EQ(4, a_);
+  EXPECT_EQ(5, a_);
 
   // failed EXPECT_NO_THROW
   EXPECT_NONFATAL_FAILURE(EXPECT_NO_THROW({  // NOLINT
     a_++;
     ThrowAnInteger();
   }), "it throws");
-  EXPECT_EQ(5, a_);
+  EXPECT_EQ(6, a_);
 
   // successful EXPECT_ANY_THROW
   EXPECT_ANY_THROW({  // NOLINT
     a_++;
     ThrowAnInteger();
   });
-  EXPECT_EQ(6, a_);
+  EXPECT_EQ(7, a_);
 
   // failed EXPECT_ANY_THROW
   EXPECT_NONFATAL_FAILURE(EXPECT_ANY_THROW(a_++), "it doesn't");
-  EXPECT_EQ(7, a_);
+  EXPECT_EQ(8, a_);
 }
 
 #endif  // GTEST_HAS_EXCEPTIONS
@@ -3496,7 +3531,7 @@
 std::vector<size_t> CharsToIndices(const std::string& str) {
   std::vector<size_t> out;
   for (size_t i = 0; i < str.size(); ++i) {
-    out.push_back(str[i]);
+    out.push_back(static_cast<size_t>(str[i]));
   }
   return out;
 }
@@ -3731,10 +3766,6 @@
 TEST(AssertionTest, ASSERT_EQ_NULL) {
   // A success.
   const char* p = nullptr;
-  // Some older GCC versions may issue a spurious warning in this or the next
-  // assertion statement. This warning should not be suppressed with
-  // static_cast since the test verifies the ability to use bare NULL as the
-  // expected parameter to the macro.
   ASSERT_EQ(nullptr, p);
 
   // A failure.
@@ -3810,6 +3841,12 @@
       ASSERT_THROW(ThrowAnInteger(), bool),
       "Expected: ThrowAnInteger() throws an exception of type bool.\n"
       "  Actual: it throws a different type.");
+  EXPECT_FATAL_FAILURE(
+      ASSERT_THROW(ThrowRuntimeError("A description"), std::logic_error),
+      "Expected: ThrowRuntimeError(\"A description\") "
+      "throws an exception of type std::logic_error.\n  "
+      "Actual: it throws " ERROR_DESC " "
+      "with description \"A description\".");
 # endif
 
   EXPECT_FATAL_FAILURE(
@@ -3824,6 +3861,11 @@
   EXPECT_FATAL_FAILURE(ASSERT_NO_THROW(ThrowAnInteger()),
                        "Expected: ThrowAnInteger() doesn't throw an exception."
                        "\n  Actual: it throws.");
+  EXPECT_FATAL_FAILURE(ASSERT_NO_THROW(ThrowRuntimeError("A description")),
+                       "Expected: ThrowRuntimeError(\"A description\") "
+                       "doesn't throw an exception.\n  "
+                       "Actual: it throws " ERROR_DESC " "
+                       "with description \"A description\".");
 }
 
 // Tests ASSERT_ANY_THROW.
@@ -4093,11 +4135,13 @@
 
 #endif  // GTEST_OS_WINDOWS
 
-#ifdef __BORLANDC__
-// Silences warnings: "Condition is always true", "Unreachable code"
-# pragma option push -w-ccc -w-rch
+// The following code intentionally tests a suboptimal syntax.
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdangling-else"
+#pragma GCC diagnostic ignored "-Wempty-body"
+#pragma GCC diagnostic ignored "-Wpragmas"
 #endif
-
 // Tests that the assertion macros behave like single statements.
 TEST(AssertionSyntaxTest, BasicAssertionsBehavesLikeSingleStatement) {
   if (AlwaysFalse())
@@ -4117,6 +4161,9 @@
   else
     EXPECT_GT(3, 2) << "";
 }
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
 
 #if GTEST_HAS_EXCEPTIONS
 // Tests that the compiler will not complain about unreachable code in the
@@ -4133,6 +4180,17 @@
   EXPECT_NONFATAL_FAILURE(EXPECT_ANY_THROW(n++), "");
 }
 
+TEST(ExpectThrowTest, DoesNotGenerateDuplicateCatchClauseWarning) {
+  EXPECT_THROW(throw std::exception(), std::exception);
+}
+
+// The following code intentionally tests a suboptimal syntax.
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdangling-else"
+#pragma GCC diagnostic ignored "-Wempty-body"
+#pragma GCC diagnostic ignored "-Wpragmas"
+#endif
 TEST(AssertionSyntaxTest, ExceptionAssertionsBehavesLikeSingleStatement) {
   if (AlwaysFalse())
     EXPECT_THROW(ThrowNothing(), bool);
@@ -4158,8 +4216,19 @@
   else
     ;  // NOLINT
 }
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
+
 #endif  // GTEST_HAS_EXCEPTIONS
 
+// The following code intentionally tests a suboptimal syntax.
+#ifdef __GNUC__
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdangling-else"
+#pragma GCC diagnostic ignored "-Wempty-body"
+#pragma GCC diagnostic ignored "-Wpragmas"
+#endif
 TEST(AssertionSyntaxTest, NoFatalFailureAssertionsBehavesLikeSingleStatement) {
   if (AlwaysFalse())
     EXPECT_NO_FATAL_FAILURE(FAIL()) << "This should never be executed. "
@@ -4182,6 +4251,9 @@
   else
     ASSERT_NO_FATAL_FAILURE(SUCCEED());
 }
+#ifdef __GNUC__
+#pragma GCC diagnostic pop
+#endif
 
 // Tests that the assertion macros work well with switch statements.
 TEST(AssertionSyntaxTest, WorksWithSwitch) {
@@ -4311,10 +4383,8 @@
 TEST(AssertionWithMessageTest, ASSERT_FLOATING) {
   ASSERT_FLOAT_EQ(1, 1) << "This should succeed.";
   ASSERT_DOUBLE_EQ(1, 1) << "This should succeed.";
-  EXPECT_FATAL_FAILURE(ASSERT_NEAR(1,1.2, 0.1) << "Expect failure.",  // NOLINT
+  EXPECT_FATAL_FAILURE(ASSERT_NEAR(1, 1.2, 0.1) << "Expect failure.",  // NOLINT
                        "Expect failure.");
-  // To work around a bug in gcc 2.95.0, there is intentionally no
-  // space after the first comma in the previous statement.
 }
 
 // Tests using ASSERT_FALSE with a streamed message.
@@ -4455,10 +4525,6 @@
 TEST(ExpectTest, EXPECT_EQ_NULL) {
   // A success.
   const char* p = nullptr;
-  // Some older GCC versions may issue a spurious warning in this or the next
-  // assertion statement. This warning should not be suppressed with
-  // static_cast since the test verifies the ability to use bare NULL as the
-  // expected parameter to the macro.
   EXPECT_EQ(nullptr, p);
 
   // A failure.
@@ -4549,6 +4615,12 @@
   EXPECT_NONFATAL_FAILURE(EXPECT_THROW(ThrowAnInteger(), bool),
                           "Expected: ThrowAnInteger() throws an exception of "
                           "type bool.\n  Actual: it throws a different type.");
+  EXPECT_NONFATAL_FAILURE(EXPECT_THROW(ThrowRuntimeError("A description"),
+                                       std::logic_error),
+                          "Expected: ThrowRuntimeError(\"A description\") "
+                          "throws an exception of type std::logic_error.\n  "
+                          "Actual: it throws " ERROR_DESC " "
+                          "with description \"A description\".");
   EXPECT_NONFATAL_FAILURE(
       EXPECT_THROW(ThrowNothing(), bool),
       "Expected: ThrowNothing() throws an exception of type bool.\n"
@@ -4561,6 +4633,11 @@
   EXPECT_NONFATAL_FAILURE(EXPECT_NO_THROW(ThrowAnInteger()),
                           "Expected: ThrowAnInteger() doesn't throw an "
                           "exception.\n  Actual: it throws.");
+  EXPECT_NONFATAL_FAILURE(EXPECT_NO_THROW(ThrowRuntimeError("A description")),
+                          "Expected: ThrowRuntimeError(\"A description\") "
+                          "doesn't throw an exception.\n  "
+                          "Actual: it throws " ERROR_DESC " "
+                          "with description \"A description\".");
 }
 
 // Tests EXPECT_ANY_THROW.
@@ -4705,6 +4782,19 @@
                        "Intentional failure.");
 }
 
+// Tests GTEST_FAIL_AT.
+TEST(MacroTest, GTEST_FAIL_AT) {
+  // Verifies that GTEST_FAIL_AT does generate a fatal failure and
+  // the failure message contains the user-streamed part.
+  EXPECT_FATAL_FAILURE(GTEST_FAIL_AT("foo.cc", 42) << "Wrong!", "Wrong!");
+
+  // Verifies that the user-streamed part is optional.
+  EXPECT_FATAL_FAILURE(GTEST_FAIL_AT("foo.cc", 42), "Failed");
+
+  // See the ADD_FAIL_AT test above to see how we test that the failure message
+  // contains the right filename and line number -- the same applies here.
+}
+
 // Tests SUCCEED
 TEST(MacroTest, SUCCEED) {
   SUCCEED();
@@ -5287,7 +5377,7 @@
 TEST_F(TestInfoTest, Names) {
   const TestInfo* const test_info = GetTestInfo("Names");
 
-  ASSERT_STREQ("TestInfoTest", test_info->test_case_name());
+  ASSERT_STREQ("TestInfoTest", test_info->test_suite_name());
   ASSERT_STREQ("Names", test_info->name());
 }
 
@@ -5357,7 +5447,7 @@
 
 // Tests setting up and tearing down a test case.
 // Legacy API is deprecated but still available
-#ifndef REMOVE_LEGACY_TEST_CASEAPI
+#ifndef GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 class SetUpTestCaseTest : public Test {
  protected:
   // This will be called once before the first test in this test case
@@ -5416,7 +5506,7 @@
 TEST_F(SetUpTestCaseTest, Test2) {
   EXPECT_STREQ("123", shared_resource_);
 }
-#endif  //  REMOVE_LEGACY_TEST_CASEAPI
+#endif  //  GTEST_REMOVE_LEGACY_TEST_CASEAPI_
 
 // Tests SetupTestSuite/TearDown TestSuite
 class SetUpTestSuiteTest : public Test {
@@ -5485,20 +5575,23 @@
 // The Flags struct stores a copy of all Google Test flags.
 struct Flags {
   // Constructs a Flags struct where each flag has its default value.
-  Flags() : also_run_disabled_tests(false),
-            break_on_failure(false),
-            catch_exceptions(false),
-            death_test_use_fork(false),
-            filter(""),
-            list_tests(false),
-            output(""),
-            print_time(true),
-            random_seed(0),
-            repeat(1),
-            shuffle(false),
-            stack_trace_depth(kMaxStackTraceDepth),
-            stream_result_to(""),
-            throw_on_failure(false) {}
+  Flags()
+      : also_run_disabled_tests(false),
+        break_on_failure(false),
+        catch_exceptions(false),
+        death_test_use_fork(false),
+        fail_fast(false),
+        filter(""),
+        list_tests(false),
+        output(""),
+        brief(false),
+        print_time(true),
+        random_seed(0),
+        repeat(1),
+        shuffle(false),
+        stack_trace_depth(kMaxStackTraceDepth),
+        stream_result_to(""),
+        throw_on_failure(false) {}
 
   // Factory methods.
 
@@ -5534,6 +5627,14 @@
     return flags;
   }
 
+  // Creates a Flags struct where the gtest_fail_fast flag has
+  // the given value.
+  static Flags FailFast(bool fail_fast) {
+    Flags flags;
+    flags.fail_fast = fail_fast;
+    return flags;
+  }
+
   // Creates a Flags struct where the gtest_filter flag has the given
   // value.
   static Flags Filter(const char* filter) {
@@ -5558,6 +5659,14 @@
     return flags;
   }
 
+  // Creates a Flags struct where the gtest_brief flag has the given
+  // value.
+  static Flags Brief(bool brief) {
+    Flags flags;
+    flags.brief = brief;
+    return flags;
+  }
+
   // Creates a Flags struct where the gtest_print_time flag has the given
   // value.
   static Flags PrintTime(bool print_time) {
@@ -5568,7 +5677,7 @@
 
   // Creates a Flags struct where the gtest_random_seed flag has the given
   // value.
-  static Flags RandomSeed(Int32 random_seed) {
+  static Flags RandomSeed(int32_t random_seed) {
     Flags flags;
     flags.random_seed = random_seed;
     return flags;
@@ -5576,7 +5685,7 @@
 
   // Creates a Flags struct where the gtest_repeat flag has the given
   // value.
-  static Flags Repeat(Int32 repeat) {
+  static Flags Repeat(int32_t repeat) {
     Flags flags;
     flags.repeat = repeat;
     return flags;
@@ -5592,7 +5701,7 @@
 
   // Creates a Flags struct where the GTEST_FLAG(stack_trace_depth) flag has
   // the given value.
-  static Flags StackTraceDepth(Int32 stack_trace_depth) {
+  static Flags StackTraceDepth(int32_t stack_trace_depth) {
     Flags flags;
     flags.stack_trace_depth = stack_trace_depth;
     return flags;
@@ -5619,14 +5728,16 @@
   bool break_on_failure;
   bool catch_exceptions;
   bool death_test_use_fork;
+  bool fail_fast;
   const char* filter;
   bool list_tests;
   const char* output;
+  bool brief;
   bool print_time;
-  Int32 random_seed;
-  Int32 repeat;
+  int32_t random_seed;
+  int32_t repeat;
   bool shuffle;
-  Int32 stack_trace_depth;
+  int32_t stack_trace_depth;
   const char* stream_result_to;
   bool throw_on_failure;
 };
@@ -5640,9 +5751,11 @@
     GTEST_FLAG(break_on_failure) = false;
     GTEST_FLAG(catch_exceptions) = false;
     GTEST_FLAG(death_test_use_fork) = false;
+    GTEST_FLAG(fail_fast) = false;
     GTEST_FLAG(filter) = "";
     GTEST_FLAG(list_tests) = false;
     GTEST_FLAG(output) = "";
+    GTEST_FLAG(brief) = false;
     GTEST_FLAG(print_time) = true;
     GTEST_FLAG(random_seed) = 0;
     GTEST_FLAG(repeat) = 1;
@@ -5654,11 +5767,11 @@
 
   // Asserts that two narrow or wide string arrays are equal.
   template <typename CharType>
-  static void AssertStringArrayEq(size_t size1, CharType** array1,
-                                  size_t size2, CharType** array2) {
+  static void AssertStringArrayEq(int size1, CharType** array1, int size2,
+                                  CharType** array2) {
     ASSERT_EQ(size1, size2) << " Array sizes different.";
 
-    for (size_t i = 0; i != size1; i++) {
+    for (int i = 0; i != size1; i++) {
       ASSERT_STREQ(array1[i], array2[i]) << " where i == " << i;
     }
   }
@@ -5670,9 +5783,11 @@
     EXPECT_EQ(expected.break_on_failure, GTEST_FLAG(break_on_failure));
     EXPECT_EQ(expected.catch_exceptions, GTEST_FLAG(catch_exceptions));
     EXPECT_EQ(expected.death_test_use_fork, GTEST_FLAG(death_test_use_fork));
+    EXPECT_EQ(expected.fail_fast, GTEST_FLAG(fail_fast));
     EXPECT_STREQ(expected.filter, GTEST_FLAG(filter).c_str());
     EXPECT_EQ(expected.list_tests, GTEST_FLAG(list_tests));
     EXPECT_STREQ(expected.output, GTEST_FLAG(output).c_str());
+    EXPECT_EQ(expected.brief, GTEST_FLAG(brief));
     EXPECT_EQ(expected.print_time, GTEST_FLAG(print_time));
     EXPECT_EQ(expected.random_seed, GTEST_FLAG(random_seed));
     EXPECT_EQ(expected.repeat, GTEST_FLAG(repeat));
@@ -5756,6 +5871,15 @@
   GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags(), false);
 }
 
+// Tests parsing --gtest_fail_fast.
+TEST_F(ParseFlagsTest, FailFast) {
+  const char* argv[] = {"foo.exe", "--gtest_fail_fast", nullptr};
+
+  const char* argv2[] = {"foo.exe", nullptr};
+
+  GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::FailFast(true), false);
+}
+
 // Tests parsing a bad --gtest_filter flag.
 TEST_F(ParseFlagsTest, FilterBad) {
   const char* argv[] = {"foo.exe", "--gtest_filter", nullptr};
@@ -5955,6 +6079,33 @@
                             Flags::Output("xml:directory/path/"), false);
 }
 
+// Tests having a --gtest_brief flag
+TEST_F(ParseFlagsTest, BriefFlag) {
+  const char* argv[] = {"foo.exe", "--gtest_brief", nullptr};
+
+  const char* argv2[] = {"foo.exe", nullptr};
+
+  GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Brief(true), false);
+}
+
+// Tests having a --gtest_brief flag with a "true" value
+TEST_F(ParseFlagsTest, BriefFlagTrue) {
+  const char* argv[] = {"foo.exe", "--gtest_brief=1", nullptr};
+
+  const char* argv2[] = {"foo.exe", nullptr};
+
+  GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Brief(true), false);
+}
+
+// Tests having a --gtest_brief flag with a "false" value
+TEST_F(ParseFlagsTest, BriefFlagFalse) {
+  const char* argv[] = {"foo.exe", "--gtest_brief=0", nullptr};
+
+  const char* argv2[] = {"foo.exe", nullptr};
+
+  GTEST_TEST_PARSING_FLAGS_(argv, argv2, Flags::Brief(false), false);
+}
+
 // Tests having a --gtest_print_time flag
 TEST_F(ParseFlagsTest, PrintTimeFlag) {
   const char* argv[] = {"foo.exe", "--gtest_print_time", nullptr};
@@ -6154,7 +6305,7 @@
 #if GTEST_USE_OWN_FLAGFILE_FLAG_
 class FlagfileTest : public ParseFlagsTest {
  public:
-  virtual void SetUp() {
+  void SetUp() override {
     ParseFlagsTest::SetUp();
 
     testdata_path_.Set(internal::FilePath(
@@ -6164,7 +6315,7 @@
     EXPECT_TRUE(testdata_path_.CreateFolder());
   }
 
-  virtual void TearDown() {
+  void TearDown() override {
     testing::internal::posix::RmDir(testdata_path_.c_str());
     ParseFlagsTest::TearDown();
   }
@@ -6261,8 +6412,8 @@
     UnitTest::GetInstance()->current_test_info();
   ASSERT_TRUE(nullptr != test_info)
       << "There is a test running so we should have a valid TestInfo.";
-  EXPECT_STREQ("CurrentTestInfoTest", test_info->test_case_name())
-      << "Expected the name of the currently running test case.";
+  EXPECT_STREQ("CurrentTestInfoTest", test_info->test_suite_name())
+      << "Expected the name of the currently running test suite.";
   EXPECT_STREQ("WorksForFirstTestInATestSuite", test_info->name())
       << "Expected the name of the currently running test.";
 }
@@ -6276,8 +6427,8 @@
     UnitTest::GetInstance()->current_test_info();
   ASSERT_TRUE(nullptr != test_info)
       << "There is a test running so we should have a valid TestInfo.";
-  EXPECT_STREQ("CurrentTestInfoTest", test_info->test_case_name())
-      << "Expected the name of the currently running test case.";
+  EXPECT_STREQ("CurrentTestInfoTest", test_info->test_suite_name())
+      << "Expected the name of the currently running test suite.";
   EXPECT_STREQ("WorksForSecondTestInATestSuite", test_info->name())
       << "Expected the name of the currently running test.";
 }
@@ -7065,89 +7216,79 @@
 class ConversionHelperBase {};
 class ConversionHelperDerived : public ConversionHelperBase {};
 
-// Tests that IsAProtocolMessage<T>::value is a compile-time constant.
-TEST(IsAProtocolMessageTest, ValueIsCompileTimeConstant) {
-  GTEST_COMPILE_ASSERT_(IsAProtocolMessage<::proto2::Message>::value,
+struct HasDebugStringMethods {
+  std::string DebugString() const { return ""; }
+  std::string ShortDebugString() const { return ""; }
+};
+
+struct InheritsDebugStringMethods : public HasDebugStringMethods {};
+
+struct WrongTypeDebugStringMethod {
+  std::string DebugString() const { return ""; }
+  int ShortDebugString() const { return 1; }
+};
+
+struct NotConstDebugStringMethod {
+  std::string DebugString() { return ""; }
+  std::string ShortDebugString() const { return ""; }
+};
+
+struct MissingDebugStringMethod {
+  std::string DebugString() { return ""; }
+};
+
+struct IncompleteType;
+
+// Tests that HasDebugStringAndShortDebugString<T>::value is a compile-time
+// constant.
+TEST(HasDebugStringAndShortDebugStringTest, ValueIsCompileTimeConstant) {
+  GTEST_COMPILE_ASSERT_(
+      HasDebugStringAndShortDebugString<HasDebugStringMethods>::value,
+      const_true);
+  GTEST_COMPILE_ASSERT_(
+      HasDebugStringAndShortDebugString<InheritsDebugStringMethods>::value,
+      const_true);
+  GTEST_COMPILE_ASSERT_(HasDebugStringAndShortDebugString<
+                            const InheritsDebugStringMethods>::value,
                         const_true);
-  GTEST_COMPILE_ASSERT_(!IsAProtocolMessage<int>::value, const_false);
+  GTEST_COMPILE_ASSERT_(
+      !HasDebugStringAndShortDebugString<WrongTypeDebugStringMethod>::value,
+      const_false);
+  GTEST_COMPILE_ASSERT_(
+      !HasDebugStringAndShortDebugString<NotConstDebugStringMethod>::value,
+      const_false);
+  GTEST_COMPILE_ASSERT_(
+      !HasDebugStringAndShortDebugString<MissingDebugStringMethod>::value,
+      const_false);
+  GTEST_COMPILE_ASSERT_(
+      !HasDebugStringAndShortDebugString<IncompleteType>::value, const_false);
+  GTEST_COMPILE_ASSERT_(!HasDebugStringAndShortDebugString<int>::value,
+                        const_false);
 }
 
-// Tests that IsAProtocolMessage<T>::value is true when T is
-// proto2::Message or a sub-class of it.
-TEST(IsAProtocolMessageTest, ValueIsTrueWhenTypeIsAProtocolMessage) {
-  EXPECT_TRUE(IsAProtocolMessage< ::proto2::Message>::value);
+// Tests that HasDebugStringAndShortDebugString<T>::value is true when T has
+// needed methods.
+TEST(HasDebugStringAndShortDebugStringTest,
+     ValueIsTrueWhenTypeHasDebugStringAndShortDebugString) {
+  EXPECT_TRUE(
+      HasDebugStringAndShortDebugString<InheritsDebugStringMethods>::value);
 }
 
-// Tests that IsAProtocolMessage<T>::value is false when T is neither
-// ::proto2::Message nor a sub-class of it.
-TEST(IsAProtocolMessageTest, ValueIsFalseWhenTypeIsNotAProtocolMessage) {
-  EXPECT_FALSE(IsAProtocolMessage<int>::value);
-  EXPECT_FALSE(IsAProtocolMessage<const ConversionHelperBase>::value);
-}
-
-// Tests that CompileAssertTypesEqual compiles when the type arguments are
-// equal.
-TEST(CompileAssertTypesEqual, CompilesWhenTypesAreEqual) {
-  CompileAssertTypesEqual<void, void>();
-  CompileAssertTypesEqual<int*, int*>();
-}
-
-// Tests that RemoveReference does not affect non-reference types.
-TEST(RemoveReferenceTest, DoesNotAffectNonReferenceType) {
-  CompileAssertTypesEqual<int, RemoveReference<int>::type>();
-  CompileAssertTypesEqual<const char, RemoveReference<const char>::type>();
-}
-
-// Tests that RemoveReference removes reference from reference types.
-TEST(RemoveReferenceTest, RemovesReference) {
-  CompileAssertTypesEqual<int, RemoveReference<int&>::type>();
-  CompileAssertTypesEqual<const char, RemoveReference<const char&>::type>();
-}
-
-// Tests GTEST_REMOVE_REFERENCE_.
-
-template <typename T1, typename T2>
-void TestGTestRemoveReference() {
-  CompileAssertTypesEqual<T1, GTEST_REMOVE_REFERENCE_(T2)>();
-}
-
-TEST(RemoveReferenceTest, MacroVersion) {
-  TestGTestRemoveReference<int, int>();
-  TestGTestRemoveReference<const char, const char&>();
-}
-
-
-// Tests that RemoveConst does not affect non-const types.
-TEST(RemoveConstTest, DoesNotAffectNonConstType) {
-  CompileAssertTypesEqual<int, RemoveConst<int>::type>();
-  CompileAssertTypesEqual<char&, RemoveConst<char&>::type>();
-}
-
-// Tests that RemoveConst removes const from const types.
-TEST(RemoveConstTest, RemovesConst) {
-  CompileAssertTypesEqual<int, RemoveConst<const int>::type>();
-  CompileAssertTypesEqual<char[2], RemoveConst<const char[2]>::type>();
-  CompileAssertTypesEqual<char[2][3], RemoveConst<const char[2][3]>::type>();
-}
-
-// Tests GTEST_REMOVE_CONST_.
-
-template <typename T1, typename T2>
-void TestGTestRemoveConst() {
-  CompileAssertTypesEqual<T1, GTEST_REMOVE_CONST_(T2)>();
-}
-
-TEST(RemoveConstTest, MacroVersion) {
-  TestGTestRemoveConst<int, int>();
-  TestGTestRemoveConst<double&, double&>();
-  TestGTestRemoveConst<char, const char>();
+// Tests that HasDebugStringAndShortDebugString<T>::value is false when T
+// doesn't have needed methods.
+TEST(HasDebugStringAndShortDebugStringTest,
+     ValueIsFalseWhenTypeIsNotAProtocolMessage) {
+  EXPECT_FALSE(HasDebugStringAndShortDebugString<int>::value);
+  EXPECT_FALSE(
+      HasDebugStringAndShortDebugString<const ConversionHelperBase>::value);
 }
 
 // Tests GTEST_REMOVE_REFERENCE_AND_CONST_.
 
 template <typename T1, typename T2>
 void TestGTestRemoveReferenceAndConst() {
-  CompileAssertTypesEqual<T1, GTEST_REMOVE_REFERENCE_AND_CONST_(T2)>();
+  static_assert(std::is_same<T1, GTEST_REMOVE_REFERENCE_AND_CONST_(T2)>::value,
+                "GTEST_REMOVE_REFERENCE_AND_CONST_ failed.");
 }
 
 TEST(RemoveReferenceToConstTest, Works) {
@@ -7158,35 +7299,12 @@
   TestGTestRemoveReferenceAndConst<const char*, const char*>();
 }
 
-// Tests that AddReference does not affect reference types.
-TEST(AddReferenceTest, DoesNotAffectReferenceType) {
-  CompileAssertTypesEqual<int&, AddReference<int&>::type>();
-  CompileAssertTypesEqual<const char&, AddReference<const char&>::type>();
-}
-
-// Tests that AddReference adds reference to non-reference types.
-TEST(AddReferenceTest, AddsReference) {
-  CompileAssertTypesEqual<int&, AddReference<int>::type>();
-  CompileAssertTypesEqual<const char&, AddReference<const char>::type>();
-}
-
-// Tests GTEST_ADD_REFERENCE_.
-
-template <typename T1, typename T2>
-void TestGTestAddReference() {
-  CompileAssertTypesEqual<T1, GTEST_ADD_REFERENCE_(T2)>();
-}
-
-TEST(AddReferenceTest, MacroVersion) {
-  TestGTestAddReference<int&, int>();
-  TestGTestAddReference<const char&, const char&>();
-}
-
 // Tests GTEST_REFERENCE_TO_CONST_.
 
 template <typename T1, typename T2>
 void TestGTestReferenceToConst() {
-  CompileAssertTypesEqual<T1, GTEST_REFERENCE_TO_CONST_(T2)>();
+  static_assert(std::is_same<T1, GTEST_REFERENCE_TO_CONST_(T2)>::value,
+                "GTEST_REFERENCE_TO_CONST_ failed.");
 }
 
 TEST(GTestReferenceToConstTest, Works) {
@@ -7417,20 +7535,15 @@
 // ElemFromList
 TEST(ElemFromList, Basic) {
   using testing::internal::ElemFromList;
-  using Idx = testing::internal::MakeIndexSequence<3>::type;
+  EXPECT_TRUE(
+      (std::is_same<int, ElemFromList<0, int, double, char>::type>::value));
+  EXPECT_TRUE(
+      (std::is_same<double, ElemFromList<1, int, double, char>::type>::value));
+  EXPECT_TRUE(
+      (std::is_same<char, ElemFromList<2, int, double, char>::type>::value));
   EXPECT_TRUE((
-      std::is_same<int, ElemFromList<0, Idx, int, double, char>::type>::value));
-  EXPECT_TRUE(
-      (std::is_same<double,
-                    ElemFromList<1, Idx, int, double, char>::type>::value));
-  EXPECT_TRUE(
-      (std::is_same<char,
-                    ElemFromList<2, Idx, int, double, char>::type>::value));
-  EXPECT_TRUE(
-      (std::is_same<
-          char, ElemFromList<7, testing::internal::MakeIndexSequence<12>::type,
-                             int, int, int, int, int, int, int, char, int, int,
-                             int, int>::type>::value));
+      std::is_same<char, ElemFromList<7, int, int, int, int, int, int, int,
+                                      char, int, int, int, int>::type>::value));
 }
 
 // FlatTuple
@@ -7442,7 +7555,8 @@
   EXPECT_EQ(0.0, tuple.Get<1>());
   EXPECT_EQ(nullptr, tuple.Get<2>());
 
-  tuple = FlatTuple<int, double, const char*>(7, 3.2, "Foo");
+  tuple = FlatTuple<int, double, const char*>(
+      testing::internal::FlatTupleConstructTag{}, 7, 3.2, "Foo");
   EXPECT_EQ(7, tuple.Get<0>());
   EXPECT_EQ(3.2, tuple.Get<1>());
   EXPECT_EQ(std::string("Foo"), tuple.Get<2>());
@@ -7451,6 +7565,147 @@
   EXPECT_EQ(5.1, tuple.Get<1>());
 }
 
+namespace {
+std::string AddIntToString(int i, const std::string& s) {
+  return s + std::to_string(i);
+}
+}  // namespace
+
+TEST(FlatTuple, Apply) {
+  using testing::internal::FlatTuple;
+
+  FlatTuple<int, std::string> tuple{testing::internal::FlatTupleConstructTag{},
+                                    5, "Hello"};
+
+  // Lambda.
+  EXPECT_TRUE(tuple.Apply([](int i, const std::string& s) -> bool {
+    return i == static_cast<int>(s.size());
+  }));
+
+  // Function.
+  EXPECT_EQ(tuple.Apply(AddIntToString), "Hello5");
+
+  // Mutating operations.
+  tuple.Apply([](int& i, std::string& s) {
+    ++i;
+    s += s;
+  });
+  EXPECT_EQ(tuple.Get<0>(), 6);
+  EXPECT_EQ(tuple.Get<1>(), "HelloHello");
+}
+
+struct ConstructionCounting {
+  ConstructionCounting() { ++default_ctor_calls; }
+  ~ConstructionCounting() { ++dtor_calls; }
+  ConstructionCounting(const ConstructionCounting&) { ++copy_ctor_calls; }
+  ConstructionCounting(ConstructionCounting&&) noexcept { ++move_ctor_calls; }
+  ConstructionCounting& operator=(const ConstructionCounting&) {
+    ++copy_assignment_calls;
+    return *this;
+  }
+  ConstructionCounting& operator=(ConstructionCounting&&) noexcept {
+    ++move_assignment_calls;
+    return *this;
+  }
+
+  static void Reset() {
+    default_ctor_calls = 0;
+    dtor_calls = 0;
+    copy_ctor_calls = 0;
+    move_ctor_calls = 0;
+    copy_assignment_calls = 0;
+    move_assignment_calls = 0;
+  }
+
+  static int default_ctor_calls;
+  static int dtor_calls;
+  static int copy_ctor_calls;
+  static int move_ctor_calls;
+  static int copy_assignment_calls;
+  static int move_assignment_calls;
+};
+
+int ConstructionCounting::default_ctor_calls = 0;
+int ConstructionCounting::dtor_calls = 0;
+int ConstructionCounting::copy_ctor_calls = 0;
+int ConstructionCounting::move_ctor_calls = 0;
+int ConstructionCounting::copy_assignment_calls = 0;
+int ConstructionCounting::move_assignment_calls = 0;
+
+TEST(FlatTuple, ConstructorCalls) {
+  using testing::internal::FlatTuple;
+
+  // Default construction.
+  ConstructionCounting::Reset();
+  { FlatTuple<ConstructionCounting> tuple; }
+  EXPECT_EQ(ConstructionCounting::default_ctor_calls, 1);
+  EXPECT_EQ(ConstructionCounting::dtor_calls, 1);
+  EXPECT_EQ(ConstructionCounting::copy_ctor_calls, 0);
+  EXPECT_EQ(ConstructionCounting::move_ctor_calls, 0);
+  EXPECT_EQ(ConstructionCounting::copy_assignment_calls, 0);
+  EXPECT_EQ(ConstructionCounting::move_assignment_calls, 0);
+
+  // Copy construction.
+  ConstructionCounting::Reset();
+  {
+    ConstructionCounting elem;
+    FlatTuple<ConstructionCounting> tuple{
+        testing::internal::FlatTupleConstructTag{}, elem};
+  }
+  EXPECT_EQ(ConstructionCounting::default_ctor_calls, 1);
+  EXPECT_EQ(ConstructionCounting::dtor_calls, 2);
+  EXPECT_EQ(ConstructionCounting::copy_ctor_calls, 1);
+  EXPECT_EQ(ConstructionCounting::move_ctor_calls, 0);
+  EXPECT_EQ(ConstructionCounting::copy_assignment_calls, 0);
+  EXPECT_EQ(ConstructionCounting::move_assignment_calls, 0);
+
+  // Move construction.
+  ConstructionCounting::Reset();
+  {
+    FlatTuple<ConstructionCounting> tuple{
+        testing::internal::FlatTupleConstructTag{}, ConstructionCounting{}};
+  }
+  EXPECT_EQ(ConstructionCounting::default_ctor_calls, 1);
+  EXPECT_EQ(ConstructionCounting::dtor_calls, 2);
+  EXPECT_EQ(ConstructionCounting::copy_ctor_calls, 0);
+  EXPECT_EQ(ConstructionCounting::move_ctor_calls, 1);
+  EXPECT_EQ(ConstructionCounting::copy_assignment_calls, 0);
+  EXPECT_EQ(ConstructionCounting::move_assignment_calls, 0);
+
+  // Copy assignment.
+  // TODO(ofats): it should be testing assignment operator of FlatTuple, not its
+  // elements
+  ConstructionCounting::Reset();
+  {
+    FlatTuple<ConstructionCounting> tuple;
+    ConstructionCounting elem;
+    tuple.Get<0>() = elem;
+  }
+  EXPECT_EQ(ConstructionCounting::default_ctor_calls, 2);
+  EXPECT_EQ(ConstructionCounting::dtor_calls, 2);
+  EXPECT_EQ(ConstructionCounting::copy_ctor_calls, 0);
+  EXPECT_EQ(ConstructionCounting::move_ctor_calls, 0);
+  EXPECT_EQ(ConstructionCounting::copy_assignment_calls, 1);
+  EXPECT_EQ(ConstructionCounting::move_assignment_calls, 0);
+
+  // Move assignment.
+  // TODO(ofats): it should be testing assignment operator of FlatTuple, not its
+  // elements
+  ConstructionCounting::Reset();
+  {
+    FlatTuple<ConstructionCounting> tuple;
+    tuple.Get<0>() = ConstructionCounting{};
+  }
+  EXPECT_EQ(ConstructionCounting::default_ctor_calls, 2);
+  EXPECT_EQ(ConstructionCounting::dtor_calls, 2);
+  EXPECT_EQ(ConstructionCounting::copy_ctor_calls, 0);
+  EXPECT_EQ(ConstructionCounting::move_ctor_calls, 0);
+  EXPECT_EQ(ConstructionCounting::copy_assignment_calls, 0);
+  EXPECT_EQ(ConstructionCounting::move_assignment_calls, 1);
+
+  ConstructionCounting::Reset();
+}
+
 TEST(FlatTuple, ManyTypes) {
   using testing::internal::FlatTuple;
 
@@ -7503,22 +7758,7 @@
 }
 
 // Tests ad_hoc_test_result().
-
-class AdHocTestResultTest : public testing::Test {
- protected:
-  static void SetUpTestSuite() {
-    FAIL() << "A failure happened inside SetUpTestSuite().";
-  }
-};
-
-TEST_F(AdHocTestResultTest, AdHocTestResultForTestSuiteShowsFailure) {
-  const testing::TestResult& test_result = testing::UnitTest::GetInstance()
-                                               ->current_test_suite()
-                                               ->ad_hoc_test_result();
-  EXPECT_TRUE(test_result.Failed());
-}
-
-TEST_F(AdHocTestResultTest, AdHocTestResultTestForUnitTestDoesNotShowFailure) {
+TEST(AdHocTestResultTest, AdHocTestResultForUnitTestDoesNotShowFailure) {
   const testing::TestResult& test_result =
       testing::UnitTest::GetInstance()->ad_hoc_test_result();
   EXPECT_FALSE(test_result.Failed());
diff --git a/deps/boringssl/src/third_party/googletest/test/gtest_xml_output_unittest_.cc b/deps/boringssl/src/third_party/googletest/test/gtest_xml_output_unittest_.cc
index c95fd66..2b6634b 100644
--- a/deps/boringssl/src/third_party/googletest/test/gtest_xml_output_unittest_.cc
+++ b/deps/boringssl/src/third_party/googletest/test/gtest_xml_output_unittest_.cc
@@ -74,6 +74,15 @@
   GTEST_SKIP();
 }
 
+TEST_F(SkippedTest, SkippedWithMessage) {
+  GTEST_SKIP() << "It is good practice to tell why you skip a test.";
+}
+
+TEST_F(SkippedTest, SkippedAfterFailure) {
+  EXPECT_EQ(1, 2);
+  GTEST_SKIP() << "It is good practice to tell why you skip a test.";
+}
+
 TEST(MixedResultTest, Succeeds) {
   EXPECT_EQ(1, 1);
   ASSERT_EQ(1, 1);
diff --git a/deps/boringssl/src/third_party/googletest/xcode/Samples/FrameworkSample/widget.cc b/deps/boringssl/src/third_party/googletest/xcode/Samples/FrameworkSample/widget.cc
deleted file mode 100644
index bfc4e7f..0000000
--- a/deps/boringssl/src/third_party/googletest/xcode/Samples/FrameworkSample/widget.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: preston.a.jackson@gmail.com (Preston Jackson)
-//
-// Google Test - FrameworkSample
-// widget.cc
-//
-
-// Widget is a very simple class used for demonstrating the use of gtest
-
-#include "widget.h"
-
-Widget::Widget(int number, const std::string& name)
-    : number_(number),
-      name_(name) {}
-
-Widget::~Widget() {}
-
-float Widget::GetFloatValue() const {
-  return number_;
-}
-
-int Widget::GetIntValue() const {
-  return static_cast<int>(number_);
-}
-
-std::string Widget::GetStringValue() const {
-  return name_;
-}
-
-void Widget::GetCharPtrValue(char* buffer, size_t max_size) const {
-  // Copy the char* representation of name_ into buffer, up to max_size.
-  strncpy(buffer, name_.c_str(), max_size-1);
-  buffer[max_size-1] = '\0';
-  return;
-}
diff --git a/deps/boringssl/src/third_party/googletest/xcode/Samples/FrameworkSample/widget.h b/deps/boringssl/src/third_party/googletest/xcode/Samples/FrameworkSample/widget.h
deleted file mode 100644
index 0c55cdc..0000000
--- a/deps/boringssl/src/third_party/googletest/xcode/Samples/FrameworkSample/widget.h
+++ /dev/null
@@ -1,59 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: preston.a.jackson@gmail.com (Preston Jackson)
-//
-// Google Test - FrameworkSample
-// widget.h
-//
-
-// Widget is a very simple class used for demonstrating the use of gtest. It
-// simply stores two values a string and an integer, which are returned via
-// public accessors in multiple forms.
-
-#import <string>
-
-class Widget {
- public:
-  Widget(int number, const std::string& name);
-  ~Widget();
-
-  // Public accessors to number data
-  float GetFloatValue() const;
-  int GetIntValue() const;
-
-  // Public accessors to the string data
-  std::string GetStringValue() const;
-  void GetCharPtrValue(char* buffer, size_t max_size) const;
-
- private:
-  // Data members
-  float number_;
-  std::string name_;
-};
diff --git a/deps/boringssl/src/third_party/googletest/xcode/Samples/FrameworkSample/widget_test.cc b/deps/boringssl/src/third_party/googletest/xcode/Samples/FrameworkSample/widget_test.cc
deleted file mode 100644
index 8725994..0000000
--- a/deps/boringssl/src/third_party/googletest/xcode/Samples/FrameworkSample/widget_test.cc
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright 2008, Google Inc.
-// All rights reserved.
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are
-// met:
-//
-//     * Redistributions of source code must retain the above copyright
-// notice, this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above
-// copyright notice, this list of conditions and the following disclaimer
-// in the documentation and/or other materials provided with the
-// distribution.
-//     * Neither the name of Google Inc. nor the names of its
-// contributors may be used to endorse or promote products derived from
-// this software without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-//
-// Author: preston.a.jackson@gmail.com (Preston Jackson)
-//
-// Google Test - FrameworkSample
-// widget_test.cc
-//
-
-// This is a simple test file for the Widget class in the Widget.framework
-
-#include <string>
-#include "gtest/gtest.h"
-
-#include <Widget/widget.h>
-
-// This test verifies that the constructor sets the internal state of the
-// Widget class correctly.
-TEST(WidgetInitializerTest, TestConstructor) {
-  Widget widget(1.0f, "name");
-  EXPECT_FLOAT_EQ(1.0f, widget.GetFloatValue());
-  EXPECT_EQ(std::string("name"), widget.GetStringValue());
-}
-
-// This test verifies the conversion of the float and string values to int and
-// char*, respectively.
-TEST(WidgetInitializerTest, TestConversion) {
-  Widget widget(1.0f, "name");
-  EXPECT_EQ(1, widget.GetIntValue());
-
-  size_t max_size = 128;
-  char buffer[max_size];
-  widget.GetCharPtrValue(buffer, max_size);
-  EXPECT_STREQ("name", buffer);
-}
-
-// Use the Google Test main that is linked into the framework. It does something
-// like this:
-// int main(int argc, char** argv) {
-//   testing::InitGoogleTest(&argc, argv);
-//   return RUN_ALL_TESTS();
-// }
diff --git a/deps/boringssl/src/tool/CMakeLists.txt b/deps/boringssl/src/tool/CMakeLists.txt
index 7658713..e9e387b 100644
--- a/deps/boringssl/src/tool/CMakeLists.txt
+++ b/deps/boringssl/src/tool/CMakeLists.txt
@@ -23,6 +23,10 @@
 
 add_dependencies(bssl global_target)
 
+if(WIN32)
+  target_link_libraries(bssl ws2_32)
+endif()
+
 if(APPLE OR WIN32 OR ANDROID)
   target_link_libraries(bssl ssl crypto)
 else()
diff --git a/deps/boringssl/src/tool/speed.cc b/deps/boringssl/src/tool/speed.cc
index 2c36c80..1b89b42 100644
--- a/deps/boringssl/src/tool/speed.cc
+++ b/deps/boringssl/src/tool/speed.cc
@@ -28,6 +28,7 @@
 #include <openssl/aes.h>
 #include <openssl/bn.h>
 #include <openssl/curve25519.h>
+#include <openssl/crypto.h>
 #include <openssl/digest.h>
 #include <openssl/err.h>
 #include <openssl/ec.h>
@@ -956,32 +957,6 @@
 
   TimeResults results;
   {
-    EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_secp521r1);
-    if (group == NULL) {
-      return false;
-    }
-    if (!TimeFunction(&results, [&]() -> bool {
-          EC_RAW_POINT out;
-          return ec_hash_to_curve_p521_xmd_sha512_sswu_draft06(
-              group, &out, kLabel, sizeof(kLabel), input, sizeof(input));
-        })) {
-      fprintf(stderr, "hash-to-curve failed.\n");
-      return false;
-    }
-    results.Print("hash-to-curve P521_XMD:SHA-512_SSWU_RO_");
-
-    if (!TimeFunction(&results, [&]() -> bool {
-          EC_SCALAR out;
-          return ec_hash_to_scalar_p521_xmd_sha512_draft06(
-              group, &out, kLabel, sizeof(kLabel), input, sizeof(input));
-        })) {
-      fprintf(stderr, "hash-to-scalar failed.\n");
-      return false;
-    }
-    results.Print("hash-to-scalar P521_XMD:SHA-512");
-  }
-
-  {
     EC_GROUP *group = EC_GROUP_new_by_curve_name(NID_secp384r1);
     if (group == NULL) {
       return false;
@@ -1010,11 +985,12 @@
   return true;
 }
 
-static PMBTOKEN_PRETOKEN *pmbtoken_pretoken_dup(PMBTOKEN_PRETOKEN *in) {
-  PMBTOKEN_PRETOKEN *out =
-      (PMBTOKEN_PRETOKEN *)OPENSSL_malloc(sizeof(PMBTOKEN_PRETOKEN));
+static TRUST_TOKEN_PRETOKEN *trust_token_pretoken_dup(
+    TRUST_TOKEN_PRETOKEN *in) {
+  TRUST_TOKEN_PRETOKEN *out =
+      (TRUST_TOKEN_PRETOKEN *)OPENSSL_malloc(sizeof(TRUST_TOKEN_PRETOKEN));
   if (out) {
-    OPENSSL_memcpy(out, in, sizeof(PMBTOKEN_PRETOKEN));
+    OPENSSL_memcpy(out, in, sizeof(TRUST_TOKEN_PRETOKEN));
   }
   return out;
 }
@@ -1085,9 +1061,9 @@
                                                    &msg_len, batchsize);
         OPENSSL_free(issue_msg);
         // Clear pretokens.
-        sk_PMBTOKEN_PRETOKEN_pop_free(client->pretokens,
-                                      PMBTOKEN_PRETOKEN_free);
-        client->pretokens = sk_PMBTOKEN_PRETOKEN_new_null();
+        sk_TRUST_TOKEN_PRETOKEN_pop_free(client->pretokens,
+                                         TRUST_TOKEN_PRETOKEN_free);
+        client->pretokens = sk_TRUST_TOKEN_PRETOKEN_new_null();
         return ok;
       })) {
     fprintf(stderr, "TRUST_TOKEN_CLIENT_begin_issuance failed.\n");
@@ -1104,9 +1080,10 @@
   }
   bssl::UniquePtr<uint8_t> free_issue_msg(issue_msg);
 
-  bssl::UniquePtr<STACK_OF(PMBTOKEN_PRETOKEN)> pretokens(
-      sk_PMBTOKEN_PRETOKEN_deep_copy(client->pretokens, pmbtoken_pretoken_dup,
-                                     PMBTOKEN_PRETOKEN_free));
+  bssl::UniquePtr<STACK_OF(TRUST_TOKEN_PRETOKEN)> pretokens(
+      sk_TRUST_TOKEN_PRETOKEN_deep_copy(client->pretokens,
+                                        trust_token_pretoken_dup,
+                                        TRUST_TOKEN_PRETOKEN_free));
 
   if (!TimeFunction(&results, [&]() -> bool {
         uint8_t *issue_resp = NULL;
@@ -1142,8 +1119,9 @@
                                                issue_resp, resp_len));
 
         // Reset pretokens.
-        client->pretokens = sk_PMBTOKEN_PRETOKEN_deep_copy(
-            pretokens.get(), pmbtoken_pretoken_dup, PMBTOKEN_PRETOKEN_free);
+        client->pretokens = sk_TRUST_TOKEN_PRETOKEN_deep_copy(
+            pretokens.get(), trust_token_pretoken_dup,
+            TRUST_TOKEN_PRETOKEN_free);
         return !!tokens;
       })) {
     fprintf(stderr, "TRUST_TOKEN_CLIENT_finish_issuance failed.\n");
@@ -1229,9 +1207,9 @@
   if (!TimeFunction(&results, [&]() -> bool {
         uint8_t *srr = NULL, *sig = NULL;
         size_t srr_len, sig_len;
-        int ok = TRUST_TOKEN_CLIENT_finish_redemption(client.get(), &srr,
-                                                      &srr_len, &sig, &sig_len,
-                                                      redeem_resp, resp_len);
+        int ok = TRUST_TOKEN_CLIENT_finish_redemption(
+            client.get(), &srr, &srr_len, &sig, &sig_len, redeem_resp,
+            redeem_resp_len);
         OPENSSL_free(srr);
         OPENSSL_free(sig);
         return ok;
@@ -1244,6 +1222,24 @@
   return true;
 }
 
+#if defined(BORINGSSL_FIPS)
+static bool SpeedSelfTest(const std::string &selected) {
+  if (!selected.empty() && selected.find("self-test") == std::string::npos) {
+    return true;
+  }
+
+  TimeResults results;
+  if (!TimeFunction(&results, []() -> bool { return BORINGSSL_self_test(); })) {
+    fprintf(stderr, "BORINGSSL_self_test faileid.\n");
+    ERR_print_errors_fp(stderr);
+    return false;
+  }
+
+  results.Print("self-test");
+  return true;
+}
+#endif
+
 static const struct argument kArguments[] = {
     {
         "-filter",
@@ -1366,6 +1362,7 @@
       !SpeedHash(EVP_sha1(), "SHA-1", selected) ||
       !SpeedHash(EVP_sha256(), "SHA-256", selected) ||
       !SpeedHash(EVP_sha512(), "SHA-512", selected) ||
+      !SpeedHash(EVP_blake2b256(), "BLAKE2b-256", selected) ||
       !SpeedRandom(selected) ||
       !SpeedECDH(selected) ||
       !SpeedECDSA(selected) ||
@@ -1375,16 +1372,25 @@
       !SpeedRSAKeyGen(selected) ||
       !SpeedHRSS(selected) ||
       !SpeedHashToCurve(selected) ||
-      !SpeedTrustToken("TrustToken-Exp0-Batch1", TRUST_TOKEN_experiment_v0(), 1,
-                       selected) ||
-      !SpeedTrustToken("TrustToken-Exp0-Batch10", TRUST_TOKEN_experiment_v0(),
-                       10, selected) ||
       !SpeedTrustToken("TrustToken-Exp1-Batch1", TRUST_TOKEN_experiment_v1(), 1,
                        selected) ||
       !SpeedTrustToken("TrustToken-Exp1-Batch10", TRUST_TOKEN_experiment_v1(),
-                       10, selected)) {
+                       10, selected) ||
+      !SpeedTrustToken("TrustToken-Exp2VOPRF-Batch1",
+                       TRUST_TOKEN_experiment_v2_voprf(), 1, selected) ||
+      !SpeedTrustToken("TrustToken-Exp2VOPRF-Batch10",
+                       TRUST_TOKEN_experiment_v2_voprf(), 10, selected) ||
+      !SpeedTrustToken("TrustToken-Exp2PMB-Batch1",
+                       TRUST_TOKEN_experiment_v2_pmb(), 1, selected) ||
+      !SpeedTrustToken("TrustToken-Exp2PMB-Batch10",
+                       TRUST_TOKEN_experiment_v2_pmb(), 10, selected)) {
     return false;
   }
+#if defined(BORINGSSL_FIPS)
+  if (!SpeedSelfTest(selected)) {
+    return false;
+  }
+#endif
   if (g_print_json) {
     puts("\n]");
   }
diff --git a/deps/boringssl/src/tool/transport_common.cc b/deps/boringssl/src/tool/transport_common.cc
index 88e9169..b985221 100644
--- a/deps/boringssl/src/tool/transport_common.cc
+++ b/deps/boringssl/src/tool/transport_common.cc
@@ -156,7 +156,12 @@
 
   int ret = getaddrinfo(hostname.c_str(), port.c_str(), &hint, &result);
   if (ret != 0) {
-    fprintf(stderr, "getaddrinfo returned: %s\n", gai_strerror(ret));
+#if defined(OPENSSL_WINDOWS)
+    const char *error = gai_strerrorA(ret);
+#else
+    const char *error = gai_strerror(ret);
+#endif
+    fprintf(stderr, "getaddrinfo returned: %s\n", error);
     return false;
   }
 
diff --git a/deps/boringssl/src/util/fipstools/acvp/modulewrapper/modulewrapper.cc b/deps/boringssl/src/util/fipstools/acvp/modulewrapper/modulewrapper.cc
index d6a9b0a..06eac8b 100644
--- a/deps/boringssl/src/util/fipstools/acvp/modulewrapper/modulewrapper.cc
+++ b/deps/boringssl/src/util/fipstools/acvp/modulewrapper/modulewrapper.cc
@@ -12,28 +12,39 @@
  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
 
+#include <map>
 #include <string>
 #include <vector>
 
 #include <assert.h>
 #include <errno.h>
+#include <limits.h>
 #include <string.h>
 #include <sys/uio.h>
 #include <unistd.h>
 #include <cstdarg>
 
+#include <openssl/aead.h>
 #include <openssl/aes.h>
 #include <openssl/bn.h>
+#include <openssl/cipher.h>
+#include <openssl/cmac.h>
+#include <openssl/dh.h>
 #include <openssl/digest.h>
 #include <openssl/ec.h>
 #include <openssl/ec_key.h>
+#include <openssl/ecdh.h>
 #include <openssl/ecdsa.h>
+#include <openssl/err.h>
 #include <openssl/hmac.h>
 #include <openssl/obj.h>
+#include <openssl/rsa.h>
 #include <openssl/sha.h>
 #include <openssl/span.h>
 
+#include "../../../../crypto/fipsmodule/ec/internal.h"
 #include "../../../../crypto/fipsmodule/rand/internal.h"
+#include "../../../../crypto/fipsmodule/tls/internal.h"
 
 static constexpr size_t kMaxArgs = 8;
 static constexpr size_t kMaxArgLength = (1 << 20);
@@ -77,14 +88,19 @@
   iovs[0].iov_base = nums;
   iovs[0].iov_len = sizeof(uint32_t) * (1 + spans.size());
 
+  size_t num_iov = 1;
   for (size_t i = 0; i < spans.size(); i++) {
     const auto &span = spans[i];
     nums[i + 1] = span.size();
-    iovs[i + 1].iov_base = const_cast<uint8_t *>(span.data());
-    iovs[i + 1].iov_len = span.size();
+    if (span.empty()) {
+      continue;
+    }
+
+    iovs[num_iov].iov_base = const_cast<uint8_t *>(span.data());
+    iovs[num_iov].iov_len = span.size();
+    num_iov++;
   }
 
-  const size_t num_iov = spans.size() + 1;
   size_t iov_done = 0;
   while (iov_done < num_iov) {
     ssize_t r;
@@ -97,7 +113,7 @@
     }
 
     size_t written = r;
-    for (size_t i = iov_done; written > 0 && i < num_iov; i++) {
+    for (size_t i = iov_done; i < num_iov && written > 0; i++) {
       iovec &iov = iovs[i];
 
       size_t done = written;
@@ -152,6 +168,13 @@
         }]
       },
       {
+        "algorithm": "SHA2-512/256",
+        "revision": "1.0",
+        "messageLength": [{
+          "min": 0, "max": 65528, "increment": 8
+        }]
+      },
+      {
         "algorithm": "SHA-1",
         "revision": "1.0",
         "messageLength": [{
@@ -165,12 +188,98 @@
         "keyLen": [128, 192, 256]
       },
       {
+        "algorithm": "ACVP-AES-CTR",
+        "revision": "1.0",
+        "direction": ["encrypt", "decrypt"],
+        "keyLen": [128, 192, 256],
+        "payloadLen": [{
+          "min": 8, "max": 128, "increment": 8
+        }],
+        "incrementalCounter": true,
+        "overflowCounter": true,
+        "performCounterTests": true
+      },
+      {
         "algorithm": "ACVP-AES-CBC",
         "revision": "1.0",
         "direction": ["encrypt", "decrypt"],
         "keyLen": [128, 192, 256]
       },
       {
+        "algorithm": "ACVP-AES-GCM",
+        "revision": "1.0",
+        "direction": ["encrypt", "decrypt"],
+        "keyLen": [128, 192, 256],
+        "payloadLen": [{
+          "min": 0, "max": 256, "increment": 8
+        }],
+        "aadLen": [{
+          "min": 0, "max": 320, "increment": 8
+        }],
+        "tagLen": [32, 64, 96, 104, 112, 120, 128],
+        "ivLen": [96],
+        "ivGen": "external"
+      },
+      {
+        "algorithm": "ACVP-AES-KW",
+        "revision": "1.0",
+        "direction": [
+            "encrypt",
+            "decrypt"
+        ],
+        "kwCipher": [
+            "cipher"
+        ],
+        "keyLen": [
+            128, 192, 256
+        ],
+        "payloadLen": [{"min": 128, "max": 1024, "increment": 64}]
+      },
+      {
+        "algorithm": "ACVP-AES-KWP",
+        "revision": "1.0",
+        "direction": [
+            "encrypt",
+            "decrypt"
+        ],
+        "kwCipher": [
+            "cipher"
+        ],
+        "keyLen": [
+            128, 192, 256
+        ],
+        "payloadLen": [{"min": 8, "max": 4096, "increment": 8}]
+      },
+      {
+        "algorithm": "ACVP-AES-CCM",
+        "revision": "1.0",
+        "direction": [
+            "encrypt",
+            "decrypt"
+        ],
+        "keyLen": [
+            128
+        ],
+        "payloadLen": [{"min": 0, "max": 256, "increment": 8}],
+        "ivLen": [104],
+        "tagLen": [32],
+        "aadLen": [{"min": 0, "max": 1024, "increment": 8}]
+      },
+      {
+        "algorithm": "ACVP-TDES-ECB",
+        "revision": "1.0",
+        "direction": ["encrypt", "decrypt"],
+        "keyLen": [192],
+        "keyingOption": [1]
+      },
+      {
+        "algorithm": "ACVP-TDES-CBC",
+        "revision": "1.0",
+        "direction": ["encrypt", "decrypt"],
+        "keyLen": [192],
+        "keyingOption": [1]
+      },
+      {
         "algorithm": "HMAC-SHA-1",
         "revision": "1.0",
         "keyLen": [{
@@ -299,6 +408,336 @@
             "SHA2-512"
           ]
         }]
+      },
+      {
+        "algorithm": "RSA",
+        "mode": "keyGen",
+        "revision": "FIPS186-4",
+        "infoGeneratedByServer": true,
+        "pubExpMode": "fixed",
+        "fixedPubExp": "010001",
+        "keyFormat": "standard",
+        "capabilities": [{
+          "randPQ": "B.3.3",
+          "properties": [{
+            "modulo": 2048,
+            "primeTest": [
+              "tblC2"
+            ]
+          },{
+            "modulo": 3072,
+            "primeTest": [
+              "tblC2"
+            ]
+          },{
+            "modulo": 4096,
+            "primeTest": [
+              "tblC2"
+            ]
+          }]
+        }]
+      },
+      {
+        "algorithm": "RSA",
+        "mode": "sigGen",
+        "revision": "FIPS186-4",
+        "capabilities": [{
+          "sigType": "pkcs1v1.5",
+          "properties": [{
+            "modulo": 2048,
+            "hashPair": [{
+              "hashAlg": "SHA2-224"
+            }, {
+              "hashAlg": "SHA2-256"
+            }, {
+              "hashAlg": "SHA2-384"
+            }, {
+              "hashAlg": "SHA2-512"
+            }]
+          }]
+        },{
+          "sigType": "pkcs1v1.5",
+          "properties": [{
+            "modulo": 3072,
+            "hashPair": [{
+              "hashAlg": "SHA2-224"
+            }, {
+              "hashAlg": "SHA2-256"
+            }, {
+              "hashAlg": "SHA2-384"
+            }, {
+              "hashAlg": "SHA2-512"
+            }]
+          }]
+        },{
+          "sigType": "pkcs1v1.5",
+          "properties": [{
+            "modulo": 4096,
+            "hashPair": [{
+              "hashAlg": "SHA2-224"
+            }, {
+              "hashAlg": "SHA2-256"
+            }, {
+              "hashAlg": "SHA2-384"
+            }, {
+              "hashAlg": "SHA2-512"
+            }]
+          }]
+        },{
+          "sigType": "pss",
+          "properties": [{
+            "modulo": 2048,
+            "hashPair": [{
+              "hashAlg": "SHA2-224",
+              "saltLen": 28
+            }, {
+              "hashAlg": "SHA2-256",
+              "saltLen": 32
+            }, {
+              "hashAlg": "SHA2-384",
+              "saltLen": 48
+            }, {
+              "hashAlg": "SHA2-512",
+              "saltLen": 64
+            }]
+          }]
+        },{
+          "sigType": "pss",
+          "properties": [{
+            "modulo": 3072,
+            "hashPair": [{
+              "hashAlg": "SHA2-224",
+              "saltLen": 28
+            }, {
+              "hashAlg": "SHA2-256",
+              "saltLen": 32
+            }, {
+              "hashAlg": "SHA2-384",
+              "saltLen": 48
+            }, {
+              "hashAlg": "SHA2-512",
+              "saltLen": 64
+            }]
+          }]
+        },{
+          "sigType": "pss",
+          "properties": [{
+            "modulo": 4096,
+            "hashPair": [{
+              "hashAlg": "SHA2-224",
+              "saltLen": 28
+            }, {
+              "hashAlg": "SHA2-256",
+              "saltLen": 32
+            }, {
+              "hashAlg": "SHA2-384",
+              "saltLen": 48
+            }, {
+              "hashAlg": "SHA2-512",
+              "saltLen": 64
+            }]
+          }]
+        }]
+      },
+      {
+        "algorithm": "RSA",
+        "mode": "sigVer",
+        "revision": "FIPS186-4",
+        "pubExpMode": "fixed",
+        "fixedPubExp": "010001",
+        "capabilities": [{
+          "sigType": "pkcs1v1.5",
+          "properties": [{
+            "modulo": 1024,
+            "hashPair": [{
+              "hashAlg": "SHA2-224"
+            }, {
+              "hashAlg": "SHA2-256"
+            }, {
+              "hashAlg": "SHA2-384"
+            }, {
+              "hashAlg": "SHA2-512"
+            }, {
+              "hashAlg": "SHA-1"
+            }]
+          }]
+        },{
+          "sigType": "pkcs1v1.5",
+          "properties": [{
+            "modulo": 2048,
+            "hashPair": [{
+              "hashAlg": "SHA2-224"
+            }, {
+              "hashAlg": "SHA2-256"
+            }, {
+              "hashAlg": "SHA2-384"
+            }, {
+              "hashAlg": "SHA2-512"
+            }, {
+              "hashAlg": "SHA-1"
+            }]
+          }]
+        },{
+          "sigType": "pkcs1v1.5",
+          "properties": [{
+            "modulo": 3072,
+            "hashPair": [{
+              "hashAlg": "SHA2-224"
+            }, {
+              "hashAlg": "SHA2-256"
+            }, {
+              "hashAlg": "SHA2-384"
+            }, {
+              "hashAlg": "SHA2-512"
+            }, {
+              "hashAlg": "SHA-1"
+            }]
+          }]
+        },{
+          "sigType": "pkcs1v1.5",
+          "properties": [{
+            "modulo": 4096,
+            "hashPair": [{
+              "hashAlg": "SHA2-224"
+            }, {
+              "hashAlg": "SHA2-256"
+            }, {
+              "hashAlg": "SHA2-384"
+            }, {
+              "hashAlg": "SHA2-512"
+            }, {
+              "hashAlg": "SHA-1"
+            }]
+          }]
+        },{
+          "sigType": "pss",
+          "properties": [{
+            "modulo": 2048,
+            "hashPair": [{
+              "hashAlg": "SHA2-224",
+              "saltLen": 28
+            }, {
+              "hashAlg": "SHA2-256",
+              "saltLen": 32
+            }, {
+              "hashAlg": "SHA2-384",
+              "saltLen": 48
+            }, {
+              "hashAlg": "SHA2-512",
+              "saltLen": 64
+            }, {
+              "hashAlg": "SHA-1",
+              "saltLen": 20
+            }]
+          }]
+        },{
+          "sigType": "pss",
+          "properties": [{
+            "modulo": 3072,
+            "hashPair": [{
+              "hashAlg": "SHA2-224",
+              "saltLen": 28
+            }, {
+              "hashAlg": "SHA2-256",
+              "saltLen": 32
+            }, {
+              "hashAlg": "SHA2-384",
+              "saltLen": 48
+            }, {
+              "hashAlg": "SHA2-512",
+              "saltLen": 64
+            }, {
+              "hashAlg": "SHA-1",
+              "saltLen": 20
+            }]
+          }]
+        },{
+          "sigType": "pss",
+          "properties": [{
+            "modulo": 4096,
+            "hashPair": [{
+              "hashAlg": "SHA2-224",
+              "saltLen": 28
+            }, {
+              "hashAlg": "SHA2-256",
+              "saltLen": 32
+            }, {
+              "hashAlg": "SHA2-384",
+              "saltLen": 48
+            }, {
+              "hashAlg": "SHA2-512",
+              "saltLen": 64
+            }, {
+              "hashAlg": "SHA-1",
+              "saltLen": 20
+            }]
+          }]
+        }]
+      },
+      {
+        "algorithm": "CMAC-AES",
+        "revision": "1.0",
+        "capabilities": [{
+          "direction": ["gen"],
+          "msgLen": [{
+            "min": 0,
+            "max": 65536,
+            "increment": 8
+          }],
+          "keyLen": [128, 256],
+          "macLen": [{
+            "min": 32,
+            "max": 128,
+            "increment": 8
+          }]
+        }]
+      },
+      {
+        "algorithm": "kdf-components",
+        "revision": "1.0",
+        "mode": "tls",
+        "tlsVersion": [
+          "v1.0/1.1",
+          "v1.2"
+        ],
+        "hashAlg": [
+          "SHA2-256",
+          "SHA2-384",
+          "SHA2-512"
+        ]
+      },
+      {
+        "algorithm": "KAS-ECC-SSC",
+        "revision": "Sp800-56Ar3",
+        "scheme": {
+          "ephemeralUnified": {
+            "kasRole": [
+              "initiator",
+              "responder"
+            ]
+          }
+        },
+        "domainParameterGenerationMethods": [
+          "P-224",
+          "P-256",
+          "P-384",
+          "P-521"
+        ]
+      },
+      {
+        "algorithm": "KAS-FFC-SSC",
+        "revision": "Sp800-56Ar3",
+        "scheme": {
+          "dhEphem": {
+            "kasRole": [
+              "initiator"
+            ]
+          }
+        },
+        "domainParameterGenerationMethods": [
+          "FB",
+          "FC"
+        ]
       }
     ])";
   return WriteReply(
@@ -315,6 +754,27 @@
   return WriteReply(STDOUT_FILENO, Span<const uint8_t>(digest));
 }
 
+static uint32_t GetIterations(const Span<const uint8_t> iterations_bytes) {  // Decodes a request's iteration count; aborts the process if it is malformed.
+  uint32_t iterations;
+  if (iterations_bytes.size() != sizeof(iterations)) {  // The count must be exactly sizeof(uint32_t) bytes.
+    fprintf(stderr,
+            "Expected %u-byte input for number of iterations, but found %u "
+            "bytes.\n",
+            static_cast<unsigned>(sizeof(iterations)),
+            static_cast<unsigned>(iterations_bytes.size()));
+    abort();
+  }
+
+  memcpy(&iterations, iterations_bytes.data(), sizeof(iterations));  // Raw copy, so the bytes are read in host byte order.
+  if (iterations == 0 || iterations == UINT32_MAX) {  // Both zero and UINT32_MAX are rejected.
+    fprintf(stderr, "Invalid number of iterations: %x.\n",
+            static_cast<unsigned>(iterations));
+    abort();
+  }
+
+  return iterations;
+}
+
 template <int (*SetKey)(const uint8_t *key, unsigned bits, AES_KEY *out),
           void (*Block)(const uint8_t *in, uint8_t *out, const AES_KEY *key)>
 static bool AES(const Span<const uint8_t> args[]) {
@@ -325,13 +785,22 @@
   if (args[1].size() % AES_BLOCK_SIZE != 0) {
     return false;
   }
+  std::vector<uint8_t> result(args[1].begin(), args[1].end());
+  const uint32_t iterations = GetIterations(args[2]);
 
-  std::vector<uint8_t> out;
-  out.resize(args[1].size());
-  for (size_t i = 0; i < args[1].size(); i += AES_BLOCK_SIZE) {
-    Block(args[1].data() + i, &out[i], &key);
+  std::vector<uint8_t> prev_result;
+  for (uint32_t j = 0; j < iterations; j++) {
+    if (j == iterations - 1) {
+      prev_result = result;
+    }
+
+    for (size_t i = 0; i < args[1].size(); i += AES_BLOCK_SIZE) {
+      Block(result.data() + i, result.data() + i, &key);
+    }
   }
-  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(out));
+
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(result),
+                    Span<const uint8_t>(prev_result));
 }
 
 template <int (*SetKey)(const uint8_t *key, unsigned bits, AES_KEY *out),
@@ -341,20 +810,438 @@
   if (SetKey(args[0].data(), args[0].size() * 8, &key) != 0) {
     return false;
   }
-  if (args[1].size() % AES_BLOCK_SIZE != 0 ||
+  if (args[1].size() % AES_BLOCK_SIZE != 0 || args[1].empty() ||
       args[2].size() != AES_BLOCK_SIZE) {
     return false;
   }
+  std::vector<uint8_t> input(args[1].begin(), args[1].end());
+  std::vector<uint8_t> iv(args[2].begin(), args[2].end());
+  const uint32_t iterations = GetIterations(args[3]);
+
+  std::vector<uint8_t> result(input.size());
+  std::vector<uint8_t> prev_result, prev_input;
+
+  for (uint32_t j = 0; j < iterations; j++) {
+    prev_result = result;
+    if (j > 0) {
+      if (Direction == AES_ENCRYPT) {
+        iv = result;
+      } else {
+        iv = prev_input;
+      }
+    }
+
+    // AES_cbc_encrypt will mutate the given IV, but we need it later.
+    uint8_t iv_copy[AES_BLOCK_SIZE];
+    memcpy(iv_copy, iv.data(), sizeof(iv_copy));
+    AES_cbc_encrypt(input.data(), result.data(), input.size(), &key, iv_copy,
+                    Direction);
+
+    if (Direction == AES_DECRYPT) {
+      prev_input = input;
+    }
+
+    if (j == 0) {
+      input = iv;
+    } else {
+      input = prev_result;
+    }
+  }
+
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(result),
+                    Span<const uint8_t>(prev_result));
+}
+
+static bool AES_CTR(const Span<const uint8_t> args[]) {  // AES-CTR handler. args: [0] key, [1] input, [2] IV, [3] iteration count.
+  static const uint32_t kOneIteration = 1;
+  if (args[3].size() != sizeof(kOneIteration) ||  // args[3] must be the 4-byte encoding of exactly 1.
+      memcmp(args[3].data(), &kOneIteration, sizeof(kOneIteration))) {
+    fprintf(stderr, "Only a single iteration supported with AES-CTR\n");
+    return false;
+  }
+
+  AES_KEY key;
+  if (AES_set_encrypt_key(args[0].data(), args[0].size() * 8, &key) != 0) {  // Key length is passed in bits; the encrypt schedule is used for both table entries.
+    return false;
+  }
+  if (args[2].size() != AES_BLOCK_SIZE) {  // The IV must be one full AES block.
+    return false;
+  }
   uint8_t iv[AES_BLOCK_SIZE];
   memcpy(iv, args[2].data(), AES_BLOCK_SIZE);
+  if (GetIterations(args[3]) != 1) {  // NOTE(review): looks unreachable, since the memcmp check above already requires the value 1 -- confirm.
+    fprintf(stderr, "Multiple iterations of AES-CTR is not supported.\n");
+    return false;
+  }
 
   std::vector<uint8_t> out;
   out.resize(args[1].size());
-  AES_cbc_encrypt(args[1].data(), out.data(), args[1].size(), &key, iv,
-                  Direction);
+  unsigned num = 0;  // |num| and |ecount_buf| are scratch state handed to AES_ctr128_encrypt.
+  uint8_t ecount_buf[AES_BLOCK_SIZE];
+  AES_ctr128_encrypt(args[1].data(), out.data(), args[1].size(), &key, iv,
+                     ecount_buf, &num);
   return WriteReply(STDOUT_FILENO, Span<const uint8_t>(out));
 }
 
+static bool AESGCMSetup(EVP_AEAD_CTX *ctx, Span<const uint8_t> tag_len_span,  // Initialises |ctx| for AES-GCM; logs to stderr and returns false on bad input.
+                        Span<const uint8_t> key) {
+  uint32_t tag_len_32;
+  if (tag_len_span.size() != sizeof(tag_len_32)) {  // The tag length arrives as a raw uint32_t.
+    fprintf(stderr, "Tag size value is %u bytes, not an uint32_t\n",
+            static_cast<unsigned>(tag_len_span.size()));
+    return false;
+  }
+  memcpy(&tag_len_32, tag_len_span.data(), sizeof(tag_len_32));
+
+  const EVP_AEAD *aead;
+  switch (key.size()) {  // Pick the AEAD from the key size in bytes.
+    case 16:
+      aead = EVP_aead_aes_128_gcm();
+      break;
+    case 24:
+      aead = EVP_aead_aes_192_gcm();
+      break;
+    case 32:
+      aead = EVP_aead_aes_256_gcm();
+      break;
+    default:  // Any other key size is rejected.
+      fprintf(stderr, "Bad AES-GCM key length %u\n",
+              static_cast<unsigned>(key.size()));
+      return false;
+  }
+
+  if (!EVP_AEAD_CTX_init(ctx, aead, key.data(), key.size(), tag_len_32,  // |tag_len_32| is forwarded unchanged as the tag length.
+                         nullptr)) {
+    fprintf(stderr, "Failed to setup AES-GCM with tag length %u\n",
+            static_cast<unsigned>(tag_len_32));
+    return false;
+  }
+
+  return true;
+}
+
+static bool AESCCMSetup(EVP_AEAD_CTX *ctx, Span<const uint8_t> tag_len_span,  // Initialises |ctx| for AES-CCM; logs to stderr and returns false on bad input.
+                        Span<const uint8_t> key) {
+  uint32_t tag_len_32;
+  if (tag_len_span.size() != sizeof(tag_len_32)) {  // The tag length arrives as a raw uint32_t.
+    fprintf(stderr, "Tag size value is %u bytes, not an uint32_t\n",
+            static_cast<unsigned>(tag_len_span.size()));
+    return false;
+  }
+  memcpy(&tag_len_32, tag_len_span.data(), sizeof(tag_len_32));
+  if (tag_len_32 != 4) {  // Only a tag length of 4 is accepted.
+    fprintf(stderr, "AES-CCM only supports 4-byte tags, but %u was requested\n",
+            static_cast<unsigned>(tag_len_32));
+    return false;
+  }
+
+  if (key.size() != 16) {  // Only 16-byte keys are accepted.
+    fprintf(stderr,
+            "AES-CCM only supports 128-bit keys, but %u bits were given\n",
+            static_cast<unsigned>(key.size() * 8));
+    return false;
+  }
+
+  if (!EVP_AEAD_CTX_init(ctx, EVP_aead_aes_128_ccm_bluetooth(), key.data(),  // The AEAD is fixed to the "bluetooth" AES-128-CCM variant.
+                         key.size(), tag_len_32, nullptr)) {
+    fprintf(stderr, "Failed to setup AES-CCM with tag length %u\n",
+            static_cast<unsigned>(tag_len_32));
+    return false;
+  }
+
+  return true;
+}
+
+template <bool (*SetupFunc)(EVP_AEAD_CTX *ctx, Span<const uint8_t> tag_len_span,  // |SetupFunc| prepares the AEAD context from the tag length and key.
+                            Span<const uint8_t> key)>
+static bool AEADSeal(const Span<const uint8_t> args[]) {  // Seals args[2] and replies with the sealed output as a single value.
+  Span<const uint8_t> tag_len_span = args[0];
+  Span<const uint8_t> key = args[1];
+  Span<const uint8_t> plaintext = args[2];
+  Span<const uint8_t> nonce = args[3];
+  Span<const uint8_t> ad = args[4];
+
+  bssl::ScopedEVP_AEAD_CTX ctx;
+  if (!SetupFunc(ctx.get(), tag_len_span, key)) {
+    return false;
+  }
+
+  if (EVP_AEAD_MAX_OVERHEAD + plaintext.size() < EVP_AEAD_MAX_OVERHEAD) {  // Guard against size_t wrap-around in the buffer size below.
+    return false;
+  }
+  std::vector<uint8_t> out(EVP_AEAD_MAX_OVERHEAD + plaintext.size());  // Sized for the plaintext plus the maximum AEAD overhead.
+
+  size_t out_len;
+  if (!EVP_AEAD_CTX_seal(ctx.get(), out.data(), &out_len, out.size(),
+                         nonce.data(), nonce.size(), plaintext.data(),
+                         plaintext.size(), ad.data(), ad.size())) {
+    return false;
+  }
+
+  out.resize(out_len);  // Trim to the number of bytes actually written.
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(out));
+}
+
+template <bool (*SetupFunc)(EVP_AEAD_CTX *ctx, Span<const uint8_t> tag_len_span,  // |SetupFunc| prepares the AEAD context from the tag length and key.
+                            Span<const uint8_t> key)>
+static bool AEADOpen(const Span<const uint8_t> args[]) {  // Opens args[2]; replies with a one-byte success flag followed by the plaintext.
+  Span<const uint8_t> tag_len_span = args[0];
+  Span<const uint8_t> key = args[1];
+  Span<const uint8_t> ciphertext = args[2];
+  Span<const uint8_t> nonce = args[3];
+  Span<const uint8_t> ad = args[4];
+
+  bssl::ScopedEVP_AEAD_CTX ctx;
+  if (!SetupFunc(ctx.get(), tag_len_span, key)) {
+    return false;
+  }
+
+  std::vector<uint8_t> out(ciphertext.size());  // Output buffer is as large as the ciphertext.
+  size_t out_len;
+  uint8_t success_flag[1] = {0};
+
+  if (!EVP_AEAD_CTX_open(ctx.get(), out.data(), &out_len, out.size(),
+                         nonce.data(), nonce.size(), ciphertext.data(),
+                         ciphertext.size(), ad.data(), ad.size())) {
+    return WriteReply(STDOUT_FILENO, Span<const uint8_t>(success_flag),  // A failed open is still a valid reply: flag 0 and an empty plaintext.
+                      Span<const uint8_t>());
+  }
+
+  out.resize(out_len);
+  success_flag[0] = 1;
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(success_flag),
+                    Span<const uint8_t>(out));
+}
+
+static bool AESPaddedKeyWrapSetup(AES_KEY *out, bool decrypt,  // Fills |out| with the decrypt or encrypt key schedule for |key|.
+                                  Span<const uint8_t> key) {
+  if ((decrypt ? AES_set_decrypt_key : AES_set_encrypt_key)(  // |decrypt| selects which key-setup function is called.
+          key.data(), key.size() * 8, out) != 0) {  // Key length is passed in bits; non-zero means failure.
+    fprintf(stderr, "Invalid AES key length for AES-KW(P): %u\n",
+            static_cast<unsigned>(key.size()));
+    return false;
+  }
+  return true;
+}
+
+static bool AESKeyWrapSetup(AES_KEY *out, bool decrypt, Span<const uint8_t> key,  // Key setup for unpadded AES-KW, plus an input-length check.
+                            Span<const uint8_t> input) {
+  if (!AESPaddedKeyWrapSetup(out, decrypt, key)) {  // The key schedule setup is shared with the padded variant.
+    return false;
+  }
+
+  if (input.size() % 8) {  // Input must be a multiple of 8 bytes.
+    fprintf(stderr, "Invalid AES-KW input length: %u\n",
+            static_cast<unsigned>(input.size()));
+    return false;
+  }
+
+  return true;
+}
+
+static bool AESKeyWrapSeal(const Span<const uint8_t> args[]) {  // AES-KW wrap; only args[1] (key) and args[2] (plaintext) are read.
+  Span<const uint8_t> key = args[1];
+  Span<const uint8_t> plaintext = args[2];
+
+  AES_KEY aes;
+  if (!AESKeyWrapSetup(&aes, /*decrypt=*/false, key, plaintext) ||
+      plaintext.size() > INT_MAX - 8) {  // Keeps the output size (input + 8) representable as an int.
+    return false;
+  }
+
+  std::vector<uint8_t> out(plaintext.size() + 8);  // Wrapped output is 8 bytes longer than the input.
+  if (AES_wrap_key(&aes, /*iv=*/nullptr, out.data(), plaintext.data(),
+                   plaintext.size()) != static_cast<int>(out.size())) {  // Any return other than the full output length is treated as failure.
+    fprintf(stderr, "AES-KW failed\n");
+    return false;
+  }
+
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(out));
+}
+
+static bool AESKeyWrapOpen(const Span<const uint8_t> args[]) {  // AES-KW unwrap; replies with a one-byte success flag followed by the plaintext.
+  Span<const uint8_t> key = args[1];
+  Span<const uint8_t> ciphertext = args[2];
+
+  AES_KEY aes;
+  if (!AESKeyWrapSetup(&aes, /*decrypt=*/true, key, ciphertext) ||
+      ciphertext.size() < 8 ||  // Needed so that size() - 8 below cannot underflow.
+      ciphertext.size() > INT_MAX) {
+    return false;
+  }
+
+  std::vector<uint8_t> out(ciphertext.size() - 8);  // Unwrapped output is 8 bytes shorter than the input.
+  uint8_t success_flag[1] = {0};
+  if (AES_unwrap_key(&aes, /*iv=*/nullptr, out.data(), ciphertext.data(),
+                     ciphertext.size()) != static_cast<int>(out.size())) {
+    return WriteReply(STDOUT_FILENO, Span<const uint8_t>(success_flag),  // An unwrap failure is reported in-band: flag 0 and an empty plaintext.
+                      Span<const uint8_t>());
+  }
+
+  success_flag[0] = 1;
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(success_flag),
+                    Span<const uint8_t>(out));
+}
+
+static bool AESPaddedKeyWrapSeal(const Span<const uint8_t> args[]) {  // AES-KWP wrap; only args[1] (key) and args[2] (plaintext) are read.
+  Span<const uint8_t> key = args[1];
+  Span<const uint8_t> plaintext = args[2];
+
+  AES_KEY aes;
+  if (!AESPaddedKeyWrapSetup(&aes, /*decrypt=*/false, key) ||
+      plaintext.size() + 15 < 15) {  // Guard against size_t wrap-around in the buffer size below.
+    return false;
+  }
+
+  std::vector<uint8_t> out(plaintext.size() + 15);  // Allows up to 15 bytes of growth over the input.
+  size_t out_len;
+  if (!AES_wrap_key_padded(&aes, out.data(), &out_len, out.size(),
+                           plaintext.data(), plaintext.size())) {
+    fprintf(stderr, "AES-KWP failed\n");
+    return false;
+  }
+
+  out.resize(out_len);  // Trim to the number of bytes actually written.
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(out));
+}
+
+static bool AESPaddedKeyWrapOpen(const Span<const uint8_t> args[]) {  // AES-KWP unwrap; replies with a one-byte success flag followed by the plaintext.
+  Span<const uint8_t> key = args[1];
+  Span<const uint8_t> ciphertext = args[2];
+
+  AES_KEY aes;
+  if (!AESPaddedKeyWrapSetup(&aes, /*decrypt=*/true, key) ||
+      ciphertext.size() % 8) {  // Ciphertext must be a multiple of 8 bytes.
+    return false;
+  }
+
+  std::vector<uint8_t> out(ciphertext.size());  // Output buffer is as large as the ciphertext.
+  size_t out_len;
+  uint8_t success_flag[1] = {0};
+  if (!AES_unwrap_key_padded(&aes, out.data(), &out_len, out.size(),
+                             ciphertext.data(), ciphertext.size())) {
+    return WriteReply(STDOUT_FILENO, Span<const uint8_t>(success_flag),  // An unwrap failure is reported in-band: flag 0 and an empty plaintext.
+                      Span<const uint8_t>());
+  }
+
+  success_flag[0] = 1;
+  out.resize(out_len);  // Trim to the number of bytes actually written.
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(success_flag),
+                    Span<const uint8_t>(out));
+}
+
+template <bool Encrypt>  // |Encrypt| selects the cipher direction.
+static bool TDES(const Span<const uint8_t> args[]) {  // Iterated 3DES-ECB. args: [0] key, [1] input, [2] iteration count.
+  const EVP_CIPHER *cipher = EVP_des_ede3();
+
+  if (args[0].size() != 24) {  // Only 24-byte keys are accepted.
+    fprintf(stderr, "Bad key length %u for 3DES.\n",
+            static_cast<unsigned>(args[0].size()));
+    return false;
+  }
+  bssl::ScopedEVP_CIPHER_CTX ctx;
+  if (!EVP_CipherInit_ex(ctx.get(), cipher, nullptr, args[0].data(), nullptr,
+                         Encrypt ? 1 : 0) ||
+      !EVP_CIPHER_CTX_set_padding(ctx.get(), 0)) {  // Padding is switched off; the input must already be block-aligned.
+    return false;
+  }
+
+  if (args[1].size() % 8) {  // Input must be a multiple of 8 bytes.
+    fprintf(stderr, "Bad input length %u for 3DES.\n",
+            static_cast<unsigned>(args[1].size()));
+    return false;
+  }
+  std::vector<uint8_t> result(args[1].begin(), args[1].end());  // |result| starts as the input and is transformed in place.
+
+  const uint32_t iterations = GetIterations(args[2]);
+  std::vector<uint8_t> prev_result, prev_prev_result;
+
+  for (uint32_t j = 0; j < iterations; j++) {
+    if (j == iterations - 1) {  // Snapshot the value that feeds the final iteration.
+      prev_result = result;
+    } else if (iterations >= 2 && j == iterations - 2) {  // Snapshot the value that feeds the second-to-last iteration.
+      prev_prev_result = result;
+    }
+
+    int out_len;
+    if (!EVP_CipherUpdate(ctx.get(), result.data(), &out_len, result.data(),  // Same buffer is used as source and destination.
+                          result.size()) ||
+        out_len != static_cast<int>(result.size())) {
+      return false;
+    }
+  }
+
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(result),  // Reply order: final result, then the two snapshots (empty if never taken).
+                    Span<const uint8_t>(prev_result),
+                    Span<const uint8_t>(prev_prev_result));
+}
+
+template <bool Encrypt>  // |Encrypt| selects the cipher direction.
+static bool TDES_CBC(const Span<const uint8_t> args[]) {  // Iterated 3DES-CBC. args: [0] key, [1] input, [2] IV, [3] iteration count.
+  const EVP_CIPHER *cipher = EVP_des_ede3_cbc();
+
+  if (args[0].size() != 24) {  // Only 24-byte keys are accepted.
+    fprintf(stderr, "Bad key length %u for 3DES.\n",
+            static_cast<unsigned>(args[0].size()));
+    return false;
+  }
+
+  if (args[1].size() % 8 || args[1].size() == 0) {  // Input must be a non-empty multiple of 8 bytes.
+    fprintf(stderr, "Bad input length %u for 3DES.\n",
+            static_cast<unsigned>(args[1].size()));
+    return false;
+  }
+  std::vector<uint8_t> input(args[1].begin(), args[1].end());
+
+  if (args[2].size() != EVP_CIPHER_iv_length(cipher)) {  // IV length must match what the cipher reports.
+    fprintf(stderr, "Bad IV length %u for 3DES.\n",
+            static_cast<unsigned>(args[2].size()));
+    return false;
+  }
+  std::vector<uint8_t> iv(args[2].begin(), args[2].end());
+  const uint32_t iterations = GetIterations(args[3]);
+
+  std::vector<uint8_t> result(input.size());
+  std::vector<uint8_t> prev_result, prev_prev_result;
+  bssl::ScopedEVP_CIPHER_CTX ctx;
+  if (!EVP_CipherInit_ex(ctx.get(), cipher, nullptr, args[0].data(), iv.data(),
+                         Encrypt ? 1 : 0) ||
+      !EVP_CIPHER_CTX_set_padding(ctx.get(), 0)) {  // Padding is switched off.
+    return false;
+  }
+
+  for (uint32_t j = 0; j < iterations; j++) {
+    prev_prev_result = prev_result;  // Shift the two-deep history of earlier outputs before producing a new one.
+    prev_result = result;
+
+    int out_len, out_len2;
+    if (!EVP_CipherInit_ex(ctx.get(), nullptr, nullptr, nullptr, iv.data(),  // Re-init passing only the current |iv|; cipher and key arguments are null, direction is -1.
+                           -1) ||
+        !EVP_CipherUpdate(ctx.get(), result.data(), &out_len, input.data(),
+                          input.size()) ||
+        !EVP_CipherFinal_ex(ctx.get(), result.data() + out_len, &out_len2) ||
+        (out_len + out_len2) != static_cast<int>(result.size())) {  // The whole output buffer must have been produced.
+      return false;
+    }
+
+    if (Encrypt) {
+      if (j == 0) {
+        input = iv;  // After the first round, the original IV becomes the next input.
+      } else {
+        input = prev_result;  // Later rounds feed back the output from the round before this one.
+      }
+      iv = result;  // The output just produced becomes the next IV.
+    } else {
+      iv = input;  // When decrypting, this round's input becomes the next IV...
+      input = result;  // ...and this round's output becomes the next input.
+    }
+  }
+
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(result),  // Reply order: latest result, previous result, the one before that.
+                    Span<const uint8_t>(prev_result),
+                    Span<const uint8_t>(prev_prev_result));
+}
+
 template <const EVP_MD *HashFunc()>
 static bool HMAC(const Span<const uint8_t> args[]) {
   const EVP_MD *const md = HashFunc();
@@ -566,6 +1453,299 @@
   return WriteReply(STDOUT_FILENO, Span<const uint8_t>(reply));
 }
 
+static bool CMAC_AES(const Span<const uint8_t> args[]) {  // Computes an AES-CMAC and replies with its first |mac_len| bytes.
+  uint8_t mac[16];
+  if (!AES_CMAC(mac, args[1].data(), args[1].size(), args[2].data(),  // args[1] and args[2] are forwarded to AES_CMAC; presumably key then message -- confirm.
+                args[2].size())) {
+    return false;
+  }
+
+  uint32_t mac_len;
+  if (args[0].size() != sizeof(mac_len)) {  // args[0] carries the requested output length as a raw uint32_t.
+    return false;
+  }
+  memcpy(&mac_len, args[0].data(), sizeof(mac_len));
+  if (mac_len > sizeof(mac)) {  // Cannot return more bytes than the 16-byte MAC holds.
+    return false;
+  }
+
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(mac, mac_len));  // Reply is the MAC truncated to |mac_len| bytes.
+}
+
+static bool CMAC_AESVerify(const Span<const uint8_t> args[]) {  // Recomputes the CMAC from args[0]/args[1] and compares it with the claimed MAC in args[2].
+  // This function is just for testing since libcrypto doesn't do the
+  // verification itself. The regcap doesn't advertise "ver" support.
+  uint8_t mac[16];
+  if (!AES_CMAC(mac, args[0].data(), args[0].size(), args[1].data(),
+                args[1].size()) ||
+      args[2].size() > sizeof(mac)) {  // A claimed MAC longer than 16 bytes cannot be compared against |mac|.
+    return false;
+  }
+
+  const uint8_t ok = OPENSSL_memcmp(mac, args[2].data(), args[2].size()) == 0;  // 1 iff args[2] equals the leading bytes of |mac|; the raw memcmp result would be inverted.
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(&ok, sizeof(ok)));
+}
+
+static std::map<unsigned, bssl::UniquePtr<RSA>>& CachedRSAKeys() {  // Accessor for the process-wide RSA key cache, keyed by modulus size in bits.
+  static std::map<unsigned, bssl::UniquePtr<RSA>> keys;  // Function-local static, constructed on first call.
+  return keys;
+}
+
+static RSA* GetRSAKey(unsigned bits) {  // Returns the cached |bits|-bit key, generating and caching one on a miss.
+  auto it = CachedRSAKeys().find(bits);
+  if (it != CachedRSAKeys().end()) {
+    return it->second.get();  // The cache keeps ownership of the returned key.
+  }
+
+  bssl::UniquePtr<RSA> key(RSA_new());
+  if (!RSA_generate_key_fips(key.get(), bits, nullptr)) {
+    abort();  // Key generation failure terminates the process.
+  }
+
+  RSA *const ret = key.get();  // Grab the raw pointer before |key| is moved into the cache.
+  CachedRSAKeys().emplace(static_cast<unsigned>(bits), std::move(key));
+
+  return ret;
+}
+
+static bool RSAKeyGen(const Span<const uint8_t> args[]) {  // Generates an RSA key of the requested size, replies with its components, then caches it.
+  uint32_t bits;
+  if (args[0].size() != sizeof(bits)) {  // args[0] carries the modulus size in bits as a raw uint32_t.
+    return false;
+  }
+  memcpy(&bits, args[0].data(), sizeof(bits));
+
+  bssl::UniquePtr<RSA> key(RSA_new());
+  if (!RSA_generate_key_fips(key.get(), bits, nullptr)) {
+    fprintf(stderr, "RSA_generate_key_fips failed for modulus length %u.\n",
+            bits);
+    return false;
+  }
+
+  const BIGNUM *n, *e, *d, *p, *q;
+  RSA_get0_key(key.get(), &n, &e, &d);
+  RSA_get0_factors(key.get(), &p, &q);
+
+  if (!WriteReply(STDOUT_FILENO, BIGNUMBytes(e), BIGNUMBytes(p), BIGNUMBytes(q),  // Reply order is e, p, q, n, d.
+                  BIGNUMBytes(n), BIGNUMBytes(d))) {
+    return false;
+  }
+
+  CachedRSAKeys().emplace(static_cast<unsigned>(bits), std::move(key));  // std::map::emplace keeps any entry already stored for |bits|.
+  return true;
+}
+
+template<const EVP_MD *(MDFunc)(), bool UsePSS>  // |MDFunc| supplies the digest; |UsePSS| selects RSA_sign_pss_mgf1 over RSA_sign.
+static bool RSASigGen(const Span<const uint8_t> args[]) {  // Signs args[1] with the cached key of args[0] bits; replies with n, e, signature.
+  uint32_t bits;
+  if (args[0].size() != sizeof(bits)) {  // args[0] carries the modulus size in bits as a raw uint32_t.
+    return false;
+  }
+  memcpy(&bits, args[0].data(), sizeof(bits));
+  const Span<const uint8_t> msg = args[1];
+
+  RSA *const key = GetRSAKey(bits);  // Owned by the key cache; GetRSAKey aborts if generation fails.
+  const EVP_MD *const md = MDFunc();
+  uint8_t digest_buf[EVP_MAX_MD_SIZE];
+  unsigned digest_len;
+  if (!EVP_Digest(msg.data(), msg.size(), digest_buf, &digest_len, md, NULL)) {  // The message is hashed here; the digest is what gets signed.
+    return false;
+  }
+
+  std::vector<uint8_t> sig(RSA_size(key));
+  size_t sig_len;
+  if (UsePSS) {
+    if (!RSA_sign_pss_mgf1(key, &sig_len, sig.data(), sig.size(), digest_buf,
+                           digest_len, md, md, -1)) {  // Same digest is passed for both hash arguments; salt-length argument is -1.
+      return false;
+    }
+  } else {
+    unsigned sig_len_u;  // RSA_sign reports the length through an unsigned, not a size_t.
+    if (!RSA_sign(EVP_MD_type(md), digest_buf, digest_len, sig.data(),
+                  &sig_len_u, key)) {
+      return false;
+    }
+    sig_len = sig_len_u;
+  }
+
+  sig.resize(sig_len);  // Trim to the number of signature bytes actually written.
+
+  return WriteReply(STDOUT_FILENO, BIGNUMBytes(RSA_get0_n(key)),
+                    BIGNUMBytes(RSA_get0_e(key)), sig);
+}
+
+template<const EVP_MD *(MDFunc)(), bool UsePSS>  // |MDFunc| supplies the digest; |UsePSS| selects RSA_verify_pss_mgf1 over RSA_verify.
+static bool RSASigVer(const Span<const uint8_t> args[]) {  // Verifies a signature; replies with one byte holding the verifier's result.
+  const Span<const uint8_t> n_bytes = args[0];
+  const Span<const uint8_t> e_bytes = args[1];
+  const Span<const uint8_t> msg = args[2];
+  const Span<const uint8_t> sig = args[3];
+
+  bssl::UniquePtr<BIGNUM> n(BN_new());  // Held in UniquePtr so the early returns below cannot leak them.
+  bssl::UniquePtr<BIGNUM> e(BN_new());
+  bssl::UniquePtr<RSA> key(RSA_new());
+  if (!n || !e || !key ||
+      !BN_bin2bn(n_bytes.data(), n_bytes.size(), n.get()) ||
+      !BN_bin2bn(e_bytes.data(), e_bytes.size(), e.get()) ||
+      !RSA_set0_key(key.get(), n.get(), e.get(), /*d=*/nullptr)) {
+    return false;
+  }
+  n.release();  // RSA_set0_key took ownership of these values.
+  e.release();
+
+  const EVP_MD *const md = MDFunc();
+  uint8_t digest_buf[EVP_MAX_MD_SIZE];
+  unsigned digest_len;
+  if (!EVP_Digest(msg.data(), msg.size(), digest_buf, &digest_len, md, NULL)) {  // The message is hashed here; the digest is what gets verified.
+    return false;
+  }
+
+  const uint8_t ok =  // Holds the verifier's return value, sent back as the reply byte.
+      UsePSS ? RSA_verify_pss_mgf1(key.get(), digest_buf, digest_len, md, md,
+                                   -1, sig.data(), sig.size())
+             : RSA_verify(EVP_MD_type(md), digest_buf, digest_len, sig.data(),
+                          sig.size(), key.get());
+  ERR_clear_error();  // A failed verification is an expected outcome, so drop any queued errors.
+
+  return WriteReply(STDOUT_FILENO, Span<const uint8_t>(&ok, 1));
+}
+
+template<const EVP_MD *(MDFunc)()>  // |MDFunc| supplies the digest handed to the PRF.
+static bool TLSKDF(const Span<const uint8_t> args[]) {  // Runs CRYPTO_tls1_prf and replies with the derived bytes.
+  const Span<const uint8_t> out_len_bytes = args[0];
+  const Span<const uint8_t> secret = args[1];
+  const Span<const uint8_t> label = args[2];
+  const Span<const uint8_t> seed1 = args[3];
+  const Span<const uint8_t> seed2 = args[4];
+  const EVP_MD *md = MDFunc();
+
+  uint32_t out_len;
+  if (out_len_bytes.size() != sizeof(out_len)) {  // The requested output length arrives as a raw uint32_t.
+    return false;
+  }
+  memcpy(&out_len, out_len_bytes.data(), sizeof(out_len));
+
+  std::vector<uint8_t> out(static_cast<size_t>(out_len));
+  if (!CRYPTO_tls1_prf(md, out.data(), out.size(), secret.data(), secret.size(),
+                       reinterpret_cast<const char *>(label.data()),  // The label bytes are passed with an explicit length.
+                       label.size(), seed1.data(), seed1.size(), seed2.data(),
+                       seed2.size())) {
+    return false;
+  }
+
+  return WriteReply(STDOUT_FILENO, out);
+}
+
+template <int Nid>  // |Nid| names the curve passed to EC_KEY_new_by_curve_name.
+static bool ECDH(const Span<const uint8_t> args[]) {  // ECDH against the peer point (args[0], args[1]); replies with our public x, y and the shared output.
+  bssl::UniquePtr<BIGNUM> their_x(BytesToBIGNUM(args[0]));
+  bssl::UniquePtr<BIGNUM> their_y(BytesToBIGNUM(args[1]));
+  const Span<const uint8_t> private_key = args[2];  // May be empty, in which case a fresh key is generated below.
+
+  bssl::UniquePtr<EC_KEY> ec_key(EC_KEY_new_by_curve_name(Nid));
+  bssl::UniquePtr<BN_CTX> ctx(BN_CTX_new());
+
+  const EC_GROUP *const group = EC_KEY_get0_group(ec_key.get());
+  bssl::UniquePtr<EC_POINT> their_point(EC_POINT_new(group));
+  if (!EC_POINT_set_affine_coordinates_GFp(
+          group, their_point.get(), their_x.get(), their_y.get(), ctx.get())) {
+    fprintf(stderr, "Invalid peer point for ECDH.\n");
+    return false;
+  }
+
+  if (!private_key.empty()) {  // Caller supplied our private scalar: install it and derive the matching public key.
+    bssl::UniquePtr<BIGNUM> our_k(BytesToBIGNUM(private_key));
+    if (!EC_KEY_set_private_key(ec_key.get(), our_k.get())) {
+      fprintf(stderr, "EC_KEY_set_private_key failed.\n");
+      return false;
+    }
+
+    bssl::UniquePtr<EC_POINT> our_pub(EC_POINT_new(group));
+    if (!EC_POINT_mul(group, our_pub.get(), our_k.get(), nullptr, nullptr,
+                      ctx.get()) ||
+        !EC_KEY_set_public_key(ec_key.get(), our_pub.get())) {
+      fprintf(stderr, "Calculating public key failed.\n");
+      return false;
+    }
+  } else if (!EC_KEY_generate_key_fips(ec_key.get())) {  // No private key given: generate a fresh key pair.
+    fprintf(stderr, "EC_KEY_generate_key_fips failed.\n");
+    return false;
+  }
+
+  // The output buffer is one larger than |EC_MAX_BYTES| so that truncation
+  // can be detected.
+  std::vector<uint8_t> output(EC_MAX_BYTES + 1);
+  const int out_len =
+      ECDH_compute_key(output.data(), output.size(), their_point.get(),
+                       ec_key.get(), /*kdf=*/nullptr);
+  if (out_len < 0) {
+    fprintf(stderr, "ECDH_compute_key failed.\n");
+    return false;
+  } else if (static_cast<size_t>(out_len) == output.size()) {  // Filling the oversized buffer completely is treated as possible truncation.
+    fprintf(stderr, "ECDH_compute_key output may have been truncated.\n");
+    return false;
+  }
+  output.resize(static_cast<size_t>(out_len));
+
+  const EC_POINT *pub = EC_KEY_get0_public_key(ec_key.get());
+  bssl::UniquePtr<BIGNUM> x(BN_new());
+  bssl::UniquePtr<BIGNUM> y(BN_new());
+  if (!EC_POINT_get_affine_coordinates_GFp(group, pub, x.get(), y.get(),
+                                           ctx.get())) {
+    fprintf(stderr, "EC_POINT_get_affine_coordinates_GFp failed.\n");
+    return false;
+  }
+
+  return WriteReply(STDOUT_FILENO, BIGNUMBytes(x.get()), BIGNUMBytes(y.get()),  // Reply order: our public x, our public y, shared output.
+                    output);
+}
+
+static bool FFDH(const Span<const uint8_t> args[]) {  // Finite-field DH. args: p, q, g, peer public key, our private key, our public key.
+  bssl::UniquePtr<BIGNUM> p(BytesToBIGNUM(args[0]));
+  bssl::UniquePtr<BIGNUM> q(BytesToBIGNUM(args[1]));
+  bssl::UniquePtr<BIGNUM> g(BytesToBIGNUM(args[2]));
+  bssl::UniquePtr<BIGNUM> their_pub(BytesToBIGNUM(args[3]));
+  const Span<const uint8_t> private_key_span = args[4];  // May be empty, in which case a fresh key is generated below.
+  const Span<const uint8_t> public_key_span = args[5];  // Only read when a private key is supplied.
+
+  bssl::UniquePtr<DH> dh(DH_new());
+  if (!DH_set0_pqg(dh.get(), p.get(), q.get(), g.get())) {
+    fprintf(stderr, "DH_set0_pqg failed.\n");
+    return false;
+  }
+
+  // DH_set0_pqg took ownership of these values.
+  p.release();
+  q.release();
+  g.release();
+
+  if (!private_key_span.empty()) {
+    bssl::UniquePtr<BIGNUM> private_key(BytesToBIGNUM(private_key_span));
+    bssl::UniquePtr<BIGNUM> public_key(BytesToBIGNUM(public_key_span));
+
+    if (!DH_set0_key(dh.get(), public_key.get(), private_key.get())) {
+      fprintf(stderr, "DH_set0_key failed.\n");
+      return false;
+    }
+
+    // DH_set0_key took ownership of these values.
+    public_key.release();
+    private_key.release();
+  } else if (!DH_generate_key(dh.get())) {
+    fprintf(stderr, "DH_generate_key failed.\n");
+    return false;
+  }
+
+  std::vector<uint8_t> z(DH_size(dh.get()));  // Shared-secret buffer sized by DH_size.
+  if (DH_compute_key_padded(z.data(), their_pub.get(), dh.get()) !=
+      static_cast<int>(z.size())) {  // Anything other than a full-length result is treated as failure.
+    fprintf(stderr, "DH_compute_key_padded failed.\n");
+    return false;
+  }
+
+  return WriteReply(STDOUT_FILENO, BIGNUMBytes(DH_get0_pub_key(dh.get())), z);  // Reply order: our public key, shared secret.
+}
+
 static constexpr struct {
   const char name[kMaxNameLength + 1];
   uint8_t expected_args;
@@ -575,12 +1755,27 @@
     {"SHA-1", 1, Hash<SHA1, SHA_DIGEST_LENGTH>},
     {"SHA2-224", 1, Hash<SHA224, SHA224_DIGEST_LENGTH>},
     {"SHA2-256", 1, Hash<SHA256, SHA256_DIGEST_LENGTH>},
-    {"SHA2-384", 1, Hash<SHA384, SHA256_DIGEST_LENGTH>},
+    {"SHA2-384", 1, Hash<SHA384, SHA384_DIGEST_LENGTH>},
     {"SHA2-512", 1, Hash<SHA512, SHA512_DIGEST_LENGTH>},
-    {"AES/encrypt", 2, AES<AES_set_encrypt_key, AES_encrypt>},
-    {"AES/decrypt", 2, AES<AES_set_decrypt_key, AES_decrypt>},
-    {"AES-CBC/encrypt", 3, AES_CBC<AES_set_encrypt_key, AES_ENCRYPT>},
-    {"AES-CBC/decrypt", 3, AES_CBC<AES_set_decrypt_key, AES_DECRYPT>},
+    {"SHA2-512/256", 1, Hash<SHA512_256, SHA512_256_DIGEST_LENGTH>},
+    {"AES/encrypt", 3, AES<AES_set_encrypt_key, AES_encrypt>},
+    {"AES/decrypt", 3, AES<AES_set_decrypt_key, AES_decrypt>},
+    {"AES-CBC/encrypt", 4, AES_CBC<AES_set_encrypt_key, AES_ENCRYPT>},
+    {"AES-CBC/decrypt", 4, AES_CBC<AES_set_decrypt_key, AES_DECRYPT>},
+    {"AES-CTR/encrypt", 4, AES_CTR},
+    {"AES-CTR/decrypt", 4, AES_CTR},
+    {"AES-GCM/seal", 5, AEADSeal<AESGCMSetup>},
+    {"AES-GCM/open", 5, AEADOpen<AESGCMSetup>},
+    {"AES-KW/seal", 5, AESKeyWrapSeal},
+    {"AES-KW/open", 5, AESKeyWrapOpen},
+    {"AES-KWP/seal", 5, AESPaddedKeyWrapSeal},
+    {"AES-KWP/open", 5, AESPaddedKeyWrapOpen},
+    {"AES-CCM/seal", 5, AEADSeal<AESCCMSetup>},
+    {"AES-CCM/open", 5, AEADOpen<AESCCMSetup>},
+    {"3DES-ECB/encrypt", 3, TDES<true>},
+    {"3DES-ECB/decrypt", 3, TDES<false>},
+    {"3DES-CBC/encrypt", 4, TDES_CBC<true>},
+    {"3DES-CBC/decrypt", 4, TDES_CBC<false>},
     {"HMAC-SHA-1", 2, HMAC<EVP_sha1>},
     {"HMAC-SHA2-224", 2, HMAC<EVP_sha224>},
     {"HMAC-SHA2-256", 2, HMAC<EVP_sha256>},
@@ -591,6 +1786,38 @@
     {"ECDSA/keyVer", 3, ECDSAKeyVer},
     {"ECDSA/sigGen", 4, ECDSASigGen},
     {"ECDSA/sigVer", 7, ECDSASigVer},
+    {"CMAC-AES", 3, CMAC_AES},
+    {"CMAC-AES/verify", 3, CMAC_AESVerify},
+    {"RSA/keyGen", 1, RSAKeyGen},
+    {"RSA/sigGen/SHA2-224/pkcs1v1.5", 2, RSASigGen<EVP_sha224, false>},
+    {"RSA/sigGen/SHA2-256/pkcs1v1.5", 2, RSASigGen<EVP_sha256, false>},
+    {"RSA/sigGen/SHA2-384/pkcs1v1.5", 2, RSASigGen<EVP_sha384, false>},
+    {"RSA/sigGen/SHA2-512/pkcs1v1.5", 2, RSASigGen<EVP_sha512, false>},
+    {"RSA/sigGen/SHA-1/pkcs1v1.5", 2, RSASigGen<EVP_sha1, false>},
+    {"RSA/sigGen/SHA2-224/pss", 2, RSASigGen<EVP_sha224, true>},
+    {"RSA/sigGen/SHA2-256/pss", 2, RSASigGen<EVP_sha256, true>},
+    {"RSA/sigGen/SHA2-384/pss", 2, RSASigGen<EVP_sha384, true>},
+    {"RSA/sigGen/SHA2-512/pss", 2, RSASigGen<EVP_sha512, true>},
+    {"RSA/sigGen/SHA-1/pss", 2, RSASigGen<EVP_sha1, true>},
+    {"RSA/sigVer/SHA2-224/pkcs1v1.5", 4, RSASigVer<EVP_sha224, false>},
+    {"RSA/sigVer/SHA2-256/pkcs1v1.5", 4, RSASigVer<EVP_sha256, false>},
+    {"RSA/sigVer/SHA2-384/pkcs1v1.5", 4, RSASigVer<EVP_sha384, false>},
+    {"RSA/sigVer/SHA2-512/pkcs1v1.5", 4, RSASigVer<EVP_sha512, false>},
+    {"RSA/sigVer/SHA-1/pkcs1v1.5", 4, RSASigVer<EVP_sha1, false>},
+    {"RSA/sigVer/SHA2-224/pss", 4, RSASigVer<EVP_sha224, true>},
+    {"RSA/sigVer/SHA2-256/pss", 4, RSASigVer<EVP_sha256, true>},
+    {"RSA/sigVer/SHA2-384/pss", 4, RSASigVer<EVP_sha384, true>},
+    {"RSA/sigVer/SHA2-512/pss", 4, RSASigVer<EVP_sha512, true>},
+    {"RSA/sigVer/SHA-1/pss", 4, RSASigVer<EVP_sha1, true>},
+    {"TLSKDF/1.0/SHA-1", 5, TLSKDF<EVP_md5_sha1>},
+    {"TLSKDF/1.2/SHA2-256", 5, TLSKDF<EVP_sha256>},
+    {"TLSKDF/1.2/SHA2-384", 5, TLSKDF<EVP_sha384>},
+    {"TLSKDF/1.2/SHA2-512", 5, TLSKDF<EVP_sha512>},
+    {"ECDH/P-224", 3, ECDH<NID_secp224r1>},
+    {"ECDH/P-256", 3, ECDH<NID_X9_62_prime256v1>},
+    {"ECDH/P-384", 3, ECDH<NID_secp384r1>},
+    {"ECDH/P-521", 3, ECDH<NID_secp521r1>},
+    {"FFDH", 6, FFDH},
 };
 
 int main() {
@@ -659,7 +1886,7 @@
       offset += nums[i + 1];
     }
 
-    bool found = true;
+    bool found = false;
     for (const auto &func : kFunctions) {
       if (args[0].size() == strlen(func.name) &&
           memcmp(args[0].data(), func.name, args[0].size()) == 0) {
@@ -671,6 +1898,7 @@
         }
 
         if (!func.handler(&args[1])) {
+          fprintf(stderr, "\'%s\' operation failed.\n", func.name);
           return 4;
         }
 
diff --git a/deps/boringssl/src/util/fipstools/cavp/test_fips.c b/deps/boringssl/src/util/fipstools/cavp/test_fips.c
index 66359c5..dd82d65 100644
--- a/deps/boringssl/src/util/fipstools/cavp/test_fips.c
+++ b/deps/boringssl/src/util/fipstools/cavp/test_fips.c
@@ -22,6 +22,7 @@
 #include <openssl/bn.h>
 #include <openssl/crypto.h>
 #include <openssl/des.h>
+#include <openssl/dh.h>
 #include <openssl/ecdsa.h>
 #include <openssl/ec_key.h>
 #include <openssl/hmac.h>
@@ -30,6 +31,7 @@
 #include <openssl/sha.h>
 
 #include "../crypto/fipsmodule/rand/internal.h"
+#include "../crypto/fipsmodule/tls/internal.h"
 #include "../crypto/internal.h"
 
 
@@ -217,6 +219,24 @@
     goto err;
   }
 
+  /* Primitive Z Computation */
+  const EC_GROUP *const ec_group = EC_KEY_get0_group(ec_key);
+  EC_POINT *z_point = EC_POINT_new(ec_group);
+  uint8_t z_result[65];
+  printf("About to compute key-agreement Z with P-256:\n");
+  if (!EC_POINT_mul(ec_group, z_point, NULL, EC_KEY_get0_public_key(ec_key),
+                    EC_KEY_get0_private_key(ec_key), NULL) ||
+      EC_POINT_point2oct(ec_group, z_point, POINT_CONVERSION_UNCOMPRESSED,
+                         z_result, sizeof(z_result),
+                         NULL) != sizeof(z_result)) {
+    fprintf(stderr, "EC_POINT_mul failed.\n");
+    goto err;
+  }
+  EC_POINT_free(z_point);
+
+  printf("  got ");
+  hexdump(z_result, sizeof(z_result));
+
   /* ECDSA Sign/Verify PWCT */
   printf("About to ECDSA sign ");
   hexdump(kPlaintextSHA256, sizeof(kPlaintextSHA256));
@@ -250,6 +270,36 @@
   hexdump(output, sizeof(output));
   CTR_DRBG_clear(&drbg);
 
+  /* TLS KDF */
+  printf("About to run TLS KDF\n");
+  uint8_t tls_output[32];
+  if (!CRYPTO_tls1_prf(EVP_sha256(), tls_output, sizeof(tls_output), kAESKey,
+                       sizeof(kAESKey), "foo", 3, kPlaintextSHA256,
+                       sizeof(kPlaintextSHA256), kPlaintextSHA256,
+                       sizeof(kPlaintextSHA256))) {
+    fprintf(stderr, "TLS KDF failed.\n");
+    goto err;
+  }
+  printf("  got ");
+  hexdump(tls_output, sizeof(tls_output));
+
+  /* FFDH */
+  printf("About to compute FFDH key-agreement:\n");
+  DH *dh = DH_get_rfc7919_2048();
+  uint8_t dh_result[2048/8];
+  if (!dh ||
+      !DH_generate_key(dh) ||
+      sizeof(dh_result) != DH_size(dh) ||
+      DH_compute_key_padded(dh_result, DH_get0_pub_key(dh), dh) !=
+          sizeof(dh_result)) {
+    fprintf(stderr, "FFDH failed.\n");
+    goto err;
+  }
+  DH_free(dh);
+
+  printf("  got ");
+  hexdump(dh_result, sizeof(dh_result));
+
   printf("PASS\n");
   return 0;
 
diff --git a/deps/boringssl/win-aarch64/crypto/chacha/chacha-armv8.S b/deps/boringssl/win-aarch64/crypto/chacha/chacha-armv8.S
new file mode 100644
index 0000000..ef06f9b
--- /dev/null
+++ b/deps/boringssl/win-aarch64/crypto/chacha/chacha-armv8.S
@@ -0,0 +1,2000 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+
+#if !defined(OPENSSL_NO_ASM)
+#if defined(__aarch64__)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+#include <openssl/arm_arch.h>
+
+
+
+
+.section	.rodata
+
+.align	5
+Lsigma:
+.quad	0x3320646e61707865,0x6b20657479622d32		// endian-neutral
+Lone:
+.long	1,0,0,0
+.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	2
+
+.text
+
+.globl	ChaCha20_ctr32
+
+.def ChaCha20_ctr32
+   .type 32
+.endef
+.align	5
+ChaCha20_ctr32:	// x0 = output, x1 = input, x2 = byte count, x3 = key, x4 = counter
+	AARCH64_VALID_CALL_TARGET
+	cbz	x2,Labort		// x2 == 0: go straight to the ret at Labort
+#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
+	adrp	x5,:pg_hi21_nc:OPENSSL_armcap_P
+#else
+	adrp	x5,OPENSSL_armcap_P
+#endif
+	cmp	x2,#192
+	b.lo	Lshort			// x2 < 192: skip the NEON check, use the code at Lshort
+	ldr	w17,[x5,:lo12:OPENSSL_armcap_P]	// load the OPENSSL_armcap_P word
+	tst	w17,#ARMV7_NEON
+	b.ne	ChaCha20_neon		// ARMV7_NEON bit set: continue in ChaCha20_neon
+
+Lshort:	// prologue and state load for the general-purpose-register path
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29,x30,[sp,#-96]!	// push a 96-byte frame; x29/x30 at its base
+	add	x29,sp,#0		// x29 = frame base, used by the epilogue loads
+
+	adrp	x5,Lsigma
+	add	x5,x5,:lo12:Lsigma	// x5 = address of Lsigma
+	stp	x19,x20,[sp,#16]	// save x19-x28 in the frame
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+	sub	sp,sp,#64		// 64 more bytes; Less_than_64 uses them as scratch
+
+	ldp	x22,x23,[x5]		// load sigma
+	ldp	x24,x25,[x3]		// load key
+	ldp	x26,x27,[x3,#16]
+	ldp	x28,x30,[x4]		// load counter
+#ifdef	__ARMEB__
+	ror	x24,x24,#32		// swap the 32-bit halves of each loaded key/counter pair
+	ror	x25,x25,#32
+	ror	x26,x26,#32
+	ror	x27,x27,#32
+	ror	x28,x28,#32
+	ror	x30,x30,#32
+#endif
+
+Loop_outer:
+	mov	w5,w22			// unpack key block
+	lsr	x6,x22,#32
+	mov	w7,w23
+	lsr	x8,x23,#32
+	mov	w9,w24
+	lsr	x10,x24,#32
+	mov	w11,w25
+	lsr	x12,x25,#32
+	mov	w13,w26
+	lsr	x14,x26,#32
+	mov	w15,w27
+	lsr	x16,x27,#32
+	mov	w17,w28
+	lsr	x19,x28,#32
+	mov	w20,w30
+	lsr	x21,x30,#32
+
+	mov	x4,#10
+	subs	x2,x2,#64
+Loop:
+	sub	x4,x4,#1
+	add	w5,w5,w9
+	add	w6,w6,w10
+	add	w7,w7,w11
+	add	w8,w8,w12
+	eor	w17,w17,w5
+	eor	w19,w19,w6
+	eor	w20,w20,w7
+	eor	w21,w21,w8
+	ror	w17,w17,#16
+	ror	w19,w19,#16
+	ror	w20,w20,#16
+	ror	w21,w21,#16
+	add	w13,w13,w17
+	add	w14,w14,w19
+	add	w15,w15,w20
+	add	w16,w16,w21
+	eor	w9,w9,w13
+	eor	w10,w10,w14
+	eor	w11,w11,w15
+	eor	w12,w12,w16
+	ror	w9,w9,#20
+	ror	w10,w10,#20
+	ror	w11,w11,#20
+	ror	w12,w12,#20
+	add	w5,w5,w9
+	add	w6,w6,w10
+	add	w7,w7,w11
+	add	w8,w8,w12
+	eor	w17,w17,w5
+	eor	w19,w19,w6
+	eor	w20,w20,w7
+	eor	w21,w21,w8
+	ror	w17,w17,#24
+	ror	w19,w19,#24
+	ror	w20,w20,#24
+	ror	w21,w21,#24
+	add	w13,w13,w17
+	add	w14,w14,w19
+	add	w15,w15,w20
+	add	w16,w16,w21
+	eor	w9,w9,w13
+	eor	w10,w10,w14
+	eor	w11,w11,w15
+	eor	w12,w12,w16
+	ror	w9,w9,#25
+	ror	w10,w10,#25
+	ror	w11,w11,#25
+	ror	w12,w12,#25
+	add	w5,w5,w10
+	add	w6,w6,w11
+	add	w7,w7,w12
+	add	w8,w8,w9
+	eor	w21,w21,w5
+	eor	w17,w17,w6
+	eor	w19,w19,w7
+	eor	w20,w20,w8
+	ror	w21,w21,#16
+	ror	w17,w17,#16
+	ror	w19,w19,#16
+	ror	w20,w20,#16
+	add	w15,w15,w21
+	add	w16,w16,w17
+	add	w13,w13,w19
+	add	w14,w14,w20
+	eor	w10,w10,w15
+	eor	w11,w11,w16
+	eor	w12,w12,w13
+	eor	w9,w9,w14
+	ror	w10,w10,#20
+	ror	w11,w11,#20
+	ror	w12,w12,#20
+	ror	w9,w9,#20
+	add	w5,w5,w10
+	add	w6,w6,w11
+	add	w7,w7,w12
+	add	w8,w8,w9
+	eor	w21,w21,w5
+	eor	w17,w17,w6
+	eor	w19,w19,w7
+	eor	w20,w20,w8
+	ror	w21,w21,#24
+	ror	w17,w17,#24
+	ror	w19,w19,#24
+	ror	w20,w20,#24
+	add	w15,w15,w21
+	add	w16,w16,w17
+	add	w13,w13,w19
+	add	w14,w14,w20
+	eor	w10,w10,w15
+	eor	w11,w11,w16
+	eor	w12,w12,w13
+	eor	w9,w9,w14
+	ror	w10,w10,#25
+	ror	w11,w11,#25
+	ror	w12,w12,#25
+	ror	w9,w9,#25
+	cbnz	x4,Loop
+
+	add	w5,w5,w22		// accumulate key block
+	add	x6,x6,x22,lsr#32
+	add	w7,w7,w23
+	add	x8,x8,x23,lsr#32
+	add	w9,w9,w24
+	add	x10,x10,x24,lsr#32
+	add	w11,w11,w25
+	add	x12,x12,x25,lsr#32
+	add	w13,w13,w26
+	add	x14,x14,x26,lsr#32
+	add	w15,w15,w27
+	add	x16,x16,x27,lsr#32
+	add	w17,w17,w28
+	add	x19,x19,x28,lsr#32
+	add	w20,w20,w30
+	add	x21,x21,x30,lsr#32
+
+	b.lo	Ltail
+
+	add	x5,x5,x6,lsl#32	// pack
+	add	x7,x7,x8,lsl#32
+	ldp	x6,x8,[x1,#0]		// load input
+	add	x9,x9,x10,lsl#32
+	add	x11,x11,x12,lsl#32
+	ldp	x10,x12,[x1,#16]
+	add	x13,x13,x14,lsl#32
+	add	x15,x15,x16,lsl#32
+	ldp	x14,x16,[x1,#32]
+	add	x17,x17,x19,lsl#32
+	add	x20,x20,x21,lsl#32
+	ldp	x19,x21,[x1,#48]
+	add	x1,x1,#64
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	eor	x5,x5,x6
+	eor	x7,x7,x8
+	eor	x9,x9,x10
+	eor	x11,x11,x12
+	eor	x13,x13,x14
+	eor	x15,x15,x16
+	eor	x17,x17,x19
+	eor	x20,x20,x21
+
+	stp	x5,x7,[x0,#0]		// store output
+	add	x28,x28,#1			// increment counter
+	stp	x9,x11,[x0,#16]
+	stp	x13,x15,[x0,#32]
+	stp	x17,x20,[x0,#48]
+	add	x0,x0,#64
+
+	b.hi	Loop_outer
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#64
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
+Labort:
+	ret
+
+.align	4
+Ltail:	// reached via b.lo after "subs x2,x2,#64" borrowed
+	add	x2,x2,#64		// undo that subtraction: x2 = bytes still to process
+Less_than_64:	// also entered from Ltail_neon with x2 < 64
+	sub	x0,x0,#1		// bias by -1: strb below indexes with the already-incremented x2
+	add	x1,x1,x2		// x1 = end of the remaining input
+	add	x0,x0,x2		// x0 = end of the remaining output, minus 1
+	add	x4,sp,x2		// x4 = sp + remaining, end of the bytes used from the stack copy
+	neg	x2,x2			// x2 = -remaining, counts up to zero
+
+	add	x5,x5,x6,lsl#32	// pack
+	add	x7,x7,x8,lsl#32
+	add	x9,x9,x10,lsl#32
+	add	x11,x11,x12,lsl#32
+	add	x13,x13,x14,lsl#32
+	add	x15,x15,x16,lsl#32
+	add	x17,x17,x19,lsl#32
+	add	x20,x20,x21,lsl#32
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	stp	x5,x7,[sp,#0]		// spill the packed 64-byte block to the stack scratch area
+	stp	x9,x11,[sp,#16]
+	stp	x13,x15,[sp,#32]
+	stp	x17,x20,[sp,#48]
+
+Loop_tail:	// one byte per iteration
+	ldrb	w10,[x1,x2]		// input byte
+	ldrb	w11,[x4,x2]		// matching byte of the block stored on the stack
+	add	x2,x2,#1
+	eor	w10,w10,w11
+	strb	w10,[x0,x2]		// x0 was biased by -1 above, so this is the matching output byte
+	cbnz	x2,Loop_tail
+
+	stp	xzr,xzr,[sp,#0]		// zero the 64-byte stack copy before returning
+	stp	xzr,xzr,[sp,#16]
+	stp	xzr,xzr,[sp,#32]
+	stp	xzr,xzr,[sp,#48]
+
+	ldp	x19,x20,[x29,#16]	// restore x19-x28 from the frame
+	add	sp,sp,#64		// drop the 64-byte scratch area
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#96	// restore x29/x30 and pop the frame
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+
+.def ChaCha20_neon
+   .type 32
+.endef
+.align	5
+ChaCha20_neon:	// branched to from ChaCha20_ctr32 when the ARMV7_NEON bit is set
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29,x30,[sp,#-96]!	// push a 96-byte frame; x29/x30 at its base
+	add	x29,sp,#0		// x29 = frame base, used by the epilogue loads
+
+	adrp	x5,Lsigma
+	add	x5,x5,:lo12:Lsigma	// x5 = address of Lsigma
+	stp	x19,x20,[sp,#16]	// save x19-x28 in the frame
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+	cmp	x2,#512
+	b.hs	L512_or_more_neon	// x2 >= 512: continue at L512_or_more_neon in ChaCha20_512_neon
+
+	sub	sp,sp,#64		// 64 bytes of stack scratch for the tail code
+
+	ldp	x22,x23,[x5]		// load sigma
+	ld1	{v24.4s},[x5],#16	// same 16 bytes into v24; x5 now points at Lone
+	ldp	x24,x25,[x3]		// load key
+	ldp	x26,x27,[x3,#16]
+	ld1	{v25.4s,v26.4s},[x3]	// same 32 key bytes into v25/v26
+	ldp	x28,x30,[x4]		// load counter
+	ld1	{v27.4s},[x4]		// same 16 counter bytes into v27
+	ld1	{v31.4s},[x5]		// v31 = {1,0,0,0} from Lone
+#ifdef	__ARMEB__
+	rev64	v24.4s,v24.4s
+	ror	x24,x24,#32
+	ror	x25,x25,#32
+	ror	x26,x26,#32
+	ror	x27,x27,#32
+	ror	x28,x28,#32
+	ror	x30,x30,#32
+#endif
+	add	v27.4s,v27.4s,v31.4s		// += 1
+	add	v28.4s,v27.4s,v31.4s		// v28 = v27 + 1
+	add	v29.4s,v28.4s,v31.4s		// v29 = v28 + 1
+	shl	v31.4s,v31.4s,#2			// 1 -> 4
+
+Loop_outer_neon:
+	mov	w5,w22			// unpack key block
+	lsr	x6,x22,#32
+	mov	v0.16b,v24.16b
+	mov	w7,w23
+	lsr	x8,x23,#32
+	mov	v4.16b,v24.16b
+	mov	w9,w24
+	lsr	x10,x24,#32
+	mov	v16.16b,v24.16b
+	mov	w11,w25
+	mov	v1.16b,v25.16b
+	lsr	x12,x25,#32
+	mov	v5.16b,v25.16b
+	mov	w13,w26
+	mov	v17.16b,v25.16b
+	lsr	x14,x26,#32
+	mov	v3.16b,v27.16b
+	mov	w15,w27
+	mov	v7.16b,v28.16b
+	lsr	x16,x27,#32
+	mov	v19.16b,v29.16b
+	mov	w17,w28
+	mov	v2.16b,v26.16b
+	lsr	x19,x28,#32
+	mov	v6.16b,v26.16b
+	mov	w20,w30
+	mov	v18.16b,v26.16b
+	lsr	x21,x30,#32
+
+	mov	x4,#10
+	subs	x2,x2,#256
+Loop_neon:
+	sub	x4,x4,#1
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w9
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w10
+	add	v16.4s,v16.4s,v17.4s
+	add	w7,w7,w11
+	eor	v3.16b,v3.16b,v0.16b
+	add	w8,w8,w12
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w17,w17,w5
+	eor	v19.16b,v19.16b,v16.16b
+	eor	w19,w19,w6
+	rev32	v3.8h,v3.8h
+	eor	w20,w20,w7
+	rev32	v7.8h,v7.8h
+	eor	w21,w21,w8
+	rev32	v19.8h,v19.8h
+	ror	w17,w17,#16
+	add	v2.4s,v2.4s,v3.4s
+	ror	w19,w19,#16
+	add	v6.4s,v6.4s,v7.4s
+	ror	w20,w20,#16
+	add	v18.4s,v18.4s,v19.4s
+	ror	w21,w21,#16
+	eor	v20.16b,v1.16b,v2.16b
+	add	w13,w13,w17
+	eor	v21.16b,v5.16b,v6.16b
+	add	w14,w14,w19
+	eor	v22.16b,v17.16b,v18.16b
+	add	w15,w15,w20
+	ushr	v1.4s,v20.4s,#20
+	add	w16,w16,w21
+	ushr	v5.4s,v21.4s,#20
+	eor	w9,w9,w13
+	ushr	v17.4s,v22.4s,#20
+	eor	w10,w10,w14
+	sli	v1.4s,v20.4s,#12
+	eor	w11,w11,w15
+	sli	v5.4s,v21.4s,#12
+	eor	w12,w12,w16
+	sli	v17.4s,v22.4s,#12
+	ror	w9,w9,#20
+	add	v0.4s,v0.4s,v1.4s
+	ror	w10,w10,#20
+	add	v4.4s,v4.4s,v5.4s
+	ror	w11,w11,#20
+	add	v16.4s,v16.4s,v17.4s
+	ror	w12,w12,#20
+	eor	v20.16b,v3.16b,v0.16b
+	add	w5,w5,w9
+	eor	v21.16b,v7.16b,v4.16b
+	add	w6,w6,w10
+	eor	v22.16b,v19.16b,v16.16b
+	add	w7,w7,w11
+	ushr	v3.4s,v20.4s,#24
+	add	w8,w8,w12
+	ushr	v7.4s,v21.4s,#24
+	eor	w17,w17,w5
+	ushr	v19.4s,v22.4s,#24
+	eor	w19,w19,w6
+	sli	v3.4s,v20.4s,#8
+	eor	w20,w20,w7
+	sli	v7.4s,v21.4s,#8
+	eor	w21,w21,w8
+	sli	v19.4s,v22.4s,#8
+	ror	w17,w17,#24
+	add	v2.4s,v2.4s,v3.4s
+	ror	w19,w19,#24
+	add	v6.4s,v6.4s,v7.4s
+	ror	w20,w20,#24
+	add	v18.4s,v18.4s,v19.4s
+	ror	w21,w21,#24
+	eor	v20.16b,v1.16b,v2.16b
+	add	w13,w13,w17
+	eor	v21.16b,v5.16b,v6.16b
+	add	w14,w14,w19
+	eor	v22.16b,v17.16b,v18.16b
+	add	w15,w15,w20
+	ushr	v1.4s,v20.4s,#25
+	add	w16,w16,w21
+	ushr	v5.4s,v21.4s,#25
+	eor	w9,w9,w13
+	ushr	v17.4s,v22.4s,#25
+	eor	w10,w10,w14
+	sli	v1.4s,v20.4s,#7
+	eor	w11,w11,w15
+	sli	v5.4s,v21.4s,#7
+	eor	w12,w12,w16
+	sli	v17.4s,v22.4s,#7
+	ror	w9,w9,#25
+	ext	v2.16b,v2.16b,v2.16b,#8
+	ror	w10,w10,#25
+	ext	v6.16b,v6.16b,v6.16b,#8
+	ror	w11,w11,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w12,w12,#25
+	ext	v3.16b,v3.16b,v3.16b,#12
+	ext	v7.16b,v7.16b,v7.16b,#12
+	ext	v19.16b,v19.16b,v19.16b,#12
+	ext	v1.16b,v1.16b,v1.16b,#4
+	ext	v5.16b,v5.16b,v5.16b,#4
+	ext	v17.16b,v17.16b,v17.16b,#4
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w10
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w11
+	add	v16.4s,v16.4s,v17.4s
+	add	w7,w7,w12
+	eor	v3.16b,v3.16b,v0.16b
+	add	w8,w8,w9
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w21,w21,w5
+	eor	v19.16b,v19.16b,v16.16b
+	eor	w17,w17,w6
+	rev32	v3.8h,v3.8h
+	eor	w19,w19,w7
+	rev32	v7.8h,v7.8h
+	eor	w20,w20,w8
+	rev32	v19.8h,v19.8h
+	ror	w21,w21,#16
+	add	v2.4s,v2.4s,v3.4s
+	ror	w17,w17,#16
+	add	v6.4s,v6.4s,v7.4s
+	ror	w19,w19,#16
+	add	v18.4s,v18.4s,v19.4s
+	ror	w20,w20,#16
+	eor	v20.16b,v1.16b,v2.16b
+	add	w15,w15,w21
+	eor	v21.16b,v5.16b,v6.16b
+	add	w16,w16,w17
+	eor	v22.16b,v17.16b,v18.16b
+	add	w13,w13,w19
+	ushr	v1.4s,v20.4s,#20
+	add	w14,w14,w20
+	ushr	v5.4s,v21.4s,#20
+	eor	w10,w10,w15
+	ushr	v17.4s,v22.4s,#20
+	eor	w11,w11,w16
+	sli	v1.4s,v20.4s,#12
+	eor	w12,w12,w13
+	sli	v5.4s,v21.4s,#12
+	eor	w9,w9,w14
+	sli	v17.4s,v22.4s,#12
+	ror	w10,w10,#20
+	add	v0.4s,v0.4s,v1.4s
+	ror	w11,w11,#20
+	add	v4.4s,v4.4s,v5.4s
+	ror	w12,w12,#20
+	add	v16.4s,v16.4s,v17.4s
+	ror	w9,w9,#20
+	eor	v20.16b,v3.16b,v0.16b
+	add	w5,w5,w10
+	eor	v21.16b,v7.16b,v4.16b
+	add	w6,w6,w11
+	eor	v22.16b,v19.16b,v16.16b
+	add	w7,w7,w12
+	ushr	v3.4s,v20.4s,#24
+	add	w8,w8,w9
+	ushr	v7.4s,v21.4s,#24
+	eor	w21,w21,w5
+	ushr	v19.4s,v22.4s,#24
+	eor	w17,w17,w6
+	sli	v3.4s,v20.4s,#8
+	eor	w19,w19,w7
+	sli	v7.4s,v21.4s,#8
+	eor	w20,w20,w8
+	sli	v19.4s,v22.4s,#8
+	ror	w21,w21,#24
+	add	v2.4s,v2.4s,v3.4s
+	ror	w17,w17,#24
+	add	v6.4s,v6.4s,v7.4s
+	ror	w19,w19,#24
+	add	v18.4s,v18.4s,v19.4s
+	ror	w20,w20,#24
+	eor	v20.16b,v1.16b,v2.16b
+	add	w15,w15,w21
+	eor	v21.16b,v5.16b,v6.16b
+	add	w16,w16,w17
+	eor	v22.16b,v17.16b,v18.16b
+	add	w13,w13,w19
+	ushr	v1.4s,v20.4s,#25
+	add	w14,w14,w20
+	ushr	v5.4s,v21.4s,#25
+	eor	w10,w10,w15
+	ushr	v17.4s,v22.4s,#25
+	eor	w11,w11,w16
+	sli	v1.4s,v20.4s,#7
+	eor	w12,w12,w13
+	sli	v5.4s,v21.4s,#7
+	eor	w9,w9,w14
+	sli	v17.4s,v22.4s,#7
+	ror	w10,w10,#25
+	ext	v2.16b,v2.16b,v2.16b,#8
+	ror	w11,w11,#25
+	ext	v6.16b,v6.16b,v6.16b,#8
+	ror	w12,w12,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w9,w9,#25
+	ext	v3.16b,v3.16b,v3.16b,#4
+	ext	v7.16b,v7.16b,v7.16b,#4
+	ext	v19.16b,v19.16b,v19.16b,#4
+	ext	v1.16b,v1.16b,v1.16b,#12
+	ext	v5.16b,v5.16b,v5.16b,#12
+	ext	v17.16b,v17.16b,v17.16b,#12
+	cbnz	x4,Loop_neon
+
+	add	w5,w5,w22		// accumulate key block
+	add	v0.4s,v0.4s,v24.4s
+	add	x6,x6,x22,lsr#32
+	add	v4.4s,v4.4s,v24.4s
+	add	w7,w7,w23
+	add	v16.4s,v16.4s,v24.4s
+	add	x8,x8,x23,lsr#32
+	add	v2.4s,v2.4s,v26.4s
+	add	w9,w9,w24
+	add	v6.4s,v6.4s,v26.4s
+	add	x10,x10,x24,lsr#32
+	add	v18.4s,v18.4s,v26.4s
+	add	w11,w11,w25
+	add	v3.4s,v3.4s,v27.4s
+	add	x12,x12,x25,lsr#32
+	add	w13,w13,w26
+	add	v7.4s,v7.4s,v28.4s
+	add	x14,x14,x26,lsr#32
+	add	w15,w15,w27
+	add	v19.4s,v19.4s,v29.4s
+	add	x16,x16,x27,lsr#32
+	add	w17,w17,w28
+	add	v1.4s,v1.4s,v25.4s
+	add	x19,x19,x28,lsr#32
+	add	w20,w20,w30
+	add	v5.4s,v5.4s,v25.4s
+	add	x21,x21,x30,lsr#32
+	add	v17.4s,v17.4s,v25.4s
+
+	b.lo	Ltail_neon
+
+	add	x5,x5,x6,lsl#32	// pack
+	add	x7,x7,x8,lsl#32
+	ldp	x6,x8,[x1,#0]		// load input
+	add	x9,x9,x10,lsl#32
+	add	x11,x11,x12,lsl#32
+	ldp	x10,x12,[x1,#16]
+	add	x13,x13,x14,lsl#32
+	add	x15,x15,x16,lsl#32
+	ldp	x14,x16,[x1,#32]
+	add	x17,x17,x19,lsl#32
+	add	x20,x20,x21,lsl#32
+	ldp	x19,x21,[x1,#48]
+	add	x1,x1,#64
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
+	eor	x5,x5,x6
+	eor	x7,x7,x8
+	eor	x9,x9,x10
+	eor	x11,x11,x12
+	eor	x13,x13,x14
+	eor	v0.16b,v0.16b,v20.16b
+	eor	x15,x15,x16
+	eor	v1.16b,v1.16b,v21.16b
+	eor	x17,x17,x19
+	eor	v2.16b,v2.16b,v22.16b
+	eor	x20,x20,x21
+	eor	v3.16b,v3.16b,v23.16b
+	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
+
+	stp	x5,x7,[x0,#0]		// store output
+	add	x28,x28,#4			// increment counter
+	stp	x9,x11,[x0,#16]
+	add	v27.4s,v27.4s,v31.4s		// += 4
+	stp	x13,x15,[x0,#32]
+	add	v28.4s,v28.4s,v31.4s
+	stp	x17,x20,[x0,#48]
+	add	v29.4s,v29.4s,v31.4s
+	add	x0,x0,#64
+
+	st1	{v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64
+	ld1	{v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64
+
+	eor	v4.16b,v4.16b,v20.16b
+	eor	v5.16b,v5.16b,v21.16b
+	eor	v6.16b,v6.16b,v22.16b
+	eor	v7.16b,v7.16b,v23.16b
+	st1	{v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64
+
+	eor	v16.16b,v16.16b,v0.16b
+	eor	v17.16b,v17.16b,v1.16b
+	eor	v18.16b,v18.16b,v2.16b
+	eor	v19.16b,v19.16b,v3.16b
+	st1	{v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64
+
+	b.hi	Loop_outer_neon
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#64
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+Ltail_neon:
+	add	x2,x2,#256
+	cmp	x2,#64
+	b.lo	Less_than_64
+
+	add	x5,x5,x6,lsl#32	// pack
+	add	x7,x7,x8,lsl#32
+	ldp	x6,x8,[x1,#0]		// load input
+	add	x9,x9,x10,lsl#32
+	add	x11,x11,x12,lsl#32
+	ldp	x10,x12,[x1,#16]
+	add	x13,x13,x14,lsl#32
+	add	x15,x15,x16,lsl#32
+	ldp	x14,x16,[x1,#32]
+	add	x17,x17,x19,lsl#32
+	add	x20,x20,x21,lsl#32
+	ldp	x19,x21,[x1,#48]
+	add	x1,x1,#64
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	eor	x5,x5,x6
+	eor	x7,x7,x8
+	eor	x9,x9,x10
+	eor	x11,x11,x12
+	eor	x13,x13,x14
+	eor	x15,x15,x16
+	eor	x17,x17,x19
+	eor	x20,x20,x21
+
+	stp	x5,x7,[x0,#0]		// store output
+	add	x28,x28,#4			// increment counter
+	stp	x9,x11,[x0,#16]
+	stp	x13,x15,[x0,#32]
+	stp	x17,x20,[x0,#48]
+	add	x0,x0,#64
+	b.eq	Ldone_neon
+	sub	x2,x2,#64
+	cmp	x2,#64
+	b.lo	Less_than_128
+
+	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
+	eor	v0.16b,v0.16b,v20.16b
+	eor	v1.16b,v1.16b,v21.16b
+	eor	v2.16b,v2.16b,v22.16b
+	eor	v3.16b,v3.16b,v23.16b
+	st1	{v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64
+	b.eq	Ldone_neon
+	sub	x2,x2,#64
+	cmp	x2,#64
+	b.lo	Less_than_192
+
+	ld1	{v20.16b,v21.16b,v22.16b,v23.16b},[x1],#64
+	eor	v4.16b,v4.16b,v20.16b
+	eor	v5.16b,v5.16b,v21.16b
+	eor	v6.16b,v6.16b,v22.16b
+	eor	v7.16b,v7.16b,v23.16b
+	st1	{v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64
+	b.eq	Ldone_neon
+	sub	x2,x2,#64
+
+	st1	{v16.16b,v17.16b,v18.16b,v19.16b},[sp]
+	b	Last_neon
+
+Less_than_128:
+	st1	{v0.16b,v1.16b,v2.16b,v3.16b},[sp]
+	b	Last_neon
+Less_than_192:
+	st1	{v4.16b,v5.16b,v6.16b,v7.16b},[sp]
+	b	Last_neon
+
+.align	4
+Last_neon:	// every branch here has just stored a 64-byte block at [sp]
+	sub	x0,x0,#1		// bias by -1: strb below indexes with the already-incremented x2
+	add	x1,x1,x2		// x1 = end of the remaining input
+	add	x0,x0,x2		// x0 = end of the remaining output, minus 1
+	add	x4,sp,x2		// x4 = sp + remaining, end of the bytes used from the stack copy
+	neg	x2,x2			// x2 = -remaining, counts up to zero
+
+Loop_tail_neon:	// one byte per iteration
+	ldrb	w10,[x1,x2]		// input byte
+	ldrb	w11,[x4,x2]		// matching byte of the block stored on the stack
+	add	x2,x2,#1
+	eor	w10,w10,w11
+	strb	w10,[x0,x2]		// x0 was biased by -1 above, so this is the matching output byte
+	cbnz	x2,Loop_tail_neon
+
+	stp	xzr,xzr,[sp,#0]		// zero the 64-byte stack copy before returning
+	stp	xzr,xzr,[sp,#16]
+	stp	xzr,xzr,[sp,#32]
+	stp	xzr,xzr,[sp,#48]
+
+Ldone_neon:	// shared epilogue; the b.eq exits in Ltail_neon also land here
+	ldp	x19,x20,[x29,#16]	// restore x19-x28 from the frame
+	add	sp,sp,#64		// drop the 64-byte scratch area
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#96	// restore x29/x30 and pop the frame
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+.def ChaCha20_512_neon
+   .type 32
+.endef
+.align	5
+ChaCha20_512_neon:
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29,x30,[sp,#-96]!
+	add	x29,sp,#0
+
+	adrp	x5,Lsigma
+	add	x5,x5,:lo12:Lsigma
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+
+L512_or_more_neon:
+	sub	sp,sp,#128+64
+
+	ldp	x22,x23,[x5]		// load sigma
+	ld1	{v24.4s},[x5],#16
+	ldp	x24,x25,[x3]		// load key
+	ldp	x26,x27,[x3,#16]
+	ld1	{v25.4s,v26.4s},[x3]
+	ldp	x28,x30,[x4]		// load counter
+	ld1	{v27.4s},[x4]
+	ld1	{v31.4s},[x5]
+#ifdef	__ARMEB__
+	rev64	v24.4s,v24.4s
+	ror	x24,x24,#32
+	ror	x25,x25,#32
+	ror	x26,x26,#32
+	ror	x27,x27,#32
+	ror	x28,x28,#32
+	ror	x30,x30,#32
+#endif
+	add	v27.4s,v27.4s,v31.4s		// += 1
+	stp	q24,q25,[sp,#0]		// off-load key block, invariant part
+	add	v27.4s,v27.4s,v31.4s		// not typo
+	str	q26,[sp,#32]
+	add	v28.4s,v27.4s,v31.4s
+	add	v29.4s,v28.4s,v31.4s
+	add	v30.4s,v29.4s,v31.4s
+	shl	v31.4s,v31.4s,#2			// 1 -> 4
+
+	stp	d8,d9,[sp,#128+0]		// meet ABI requirements
+	stp	d10,d11,[sp,#128+16]
+	stp	d12,d13,[sp,#128+32]
+	stp	d14,d15,[sp,#128+48]
+
+	sub	x2,x2,#512			// not typo
+
+Loop_outer_512_neon:
+	mov	v0.16b,v24.16b
+	mov	v4.16b,v24.16b
+	mov	v8.16b,v24.16b
+	mov	v12.16b,v24.16b
+	mov	v16.16b,v24.16b
+	mov	v20.16b,v24.16b
+	mov	v1.16b,v25.16b
+	mov	w5,w22			// unpack key block
+	mov	v5.16b,v25.16b
+	lsr	x6,x22,#32
+	mov	v9.16b,v25.16b
+	mov	w7,w23
+	mov	v13.16b,v25.16b
+	lsr	x8,x23,#32
+	mov	v17.16b,v25.16b
+	mov	w9,w24
+	mov	v21.16b,v25.16b
+	lsr	x10,x24,#32
+	mov	v3.16b,v27.16b
+	mov	w11,w25
+	mov	v7.16b,v28.16b
+	lsr	x12,x25,#32
+	mov	v11.16b,v29.16b
+	mov	w13,w26
+	mov	v15.16b,v30.16b
+	lsr	x14,x26,#32
+	mov	v2.16b,v26.16b
+	mov	w15,w27
+	mov	v6.16b,v26.16b
+	lsr	x16,x27,#32
+	add	v19.4s,v3.4s,v31.4s			// +4
+	mov	w17,w28
+	add	v23.4s,v7.4s,v31.4s			// +4
+	lsr	x19,x28,#32
+	mov	v10.16b,v26.16b
+	mov	w20,w30
+	mov	v14.16b,v26.16b
+	lsr	x21,x30,#32
+	mov	v18.16b,v26.16b
+	stp	q27,q28,[sp,#48]		// off-load key block, variable part
+	mov	v22.16b,v26.16b
+	str	q29,[sp,#80]
+
+	mov	x4,#5
+	subs	x2,x2,#512
+Loop_upper_neon:
+	sub	x4,x4,#1
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w9
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w10
+	add	v8.4s,v8.4s,v9.4s
+	add	w7,w7,w11
+	add	v12.4s,v12.4s,v13.4s
+	add	w8,w8,w12
+	add	v16.4s,v16.4s,v17.4s
+	eor	w17,w17,w5
+	add	v20.4s,v20.4s,v21.4s
+	eor	w19,w19,w6
+	eor	v3.16b,v3.16b,v0.16b
+	eor	w20,w20,w7
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w21,w21,w8
+	eor	v11.16b,v11.16b,v8.16b
+	ror	w17,w17,#16
+	eor	v15.16b,v15.16b,v12.16b
+	ror	w19,w19,#16
+	eor	v19.16b,v19.16b,v16.16b
+	ror	w20,w20,#16
+	eor	v23.16b,v23.16b,v20.16b
+	ror	w21,w21,#16
+	rev32	v3.8h,v3.8h
+	add	w13,w13,w17
+	rev32	v7.8h,v7.8h
+	add	w14,w14,w19
+	rev32	v11.8h,v11.8h
+	add	w15,w15,w20
+	rev32	v15.8h,v15.8h
+	add	w16,w16,w21
+	rev32	v19.8h,v19.8h
+	eor	w9,w9,w13
+	rev32	v23.8h,v23.8h
+	eor	w10,w10,w14
+	add	v2.4s,v2.4s,v3.4s
+	eor	w11,w11,w15
+	add	v6.4s,v6.4s,v7.4s
+	eor	w12,w12,w16
+	add	v10.4s,v10.4s,v11.4s
+	ror	w9,w9,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w10,w10,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w11,w11,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w12,w12,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w9
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w10
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w11
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w12
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w17,w17,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w19,w19,w6
+	ushr	v1.4s,v24.4s,#20
+	eor	w20,w20,w7
+	ushr	v5.4s,v25.4s,#20
+	eor	w21,w21,w8
+	ushr	v9.4s,v26.4s,#20
+	ror	w17,w17,#24
+	ushr	v13.4s,v27.4s,#20
+	ror	w19,w19,#24
+	ushr	v17.4s,v28.4s,#20
+	ror	w20,w20,#24
+	ushr	v21.4s,v29.4s,#20
+	ror	w21,w21,#24
+	sli	v1.4s,v24.4s,#12
+	add	w13,w13,w17
+	sli	v5.4s,v25.4s,#12
+	add	w14,w14,w19
+	sli	v9.4s,v26.4s,#12
+	add	w15,w15,w20
+	sli	v13.4s,v27.4s,#12
+	add	w16,w16,w21
+	sli	v17.4s,v28.4s,#12
+	eor	w9,w9,w13
+	sli	v21.4s,v29.4s,#12
+	eor	w10,w10,w14
+	add	v0.4s,v0.4s,v1.4s
+	eor	w11,w11,w15
+	add	v4.4s,v4.4s,v5.4s
+	eor	w12,w12,w16
+	add	v8.4s,v8.4s,v9.4s
+	ror	w9,w9,#25
+	add	v12.4s,v12.4s,v13.4s
+	ror	w10,w10,#25
+	add	v16.4s,v16.4s,v17.4s
+	ror	w11,w11,#25
+	add	v20.4s,v20.4s,v21.4s
+	ror	w12,w12,#25
+	eor	v24.16b,v3.16b,v0.16b
+	add	w5,w5,w10
+	eor	v25.16b,v7.16b,v4.16b
+	add	w6,w6,w11
+	eor	v26.16b,v11.16b,v8.16b
+	add	w7,w7,w12
+	eor	v27.16b,v15.16b,v12.16b
+	add	w8,w8,w9
+	eor	v28.16b,v19.16b,v16.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v23.16b,v20.16b
+	eor	w17,w17,w6
+	ushr	v3.4s,v24.4s,#24
+	eor	w19,w19,w7
+	ushr	v7.4s,v25.4s,#24
+	eor	w20,w20,w8
+	ushr	v11.4s,v26.4s,#24
+	ror	w21,w21,#16
+	ushr	v15.4s,v27.4s,#24
+	ror	w17,w17,#16
+	ushr	v19.4s,v28.4s,#24
+	ror	w19,w19,#16
+	ushr	v23.4s,v29.4s,#24
+	ror	w20,w20,#16
+	sli	v3.4s,v24.4s,#8
+	add	w15,w15,w21
+	sli	v7.4s,v25.4s,#8
+	add	w16,w16,w17
+	sli	v11.4s,v26.4s,#8
+	add	w13,w13,w19
+	sli	v15.4s,v27.4s,#8
+	add	w14,w14,w20
+	sli	v19.4s,v28.4s,#8
+	eor	w10,w10,w15
+	sli	v23.4s,v29.4s,#8
+	eor	w11,w11,w16
+	add	v2.4s,v2.4s,v3.4s
+	eor	w12,w12,w13
+	add	v6.4s,v6.4s,v7.4s
+	eor	w9,w9,w14
+	add	v10.4s,v10.4s,v11.4s
+	ror	w10,w10,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w11,w11,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w12,w12,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w9,w9,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w10
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w11
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w12
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w9
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w17,w17,w6
+	ushr	v1.4s,v24.4s,#25
+	eor	w19,w19,w7
+	ushr	v5.4s,v25.4s,#25
+	eor	w20,w20,w8
+	ushr	v9.4s,v26.4s,#25
+	ror	w21,w21,#24
+	ushr	v13.4s,v27.4s,#25
+	ror	w17,w17,#24
+	ushr	v17.4s,v28.4s,#25
+	ror	w19,w19,#24
+	ushr	v21.4s,v29.4s,#25
+	ror	w20,w20,#24
+	sli	v1.4s,v24.4s,#7
+	add	w15,w15,w21
+	sli	v5.4s,v25.4s,#7
+	add	w16,w16,w17
+	sli	v9.4s,v26.4s,#7
+	add	w13,w13,w19
+	sli	v13.4s,v27.4s,#7
+	add	w14,w14,w20
+	sli	v17.4s,v28.4s,#7
+	eor	w10,w10,w15
+	sli	v21.4s,v29.4s,#7
+	eor	w11,w11,w16
+	ext	v2.16b,v2.16b,v2.16b,#8
+	eor	w12,w12,w13
+	ext	v6.16b,v6.16b,v6.16b,#8
+	eor	w9,w9,w14
+	ext	v10.16b,v10.16b,v10.16b,#8
+	ror	w10,w10,#25
+	ext	v14.16b,v14.16b,v14.16b,#8
+	ror	w11,w11,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w12,w12,#25
+	ext	v22.16b,v22.16b,v22.16b,#8
+	ror	w9,w9,#25
+	ext	v3.16b,v3.16b,v3.16b,#12
+	ext	v7.16b,v7.16b,v7.16b,#12
+	ext	v11.16b,v11.16b,v11.16b,#12
+	ext	v15.16b,v15.16b,v15.16b,#12
+	ext	v19.16b,v19.16b,v19.16b,#12
+	ext	v23.16b,v23.16b,v23.16b,#12
+	ext	v1.16b,v1.16b,v1.16b,#4
+	ext	v5.16b,v5.16b,v5.16b,#4
+	ext	v9.16b,v9.16b,v9.16b,#4
+	ext	v13.16b,v13.16b,v13.16b,#4
+	ext	v17.16b,v17.16b,v17.16b,#4
+	ext	v21.16b,v21.16b,v21.16b,#4
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w9
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w10
+	add	v8.4s,v8.4s,v9.4s
+	add	w7,w7,w11
+	add	v12.4s,v12.4s,v13.4s
+	add	w8,w8,w12
+	add	v16.4s,v16.4s,v17.4s
+	eor	w17,w17,w5
+	add	v20.4s,v20.4s,v21.4s
+	eor	w19,w19,w6
+	eor	v3.16b,v3.16b,v0.16b
+	eor	w20,w20,w7
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w21,w21,w8
+	eor	v11.16b,v11.16b,v8.16b
+	ror	w17,w17,#16
+	eor	v15.16b,v15.16b,v12.16b
+	ror	w19,w19,#16
+	eor	v19.16b,v19.16b,v16.16b
+	ror	w20,w20,#16
+	eor	v23.16b,v23.16b,v20.16b
+	ror	w21,w21,#16
+	rev32	v3.8h,v3.8h
+	add	w13,w13,w17
+	rev32	v7.8h,v7.8h
+	add	w14,w14,w19
+	rev32	v11.8h,v11.8h
+	add	w15,w15,w20
+	rev32	v15.8h,v15.8h
+	add	w16,w16,w21
+	rev32	v19.8h,v19.8h
+	eor	w9,w9,w13
+	rev32	v23.8h,v23.8h
+	eor	w10,w10,w14
+	add	v2.4s,v2.4s,v3.4s
+	eor	w11,w11,w15
+	add	v6.4s,v6.4s,v7.4s
+	eor	w12,w12,w16
+	add	v10.4s,v10.4s,v11.4s
+	ror	w9,w9,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w10,w10,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w11,w11,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w12,w12,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w9
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w10
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w11
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w12
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w17,w17,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w19,w19,w6
+	ushr	v1.4s,v24.4s,#20
+	eor	w20,w20,w7
+	ushr	v5.4s,v25.4s,#20
+	eor	w21,w21,w8
+	ushr	v9.4s,v26.4s,#20
+	ror	w17,w17,#24
+	ushr	v13.4s,v27.4s,#20
+	ror	w19,w19,#24
+	ushr	v17.4s,v28.4s,#20
+	ror	w20,w20,#24
+	ushr	v21.4s,v29.4s,#20
+	ror	w21,w21,#24
+	sli	v1.4s,v24.4s,#12
+	add	w13,w13,w17
+	sli	v5.4s,v25.4s,#12
+	add	w14,w14,w19
+	sli	v9.4s,v26.4s,#12
+	add	w15,w15,w20
+	sli	v13.4s,v27.4s,#12
+	add	w16,w16,w21
+	sli	v17.4s,v28.4s,#12
+	eor	w9,w9,w13
+	sli	v21.4s,v29.4s,#12
+	eor	w10,w10,w14
+	add	v0.4s,v0.4s,v1.4s
+	eor	w11,w11,w15
+	add	v4.4s,v4.4s,v5.4s
+	eor	w12,w12,w16
+	add	v8.4s,v8.4s,v9.4s
+	ror	w9,w9,#25
+	add	v12.4s,v12.4s,v13.4s
+	ror	w10,w10,#25
+	add	v16.4s,v16.4s,v17.4s
+	ror	w11,w11,#25
+	add	v20.4s,v20.4s,v21.4s
+	ror	w12,w12,#25
+	eor	v24.16b,v3.16b,v0.16b
+	add	w5,w5,w10
+	eor	v25.16b,v7.16b,v4.16b
+	add	w6,w6,w11
+	eor	v26.16b,v11.16b,v8.16b
+	add	w7,w7,w12
+	eor	v27.16b,v15.16b,v12.16b
+	add	w8,w8,w9
+	eor	v28.16b,v19.16b,v16.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v23.16b,v20.16b
+	eor	w17,w17,w6
+	ushr	v3.4s,v24.4s,#24
+	eor	w19,w19,w7
+	ushr	v7.4s,v25.4s,#24
+	eor	w20,w20,w8
+	ushr	v11.4s,v26.4s,#24
+	ror	w21,w21,#16
+	ushr	v15.4s,v27.4s,#24
+	ror	w17,w17,#16
+	ushr	v19.4s,v28.4s,#24
+	ror	w19,w19,#16
+	ushr	v23.4s,v29.4s,#24
+	ror	w20,w20,#16
+	sli	v3.4s,v24.4s,#8
+	add	w15,w15,w21
+	sli	v7.4s,v25.4s,#8
+	add	w16,w16,w17
+	sli	v11.4s,v26.4s,#8
+	add	w13,w13,w19
+	sli	v15.4s,v27.4s,#8
+	add	w14,w14,w20
+	sli	v19.4s,v28.4s,#8
+	eor	w10,w10,w15
+	sli	v23.4s,v29.4s,#8
+	eor	w11,w11,w16
+	add	v2.4s,v2.4s,v3.4s
+	eor	w12,w12,w13
+	add	v6.4s,v6.4s,v7.4s
+	eor	w9,w9,w14
+	add	v10.4s,v10.4s,v11.4s
+	ror	w10,w10,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w11,w11,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w12,w12,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w9,w9,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w10
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w11
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w12
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w9
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w17,w17,w6
+	ushr	v1.4s,v24.4s,#25
+	eor	w19,w19,w7
+	ushr	v5.4s,v25.4s,#25
+	eor	w20,w20,w8
+	ushr	v9.4s,v26.4s,#25
+	ror	w21,w21,#24
+	ushr	v13.4s,v27.4s,#25
+	ror	w17,w17,#24
+	ushr	v17.4s,v28.4s,#25
+	ror	w19,w19,#24
+	ushr	v21.4s,v29.4s,#25
+	ror	w20,w20,#24
+	sli	v1.4s,v24.4s,#7
+	add	w15,w15,w21
+	sli	v5.4s,v25.4s,#7
+	add	w16,w16,w17
+	sli	v9.4s,v26.4s,#7
+	add	w13,w13,w19
+	sli	v13.4s,v27.4s,#7
+	add	w14,w14,w20
+	sli	v17.4s,v28.4s,#7
+	eor	w10,w10,w15
+	sli	v21.4s,v29.4s,#7
+	eor	w11,w11,w16
+	ext	v2.16b,v2.16b,v2.16b,#8
+	eor	w12,w12,w13
+	ext	v6.16b,v6.16b,v6.16b,#8
+	eor	w9,w9,w14
+	ext	v10.16b,v10.16b,v10.16b,#8
+	ror	w10,w10,#25
+	ext	v14.16b,v14.16b,v14.16b,#8
+	ror	w11,w11,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w12,w12,#25
+	ext	v22.16b,v22.16b,v22.16b,#8
+	ror	w9,w9,#25
+	ext	v3.16b,v3.16b,v3.16b,#4
+	ext	v7.16b,v7.16b,v7.16b,#4
+	ext	v11.16b,v11.16b,v11.16b,#4
+	ext	v15.16b,v15.16b,v15.16b,#4
+	ext	v19.16b,v19.16b,v19.16b,#4
+	ext	v23.16b,v23.16b,v23.16b,#4
+	ext	v1.16b,v1.16b,v1.16b,#12
+	ext	v5.16b,v5.16b,v5.16b,#12
+	ext	v9.16b,v9.16b,v9.16b,#12
+	ext	v13.16b,v13.16b,v13.16b,#12
+	ext	v17.16b,v17.16b,v17.16b,#12
+	ext	v21.16b,v21.16b,v21.16b,#12
+	cbnz	x4,Loop_upper_neon
+
+	add	w5,w5,w22		// accumulate key block
+	add	x6,x6,x22,lsr#32
+	add	w7,w7,w23
+	add	x8,x8,x23,lsr#32
+	add	w9,w9,w24
+	add	x10,x10,x24,lsr#32
+	add	w11,w11,w25
+	add	x12,x12,x25,lsr#32
+	add	w13,w13,w26
+	add	x14,x14,x26,lsr#32
+	add	w15,w15,w27
+	add	x16,x16,x27,lsr#32
+	add	w17,w17,w28
+	add	x19,x19,x28,lsr#32
+	add	w20,w20,w30
+	add	x21,x21,x30,lsr#32
+
+	add	x5,x5,x6,lsl#32	// pack
+	add	x7,x7,x8,lsl#32
+	ldp	x6,x8,[x1,#0]		// load input
+	add	x9,x9,x10,lsl#32
+	add	x11,x11,x12,lsl#32
+	ldp	x10,x12,[x1,#16]
+	add	x13,x13,x14,lsl#32
+	add	x15,x15,x16,lsl#32
+	ldp	x14,x16,[x1,#32]
+	add	x17,x17,x19,lsl#32
+	add	x20,x20,x21,lsl#32
+	ldp	x19,x21,[x1,#48]
+	add	x1,x1,#64
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	eor	x5,x5,x6
+	eor	x7,x7,x8
+	eor	x9,x9,x10
+	eor	x11,x11,x12
+	eor	x13,x13,x14
+	eor	x15,x15,x16
+	eor	x17,x17,x19
+	eor	x20,x20,x21
+
+	stp	x5,x7,[x0,#0]		// store output
+	add	x28,x28,#1			// increment counter
+	mov	w5,w22			// unpack key block
+	lsr	x6,x22,#32
+	stp	x9,x11,[x0,#16]
+	mov	w7,w23
+	lsr	x8,x23,#32
+	stp	x13,x15,[x0,#32]
+	mov	w9,w24
+	lsr	x10,x24,#32
+	stp	x17,x20,[x0,#48]
+	add	x0,x0,#64
+	mov	w11,w25
+	lsr	x12,x25,#32
+	mov	w13,w26
+	lsr	x14,x26,#32
+	mov	w15,w27
+	lsr	x16,x27,#32
+	mov	w17,w28
+	lsr	x19,x28,#32
+	mov	w20,w30
+	lsr	x21,x30,#32
+
+	mov	x4,#5
+Loop_lower_neon:
+	sub	x4,x4,#1
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w9
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w10
+	add	v8.4s,v8.4s,v9.4s
+	add	w7,w7,w11
+	add	v12.4s,v12.4s,v13.4s
+	add	w8,w8,w12
+	add	v16.4s,v16.4s,v17.4s
+	eor	w17,w17,w5
+	add	v20.4s,v20.4s,v21.4s
+	eor	w19,w19,w6
+	eor	v3.16b,v3.16b,v0.16b
+	eor	w20,w20,w7
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w21,w21,w8
+	eor	v11.16b,v11.16b,v8.16b
+	ror	w17,w17,#16
+	eor	v15.16b,v15.16b,v12.16b
+	ror	w19,w19,#16
+	eor	v19.16b,v19.16b,v16.16b
+	ror	w20,w20,#16
+	eor	v23.16b,v23.16b,v20.16b
+	ror	w21,w21,#16
+	rev32	v3.8h,v3.8h
+	add	w13,w13,w17
+	rev32	v7.8h,v7.8h
+	add	w14,w14,w19
+	rev32	v11.8h,v11.8h
+	add	w15,w15,w20
+	rev32	v15.8h,v15.8h
+	add	w16,w16,w21
+	rev32	v19.8h,v19.8h
+	eor	w9,w9,w13
+	rev32	v23.8h,v23.8h
+	eor	w10,w10,w14
+	add	v2.4s,v2.4s,v3.4s
+	eor	w11,w11,w15
+	add	v6.4s,v6.4s,v7.4s
+	eor	w12,w12,w16
+	add	v10.4s,v10.4s,v11.4s
+	ror	w9,w9,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w10,w10,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w11,w11,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w12,w12,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w9
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w10
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w11
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w12
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w17,w17,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w19,w19,w6
+	ushr	v1.4s,v24.4s,#20
+	eor	w20,w20,w7
+	ushr	v5.4s,v25.4s,#20
+	eor	w21,w21,w8
+	ushr	v9.4s,v26.4s,#20
+	ror	w17,w17,#24
+	ushr	v13.4s,v27.4s,#20
+	ror	w19,w19,#24
+	ushr	v17.4s,v28.4s,#20
+	ror	w20,w20,#24
+	ushr	v21.4s,v29.4s,#20
+	ror	w21,w21,#24
+	sli	v1.4s,v24.4s,#12
+	add	w13,w13,w17
+	sli	v5.4s,v25.4s,#12
+	add	w14,w14,w19
+	sli	v9.4s,v26.4s,#12
+	add	w15,w15,w20
+	sli	v13.4s,v27.4s,#12
+	add	w16,w16,w21
+	sli	v17.4s,v28.4s,#12
+	eor	w9,w9,w13
+	sli	v21.4s,v29.4s,#12
+	eor	w10,w10,w14
+	add	v0.4s,v0.4s,v1.4s
+	eor	w11,w11,w15
+	add	v4.4s,v4.4s,v5.4s
+	eor	w12,w12,w16
+	add	v8.4s,v8.4s,v9.4s
+	ror	w9,w9,#25
+	add	v12.4s,v12.4s,v13.4s
+	ror	w10,w10,#25
+	add	v16.4s,v16.4s,v17.4s
+	ror	w11,w11,#25
+	add	v20.4s,v20.4s,v21.4s
+	ror	w12,w12,#25
+	eor	v24.16b,v3.16b,v0.16b
+	add	w5,w5,w10
+	eor	v25.16b,v7.16b,v4.16b
+	add	w6,w6,w11
+	eor	v26.16b,v11.16b,v8.16b
+	add	w7,w7,w12
+	eor	v27.16b,v15.16b,v12.16b
+	add	w8,w8,w9
+	eor	v28.16b,v19.16b,v16.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v23.16b,v20.16b
+	eor	w17,w17,w6
+	ushr	v3.4s,v24.4s,#24
+	eor	w19,w19,w7
+	ushr	v7.4s,v25.4s,#24
+	eor	w20,w20,w8
+	ushr	v11.4s,v26.4s,#24
+	ror	w21,w21,#16
+	ushr	v15.4s,v27.4s,#24
+	ror	w17,w17,#16
+	ushr	v19.4s,v28.4s,#24
+	ror	w19,w19,#16
+	ushr	v23.4s,v29.4s,#24
+	ror	w20,w20,#16
+	sli	v3.4s,v24.4s,#8
+	add	w15,w15,w21
+	sli	v7.4s,v25.4s,#8
+	add	w16,w16,w17
+	sli	v11.4s,v26.4s,#8
+	add	w13,w13,w19
+	sli	v15.4s,v27.4s,#8
+	add	w14,w14,w20
+	sli	v19.4s,v28.4s,#8
+	eor	w10,w10,w15
+	sli	v23.4s,v29.4s,#8
+	eor	w11,w11,w16
+	add	v2.4s,v2.4s,v3.4s
+	eor	w12,w12,w13
+	add	v6.4s,v6.4s,v7.4s
+	eor	w9,w9,w14
+	add	v10.4s,v10.4s,v11.4s
+	ror	w10,w10,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w11,w11,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w12,w12,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w9,w9,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w10
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w11
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w12
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w9
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w17,w17,w6
+	ushr	v1.4s,v24.4s,#25
+	eor	w19,w19,w7
+	ushr	v5.4s,v25.4s,#25
+	eor	w20,w20,w8
+	ushr	v9.4s,v26.4s,#25
+	ror	w21,w21,#24
+	ushr	v13.4s,v27.4s,#25
+	ror	w17,w17,#24
+	ushr	v17.4s,v28.4s,#25
+	ror	w19,w19,#24
+	ushr	v21.4s,v29.4s,#25
+	ror	w20,w20,#24
+	sli	v1.4s,v24.4s,#7
+	add	w15,w15,w21
+	sli	v5.4s,v25.4s,#7
+	add	w16,w16,w17
+	sli	v9.4s,v26.4s,#7
+	add	w13,w13,w19
+	sli	v13.4s,v27.4s,#7
+	add	w14,w14,w20
+	sli	v17.4s,v28.4s,#7
+	eor	w10,w10,w15
+	sli	v21.4s,v29.4s,#7
+	eor	w11,w11,w16
+	ext	v2.16b,v2.16b,v2.16b,#8
+	eor	w12,w12,w13
+	ext	v6.16b,v6.16b,v6.16b,#8
+	eor	w9,w9,w14
+	ext	v10.16b,v10.16b,v10.16b,#8
+	ror	w10,w10,#25
+	ext	v14.16b,v14.16b,v14.16b,#8
+	ror	w11,w11,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w12,w12,#25
+	ext	v22.16b,v22.16b,v22.16b,#8
+	ror	w9,w9,#25
+	ext	v3.16b,v3.16b,v3.16b,#12
+	ext	v7.16b,v7.16b,v7.16b,#12
+	ext	v11.16b,v11.16b,v11.16b,#12
+	ext	v15.16b,v15.16b,v15.16b,#12
+	ext	v19.16b,v19.16b,v19.16b,#12
+	ext	v23.16b,v23.16b,v23.16b,#12
+	ext	v1.16b,v1.16b,v1.16b,#4
+	ext	v5.16b,v5.16b,v5.16b,#4
+	ext	v9.16b,v9.16b,v9.16b,#4
+	ext	v13.16b,v13.16b,v13.16b,#4
+	ext	v17.16b,v17.16b,v17.16b,#4
+	ext	v21.16b,v21.16b,v21.16b,#4
+	add	v0.4s,v0.4s,v1.4s
+	add	w5,w5,w9
+	add	v4.4s,v4.4s,v5.4s
+	add	w6,w6,w10
+	add	v8.4s,v8.4s,v9.4s
+	add	w7,w7,w11
+	add	v12.4s,v12.4s,v13.4s
+	add	w8,w8,w12
+	add	v16.4s,v16.4s,v17.4s
+	eor	w17,w17,w5
+	add	v20.4s,v20.4s,v21.4s
+	eor	w19,w19,w6
+	eor	v3.16b,v3.16b,v0.16b
+	eor	w20,w20,w7
+	eor	v7.16b,v7.16b,v4.16b
+	eor	w21,w21,w8
+	eor	v11.16b,v11.16b,v8.16b
+	ror	w17,w17,#16
+	eor	v15.16b,v15.16b,v12.16b
+	ror	w19,w19,#16
+	eor	v19.16b,v19.16b,v16.16b
+	ror	w20,w20,#16
+	eor	v23.16b,v23.16b,v20.16b
+	ror	w21,w21,#16
+	rev32	v3.8h,v3.8h
+	add	w13,w13,w17
+	rev32	v7.8h,v7.8h
+	add	w14,w14,w19
+	rev32	v11.8h,v11.8h
+	add	w15,w15,w20
+	rev32	v15.8h,v15.8h
+	add	w16,w16,w21
+	rev32	v19.8h,v19.8h
+	eor	w9,w9,w13
+	rev32	v23.8h,v23.8h
+	eor	w10,w10,w14
+	add	v2.4s,v2.4s,v3.4s
+	eor	w11,w11,w15
+	add	v6.4s,v6.4s,v7.4s
+	eor	w12,w12,w16
+	add	v10.4s,v10.4s,v11.4s
+	ror	w9,w9,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w10,w10,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w11,w11,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w12,w12,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w9
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w10
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w11
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w12
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w17,w17,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w19,w19,w6
+	ushr	v1.4s,v24.4s,#20
+	eor	w20,w20,w7
+	ushr	v5.4s,v25.4s,#20
+	eor	w21,w21,w8
+	ushr	v9.4s,v26.4s,#20
+	ror	w17,w17,#24
+	ushr	v13.4s,v27.4s,#20
+	ror	w19,w19,#24
+	ushr	v17.4s,v28.4s,#20
+	ror	w20,w20,#24
+	ushr	v21.4s,v29.4s,#20
+	ror	w21,w21,#24
+	sli	v1.4s,v24.4s,#12
+	add	w13,w13,w17
+	sli	v5.4s,v25.4s,#12
+	add	w14,w14,w19
+	sli	v9.4s,v26.4s,#12
+	add	w15,w15,w20
+	sli	v13.4s,v27.4s,#12
+	add	w16,w16,w21
+	sli	v17.4s,v28.4s,#12
+	eor	w9,w9,w13
+	sli	v21.4s,v29.4s,#12
+	eor	w10,w10,w14
+	add	v0.4s,v0.4s,v1.4s
+	eor	w11,w11,w15
+	add	v4.4s,v4.4s,v5.4s
+	eor	w12,w12,w16
+	add	v8.4s,v8.4s,v9.4s
+	ror	w9,w9,#25
+	add	v12.4s,v12.4s,v13.4s
+	ror	w10,w10,#25
+	add	v16.4s,v16.4s,v17.4s
+	ror	w11,w11,#25
+	add	v20.4s,v20.4s,v21.4s
+	ror	w12,w12,#25
+	eor	v24.16b,v3.16b,v0.16b
+	add	w5,w5,w10
+	eor	v25.16b,v7.16b,v4.16b
+	add	w6,w6,w11
+	eor	v26.16b,v11.16b,v8.16b
+	add	w7,w7,w12
+	eor	v27.16b,v15.16b,v12.16b
+	add	w8,w8,w9
+	eor	v28.16b,v19.16b,v16.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v23.16b,v20.16b
+	eor	w17,w17,w6
+	ushr	v3.4s,v24.4s,#24
+	eor	w19,w19,w7
+	ushr	v7.4s,v25.4s,#24
+	eor	w20,w20,w8
+	ushr	v11.4s,v26.4s,#24
+	ror	w21,w21,#16
+	ushr	v15.4s,v27.4s,#24
+	ror	w17,w17,#16
+	ushr	v19.4s,v28.4s,#24
+	ror	w19,w19,#16
+	ushr	v23.4s,v29.4s,#24
+	ror	w20,w20,#16
+	sli	v3.4s,v24.4s,#8
+	add	w15,w15,w21
+	sli	v7.4s,v25.4s,#8
+	add	w16,w16,w17
+	sli	v11.4s,v26.4s,#8
+	add	w13,w13,w19
+	sli	v15.4s,v27.4s,#8
+	add	w14,w14,w20
+	sli	v19.4s,v28.4s,#8
+	eor	w10,w10,w15
+	sli	v23.4s,v29.4s,#8
+	eor	w11,w11,w16
+	add	v2.4s,v2.4s,v3.4s
+	eor	w12,w12,w13
+	add	v6.4s,v6.4s,v7.4s
+	eor	w9,w9,w14
+	add	v10.4s,v10.4s,v11.4s
+	ror	w10,w10,#20
+	add	v14.4s,v14.4s,v15.4s
+	ror	w11,w11,#20
+	add	v18.4s,v18.4s,v19.4s
+	ror	w12,w12,#20
+	add	v22.4s,v22.4s,v23.4s
+	ror	w9,w9,#20
+	eor	v24.16b,v1.16b,v2.16b
+	add	w5,w5,w10
+	eor	v25.16b,v5.16b,v6.16b
+	add	w6,w6,w11
+	eor	v26.16b,v9.16b,v10.16b
+	add	w7,w7,w12
+	eor	v27.16b,v13.16b,v14.16b
+	add	w8,w8,w9
+	eor	v28.16b,v17.16b,v18.16b
+	eor	w21,w21,w5
+	eor	v29.16b,v21.16b,v22.16b
+	eor	w17,w17,w6
+	ushr	v1.4s,v24.4s,#25
+	eor	w19,w19,w7
+	ushr	v5.4s,v25.4s,#25
+	eor	w20,w20,w8
+	ushr	v9.4s,v26.4s,#25
+	ror	w21,w21,#24
+	ushr	v13.4s,v27.4s,#25
+	ror	w17,w17,#24
+	ushr	v17.4s,v28.4s,#25
+	ror	w19,w19,#24
+	ushr	v21.4s,v29.4s,#25
+	ror	w20,w20,#24
+	sli	v1.4s,v24.4s,#7
+	add	w15,w15,w21
+	sli	v5.4s,v25.4s,#7
+	add	w16,w16,w17
+	sli	v9.4s,v26.4s,#7
+	add	w13,w13,w19
+	sli	v13.4s,v27.4s,#7
+	add	w14,w14,w20
+	sli	v17.4s,v28.4s,#7
+	eor	w10,w10,w15
+	sli	v21.4s,v29.4s,#7
+	eor	w11,w11,w16
+	ext	v2.16b,v2.16b,v2.16b,#8
+	eor	w12,w12,w13
+	ext	v6.16b,v6.16b,v6.16b,#8
+	eor	w9,w9,w14
+	ext	v10.16b,v10.16b,v10.16b,#8
+	ror	w10,w10,#25
+	ext	v14.16b,v14.16b,v14.16b,#8
+	ror	w11,w11,#25
+	ext	v18.16b,v18.16b,v18.16b,#8
+	ror	w12,w12,#25
+	ext	v22.16b,v22.16b,v22.16b,#8
+	ror	w9,w9,#25
+	ext	v3.16b,v3.16b,v3.16b,#4
+	ext	v7.16b,v7.16b,v7.16b,#4
+	ext	v11.16b,v11.16b,v11.16b,#4
+	ext	v15.16b,v15.16b,v15.16b,#4
+	ext	v19.16b,v19.16b,v19.16b,#4
+	ext	v23.16b,v23.16b,v23.16b,#4
+	ext	v1.16b,v1.16b,v1.16b,#12
+	ext	v5.16b,v5.16b,v5.16b,#12
+	ext	v9.16b,v9.16b,v9.16b,#12
+	ext	v13.16b,v13.16b,v13.16b,#12
+	ext	v17.16b,v17.16b,v17.16b,#12
+	ext	v21.16b,v21.16b,v21.16b,#12
+	cbnz	x4,Loop_lower_neon
+
+	add	w5,w5,w22		// accumulate key block
+	ldp	q24,q25,[sp,#0]
+	add	x6,x6,x22,lsr#32
+	ldp	q26,q27,[sp,#32]
+	add	w7,w7,w23
+	ldp	q28,q29,[sp,#64]
+	add	x8,x8,x23,lsr#32
+	add	v0.4s,v0.4s,v24.4s
+	add	w9,w9,w24
+	add	v4.4s,v4.4s,v24.4s
+	add	x10,x10,x24,lsr#32
+	add	v8.4s,v8.4s,v24.4s
+	add	w11,w11,w25
+	add	v12.4s,v12.4s,v24.4s
+	add	x12,x12,x25,lsr#32
+	add	v16.4s,v16.4s,v24.4s
+	add	w13,w13,w26
+	add	v20.4s,v20.4s,v24.4s
+	add	x14,x14,x26,lsr#32
+	add	v2.4s,v2.4s,v26.4s
+	add	w15,w15,w27
+	add	v6.4s,v6.4s,v26.4s
+	add	x16,x16,x27,lsr#32
+	add	v10.4s,v10.4s,v26.4s
+	add	w17,w17,w28
+	add	v14.4s,v14.4s,v26.4s
+	add	x19,x19,x28,lsr#32
+	add	v18.4s,v18.4s,v26.4s
+	add	w20,w20,w30
+	add	v22.4s,v22.4s,v26.4s
+	add	x21,x21,x30,lsr#32
+	add	v19.4s,v19.4s,v31.4s			// +4
+	add	x5,x5,x6,lsl#32	// pack
+	add	v23.4s,v23.4s,v31.4s			// +4
+	add	x7,x7,x8,lsl#32
+	add	v3.4s,v3.4s,v27.4s
+	ldp	x6,x8,[x1,#0]		// load input
+	add	v7.4s,v7.4s,v28.4s
+	add	x9,x9,x10,lsl#32
+	add	v11.4s,v11.4s,v29.4s
+	add	x11,x11,x12,lsl#32
+	add	v15.4s,v15.4s,v30.4s
+	ldp	x10,x12,[x1,#16]
+	add	v19.4s,v19.4s,v27.4s
+	add	x13,x13,x14,lsl#32
+	add	v23.4s,v23.4s,v28.4s
+	add	x15,x15,x16,lsl#32
+	add	v1.4s,v1.4s,v25.4s
+	ldp	x14,x16,[x1,#32]
+	add	v5.4s,v5.4s,v25.4s
+	add	x17,x17,x19,lsl#32
+	add	v9.4s,v9.4s,v25.4s
+	add	x20,x20,x21,lsl#32
+	add	v13.4s,v13.4s,v25.4s
+	ldp	x19,x21,[x1,#48]
+	add	v17.4s,v17.4s,v25.4s
+	add	x1,x1,#64
+	add	v21.4s,v21.4s,v25.4s
+
+#ifdef	__ARMEB__
+	rev	x5,x5
+	rev	x7,x7
+	rev	x9,x9
+	rev	x11,x11
+	rev	x13,x13
+	rev	x15,x15
+	rev	x17,x17
+	rev	x20,x20
+#endif
+	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64
+	eor	x5,x5,x6
+	eor	x7,x7,x8
+	eor	x9,x9,x10
+	eor	x11,x11,x12
+	eor	x13,x13,x14
+	eor	v0.16b,v0.16b,v24.16b
+	eor	x15,x15,x16
+	eor	v1.16b,v1.16b,v25.16b
+	eor	x17,x17,x19
+	eor	v2.16b,v2.16b,v26.16b
+	eor	x20,x20,x21
+	eor	v3.16b,v3.16b,v27.16b
+	ld1	{v24.16b,v25.16b,v26.16b,v27.16b},[x1],#64
+
+	stp	x5,x7,[x0,#0]		// store output
+	add	x28,x28,#7			// increment counter
+	stp	x9,x11,[x0,#16]
+	stp	x13,x15,[x0,#32]
+	stp	x17,x20,[x0,#48]
+	add	x0,x0,#64
+	st1	{v0.16b,v1.16b,v2.16b,v3.16b},[x0],#64
+
+	ld1	{v0.16b,v1.16b,v2.16b,v3.16b},[x1],#64
+	eor	v4.16b,v4.16b,v24.16b
+	eor	v5.16b,v5.16b,v25.16b
+	eor	v6.16b,v6.16b,v26.16b
+	eor	v7.16b,v7.16b,v27.16b
+	st1	{v4.16b,v5.16b,v6.16b,v7.16b},[x0],#64
+
+	ld1	{v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64
+	eor	v8.16b,v8.16b,v0.16b
+	ldp	q24,q25,[sp,#0]
+	eor	v9.16b,v9.16b,v1.16b
+	ldp	q26,q27,[sp,#32]
+	eor	v10.16b,v10.16b,v2.16b
+	eor	v11.16b,v11.16b,v3.16b
+	st1	{v8.16b,v9.16b,v10.16b,v11.16b},[x0],#64
+
+	ld1	{v8.16b,v9.16b,v10.16b,v11.16b},[x1],#64
+	eor	v12.16b,v12.16b,v4.16b
+	eor	v13.16b,v13.16b,v5.16b
+	eor	v14.16b,v14.16b,v6.16b
+	eor	v15.16b,v15.16b,v7.16b
+	st1	{v12.16b,v13.16b,v14.16b,v15.16b},[x0],#64
+
+	ld1	{v12.16b,v13.16b,v14.16b,v15.16b},[x1],#64
+	eor	v16.16b,v16.16b,v8.16b
+	eor	v17.16b,v17.16b,v9.16b
+	eor	v18.16b,v18.16b,v10.16b
+	eor	v19.16b,v19.16b,v11.16b
+	st1	{v16.16b,v17.16b,v18.16b,v19.16b},[x0],#64
+
+	shl	v0.4s,v31.4s,#1			// 4 -> 8
+	eor	v20.16b,v20.16b,v12.16b
+	eor	v21.16b,v21.16b,v13.16b
+	eor	v22.16b,v22.16b,v14.16b
+	eor	v23.16b,v23.16b,v15.16b
+	st1	{v20.16b,v21.16b,v22.16b,v23.16b},[x0],#64
+
+	add	v27.4s,v27.4s,v0.4s			// += 8
+	add	v28.4s,v28.4s,v0.4s
+	add	v29.4s,v29.4s,v0.4s
+	add	v30.4s,v30.4s,v0.4s
+
+	b.hs	Loop_outer_512_neon
+
+	adds	x2,x2,#512
+	ushr	v0.4s,v31.4s,#2			// 4 -> 1
+
+	ldp	d8,d9,[sp,#128+0]		// meet ABI requirements
+	ldp	d10,d11,[sp,#128+16]
+	ldp	d12,d13,[sp,#128+32]
+	ldp	d14,d15,[sp,#128+48]
+
+	stp	q24,q31,[sp,#0]		// wipe off-load area
+	stp	q24,q31,[sp,#32]
+	stp	q24,q31,[sp,#64]
+
+	b.eq	Ldone_512_neon
+
+	cmp	x2,#192
+	sub	v27.4s,v27.4s,v0.4s			// -= 1
+	sub	v28.4s,v28.4s,v0.4s
+	sub	v29.4s,v29.4s,v0.4s
+	add	sp,sp,#128
+	b.hs	Loop_outer_neon
+
+	eor	v25.16b,v25.16b,v25.16b
+	eor	v26.16b,v26.16b,v26.16b
+	eor	v27.16b,v27.16b,v27.16b
+	eor	v28.16b,v28.16b,v28.16b
+	eor	v29.16b,v29.16b,v29.16b
+	eor	v30.16b,v30.16b,v30.16b
+	b	Loop_outer
+
+Ldone_512_neon:
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#128+64
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#96
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+#endif
+#endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/win-aarch64/crypto/fipsmodule/aesv8-armx64.S b/deps/boringssl/win-aarch64/crypto/fipsmodule/aesv8-armx64.S
new file mode 100644
index 0000000..9a633fb
--- /dev/null
+++ b/deps/boringssl/win-aarch64/crypto/fipsmodule/aesv8-armx64.S
@@ -0,0 +1,813 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+
+#if !defined(OPENSSL_NO_ASM)
+#if defined(__aarch64__)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+#include <openssl/arm_arch.h>
+
+#if __ARM_MAX_ARCH__>=7
+.text
+.arch	armv8-a+crypto
+.section	.rodata
+.align	5
+Lrcon:
+.long	0x01,0x01,0x01,0x01
+.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	// rotate-n-splat
+.long	0x1b,0x1b,0x1b,0x1b
+
+.text
+
+.globl	aes_hw_set_encrypt_key
+// aes_hw_set_encrypt_key: x0 = user key, w1 = key length in bits, x2 = output key schedule; returns x0 = 0 on success, -1 for a null pointer, -2 for a bad key length.
+.def aes_hw_set_encrypt_key
+   .type 32
+.endef
+.align	5
+aes_hw_set_encrypt_key:
+Lenc_key:		// internal entry point; aes_hw_set_decrypt_key reaches it with "bl Lenc_key"
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+	mov	x3,#-1		// x3 carries the return value: -1 = null pointer
+	cmp	x0,#0
+	b.eq	Lenc_key_abort
+	cmp	x2,#0
+	b.eq	Lenc_key_abort
+	mov	x3,#-2		// -2 = unsupported key length
+	cmp	w1,#128
+	b.lt	Lenc_key_abort
+	cmp	w1,#256
+	b.gt	Lenc_key_abort
+	tst	w1,#0x3f		// length must be a multiple of 64 bits
+	b.ne	Lenc_key_abort
+
+	adrp	x3,Lrcon
+	add	x3,x3,:lo12:Lrcon
+	cmp	w1,#192		// flags are consumed by the b.lt/b.eq dispatch below
+
+	eor	v0.16b,v0.16b,v0.16b		// v0 = 0
+	ld1	{v3.16b},[x0],#16		// v3 = first 16 key bytes
+	mov	w1,#8		// reuse w1
+	ld1	{v1.4s,v2.4s},[x3],#32		// v1 = first Lrcon row (0x01), v2 = rotate-n-splat mask
+
+	b.lt	Loop128
+	b.eq	L192
+	b	L256
+
+.align	4
+Loop128:
+	tbl	v6.16b,{v3.16b},v2.16b
+	ext	v5.16b,v0.16b,v3.16b,#12
+	st1	{v3.4s},[x2],#16		// emit the current round key
+	aese	v6.16b,v0.16b
+	subs	w1,w1,#1		// 8 iterations (w1 was set to 8 above)
+
+	eor	v3.16b,v3.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v3.16b,v3.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v6.16b,v6.16b,v1.16b
+	eor	v3.16b,v3.16b,v5.16b
+	shl	v1.16b,v1.16b,#1		// double the round constant
+	eor	v3.16b,v3.16b,v6.16b
+	b.ne	Loop128
+
+	ld1	{v1.4s},[x3]		// x3 now points at the third Lrcon row (0x1b)
+
+	tbl	v6.16b,{v3.16b},v2.16b
+	ext	v5.16b,v0.16b,v3.16b,#12
+	st1	{v3.4s},[x2],#16
+	aese	v6.16b,v0.16b
+
+	eor	v3.16b,v3.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v3.16b,v3.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v6.16b,v6.16b,v1.16b
+	eor	v3.16b,v3.16b,v5.16b
+	shl	v1.16b,v1.16b,#1
+	eor	v3.16b,v3.16b,v6.16b
+
+	tbl	v6.16b,{v3.16b},v2.16b
+	ext	v5.16b,v0.16b,v3.16b,#12
+	st1	{v3.4s},[x2],#16
+	aese	v6.16b,v0.16b
+
+	eor	v3.16b,v3.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v3.16b,v3.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v6.16b,v6.16b,v1.16b
+	eor	v3.16b,v3.16b,v5.16b
+	eor	v3.16b,v3.16b,v6.16b
+	st1	{v3.4s},[x2]		// last round key, stored without post-increment
+	add	x2,x2,#0x50		// 160 bytes already stepped + 0x50: x2 = schedule + 240
+
+	mov	w12,#10		// round count for this path
+	b	Ldone
+
+.align	4
+L192:
+	ld1	{v4.8b},[x0],#8		// next 8 key bytes
+	movi	v6.16b,#8			// borrow v6.16b
+	st1	{v3.4s},[x2],#16
+	sub	v2.16b,v2.16b,v6.16b	// adjust the mask
+
+Loop192:
+	tbl	v6.16b,{v4.16b},v2.16b
+	ext	v5.16b,v0.16b,v3.16b,#12
+	st1	{v4.8b},[x2],#8
+	aese	v6.16b,v0.16b
+	subs	w1,w1,#1		// 8 iterations, 24 bytes of schedule each
+
+	eor	v3.16b,v3.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v3.16b,v3.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v3.16b,v3.16b,v5.16b
+
+	dup	v5.4s,v3.s[3]
+	eor	v5.16b,v5.16b,v4.16b
+	eor	v6.16b,v6.16b,v1.16b
+	ext	v4.16b,v0.16b,v4.16b,#12
+	shl	v1.16b,v1.16b,#1
+	eor	v4.16b,v4.16b,v5.16b
+	eor	v3.16b,v3.16b,v6.16b
+	eor	v4.16b,v4.16b,v6.16b
+	st1	{v3.4s},[x2],#16
+	b.ne	Loop192
+
+	mov	w12,#12		// round count for this path
+	add	x2,x2,#0x20		// 208 bytes already stepped + 0x20: x2 = schedule + 240
+	b	Ldone
+
+.align	4
+L256:
+	ld1	{v4.16b},[x0]		// second 16 key bytes (x0 was advanced above)
+	mov	w1,#7		// loop counter
+	mov	w12,#14		// round count for this path
+	st1	{v3.4s},[x2],#16
+
+Loop256:
+	tbl	v6.16b,{v4.16b},v2.16b
+	ext	v5.16b,v0.16b,v3.16b,#12
+	st1	{v4.4s},[x2],#16
+	aese	v6.16b,v0.16b
+	subs	w1,w1,#1
+
+	eor	v3.16b,v3.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v3.16b,v3.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v6.16b,v6.16b,v1.16b
+	eor	v3.16b,v3.16b,v5.16b
+	shl	v1.16b,v1.16b,#1
+	eor	v3.16b,v3.16b,v6.16b
+	st1	{v3.4s},[x2],#16
+	b.eq	Ldone		// counter reached zero: 16 + 7*32 = 240 bytes stored
+
+	dup	v6.4s,v3.s[3]		// just splat
+	ext	v5.16b,v0.16b,v4.16b,#12
+	aese	v6.16b,v0.16b
+
+	eor	v4.16b,v4.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v4.16b,v4.16b,v5.16b
+	ext	v5.16b,v0.16b,v5.16b,#12
+	eor	v4.16b,v4.16b,v5.16b
+
+	eor	v4.16b,v4.16b,v6.16b
+	b	Loop256
+
+Ldone:
+	str	w12,[x2]		// x2 = schedule + 240 on every path: store the round count there
+	mov	x3,#0		// success
+
+Lenc_key_abort:
+	mov	x0,x3			// return value
+	ldr	x29,[sp],#16		// pop the frame; x30 is not reloaded (see note at entry)
+	ret
+
+
+.globl	aes_hw_set_decrypt_key
+// aes_hw_set_decrypt_key: builds the encryption schedule via Lenc_key, then reverses the round-key order in place and applies aesimc to every key except the first and last; returns x0 = 0 on success, otherwise Lenc_key's error code.
+.def aes_hw_set_decrypt_key
+   .type 32
+.endef
+.align	5
+aes_hw_set_decrypt_key:
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+	bl	Lenc_key		// on success leaves x2 = schedule + 240 and w12 = round count
+
+	cmp	x0,#0
+	b.ne	Ldec_key_abort		// propagate the error code already in x0
+
+	sub	x2,x2,#240		// restore original x2
+	mov	x4,#-16		// post-index step for walking x0 downwards
+	add	x0,x2,x12,lsl#4	// end of key schedule
+
+	ld1	{v0.4s},[x2]		// swap the first and last round keys unchanged
+	ld1	{v1.4s},[x0]
+	st1	{v0.4s},[x0],x4
+	st1	{v1.4s},[x2],#16
+
+Loop_imc:
+	ld1	{v0.4s},[x2]		// take one key from each end,
+	ld1	{v1.4s},[x0]
+	aesimc	v0.16b,v0.16b		// run both through aesimc,
+	aesimc	v1.16b,v1.16b
+	st1	{v0.4s},[x0],x4		// and store them swapped
+	st1	{v1.4s},[x2],#16
+	cmp	x0,x2
+	b.hi	Loop_imc		// continue while the pointers have not met
+
+	ld1	{v0.4s},[x2]		// remaining middle key: aesimc only
+	aesimc	v0.16b,v0.16b
+	st1	{v0.4s},[x0]
+
+	eor	x0,x0,x0		// return value
+Ldec_key_abort:
+	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+.globl	aes_hw_encrypt
+// aes_hw_encrypt: encrypts the single 16-byte block at x0 into x1 using the key schedule at x2 (round count read from offset 240 of the schedule).
+.def aes_hw_encrypt
+   .type 32
+.endef
+.align	5
+aes_hw_encrypt:
+	AARCH64_VALID_CALL_TARGET
+	ldr	w3,[x2,#240]		// w3 = round count
+	ld1	{v0.4s},[x2],#16		// v0 = first round key
+	ld1	{v2.16b},[x0]		// v2 = input block
+	sub	w3,w3,#2		// the last two rounds are handled after the loop
+	ld1	{v1.4s},[x2],#16		// v1 = second round key
+
+Loop_enc:
+	aese	v2.16b,v0.16b		// two rounds per iteration, alternating v0 and v1
+	aesmc	v2.16b,v2.16b
+	ld1	{v0.4s},[x2],#16
+	subs	w3,w3,#2
+	aese	v2.16b,v1.16b
+	aesmc	v2.16b,v2.16b
+	ld1	{v1.4s},[x2],#16
+	b.gt	Loop_enc
+
+	aese	v2.16b,v0.16b
+	aesmc	v2.16b,v2.16b
+	ld1	{v0.4s},[x2]		// v0 = last round key
+	aese	v2.16b,v1.16b		// final round has no aesmc
+	eor	v2.16b,v2.16b,v0.16b		// add the last round key
+
+	st1	{v2.16b},[x1]		// write the output block
+	ret
+
+.globl	aes_hw_decrypt
+// aes_hw_decrypt: decrypts the single 16-byte block at x0 into x1 using the key schedule at x2 (round count read from offset 240 of the schedule).
+.def aes_hw_decrypt
+   .type 32
+.endef
+.align	5
+aes_hw_decrypt:
+	AARCH64_VALID_CALL_TARGET
+	ldr	w3,[x2,#240]		// w3 = round count
+	ld1	{v0.4s},[x2],#16		// v0 = first round key
+	ld1	{v2.16b},[x0]		// v2 = input block
+	sub	w3,w3,#2		// the last two rounds are handled after the loop
+	ld1	{v1.4s},[x2],#16		// v1 = second round key
+
+Loop_dec:
+	aesd	v2.16b,v0.16b		// two rounds per iteration, alternating v0 and v1
+	aesimc	v2.16b,v2.16b
+	ld1	{v0.4s},[x2],#16
+	subs	w3,w3,#2
+	aesd	v2.16b,v1.16b
+	aesimc	v2.16b,v2.16b
+	ld1	{v1.4s},[x2],#16
+	b.gt	Loop_dec
+
+	aesd	v2.16b,v0.16b
+	aesimc	v2.16b,v2.16b
+	ld1	{v0.4s},[x2]		// v0 = last round key
+	aesd	v2.16b,v1.16b		// final round has no aesimc
+	eor	v2.16b,v2.16b,v0.16b		// add the last round key
+
+	st1	{v2.16b},[x1]		// write the output block
+	ret
+
+.globl	aes_hw_cbc_encrypt
+// aes_hw_cbc_encrypt: x0 = in, x1 = out, x2 = length in bytes, x3 = key schedule, x4 = IV (rewritten on exit), w5 = nonzero to encrypt, zero to decrypt; returns immediately if the length is below 16.
+.def aes_hw_cbc_encrypt
+   .type 32
+.endef
+.align	5
+aes_hw_cbc_encrypt:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+	subs	x2,x2,#16
+	mov	x8,#16		// x8 = input pointer step
+	b.lo	Lcbc_abort		// fewer than 16 bytes: nothing to do, IV untouched
+	csel	x8,xzr,x8,eq		// exactly one block: do not step the input pointer
+
+	cmp	w5,#0			// en- or decrypting?
+	ldr	w5,[x3,#240]		// w5 = round count (flags from the cmp stay live)
+	and	x2,x2,#-16		// round the remaining length down to whole blocks
+	ld1	{v6.16b},[x4]		// v6 = IV
+	ld1	{v0.16b},[x0],x8		// v0 = first input block
+
+	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
+	sub	w5,w5,#6
+	add	x7,x3,x5,lsl#4	// pointer to last 7 round keys
+	sub	w5,w5,#2		// w5 = round count - 8
+	ld1	{v18.4s,v19.4s},[x7],#32
+	ld1	{v20.4s,v21.4s},[x7],#32
+	ld1	{v22.4s,v23.4s},[x7],#32
+	ld1	{v7.4s},[x7]		// v7 = last round key
+
+	add	x7,x3,#32
+	mov	w6,w5
+	b.eq	Lcbc_dec		// flags still from "cmp w5,#0": zero selects decryption
+
+	cmp	w5,#2		// round count - 8 == 2: 10-round schedule
+	eor	v0.16b,v0.16b,v6.16b		// first block ^= IV
+	eor	v5.16b,v16.16b,v7.16b		// v5 = rndkey[0] ^ last round key
+	b.eq	Lcbc_enc128
+
+	ld1	{v2.4s,v3.4s},[x7]
+	add	x7,x3,#16
+	add	x6,x3,#16*4
+	add	x12,x3,#16*5
+	aese	v0.16b,v16.16b
+	aesmc	v0.16b,v0.16b
+	add	x14,x3,#16*6
+	add	x3,x3,#16*7
+	b	Lenter_cbc_enc
+
+.align	4
+Loop_cbc_enc:
+	aese	v0.16b,v16.16b
+	aesmc	v0.16b,v0.16b
+	st1	{v6.16b},[x1],#16		// store the previous ciphertext block
+Lenter_cbc_enc:
+	aese	v0.16b,v17.16b
+	aesmc	v0.16b,v0.16b
+	aese	v0.16b,v2.16b
+	aesmc	v0.16b,v0.16b
+	ld1	{v16.4s},[x6]
+	cmp	w5,#4		// round count - 8 == 4: 12-round schedule
+	aese	v0.16b,v3.16b
+	aesmc	v0.16b,v0.16b
+	ld1	{v17.4s},[x12]
+	b.eq	Lcbc_enc192
+
+	aese	v0.16b,v16.16b
+	aesmc	v0.16b,v0.16b
+	ld1	{v16.4s},[x14]
+	aese	v0.16b,v17.16b
+	aesmc	v0.16b,v0.16b
+	ld1	{v17.4s},[x3]
+	nop
+
+Lcbc_enc192:
+	aese	v0.16b,v16.16b
+	aesmc	v0.16b,v0.16b
+	subs	x2,x2,#16		// flags feed the csel and the b.hs at the loop end
+	aese	v0.16b,v17.16b
+	aesmc	v0.16b,v0.16b
+	csel	x8,xzr,x8,eq		// last block: stop stepping the input pointer
+	aese	v0.16b,v18.16b
+	aesmc	v0.16b,v0.16b
+	aese	v0.16b,v19.16b
+	aesmc	v0.16b,v0.16b
+	ld1	{v16.16b},[x0],x8		// next input block
+	aese	v0.16b,v20.16b
+	aesmc	v0.16b,v0.16b
+	eor	v16.16b,v16.16b,v5.16b		// fold rndkey[0] ^ last round key into the next input
+	aese	v0.16b,v21.16b
+	aesmc	v0.16b,v0.16b
+	ld1	{v17.4s},[x7]		// re-pre-load rndkey[1]
+	aese	v0.16b,v22.16b
+	aesmc	v0.16b,v0.16b
+	aese	v0.16b,v23.16b
+	eor	v6.16b,v0.16b,v7.16b		// v6 = ciphertext block
+	b.hs	Loop_cbc_enc
+
+	st1	{v6.16b},[x1],#16		// final ciphertext block
+	b	Lcbc_done
+
+.align	5
+Lcbc_enc128:
+	ld1	{v2.4s,v3.4s},[x7]
+	aese	v0.16b,v16.16b
+	aesmc	v0.16b,v0.16b
+	b	Lenter_cbc_enc128
+Loop_cbc_enc128:
+	aese	v0.16b,v16.16b
+	aesmc	v0.16b,v0.16b
+	st1	{v6.16b},[x1],#16		// store the previous ciphertext block
+Lenter_cbc_enc128:
+	aese	v0.16b,v17.16b
+	aesmc	v0.16b,v0.16b
+	subs	x2,x2,#16		// flags feed the csel and the b.hs at the loop end
+	aese	v0.16b,v2.16b
+	aesmc	v0.16b,v0.16b
+	csel	x8,xzr,x8,eq		// last block: stop stepping the input pointer
+	aese	v0.16b,v3.16b
+	aesmc	v0.16b,v0.16b
+	aese	v0.16b,v18.16b
+	aesmc	v0.16b,v0.16b
+	aese	v0.16b,v19.16b
+	aesmc	v0.16b,v0.16b
+	ld1	{v16.16b},[x0],x8		// next input block
+	aese	v0.16b,v20.16b
+	aesmc	v0.16b,v0.16b
+	aese	v0.16b,v21.16b
+	aesmc	v0.16b,v0.16b
+	aese	v0.16b,v22.16b
+	aesmc	v0.16b,v0.16b
+	eor	v16.16b,v16.16b,v5.16b		// fold rndkey[0] ^ last round key into the next input
+	aese	v0.16b,v23.16b
+	eor	v6.16b,v0.16b,v7.16b		// v6 = ciphertext block
+	b.hs	Loop_cbc_enc128
+
+	st1	{v6.16b},[x1],#16		// final ciphertext block
+	b	Lcbc_done
+.align	5
+Lcbc_dec:
+	ld1	{v18.16b},[x0],#16
+	subs	x2,x2,#32		// bias
+	add	w6,w5,#2
+	orr	v3.16b,v0.16b,v0.16b
+	orr	v1.16b,v0.16b,v0.16b
+	orr	v19.16b,v18.16b,v18.16b
+	b.lo	Lcbc_dec_tail		// fewer than three whole blocks in total
+
+	orr	v1.16b,v18.16b,v18.16b
+	ld1	{v18.16b},[x0],#16
+	orr	v2.16b,v0.16b,v0.16b
+	orr	v3.16b,v1.16b,v1.16b
+	orr	v19.16b,v18.16b,v18.16b
+
+Loop3x_cbc_dec:
+	aesd	v0.16b,v16.16b		// three blocks (v0, v1, v18) are processed together
+	aesimc	v0.16b,v0.16b
+	aesd	v1.16b,v16.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v16.16b
+	aesimc	v18.16b,v18.16b
+	ld1	{v16.4s},[x7],#16
+	subs	w6,w6,#2
+	aesd	v0.16b,v17.16b
+	aesimc	v0.16b,v0.16b
+	aesd	v1.16b,v17.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v17.16b
+	aesimc	v18.16b,v18.16b
+	ld1	{v17.4s},[x7],#16
+	b.gt	Loop3x_cbc_dec
+
+	aesd	v0.16b,v16.16b
+	aesimc	v0.16b,v0.16b
+	aesd	v1.16b,v16.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v16.16b
+	aesimc	v18.16b,v18.16b
+	eor	v4.16b,v6.16b,v7.16b		// chaining value ^ last round key, one per block (v4, v5, v17)
+	subs	x2,x2,#0x30
+	eor	v5.16b,v2.16b,v7.16b
+	csel	x6,x2,x6,lo			// x6, w6, is zero at this point
+	aesd	v0.16b,v17.16b
+	aesimc	v0.16b,v0.16b
+	aesd	v1.16b,v17.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v17.16b
+	aesimc	v18.16b,v18.16b
+	eor	v17.16b,v3.16b,v7.16b
+	add	x0,x0,x6		// x0 is adjusted in such way that
+					// at exit from the loop v1.16b-v18.16b
+					// are loaded with last "words"
+	orr	v6.16b,v19.16b,v19.16b		// last ciphertext block becomes the next chaining value
+	mov	x7,x3
+	aesd	v0.16b,v20.16b
+	aesimc	v0.16b,v0.16b
+	aesd	v1.16b,v20.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v20.16b
+	aesimc	v18.16b,v18.16b
+	ld1	{v2.16b},[x0],#16
+	aesd	v0.16b,v21.16b
+	aesimc	v0.16b,v0.16b
+	aesd	v1.16b,v21.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v21.16b
+	aesimc	v18.16b,v18.16b
+	ld1	{v3.16b},[x0],#16
+	aesd	v0.16b,v22.16b
+	aesimc	v0.16b,v0.16b
+	aesd	v1.16b,v22.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v22.16b
+	aesimc	v18.16b,v18.16b
+	ld1	{v19.16b},[x0],#16
+	aesd	v0.16b,v23.16b
+	aesd	v1.16b,v23.16b
+	aesd	v18.16b,v23.16b
+	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
+	add	w6,w5,#2
+	eor	v4.16b,v4.16b,v0.16b
+	eor	v5.16b,v5.16b,v1.16b
+	eor	v18.16b,v18.16b,v17.16b
+	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
+	st1	{v4.16b},[x1],#16
+	orr	v0.16b,v2.16b,v2.16b
+	st1	{v5.16b},[x1],#16
+	orr	v1.16b,v3.16b,v3.16b
+	st1	{v18.16b},[x1],#16
+	orr	v18.16b,v19.16b,v19.16b
+	b.hs	Loop3x_cbc_dec		// flags from "subs x2,x2,#0x30" above
+
+	cmn	x2,#0x30		// x2 == -0x30: no blocks remain
+	b.eq	Lcbc_done
+	nop
+
+Lcbc_dec_tail:
+	aesd	v1.16b,v16.16b		// one or two remaining blocks, held in v1 and v18
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v16.16b
+	aesimc	v18.16b,v18.16b
+	ld1	{v16.4s},[x7],#16
+	subs	w6,w6,#2
+	aesd	v1.16b,v17.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v17.16b
+	aesimc	v18.16b,v18.16b
+	ld1	{v17.4s},[x7],#16
+	b.gt	Lcbc_dec_tail
+
+	aesd	v1.16b,v16.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v16.16b
+	aesimc	v18.16b,v18.16b
+	aesd	v1.16b,v17.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v17.16b
+	aesimc	v18.16b,v18.16b
+	aesd	v1.16b,v20.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v20.16b
+	aesimc	v18.16b,v18.16b
+	cmn	x2,#0x20		// x2 == -0x20 selects the single-block path at the b.eq below
+	aesd	v1.16b,v21.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v21.16b
+	aesimc	v18.16b,v18.16b
+	eor	v5.16b,v6.16b,v7.16b
+	aesd	v1.16b,v22.16b
+	aesimc	v1.16b,v1.16b
+	aesd	v18.16b,v22.16b
+	aesimc	v18.16b,v18.16b
+	eor	v17.16b,v3.16b,v7.16b
+	aesd	v1.16b,v23.16b
+	aesd	v18.16b,v23.16b
+	b.eq	Lcbc_dec_one
+	eor	v5.16b,v5.16b,v1.16b
+	eor	v17.16b,v17.16b,v18.16b
+	orr	v6.16b,v19.16b,v19.16b		// last ciphertext block becomes the new IV
+	st1	{v5.16b},[x1],#16
+	st1	{v17.16b},[x1],#16
+	b	Lcbc_done
+
+Lcbc_dec_one:
+	eor	v5.16b,v5.16b,v18.16b
+	orr	v6.16b,v19.16b,v19.16b		// last ciphertext block becomes the new IV
+	st1	{v5.16b},[x1],#16
+
+Lcbc_done:
+	st1	{v6.16b},[x4]		// write the chaining value back to the IV buffer
+Lcbc_abort:
+	ldr	x29,[sp],#16		// pop the frame; x30 is not reloaded (see note at entry)
+	ret
+
+.globl	aes_hw_ctr32_encrypt_blocks
+// aes_hw_ctr32_encrypt_blocks: x0 = in, x1 = out, x2 = number of 16-byte blocks, x3 = key schedule, x4 = counter block whose 32-bit word at byte offset 12 is the counter; the block at x4 is only read, never written back.
+.def aes_hw_ctr32_encrypt_blocks
+   .type 32
+.endef
+.align	5
+aes_hw_ctr32_encrypt_blocks:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+	ldr	w5,[x3,#240]		// w5 = round count
+
+	ldr	w8, [x4, #12]		// w8 = counter word
+	ld1	{v0.4s},[x4]		// v0 = full counter block
+
+	ld1	{v16.4s,v17.4s},[x3]		// load key schedule...
+	sub	w5,w5,#4
+	mov	x12,#16		// x12 = input pointer step used in the tail
+	cmp	x2,#2		// flags feed the csel and the b.ls below
+	add	x7,x3,x5,lsl#4	// pointer to last 5 round keys
+	sub	w5,w5,#2
+	ld1	{v20.4s,v21.4s},[x7],#32
+	ld1	{v22.4s,v23.4s},[x7],#32
+	ld1	{v7.4s},[x7]		// v7 = last round key
+	add	x7,x3,#32
+	mov	w6,w5
+	csel	x12,xzr,x12,lo		// fewer than two blocks: the tail must not step x0
+
+	// ARM Cortex-A57 and Cortex-A72 cores running in 32-bit mode are
+	// affected by silicon errata #1742098 [0] and #1655431 [1],
+	// respectively, where the second instruction of an aese/aesmc
+	// instruction pair may execute twice if an interrupt is taken right
+	// after the first instruction consumes an input register of which a
+	// single 32-bit lane has been updated the last time it was modified.
+	//
+	// This function uses a counter in one 32-bit lane. The vmov lines
+	// could write to v1.16b and v18.16b directly, but that trips these bugs.
+	// We write to v6.16b and copy to the final register as a workaround.
+	//
+	// [0] ARM-EPM-049219 v23 Cortex-A57 MPCore Software Developers Errata Notice
+	// [1] ARM-EPM-012079 v11.0 Cortex-A72 MPCore Software Developers Errata Notice
+#ifndef __ARMEB__
+	rev	w8, w8
+#endif
+	add	w10, w8, #1
+	orr	v6.16b,v0.16b,v0.16b		// v6 = scratch copy of the counter block
+	rev	w10, w10
+	mov	v6.s[3],w10
+	add	w8, w8, #2
+	orr	v1.16b,v6.16b,v6.16b		// v1 = counter block with counter + 1
+	b.ls	Lctr32_tail		// two blocks or fewer: skip the 3x loop
+	rev	w12, w8
+	mov	v6.s[3],w12
+	sub	x2,x2,#3		// bias
+	orr	v18.16b,v6.16b,v6.16b		// v18 = counter block with counter + 2
+	b	Loop3x_ctr32
+
+.align	4
+Loop3x_ctr32:
+	aese	v0.16b,v16.16b		// three counter blocks (v0, v1, v18) are encrypted together
+	aesmc	v0.16b,v0.16b
+	aese	v1.16b,v16.16b
+	aesmc	v1.16b,v1.16b
+	aese	v18.16b,v16.16b
+	aesmc	v18.16b,v18.16b
+	ld1	{v16.4s},[x7],#16
+	subs	w6,w6,#2
+	aese	v0.16b,v17.16b
+	aesmc	v0.16b,v0.16b
+	aese	v1.16b,v17.16b
+	aesmc	v1.16b,v1.16b
+	aese	v18.16b,v17.16b
+	aesmc	v18.16b,v18.16b
+	ld1	{v17.4s},[x7],#16
+	b.gt	Loop3x_ctr32
+
+	aese	v0.16b,v16.16b
+	aesmc	v4.16b,v0.16b
+	aese	v1.16b,v16.16b
+	aesmc	v5.16b,v1.16b
+	ld1	{v2.16b},[x0],#16		// load three input blocks: v2, v3, v19
+	add	w9,w8,#1
+	aese	v18.16b,v16.16b
+	aesmc	v18.16b,v18.16b
+	ld1	{v3.16b},[x0],#16
+	rev	w9,w9
+	aese	v4.16b,v17.16b
+	aesmc	v4.16b,v4.16b
+	aese	v5.16b,v17.16b
+	aesmc	v5.16b,v5.16b
+	ld1	{v19.16b},[x0],#16
+	mov	x7,x3
+	aese	v18.16b,v17.16b
+	aesmc	v17.16b,v18.16b
+	aese	v4.16b,v20.16b
+	aesmc	v4.16b,v4.16b
+	aese	v5.16b,v20.16b
+	aesmc	v5.16b,v5.16b
+	eor	v2.16b,v2.16b,v7.16b		// pre-xor each input block with the last round key
+	add	w10,w8,#2
+	aese	v17.16b,v20.16b
+	aesmc	v17.16b,v17.16b
+	eor	v3.16b,v3.16b,v7.16b
+	add	w8,w8,#3
+	aese	v4.16b,v21.16b
+	aesmc	v4.16b,v4.16b
+	aese	v5.16b,v21.16b
+	aesmc	v5.16b,v5.16b
+	 // Note the logic to update v0.16b, v1.16b, and v18.16b is written to work
+	 // around a bug in ARM Cortex-A57 and Cortex-A72 cores running in
+	 // 32-bit mode. See the comment above.
+	eor	v19.16b,v19.16b,v7.16b
+	mov	v6.s[3], w9
+	aese	v17.16b,v21.16b
+	aesmc	v17.16b,v17.16b
+	orr	v0.16b,v6.16b,v6.16b
+	rev	w10,w10
+	aese	v4.16b,v22.16b
+	aesmc	v4.16b,v4.16b
+	mov	v6.s[3], w10
+	rev	w12,w8
+	aese	v5.16b,v22.16b
+	aesmc	v5.16b,v5.16b
+	orr	v1.16b,v6.16b,v6.16b
+	mov	v6.s[3], w12
+	aese	v17.16b,v22.16b
+	aesmc	v17.16b,v17.16b
+	orr	v18.16b,v6.16b,v6.16b
+	subs	x2,x2,#3		// flags are consumed by the b.hs at the loop end
+	aese	v4.16b,v23.16b
+	aese	v5.16b,v23.16b
+	aese	v17.16b,v23.16b
+
+	eor	v2.16b,v2.16b,v4.16b
+	ld1	{v16.4s},[x7],#16	// re-pre-load rndkey[0]
+	st1	{v2.16b},[x1],#16
+	eor	v3.16b,v3.16b,v5.16b
+	mov	w6,w5
+	st1	{v3.16b},[x1],#16
+	eor	v19.16b,v19.16b,v17.16b
+	ld1	{v17.4s},[x7],#16	// re-pre-load rndkey[1]
+	st1	{v19.16b},[x1],#16
+	b.hs	Loop3x_ctr32
+
+	adds	x2,x2,#3		// undo the bias: x2 = blocks still to do
+	b.eq	Lctr32_done
+	cmp	x2,#1
+	mov	x12,#16
+	csel	x12,xzr,x12,eq		// one block left: the tail must not step x0
+
+Lctr32_tail:
+	aese	v0.16b,v16.16b		// tail: two counter blocks (v0, v1) are always computed
+	aesmc	v0.16b,v0.16b
+	aese	v1.16b,v16.16b
+	aesmc	v1.16b,v1.16b
+	ld1	{v16.4s},[x7],#16
+	subs	w6,w6,#2
+	aese	v0.16b,v17.16b
+	aesmc	v0.16b,v0.16b
+	aese	v1.16b,v17.16b
+	aesmc	v1.16b,v1.16b
+	ld1	{v17.4s},[x7],#16
+	b.gt	Lctr32_tail
+
+	aese	v0.16b,v16.16b
+	aesmc	v0.16b,v0.16b
+	aese	v1.16b,v16.16b
+	aesmc	v1.16b,v1.16b
+	aese	v0.16b,v17.16b
+	aesmc	v0.16b,v0.16b
+	aese	v1.16b,v17.16b
+	aesmc	v1.16b,v1.16b
+	ld1	{v2.16b},[x0],x12		// x12 is 0 when only one block remains
+	aese	v0.16b,v20.16b
+	aesmc	v0.16b,v0.16b
+	aese	v1.16b,v20.16b
+	aesmc	v1.16b,v1.16b
+	ld1	{v3.16b},[x0]
+	aese	v0.16b,v21.16b
+	aesmc	v0.16b,v0.16b
+	aese	v1.16b,v21.16b
+	aesmc	v1.16b,v1.16b
+	eor	v2.16b,v2.16b,v7.16b
+	aese	v0.16b,v22.16b
+	aesmc	v0.16b,v0.16b
+	aese	v1.16b,v22.16b
+	aesmc	v1.16b,v1.16b
+	eor	v3.16b,v3.16b,v7.16b
+	aese	v0.16b,v23.16b
+	aese	v1.16b,v23.16b
+
+	cmp	x2,#1
+	eor	v2.16b,v2.16b,v0.16b
+	eor	v3.16b,v3.16b,v1.16b
+	st1	{v2.16b},[x1],#16
+	b.eq	Lctr32_done		// x2 == 1: the second block is not stored
+	st1	{v3.16b},[x1]
+
+Lctr32_done:
+	ldr	x29,[sp],#16		// pop the frame; x30 is not reloaded (see note at entry)
+	ret
+
+#endif
+#endif
+#endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/win-aarch64/crypto/fipsmodule/armv8-mont.S b/deps/boringssl/win-aarch64/crypto/fipsmodule/armv8-mont.S
new file mode 100644
index 0000000..e1bee28
--- /dev/null
+++ b/deps/boringssl/win-aarch64/crypto/fipsmodule/armv8-mont.S
@@ -0,0 +1,1441 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+
+#if !defined(OPENSSL_NO_ASM)
+#if defined(__aarch64__)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+#include <openssl/arm_arch.h>
+
+.text
+
+.globl	bn_mul_mont
+
+.def bn_mul_mont
+   .type 32
+.endef
+.align	5
+bn_mul_mont:
+	AARCH64_SIGN_LINK_REGISTER
+	tst	x5,#7			// is num a multiple of 8?
+	b.eq	__bn_sqr8x_mont		// yes: try the 8x squaring path
+	tst	x5,#3			// is num a multiple of 4?
+	b.eq	__bn_mul4x_mont		// yes: take the 4x multiplication path
+Lmul_mont:
+	stp	x29,x30,[sp,#-64]!	// 64-byte frame; x19-x24 are saved below
+	add	x29,sp,#0
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+
+	ldr	x9,[x2],#8		// bp[0]
+	sub	x22,sp,x5,lsl#3		// room for num 8-byte words of tp below sp
+	ldp	x7,x8,[x1],#16	// ap[0..1]
+	lsl	x5,x5,#3		// num is counted in bytes from here on
+	ldr	x4,[x4]		// *n0
+	and	x22,x22,#-16		// ABI says so (keep sp 16-byte aligned)
+	ldp	x13,x14,[x3],#16	// np[0..1]
+
+	mul	x6,x7,x9		// ap[0]*bp[0]
+	sub	x21,x5,#16		// j=num-2
+	umulh	x7,x7,x9
+	mul	x10,x8,x9		// ap[1]*bp[0]
+	umulh	x11,x8,x9
+
+	mul	x15,x6,x4		// "tp[0]"*n0
+	mov	sp,x22			// alloca
+
+	// (*)	mul	x12,x13,x15	// np[0]*m1
+	umulh	x13,x13,x15
+	mul	x16,x14,x15		// np[1]*m1
+	// (*)	adds	x12,x12,x6	// discarded
+	// (*)	As for removal of first multiplication and addition
+	//	instructions. The outcome of first addition is
+	//	guaranteed to be zero, which leaves two computationally
+	//	significant outcomes: it either carries or not. Then
+	//	question is when does it carry? Is there alternative
+	//	way to deduce it? If you follow operations, you can
+	//	observe that condition for carry is quite simple:
+	//	x6 being non-zero. So that carry can be calculated
+	//	by adding -1 to x6. That's what next instruction does.
+	subs	xzr,x6,#1		// (*)
+	umulh	x17,x14,x15
+	adc	x13,x13,xzr
+	cbz	x21,L1st_skip
+
+L1st:
+	ldr	x8,[x1],#8
+	adds	x6,x10,x7
+	sub	x21,x21,#8		// j--
+	adc	x7,x11,xzr
+
+	ldr	x14,[x3],#8
+	adds	x12,x16,x13
+	mul	x10,x8,x9		// ap[j]*bp[0]
+	adc	x13,x17,xzr
+	umulh	x11,x8,x9
+
+	adds	x12,x12,x6
+	mul	x16,x14,x15		// np[j]*m1
+	adc	x13,x13,xzr
+	umulh	x17,x14,x15
+	str	x12,[x22],#8		// tp[j-1]
+	cbnz	x21,L1st
+
+L1st_skip:
+	adds	x6,x10,x7
+	sub	x1,x1,x5		// rewind x1
+	adc	x7,x11,xzr
+
+	adds	x12,x16,x13
+	sub	x3,x3,x5		// rewind x3
+	adc	x13,x17,xzr
+
+	adds	x12,x12,x6
+	sub	x20,x5,#8		// i=num-1
+	adcs	x13,x13,x7
+
+	adc	x19,xzr,xzr		// upmost overflow bit
+	stp	x12,x13,[x22]
+
+Louter:
+	ldr	x9,[x2],#8		// bp[i]
+	ldp	x7,x8,[x1],#16
+	ldr	x23,[sp]		// tp[0]
+	add	x22,sp,#8
+
+	mul	x6,x7,x9		// ap[0]*bp[i]
+	sub	x21,x5,#16		// j=num-2
+	umulh	x7,x7,x9
+	ldp	x13,x14,[x3],#16
+	mul	x10,x8,x9		// ap[1]*bp[i]
+	adds	x6,x6,x23
+	umulh	x11,x8,x9
+	adc	x7,x7,xzr
+
+	mul	x15,x6,x4
+	sub	x20,x20,#8		// i--
+
+	// (*)	mul	x12,x13,x15	// np[0]*m1
+	umulh	x13,x13,x15
+	mul	x16,x14,x15		// np[1]*m1
+	// (*)	adds	x12,x12,x6
+	subs	xzr,x6,#1		// (*)
+	umulh	x17,x14,x15
+	cbz	x21,Linner_skip
+
+Linner:
+	ldr	x8,[x1],#8
+	adc	x13,x13,xzr
+	ldr	x23,[x22],#8		// tp[j]
+	adds	x6,x10,x7
+	sub	x21,x21,#8		// j--
+	adc	x7,x11,xzr
+
+	adds	x12,x16,x13
+	ldr	x14,[x3],#8
+	adc	x13,x17,xzr
+
+	mul	x10,x8,x9		// ap[j]*bp[i]
+	adds	x6,x6,x23
+	umulh	x11,x8,x9
+	adc	x7,x7,xzr
+
+	mul	x16,x14,x15		// np[j]*m1
+	adds	x12,x12,x6
+	umulh	x17,x14,x15
+	str	x12,[x22,#-16]		// tp[j-1]
+	cbnz	x21,Linner
+
+Linner_skip:
+	ldr	x23,[x22],#8		// tp[j]
+	adc	x13,x13,xzr
+	adds	x6,x10,x7
+	sub	x1,x1,x5		// rewind x1
+	adc	x7,x11,xzr
+
+	adds	x12,x16,x13
+	sub	x3,x3,x5		// rewind x3
+	adcs	x13,x17,x19
+	adc	x19,xzr,xzr
+
+	adds	x6,x6,x23
+	adc	x7,x7,xzr
+
+	adds	x12,x12,x6
+	adcs	x13,x13,x7
+	adc	x19,x19,xzr		// upmost overflow bit
+	stp	x12,x13,[x22,#-16]
+
+	cbnz	x20,Louter
+
+	// Final step. We see if result is larger than modulus, and
+	// if it is, subtract the modulus. But comparison implies
+	// subtraction. So we subtract modulus, see if it borrowed,
+	// and conditionally copy original value.
+	ldr	x23,[sp]		// tp[0]
+	add	x22,sp,#8
+	ldr	x14,[x3],#8		// np[0]
+	subs	x21,x5,#8		// j=num-1 and clear borrow
+	mov	x1,x0			// x1 = write cursor into rp
+Lsub:
+	sbcs	x8,x23,x14		// tp[j]-np[j]
+	ldr	x23,[x22],#8
+	sub	x21,x21,#8		// j--
+	ldr	x14,[x3],#8
+	str	x8,[x1],#8		// rp[j]=tp[j]-np[j]
+	cbnz	x21,Lsub
+
+	sbcs	x8,x23,x14
+	sbcs	x19,x19,xzr		// did it borrow?
+	str	x8,[x1],#8		// rp[num-1]
+
+	ldr	x23,[sp]		// tp[0]
+	add	x22,sp,#8
+	ldr	x8,[x0],#8		// rp[0]
+	sub	x5,x5,#8		// num--
+	nop
+Lcond_copy:
+	sub	x5,x5,#8		// num--
+	csel	x14,x23,x8,lo		// did it borrow?
+	ldr	x23,[x22],#8
+	ldr	x8,[x0],#8
+	str	xzr,[x22,#-16]		// wipe tp
+	str	x14,[x0,#-16]
+	cbnz	x5,Lcond_copy
+
+	csel	x14,x23,x8,lo
+	str	xzr,[x22,#-8]		// wipe tp
+	str	x14,[x0,#-8]
+
+	ldp	x19,x20,[x29,#16]
+	mov	sp,x29
+	ldp	x21,x22,[x29,#32]
+	mov	x0,#1			// return value is always 1
+	ldp	x23,x24,[x29,#48]
+	ldr	x29,[sp],#64		// pop the frame; x30 was never modified on this path
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+
+.def __bn_sqr8x_mont
+   .type 32
+.endef
+.align	5
+__bn_sqr8x_mont:
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_sqr8x_mont is jumped to
+	// only from bn_mul_mont which has already signed the return address.
+	cmp	x1,x2			// ap == bp, i.e. is this a squaring?
+	b.ne	__bn_mul4x_mont		// no: use the 4x multiplication path
+Lsqr8x_mont:
+	stp	x29,x30,[sp,#-128]!
+	add	x29,sp,#0
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+	stp	x0,x3,[sp,#96]	// offload rp and np
+
+	ldp	x6,x7,[x1,#8*0]
+	ldp	x8,x9,[x1,#8*2]
+	ldp	x10,x11,[x1,#8*4]
+	ldp	x12,x13,[x1,#8*6]
+
+	sub	x2,sp,x5,lsl#4		// room for 2*num 8-byte words below sp
+	lsl	x5,x5,#3		// num is counted in bytes from here on
+	ldr	x4,[x4]		// *n0
+	mov	sp,x2			// alloca
+	sub	x27,x5,#8*8
+	b	Lsqr8x_zero_start
+
+Lsqr8x_zero:
+	sub	x27,x27,#8*8
+	stp	xzr,xzr,[x2,#8*0]
+	stp	xzr,xzr,[x2,#8*2]
+	stp	xzr,xzr,[x2,#8*4]
+	stp	xzr,xzr,[x2,#8*6]
+Lsqr8x_zero_start:
+	stp	xzr,xzr,[x2,#8*8]
+	stp	xzr,xzr,[x2,#8*10]
+	stp	xzr,xzr,[x2,#8*12]
+	stp	xzr,xzr,[x2,#8*14]
+	add	x2,x2,#8*16
+	cbnz	x27,Lsqr8x_zero
+
+	add	x3,x1,x5
+	add	x1,x1,#8*8
+	mov	x19,xzr
+	mov	x20,xzr
+	mov	x21,xzr
+	mov	x22,xzr
+	mov	x23,xzr
+	mov	x24,xzr
+	mov	x25,xzr
+	mov	x26,xzr
+	mov	x2,sp
+	str	x4,[x29,#112]		// offload n0
+
+	// Multiply everything but a[i]*a[i]
+.align	4
+Lsqr8x_outer_loop:
+        //                                                 a[1]a[0]	(i)
+        //                                             a[2]a[0]
+        //                                         a[3]a[0]
+        //                                     a[4]a[0]
+        //                                 a[5]a[0]
+        //                             a[6]a[0]
+        //                         a[7]a[0]
+        //                                         a[2]a[1]		(ii)
+        //                                     a[3]a[1]
+        //                                 a[4]a[1]
+        //                             a[5]a[1]
+        //                         a[6]a[1]
+        //                     a[7]a[1]
+        //                                 a[3]a[2]			(iii)
+        //                             a[4]a[2]
+        //                         a[5]a[2]
+        //                     a[6]a[2]
+        //                 a[7]a[2]
+        //                         a[4]a[3]				(iv)
+        //                     a[5]a[3]
+        //                 a[6]a[3]
+        //             a[7]a[3]
+        //                 a[5]a[4]					(v)
+        //             a[6]a[4]
+        //         a[7]a[4]
+        //         a[6]a[5]						(vi)
+        //     a[7]a[5]
+        // a[7]a[6]							(vii)
+
+	mul	x14,x7,x6		// lo(a[1..7]*a[0])		(i)
+	mul	x15,x8,x6
+	mul	x16,x9,x6
+	mul	x17,x10,x6
+	adds	x20,x20,x14		// t[1]+lo(a[1]*a[0])
+	mul	x14,x11,x6
+	adcs	x21,x21,x15
+	mul	x15,x12,x6
+	adcs	x22,x22,x16
+	mul	x16,x13,x6
+	adcs	x23,x23,x17
+	umulh	x17,x7,x6		// hi(a[1..7]*a[0])
+	adcs	x24,x24,x14
+	umulh	x14,x8,x6
+	adcs	x25,x25,x15
+	umulh	x15,x9,x6
+	adcs	x26,x26,x16
+	umulh	x16,x10,x6
+	stp	x19,x20,[x2],#8*2	// t[0..1]
+	adc	x19,xzr,xzr		// t[8]
+	adds	x21,x21,x17		// t[2]+hi(a[1]*a[0])
+	umulh	x17,x11,x6
+	adcs	x22,x22,x14
+	umulh	x14,x12,x6
+	adcs	x23,x23,x15
+	umulh	x15,x13,x6
+	adcs	x24,x24,x16
+	mul	x16,x8,x7		// lo(a[2..7]*a[1])		(ii)
+	adcs	x25,x25,x17
+	mul	x17,x9,x7
+	adcs	x26,x26,x14
+	mul	x14,x10,x7
+	adc	x19,x19,x15
+
+	mul	x15,x11,x7
+	adds	x22,x22,x16
+	mul	x16,x12,x7
+	adcs	x23,x23,x17
+	mul	x17,x13,x7
+	adcs	x24,x24,x14
+	umulh	x14,x8,x7		// hi(a[2..7]*a[1])
+	adcs	x25,x25,x15
+	umulh	x15,x9,x7
+	adcs	x26,x26,x16
+	umulh	x16,x10,x7
+	adcs	x19,x19,x17
+	umulh	x17,x11,x7
+	stp	x21,x22,[x2],#8*2	// t[2..3]
+	adc	x20,xzr,xzr		// t[9]
+	adds	x23,x23,x14
+	umulh	x14,x12,x7
+	adcs	x24,x24,x15
+	umulh	x15,x13,x7
+	adcs	x25,x25,x16
+	mul	x16,x9,x8		// lo(a[3..7]*a[2])		(iii)
+	adcs	x26,x26,x17
+	mul	x17,x10,x8
+	adcs	x19,x19,x14
+	mul	x14,x11,x8
+	adc	x20,x20,x15
+
+	mul	x15,x12,x8
+	adds	x24,x24,x16
+	mul	x16,x13,x8
+	adcs	x25,x25,x17
+	umulh	x17,x9,x8		// hi(a[3..7]*a[2])
+	adcs	x26,x26,x14
+	umulh	x14,x10,x8
+	adcs	x19,x19,x15
+	umulh	x15,x11,x8
+	adcs	x20,x20,x16
+	umulh	x16,x12,x8
+	stp	x23,x24,[x2],#8*2	// t[4..5]
+	adc	x21,xzr,xzr		// t[10]
+	adds	x25,x25,x17
+	umulh	x17,x13,x8
+	adcs	x26,x26,x14
+	mul	x14,x10,x9		// lo(a[4..7]*a[3])		(iv)
+	adcs	x19,x19,x15
+	mul	x15,x11,x9
+	adcs	x20,x20,x16
+	mul	x16,x12,x9
+	adc	x21,x21,x17
+
+	mul	x17,x13,x9
+	adds	x26,x26,x14
+	umulh	x14,x10,x9		// hi(a[4..7]*a[3])
+	adcs	x19,x19,x15
+	umulh	x15,x11,x9
+	adcs	x20,x20,x16
+	umulh	x16,x12,x9
+	adcs	x21,x21,x17
+	umulh	x17,x13,x9
+	stp	x25,x26,[x2],#8*2	// t[6..7]
+	adc	x22,xzr,xzr		// t[11]
+	adds	x19,x19,x14
+	mul	x14,x11,x10		// lo(a[5..7]*a[4])		(v)
+	adcs	x20,x20,x15
+	mul	x15,x12,x10
+	adcs	x21,x21,x16
+	mul	x16,x13,x10
+	adc	x22,x22,x17
+
+	umulh	x17,x11,x10		// hi(a[5..7]*a[4])
+	adds	x20,x20,x14
+	umulh	x14,x12,x10
+	adcs	x21,x21,x15
+	umulh	x15,x13,x10
+	adcs	x22,x22,x16
+	mul	x16,x12,x11		// lo(a[6..7]*a[5])		(vi)
+	adc	x23,xzr,xzr		// t[12]
+	adds	x21,x21,x17
+	mul	x17,x13,x11
+	adcs	x22,x22,x14
+	umulh	x14,x12,x11		// hi(a[6..7]*a[5])
+	adc	x23,x23,x15
+
+	umulh	x15,x13,x11
+	adds	x22,x22,x16
+	mul	x16,x13,x12		// lo(a[7]*a[6])		(vii)
+	adcs	x23,x23,x17
+	umulh	x17,x13,x12		// hi(a[7]*a[6])
+	adc	x24,xzr,xzr		// t[13]
+	adds	x23,x23,x14
+	sub	x27,x3,x1	// done yet?
+	adc	x24,x24,x15
+
+	adds	x24,x24,x16
+	sub	x14,x3,x5	// rewound ap
+	adc	x25,xzr,xzr		// t[14]
+	add	x25,x25,x17
+
+	cbz	x27,Lsqr8x_outer_break
+
+	mov	x4,x6
+	ldp	x6,x7,[x2,#8*0]
+	ldp	x8,x9,[x2,#8*2]
+	ldp	x10,x11,[x2,#8*4]
+	ldp	x12,x13,[x2,#8*6]
+	adds	x19,x19,x6
+	adcs	x20,x20,x7
+	ldp	x6,x7,[x1,#8*0]
+	adcs	x21,x21,x8
+	adcs	x22,x22,x9
+	ldp	x8,x9,[x1,#8*2]
+	adcs	x23,x23,x10
+	adcs	x24,x24,x11
+	ldp	x10,x11,[x1,#8*4]
+	adcs	x25,x25,x12
+	mov	x0,x1
+	adcs	x26,xzr,x13
+	ldp	x12,x13,[x1,#8*6]
+	add	x1,x1,#8*8
+	//adc	x28,xzr,xzr		// moved below
+	mov	x27,#-8*8
+
+	//                                                         a[8]a[0]
+	//                                                     a[9]a[0]
+	//                                                 a[a]a[0]
+	//                                             a[b]a[0]
+	//                                         a[c]a[0]
+	//                                     a[d]a[0]
+	//                                 a[e]a[0]
+	//                             a[f]a[0]
+	//                                                     a[8]a[1]
+	//                         a[f]a[1]........................
+	//                                                 a[8]a[2]
+	//                     a[f]a[2]........................
+	//                                             a[8]a[3]
+	//                 a[f]a[3]........................
+	//                                         a[8]a[4]
+	//             a[f]a[4]........................
+	//                                     a[8]a[5]
+	//         a[f]a[5]........................
+	//                                 a[8]a[6]
+	//     a[f]a[6]........................
+	//                             a[8]a[7]
+	// a[f]a[7]........................
+Lsqr8x_mul:
+	mul	x14,x6,x4
+	adc	x28,xzr,xzr		// carry bit, modulo-scheduled
+	mul	x15,x7,x4
+	add	x27,x27,#8
+	mul	x16,x8,x4
+	mul	x17,x9,x4
+	adds	x19,x19,x14
+	mul	x14,x10,x4
+	adcs	x20,x20,x15
+	mul	x15,x11,x4
+	adcs	x21,x21,x16
+	mul	x16,x12,x4
+	adcs	x22,x22,x17
+	mul	x17,x13,x4
+	adcs	x23,x23,x14
+	umulh	x14,x6,x4
+	adcs	x24,x24,x15
+	umulh	x15,x7,x4
+	adcs	x25,x25,x16
+	umulh	x16,x8,x4
+	adcs	x26,x26,x17
+	umulh	x17,x9,x4
+	adc	x28,x28,xzr
+	str	x19,[x2],#8
+	adds	x19,x20,x14
+	umulh	x14,x10,x4
+	adcs	x20,x21,x15
+	umulh	x15,x11,x4
+	adcs	x21,x22,x16
+	umulh	x16,x12,x4
+	adcs	x22,x23,x17
+	umulh	x17,x13,x4
+	ldr	x4,[x0,x27]
+	adcs	x23,x24,x14
+	adcs	x24,x25,x15
+	adcs	x25,x26,x16
+	adcs	x26,x28,x17
+	//adc	x28,xzr,xzr		// moved above
+	cbnz	x27,Lsqr8x_mul
+					// note that carry flag is guaranteed
+					// to be zero at this point
+	cmp	x1,x3		// done yet?
+	b.eq	Lsqr8x_break
+
+	ldp	x6,x7,[x2,#8*0]
+	ldp	x8,x9,[x2,#8*2]
+	ldp	x10,x11,[x2,#8*4]
+	ldp	x12,x13,[x2,#8*6]
+	adds	x19,x19,x6
+	ldr	x4,[x0,#-8*8]
+	adcs	x20,x20,x7
+	ldp	x6,x7,[x1,#8*0]
+	adcs	x21,x21,x8
+	adcs	x22,x22,x9
+	ldp	x8,x9,[x1,#8*2]
+	adcs	x23,x23,x10
+	adcs	x24,x24,x11
+	ldp	x10,x11,[x1,#8*4]
+	adcs	x25,x25,x12
+	mov	x27,#-8*8
+	adcs	x26,x26,x13
+	ldp	x12,x13,[x1,#8*6]
+	add	x1,x1,#8*8
+	//adc	x28,xzr,xzr		// moved above
+	b	Lsqr8x_mul
+
+.align	4
+Lsqr8x_break:
+	ldp	x6,x7,[x0,#8*0]
+	add	x1,x0,#8*8
+	ldp	x8,x9,[x0,#8*2]
+	sub	x14,x3,x1		// is it last iteration?
+	ldp	x10,x11,[x0,#8*4]
+	sub	x15,x2,x14
+	ldp	x12,x13,[x0,#8*6]
+	cbz	x14,Lsqr8x_outer_loop
+
+	stp	x19,x20,[x2,#8*0]
+	ldp	x19,x20,[x15,#8*0]
+	stp	x21,x22,[x2,#8*2]
+	ldp	x21,x22,[x15,#8*2]
+	stp	x23,x24,[x2,#8*4]
+	ldp	x23,x24,[x15,#8*4]
+	stp	x25,x26,[x2,#8*6]
+	mov	x2,x15
+	ldp	x25,x26,[x15,#8*6]
+	b	Lsqr8x_outer_loop
+
+.align	4
+Lsqr8x_outer_break:
+	// Now multiply above result by 2 and add a[n-1]*a[n-1]|...|a[0]*a[0]
+	ldp	x7,x9,[x14,#8*0]	// recall that x14 is &a[0]
+	ldp	x15,x16,[sp,#8*1]
+	ldp	x11,x13,[x14,#8*2]
+	add	x1,x14,#8*4
+	ldp	x17,x14,[sp,#8*3]
+
+	stp	x19,x20,[x2,#8*0]
+	mul	x19,x7,x7
+	stp	x21,x22,[x2,#8*2]
+	umulh	x7,x7,x7
+	stp	x23,x24,[x2,#8*4]
+	mul	x8,x9,x9
+	stp	x25,x26,[x2,#8*6]
+	mov	x2,sp
+	umulh	x9,x9,x9
+	adds	x20,x7,x15,lsl#1
+	extr	x15,x16,x15,#63
+	sub	x27,x5,#8*4
+
+Lsqr4x_shift_n_add:
+	adcs	x21,x8,x15
+	extr	x16,x17,x16,#63
+	sub	x27,x27,#8*4
+	adcs	x22,x9,x16
+	ldp	x15,x16,[x2,#8*5]
+	mul	x10,x11,x11
+	ldp	x7,x9,[x1],#8*2
+	umulh	x11,x11,x11
+	mul	x12,x13,x13
+	umulh	x13,x13,x13
+	extr	x17,x14,x17,#63
+	stp	x19,x20,[x2,#8*0]
+	adcs	x23,x10,x17
+	extr	x14,x15,x14,#63
+	stp	x21,x22,[x2,#8*2]
+	adcs	x24,x11,x14
+	ldp	x17,x14,[x2,#8*7]
+	extr	x15,x16,x15,#63
+	adcs	x25,x12,x15
+	extr	x16,x17,x16,#63
+	adcs	x26,x13,x16
+	ldp	x15,x16,[x2,#8*9]
+	mul	x6,x7,x7
+	ldp	x11,x13,[x1],#8*2
+	umulh	x7,x7,x7
+	mul	x8,x9,x9
+	umulh	x9,x9,x9
+	stp	x23,x24,[x2,#8*4]
+	extr	x17,x14,x17,#63
+	stp	x25,x26,[x2,#8*6]
+	add	x2,x2,#8*8
+	adcs	x19,x6,x17
+	extr	x14,x15,x14,#63
+	adcs	x20,x7,x14
+	ldp	x17,x14,[x2,#8*3]
+	extr	x15,x16,x15,#63
+	cbnz	x27,Lsqr4x_shift_n_add
+	ldp	x1,x4,[x29,#104]	// pull np and n0
+
+	adcs	x21,x8,x15
+	extr	x16,x17,x16,#63
+	adcs	x22,x9,x16
+	ldp	x15,x16,[x2,#8*5]
+	mul	x10,x11,x11
+	umulh	x11,x11,x11
+	stp	x19,x20,[x2,#8*0]
+	mul	x12,x13,x13
+	umulh	x13,x13,x13
+	stp	x21,x22,[x2,#8*2]
+	extr	x17,x14,x17,#63
+	adcs	x23,x10,x17
+	extr	x14,x15,x14,#63
+	ldp	x19,x20,[sp,#8*0]
+	adcs	x24,x11,x14
+	extr	x15,x16,x15,#63
+	ldp	x6,x7,[x1,#8*0]
+	adcs	x25,x12,x15
+	extr	x16,xzr,x16,#63
+	ldp	x8,x9,[x1,#8*2]
+	adc	x26,x13,x16
+	ldp	x10,x11,[x1,#8*4]
+
+	// Reduce by 512 bits per iteration
+	mul	x28,x4,x19		// t[0]*n0
+	ldp	x12,x13,[x1,#8*6]
+	add	x3,x1,x5
+	ldp	x21,x22,[sp,#8*2]
+	stp	x23,x24,[x2,#8*4]
+	ldp	x23,x24,[sp,#8*4]
+	stp	x25,x26,[x2,#8*6]
+	ldp	x25,x26,[sp,#8*6]
+	add	x1,x1,#8*8
+	mov	x30,xzr		// initial top-most carry
+	mov	x2,sp
+	mov	x27,#8
+
+Lsqr8x_reduction:
+	// (*)	mul	x14,x6,x28	// lo(n[0-7])*lo(t[0]*n0)
+	mul	x15,x7,x28
+	sub	x27,x27,#1
+	mul	x16,x8,x28
+	str	x28,[x2],#8		// put aside t[0]*n0 for tail processing
+	mul	x17,x9,x28
+	// (*)	adds	xzr,x19,x14
+	subs	xzr,x19,#1		// (*)
+	mul	x14,x10,x28
+	adcs	x19,x20,x15
+	mul	x15,x11,x28
+	adcs	x20,x21,x16
+	mul	x16,x12,x28
+	adcs	x21,x22,x17
+	mul	x17,x13,x28
+	adcs	x22,x23,x14
+	umulh	x14,x6,x28		// hi(n[0-7])*lo(t[0]*n0)
+	adcs	x23,x24,x15
+	umulh	x15,x7,x28
+	adcs	x24,x25,x16
+	umulh	x16,x8,x28
+	adcs	x25,x26,x17
+	umulh	x17,x9,x28
+	adc	x26,xzr,xzr
+	adds	x19,x19,x14
+	umulh	x14,x10,x28
+	adcs	x20,x20,x15
+	umulh	x15,x11,x28
+	adcs	x21,x21,x16
+	umulh	x16,x12,x28
+	adcs	x22,x22,x17
+	umulh	x17,x13,x28
+	mul	x28,x4,x19		// next t[0]*n0
+	adcs	x23,x23,x14
+	adcs	x24,x24,x15
+	adcs	x25,x25,x16
+	adc	x26,x26,x17
+	cbnz	x27,Lsqr8x_reduction
+
+	ldp	x14,x15,[x2,#8*0]
+	ldp	x16,x17,[x2,#8*2]
+	mov	x0,x2
+	sub	x27,x3,x1	// done yet?
+	adds	x19,x19,x14
+	adcs	x20,x20,x15
+	ldp	x14,x15,[x2,#8*4]
+	adcs	x21,x21,x16
+	adcs	x22,x22,x17
+	ldp	x16,x17,[x2,#8*6]
+	adcs	x23,x23,x14
+	adcs	x24,x24,x15
+	adcs	x25,x25,x16
+	adcs	x26,x26,x17
+	//adc	x28,xzr,xzr		// moved below
+	cbz	x27,Lsqr8x8_post_condition
+
+	ldr	x4,[x2,#-8*8]
+	ldp	x6,x7,[x1,#8*0]
+	ldp	x8,x9,[x1,#8*2]
+	ldp	x10,x11,[x1,#8*4]
+	mov	x27,#-8*8
+	ldp	x12,x13,[x1,#8*6]
+	add	x1,x1,#8*8
+
+Lsqr8x_tail:
+	mul	x14,x6,x4
+	adc	x28,xzr,xzr		// carry bit, modulo-scheduled
+	mul	x15,x7,x4
+	add	x27,x27,#8
+	mul	x16,x8,x4
+	mul	x17,x9,x4
+	adds	x19,x19,x14
+	mul	x14,x10,x4
+	adcs	x20,x20,x15
+	mul	x15,x11,x4
+	adcs	x21,x21,x16
+	mul	x16,x12,x4
+	adcs	x22,x22,x17
+	mul	x17,x13,x4
+	adcs	x23,x23,x14
+	umulh	x14,x6,x4
+	adcs	x24,x24,x15
+	umulh	x15,x7,x4
+	adcs	x25,x25,x16
+	umulh	x16,x8,x4
+	adcs	x26,x26,x17
+	umulh	x17,x9,x4
+	adc	x28,x28,xzr
+	str	x19,[x2],#8
+	adds	x19,x20,x14
+	umulh	x14,x10,x4
+	adcs	x20,x21,x15
+	umulh	x15,x11,x4
+	adcs	x21,x22,x16
+	umulh	x16,x12,x4
+	adcs	x22,x23,x17
+	umulh	x17,x13,x4
+	ldr	x4,[x0,x27]
+	adcs	x23,x24,x14
+	adcs	x24,x25,x15
+	adcs	x25,x26,x16
+	adcs	x26,x28,x17
+	//adc	x28,xzr,xzr		// moved above
+	cbnz	x27,Lsqr8x_tail
+					// note that carry flag is guaranteed
+					// to be zero at this point
+	ldp	x6,x7,[x2,#8*0]
+	sub	x27,x3,x1	// done yet?
+	sub	x16,x3,x5	// rewound np
+	ldp	x8,x9,[x2,#8*2]
+	ldp	x10,x11,[x2,#8*4]
+	ldp	x12,x13,[x2,#8*6]
+	cbz	x27,Lsqr8x_tail_break
+
+	ldr	x4,[x0,#-8*8]
+	adds	x19,x19,x6
+	adcs	x20,x20,x7
+	ldp	x6,x7,[x1,#8*0]
+	adcs	x21,x21,x8
+	adcs	x22,x22,x9
+	ldp	x8,x9,[x1,#8*2]
+	adcs	x23,x23,x10
+	adcs	x24,x24,x11
+	ldp	x10,x11,[x1,#8*4]
+	adcs	x25,x25,x12
+	mov	x27,#-8*8
+	adcs	x26,x26,x13
+	ldp	x12,x13,[x1,#8*6]
+	add	x1,x1,#8*8
+	//adc	x28,xzr,xzr		// moved above
+	b	Lsqr8x_tail
+
+.align	4
+Lsqr8x_tail_break:
+	ldr	x4,[x29,#112]		// pull n0
+	add	x27,x2,#8*8		// end of current t[num] window
+
+	subs	xzr,x30,#1		// "move" top-most carry to carry bit
+	adcs	x14,x19,x6
+	adcs	x15,x20,x7
+	ldp	x19,x20,[x0,#8*0]
+	adcs	x21,x21,x8
+	ldp	x6,x7,[x16,#8*0]	// recall that x16 is &n[0]
+	adcs	x22,x22,x9
+	ldp	x8,x9,[x16,#8*2]
+	adcs	x23,x23,x10
+	adcs	x24,x24,x11
+	ldp	x10,x11,[x16,#8*4]
+	adcs	x25,x25,x12
+	adcs	x26,x26,x13
+	ldp	x12,x13,[x16,#8*6]
+	add	x1,x16,#8*8
+	adc	x30,xzr,xzr	// top-most carry
+	mul	x28,x4,x19
+	stp	x14,x15,[x2,#8*0]
+	stp	x21,x22,[x2,#8*2]
+	ldp	x21,x22,[x0,#8*2]
+	stp	x23,x24,[x2,#8*4]
+	ldp	x23,x24,[x0,#8*4]
+	cmp	x27,x29		// did we hit the bottom?
+	stp	x25,x26,[x2,#8*6]
+	mov	x2,x0			// slide the window
+	ldp	x25,x26,[x0,#8*6]
+	mov	x27,#8
+	b.ne	Lsqr8x_reduction
+
+	// Final step. We see if result is larger than modulus, and
+	// if it is, subtract the modulus. But comparison implies
+	// subtraction. So we subtract modulus, see if it borrowed,
+	// and conditionally copy original value.
+	ldr	x0,[x29,#96]		// pull rp
+	add	x2,x2,#8*8
+	subs	x14,x19,x6
+	sbcs	x15,x20,x7
+	sub	x27,x5,#8*8
+	mov	x3,x0		// x0 copy
+
+Lsqr8x_sub:
+	sbcs	x16,x21,x8
+	ldp	x6,x7,[x1,#8*0]
+	sbcs	x17,x22,x9
+	stp	x14,x15,[x0,#8*0]
+	sbcs	x14,x23,x10
+	ldp	x8,x9,[x1,#8*2]
+	sbcs	x15,x24,x11
+	stp	x16,x17,[x0,#8*2]
+	sbcs	x16,x25,x12
+	ldp	x10,x11,[x1,#8*4]
+	sbcs	x17,x26,x13
+	ldp	x12,x13,[x1,#8*6]
+	add	x1,x1,#8*8
+	ldp	x19,x20,[x2,#8*0]
+	sub	x27,x27,#8*8
+	ldp	x21,x22,[x2,#8*2]
+	ldp	x23,x24,[x2,#8*4]
+	ldp	x25,x26,[x2,#8*6]
+	add	x2,x2,#8*8
+	stp	x14,x15,[x0,#8*4]
+	sbcs	x14,x19,x6
+	stp	x16,x17,[x0,#8*6]
+	add	x0,x0,#8*8
+	sbcs	x15,x20,x7
+	cbnz	x27,Lsqr8x_sub
+
+	sbcs	x16,x21,x8
+	mov	x2,sp
+	add	x1,sp,x5
+	ldp	x6,x7,[x3,#8*0]
+	sbcs	x17,x22,x9
+	stp	x14,x15,[x0,#8*0]
+	sbcs	x14,x23,x10
+	ldp	x8,x9,[x3,#8*2]
+	sbcs	x15,x24,x11
+	stp	x16,x17,[x0,#8*2]
+	sbcs	x16,x25,x12
+	ldp	x19,x20,[x1,#8*0]
+	sbcs	x17,x26,x13
+	ldp	x21,x22,[x1,#8*2]
+	sbcs	xzr,x30,xzr	// did it borrow?
+	ldr	x30,[x29,#8]		// pull return address
+	stp	x14,x15,[x0,#8*4]
+	stp	x16,x17,[x0,#8*6]
+
+	sub	x27,x5,#8*4
+Lsqr4x_cond_copy:
+	sub	x27,x27,#8*4
+	csel	x14,x19,x6,lo
+	stp	xzr,xzr,[x2,#8*0]
+	csel	x15,x20,x7,lo
+	ldp	x6,x7,[x3,#8*4]
+	ldp	x19,x20,[x1,#8*4]
+	csel	x16,x21,x8,lo
+	stp	xzr,xzr,[x2,#8*2]
+	add	x2,x2,#8*4
+	csel	x17,x22,x9,lo
+	ldp	x8,x9,[x3,#8*6]
+	ldp	x21,x22,[x1,#8*6]
+	add	x1,x1,#8*4
+	stp	x14,x15,[x3,#8*0]
+	stp	x16,x17,[x3,#8*2]
+	add	x3,x3,#8*4
+	stp	xzr,xzr,[x1,#8*0]
+	stp	xzr,xzr,[x1,#8*2]
+	cbnz	x27,Lsqr4x_cond_copy
+
+	csel	x14,x19,x6,lo
+	stp	xzr,xzr,[x2,#8*0]
+	csel	x15,x20,x7,lo
+	stp	xzr,xzr,[x2,#8*2]
+	csel	x16,x21,x8,lo
+	csel	x17,x22,x9,lo
+	stp	x14,x15,[x3,#8*0]
+	stp	x16,x17,[x3,#8*2]
+
+	b	Lsqr8x_done
+
+.align	4
+Lsqr8x8_post_condition:
+	adc	x28,xzr,xzr
+	ldr	x30,[x29,#8]		// pull return address
+	// x19-x26,x28 hold result, x6-x13 hold modulus
+	subs	x6,x19,x6
+	ldr	x1,[x29,#96]		// pull rp
+	sbcs	x7,x20,x7
+	stp	xzr,xzr,[sp,#8*0]
+	sbcs	x8,x21,x8
+	stp	xzr,xzr,[sp,#8*2]
+	sbcs	x9,x22,x9
+	stp	xzr,xzr,[sp,#8*4]
+	sbcs	x10,x23,x10
+	stp	xzr,xzr,[sp,#8*6]
+	sbcs	x11,x24,x11
+	stp	xzr,xzr,[sp,#8*8]
+	sbcs	x12,x25,x12
+	stp	xzr,xzr,[sp,#8*10]
+	sbcs	x13,x26,x13
+	stp	xzr,xzr,[sp,#8*12]
+	sbcs	x28,x28,xzr	// did it borrow?
+	stp	xzr,xzr,[sp,#8*14]
+
+	// x6-x13 hold result-modulus
+	csel	x6,x19,x6,lo
+	csel	x7,x20,x7,lo
+	csel	x8,x21,x8,lo
+	csel	x9,x22,x9,lo
+	stp	x6,x7,[x1,#8*0]
+	csel	x10,x23,x10,lo
+	csel	x11,x24,x11,lo
+	stp	x8,x9,[x1,#8*2]
+	csel	x12,x25,x12,lo
+	csel	x13,x26,x13,lo
+	stp	x10,x11,[x1,#8*4]
+	stp	x12,x13,[x1,#8*6]
+
+Lsqr8x_done:
+	ldp	x19,x20,[x29,#16]
+	mov	sp,x29
+	ldp	x21,x22,[x29,#32]
+	mov	x0,#1			// return value is always 1
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldr	x29,[sp],#128
+	// x30 is popped earlier
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+
+.def __bn_mul4x_mont
+   .type 32
+.endef
+.align	5
+__bn_mul4x_mont:
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because __bn_mul4x_mont is jumped to
+	// only from bn_mul_mont or __bn_sqr8x_mont, by which point the return address
+	// has already been signed.
+	stp	x29,x30,[sp,#-128]!
+	add	x29,sp,#0
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+
+	sub	x26,sp,x5,lsl#3
+	lsl	x5,x5,#3		// num is counted in bytes from here on
+	ldr	x4,[x4]		// *n0
+	sub	sp,x26,#8*4		// alloca
+
+	add	x10,x2,x5
+	add	x27,x1,x5
+	stp	x0,x10,[x29,#96]	// offload rp and &b[num]
+
+	ldr	x24,[x2,#8*0]		// b[0]
+	ldp	x6,x7,[x1,#8*0]	// a[0..3]
+	ldp	x8,x9,[x1,#8*2]
+	add	x1,x1,#8*4
+	mov	x19,xzr
+	mov	x20,xzr
+	mov	x21,xzr
+	mov	x22,xzr
+	ldp	x14,x15,[x3,#8*0]	// n[0..3]
+	ldp	x16,x17,[x3,#8*2]
+	adds	x3,x3,#8*4		// clear carry bit
+	mov	x0,xzr
+	mov	x28,#0
+	mov	x26,sp
+
+Loop_mul4x_1st_reduction:
+	mul	x10,x6,x24		// lo(a[0..3]*b[0])
+	adc	x0,x0,xzr	// modulo-scheduled
+	mul	x11,x7,x24
+	add	x28,x28,#8
+	mul	x12,x8,x24
+	and	x28,x28,#31
+	mul	x13,x9,x24
+	adds	x19,x19,x10
+	umulh	x10,x6,x24		// hi(a[0..3]*b[0])
+	adcs	x20,x20,x11
+	mul	x25,x19,x4		// t[0]*n0
+	adcs	x21,x21,x12
+	umulh	x11,x7,x24
+	adcs	x22,x22,x13
+	umulh	x12,x8,x24
+	adc	x23,xzr,xzr
+	umulh	x13,x9,x24
+	ldr	x24,[x2,x28]		// next b[i] (or b[0])
+	adds	x20,x20,x10
+	// (*)	mul	x10,x14,x25	// lo(n[0..3]*t[0]*n0)
+	str	x25,[x26],#8		// put aside t[0]*n0 for tail processing
+	adcs	x21,x21,x11
+	mul	x11,x15,x25
+	adcs	x22,x22,x12
+	mul	x12,x16,x25
+	adc	x23,x23,x13		// can't overflow
+	mul	x13,x17,x25
+	// (*)	adds	xzr,x19,x10
+	subs	xzr,x19,#1		// (*)
+	umulh	x10,x14,x25		// hi(n[0..3]*t[0]*n0)
+	adcs	x19,x20,x11
+	umulh	x11,x15,x25
+	adcs	x20,x21,x12
+	umulh	x12,x16,x25
+	adcs	x21,x22,x13
+	umulh	x13,x17,x25
+	adcs	x22,x23,x0
+	adc	x0,xzr,xzr
+	adds	x19,x19,x10
+	sub	x10,x27,x1
+	adcs	x20,x20,x11
+	adcs	x21,x21,x12
+	adcs	x22,x22,x13
+	//adc	x0,x0,xzr
+	cbnz	x28,Loop_mul4x_1st_reduction
+
+	cbz	x10,Lmul4x4_post_condition
+
+	ldp	x6,x7,[x1,#8*0]	// a[4..7]
+	ldp	x8,x9,[x1,#8*2]
+	add	x1,x1,#8*4
+	ldr	x25,[sp]		// t[0]*n0 put aside by the reduction loop
+	ldp	x14,x15,[x3,#8*0]	// n[4..7]
+	ldp	x16,x17,[x3,#8*2]
+	add	x3,x3,#8*4
+
+Loop_mul4x_1st_tail:
+	mul	x10,x6,x24		// lo(a[4..7]*b[i])
+	adc	x0,x0,xzr	// modulo-scheduled
+	mul	x11,x7,x24
+	add	x28,x28,#8
+	mul	x12,x8,x24
+	and	x28,x28,#31
+	mul	x13,x9,x24
+	adds	x19,x19,x10
+	umulh	x10,x6,x24		// hi(a[4..7]*b[i])
+	adcs	x20,x20,x11
+	umulh	x11,x7,x24
+	adcs	x21,x21,x12
+	umulh	x12,x8,x24
+	adcs	x22,x22,x13
+	umulh	x13,x9,x24
+	adc	x23,xzr,xzr
+	ldr	x24,[x2,x28]		// next b[i] (or b[0])
+	adds	x20,x20,x10
+	mul	x10,x14,x25		// lo(n[4..7]*t[0]*n0)
+	adcs	x21,x21,x11
+	mul	x11,x15,x25
+	adcs	x22,x22,x12
+	mul	x12,x16,x25
+	adc	x23,x23,x13		// can't overflow
+	mul	x13,x17,x25
+	adds	x19,x19,x10
+	umulh	x10,x14,x25		// hi(n[4..7]*t[0]*n0)
+	adcs	x20,x20,x11
+	umulh	x11,x15,x25
+	adcs	x21,x21,x12
+	umulh	x12,x16,x25
+	adcs	x22,x22,x13
+	adcs	x23,x23,x0
+	umulh	x13,x17,x25
+	adc	x0,xzr,xzr
+	ldr	x25,[sp,x28]		// next t[0]*n0
+	str	x19,[x26],#8		// result!!!
+	adds	x19,x20,x10
+	sub	x10,x27,x1		// done yet?
+	adcs	x20,x21,x11
+	adcs	x21,x22,x12
+	adcs	x22,x23,x13
+	//adc	x0,x0,xzr
+	cbnz	x28,Loop_mul4x_1st_tail
+
+	sub	x11,x27,x5	// rewound x1
+	cbz	x10,Lmul4x_proceed
+
+	ldp	x6,x7,[x1,#8*0]
+	ldp	x8,x9,[x1,#8*2]
+	add	x1,x1,#8*4
+	ldp	x14,x15,[x3,#8*0]
+	ldp	x16,x17,[x3,#8*2]
+	add	x3,x3,#8*4
+	b	Loop_mul4x_1st_tail
+
+.align	5
+Lmul4x_proceed:
+	ldr	x24,[x2,#8*4]!		// *++b
+	adc	x30,x0,xzr
+	ldp	x6,x7,[x11,#8*0]	// a[0..3]
+	sub	x3,x3,x5		// rewind np
+	ldp	x8,x9,[x11,#8*2]
+	add	x1,x11,#8*4
+
+	stp	x19,x20,[x26,#8*0]	// result!!!
+	ldp	x19,x20,[sp,#8*4]	// t[0..3]
+	stp	x21,x22,[x26,#8*2]	// result!!!
+	ldp	x21,x22,[sp,#8*6]
+
+	ldp	x14,x15,[x3,#8*0]	// n[0..3]
+	mov	x26,sp
+	ldp	x16,x17,[x3,#8*2]
+	adds	x3,x3,#8*4		// clear carry bit
+	mov	x0,xzr
+
+.align	4
+Loop_mul4x_reduction:
+	mul	x10,x6,x24		// lo(a[0..3]*b[4])
+	adc	x0,x0,xzr	// modulo-scheduled
+	mul	x11,x7,x24
+	add	x28,x28,#8
+	mul	x12,x8,x24
+	and	x28,x28,#31
+	mul	x13,x9,x24
+	adds	x19,x19,x10
+	umulh	x10,x6,x24		// hi(a[0..3]*b[4])
+	adcs	x20,x20,x11
+	mul	x25,x19,x4		// t[0]*n0
+	adcs	x21,x21,x12
+	umulh	x11,x7,x24
+	adcs	x22,x22,x13
+	umulh	x12,x8,x24
+	adc	x23,xzr,xzr
+	umulh	x13,x9,x24
+	ldr	x24,[x2,x28]		// next b[i]
+	adds	x20,x20,x10
+	// (*)	mul	x10,x14,x25
+	str	x25,[x26],#8		// put aside t[0]*n0 for tail processing
+	adcs	x21,x21,x11
+	mul	x11,x15,x25		// lo(n[0..3]*t[0]*n0)
+	adcs	x22,x22,x12
+	mul	x12,x16,x25
+	adc	x23,x23,x13		// can't overflow
+	mul	x13,x17,x25
+	// (*)	adds	xzr,x19,x10
+	subs	xzr,x19,#1		// (*)
+	umulh	x10,x14,x25		// hi(n[0..3]*t[0]*n0)
+	adcs	x19,x20,x11
+	umulh	x11,x15,x25
+	adcs	x20,x21,x12
+	umulh	x12,x16,x25
+	adcs	x21,x22,x13
+	umulh	x13,x17,x25
+	adcs	x22,x23,x0
+	adc	x0,xzr,xzr
+	adds	x19,x19,x10
+	adcs	x20,x20,x11
+	adcs	x21,x21,x12
+	adcs	x22,x22,x13
+	//adc	x0,x0,xzr
+	cbnz	x28,Loop_mul4x_reduction
+
+	adc	x0,x0,xzr
+	ldp	x10,x11,[x26,#8*4]	// t[4..7]
+	ldp	x12,x13,[x26,#8*6]
+	ldp	x6,x7,[x1,#8*0]	// a[4..7]
+	ldp	x8,x9,[x1,#8*2]
+	add	x1,x1,#8*4
+	adds	x19,x19,x10
+	adcs	x20,x20,x11
+	adcs	x21,x21,x12
+	adcs	x22,x22,x13
+	//adc	x0,x0,xzr
+
+	ldr	x25,[sp]		// t[0]*n0
+	ldp	x14,x15,[x3,#8*0]	// n[4..7]
+	ldp	x16,x17,[x3,#8*2]
+	add	x3,x3,#8*4
+
+.align	4
+Loop_mul4x_tail:
+	mul	x10,x6,x24		// lo(a[4..7]*b[4])
+	adc	x0,x0,xzr	// modulo-scheduled
+	mul	x11,x7,x24
+	add	x28,x28,#8
+	mul	x12,x8,x24
+	and	x28,x28,#31
+	mul	x13,x9,x24
+	adds	x19,x19,x10
+	umulh	x10,x6,x24		// hi(a[4..7]*b[4])
+	adcs	x20,x20,x11
+	umulh	x11,x7,x24
+	adcs	x21,x21,x12
+	umulh	x12,x8,x24
+	adcs	x22,x22,x13
+	umulh	x13,x9,x24
+	adc	x23,xzr,xzr
+	ldr	x24,[x2,x28]		// next b[i]
+	adds	x20,x20,x10
+	mul	x10,x14,x25		// lo(n[4..7]*t[0]*n0)
+	adcs	x21,x21,x11
+	mul	x11,x15,x25
+	adcs	x22,x22,x12
+	mul	x12,x16,x25
+	adc	x23,x23,x13		// can't overflow
+	mul	x13,x17,x25
+	adds	x19,x19,x10
+	umulh	x10,x14,x25		// hi(n[4..7]*t[0]*n0)
+	adcs	x20,x20,x11
+	umulh	x11,x15,x25
+	adcs	x21,x21,x12
+	umulh	x12,x16,x25
+	adcs	x22,x22,x13
+	umulh	x13,x17,x25
+	adcs	x23,x23,x0
+	ldr	x25,[sp,x28]		// next t[0]*n0
+	adc	x0,xzr,xzr
+	str	x19,[x26],#8		// result!!!
+	adds	x19,x20,x10
+	sub	x10,x27,x1		// done yet?
+	adcs	x20,x21,x11
+	adcs	x21,x22,x12
+	adcs	x22,x23,x13
+	//adc	x0,x0,xzr
+	cbnz	x28,Loop_mul4x_tail
+
+	sub	x11,x3,x5		// rewound np?
+	adc	x0,x0,xzr
+	cbz	x10,Loop_mul4x_break
+
+	ldp	x10,x11,[x26,#8*4]
+	ldp	x12,x13,[x26,#8*6]
+	ldp	x6,x7,[x1,#8*0]
+	ldp	x8,x9,[x1,#8*2]
+	add	x1,x1,#8*4
+	adds	x19,x19,x10
+	adcs	x20,x20,x11
+	adcs	x21,x21,x12
+	adcs	x22,x22,x13
+	//adc	x0,x0,xzr
+	ldp	x14,x15,[x3,#8*0]
+	ldp	x16,x17,[x3,#8*2]
+	add	x3,x3,#8*4
+	b	Loop_mul4x_tail
+
+.align	4
+Loop_mul4x_break:
+	ldp	x12,x13,[x29,#96]	// pull rp and &b[num]
+	adds	x19,x19,x30
+	add	x2,x2,#8*4		// bp++
+	adcs	x20,x20,xzr
+	sub	x1,x1,x5		// rewind ap
+	adcs	x21,x21,xzr
+	stp	x19,x20,[x26,#8*0]	// result!!!
+	adcs	x22,x22,xzr
+	ldp	x19,x20,[sp,#8*4]	// t[0..3]
+	adc	x30,x0,xzr
+	stp	x21,x22,[x26,#8*2]	// result!!!
+	cmp	x2,x13			// done yet?
+	ldp	x21,x22,[sp,#8*6]
+	ldp	x14,x15,[x11,#8*0]	// n[0..3]
+	ldp	x16,x17,[x11,#8*2]
+	add	x3,x11,#8*4
+	b.eq	Lmul4x_post
+
+	ldr	x24,[x2]
+	ldp	x6,x7,[x1,#8*0]	// a[0..3]
+	ldp	x8,x9,[x1,#8*2]
+	adds	x1,x1,#8*4		// clear carry bit
+	mov	x0,xzr
+	mov	x26,sp
+	b	Loop_mul4x_reduction
+
+.align	4
+Lmul4x_post:
+	// Final step. We see if result is larger than modulus, and
+	// if it is, subtract the modulus. But comparison implies
+	// subtraction. So we subtract modulus, see if it borrowed,
+	// and conditionally copy original value.
+	mov	x0,x12
+	mov	x27,x12		// x0 copy
+	subs	x10,x19,x14
+	add	x26,sp,#8*8
+	sbcs	x11,x20,x15
+	sub	x28,x5,#8*4
+
+Lmul4x_sub:
+	sbcs	x12,x21,x16
+	ldp	x14,x15,[x3,#8*0]
+	sub	x28,x28,#8*4
+	ldp	x19,x20,[x26,#8*0]
+	sbcs	x13,x22,x17
+	ldp	x16,x17,[x3,#8*2]
+	add	x3,x3,#8*4
+	ldp	x21,x22,[x26,#8*2]
+	add	x26,x26,#8*4
+	stp	x10,x11,[x0,#8*0]
+	sbcs	x10,x19,x14
+	stp	x12,x13,[x0,#8*2]
+	add	x0,x0,#8*4
+	sbcs	x11,x20,x15
+	cbnz	x28,Lmul4x_sub
+
+	sbcs	x12,x21,x16
+	mov	x26,sp
+	add	x1,sp,#8*4
+	ldp	x6,x7,[x27,#8*0]
+	sbcs	x13,x22,x17
+	stp	x10,x11,[x0,#8*0]
+	ldp	x8,x9,[x27,#8*2]
+	stp	x12,x13,[x0,#8*2]
+	ldp	x19,x20,[x1,#8*0]
+	ldp	x21,x22,[x1,#8*2]
+	sbcs	xzr,x30,xzr	// did it borrow?
+	ldr	x30,[x29,#8]		// pull return address
+
+	sub	x28,x5,#8*4
+Lmul4x_cond_copy:
+	sub	x28,x28,#8*4
+	csel	x10,x19,x6,lo
+	stp	xzr,xzr,[x26,#8*0]
+	csel	x11,x20,x7,lo
+	ldp	x6,x7,[x27,#8*4]
+	ldp	x19,x20,[x1,#8*4]
+	csel	x12,x21,x8,lo
+	stp	xzr,xzr,[x26,#8*2]
+	add	x26,x26,#8*4
+	csel	x13,x22,x9,lo
+	ldp	x8,x9,[x27,#8*6]
+	ldp	x21,x22,[x1,#8*6]
+	add	x1,x1,#8*4
+	stp	x10,x11,[x27,#8*0]
+	stp	x12,x13,[x27,#8*2]
+	add	x27,x27,#8*4
+	cbnz	x28,Lmul4x_cond_copy
+
+	csel	x10,x19,x6,lo
+	stp	xzr,xzr,[x26,#8*0]
+	csel	x11,x20,x7,lo
+	stp	xzr,xzr,[x26,#8*2]
+	csel	x12,x21,x8,lo
+	stp	xzr,xzr,[x26,#8*3]
+	csel	x13,x22,x9,lo
+	stp	xzr,xzr,[x26,#8*4]
+	stp	x10,x11,[x27,#8*0]
+	stp	x12,x13,[x27,#8*2]
+
+	b	Lmul4x_done
+
+.align	4
+Lmul4x4_post_condition:
+	adc	x0,x0,xzr
+	ldr	x1,[x29,#96]		// pull rp
+	// x19-x22,x0 hold result, x14-x17 hold modulus
+	subs	x6,x19,x14
+	ldr	x30,[x29,#8]		// pull return address
+	sbcs	x7,x20,x15
+	stp	xzr,xzr,[sp,#8*0]
+	sbcs	x8,x21,x16
+	stp	xzr,xzr,[sp,#8*2]
+	sbcs	x9,x22,x17
+	stp	xzr,xzr,[sp,#8*4]
+	sbcs	xzr,x0,xzr		// did it borrow?
+	stp	xzr,xzr,[sp,#8*6]
+
+	// x6-x9 hold result-modulus
+	csel	x6,x19,x6,lo
+	csel	x7,x20,x7,lo
+	csel	x8,x21,x8,lo
+	csel	x9,x22,x9,lo
+	stp	x6,x7,[x1,#8*0]
+	stp	x8,x9,[x1,#8*2]
+
+Lmul4x_done:
+	ldp	x19,x20,[x29,#16]
+	mov	sp,x29
+	ldp	x21,x22,[x29,#32]
+	mov	x0,#1			// return value is always 1
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldr	x29,[sp],#128
+	// x30 is popped earlier
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+
+.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	2
+.align	4
+#endif
+#endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S b/deps/boringssl/win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
new file mode 100644
index 0000000..6881d09
--- /dev/null
+++ b/deps/boringssl/win-aarch64/crypto/fipsmodule/ghash-neon-armv8.S
@@ -0,0 +1,351 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+
+#if !defined(OPENSSL_NO_ASM)
+#if defined(__aarch64__)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+#include <openssl/arm_arch.h>
+
+.text
+
+.globl	gcm_init_neon
+
+.def gcm_init_neon
+   .type 32
+.endef
+.align	4
+gcm_init_neon:	// reads H from [x1], stores the twisted H (Htable[0]) to [x0]
+	AARCH64_VALID_CALL_TARGET
+	// This function is adapted from gcm_init_v8. xC2 is t3.
+	ld1	{v17.2d}, [x1]			// load H
+	movi	v19.16b, #0xe1			// every byte of v19 = 0xe1
+	shl	v19.2d, v19.2d, #57		// 0xc2.0
+	ext	v3.16b, v17.16b, v17.16b, #8	// v3 = H with its 64-bit halves swapped
+	ushr	v18.2d, v19.2d, #63		// v18 = 1 in each 64-bit lane
+	dup	v17.4s, v17.s[1]		// replicate 32-bit lane 1 of H into all four lanes
+	ext	v16.16b, v18.16b, v19.16b, #8	// t0=0xc2....01
+	ushr	v18.2d, v3.2d, #63		// top bit of each 64-bit lane of v3
+	sshr	v17.4s, v17.4s, #31		// broadcast carry bit
+	and	v18.16b, v18.16b, v16.16b	// AND the per-lane top bits with t0
+	shl	v3.2d, v3.2d, #1		// shift each 64-bit lane left by one
+	ext	v18.16b, v18.16b, v18.16b, #8	// swap the 64-bit halves of v18
+	and	v16.16b, v16.16b, v17.16b	// keep t0 only where the broadcast carry mask is set
+	orr	v3.16b, v3.16b, v18.16b	// H<<<=1
+	eor	v5.16b, v3.16b, v16.16b	// twisted H
+	st1	{v5.2d}, [x0]			// store Htable[0]
+	ret
+
+
+.globl	gcm_gmult_neon
+
+.def gcm_gmult_neon
+   .type 32
+.endef
+.align	4
+gcm_gmult_neon:	// single-block multiply: Xi at [x0] is updated in place, twisted H is read from [x1]
+	AARCH64_VALID_CALL_TARGET
+	ld1	{v3.16b}, [x0]		// load Xi
+	ld1	{v5.1d}, [x1], #8		// load twisted H
+	ld1	{v6.1d}, [x1]			// second 64-bit half of twisted H
+	adrp	x9, Lmasks		// load constants
+	add	x9, x9, :lo12:Lmasks	// x9 = address of Lmasks
+	ld1	{v24.2d, v25.2d}, [x9]	// v24 = first two Lmasks quads, v25 = last two
+	rev64	v3.16b, v3.16b		// byteswap Xi
+	ext	v3.16b, v3.16b, v3.16b, #8	// swap the 64-bit halves of Xi
+	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing
+
+	mov	x3, #16			// one 16-byte block, so the shared loop tail exits after a single pass
+	b	Lgmult_neon		// reuse the multiply/reduce/store code inside gcm_ghash_neon (input expected in v3)
+
+
+.globl	gcm_ghash_neon
+
+.def gcm_ghash_neon
+   .type 32
+.endef
+.align	4
+gcm_ghash_neon:	// x0 = Xi (read and written back), x1 = twisted H, x2 = input pointer, x3 = byte count
+	AARCH64_VALID_CALL_TARGET
+	ld1	{v0.16b}, [x0]		// load Xi
+	ld1	{v5.1d}, [x1], #8		// load twisted H
+	ld1	{v6.1d}, [x1]			// second 64-bit half of twisted H
+	adrp	x9, Lmasks		// load constants
+	add	x9, x9, :lo12:Lmasks	// x9 = address of Lmasks
+	ld1	{v24.2d, v25.2d}, [x9]	// v24 = first two Lmasks quads, v25 = last two
+	rev64	v0.16b, v0.16b		// byteswap Xi
+	ext	v0.16b, v0.16b, v0.16b, #8	// swap the 64-bit halves of Xi
+	eor	v7.8b, v5.8b, v6.8b	// Karatsuba pre-processing
+
+Loop_neon:	// one iteration per 16-byte input block
+	ld1	{v3.16b}, [x2], #16	// load inp
+	rev64	v3.16b, v3.16b		// byteswap inp
+	ext	v3.16b, v3.16b, v3.16b, #8	// swap the 64-bit halves of inp
+	eor	v3.16b, v3.16b, v0.16b	// inp ^= Xi
+
+Lgmult_neon:	// also entered from gcm_gmult_neon with v3 = Xi and x3 = 16
+	// Split the input into v3 and v4. (The upper halves are unused,
+	// so it is okay to leave them alone.)
+	ins	v4.d[0], v3.d[1]	// low lane of v4 = high 64 bits of v3
+	ext	v16.8b, v5.8b, v5.8b, #1	// A1
+	pmull	v16.8h, v16.8b, v3.8b		// F = A1*B
+	ext	v0.8b, v3.8b, v3.8b, #1		// B1
+	pmull	v0.8h, v5.8b, v0.8b		// E = A*B1
+	ext	v17.8b, v5.8b, v5.8b, #2	// A2
+	pmull	v17.8h, v17.8b, v3.8b		// H = A2*B
+	ext	v19.8b, v3.8b, v3.8b, #2	// B2
+	pmull	v19.8h, v5.8b, v19.8b		// G = A*B2
+	ext	v18.8b, v5.8b, v5.8b, #3	// A3
+	eor	v16.16b, v16.16b, v0.16b	// L = E + F
+	pmull	v18.8h, v18.8b, v3.8b		// J = A3*B
+	ext	v0.8b, v3.8b, v3.8b, #3		// B3
+	eor	v17.16b, v17.16b, v19.16b	// M = G + H
+	pmull	v0.8h, v5.8b, v0.8b		// I = A*B3
+
+	// Here we diverge from the 32-bit version. It computes the following
+	// (instructions reordered for clarity):
+	//
+	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
+	//     vand	$t0#hi, $t0#hi, $k48
+	//     veor	$t0#lo, $t0#lo, $t0#hi
+	//
+	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
+	//     vand	$t1#hi, $t1#hi, $k32
+	//     veor	$t1#lo, $t1#lo, $t1#hi
+	//
+	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
+	//     vand	$t2#hi, $t2#hi, $k16
+	//     veor	$t2#lo, $t2#lo, $t2#hi
+	//
+	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
+	//     vmov.i64	$t3#hi, #0
+	//
+	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
+	// upper halves of SIMD registers, so we must split each half into
+	// separate registers. To compensate, we pair computations up and
+	// parallelize.
+
+	ext	v19.8b, v3.8b, v3.8b, #4	// B4
+	eor	v18.16b, v18.16b, v0.16b	// N = I + J
+	pmull	v19.8h, v5.8b, v19.8b		// K = A*B4
+
+	// This can probably be scheduled more efficiently. For now, we just
+	// pair up independent instructions.
+	zip1	v20.2d, v16.2d, v17.2d
+	zip1	v22.2d, v18.2d, v19.2d
+	zip2	v21.2d, v16.2d, v17.2d
+	zip2	v23.2d, v18.2d, v19.2d
+	eor	v20.16b, v20.16b, v21.16b
+	eor	v22.16b, v22.16b, v23.16b
+	and	v21.16b, v21.16b, v24.16b
+	and	v23.16b, v23.16b, v25.16b
+	eor	v20.16b, v20.16b, v21.16b
+	eor	v22.16b, v22.16b, v23.16b
+	zip1	v16.2d, v20.2d, v21.2d
+	zip1	v18.2d, v22.2d, v23.2d
+	zip2	v17.2d, v20.2d, v21.2d
+	zip2	v19.2d, v22.2d, v23.2d
+
+	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
+	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
+	pmull	v0.8h, v5.8b, v3.8b		// D = A*B
+	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
+	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
+	eor	v16.16b, v16.16b, v17.16b
+	eor	v18.16b, v18.16b, v19.16b
+	eor	v0.16b, v0.16b, v16.16b
+	eor	v0.16b, v0.16b, v18.16b
+	eor	v3.8b, v3.8b, v4.8b	// Karatsuba pre-processing
+	ext	v16.8b, v7.8b, v7.8b, #1	// A1
+	pmull	v16.8h, v16.8b, v3.8b		// F = A1*B
+	ext	v1.8b, v3.8b, v3.8b, #1		// B1
+	pmull	v1.8h, v7.8b, v1.8b		// E = A*B1
+	ext	v17.8b, v7.8b, v7.8b, #2	// A2
+	pmull	v17.8h, v17.8b, v3.8b		// H = A2*B
+	ext	v19.8b, v3.8b, v3.8b, #2	// B2
+	pmull	v19.8h, v7.8b, v19.8b		// G = A*B2
+	ext	v18.8b, v7.8b, v7.8b, #3	// A3
+	eor	v16.16b, v16.16b, v1.16b	// L = E + F
+	pmull	v18.8h, v18.8b, v3.8b		// J = A3*B
+	ext	v1.8b, v3.8b, v3.8b, #3		// B3
+	eor	v17.16b, v17.16b, v19.16b	// M = G + H
+	pmull	v1.8h, v7.8b, v1.8b		// I = A*B3
+
+	// Here we diverge from the 32-bit version. It computes the following
+	// (instructions reordered for clarity):
+	//
+	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
+	//     vand	$t0#hi, $t0#hi, $k48
+	//     veor	$t0#lo, $t0#lo, $t0#hi
+	//
+	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
+	//     vand	$t1#hi, $t1#hi, $k32
+	//     veor	$t1#lo, $t1#lo, $t1#hi
+	//
+	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
+	//     vand	$t2#hi, $t2#hi, $k16
+	//     veor	$t2#lo, $t2#lo, $t2#hi
+	//
+	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
+	//     vmov.i64	$t3#hi, #0
+	//
+	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
+	// upper halves of SIMD registers, so we must split each half into
+	// separate registers. To compensate, we pair computations up and
+	// parallelize.
+
+	ext	v19.8b, v3.8b, v3.8b, #4	// B4
+	eor	v18.16b, v18.16b, v1.16b	// N = I + J
+	pmull	v19.8h, v7.8b, v19.8b		// K = A*B4
+
+	// This can probably be scheduled more efficiently. For now, we just
+	// pair up independent instructions.
+	zip1	v20.2d, v16.2d, v17.2d
+	zip1	v22.2d, v18.2d, v19.2d
+	zip2	v21.2d, v16.2d, v17.2d
+	zip2	v23.2d, v18.2d, v19.2d
+	eor	v20.16b, v20.16b, v21.16b
+	eor	v22.16b, v22.16b, v23.16b
+	and	v21.16b, v21.16b, v24.16b
+	and	v23.16b, v23.16b, v25.16b
+	eor	v20.16b, v20.16b, v21.16b
+	eor	v22.16b, v22.16b, v23.16b
+	zip1	v16.2d, v20.2d, v21.2d
+	zip1	v18.2d, v22.2d, v23.2d
+	zip2	v17.2d, v20.2d, v21.2d
+	zip2	v19.2d, v22.2d, v23.2d
+
+	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
+	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
+	pmull	v1.8h, v7.8b, v3.8b		// D = A*B
+	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
+	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
+	eor	v16.16b, v16.16b, v17.16b
+	eor	v18.16b, v18.16b, v19.16b
+	eor	v1.16b, v1.16b, v16.16b
+	eor	v1.16b, v1.16b, v18.16b
+	ext	v16.8b, v6.8b, v6.8b, #1	// A1
+	pmull	v16.8h, v16.8b, v4.8b		// F = A1*B
+	ext	v2.8b, v4.8b, v4.8b, #1		// B1
+	pmull	v2.8h, v6.8b, v2.8b		// E = A*B1
+	ext	v17.8b, v6.8b, v6.8b, #2	// A2
+	pmull	v17.8h, v17.8b, v4.8b		// H = A2*B
+	ext	v19.8b, v4.8b, v4.8b, #2	// B2
+	pmull	v19.8h, v6.8b, v19.8b		// G = A*B2
+	ext	v18.8b, v6.8b, v6.8b, #3	// A3
+	eor	v16.16b, v16.16b, v2.16b	// L = E + F
+	pmull	v18.8h, v18.8b, v4.8b		// J = A3*B
+	ext	v2.8b, v4.8b, v4.8b, #3		// B3
+	eor	v17.16b, v17.16b, v19.16b	// M = G + H
+	pmull	v2.8h, v6.8b, v2.8b		// I = A*B3
+
+	// Here we diverge from the 32-bit version. It computes the following
+	// (instructions reordered for clarity):
+	//
+	//     veor	$t0#lo, $t0#lo, $t0#hi	@ t0 = P0 + P1 (L)
+	//     vand	$t0#hi, $t0#hi, $k48
+	//     veor	$t0#lo, $t0#lo, $t0#hi
+	//
+	//     veor	$t1#lo, $t1#lo, $t1#hi	@ t1 = P2 + P3 (M)
+	//     vand	$t1#hi, $t1#hi, $k32
+	//     veor	$t1#lo, $t1#lo, $t1#hi
+	//
+	//     veor	$t2#lo, $t2#lo, $t2#hi	@ t2 = P4 + P5 (N)
+	//     vand	$t2#hi, $t2#hi, $k16
+	//     veor	$t2#lo, $t2#lo, $t2#hi
+	//
+	//     veor	$t3#lo, $t3#lo, $t3#hi	@ t3 = P6 + P7 (K)
+	//     vmov.i64	$t3#hi, #0
+	//
+	// $kN is a mask with the bottom N bits set. AArch64 cannot compute on
+	// upper halves of SIMD registers, so we must split each half into
+	// separate registers. To compensate, we pair computations up and
+	// parallelize.
+
+	ext	v19.8b, v4.8b, v4.8b, #4	// B4
+	eor	v18.16b, v18.16b, v2.16b	// N = I + J
+	pmull	v19.8h, v6.8b, v19.8b		// K = A*B4
+
+	// This can probably be scheduled more efficiently. For now, we just
+	// pair up independent instructions.
+	zip1	v20.2d, v16.2d, v17.2d
+	zip1	v22.2d, v18.2d, v19.2d
+	zip2	v21.2d, v16.2d, v17.2d
+	zip2	v23.2d, v18.2d, v19.2d
+	eor	v20.16b, v20.16b, v21.16b
+	eor	v22.16b, v22.16b, v23.16b
+	and	v21.16b, v21.16b, v24.16b
+	and	v23.16b, v23.16b, v25.16b
+	eor	v20.16b, v20.16b, v21.16b
+	eor	v22.16b, v22.16b, v23.16b
+	zip1	v16.2d, v20.2d, v21.2d
+	zip1	v18.2d, v22.2d, v23.2d
+	zip2	v17.2d, v20.2d, v21.2d
+	zip2	v19.2d, v22.2d, v23.2d
+
+	ext	v16.16b, v16.16b, v16.16b, #15	// t0 = t0 << 8
+	ext	v17.16b, v17.16b, v17.16b, #14	// t1 = t1 << 16
+	pmull	v2.8h, v6.8b, v4.8b		// D = A*B
+	ext	v19.16b, v19.16b, v19.16b, #12	// t3 = t3 << 32
+	ext	v18.16b, v18.16b, v18.16b, #13	// t2 = t2 << 24
+	eor	v16.16b, v16.16b, v17.16b
+	eor	v18.16b, v18.16b, v19.16b
+	eor	v2.16b, v2.16b, v16.16b
+	eor	v2.16b, v2.16b, v18.16b
+	ext	v16.16b, v0.16b, v2.16b, #8
+	eor	v1.16b, v1.16b, v0.16b	// Karatsuba post-processing
+	eor	v1.16b, v1.16b, v2.16b
+	eor	v1.16b, v1.16b, v16.16b	// Xm overlaps Xh.lo and Xl.hi
+	ins	v0.d[1], v1.d[0]		// Xh|Xl - 256-bit result
+	// This is a no-op due to the ins instruction below.
+	// ins	v2.d[0], v1.d[1]
+
+	// equivalent of reduction_avx from ghash-x86_64.pl
+	shl	v17.2d, v0.2d, #57		// 1st phase
+	shl	v18.2d, v0.2d, #62
+	eor	v18.16b, v18.16b, v17.16b	//
+	shl	v17.2d, v0.2d, #63
+	eor	v18.16b, v18.16b, v17.16b	//
+	// Note Xm contains {Xl.d[1], Xh.d[0]}.
+	eor	v18.16b, v18.16b, v1.16b
+	ins	v0.d[1], v18.d[0]		// Xl.d[1] ^= t2.d[0]
+	ins	v2.d[0], v18.d[1]		// Xh.d[0] ^= t2.d[1]
+
+	ushr	v18.2d, v0.2d, #1		// 2nd phase
+	eor	v2.16b, v2.16b,v0.16b
+	eor	v0.16b, v0.16b,v18.16b	//
+	ushr	v18.2d, v18.2d, #6
+	ushr	v0.2d, v0.2d, #1		//
+	eor	v0.16b, v0.16b, v2.16b	//
+	eor	v0.16b, v0.16b, v18.16b	//
+
+	subs	x3, x3, #16		// one 16-byte block consumed
+	bne	Loop_neon		// exits only when x3 hits exactly zero, so x3 must start as a nonzero multiple of 16
+
+	rev64	v0.16b, v0.16b		// byteswap Xi and write
+	ext	v0.16b, v0.16b, v0.16b, #8	// swap the 64-bit halves back
+	st1	{v0.16b}, [x0]		// store the updated Xi
+
+	ret
+
+
+.section	.rodata
+.align	4
+Lmasks:	// four 64-bit masks; the NEON GHASH routines load them as a pair of 128-bit vectors (v24, v25)
+.quad	0x0000ffffffffffff	// k48
+.quad	0x00000000ffffffff	// k32
+.quad	0x000000000000ffff	// k16
+.quad	0x0000000000000000	// k0
+.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,100,101,114,105,118,101,100,32,102,114,111,109,32,65,82,77,118,52,32,118,101,114,115,105,111,110,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	// ASCII: "GHASH for ARMv8, derived from ARMv4 version by <appro@openssl.org>", NUL-terminated
+.align	2
+.align	2
+#endif
+#endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/win-aarch64/crypto/fipsmodule/ghashv8-armx64.S b/deps/boringssl/win-aarch64/crypto/fipsmodule/ghashv8-armx64.S
new file mode 100644
index 0000000..75f7b64
--- /dev/null
+++ b/deps/boringssl/win-aarch64/crypto/fipsmodule/ghashv8-armx64.S
@@ -0,0 +1,257 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+
+#if !defined(OPENSSL_NO_ASM)
+#if defined(__aarch64__)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+#include <openssl/arm_arch.h>
+
+.text
+.arch	armv8-a+crypto
+.globl	gcm_init_v8
+
+.def gcm_init_v8
+   .type 32
+.endef
+.align	4
+gcm_init_v8:	// reads H from [x1], stores Htable[0..2] (48 bytes) starting at [x0]
+	AARCH64_VALID_CALL_TARGET
+	ld1	{v17.2d},[x1]		//load input H
+	movi	v19.16b,#0xe1		//every byte of v19 = 0xe1
+	shl	v19.2d,v19.2d,#57		//0xc2.0
+	ext	v3.16b,v17.16b,v17.16b,#8	//v3 = H with its 64-bit halves swapped
+	ushr	v18.2d,v19.2d,#63		//v18 = 1 in each 64-bit lane
+	dup	v17.4s,v17.s[1]		//replicate 32-bit lane 1 of H into all four lanes
+	ext	v16.16b,v18.16b,v19.16b,#8		//t0=0xc2....01
+	ushr	v18.2d,v3.2d,#63		//top bit of each 64-bit lane of v3
+	sshr	v17.4s,v17.4s,#31		//broadcast carry bit
+	and	v18.16b,v18.16b,v16.16b	//AND the per-lane top bits with t0
+	shl	v3.2d,v3.2d,#1		//shift each 64-bit lane left by one
+	ext	v18.16b,v18.16b,v18.16b,#8	//swap the 64-bit halves of v18
+	and	v16.16b,v16.16b,v17.16b	//keep t0 only where the broadcast carry mask is set
+	orr	v3.16b,v3.16b,v18.16b		//H<<<=1
+	eor	v20.16b,v3.16b,v16.16b		//twisted H
+	st1	{v20.2d},[x0],#16		//store Htable[0]
+
+	//calculate H^2
+	ext	v16.16b,v20.16b,v20.16b,#8		//Karatsuba pre-processing
+	pmull	v0.1q,v20.1d,v20.1d	//H.lo·H.lo
+	eor	v16.16b,v16.16b,v20.16b	//both lanes of v16 = H.lo+H.hi
+	pmull2	v2.1q,v20.2d,v20.2d	//H.hi·H.hi
+	pmull	v1.1q,v16.1d,v16.1d	//(H.lo+H.hi)·(H.lo+H.hi)
+
+	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d		//1st phase
+
+	ins	v2.d[0],v1.d[1]
+	ins	v1.d[1],v0.d[0]
+	eor	v0.16b,v1.16b,v18.16b
+
+	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase
+	pmull	v0.1q,v0.1d,v19.1d
+	eor	v18.16b,v18.16b,v2.16b
+	eor	v22.16b,v0.16b,v18.16b	//v22 = reduced H^2
+
+	ext	v17.16b,v22.16b,v22.16b,#8		//Karatsuba pre-processing
+	eor	v17.16b,v17.16b,v22.16b	//both lanes of v17 = H^2.lo+H^2.hi
+	ext	v21.16b,v16.16b,v17.16b,#8		//pack Karatsuba pre-processed
+	st1	{v21.2d,v22.2d},[x0]		//store Htable[1..2]
+
+	ret
+
+.globl	gcm_gmult_v8
+
+.def gcm_gmult_v8
+   .type 32
+.endef
+.align	4
+gcm_gmult_v8:	// single-block multiply: Xi at [x0] is updated in place, Htable is read from [x1]
+	AARCH64_VALID_CALL_TARGET
+	ld1	{v17.2d},[x0]		//load Xi
+	movi	v19.16b,#0xe1		//every byte of v19 = 0xe1
+	ld1	{v20.2d,v21.2d},[x1]	//load twisted H, ...
+	shl	v19.2d,v19.2d,#57	//v19 = 0xc2.0 constant used by both reduction phases
+#ifndef __ARMEB__
+	rev64	v17.16b,v17.16b	//byteswap Xi (skipped when __ARMEB__ is defined)
+#endif
+	ext	v3.16b,v17.16b,v17.16b,#8	//v3 = v17 with its 64-bit halves swapped
+
+	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
+	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
+	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
+	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
+
+	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
+
+	ins	v2.d[0],v1.d[1]
+	ins	v1.d[1],v0.d[0]
+	eor	v0.16b,v1.16b,v18.16b
+
+	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
+	pmull	v0.1q,v0.1d,v19.1d
+	eor	v18.16b,v18.16b,v2.16b
+	eor	v0.16b,v0.16b,v18.16b
+
+#ifndef __ARMEB__
+	rev64	v0.16b,v0.16b	//byteswap the result (skipped when __ARMEB__ is defined)
+#endif
+	ext	v0.16b,v0.16b,v0.16b,#8	//swap the 64-bit halves back
+	st1	{v0.2d},[x0]		//write out Xi
+
+	ret
+
+.globl	gcm_ghash_v8
+
+.def gcm_ghash_v8
+   .type 32
+.endef
+.align	4
+gcm_ghash_v8:	// x0 = Xi (read and written back), x1 = Htable, x2 = input pointer, x3 = byte count
+	AARCH64_VALID_CALL_TARGET
+	ld1	{v0.2d},[x0]		//load [rotated] Xi
+						//"[rotated]" means that
+						//loaded value would have
+						//to be rotated in order to
+						//make it appear as in
+						//algorithm specification
+	subs	x3,x3,#32		//see if x3 is 32 or larger
+	mov	x12,#16		//x12 is used as post-
+						//increment for input pointer;
+						//as loop is modulo-scheduled
+						//x12 is zeroed just in time
+						//to preclude overstepping
+						//inp[len], which means that
+						//last block[s] are actually
+						//loaded twice, but last
+						//copy is not processed
+	ld1	{v20.2d,v21.2d},[x1],#32	//load twisted H, ..., H^2
+	movi	v19.16b,#0xe1		//every byte of v19 = 0xe1
+	ld1	{v22.2d},[x1]		//third Htable entry (used as H^2 below)
+	csel	x12,xzr,x12,eq			//is it time to zero x12?
+	ext	v0.16b,v0.16b,v0.16b,#8		//rotate Xi
+	ld1	{v16.2d},[x2],#16	//load [rotated] I[0]
+	shl	v19.2d,v19.2d,#57		//compose 0xc2.0 constant
+#ifndef __ARMEB__
+	rev64	v16.16b,v16.16b
+	rev64	v0.16b,v0.16b
+#endif
+	ext	v3.16b,v16.16b,v16.16b,#8		//rotate I[0]
+	b.lo	Lodd_tail_v8		//x3 was less than 32
+	ld1	{v17.2d},[x2],x12	//load [rotated] I[1]
+#ifndef __ARMEB__
+	rev64	v17.16b,v17.16b
+#endif
+	ext	v7.16b,v17.16b,v17.16b,#8	//rotate I[1]
+	eor	v3.16b,v3.16b,v0.16b		//I[i]^=Xi
+	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
+	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
+	pmull2	v6.1q,v20.2d,v7.2d
+	b	Loop_mod2x_v8
+
+.align	4
+Loop_mod2x_v8:	// each iteration consumes 32 bytes (two 16-byte blocks)
+	ext	v18.16b,v3.16b,v3.16b,#8
+	subs	x3,x3,#32		//is there more data?
+	pmull	v0.1q,v22.1d,v3.1d		//H^2.lo·Xi.lo
+	csel	x12,xzr,x12,lo			//is it time to zero x12?
+
+	pmull	v5.1q,v21.1d,v17.1d
+	eor	v18.16b,v18.16b,v3.16b		//Karatsuba pre-processing
+	pmull2	v2.1q,v22.2d,v3.2d		//H^2.hi·Xi.hi
+	eor	v0.16b,v0.16b,v4.16b		//accumulate
+	pmull2	v1.1q,v21.2d,v18.2d		//(H^2.lo+H^2.hi)·(Xi.lo+Xi.hi)
+	ld1	{v16.2d},[x2],x12	//load [rotated] I[i+2]
+
+	eor	v2.16b,v2.16b,v6.16b
+	csel	x12,xzr,x12,eq			//is it time to zero x12?
+	eor	v1.16b,v1.16b,v5.16b
+
+	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	ld1	{v17.2d},[x2],x12	//load [rotated] I[i+3]
+#ifndef __ARMEB__
+	rev64	v16.16b,v16.16b
+#endif
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
+
+#ifndef __ARMEB__
+	rev64	v17.16b,v17.16b
+#endif
+	ins	v2.d[0],v1.d[1]
+	ins	v1.d[1],v0.d[0]
+	ext	v7.16b,v17.16b,v17.16b,#8
+	ext	v3.16b,v16.16b,v16.16b,#8
+	eor	v0.16b,v1.16b,v18.16b
+	pmull	v4.1q,v20.1d,v7.1d		//H·Ii+1
+	eor	v3.16b,v3.16b,v2.16b		//accumulate v3.16b early
+
+	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
+	pmull	v0.1q,v0.1d,v19.1d
+	eor	v3.16b,v3.16b,v18.16b
+	eor	v17.16b,v17.16b,v7.16b		//Karatsuba pre-processing
+	eor	v3.16b,v3.16b,v0.16b
+	pmull2	v6.1q,v20.2d,v7.2d
+	b.hs	Loop_mod2x_v8		//there was at least 32 more bytes
+
+	eor	v2.16b,v2.16b,v18.16b
+	ext	v3.16b,v16.16b,v16.16b,#8		//re-construct v3.16b
+	adds	x3,x3,#32		//re-construct x3
+	eor	v0.16b,v0.16b,v2.16b		//re-construct v0.16b
+	b.eq	Ldone_v8		//is x3 zero?
+Lodd_tail_v8:	// one remaining block: a single multiply of (inp^Xi) by H, then reduce
+	ext	v18.16b,v0.16b,v0.16b,#8
+	eor	v3.16b,v3.16b,v0.16b		//inp^=Xi
+	eor	v17.16b,v16.16b,v18.16b		//v17.16b is rotated inp^Xi
+
+	pmull	v0.1q,v20.1d,v3.1d		//H.lo·Xi.lo
+	eor	v17.16b,v17.16b,v3.16b		//Karatsuba pre-processing
+	pmull2	v2.1q,v20.2d,v3.2d		//H.hi·Xi.hi
+	pmull	v1.1q,v21.1d,v17.1d		//(H.lo+H.hi)·(Xi.lo+Xi.hi)
+
+	ext	v17.16b,v0.16b,v2.16b,#8		//Karatsuba post-processing
+	eor	v18.16b,v0.16b,v2.16b
+	eor	v1.16b,v1.16b,v17.16b
+	eor	v1.16b,v1.16b,v18.16b
+	pmull	v18.1q,v0.1d,v19.1d		//1st phase of reduction
+
+	ins	v2.d[0],v1.d[1]
+	ins	v1.d[1],v0.d[0]
+	eor	v0.16b,v1.16b,v18.16b
+
+	ext	v18.16b,v0.16b,v0.16b,#8		//2nd phase of reduction
+	pmull	v0.1q,v0.1d,v19.1d
+	eor	v18.16b,v18.16b,v2.16b
+	eor	v0.16b,v0.16b,v18.16b
+
+Ldone_v8:	// common exit: undo the byteswap/rotation and store Xi
+#ifndef __ARMEB__
+	rev64	v0.16b,v0.16b
+#endif
+	ext	v0.16b,v0.16b,v0.16b,#8	//swap the 64-bit halves back
+	st1	{v0.2d},[x0]		//write out Xi
+
+	ret
+
+.byte	71,72,65,83,72,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
+.align	2
+.align	2
+#endif
+#endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/win-aarch64/crypto/fipsmodule/sha1-armv8.S b/deps/boringssl/win-aarch64/crypto/fipsmodule/sha1-armv8.S
new file mode 100644
index 0000000..2a6a9dc
--- /dev/null
+++ b/deps/boringssl/win-aarch64/crypto/fipsmodule/sha1-armv8.S
@@ -0,0 +1,1241 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+
+#if !defined(OPENSSL_NO_ASM)
+#if defined(__aarch64__)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+#include <openssl/arm_arch.h>
+
+.text
+
+
+
+.globl	sha1_block_data_order
+
+.def sha1_block_data_order
+   .type 32
+.endef
+.align	6
+sha1_block_data_order:
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
+#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
+	adrp	x16,:pg_hi21_nc:OPENSSL_armcap_P
+#else
+	adrp	x16,OPENSSL_armcap_P
+#endif
+	ldr	w16,[x16,:lo12:OPENSSL_armcap_P]
+	tst	w16,#ARMV8_SHA1
+	b.ne	Lv8_entry
+
+	stp	x29,x30,[sp,#-96]!
+	add	x29,sp,#0
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+
+	ldp	w20,w21,[x0]
+	ldp	w22,w23,[x0,#8]
+	ldr	w24,[x0,#16]
+
+Loop:
+	ldr	x3,[x1],#64
+	movz	w28,#0x7999
+	sub	x2,x2,#1
+	movk	w28,#0x5a82,lsl#16
+#ifdef	__ARMEB__
+	ror	x3,x3,#32
+#else
+	rev32	x3,x3
+#endif
+	add	w24,w24,w28		// warm it up
+	add	w24,w24,w3
+	lsr	x4,x3,#32
+	ldr	x5,[x1,#-56]
+	bic	w25,w23,w21
+	and	w26,w22,w21
+	ror	w27,w20,#27
+	add	w23,w23,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w24,w24,w27		// e+=rot(a,5)
+	ror	w21,w21,#2
+	add	w23,w23,w4	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+#ifdef	__ARMEB__
+	ror	x5,x5,#32
+#else
+	rev32	x5,x5
+#endif
+	bic	w25,w22,w20
+	and	w26,w21,w20
+	ror	w27,w24,#27
+	add	w22,w22,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w23,w23,w27		// e+=rot(a,5)
+	ror	w20,w20,#2
+	add	w22,w22,w5	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	lsr	x6,x5,#32
+	ldr	x7,[x1,#-48]
+	bic	w25,w21,w24
+	and	w26,w20,w24
+	ror	w27,w23,#27
+	add	w21,w21,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w22,w22,w27		// e+=rot(a,5)
+	ror	w24,w24,#2
+	add	w21,w21,w6	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+#ifdef	__ARMEB__
+	ror	x7,x7,#32
+#else
+	rev32	x7,x7
+#endif
+	bic	w25,w20,w23
+	and	w26,w24,w23
+	ror	w27,w22,#27
+	add	w20,w20,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w21,w21,w27		// e+=rot(a,5)
+	ror	w23,w23,#2
+	add	w20,w20,w7	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	lsr	x8,x7,#32
+	ldr	x9,[x1,#-40]
+	bic	w25,w24,w22
+	and	w26,w23,w22
+	ror	w27,w21,#27
+	add	w24,w24,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w20,w20,w27		// e+=rot(a,5)
+	ror	w22,w22,#2
+	add	w24,w24,w8	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+#ifdef	__ARMEB__
+	ror	x9,x9,#32
+#else
+	rev32	x9,x9
+#endif
+	bic	w25,w23,w21
+	and	w26,w22,w21
+	ror	w27,w20,#27
+	add	w23,w23,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w24,w24,w27		// e+=rot(a,5)
+	ror	w21,w21,#2
+	add	w23,w23,w9	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	lsr	x10,x9,#32
+	ldr	x11,[x1,#-32]
+	bic	w25,w22,w20
+	and	w26,w21,w20
+	ror	w27,w24,#27
+	add	w22,w22,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w23,w23,w27		// e+=rot(a,5)
+	ror	w20,w20,#2
+	add	w22,w22,w10	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+#ifdef	__ARMEB__
+	ror	x11,x11,#32
+#else
+	rev32	x11,x11
+#endif
+	bic	w25,w21,w24
+	and	w26,w20,w24
+	ror	w27,w23,#27
+	add	w21,w21,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w22,w22,w27		// e+=rot(a,5)
+	ror	w24,w24,#2
+	add	w21,w21,w11	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	lsr	x12,x11,#32
+	ldr	x13,[x1,#-24]
+	bic	w25,w20,w23
+	and	w26,w24,w23
+	ror	w27,w22,#27
+	add	w20,w20,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w21,w21,w27		// e+=rot(a,5)
+	ror	w23,w23,#2
+	add	w20,w20,w12	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+#ifdef	__ARMEB__
+	ror	x13,x13,#32
+#else
+	rev32	x13,x13
+#endif
+	bic	w25,w24,w22
+	and	w26,w23,w22
+	ror	w27,w21,#27
+	add	w24,w24,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w20,w20,w27		// e+=rot(a,5)
+	ror	w22,w22,#2
+	add	w24,w24,w13	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	lsr	x14,x13,#32
+	ldr	x15,[x1,#-16]
+	bic	w25,w23,w21
+	and	w26,w22,w21
+	ror	w27,w20,#27
+	add	w23,w23,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w24,w24,w27		// e+=rot(a,5)
+	ror	w21,w21,#2
+	add	w23,w23,w14	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+#ifdef	__ARMEB__
+	ror	x15,x15,#32
+#else
+	rev32	x15,x15
+#endif
+	bic	w25,w22,w20
+	and	w26,w21,w20
+	ror	w27,w24,#27
+	add	w22,w22,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w23,w23,w27		// e+=rot(a,5)
+	ror	w20,w20,#2
+	add	w22,w22,w15	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	lsr	x16,x15,#32
+	ldr	x17,[x1,#-8]
+	bic	w25,w21,w24
+	and	w26,w20,w24
+	ror	w27,w23,#27
+	add	w21,w21,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w22,w22,w27		// e+=rot(a,5)
+	ror	w24,w24,#2
+	add	w21,w21,w16	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+#ifdef	__ARMEB__
+	ror	x17,x17,#32
+#else
+	rev32	x17,x17
+#endif
+	bic	w25,w20,w23
+	and	w26,w24,w23
+	ror	w27,w22,#27
+	add	w20,w20,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w21,w21,w27		// e+=rot(a,5)
+	ror	w23,w23,#2
+	add	w20,w20,w17	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	lsr	x19,x17,#32
+	eor	w3,w3,w5
+	bic	w25,w24,w22
+	and	w26,w23,w22
+	ror	w27,w21,#27
+	eor	w3,w3,w11
+	add	w24,w24,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w20,w20,w27		// e+=rot(a,5)
+	eor	w3,w3,w16
+	ror	w22,w22,#2
+	add	w24,w24,w19	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w3,w3,#31
+	eor	w4,w4,w6
+	bic	w25,w23,w21
+	and	w26,w22,w21
+	ror	w27,w20,#27
+	eor	w4,w4,w12
+	add	w23,w23,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w24,w24,w27		// e+=rot(a,5)
+	eor	w4,w4,w17
+	ror	w21,w21,#2
+	add	w23,w23,w3	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w4,w4,#31
+	eor	w5,w5,w7
+	bic	w25,w22,w20
+	and	w26,w21,w20
+	ror	w27,w24,#27
+	eor	w5,w5,w13
+	add	w22,w22,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w23,w23,w27		// e+=rot(a,5)
+	eor	w5,w5,w19
+	ror	w20,w20,#2
+	add	w22,w22,w4	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w5,w5,#31
+	eor	w6,w6,w8
+	bic	w25,w21,w24
+	and	w26,w20,w24
+	ror	w27,w23,#27
+	eor	w6,w6,w14
+	add	w21,w21,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w22,w22,w27		// e+=rot(a,5)
+	eor	w6,w6,w3
+	ror	w24,w24,#2
+	add	w21,w21,w5	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w6,w6,#31
+	eor	w7,w7,w9
+	bic	w25,w20,w23
+	and	w26,w24,w23
+	ror	w27,w22,#27
+	eor	w7,w7,w15
+	add	w20,w20,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w21,w21,w27		// e+=rot(a,5)
+	eor	w7,w7,w4
+	ror	w23,w23,#2
+	add	w20,w20,w6	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w7,w7,#31
+	movz	w28,#0xeba1
+	movk	w28,#0x6ed9,lsl#16
+	eor	w8,w8,w10
+	bic	w25,w24,w22
+	and	w26,w23,w22
+	ror	w27,w21,#27
+	eor	w8,w8,w16
+	add	w24,w24,w28		// future e+=K
+	orr	w25,w25,w26
+	add	w20,w20,w27		// e+=rot(a,5)
+	eor	w8,w8,w5
+	ror	w22,w22,#2
+	add	w24,w24,w7	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w8,w8,#31
+	eor	w9,w9,w11
+	eor	w25,w23,w21
+	ror	w27,w20,#27
+	add	w23,w23,w28		// future e+=K
+	eor	w9,w9,w17
+	eor	w25,w25,w22
+	add	w24,w24,w27		// e+=rot(a,5)
+	ror	w21,w21,#2
+	eor	w9,w9,w6
+	add	w23,w23,w8	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w9,w9,#31
+	eor	w10,w10,w12
+	eor	w25,w22,w20
+	ror	w27,w24,#27
+	add	w22,w22,w28		// future e+=K
+	eor	w10,w10,w19
+	eor	w25,w25,w21
+	add	w23,w23,w27		// e+=rot(a,5)
+	ror	w20,w20,#2
+	eor	w10,w10,w7
+	add	w22,w22,w9	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w10,w10,#31
+	eor	w11,w11,w13
+	eor	w25,w21,w24
+	ror	w27,w23,#27
+	add	w21,w21,w28		// future e+=K
+	eor	w11,w11,w3
+	eor	w25,w25,w20
+	add	w22,w22,w27		// e+=rot(a,5)
+	ror	w24,w24,#2
+	eor	w11,w11,w8
+	add	w21,w21,w10	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w11,w11,#31
+	eor	w12,w12,w14
+	eor	w25,w20,w23
+	ror	w27,w22,#27
+	add	w20,w20,w28		// future e+=K
+	eor	w12,w12,w4
+	eor	w25,w25,w24
+	add	w21,w21,w27		// e+=rot(a,5)
+	ror	w23,w23,#2
+	eor	w12,w12,w9
+	add	w20,w20,w11	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w12,w12,#31
+	eor	w13,w13,w15
+	eor	w25,w24,w22
+	ror	w27,w21,#27
+	add	w24,w24,w28		// future e+=K
+	eor	w13,w13,w5
+	eor	w25,w25,w23
+	add	w20,w20,w27		// e+=rot(a,5)
+	ror	w22,w22,#2
+	eor	w13,w13,w10
+	add	w24,w24,w12	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w13,w13,#31
+	eor	w14,w14,w16
+	eor	w25,w23,w21
+	ror	w27,w20,#27
+	add	w23,w23,w28		// future e+=K
+	eor	w14,w14,w6
+	eor	w25,w25,w22
+	add	w24,w24,w27		// e+=rot(a,5)
+	ror	w21,w21,#2
+	eor	w14,w14,w11
+	add	w23,w23,w13	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w14,w14,#31
+	eor	w15,w15,w17
+	eor	w25,w22,w20
+	ror	w27,w24,#27
+	add	w22,w22,w28		// future e+=K
+	eor	w15,w15,w7
+	eor	w25,w25,w21
+	add	w23,w23,w27		// e+=rot(a,5)
+	ror	w20,w20,#2
+	eor	w15,w15,w12
+	add	w22,w22,w14	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w15,w15,#31
+	eor	w16,w16,w19
+	eor	w25,w21,w24
+	ror	w27,w23,#27
+	add	w21,w21,w28		// future e+=K
+	eor	w16,w16,w8
+	eor	w25,w25,w20
+	add	w22,w22,w27		// e+=rot(a,5)
+	ror	w24,w24,#2
+	eor	w16,w16,w13
+	add	w21,w21,w15	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w16,w16,#31
+	eor	w17,w17,w3
+	eor	w25,w20,w23
+	ror	w27,w22,#27
+	add	w20,w20,w28		// future e+=K
+	eor	w17,w17,w9
+	eor	w25,w25,w24
+	add	w21,w21,w27		// e+=rot(a,5)
+	ror	w23,w23,#2
+	eor	w17,w17,w14
+	add	w20,w20,w16	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w17,w17,#31
+	eor	w19,w19,w4
+	eor	w25,w24,w22
+	ror	w27,w21,#27
+	add	w24,w24,w28		// future e+=K
+	eor	w19,w19,w10
+	eor	w25,w25,w23
+	add	w20,w20,w27		// e+=rot(a,5)
+	ror	w22,w22,#2
+	eor	w19,w19,w15
+	add	w24,w24,w17	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w19,w19,#31
+	eor	w3,w3,w5
+	eor	w25,w23,w21
+	ror	w27,w20,#27
+	add	w23,w23,w28		// future e+=K
+	eor	w3,w3,w11
+	eor	w25,w25,w22
+	add	w24,w24,w27		// e+=rot(a,5)
+	ror	w21,w21,#2
+	eor	w3,w3,w16
+	add	w23,w23,w19	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w3,w3,#31
+	eor	w4,w4,w6
+	eor	w25,w22,w20
+	ror	w27,w24,#27
+	add	w22,w22,w28		// future e+=K
+	eor	w4,w4,w12
+	eor	w25,w25,w21
+	add	w23,w23,w27		// e+=rot(a,5)
+	ror	w20,w20,#2
+	eor	w4,w4,w17
+	add	w22,w22,w3	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w4,w4,#31
+	eor	w5,w5,w7
+	eor	w25,w21,w24
+	ror	w27,w23,#27
+	add	w21,w21,w28		// future e+=K
+	eor	w5,w5,w13
+	eor	w25,w25,w20
+	add	w22,w22,w27		// e+=rot(a,5)
+	ror	w24,w24,#2
+	eor	w5,w5,w19
+	add	w21,w21,w4	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w5,w5,#31
+	eor	w6,w6,w8
+	eor	w25,w20,w23
+	ror	w27,w22,#27
+	add	w20,w20,w28		// future e+=K
+	eor	w6,w6,w14
+	eor	w25,w25,w24
+	add	w21,w21,w27		// e+=rot(a,5)
+	ror	w23,w23,#2
+	eor	w6,w6,w3
+	add	w20,w20,w5	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w6,w6,#31
+	eor	w7,w7,w9
+	eor	w25,w24,w22
+	ror	w27,w21,#27
+	add	w24,w24,w28		// future e+=K
+	eor	w7,w7,w15
+	eor	w25,w25,w23
+	add	w20,w20,w27		// e+=rot(a,5)
+	ror	w22,w22,#2
+	eor	w7,w7,w4
+	add	w24,w24,w6	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w7,w7,#31
+	eor	w8,w8,w10
+	eor	w25,w23,w21
+	ror	w27,w20,#27
+	add	w23,w23,w28		// future e+=K
+	eor	w8,w8,w16
+	eor	w25,w25,w22
+	add	w24,w24,w27		// e+=rot(a,5)
+	ror	w21,w21,#2
+	eor	w8,w8,w5
+	add	w23,w23,w7	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w8,w8,#31
+	eor	w9,w9,w11
+	eor	w25,w22,w20
+	ror	w27,w24,#27
+	add	w22,w22,w28		// future e+=K
+	eor	w9,w9,w17
+	eor	w25,w25,w21
+	add	w23,w23,w27		// e+=rot(a,5)
+	ror	w20,w20,#2
+	eor	w9,w9,w6
+	add	w22,w22,w8	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w9,w9,#31
+	eor	w10,w10,w12
+	eor	w25,w21,w24
+	ror	w27,w23,#27
+	add	w21,w21,w28		// future e+=K
+	eor	w10,w10,w19
+	eor	w25,w25,w20
+	add	w22,w22,w27		// e+=rot(a,5)
+	ror	w24,w24,#2
+	eor	w10,w10,w7
+	add	w21,w21,w9	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w10,w10,#31
+	eor	w11,w11,w13
+	eor	w25,w20,w23
+	ror	w27,w22,#27
+	add	w20,w20,w28		// future e+=K
+	eor	w11,w11,w3
+	eor	w25,w25,w24
+	add	w21,w21,w27		// e+=rot(a,5)
+	ror	w23,w23,#2
+	eor	w11,w11,w8
+	add	w20,w20,w10	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w11,w11,#31
+	movz	w28,#0xbcdc
+	movk	w28,#0x8f1b,lsl#16
+	eor	w12,w12,w14
+	eor	w25,w24,w22
+	ror	w27,w21,#27
+	add	w24,w24,w28		// future e+=K
+	eor	w12,w12,w4
+	eor	w25,w25,w23
+	add	w20,w20,w27		// e+=rot(a,5)
+	ror	w22,w22,#2
+	eor	w12,w12,w9
+	add	w24,w24,w11	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w12,w12,#31
+	orr	w25,w21,w22
+	and	w26,w21,w22
+	eor	w13,w13,w15
+	ror	w27,w20,#27
+	and	w25,w25,w23
+	add	w23,w23,w28		// future e+=K
+	eor	w13,w13,w5
+	add	w24,w24,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w21,w21,#2
+	eor	w13,w13,w10
+	add	w23,w23,w12	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w13,w13,#31
+	orr	w25,w20,w21
+	and	w26,w20,w21
+	eor	w14,w14,w16
+	ror	w27,w24,#27
+	and	w25,w25,w22
+	add	w22,w22,w28		// future e+=K
+	eor	w14,w14,w6
+	add	w23,w23,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w20,w20,#2
+	eor	w14,w14,w11
+	add	w22,w22,w13	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w14,w14,#31
+	orr	w25,w24,w20
+	and	w26,w24,w20
+	eor	w15,w15,w17
+	ror	w27,w23,#27
+	and	w25,w25,w21
+	add	w21,w21,w28		// future e+=K
+	eor	w15,w15,w7
+	add	w22,w22,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w24,w24,#2
+	eor	w15,w15,w12
+	add	w21,w21,w14	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w15,w15,#31
+	orr	w25,w23,w24
+	and	w26,w23,w24
+	eor	w16,w16,w19
+	ror	w27,w22,#27
+	and	w25,w25,w20
+	add	w20,w20,w28		// future e+=K
+	eor	w16,w16,w8
+	add	w21,w21,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w23,w23,#2
+	eor	w16,w16,w13
+	add	w20,w20,w15	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w16,w16,#31
+	orr	w25,w22,w23
+	and	w26,w22,w23
+	eor	w17,w17,w3
+	ror	w27,w21,#27
+	and	w25,w25,w24
+	add	w24,w24,w28		// future e+=K
+	eor	w17,w17,w9
+	add	w20,w20,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w22,w22,#2
+	eor	w17,w17,w14
+	add	w24,w24,w16	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w17,w17,#31
+	orr	w25,w21,w22
+	and	w26,w21,w22
+	eor	w19,w19,w4
+	ror	w27,w20,#27
+	and	w25,w25,w23
+	add	w23,w23,w28		// future e+=K
+	eor	w19,w19,w10
+	add	w24,w24,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w21,w21,#2
+	eor	w19,w19,w15
+	add	w23,w23,w17	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w19,w19,#31
+	orr	w25,w20,w21
+	and	w26,w20,w21
+	eor	w3,w3,w5
+	ror	w27,w24,#27
+	and	w25,w25,w22
+	add	w22,w22,w28		// future e+=K
+	eor	w3,w3,w11
+	add	w23,w23,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w20,w20,#2
+	eor	w3,w3,w16
+	add	w22,w22,w19	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w3,w3,#31
+	orr	w25,w24,w20
+	and	w26,w24,w20
+	eor	w4,w4,w6
+	ror	w27,w23,#27
+	and	w25,w25,w21
+	add	w21,w21,w28		// future e+=K
+	eor	w4,w4,w12
+	add	w22,w22,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w24,w24,#2
+	eor	w4,w4,w17
+	add	w21,w21,w3	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w4,w4,#31
+	orr	w25,w23,w24
+	and	w26,w23,w24
+	eor	w5,w5,w7
+	ror	w27,w22,#27
+	and	w25,w25,w20
+	add	w20,w20,w28		// future e+=K
+	eor	w5,w5,w13
+	add	w21,w21,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w23,w23,#2
+	eor	w5,w5,w19
+	add	w20,w20,w4	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w5,w5,#31
+	orr	w25,w22,w23
+	and	w26,w22,w23
+	eor	w6,w6,w8
+	ror	w27,w21,#27
+	and	w25,w25,w24
+	add	w24,w24,w28		// future e+=K
+	eor	w6,w6,w14
+	add	w20,w20,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w22,w22,#2
+	eor	w6,w6,w3
+	add	w24,w24,w5	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w6,w6,#31
+	orr	w25,w21,w22
+	and	w26,w21,w22
+	eor	w7,w7,w9
+	ror	w27,w20,#27
+	and	w25,w25,w23
+	add	w23,w23,w28		// future e+=K
+	eor	w7,w7,w15
+	add	w24,w24,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w21,w21,#2
+	eor	w7,w7,w4
+	add	w23,w23,w6	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w7,w7,#31
+	orr	w25,w20,w21
+	and	w26,w20,w21
+	eor	w8,w8,w10
+	ror	w27,w24,#27
+	and	w25,w25,w22
+	add	w22,w22,w28		// future e+=K
+	eor	w8,w8,w16
+	add	w23,w23,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w20,w20,#2
+	eor	w8,w8,w5
+	add	w22,w22,w7	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w8,w8,#31
+	orr	w25,w24,w20
+	and	w26,w24,w20
+	eor	w9,w9,w11
+	ror	w27,w23,#27
+	and	w25,w25,w21
+	add	w21,w21,w28		// future e+=K
+	eor	w9,w9,w17
+	add	w22,w22,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w24,w24,#2
+	eor	w9,w9,w6
+	add	w21,w21,w8	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w9,w9,#31
+	orr	w25,w23,w24
+	and	w26,w23,w24
+	eor	w10,w10,w12
+	ror	w27,w22,#27
+	and	w25,w25,w20
+	add	w20,w20,w28		// future e+=K
+	eor	w10,w10,w19
+	add	w21,w21,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w23,w23,#2
+	eor	w10,w10,w7
+	add	w20,w20,w9	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w10,w10,#31
+	orr	w25,w22,w23
+	and	w26,w22,w23
+	eor	w11,w11,w13
+	ror	w27,w21,#27
+	and	w25,w25,w24
+	add	w24,w24,w28		// future e+=K
+	eor	w11,w11,w3
+	add	w20,w20,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w22,w22,#2
+	eor	w11,w11,w8
+	add	w24,w24,w10	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w11,w11,#31
+	orr	w25,w21,w22
+	and	w26,w21,w22
+	eor	w12,w12,w14
+	ror	w27,w20,#27
+	and	w25,w25,w23
+	add	w23,w23,w28		// future e+=K
+	eor	w12,w12,w4
+	add	w24,w24,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w21,w21,#2
+	eor	w12,w12,w9
+	add	w23,w23,w11	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w12,w12,#31
+	orr	w25,w20,w21
+	and	w26,w20,w21
+	eor	w13,w13,w15
+	ror	w27,w24,#27
+	and	w25,w25,w22
+	add	w22,w22,w28		// future e+=K
+	eor	w13,w13,w5
+	add	w23,w23,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w20,w20,#2
+	eor	w13,w13,w10
+	add	w22,w22,w12	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w13,w13,#31
+	orr	w25,w24,w20
+	and	w26,w24,w20
+	eor	w14,w14,w16
+	ror	w27,w23,#27
+	and	w25,w25,w21
+	add	w21,w21,w28		// future e+=K
+	eor	w14,w14,w6
+	add	w22,w22,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w24,w24,#2
+	eor	w14,w14,w11
+	add	w21,w21,w13	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w14,w14,#31
+	orr	w25,w23,w24
+	and	w26,w23,w24
+	eor	w15,w15,w17
+	ror	w27,w22,#27
+	and	w25,w25,w20
+	add	w20,w20,w28		// future e+=K
+	eor	w15,w15,w7
+	add	w21,w21,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w23,w23,#2
+	eor	w15,w15,w12
+	add	w20,w20,w14	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w15,w15,#31
+	movz	w28,#0xc1d6
+	movk	w28,#0xca62,lsl#16
+	orr	w25,w22,w23
+	and	w26,w22,w23
+	eor	w16,w16,w19
+	ror	w27,w21,#27
+	and	w25,w25,w24
+	add	w24,w24,w28		// future e+=K
+	eor	w16,w16,w8
+	add	w20,w20,w27		// e+=rot(a,5)
+	orr	w25,w25,w26
+	ror	w22,w22,#2
+	eor	w16,w16,w13
+	add	w24,w24,w15	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w16,w16,#31
+	eor	w17,w17,w3
+	eor	w25,w23,w21
+	ror	w27,w20,#27
+	add	w23,w23,w28		// future e+=K
+	eor	w17,w17,w9
+	eor	w25,w25,w22
+	add	w24,w24,w27		// e+=rot(a,5)
+	ror	w21,w21,#2
+	eor	w17,w17,w14
+	add	w23,w23,w16	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w17,w17,#31
+	eor	w19,w19,w4
+	eor	w25,w22,w20
+	ror	w27,w24,#27
+	add	w22,w22,w28		// future e+=K
+	eor	w19,w19,w10
+	eor	w25,w25,w21
+	add	w23,w23,w27		// e+=rot(a,5)
+	ror	w20,w20,#2
+	eor	w19,w19,w15
+	add	w22,w22,w17	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w19,w19,#31
+	eor	w3,w3,w5
+	eor	w25,w21,w24
+	ror	w27,w23,#27
+	add	w21,w21,w28		// future e+=K
+	eor	w3,w3,w11
+	eor	w25,w25,w20
+	add	w22,w22,w27		// e+=rot(a,5)
+	ror	w24,w24,#2
+	eor	w3,w3,w16
+	add	w21,w21,w19	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w3,w3,#31
+	eor	w4,w4,w6
+	eor	w25,w20,w23
+	ror	w27,w22,#27
+	add	w20,w20,w28		// future e+=K
+	eor	w4,w4,w12
+	eor	w25,w25,w24
+	add	w21,w21,w27		// e+=rot(a,5)
+	ror	w23,w23,#2
+	eor	w4,w4,w17
+	add	w20,w20,w3	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w4,w4,#31
+	eor	w5,w5,w7
+	eor	w25,w24,w22
+	ror	w27,w21,#27
+	add	w24,w24,w28		// future e+=K
+	eor	w5,w5,w13
+	eor	w25,w25,w23
+	add	w20,w20,w27		// e+=rot(a,5)
+	ror	w22,w22,#2
+	eor	w5,w5,w19
+	add	w24,w24,w4	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w5,w5,#31
+	eor	w6,w6,w8
+	eor	w25,w23,w21
+	ror	w27,w20,#27
+	add	w23,w23,w28		// future e+=K
+	eor	w6,w6,w14
+	eor	w25,w25,w22
+	add	w24,w24,w27		// e+=rot(a,5)
+	ror	w21,w21,#2
+	eor	w6,w6,w3
+	add	w23,w23,w5	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w6,w6,#31
+	eor	w7,w7,w9
+	eor	w25,w22,w20
+	ror	w27,w24,#27
+	add	w22,w22,w28		// future e+=K
+	eor	w7,w7,w15
+	eor	w25,w25,w21
+	add	w23,w23,w27		// e+=rot(a,5)
+	ror	w20,w20,#2
+	eor	w7,w7,w4
+	add	w22,w22,w6	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w7,w7,#31
+	eor	w8,w8,w10
+	eor	w25,w21,w24
+	ror	w27,w23,#27
+	add	w21,w21,w28		// future e+=K
+	eor	w8,w8,w16
+	eor	w25,w25,w20
+	add	w22,w22,w27		// e+=rot(a,5)
+	ror	w24,w24,#2
+	eor	w8,w8,w5
+	add	w21,w21,w7	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w8,w8,#31
+	eor	w9,w9,w11
+	eor	w25,w20,w23
+	ror	w27,w22,#27
+	add	w20,w20,w28		// future e+=K
+	eor	w9,w9,w17
+	eor	w25,w25,w24
+	add	w21,w21,w27		// e+=rot(a,5)
+	ror	w23,w23,#2
+	eor	w9,w9,w6
+	add	w20,w20,w8	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w9,w9,#31
+	eor	w10,w10,w12
+	eor	w25,w24,w22
+	ror	w27,w21,#27
+	add	w24,w24,w28		// future e+=K
+	eor	w10,w10,w19
+	eor	w25,w25,w23
+	add	w20,w20,w27		// e+=rot(a,5)
+	ror	w22,w22,#2
+	eor	w10,w10,w7
+	add	w24,w24,w9	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w10,w10,#31
+	eor	w11,w11,w13
+	eor	w25,w23,w21
+	ror	w27,w20,#27
+	add	w23,w23,w28		// future e+=K
+	eor	w11,w11,w3
+	eor	w25,w25,w22
+	add	w24,w24,w27		// e+=rot(a,5)
+	ror	w21,w21,#2
+	eor	w11,w11,w8
+	add	w23,w23,w10	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w11,w11,#31
+	eor	w12,w12,w14
+	eor	w25,w22,w20
+	ror	w27,w24,#27
+	add	w22,w22,w28		// future e+=K
+	eor	w12,w12,w4
+	eor	w25,w25,w21
+	add	w23,w23,w27		// e+=rot(a,5)
+	ror	w20,w20,#2
+	eor	w12,w12,w9
+	add	w22,w22,w11	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w12,w12,#31
+	eor	w13,w13,w15
+	eor	w25,w21,w24
+	ror	w27,w23,#27
+	add	w21,w21,w28		// future e+=K
+	eor	w13,w13,w5
+	eor	w25,w25,w20
+	add	w22,w22,w27		// e+=rot(a,5)
+	ror	w24,w24,#2
+	eor	w13,w13,w10
+	add	w21,w21,w12	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w13,w13,#31
+	eor	w14,w14,w16
+	eor	w25,w20,w23
+	ror	w27,w22,#27
+	add	w20,w20,w28		// future e+=K
+	eor	w14,w14,w6
+	eor	w25,w25,w24
+	add	w21,w21,w27		// e+=rot(a,5)
+	ror	w23,w23,#2
+	eor	w14,w14,w11
+	add	w20,w20,w13	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ror	w14,w14,#31
+	eor	w15,w15,w17
+	eor	w25,w24,w22
+	ror	w27,w21,#27
+	add	w24,w24,w28		// future e+=K
+	eor	w15,w15,w7
+	eor	w25,w25,w23
+	add	w20,w20,w27		// e+=rot(a,5)
+	ror	w22,w22,#2
+	eor	w15,w15,w12
+	add	w24,w24,w14	// future e+=X[i]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	ror	w15,w15,#31
+	eor	w16,w16,w19
+	eor	w25,w23,w21
+	ror	w27,w20,#27
+	add	w23,w23,w28		// future e+=K
+	eor	w16,w16,w8
+	eor	w25,w25,w22
+	add	w24,w24,w27		// e+=rot(a,5)
+	ror	w21,w21,#2
+	eor	w16,w16,w13
+	add	w23,w23,w15	// future e+=X[i]
+	add	w24,w24,w25		// e+=F(b,c,d)
+	ror	w16,w16,#31
+	eor	w17,w17,w3
+	eor	w25,w22,w20
+	ror	w27,w24,#27
+	add	w22,w22,w28		// future e+=K
+	eor	w17,w17,w9
+	eor	w25,w25,w21
+	add	w23,w23,w27		// e+=rot(a,5)
+	ror	w20,w20,#2
+	eor	w17,w17,w14
+	add	w22,w22,w16	// future e+=X[i]
+	add	w23,w23,w25		// e+=F(b,c,d)
+	ror	w17,w17,#31
+	eor	w19,w19,w4
+	eor	w25,w21,w24
+	ror	w27,w23,#27
+	add	w21,w21,w28		// future e+=K
+	eor	w19,w19,w10
+	eor	w25,w25,w20
+	add	w22,w22,w27		// e+=rot(a,5)
+	ror	w24,w24,#2
+	eor	w19,w19,w15
+	add	w21,w21,w17	// future e+=X[i]
+	add	w22,w22,w25		// e+=F(b,c,d)
+	ror	w19,w19,#31
+	ldp	w4,w5,[x0]
+	eor	w25,w20,w23
+	ror	w27,w22,#27
+	add	w20,w20,w28		// future e+=K
+	eor	w25,w25,w24
+	add	w21,w21,w27		// e+=rot(a,5)
+	ror	w23,w23,#2
+	add	w20,w20,w19	// future e+=X[i]
+	add	w21,w21,w25		// e+=F(b,c,d)
+	ldp	w6,w7,[x0,#8]
+	eor	w25,w24,w22
+	ror	w27,w21,#27
+	eor	w25,w25,w23
+	add	w20,w20,w27		// e+=rot(a,5)
+	ror	w22,w22,#2
+	ldr	w8,[x0,#16]
+	add	w20,w20,w25		// e+=F(b,c,d)
+	add	w21,w21,w5
+	add	w22,w22,w6
+	add	w20,w20,w4
+	add	w23,w23,w7
+	add	w24,w24,w8
+	stp	w20,w21,[x0]
+	stp	w22,w23,[x0,#8]
+	str	w24,[x0,#16]
+	cbnz	x2,Loop
+
+	ldp	x19,x20,[sp,#16]
+	ldp	x21,x22,[sp,#32]
+	ldp	x23,x24,[sp,#48]
+	ldp	x25,x26,[sp,#64]
+	ldp	x27,x28,[sp,#80]
+	ldr	x29,[sp],#96
+	ret
+
+.def sha1_block_armv8
+   .type 32
+.endef
+.align	6
+sha1_block_armv8:	// SHA-1 block transform built on the sha1* instructions (hand-encoded as .long words below). As used here: x0 = 5-word state, x1 = input, x2 = number of 64-byte blocks.
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	AARCH64_VALID_CALL_TARGET
+Lv8_entry:	// NOTE(review): presumably the branch target of a capability check in sha1_block_data_order (not visible in this chunk) - confirm
+	stp	x29,x30,[sp,#-16]!	// push x29/x30, sp -= 16
+	add	x29,sp,#0	// x29 = sp
+
+	adrp	x4,Lconst
+	add	x4,x4,:lo12:Lconst	// x4 = address of the round-constant table
+	eor	v1.16b,v1.16b,v1.16b	// v1 = 0
+	ld1	{v0.4s},[x0],#16	// v0 = state words 0-3, x0 += 16
+	ld1	{v1.s}[0],[x0]	// v1 lane 0 = state word 4
+	sub	x0,x0,#16	// rewind x0 to the start of the state
+	ld1	{v16.4s,v17.4s,v18.4s,v19.4s},[x4]	// v16..v19 = K_00_19, K_20_39, K_40_59, K_60_79
+
+Loop_hw:	// one iteration per 64-byte block; x2 is tested only at the bottom, so a zero count is not handled here
+	ld1	{v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64	// load 64 input bytes into v4-v7, x1 += 64
+	sub	x2,x2,#1	// one fewer block remaining
+	rev32	v4.16b,v4.16b	// reverse the bytes inside each 32-bit word
+	rev32	v5.16b,v5.16b
+
+	add	v20.4s,v16.4s,v4.4s	// v20 = message words + K_00_19
+	rev32	v6.16b,v6.16b
+	orr	v22.16b,v0.16b,v0.16b	// offload: v22 = copy of v0, added back after the last round group
+
+	add	v21.4s,v16.4s,v5.4s	// v20/v21 alternate as the words+K operand of successive round groups
+	rev32	v7.16b,v7.16b
+.long	0x5e280803	//sha1h v3.16b,v0.16b
+.long	0x5e140020	//sha1c v0.16b,v1.16b,v20.4s		// 0
+	add	v20.4s,v16.4s,v6.4s
+.long	0x5e0630a4	//sha1su0 v4.16b,v5.16b,v6.16b
+.long	0x5e280802	//sha1h v2.16b,v0.16b		// 1
+.long	0x5e150060	//sha1c v0.16b,v3.16b,v21.4s
+	add	v21.4s,v16.4s,v7.4s
+.long	0x5e2818e4	//sha1su1 v4.16b,v7.16b
+.long	0x5e0730c5	//sha1su0 v5.16b,v6.16b,v7.16b
+.long	0x5e280803	//sha1h v3.16b,v0.16b		// 2
+.long	0x5e140040	//sha1c v0.16b,v2.16b,v20.4s
+	add	v20.4s,v16.4s,v4.4s
+.long	0x5e281885	//sha1su1 v5.16b,v4.16b
+.long	0x5e0430e6	//sha1su0 v6.16b,v7.16b,v4.16b
+.long	0x5e280802	//sha1h v2.16b,v0.16b		// 3
+.long	0x5e150060	//sha1c v0.16b,v3.16b,v21.4s
+	add	v21.4s,v17.4s,v5.4s	// from here the constant is K_20_39 (v17)
+.long	0x5e2818a6	//sha1su1 v6.16b,v5.16b
+.long	0x5e053087	//sha1su0 v7.16b,v4.16b,v5.16b
+.long	0x5e280803	//sha1h v3.16b,v0.16b		// 4
+.long	0x5e140040	//sha1c v0.16b,v2.16b,v20.4s
+	add	v20.4s,v17.4s,v6.4s
+.long	0x5e2818c7	//sha1su1 v7.16b,v6.16b
+.long	0x5e0630a4	//sha1su0 v4.16b,v5.16b,v6.16b
+.long	0x5e280802	//sha1h v2.16b,v0.16b		// 5
+.long	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s
+	add	v21.4s,v17.4s,v7.4s
+.long	0x5e2818e4	//sha1su1 v4.16b,v7.16b
+.long	0x5e0730c5	//sha1su0 v5.16b,v6.16b,v7.16b
+.long	0x5e280803	//sha1h v3.16b,v0.16b		// 6
+.long	0x5e141040	//sha1p v0.16b,v2.16b,v20.4s
+	add	v20.4s,v17.4s,v4.4s
+.long	0x5e281885	//sha1su1 v5.16b,v4.16b
+.long	0x5e0430e6	//sha1su0 v6.16b,v7.16b,v4.16b
+.long	0x5e280802	//sha1h v2.16b,v0.16b		// 7
+.long	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s
+	add	v21.4s,v17.4s,v5.4s
+.long	0x5e2818a6	//sha1su1 v6.16b,v5.16b
+.long	0x5e053087	//sha1su0 v7.16b,v4.16b,v5.16b
+.long	0x5e280803	//sha1h v3.16b,v0.16b		// 8
+.long	0x5e141040	//sha1p v0.16b,v2.16b,v20.4s
+	add	v20.4s,v18.4s,v6.4s	// from here the constant is K_40_59 (v18)
+.long	0x5e2818c7	//sha1su1 v7.16b,v6.16b
+.long	0x5e0630a4	//sha1su0 v4.16b,v5.16b,v6.16b
+.long	0x5e280802	//sha1h v2.16b,v0.16b		// 9
+.long	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s
+	add	v21.4s,v18.4s,v7.4s
+.long	0x5e2818e4	//sha1su1 v4.16b,v7.16b
+.long	0x5e0730c5	//sha1su0 v5.16b,v6.16b,v7.16b
+.long	0x5e280803	//sha1h v3.16b,v0.16b		// 10
+.long	0x5e142040	//sha1m v0.16b,v2.16b,v20.4s
+	add	v20.4s,v18.4s,v4.4s
+.long	0x5e281885	//sha1su1 v5.16b,v4.16b
+.long	0x5e0430e6	//sha1su0 v6.16b,v7.16b,v4.16b
+.long	0x5e280802	//sha1h v2.16b,v0.16b		// 11
+.long	0x5e152060	//sha1m v0.16b,v3.16b,v21.4s
+	add	v21.4s,v18.4s,v5.4s
+.long	0x5e2818a6	//sha1su1 v6.16b,v5.16b
+.long	0x5e053087	//sha1su0 v7.16b,v4.16b,v5.16b
+.long	0x5e280803	//sha1h v3.16b,v0.16b		// 12
+.long	0x5e142040	//sha1m v0.16b,v2.16b,v20.4s
+	add	v20.4s,v18.4s,v6.4s
+.long	0x5e2818c7	//sha1su1 v7.16b,v6.16b
+.long	0x5e0630a4	//sha1su0 v4.16b,v5.16b,v6.16b
+.long	0x5e280802	//sha1h v2.16b,v0.16b		// 13
+.long	0x5e152060	//sha1m v0.16b,v3.16b,v21.4s
+	add	v21.4s,v19.4s,v7.4s	// from here the constant is K_60_79 (v19)
+.long	0x5e2818e4	//sha1su1 v4.16b,v7.16b
+.long	0x5e0730c5	//sha1su0 v5.16b,v6.16b,v7.16b
+.long	0x5e280803	//sha1h v3.16b,v0.16b		// 14
+.long	0x5e142040	//sha1m v0.16b,v2.16b,v20.4s
+	add	v20.4s,v19.4s,v4.4s
+.long	0x5e281885	//sha1su1 v5.16b,v4.16b
+.long	0x5e0430e6	//sha1su0 v6.16b,v7.16b,v4.16b
+.long	0x5e280802	//sha1h v2.16b,v0.16b		// 15
+.long	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s
+	add	v21.4s,v19.4s,v5.4s
+.long	0x5e2818a6	//sha1su1 v6.16b,v5.16b
+.long	0x5e053087	//sha1su0 v7.16b,v4.16b,v5.16b
+.long	0x5e280803	//sha1h v3.16b,v0.16b		// 16
+.long	0x5e141040	//sha1p v0.16b,v2.16b,v20.4s
+	add	v20.4s,v19.4s,v6.4s
+.long	0x5e2818c7	//sha1su1 v7.16b,v6.16b
+.long	0x5e280802	//sha1h v2.16b,v0.16b		// 17
+.long	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s
+	add	v21.4s,v19.4s,v7.4s
+
+.long	0x5e280803	//sha1h v3.16b,v0.16b		// 18
+.long	0x5e141040	//sha1p v0.16b,v2.16b,v20.4s
+
+.long	0x5e280802	//sha1h v2.16b,v0.16b		// 19
+.long	0x5e151060	//sha1p v0.16b,v3.16b,v21.4s
+
+	add	v1.4s,v1.4s,v2.4s	// v1 += v2 (result of the last sha1h)
+	add	v0.4s,v0.4s,v22.4s	// v0 += v22 (the copy of v0 saved at the top of this iteration)
+
+	cbnz	x2,Loop_hw	// repeat while blocks remain
+
+	st1	{v0.4s},[x0],#16	// store state words 0-3, x0 += 16
+	st1	{v1.s}[0],[x0]	// store state word 4 from v1 lane 0
+
+	ldr	x29,[sp],#16	// restore x29 and release the 16-byte frame; x30 is not reloaded
+	ret
+
+.section	.rodata
+.align	6
+Lconst:	// round-constant table: each row holds one constant repeated in four 32-bit lanes; sha1_block_armv8 loads the four rows into v16-v19
+.long	0x5a827999,0x5a827999,0x5a827999,0x5a827999	//K_00_19
+.long	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	//K_20_39
+.long	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	//K_40_59
+.long	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	//K_60_79
+.byte	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	// ASCII, NUL-terminated: "SHA1 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align	2
+.align	2	// repeated directive as emitted by the generator; a no-op after the line above
+#endif
+#endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/win-aarch64/crypto/fipsmodule/sha256-armv8.S b/deps/boringssl/win-aarch64/crypto/fipsmodule/sha256-armv8.S
new file mode 100644
index 0000000..5f233f9
--- /dev/null
+++ b/deps/boringssl/win-aarch64/crypto/fipsmodule/sha256-armv8.S
@@ -0,0 +1,1217 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+
+#if !defined(OPENSSL_NO_ASM)
+#if defined(__aarch64__)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the OpenSSL license (the "License").  You may not use
+// this file except in compliance with the License.  You can obtain a copy
+// in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+
+// ====================================================================
+// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+// project. The module is, however, dual licensed under OpenSSL and
+// CRYPTOGAMS licenses depending on where you obtain it. For further
+// details see http://www.openssl.org/~appro/cryptogams/.
+//
+// Permission to use under GPLv2 terms is granted.
+// ====================================================================
+//
+// SHA256/512 for ARMv8.
+//
+// Performance in cycles per processed byte and improvement coefficient
+// over code generated with "default" compiler:
+//
+//		SHA256-hw	SHA256(*)	SHA512
+// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
+// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
+// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
+// Denver	2.01		10.5 (+26%)	6.70 (+8%)
+// X-Gene			20.0 (+100%)	12.8 (+300%(***))
+// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
+//
+// (*)	Software SHA256 results are of lesser relevance, presented
+//	mostly for informational purposes.
+// (**)	The result is a trade-off: it's possible to improve it by
+//	10% (or by 1 cycle per round), but at the cost of 20% loss
+//	on Cortex-A53 (or by 4 cycles per round).
+// (***)	Super-impressive coefficients over gcc-generated code are
+//	indication of some compiler "pathology", most notably code
+//	generated with -mgeneral-regs-only is significantly faster
+//	and the gap is only 40-90%.
+
+#ifndef	__KERNEL__
+# include <openssl/arm_arch.h>
+#endif
+
+.text
+
+
+
+.globl	sha256_block_data_order
+
+.def sha256_block_data_order
+   .type 32
+.endef
+.align	6
+sha256_block_data_order:
+	AARCH64_VALID_CALL_TARGET
+#ifndef	__KERNEL__
+#if __has_feature(hwaddress_sanitizer) && __clang_major__ >= 10
+	adrp	x16,:pg_hi21_nc:OPENSSL_armcap_P
+#else
+	adrp	x16,OPENSSL_armcap_P
+#endif
+	ldr	w16,[x16,:lo12:OPENSSL_armcap_P]
+	tst	w16,#ARMV8_SHA256
+	b.ne	Lv8_entry
+#endif
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29,x30,[sp,#-128]!
+	add	x29,sp,#0
+
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+	sub	sp,sp,#4*4
+
+	ldp	w20,w21,[x0]				// load context
+	ldp	w22,w23,[x0,#2*4]
+	ldp	w24,w25,[x0,#4*4]
+	add	x2,x1,x2,lsl#6	// end of input
+	ldp	w26,w27,[x0,#6*4]
+	adrp	x30,LK256
+	add	x30,x30,:lo12:LK256
+	stp	x0,x2,[x29,#96]
+
+Loop:
+	ldp	w3,w4,[x1],#2*4
+	ldr	w19,[x30],#4			// *K++
+	eor	w28,w21,w22				// magic seed
+	str	x1,[x29,#112]
+#ifndef	__ARMEB__
+	rev	w3,w3			// 0
+#endif
+	ror	w16,w24,#6
+	add	w27,w27,w19			// h+=K[i]
+	eor	w6,w24,w24,ror#14
+	and	w17,w25,w24
+	bic	w19,w26,w24
+	add	w27,w27,w3			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w20,w21			// a^b, b^c in next round
+	eor	w16,w16,w6,ror#11	// Sigma1(e)
+	ror	w6,w20,#2
+	add	w27,w27,w17			// h+=Ch(e,f,g)
+	eor	w17,w20,w20,ror#9
+	add	w27,w27,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w23,w23,w27			// d+=h
+	eor	w28,w28,w21			// Maj(a,b,c)
+	eor	w17,w6,w17,ror#13	// Sigma0(a)
+	add	w27,w27,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w27,w27,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w4,w4			// 1
+#endif
+	ldp	w5,w6,[x1],#2*4
+	add	w27,w27,w17			// h+=Sigma0(a)
+	ror	w16,w23,#6
+	add	w26,w26,w28			// h+=K[i]
+	eor	w7,w23,w23,ror#14
+	and	w17,w24,w23
+	bic	w28,w25,w23
+	add	w26,w26,w4			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w27,w20			// a^b, b^c in next round
+	eor	w16,w16,w7,ror#11	// Sigma1(e)
+	ror	w7,w27,#2
+	add	w26,w26,w17			// h+=Ch(e,f,g)
+	eor	w17,w27,w27,ror#9
+	add	w26,w26,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w22,w22,w26			// d+=h
+	eor	w19,w19,w20			// Maj(a,b,c)
+	eor	w17,w7,w17,ror#13	// Sigma0(a)
+	add	w26,w26,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w26,w26,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w5,w5			// 2
+#endif
+	add	w26,w26,w17			// h+=Sigma0(a)
+	ror	w16,w22,#6
+	add	w25,w25,w19			// h+=K[i]
+	eor	w8,w22,w22,ror#14
+	and	w17,w23,w22
+	bic	w19,w24,w22
+	add	w25,w25,w5			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w26,w27			// a^b, b^c in next round
+	eor	w16,w16,w8,ror#11	// Sigma1(e)
+	ror	w8,w26,#2
+	add	w25,w25,w17			// h+=Ch(e,f,g)
+	eor	w17,w26,w26,ror#9
+	add	w25,w25,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w21,w21,w25			// d+=h
+	eor	w28,w28,w27			// Maj(a,b,c)
+	eor	w17,w8,w17,ror#13	// Sigma0(a)
+	add	w25,w25,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w25,w25,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w6,w6			// 3
+#endif
+	ldp	w7,w8,[x1],#2*4
+	add	w25,w25,w17			// h+=Sigma0(a)
+	ror	w16,w21,#6
+	add	w24,w24,w28			// h+=K[i]
+	eor	w9,w21,w21,ror#14
+	and	w17,w22,w21
+	bic	w28,w23,w21
+	add	w24,w24,w6			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w25,w26			// a^b, b^c in next round
+	eor	w16,w16,w9,ror#11	// Sigma1(e)
+	ror	w9,w25,#2
+	add	w24,w24,w17			// h+=Ch(e,f,g)
+	eor	w17,w25,w25,ror#9
+	add	w24,w24,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w20,w20,w24			// d+=h
+	eor	w19,w19,w26			// Maj(a,b,c)
+	eor	w17,w9,w17,ror#13	// Sigma0(a)
+	add	w24,w24,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w24,w24,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w7,w7			// 4
+#endif
+	add	w24,w24,w17			// h+=Sigma0(a)
+	ror	w16,w20,#6
+	add	w23,w23,w19			// h+=K[i]
+	eor	w10,w20,w20,ror#14
+	and	w17,w21,w20
+	bic	w19,w22,w20
+	add	w23,w23,w7			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w24,w25			// a^b, b^c in next round
+	eor	w16,w16,w10,ror#11	// Sigma1(e)
+	ror	w10,w24,#2
+	add	w23,w23,w17			// h+=Ch(e,f,g)
+	eor	w17,w24,w24,ror#9
+	add	w23,w23,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w27,w27,w23			// d+=h
+	eor	w28,w28,w25			// Maj(a,b,c)
+	eor	w17,w10,w17,ror#13	// Sigma0(a)
+	add	w23,w23,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w23,w23,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w8,w8			// 5
+#endif
+	ldp	w9,w10,[x1],#2*4
+	add	w23,w23,w17			// h+=Sigma0(a)
+	ror	w16,w27,#6
+	add	w22,w22,w28			// h+=K[i]
+	eor	w11,w27,w27,ror#14
+	and	w17,w20,w27
+	bic	w28,w21,w27
+	add	w22,w22,w8			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w23,w24			// a^b, b^c in next round
+	eor	w16,w16,w11,ror#11	// Sigma1(e)
+	ror	w11,w23,#2
+	add	w22,w22,w17			// h+=Ch(e,f,g)
+	eor	w17,w23,w23,ror#9
+	add	w22,w22,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w26,w26,w22			// d+=h
+	eor	w19,w19,w24			// Maj(a,b,c)
+	eor	w17,w11,w17,ror#13	// Sigma0(a)
+	add	w22,w22,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w22,w22,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w9,w9			// 6
+#endif
+	add	w22,w22,w17			// h+=Sigma0(a)
+	ror	w16,w26,#6
+	add	w21,w21,w19			// h+=K[i]
+	eor	w12,w26,w26,ror#14
+	and	w17,w27,w26
+	bic	w19,w20,w26
+	add	w21,w21,w9			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w22,w23			// a^b, b^c in next round
+	eor	w16,w16,w12,ror#11	// Sigma1(e)
+	ror	w12,w22,#2
+	add	w21,w21,w17			// h+=Ch(e,f,g)
+	eor	w17,w22,w22,ror#9
+	add	w21,w21,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w25,w25,w21			// d+=h
+	eor	w28,w28,w23			// Maj(a,b,c)
+	eor	w17,w12,w17,ror#13	// Sigma0(a)
+	add	w21,w21,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w21,w21,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w10,w10			// 7
+#endif
+	ldp	w11,w12,[x1],#2*4
+	add	w21,w21,w17			// h+=Sigma0(a)
+	ror	w16,w25,#6
+	add	w20,w20,w28			// h+=K[i]
+	eor	w13,w25,w25,ror#14
+	and	w17,w26,w25
+	bic	w28,w27,w25
+	add	w20,w20,w10			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w21,w22			// a^b, b^c in next round
+	eor	w16,w16,w13,ror#11	// Sigma1(e)
+	ror	w13,w21,#2
+	add	w20,w20,w17			// h+=Ch(e,f,g)
+	eor	w17,w21,w21,ror#9
+	add	w20,w20,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w24,w24,w20			// d+=h
+	eor	w19,w19,w22			// Maj(a,b,c)
+	eor	w17,w13,w17,ror#13	// Sigma0(a)
+	add	w20,w20,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w20,w20,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w11,w11			// 8
+#endif
+	add	w20,w20,w17			// h+=Sigma0(a)
+	ror	w16,w24,#6
+	add	w27,w27,w19			// h+=K[i]
+	eor	w14,w24,w24,ror#14
+	and	w17,w25,w24
+	bic	w19,w26,w24
+	add	w27,w27,w11			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w20,w21			// a^b, b^c in next round
+	eor	w16,w16,w14,ror#11	// Sigma1(e)
+	ror	w14,w20,#2
+	add	w27,w27,w17			// h+=Ch(e,f,g)
+	eor	w17,w20,w20,ror#9
+	add	w27,w27,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w23,w23,w27			// d+=h
+	eor	w28,w28,w21			// Maj(a,b,c)
+	eor	w17,w14,w17,ror#13	// Sigma0(a)
+	add	w27,w27,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w27,w27,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w12,w12			// 9
+#endif
+	ldp	w13,w14,[x1],#2*4
+	add	w27,w27,w17			// h+=Sigma0(a)
+	ror	w16,w23,#6
+	add	w26,w26,w28			// h+=K[i]
+	eor	w15,w23,w23,ror#14
+	and	w17,w24,w23
+	bic	w28,w25,w23
+	add	w26,w26,w12			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w27,w20			// a^b, b^c in next round
+	eor	w16,w16,w15,ror#11	// Sigma1(e)
+	ror	w15,w27,#2
+	add	w26,w26,w17			// h+=Ch(e,f,g)
+	eor	w17,w27,w27,ror#9
+	add	w26,w26,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w22,w22,w26			// d+=h
+	eor	w19,w19,w20			// Maj(a,b,c)
+	eor	w17,w15,w17,ror#13	// Sigma0(a)
+	add	w26,w26,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w26,w26,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w13,w13			// 10
+#endif
+	add	w26,w26,w17			// h+=Sigma0(a)
+	ror	w16,w22,#6
+	add	w25,w25,w19			// h+=K[i]
+	eor	w0,w22,w22,ror#14
+	and	w17,w23,w22
+	bic	w19,w24,w22
+	add	w25,w25,w13			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w26,w27			// a^b, b^c in next round
+	eor	w16,w16,w0,ror#11	// Sigma1(e)
+	ror	w0,w26,#2
+	add	w25,w25,w17			// h+=Ch(e,f,g)
+	eor	w17,w26,w26,ror#9
+	add	w25,w25,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w21,w21,w25			// d+=h
+	eor	w28,w28,w27			// Maj(a,b,c)
+	eor	w17,w0,w17,ror#13	// Sigma0(a)
+	add	w25,w25,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w25,w25,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w14,w14			// 11
+#endif
+	ldp	w15,w0,[x1],#2*4
+	add	w25,w25,w17			// h+=Sigma0(a)
+	str	w6,[sp,#12]
+	ror	w16,w21,#6
+	add	w24,w24,w28			// h+=K[i]
+	eor	w6,w21,w21,ror#14
+	and	w17,w22,w21
+	bic	w28,w23,w21
+	add	w24,w24,w14			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w25,w26			// a^b, b^c in next round
+	eor	w16,w16,w6,ror#11	// Sigma1(e)
+	ror	w6,w25,#2
+	add	w24,w24,w17			// h+=Ch(e,f,g)
+	eor	w17,w25,w25,ror#9
+	add	w24,w24,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w20,w20,w24			// d+=h
+	eor	w19,w19,w26			// Maj(a,b,c)
+	eor	w17,w6,w17,ror#13	// Sigma0(a)
+	add	w24,w24,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w24,w24,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w15,w15			// 12
+#endif
+	add	w24,w24,w17			// h+=Sigma0(a)
+	str	w7,[sp,#0]
+	ror	w16,w20,#6
+	add	w23,w23,w19			// h+=K[i]
+	eor	w7,w20,w20,ror#14
+	and	w17,w21,w20
+	bic	w19,w22,w20
+	add	w23,w23,w15			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w24,w25			// a^b, b^c in next round
+	eor	w16,w16,w7,ror#11	// Sigma1(e)
+	ror	w7,w24,#2
+	add	w23,w23,w17			// h+=Ch(e,f,g)
+	eor	w17,w24,w24,ror#9
+	add	w23,w23,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w27,w27,w23			// d+=h
+	eor	w28,w28,w25			// Maj(a,b,c)
+	eor	w17,w7,w17,ror#13	// Sigma0(a)
+	add	w23,w23,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w23,w23,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w0,w0			// 13
+#endif
+	ldp	w1,w2,[x1]
+	add	w23,w23,w17			// h+=Sigma0(a)
+	str	w8,[sp,#4]
+	ror	w16,w27,#6
+	add	w22,w22,w28			// h+=K[i]
+	eor	w8,w27,w27,ror#14
+	and	w17,w20,w27
+	bic	w28,w21,w27
+	add	w22,w22,w0			// h+=X[i]
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w23,w24			// a^b, b^c in next round
+	eor	w16,w16,w8,ror#11	// Sigma1(e)
+	ror	w8,w23,#2
+	add	w22,w22,w17			// h+=Ch(e,f,g)
+	eor	w17,w23,w23,ror#9
+	add	w22,w22,w16			// h+=Sigma1(e)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	add	w26,w26,w22			// d+=h
+	eor	w19,w19,w24			// Maj(a,b,c)
+	eor	w17,w8,w17,ror#13	// Sigma0(a)
+	add	w22,w22,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	//add	w22,w22,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w1,w1			// 14
+#endif
+	ldr	w6,[sp,#12]
+	add	w22,w22,w17			// h+=Sigma0(a)
+	str	w9,[sp,#8]
+	ror	w16,w26,#6
+	add	w21,w21,w19			// h+=K[i]
+	eor	w9,w26,w26,ror#14
+	and	w17,w27,w26
+	bic	w19,w20,w26
+	add	w21,w21,w1			// h+=X[i]
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w22,w23			// a^b, b^c in next round
+	eor	w16,w16,w9,ror#11	// Sigma1(e)
+	ror	w9,w22,#2
+	add	w21,w21,w17			// h+=Ch(e,f,g)
+	eor	w17,w22,w22,ror#9
+	add	w21,w21,w16			// h+=Sigma1(e)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	add	w25,w25,w21			// d+=h
+	eor	w28,w28,w23			// Maj(a,b,c)
+	eor	w17,w9,w17,ror#13	// Sigma0(a)
+	add	w21,w21,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	//add	w21,w21,w17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	w2,w2			// 15
+#endif
+	ldr	w7,[sp,#0]
+	add	w21,w21,w17			// h+=Sigma0(a)
+	str	w10,[sp,#12]
+	ror	w16,w25,#6
+	add	w20,w20,w28			// h+=K[i]
+	ror	w9,w4,#7
+	and	w17,w26,w25
+	ror	w8,w1,#17
+	bic	w28,w27,w25
+	ror	w10,w21,#2
+	add	w20,w20,w2			// h+=X[i]
+	eor	w16,w16,w25,ror#11
+	eor	w9,w9,w4,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w21,w22			// a^b, b^c in next round
+	eor	w16,w16,w25,ror#25	// Sigma1(e)
+	eor	w10,w10,w21,ror#13
+	add	w20,w20,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w8,w8,w1,ror#19
+	eor	w9,w9,w4,lsr#3	// sigma0(X[i+1])
+	add	w20,w20,w16			// h+=Sigma1(e)
+	eor	w19,w19,w22			// Maj(a,b,c)
+	eor	w17,w10,w21,ror#22	// Sigma0(a)
+	eor	w8,w8,w1,lsr#10	// sigma1(X[i+14])
+	add	w3,w3,w12
+	add	w24,w24,w20			// d+=h
+	add	w20,w20,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w3,w3,w9
+	add	w20,w20,w17			// h+=Sigma0(a)
+	add	w3,w3,w8
+Loop_16_xx:
+	ldr	w8,[sp,#4]
+	str	w11,[sp,#0]
+	ror	w16,w24,#6
+	add	w27,w27,w19			// h+=K[i]
+	ror	w10,w5,#7
+	and	w17,w25,w24
+	ror	w9,w2,#17
+	bic	w19,w26,w24
+	ror	w11,w20,#2
+	add	w27,w27,w3			// h+=X[i]
+	eor	w16,w16,w24,ror#11
+	eor	w10,w10,w5,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w20,w21			// a^b, b^c in next round
+	eor	w16,w16,w24,ror#25	// Sigma1(e)
+	eor	w11,w11,w20,ror#13
+	add	w27,w27,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w9,w9,w2,ror#19
+	eor	w10,w10,w5,lsr#3	// sigma0(X[i+1])
+	add	w27,w27,w16			// h+=Sigma1(e)
+	eor	w28,w28,w21			// Maj(a,b,c)
+	eor	w17,w11,w20,ror#22	// Sigma0(a)
+	eor	w9,w9,w2,lsr#10	// sigma1(X[i+14])
+	add	w4,w4,w13
+	add	w23,w23,w27			// d+=h
+	add	w27,w27,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w4,w4,w10
+	add	w27,w27,w17			// h+=Sigma0(a)
+	add	w4,w4,w9
+	ldr	w9,[sp,#8]
+	str	w12,[sp,#4]
+	ror	w16,w23,#6
+	add	w26,w26,w28			// h+=K[i]
+	ror	w11,w6,#7
+	and	w17,w24,w23
+	ror	w10,w3,#17
+	bic	w28,w25,w23
+	ror	w12,w27,#2
+	add	w26,w26,w4			// h+=X[i]
+	eor	w16,w16,w23,ror#11
+	eor	w11,w11,w6,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w27,w20			// a^b, b^c in next round
+	eor	w16,w16,w23,ror#25	// Sigma1(e)
+	eor	w12,w12,w27,ror#13
+	add	w26,w26,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w10,w10,w3,ror#19
+	eor	w11,w11,w6,lsr#3	// sigma0(X[i+1])
+	add	w26,w26,w16			// h+=Sigma1(e)
+	eor	w19,w19,w20			// Maj(a,b,c)
+	eor	w17,w12,w27,ror#22	// Sigma0(a)
+	eor	w10,w10,w3,lsr#10	// sigma1(X[i+14])
+	add	w5,w5,w14
+	add	w22,w22,w26			// d+=h
+	add	w26,w26,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w5,w5,w11
+	add	w26,w26,w17			// h+=Sigma0(a)
+	add	w5,w5,w10
+	ldr	w10,[sp,#12]
+	str	w13,[sp,#8]
+	ror	w16,w22,#6
+	add	w25,w25,w19			// h+=K[i]
+	ror	w12,w7,#7
+	and	w17,w23,w22
+	ror	w11,w4,#17
+	bic	w19,w24,w22
+	ror	w13,w26,#2
+	add	w25,w25,w5			// h+=X[i]
+	eor	w16,w16,w22,ror#11
+	eor	w12,w12,w7,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w26,w27			// a^b, b^c in next round
+	eor	w16,w16,w22,ror#25	// Sigma1(e)
+	eor	w13,w13,w26,ror#13
+	add	w25,w25,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w11,w11,w4,ror#19
+	eor	w12,w12,w7,lsr#3	// sigma0(X[i+1])
+	add	w25,w25,w16			// h+=Sigma1(e)
+	eor	w28,w28,w27			// Maj(a,b,c)
+	eor	w17,w13,w26,ror#22	// Sigma0(a)
+	eor	w11,w11,w4,lsr#10	// sigma1(X[i+14])
+	add	w6,w6,w15
+	add	w21,w21,w25			// d+=h
+	add	w25,w25,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w6,w6,w12
+	add	w25,w25,w17			// h+=Sigma0(a)
+	add	w6,w6,w11
+	ldr	w11,[sp,#0]
+	str	w14,[sp,#12]
+	ror	w16,w21,#6
+	add	w24,w24,w28			// h+=K[i]
+	ror	w13,w8,#7
+	and	w17,w22,w21
+	ror	w12,w5,#17
+	bic	w28,w23,w21
+	ror	w14,w25,#2
+	add	w24,w24,w6			// h+=X[i]
+	eor	w16,w16,w21,ror#11
+	eor	w13,w13,w8,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w25,w26			// a^b, b^c in next round
+	eor	w16,w16,w21,ror#25	// Sigma1(e)
+	eor	w14,w14,w25,ror#13
+	add	w24,w24,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w12,w12,w5,ror#19
+	eor	w13,w13,w8,lsr#3	// sigma0(X[i+1])
+	add	w24,w24,w16			// h+=Sigma1(e)
+	eor	w19,w19,w26			// Maj(a,b,c)
+	eor	w17,w14,w25,ror#22	// Sigma0(a)
+	eor	w12,w12,w5,lsr#10	// sigma1(X[i+14])
+	add	w7,w7,w0
+	add	w20,w20,w24			// d+=h
+	add	w24,w24,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w7,w7,w13
+	add	w24,w24,w17			// h+=Sigma0(a)
+	add	w7,w7,w12
+	ldr	w12,[sp,#4]
+	str	w15,[sp,#0]
+	ror	w16,w20,#6
+	add	w23,w23,w19			// h+=K[i]
+	ror	w14,w9,#7
+	and	w17,w21,w20
+	ror	w13,w6,#17
+	bic	w19,w22,w20
+	ror	w15,w24,#2
+	add	w23,w23,w7			// h+=X[i]
+	eor	w16,w16,w20,ror#11
+	eor	w14,w14,w9,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w24,w25			// a^b, b^c in next round
+	eor	w16,w16,w20,ror#25	// Sigma1(e)
+	eor	w15,w15,w24,ror#13
+	add	w23,w23,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w13,w13,w6,ror#19
+	eor	w14,w14,w9,lsr#3	// sigma0(X[i+1])
+	add	w23,w23,w16			// h+=Sigma1(e)
+	eor	w28,w28,w25			// Maj(a,b,c)
+	eor	w17,w15,w24,ror#22	// Sigma0(a)
+	eor	w13,w13,w6,lsr#10	// sigma1(X[i+14])
+	add	w8,w8,w1
+	add	w27,w27,w23			// d+=h
+	add	w23,w23,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w8,w8,w14
+	add	w23,w23,w17			// h+=Sigma0(a)
+	add	w8,w8,w13
+	ldr	w13,[sp,#8]
+	str	w0,[sp,#4]
+	ror	w16,w27,#6
+	add	w22,w22,w28			// h+=K[i]
+	ror	w15,w10,#7
+	and	w17,w20,w27
+	ror	w14,w7,#17
+	bic	w28,w21,w27
+	ror	w0,w23,#2
+	add	w22,w22,w8			// h+=X[i]
+	eor	w16,w16,w27,ror#11
+	eor	w15,w15,w10,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w23,w24			// a^b, b^c in next round
+	eor	w16,w16,w27,ror#25	// Sigma1(e)
+	eor	w0,w0,w23,ror#13
+	add	w22,w22,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w14,w14,w7,ror#19
+	eor	w15,w15,w10,lsr#3	// sigma0(X[i+1])
+	add	w22,w22,w16			// h+=Sigma1(e)
+	eor	w19,w19,w24			// Maj(a,b,c)
+	eor	w17,w0,w23,ror#22	// Sigma0(a)
+	eor	w14,w14,w7,lsr#10	// sigma1(X[i+14])
+	add	w9,w9,w2
+	add	w26,w26,w22			// d+=h
+	add	w22,w22,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w9,w9,w15
+	add	w22,w22,w17			// h+=Sigma0(a)
+	add	w9,w9,w14
+	ldr	w14,[sp,#12]
+	str	w1,[sp,#8]
+	ror	w16,w26,#6
+	add	w21,w21,w19			// h+=K[i]
+	ror	w0,w11,#7
+	and	w17,w27,w26
+	ror	w15,w8,#17
+	bic	w19,w20,w26
+	ror	w1,w22,#2
+	add	w21,w21,w9			// h+=X[i]
+	eor	w16,w16,w26,ror#11
+	eor	w0,w0,w11,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w22,w23			// a^b, b^c in next round
+	eor	w16,w16,w26,ror#25	// Sigma1(e)
+	eor	w1,w1,w22,ror#13
+	add	w21,w21,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w15,w15,w8,ror#19
+	eor	w0,w0,w11,lsr#3	// sigma0(X[i+1])
+	add	w21,w21,w16			// h+=Sigma1(e)
+	eor	w28,w28,w23			// Maj(a,b,c)
+	eor	w17,w1,w22,ror#22	// Sigma0(a)
+	eor	w15,w15,w8,lsr#10	// sigma1(X[i+14])
+	add	w10,w10,w3
+	add	w25,w25,w21			// d+=h
+	add	w21,w21,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w10,w10,w0
+	add	w21,w21,w17			// h+=Sigma0(a)
+	add	w10,w10,w15
+	ldr	w15,[sp,#0]
+	str	w2,[sp,#12]
+	ror	w16,w25,#6
+	add	w20,w20,w28			// h+=K[i]
+	ror	w1,w12,#7
+	and	w17,w26,w25
+	ror	w0,w9,#17
+	bic	w28,w27,w25
+	ror	w2,w21,#2
+	add	w20,w20,w10			// h+=X[i]
+	eor	w16,w16,w25,ror#11
+	eor	w1,w1,w12,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w21,w22			// a^b, b^c in next round
+	eor	w16,w16,w25,ror#25	// Sigma1(e)
+	eor	w2,w2,w21,ror#13
+	add	w20,w20,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w0,w0,w9,ror#19
+	eor	w1,w1,w12,lsr#3	// sigma0(X[i+1])
+	add	w20,w20,w16			// h+=Sigma1(e)
+	eor	w19,w19,w22			// Maj(a,b,c)
+	eor	w17,w2,w21,ror#22	// Sigma0(a)
+	eor	w0,w0,w9,lsr#10	// sigma1(X[i+14])
+	add	w11,w11,w4
+	add	w24,w24,w20			// d+=h
+	add	w20,w20,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w11,w11,w1
+	add	w20,w20,w17			// h+=Sigma0(a)
+	add	w11,w11,w0
+	ldr	w0,[sp,#4]
+	str	w3,[sp,#0]
+	ror	w16,w24,#6
+	add	w27,w27,w19			// h+=K[i]
+	ror	w2,w13,#7
+	and	w17,w25,w24
+	ror	w1,w10,#17
+	bic	w19,w26,w24
+	ror	w3,w20,#2
+	add	w27,w27,w11			// h+=X[i]
+	eor	w16,w16,w24,ror#11
+	eor	w2,w2,w13,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w20,w21			// a^b, b^c in next round
+	eor	w16,w16,w24,ror#25	// Sigma1(e)
+	eor	w3,w3,w20,ror#13
+	add	w27,w27,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w1,w1,w10,ror#19
+	eor	w2,w2,w13,lsr#3	// sigma0(X[i+1])
+	add	w27,w27,w16			// h+=Sigma1(e)
+	eor	w28,w28,w21			// Maj(a,b,c)
+	eor	w17,w3,w20,ror#22	// Sigma0(a)
+	eor	w1,w1,w10,lsr#10	// sigma1(X[i+14])
+	add	w12,w12,w5
+	add	w23,w23,w27			// d+=h
+	add	w27,w27,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w12,w12,w2
+	add	w27,w27,w17			// h+=Sigma0(a)
+	add	w12,w12,w1
+	ldr	w1,[sp,#8]
+	str	w4,[sp,#4]
+	ror	w16,w23,#6
+	add	w26,w26,w28			// h+=K[i]
+	ror	w3,w14,#7
+	and	w17,w24,w23
+	ror	w2,w11,#17
+	bic	w28,w25,w23
+	ror	w4,w27,#2
+	add	w26,w26,w12			// h+=X[i]
+	eor	w16,w16,w23,ror#11
+	eor	w3,w3,w14,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w27,w20			// a^b, b^c in next round
+	eor	w16,w16,w23,ror#25	// Sigma1(e)
+	eor	w4,w4,w27,ror#13
+	add	w26,w26,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w2,w2,w11,ror#19
+	eor	w3,w3,w14,lsr#3	// sigma0(X[i+1])
+	add	w26,w26,w16			// h+=Sigma1(e)
+	eor	w19,w19,w20			// Maj(a,b,c)
+	eor	w17,w4,w27,ror#22	// Sigma0(a)
+	eor	w2,w2,w11,lsr#10	// sigma1(X[i+14])
+	add	w13,w13,w6
+	add	w22,w22,w26			// d+=h
+	add	w26,w26,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w13,w13,w3
+	add	w26,w26,w17			// h+=Sigma0(a)
+	add	w13,w13,w2
+	ldr	w2,[sp,#12]
+	str	w5,[sp,#8]
+	ror	w16,w22,#6
+	add	w25,w25,w19			// h+=K[i]
+	ror	w4,w15,#7
+	and	w17,w23,w22
+	ror	w3,w12,#17
+	bic	w19,w24,w22
+	ror	w5,w26,#2
+	add	w25,w25,w13			// h+=X[i]
+	eor	w16,w16,w22,ror#11
+	eor	w4,w4,w15,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w26,w27			// a^b, b^c in next round
+	eor	w16,w16,w22,ror#25	// Sigma1(e)
+	eor	w5,w5,w26,ror#13
+	add	w25,w25,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w3,w3,w12,ror#19
+	eor	w4,w4,w15,lsr#3	// sigma0(X[i+1])
+	add	w25,w25,w16			// h+=Sigma1(e)
+	eor	w28,w28,w27			// Maj(a,b,c)
+	eor	w17,w5,w26,ror#22	// Sigma0(a)
+	eor	w3,w3,w12,lsr#10	// sigma1(X[i+14])
+	add	w14,w14,w7
+	add	w21,w21,w25			// d+=h
+	add	w25,w25,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w14,w14,w4
+	add	w25,w25,w17			// h+=Sigma0(a)
+	add	w14,w14,w3
+	ldr	w3,[sp,#0]
+	str	w6,[sp,#12]
+	ror	w16,w21,#6
+	add	w24,w24,w28			// h+=K[i]
+	ror	w5,w0,#7
+	and	w17,w22,w21
+	ror	w4,w13,#17
+	bic	w28,w23,w21
+	ror	w6,w25,#2
+	add	w24,w24,w14			// h+=X[i]
+	eor	w16,w16,w21,ror#11
+	eor	w5,w5,w0,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w25,w26			// a^b, b^c in next round
+	eor	w16,w16,w21,ror#25	// Sigma1(e)
+	eor	w6,w6,w25,ror#13
+	add	w24,w24,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w4,w4,w13,ror#19
+	eor	w5,w5,w0,lsr#3	// sigma0(X[i+1])
+	add	w24,w24,w16			// h+=Sigma1(e)
+	eor	w19,w19,w26			// Maj(a,b,c)
+	eor	w17,w6,w25,ror#22	// Sigma0(a)
+	eor	w4,w4,w13,lsr#10	// sigma1(X[i+14])
+	add	w15,w15,w8
+	add	w20,w20,w24			// d+=h
+	add	w24,w24,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w15,w15,w5
+	add	w24,w24,w17			// h+=Sigma0(a)
+	add	w15,w15,w4
+	ldr	w4,[sp,#4]
+	str	w7,[sp,#0]
+	ror	w16,w20,#6
+	add	w23,w23,w19			// h+=K[i]
+	ror	w6,w1,#7
+	and	w17,w21,w20
+	ror	w5,w14,#17
+	bic	w19,w22,w20
+	ror	w7,w24,#2
+	add	w23,w23,w15			// h+=X[i]
+	eor	w16,w16,w20,ror#11
+	eor	w6,w6,w1,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w24,w25			// a^b, b^c in next round
+	eor	w16,w16,w20,ror#25	// Sigma1(e)
+	eor	w7,w7,w24,ror#13
+	add	w23,w23,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w5,w5,w14,ror#19
+	eor	w6,w6,w1,lsr#3	// sigma0(X[i+1])
+	add	w23,w23,w16			// h+=Sigma1(e)
+	eor	w28,w28,w25			// Maj(a,b,c)
+	eor	w17,w7,w24,ror#22	// Sigma0(a)
+	eor	w5,w5,w14,lsr#10	// sigma1(X[i+14])
+	add	w0,w0,w9
+	add	w27,w27,w23			// d+=h
+	add	w23,w23,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w0,w0,w6
+	add	w23,w23,w17			// h+=Sigma0(a)
+	add	w0,w0,w5
+	ldr	w5,[sp,#8]
+	str	w8,[sp,#4]
+	ror	w16,w27,#6
+	add	w22,w22,w28			// h+=K[i]
+	ror	w7,w2,#7
+	and	w17,w20,w27
+	ror	w6,w15,#17
+	bic	w28,w21,w27
+	ror	w8,w23,#2
+	add	w22,w22,w0			// h+=X[i]
+	eor	w16,w16,w27,ror#11
+	eor	w7,w7,w2,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w23,w24			// a^b, b^c in next round
+	eor	w16,w16,w27,ror#25	// Sigma1(e)
+	eor	w8,w8,w23,ror#13
+	add	w22,w22,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w6,w6,w15,ror#19
+	eor	w7,w7,w2,lsr#3	// sigma0(X[i+1])
+	add	w22,w22,w16			// h+=Sigma1(e)
+	eor	w19,w19,w24			// Maj(a,b,c)
+	eor	w17,w8,w23,ror#22	// Sigma0(a)
+	eor	w6,w6,w15,lsr#10	// sigma1(X[i+14])
+	add	w1,w1,w10
+	add	w26,w26,w22			// d+=h
+	add	w22,w22,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w1,w1,w7
+	add	w22,w22,w17			// h+=Sigma0(a)
+	add	w1,w1,w6
+	ldr	w6,[sp,#12]
+	str	w9,[sp,#8]
+	ror	w16,w26,#6
+	add	w21,w21,w19			// h+=K[i]
+	ror	w8,w3,#7
+	and	w17,w27,w26
+	ror	w7,w0,#17
+	bic	w19,w20,w26
+	ror	w9,w22,#2
+	add	w21,w21,w1			// h+=X[i]
+	eor	w16,w16,w26,ror#11
+	eor	w8,w8,w3,ror#18
+	orr	w17,w17,w19			// Ch(e,f,g)
+	eor	w19,w22,w23			// a^b, b^c in next round
+	eor	w16,w16,w26,ror#25	// Sigma1(e)
+	eor	w9,w9,w22,ror#13
+	add	w21,w21,w17			// h+=Ch(e,f,g)
+	and	w28,w28,w19			// (b^c)&=(a^b)
+	eor	w7,w7,w0,ror#19
+	eor	w8,w8,w3,lsr#3	// sigma0(X[i+1])
+	add	w21,w21,w16			// h+=Sigma1(e)
+	eor	w28,w28,w23			// Maj(a,b,c)
+	eor	w17,w9,w22,ror#22	// Sigma0(a)
+	eor	w7,w7,w0,lsr#10	// sigma1(X[i+14])
+	add	w2,w2,w11
+	add	w25,w25,w21			// d+=h
+	add	w21,w21,w28			// h+=Maj(a,b,c)
+	ldr	w28,[x30],#4		// *K++, w19 in next round
+	add	w2,w2,w8
+	add	w21,w21,w17			// h+=Sigma0(a)
+	add	w2,w2,w7
+	ldr	w7,[sp,#0]
+	str	w10,[sp,#12]
+	ror	w16,w25,#6
+	add	w20,w20,w28			// h+=K[i]
+	ror	w9,w4,#7
+	and	w17,w26,w25
+	ror	w8,w1,#17
+	bic	w28,w27,w25
+	ror	w10,w21,#2
+	add	w20,w20,w2			// h+=X[i]
+	eor	w16,w16,w25,ror#11
+	eor	w9,w9,w4,ror#18
+	orr	w17,w17,w28			// Ch(e,f,g)
+	eor	w28,w21,w22			// a^b, b^c in next round
+	eor	w16,w16,w25,ror#25	// Sigma1(e)
+	eor	w10,w10,w21,ror#13
+	add	w20,w20,w17			// h+=Ch(e,f,g)
+	and	w19,w19,w28			// (b^c)&=(a^b)
+	eor	w8,w8,w1,ror#19
+	eor	w9,w9,w4,lsr#3	// sigma0(X[i+1])
+	add	w20,w20,w16			// h+=Sigma1(e)
+	eor	w19,w19,w22			// Maj(a,b,c)
+	eor	w17,w10,w21,ror#22	// Sigma0(a)
+	eor	w8,w8,w1,lsr#10	// sigma1(X[i+14])
+	add	w3,w3,w12
+	add	w24,w24,w20			// d+=h
+	add	w20,w20,w19			// h+=Maj(a,b,c)
+	ldr	w19,[x30],#4		// *K++, w28 in next round
+	add	w3,w3,w9
+	add	w20,w20,w17			// h+=Sigma0(a)
+	add	w3,w3,w8
+	cbnz	w19,Loop_16_xx
+
+	ldp	x0,x2,[x29,#96]
+	ldr	x1,[x29,#112]
+	sub	x30,x30,#260		// rewind
+
+	ldp	w3,w4,[x0]
+	ldp	w5,w6,[x0,#2*4]
+	add	x1,x1,#14*4			// advance input pointer
+	ldp	w7,w8,[x0,#4*4]
+	add	w20,w20,w3
+	ldp	w9,w10,[x0,#6*4]
+	add	w21,w21,w4
+	add	w22,w22,w5
+	add	w23,w23,w6
+	stp	w20,w21,[x0]
+	add	w24,w24,w7
+	add	w25,w25,w8
+	stp	w22,w23,[x0,#2*4]
+	add	w26,w26,w9
+	add	w27,w27,w10
+	cmp	x1,x2
+	stp	w24,w25,[x0,#4*4]
+	stp	w26,w27,[x0,#6*4]
+	b.ne	Loop
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#4*4
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#128
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+
+.section	.rodata	// read-only data section
+.align	6	// 2^6 = 64-byte alignment for the table
+
+LK256:	// K[0..63]: 64 32-bit constants, four per .long line, followed by a zero word
+.long	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	// K[0..3]
+.long	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5	// K[4..7]
+.long	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3	// K[8..11]
+.long	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174	// K[12..15]
+.long	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc	// K[16..19]
+.long	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da	// K[20..23]
+.long	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7	// K[24..27]
+.long	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967	// K[28..31]
+.long	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13	// K[32..35]
+.long	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85	// K[36..39]
+.long	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3	// K[40..43]
+.long	0xd192e819,0xd6990624,0xf40e3585,0x106aa070	// K[44..47]
+.long	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5	// K[48..51]
+.long	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3	// K[52..55]
+.long	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208	// K[56..59]
+.long	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2	// K[60..63]
+.long	0	//terminator: zero word after K[63]. NOTE(review): appears to be what ends the scalar "cbnz w19,Loop_16_xx" loop (w19 is loaded via *K++) -- confirm x30 is pointed at LK256 there
+
+.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	// NUL-terminated ASCII: "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align	2	// realign to 2^2 = 4 bytes after the byte string
+.align	2	// repeated directive; already aligned, so it adds no padding
+.text
+#ifndef	__KERNEL__
+.def sha256_block_armv8
+   .type 32
+.endef
+.align	6	// 2^6 = 64-byte alignment for the entry point
+sha256_block_armv8:	// SHA-256 over whole 64-byte blocks using the sha256h/sha256h2/sha256su0/sha256su1 instructions (emitted below as raw .long encodings)
+Lv8_entry:	// In: x0 = state (8 x 32-bit words, updated in place), x1 = input, x2 = number of 64-byte blocks (must be non-zero: it is decremented before it is tested). NOTE(review): no .globl for this symbol here -- presumably reached by a branch from sha256_block_data_order; confirm
+	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later.
+	stp	x29,x30,[sp,#-16]!	// push a 16-byte frame record
+	add	x29,sp,#0	// x29 = frame pointer
+
+	ld1	{v0.4s,v1.4s},[x0]	// v0,v1 = current state (words 0-3, 4-7)
+	adrp	x3,LK256	// x3 = page address of the constant table ...
+	add	x3,x3,:lo12:LK256	// ... plus its low 12 bits: x3 = &LK256
+
+Loop_hw:	// one iteration per 64-byte input block
+	ld1	{v4.16b,v5.16b,v6.16b,v7.16b},[x1],#64	// v4-v7 = next 64 input bytes; x1 += 64
+	sub	x2,x2,#1	// one block fewer remaining (tested by cbnz at the loop end)
+	ld1	{v16.4s},[x3],#16	// v16 = K[0..3]; x3 += 16
+	rev32	v4.16b,v4.16b	// reverse the bytes within each 32-bit word of the block
+	rev32	v5.16b,v5.16b
+	rev32	v6.16b,v6.16b
+	rev32	v7.16b,v7.16b
+	orr	v18.16b,v0.16b,v0.16b		// offload: keep the incoming state (v0,v1) in v18,v19 for the final add
+	orr	v19.16b,v1.16b,v1.16b
+	ld1	{v17.4s},[x3],#16	// v17 = K[4..7]
+	add	v16.4s,v16.4s,v4.4s	// rounds 0-3: v16 = K + message words
+.long	0x5e2828a4	//sha256su0 v4.16b,v5.16b
+	orr	v2.16b,v0.16b,v0.16b	// v2 = copy of v0 taken before sha256h overwrites it; consumed by sha256h2
+.long	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+.long	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+.long	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
+	ld1	{v16.4s},[x3],#16	// v16 = K[8..11]
+	add	v17.4s,v17.4s,v5.4s	// rounds 4-7
+.long	0x5e2828c5	//sha256su0 v5.16b,v6.16b
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+.long	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+.long	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
+	ld1	{v17.4s},[x3],#16	// v17 = K[12..15]
+	add	v16.4s,v16.4s,v6.4s	// rounds 8-11
+.long	0x5e2828e6	//sha256su0 v6.16b,v7.16b
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+.long	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+.long	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
+	ld1	{v16.4s},[x3],#16	// v16 = K[16..19]
+	add	v17.4s,v17.4s,v7.4s	// rounds 12-15
+.long	0x5e282887	//sha256su0 v7.16b,v4.16b
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+.long	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+.long	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
+	ld1	{v17.4s},[x3],#16	// v17 = K[20..23]
+	add	v16.4s,v16.4s,v4.4s	// rounds 16-19
+.long	0x5e2828a4	//sha256su0 v4.16b,v5.16b
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+.long	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+.long	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
+	ld1	{v16.4s},[x3],#16	// v16 = K[24..27]
+	add	v17.4s,v17.4s,v5.4s	// rounds 20-23
+.long	0x5e2828c5	//sha256su0 v5.16b,v6.16b
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+.long	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+.long	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
+	ld1	{v17.4s},[x3],#16	// v17 = K[28..31]
+	add	v16.4s,v16.4s,v6.4s	// rounds 24-27
+.long	0x5e2828e6	//sha256su0 v6.16b,v7.16b
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+.long	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+.long	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
+	ld1	{v16.4s},[x3],#16	// v16 = K[32..35]
+	add	v17.4s,v17.4s,v7.4s	// rounds 28-31
+.long	0x5e282887	//sha256su0 v7.16b,v4.16b
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+.long	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+.long	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
+	ld1	{v17.4s},[x3],#16	// v17 = K[36..39]
+	add	v16.4s,v16.4s,v4.4s	// rounds 32-35
+.long	0x5e2828a4	//sha256su0 v4.16b,v5.16b
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+.long	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+.long	0x5e0760c4	//sha256su1 v4.16b,v6.16b,v7.16b
+	ld1	{v16.4s},[x3],#16	// v16 = K[40..43]
+	add	v17.4s,v17.4s,v5.4s	// rounds 36-39
+.long	0x5e2828c5	//sha256su0 v5.16b,v6.16b
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+.long	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+.long	0x5e0460e5	//sha256su1 v5.16b,v7.16b,v4.16b
+	ld1	{v17.4s},[x3],#16	// v17 = K[44..47]
+	add	v16.4s,v16.4s,v6.4s	// rounds 40-43
+.long	0x5e2828e6	//sha256su0 v6.16b,v7.16b
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+.long	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+.long	0x5e056086	//sha256su1 v6.16b,v4.16b,v5.16b
+	ld1	{v16.4s},[x3],#16	// v16 = K[48..51]
+	add	v17.4s,v17.4s,v7.4s	// rounds 44-47
+.long	0x5e282887	//sha256su0 v7.16b,v4.16b
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+.long	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+.long	0x5e0660a7	//sha256su1 v7.16b,v5.16b,v6.16b
+	ld1	{v17.4s},[x3],#16	// v17 = K[52..55]
+	add	v16.4s,v16.4s,v4.4s	// rounds 48-51 (no sha256su0/su1 from here on)
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+.long	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+
+	ld1	{v16.4s},[x3],#16	// v16 = K[56..59]
+	add	v17.4s,v17.4s,v5.4s	// rounds 52-55
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+.long	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+
+	ld1	{v17.4s},[x3]	// v17 = K[60..63]; last load, no post-increment
+	add	v16.4s,v16.4s,v6.4s	// rounds 56-59
+	sub	x3,x3,#64*4-16	// rewind: undo the 15 post-incremented 16-byte loads so x3 = &LK256 again
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e104020	//sha256h v0.16b,v1.16b,v16.4s
+.long	0x5e105041	//sha256h2 v1.16b,v2.16b,v16.4s
+
+	add	v17.4s,v17.4s,v7.4s	// rounds 60-63
+	orr	v2.16b,v0.16b,v0.16b
+.long	0x5e114020	//sha256h v0.16b,v1.16b,v17.4s
+.long	0x5e115041	//sha256h2 v1.16b,v2.16b,v17.4s
+
+	add	v0.4s,v0.4s,v18.4s	// add the state saved at the top of the iteration back in
+	add	v1.4s,v1.4s,v19.4s
+
+	cbnz	x2,Loop_hw	// repeat while blocks remain
+
+	st1	{v0.4s,v1.4s},[x0]	// write the updated state back through x0
+
+	ldr	x29,[sp],#16	// restore x29 and pop the 16-byte frame; the saved x30 is not reloaded
+	ret	// return via x30, which nothing between entry and here writes
+
+#endif  // !__KERNEL__
+#endif
+#endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/win-aarch64/crypto/fipsmodule/sha512-armv8.S b/deps/boringssl/win-aarch64/crypto/fipsmodule/sha512-armv8.S
new file mode 100644
index 0000000..d01304f
--- /dev/null
+++ b/deps/boringssl/win-aarch64/crypto/fipsmodule/sha512-armv8.S
@@ -0,0 +1,1085 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+
+#if !defined(OPENSSL_NO_ASM)
+#if defined(__aarch64__)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
+//
+// Licensed under the OpenSSL license (the "License").  You may not use
+// this file except in compliance with the License.  You can obtain a copy
+// in the file LICENSE in the source distribution or at
+// https://www.openssl.org/source/license.html
+
+// ====================================================================
+// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
+// project. The module is, however, dual licensed under OpenSSL and
+// CRYPTOGAMS licenses depending on where you obtain it. For further
+// details see http://www.openssl.org/~appro/cryptogams/.
+//
+// Permission to use under GPLv2 terms is granted.
+// ====================================================================
+//
+// SHA256/512 for ARMv8.
+//
+// Performance in cycles per processed byte and improvement coefficient
+// over code generated with "default" compiler:
+//
+//		SHA256-hw	SHA256(*)	SHA512
+// Apple A7	1.97		10.5 (+33%)	6.73 (-1%(**))
+// Cortex-A53	2.38		15.5 (+115%)	10.0 (+150%(***))
+// Cortex-A57	2.31		11.6 (+86%)	7.51 (+260%(***))
+// Denver	2.01		10.5 (+26%)	6.70 (+8%)
+// X-Gene			20.0 (+100%)	12.8 (+300%(***))
+// Mongoose	2.36		13.0 (+50%)	8.36 (+33%)
+//
+// (*)	Software SHA256 results are of lesser relevance, presented
+//	mostly for informational purposes.
+// (**)	The result is a trade-off: it's possible to improve it by
+//	10% (or by 1 cycle per round), but at the cost of 20% loss
+//	on Cortex-A53 (or by 4 cycles per round).
+// (***)	Super-impressive coefficients over gcc-generated code are
+//	indication of some compiler "pathology", most notably code
+//	generated with -mgeneral-regs-only is significantly faster
+//	and the gap is only 40-90%.
+
+#ifndef	__KERNEL__
+# include <openssl/arm_arch.h>
+#endif
+
+.text
+
+
+
+.globl	sha512_block_data_order
+
+.def sha512_block_data_order
+   .type 32
+.endef
+.align	6
+sha512_block_data_order:
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29,x30,[sp,#-128]!
+	add	x29,sp,#0
+
+	stp	x19,x20,[sp,#16]
+	stp	x21,x22,[sp,#32]
+	stp	x23,x24,[sp,#48]
+	stp	x25,x26,[sp,#64]
+	stp	x27,x28,[sp,#80]
+	sub	sp,sp,#4*8
+
+	ldp	x20,x21,[x0]				// load context
+	ldp	x22,x23,[x0,#2*8]
+	ldp	x24,x25,[x0,#4*8]
+	add	x2,x1,x2,lsl#7	// end of input
+	ldp	x26,x27,[x0,#6*8]
+	adrp	x30,LK512
+	add	x30,x30,:lo12:LK512
+	stp	x0,x2,[x29,#96]
+
+Loop:
+	ldp	x3,x4,[x1],#2*8
+	ldr	x19,[x30],#8			// *K++
+	eor	x28,x21,x22				// magic seed
+	str	x1,[x29,#112]
+#ifndef	__ARMEB__
+	rev	x3,x3			// 0
+#endif
+	ror	x16,x24,#14
+	add	x27,x27,x19			// h+=K[i]
+	eor	x6,x24,x24,ror#23
+	and	x17,x25,x24
+	bic	x19,x26,x24
+	add	x27,x27,x3			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x20,x21			// a^b, b^c in next round
+	eor	x16,x16,x6,ror#18	// Sigma1(e)
+	ror	x6,x20,#28
+	add	x27,x27,x17			// h+=Ch(e,f,g)
+	eor	x17,x20,x20,ror#5
+	add	x27,x27,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x23,x23,x27			// d+=h
+	eor	x28,x28,x21			// Maj(a,b,c)
+	eor	x17,x6,x17,ror#34	// Sigma0(a)
+	add	x27,x27,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x27,x27,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x4,x4			// 1
+#endif
+	ldp	x5,x6,[x1],#2*8
+	add	x27,x27,x17			// h+=Sigma0(a)
+	ror	x16,x23,#14
+	add	x26,x26,x28			// h+=K[i]
+	eor	x7,x23,x23,ror#23
+	and	x17,x24,x23
+	bic	x28,x25,x23
+	add	x26,x26,x4			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x27,x20			// a^b, b^c in next round
+	eor	x16,x16,x7,ror#18	// Sigma1(e)
+	ror	x7,x27,#28
+	add	x26,x26,x17			// h+=Ch(e,f,g)
+	eor	x17,x27,x27,ror#5
+	add	x26,x26,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x22,x22,x26			// d+=h
+	eor	x19,x19,x20			// Maj(a,b,c)
+	eor	x17,x7,x17,ror#34	// Sigma0(a)
+	add	x26,x26,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x26,x26,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x5,x5			// 2
+#endif
+	add	x26,x26,x17			// h+=Sigma0(a)
+	ror	x16,x22,#14
+	add	x25,x25,x19			// h+=K[i]
+	eor	x8,x22,x22,ror#23
+	and	x17,x23,x22
+	bic	x19,x24,x22
+	add	x25,x25,x5			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x26,x27			// a^b, b^c in next round
+	eor	x16,x16,x8,ror#18	// Sigma1(e)
+	ror	x8,x26,#28
+	add	x25,x25,x17			// h+=Ch(e,f,g)
+	eor	x17,x26,x26,ror#5
+	add	x25,x25,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x21,x21,x25			// d+=h
+	eor	x28,x28,x27			// Maj(a,b,c)
+	eor	x17,x8,x17,ror#34	// Sigma0(a)
+	add	x25,x25,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x25,x25,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x6,x6			// 3
+#endif
+	ldp	x7,x8,[x1],#2*8
+	add	x25,x25,x17			// h+=Sigma0(a)
+	ror	x16,x21,#14
+	add	x24,x24,x28			// h+=K[i]
+	eor	x9,x21,x21,ror#23
+	and	x17,x22,x21
+	bic	x28,x23,x21
+	add	x24,x24,x6			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x25,x26			// a^b, b^c in next round
+	eor	x16,x16,x9,ror#18	// Sigma1(e)
+	ror	x9,x25,#28
+	add	x24,x24,x17			// h+=Ch(e,f,g)
+	eor	x17,x25,x25,ror#5
+	add	x24,x24,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x20,x20,x24			// d+=h
+	eor	x19,x19,x26			// Maj(a,b,c)
+	eor	x17,x9,x17,ror#34	// Sigma0(a)
+	add	x24,x24,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x24,x24,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x7,x7			// 4
+#endif
+	add	x24,x24,x17			// h+=Sigma0(a)
+	ror	x16,x20,#14
+	add	x23,x23,x19			// h+=K[i]
+	eor	x10,x20,x20,ror#23
+	and	x17,x21,x20
+	bic	x19,x22,x20
+	add	x23,x23,x7			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x24,x25			// a^b, b^c in next round
+	eor	x16,x16,x10,ror#18	// Sigma1(e)
+	ror	x10,x24,#28
+	add	x23,x23,x17			// h+=Ch(e,f,g)
+	eor	x17,x24,x24,ror#5
+	add	x23,x23,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x27,x27,x23			// d+=h
+	eor	x28,x28,x25			// Maj(a,b,c)
+	eor	x17,x10,x17,ror#34	// Sigma0(a)
+	add	x23,x23,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x23,x23,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x8,x8			// 5
+#endif
+	ldp	x9,x10,[x1],#2*8
+	add	x23,x23,x17			// h+=Sigma0(a)
+	ror	x16,x27,#14
+	add	x22,x22,x28			// h+=K[i]
+	eor	x11,x27,x27,ror#23
+	and	x17,x20,x27
+	bic	x28,x21,x27
+	add	x22,x22,x8			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x23,x24			// a^b, b^c in next round
+	eor	x16,x16,x11,ror#18	// Sigma1(e)
+	ror	x11,x23,#28
+	add	x22,x22,x17			// h+=Ch(e,f,g)
+	eor	x17,x23,x23,ror#5
+	add	x22,x22,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x26,x26,x22			// d+=h
+	eor	x19,x19,x24			// Maj(a,b,c)
+	eor	x17,x11,x17,ror#34	// Sigma0(a)
+	add	x22,x22,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x22,x22,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x9,x9			// 6
+#endif
+	add	x22,x22,x17			// h+=Sigma0(a)
+	ror	x16,x26,#14
+	add	x21,x21,x19			// h+=K[i]
+	eor	x12,x26,x26,ror#23
+	and	x17,x27,x26
+	bic	x19,x20,x26
+	add	x21,x21,x9			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x22,x23			// a^b, b^c in next round
+	eor	x16,x16,x12,ror#18	// Sigma1(e)
+	ror	x12,x22,#28
+	add	x21,x21,x17			// h+=Ch(e,f,g)
+	eor	x17,x22,x22,ror#5
+	add	x21,x21,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x25,x25,x21			// d+=h
+	eor	x28,x28,x23			// Maj(a,b,c)
+	eor	x17,x12,x17,ror#34	// Sigma0(a)
+	add	x21,x21,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x21,x21,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x10,x10			// 7
+#endif
+	ldp	x11,x12,[x1],#2*8
+	add	x21,x21,x17			// h+=Sigma0(a)
+	ror	x16,x25,#14
+	add	x20,x20,x28			// h+=K[i]
+	eor	x13,x25,x25,ror#23
+	and	x17,x26,x25
+	bic	x28,x27,x25
+	add	x20,x20,x10			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x21,x22			// a^b, b^c in next round
+	eor	x16,x16,x13,ror#18	// Sigma1(e)
+	ror	x13,x21,#28
+	add	x20,x20,x17			// h+=Ch(e,f,g)
+	eor	x17,x21,x21,ror#5
+	add	x20,x20,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x24,x24,x20			// d+=h
+	eor	x19,x19,x22			// Maj(a,b,c)
+	eor	x17,x13,x17,ror#34	// Sigma0(a)
+	add	x20,x20,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x20,x20,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x11,x11			// 8
+#endif
+	add	x20,x20,x17			// h+=Sigma0(a)
+	ror	x16,x24,#14
+	add	x27,x27,x19			// h+=K[i]
+	eor	x14,x24,x24,ror#23
+	and	x17,x25,x24
+	bic	x19,x26,x24
+	add	x27,x27,x11			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x20,x21			// a^b, b^c in next round
+	eor	x16,x16,x14,ror#18	// Sigma1(e)
+	ror	x14,x20,#28
+	add	x27,x27,x17			// h+=Ch(e,f,g)
+	eor	x17,x20,x20,ror#5
+	add	x27,x27,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x23,x23,x27			// d+=h
+	eor	x28,x28,x21			// Maj(a,b,c)
+	eor	x17,x14,x17,ror#34	// Sigma0(a)
+	add	x27,x27,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x27,x27,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x12,x12			// 9
+#endif
+	ldp	x13,x14,[x1],#2*8
+	add	x27,x27,x17			// h+=Sigma0(a)
+	ror	x16,x23,#14
+	add	x26,x26,x28			// h+=K[i]
+	eor	x15,x23,x23,ror#23
+	and	x17,x24,x23
+	bic	x28,x25,x23
+	add	x26,x26,x12			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x27,x20			// a^b, b^c in next round
+	eor	x16,x16,x15,ror#18	// Sigma1(e)
+	ror	x15,x27,#28
+	add	x26,x26,x17			// h+=Ch(e,f,g)
+	eor	x17,x27,x27,ror#5
+	add	x26,x26,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x22,x22,x26			// d+=h
+	eor	x19,x19,x20			// Maj(a,b,c)
+	eor	x17,x15,x17,ror#34	// Sigma0(a)
+	add	x26,x26,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x26,x26,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x13,x13			// 10
+#endif
+	add	x26,x26,x17			// h+=Sigma0(a)
+	ror	x16,x22,#14
+	add	x25,x25,x19			// h+=K[i]
+	eor	x0,x22,x22,ror#23
+	and	x17,x23,x22
+	bic	x19,x24,x22
+	add	x25,x25,x13			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x26,x27			// a^b, b^c in next round
+	eor	x16,x16,x0,ror#18	// Sigma1(e)
+	ror	x0,x26,#28
+	add	x25,x25,x17			// h+=Ch(e,f,g)
+	eor	x17,x26,x26,ror#5
+	add	x25,x25,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x21,x21,x25			// d+=h
+	eor	x28,x28,x27			// Maj(a,b,c)
+	eor	x17,x0,x17,ror#34	// Sigma0(a)
+	add	x25,x25,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x25,x25,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x14,x14			// 11
+#endif
+	ldp	x15,x0,[x1],#2*8
+	add	x25,x25,x17			// h+=Sigma0(a)
+	str	x6,[sp,#24]
+	ror	x16,x21,#14
+	add	x24,x24,x28			// h+=K[i]
+	eor	x6,x21,x21,ror#23
+	and	x17,x22,x21
+	bic	x28,x23,x21
+	add	x24,x24,x14			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x25,x26			// a^b, b^c in next round
+	eor	x16,x16,x6,ror#18	// Sigma1(e)
+	ror	x6,x25,#28
+	add	x24,x24,x17			// h+=Ch(e,f,g)
+	eor	x17,x25,x25,ror#5
+	add	x24,x24,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x20,x20,x24			// d+=h
+	eor	x19,x19,x26			// Maj(a,b,c)
+	eor	x17,x6,x17,ror#34	// Sigma0(a)
+	add	x24,x24,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x24,x24,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x15,x15			// 12
+#endif
+	add	x24,x24,x17			// h+=Sigma0(a)
+	str	x7,[sp,#0]
+	ror	x16,x20,#14
+	add	x23,x23,x19			// h+=K[i]
+	eor	x7,x20,x20,ror#23
+	and	x17,x21,x20
+	bic	x19,x22,x20
+	add	x23,x23,x15			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x24,x25			// a^b, b^c in next round
+	eor	x16,x16,x7,ror#18	// Sigma1(e)
+	ror	x7,x24,#28
+	add	x23,x23,x17			// h+=Ch(e,f,g)
+	eor	x17,x24,x24,ror#5
+	add	x23,x23,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x27,x27,x23			// d+=h
+	eor	x28,x28,x25			// Maj(a,b,c)
+	eor	x17,x7,x17,ror#34	// Sigma0(a)
+	add	x23,x23,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x23,x23,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x0,x0			// 13
+#endif
+	ldp	x1,x2,[x1]
+	add	x23,x23,x17			// h+=Sigma0(a)
+	str	x8,[sp,#8]
+	ror	x16,x27,#14
+	add	x22,x22,x28			// h+=K[i]
+	eor	x8,x27,x27,ror#23
+	and	x17,x20,x27
+	bic	x28,x21,x27
+	add	x22,x22,x0			// h+=X[i]
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x23,x24			// a^b, b^c in next round
+	eor	x16,x16,x8,ror#18	// Sigma1(e)
+	ror	x8,x23,#28
+	add	x22,x22,x17			// h+=Ch(e,f,g)
+	eor	x17,x23,x23,ror#5
+	add	x22,x22,x16			// h+=Sigma1(e)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	add	x26,x26,x22			// d+=h
+	eor	x19,x19,x24			// Maj(a,b,c)
+	eor	x17,x8,x17,ror#34	// Sigma0(a)
+	add	x22,x22,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	//add	x22,x22,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x1,x1			// 14
+#endif
+	ldr	x6,[sp,#24]
+	add	x22,x22,x17			// h+=Sigma0(a)
+	str	x9,[sp,#16]
+	ror	x16,x26,#14
+	add	x21,x21,x19			// h+=K[i]
+	eor	x9,x26,x26,ror#23
+	and	x17,x27,x26
+	bic	x19,x20,x26
+	add	x21,x21,x1			// h+=X[i]
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x22,x23			// a^b, b^c in next round
+	eor	x16,x16,x9,ror#18	// Sigma1(e)
+	ror	x9,x22,#28
+	add	x21,x21,x17			// h+=Ch(e,f,g)
+	eor	x17,x22,x22,ror#5
+	add	x21,x21,x16			// h+=Sigma1(e)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	add	x25,x25,x21			// d+=h
+	eor	x28,x28,x23			// Maj(a,b,c)
+	eor	x17,x9,x17,ror#34	// Sigma0(a)
+	add	x21,x21,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	//add	x21,x21,x17			// h+=Sigma0(a)
+#ifndef	__ARMEB__
+	rev	x2,x2			// 15
+#endif
+	ldr	x7,[sp,#0]
+	add	x21,x21,x17			// h+=Sigma0(a)
+	str	x10,[sp,#24]
+	ror	x16,x25,#14
+	add	x20,x20,x28			// h+=K[i]
+	ror	x9,x4,#1
+	and	x17,x26,x25
+	ror	x8,x1,#19
+	bic	x28,x27,x25
+	ror	x10,x21,#28
+	add	x20,x20,x2			// h+=X[i]
+	eor	x16,x16,x25,ror#18
+	eor	x9,x9,x4,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x21,x22			// a^b, b^c in next round
+	eor	x16,x16,x25,ror#41	// Sigma1(e)
+	eor	x10,x10,x21,ror#34
+	add	x20,x20,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x8,x8,x1,ror#61
+	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
+	add	x20,x20,x16			// h+=Sigma1(e)
+	eor	x19,x19,x22			// Maj(a,b,c)
+	eor	x17,x10,x21,ror#39	// Sigma0(a)
+	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
+	add	x3,x3,x12
+	add	x24,x24,x20			// d+=h
+	add	x20,x20,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x3,x3,x9
+	add	x20,x20,x17			// h+=Sigma0(a)
+	add	x3,x3,x8
+Loop_16_xx:
+	ldr	x8,[sp,#8]
+	str	x11,[sp,#0]
+	ror	x16,x24,#14
+	add	x27,x27,x19			// h+=K[i]
+	ror	x10,x5,#1
+	and	x17,x25,x24
+	ror	x9,x2,#19
+	bic	x19,x26,x24
+	ror	x11,x20,#28
+	add	x27,x27,x3			// h+=X[i]
+	eor	x16,x16,x24,ror#18
+	eor	x10,x10,x5,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x20,x21			// a^b, b^c in next round
+	eor	x16,x16,x24,ror#41	// Sigma1(e)
+	eor	x11,x11,x20,ror#34
+	add	x27,x27,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x9,x9,x2,ror#61
+	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
+	add	x27,x27,x16			// h+=Sigma1(e)
+	eor	x28,x28,x21			// Maj(a,b,c)
+	eor	x17,x11,x20,ror#39	// Sigma0(a)
+	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
+	add	x4,x4,x13
+	add	x23,x23,x27			// d+=h
+	add	x27,x27,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x4,x4,x10
+	add	x27,x27,x17			// h+=Sigma0(a)
+	add	x4,x4,x9
+	ldr	x9,[sp,#16]
+	str	x12,[sp,#8]
+	ror	x16,x23,#14
+	add	x26,x26,x28			// h+=K[i]
+	ror	x11,x6,#1
+	and	x17,x24,x23
+	ror	x10,x3,#19
+	bic	x28,x25,x23
+	ror	x12,x27,#28
+	add	x26,x26,x4			// h+=X[i]
+	eor	x16,x16,x23,ror#18
+	eor	x11,x11,x6,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x27,x20			// a^b, b^c in next round
+	eor	x16,x16,x23,ror#41	// Sigma1(e)
+	eor	x12,x12,x27,ror#34
+	add	x26,x26,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x10,x10,x3,ror#61
+	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
+	add	x26,x26,x16			// h+=Sigma1(e)
+	eor	x19,x19,x20			// Maj(a,b,c)
+	eor	x17,x12,x27,ror#39	// Sigma0(a)
+	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
+	add	x5,x5,x14
+	add	x22,x22,x26			// d+=h
+	add	x26,x26,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x5,x5,x11
+	add	x26,x26,x17			// h+=Sigma0(a)
+	add	x5,x5,x10
+	ldr	x10,[sp,#24]
+	str	x13,[sp,#16]
+	ror	x16,x22,#14
+	add	x25,x25,x19			// h+=K[i]
+	ror	x12,x7,#1
+	and	x17,x23,x22
+	ror	x11,x4,#19
+	bic	x19,x24,x22
+	ror	x13,x26,#28
+	add	x25,x25,x5			// h+=X[i]
+	eor	x16,x16,x22,ror#18
+	eor	x12,x12,x7,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x26,x27			// a^b, b^c in next round
+	eor	x16,x16,x22,ror#41	// Sigma1(e)
+	eor	x13,x13,x26,ror#34
+	add	x25,x25,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x11,x11,x4,ror#61
+	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
+	add	x25,x25,x16			// h+=Sigma1(e)
+	eor	x28,x28,x27			// Maj(a,b,c)
+	eor	x17,x13,x26,ror#39	// Sigma0(a)
+	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
+	add	x6,x6,x15
+	add	x21,x21,x25			// d+=h
+	add	x25,x25,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x6,x6,x12
+	add	x25,x25,x17			// h+=Sigma0(a)
+	add	x6,x6,x11
+	ldr	x11,[sp,#0]
+	str	x14,[sp,#24]
+	ror	x16,x21,#14
+	add	x24,x24,x28			// h+=K[i]
+	ror	x13,x8,#1
+	and	x17,x22,x21
+	ror	x12,x5,#19
+	bic	x28,x23,x21
+	ror	x14,x25,#28
+	add	x24,x24,x6			// h+=X[i]
+	eor	x16,x16,x21,ror#18
+	eor	x13,x13,x8,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x25,x26			// a^b, b^c in next round
+	eor	x16,x16,x21,ror#41	// Sigma1(e)
+	eor	x14,x14,x25,ror#34
+	add	x24,x24,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x12,x12,x5,ror#61
+	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
+	add	x24,x24,x16			// h+=Sigma1(e)
+	eor	x19,x19,x26			// Maj(a,b,c)
+	eor	x17,x14,x25,ror#39	// Sigma0(a)
+	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
+	add	x7,x7,x0
+	add	x20,x20,x24			// d+=h
+	add	x24,x24,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x7,x7,x13
+	add	x24,x24,x17			// h+=Sigma0(a)
+	add	x7,x7,x12
+	ldr	x12,[sp,#8]
+	str	x15,[sp,#0]
+	ror	x16,x20,#14
+	add	x23,x23,x19			// h+=K[i]
+	ror	x14,x9,#1
+	and	x17,x21,x20
+	ror	x13,x6,#19
+	bic	x19,x22,x20
+	ror	x15,x24,#28
+	add	x23,x23,x7			// h+=X[i]
+	eor	x16,x16,x20,ror#18
+	eor	x14,x14,x9,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x24,x25			// a^b, b^c in next round
+	eor	x16,x16,x20,ror#41	// Sigma1(e)
+	eor	x15,x15,x24,ror#34
+	add	x23,x23,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x13,x13,x6,ror#61
+	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
+	add	x23,x23,x16			// h+=Sigma1(e)
+	eor	x28,x28,x25			// Maj(a,b,c)
+	eor	x17,x15,x24,ror#39	// Sigma0(a)
+	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
+	add	x8,x8,x1
+	add	x27,x27,x23			// d+=h
+	add	x23,x23,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x8,x8,x14
+	add	x23,x23,x17			// h+=Sigma0(a)
+	add	x8,x8,x13
+	ldr	x13,[sp,#16]
+	str	x0,[sp,#8]
+	ror	x16,x27,#14
+	add	x22,x22,x28			// h+=K[i]
+	ror	x15,x10,#1
+	and	x17,x20,x27
+	ror	x14,x7,#19
+	bic	x28,x21,x27
+	ror	x0,x23,#28
+	add	x22,x22,x8			// h+=X[i]
+	eor	x16,x16,x27,ror#18
+	eor	x15,x15,x10,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x23,x24			// a^b, b^c in next round
+	eor	x16,x16,x27,ror#41	// Sigma1(e)
+	eor	x0,x0,x23,ror#34
+	add	x22,x22,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x14,x14,x7,ror#61
+	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
+	add	x22,x22,x16			// h+=Sigma1(e)
+	eor	x19,x19,x24			// Maj(a,b,c)
+	eor	x17,x0,x23,ror#39	// Sigma0(a)
+	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
+	add	x9,x9,x2
+	add	x26,x26,x22			// d+=h
+	add	x22,x22,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x9,x9,x15
+	add	x22,x22,x17			// h+=Sigma0(a)
+	add	x9,x9,x14
+	ldr	x14,[sp,#24]
+	str	x1,[sp,#16]
+	ror	x16,x26,#14
+	add	x21,x21,x19			// h+=K[i]
+	ror	x0,x11,#1
+	and	x17,x27,x26
+	ror	x15,x8,#19
+	bic	x19,x20,x26
+	ror	x1,x22,#28
+	add	x21,x21,x9			// h+=X[i]
+	eor	x16,x16,x26,ror#18
+	eor	x0,x0,x11,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x22,x23			// a^b, b^c in next round
+	eor	x16,x16,x26,ror#41	// Sigma1(e)
+	eor	x1,x1,x22,ror#34
+	add	x21,x21,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x15,x15,x8,ror#61
+	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
+	add	x21,x21,x16			// h+=Sigma1(e)
+	eor	x28,x28,x23			// Maj(a,b,c)
+	eor	x17,x1,x22,ror#39	// Sigma0(a)
+	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
+	add	x10,x10,x3
+	add	x25,x25,x21			// d+=h
+	add	x21,x21,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x10,x10,x0
+	add	x21,x21,x17			// h+=Sigma0(a)
+	add	x10,x10,x15
+	ldr	x15,[sp,#0]
+	str	x2,[sp,#24]
+	ror	x16,x25,#14
+	add	x20,x20,x28			// h+=K[i]
+	ror	x1,x12,#1
+	and	x17,x26,x25
+	ror	x0,x9,#19
+	bic	x28,x27,x25
+	ror	x2,x21,#28
+	add	x20,x20,x10			// h+=X[i]
+	eor	x16,x16,x25,ror#18
+	eor	x1,x1,x12,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x21,x22			// a^b, b^c in next round
+	eor	x16,x16,x25,ror#41	// Sigma1(e)
+	eor	x2,x2,x21,ror#34
+	add	x20,x20,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x0,x0,x9,ror#61
+	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
+	add	x20,x20,x16			// h+=Sigma1(e)
+	eor	x19,x19,x22			// Maj(a,b,c)
+	eor	x17,x2,x21,ror#39	// Sigma0(a)
+	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
+	add	x11,x11,x4
+	add	x24,x24,x20			// d+=h
+	add	x20,x20,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x11,x11,x1
+	add	x20,x20,x17			// h+=Sigma0(a)
+	add	x11,x11,x0
+	ldr	x0,[sp,#8]
+	str	x3,[sp,#0]
+	ror	x16,x24,#14
+	add	x27,x27,x19			// h+=K[i]
+	ror	x2,x13,#1
+	and	x17,x25,x24
+	ror	x1,x10,#19
+	bic	x19,x26,x24
+	ror	x3,x20,#28
+	add	x27,x27,x11			// h+=X[i]
+	eor	x16,x16,x24,ror#18
+	eor	x2,x2,x13,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x20,x21			// a^b, b^c in next round
+	eor	x16,x16,x24,ror#41	// Sigma1(e)
+	eor	x3,x3,x20,ror#34
+	add	x27,x27,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x1,x1,x10,ror#61
+	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
+	add	x27,x27,x16			// h+=Sigma1(e)
+	eor	x28,x28,x21			// Maj(a,b,c)
+	eor	x17,x3,x20,ror#39	// Sigma0(a)
+	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
+	add	x12,x12,x5
+	add	x23,x23,x27			// d+=h
+	add	x27,x27,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x12,x12,x2
+	add	x27,x27,x17			// h+=Sigma0(a)
+	add	x12,x12,x1
+	ldr	x1,[sp,#16]
+	str	x4,[sp,#8]
+	ror	x16,x23,#14
+	add	x26,x26,x28			// h+=K[i]
+	ror	x3,x14,#1
+	and	x17,x24,x23
+	ror	x2,x11,#19
+	bic	x28,x25,x23
+	ror	x4,x27,#28
+	add	x26,x26,x12			// h+=X[i]
+	eor	x16,x16,x23,ror#18
+	eor	x3,x3,x14,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x27,x20			// a^b, b^c in next round
+	eor	x16,x16,x23,ror#41	// Sigma1(e)
+	eor	x4,x4,x27,ror#34
+	add	x26,x26,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x2,x2,x11,ror#61
+	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
+	add	x26,x26,x16			// h+=Sigma1(e)
+	eor	x19,x19,x20			// Maj(a,b,c)
+	eor	x17,x4,x27,ror#39	// Sigma0(a)
+	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
+	add	x13,x13,x6
+	add	x22,x22,x26			// d+=h
+	add	x26,x26,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x13,x13,x3
+	add	x26,x26,x17			// h+=Sigma0(a)
+	add	x13,x13,x2
+	ldr	x2,[sp,#24]
+	str	x5,[sp,#16]
+	ror	x16,x22,#14
+	add	x25,x25,x19			// h+=K[i]
+	ror	x4,x15,#1
+	and	x17,x23,x22
+	ror	x3,x12,#19
+	bic	x19,x24,x22
+	ror	x5,x26,#28
+	add	x25,x25,x13			// h+=X[i]
+	eor	x16,x16,x22,ror#18
+	eor	x4,x4,x15,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x26,x27			// a^b, b^c in next round
+	eor	x16,x16,x22,ror#41	// Sigma1(e)
+	eor	x5,x5,x26,ror#34
+	add	x25,x25,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x3,x3,x12,ror#61
+	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
+	add	x25,x25,x16			// h+=Sigma1(e)
+	eor	x28,x28,x27			// Maj(a,b,c)
+	eor	x17,x5,x26,ror#39	// Sigma0(a)
+	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
+	add	x14,x14,x7
+	add	x21,x21,x25			// d+=h
+	add	x25,x25,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x14,x14,x4
+	add	x25,x25,x17			// h+=Sigma0(a)
+	add	x14,x14,x3
+	ldr	x3,[sp,#0]
+	str	x6,[sp,#24]
+	ror	x16,x21,#14
+	add	x24,x24,x28			// h+=K[i]
+	ror	x5,x0,#1
+	and	x17,x22,x21
+	ror	x4,x13,#19
+	bic	x28,x23,x21
+	ror	x6,x25,#28
+	add	x24,x24,x14			// h+=X[i]
+	eor	x16,x16,x21,ror#18
+	eor	x5,x5,x0,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x25,x26			// a^b, b^c in next round
+	eor	x16,x16,x21,ror#41	// Sigma1(e)
+	eor	x6,x6,x25,ror#34
+	add	x24,x24,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x4,x4,x13,ror#61
+	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
+	add	x24,x24,x16			// h+=Sigma1(e)
+	eor	x19,x19,x26			// Maj(a,b,c)
+	eor	x17,x6,x25,ror#39	// Sigma0(a)
+	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
+	add	x15,x15,x8
+	add	x20,x20,x24			// d+=h
+	add	x24,x24,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x15,x15,x5
+	add	x24,x24,x17			// h+=Sigma0(a)
+	add	x15,x15,x4
+	ldr	x4,[sp,#8]
+	str	x7,[sp,#0]
+	ror	x16,x20,#14
+	add	x23,x23,x19			// h+=K[i]
+	ror	x6,x1,#1
+	and	x17,x21,x20
+	ror	x5,x14,#19
+	bic	x19,x22,x20
+	ror	x7,x24,#28
+	add	x23,x23,x15			// h+=X[i]
+	eor	x16,x16,x20,ror#18
+	eor	x6,x6,x1,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x24,x25			// a^b, b^c in next round
+	eor	x16,x16,x20,ror#41	// Sigma1(e)
+	eor	x7,x7,x24,ror#34
+	add	x23,x23,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x5,x5,x14,ror#61
+	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
+	add	x23,x23,x16			// h+=Sigma1(e)
+	eor	x28,x28,x25			// Maj(a,b,c)
+	eor	x17,x7,x24,ror#39	// Sigma0(a)
+	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
+	add	x0,x0,x9
+	add	x27,x27,x23			// d+=h
+	add	x23,x23,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x0,x0,x6
+	add	x23,x23,x17			// h+=Sigma0(a)
+	add	x0,x0,x5
+	ldr	x5,[sp,#16]
+	str	x8,[sp,#8]
+	ror	x16,x27,#14
+	add	x22,x22,x28			// h+=K[i]
+	ror	x7,x2,#1
+	and	x17,x20,x27
+	ror	x6,x15,#19
+	bic	x28,x21,x27
+	ror	x8,x23,#28
+	add	x22,x22,x0			// h+=X[i]
+	eor	x16,x16,x27,ror#18
+	eor	x7,x7,x2,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x23,x24			// a^b, b^c in next round
+	eor	x16,x16,x27,ror#41	// Sigma1(e)
+	eor	x8,x8,x23,ror#34
+	add	x22,x22,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x6,x6,x15,ror#61
+	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
+	add	x22,x22,x16			// h+=Sigma1(e)
+	eor	x19,x19,x24			// Maj(a,b,c)
+	eor	x17,x8,x23,ror#39	// Sigma0(a)
+	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
+	add	x1,x1,x10
+	add	x26,x26,x22			// d+=h
+	add	x22,x22,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x1,x1,x7
+	add	x22,x22,x17			// h+=Sigma0(a)
+	add	x1,x1,x6
+	ldr	x6,[sp,#24]
+	str	x9,[sp,#16]
+	ror	x16,x26,#14
+	add	x21,x21,x19			// h+=K[i]
+	ror	x8,x3,#1
+	and	x17,x27,x26
+	ror	x7,x0,#19
+	bic	x19,x20,x26
+	ror	x9,x22,#28
+	add	x21,x21,x1			// h+=X[i]
+	eor	x16,x16,x26,ror#18
+	eor	x8,x8,x3,ror#8
+	orr	x17,x17,x19			// Ch(e,f,g)
+	eor	x19,x22,x23			// a^b, b^c in next round
+	eor	x16,x16,x26,ror#41	// Sigma1(e)
+	eor	x9,x9,x22,ror#34
+	add	x21,x21,x17			// h+=Ch(e,f,g)
+	and	x28,x28,x19			// (b^c)&=(a^b)
+	eor	x7,x7,x0,ror#61
+	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
+	add	x21,x21,x16			// h+=Sigma1(e)
+	eor	x28,x28,x23			// Maj(a,b,c)
+	eor	x17,x9,x22,ror#39	// Sigma0(a)
+	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
+	add	x2,x2,x11
+	add	x25,x25,x21			// d+=h
+	add	x21,x21,x28			// h+=Maj(a,b,c)
+	ldr	x28,[x30],#8		// *K++, x19 in next round
+	add	x2,x2,x8
+	add	x21,x21,x17			// h+=Sigma0(a)
+	add	x2,x2,x7
+	ldr	x7,[sp,#0]
+	str	x10,[sp,#24]
+	ror	x16,x25,#14
+	add	x20,x20,x28			// h+=K[i]
+	ror	x9,x4,#1
+	and	x17,x26,x25
+	ror	x8,x1,#19
+	bic	x28,x27,x25
+	ror	x10,x21,#28
+	add	x20,x20,x2			// h+=X[i]
+	eor	x16,x16,x25,ror#18
+	eor	x9,x9,x4,ror#8
+	orr	x17,x17,x28			// Ch(e,f,g)
+	eor	x28,x21,x22			// a^b, b^c in next round
+	eor	x16,x16,x25,ror#41	// Sigma1(e)
+	eor	x10,x10,x21,ror#34
+	add	x20,x20,x17			// h+=Ch(e,f,g)
+	and	x19,x19,x28			// (b^c)&=(a^b)
+	eor	x8,x8,x1,ror#61
+	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
+	add	x20,x20,x16			// h+=Sigma1(e)
+	eor	x19,x19,x22			// Maj(a,b,c)
+	eor	x17,x10,x21,ror#39	// Sigma0(a)
+	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
+	add	x3,x3,x12
+	add	x24,x24,x20			// d+=h
+	add	x20,x20,x19			// h+=Maj(a,b,c)
+	ldr	x19,[x30],#8		// *K++, x28 in next round
+	add	x3,x3,x9
+	add	x20,x20,x17			// h+=Sigma0(a)
+	add	x3,x3,x8
+	cbnz	x19,Loop_16_xx
+
+	ldp	x0,x2,[x29,#96]
+	ldr	x1,[x29,#112]
+	sub	x30,x30,#648		// rewind
+
+	ldp	x3,x4,[x0]
+	ldp	x5,x6,[x0,#2*8]
+	add	x1,x1,#14*8			// advance input pointer
+	ldp	x7,x8,[x0,#4*8]
+	add	x20,x20,x3
+	ldp	x9,x10,[x0,#6*8]
+	add	x21,x21,x4
+	add	x22,x22,x5
+	add	x23,x23,x6
+	stp	x20,x21,[x0]
+	add	x24,x24,x7
+	add	x25,x25,x8
+	stp	x22,x23,[x0,#2*8]
+	add	x26,x26,x9
+	add	x27,x27,x10
+	cmp	x1,x2
+	stp	x24,x25,[x0,#4*8]
+	stp	x26,x27,[x0,#6*8]
+	b.ne	Loop
+
+	ldp	x19,x20,[x29,#16]
+	add	sp,sp,#4*8
+	ldp	x21,x22,[x29,#32]
+	ldp	x23,x24,[x29,#48]
+	ldp	x25,x26,[x29,#64]
+	ldp	x27,x28,[x29,#80]
+	ldp	x29,x30,[sp],#128
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+
+.section	.rodata
+.align	6
+
+LK512:	// 80 64-bit round constants (the K[i] added in each round), then a zero terminator
+.quad	0x428a2f98d728ae22,0x7137449123ef65cd
+.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+.quad	0x3956c25bf348b538,0x59f111f1b605d019
+.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+.quad	0xd807aa98a3030242,0x12835b0145706fbe
+.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+.quad	0x9bdc06a725c71235,0xc19bf174cf692694
+.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+.quad	0x983e5152ee66dfab,0xa831c66d2db43210
+.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4
+.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725
+.quad	0x06ca6351e003826f,0x142929670a0e6e70
+.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926
+.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+.quad	0x650a73548baf63de,0x766a0abb3c77b2a8
+.quad	0x81c2c92e47edaee6,0x92722c851482353b
+.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001
+.quad	0xc24b8b70d0f89791,0xc76c51a30654be30
+.quad	0xd192e819d6ef5218,0xd69906245565a910
+.quad	0xf40e35855771202a,0x106aa07032bbd1b8
+.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+.quad	0x748f82ee5defb2fc,0x78a5636f43172f60
+.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec
+.quad	0x90befffa23631e28,0xa4506cebde82bde9
+.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b
+.quad	0xca273eceea26619c,0xd186b8c721c0c207
+.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6
+.quad	0x113f9804bef90dae,0x1b710b35131c471b
+.quad	0x28db77f523047d84,0x32caab7b40c72493
+.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817
+.quad	0	// terminator: Loop_16_xx ends once the K word it loads (ldr x19,[x30],#8 ... cbnz x19) is zero
+
+.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	// ASCII: "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
+.align	2
+.align	2
+#endif
+#endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/win-aarch64/crypto/fipsmodule/vpaes-armv8.S b/deps/boringssl/win-aarch64/crypto/fipsmodule/vpaes-armv8.S
new file mode 100644
index 0000000..522daa6
--- /dev/null
+++ b/deps/boringssl/win-aarch64/crypto/fipsmodule/vpaes-armv8.S
@@ -0,0 +1,1272 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+
+#if !defined(OPENSSL_NO_ASM)
+#if defined(__aarch64__)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+#include <openssl/arm_arch.h>
+
+.section	.rodata
+
+
+.align	7	// totally strategic alignment: 2^7 bytes, so bit 6 of the table base is clear for the "and x11, x11, #~(1<<6)" wrap in the round loops
+_vpaes_consts:	// start of the vpaes constant tables; every two-.quad line is one 16-byte row
+Lk_mc_forward:	//	mc_forward
+.quad	0x0407060500030201, 0x0C0F0E0D080B0A09
+.quad	0x080B0A0904070605, 0x000302010C0F0E0D
+.quad	0x0C0F0E0D080B0A09, 0x0407060500030201
+.quad	0x000302010C0F0E0D, 0x080B0A0904070605
+Lk_mc_backward:	//	mc_backward
+.quad	0x0605040702010003, 0x0E0D0C0F0A09080B
+.quad	0x020100030E0D0C0F, 0x0A09080B06050407
+.quad	0x0E0D0C0F0A09080B, 0x0605040702010003
+.quad	0x0A09080B06050407, 0x020100030E0D0C0F
+Lk_sr:	//	sr
+.quad	0x0706050403020100, 0x0F0E0D0C0B0A0908
+.quad	0x030E09040F0A0500, 0x0B06010C07020D08
+.quad	0x0F060D040B020900, 0x070E050C030A0108
+.quad	0x0B0E0104070A0D00, 0x0306090C0F020508
+
+//
+// "Hot" constants
+//
+Lk_inv:	//	inv, inva
+.quad	0x0E05060F0D080180, 0x040703090A0B0C02
+.quad	0x01040A060F0B0780, 0x030D0E0C02050809
+Lk_ipt:	//	input transform (lo, hi)
+.quad	0xC2B2E8985A2A7000, 0xCABAE09052227808
+.quad	0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81
+Lk_sbo:	//	sbou, sbot
+.quad	0xD0D26D176FBDC700, 0x15AABF7AC502A878
+.quad	0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA
+Lk_sb1:	//	sb1u, sb1t
+.quad	0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
+.quad	0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
+Lk_sb2:	//	sb2u, sb2t
+.quad	0x69EB88400AE12900, 0xC2A163C8AB82234A
+.quad	0xE27A93C60B712400, 0x5EB7E955BC982FCD
+
+//
+//  Decryption stuff
+//
+Lk_dipt:	//	decryption input transform
+.quad	0x0F505B040B545F00, 0x154A411E114E451A
+.quad	0x86E383E660056500, 0x12771772F491F194
+Lk_dsbo:	//	decryption sbox final output
+.quad	0x1387EA537EF94000, 0xC7AA6DB9D4943E2D
+.quad	0x12D7560F93441D00, 0xCA4B8159D8C58E9C
+Lk_dsb9:	//	decryption sbox output *9*u, *9*t
+.quad	0x851C03539A86D600, 0xCAD51F504F994CC9
+.quad	0xC03B1789ECD74900, 0x725E2C9EB2FBA565
+Lk_dsbd:	//	decryption sbox output *D*u, *D*t
+.quad	0x7D57CCDFE6B1A200, 0xF56E9B13882A4439
+.quad	0x3CE2FAF724C6CB00, 0x2931180D15DEEFD3
+Lk_dsbb:	//	decryption sbox output *B*u, *B*t
+.quad	0xD022649296B44200, 0x602646F6B0F2D404
+.quad	0xC19498A6CD596700, 0xF3FF0C3E3255AA6B
+Lk_dsbe:	//	decryption sbox output *E*u, *E*t
+.quad	0x46F2929626D4D000, 0x2242600464B4F6B0
+.quad	0x0C55A6CDFFAAC100, 0x9467F36B98593E32
+
+//
+//  Key schedule constants
+//
+Lk_dksd:	//	decryption key schedule: invskew x*D
+.quad	0xFEB91A5DA3E44700, 0x0740E3A45A1DBEF9
+.quad	0x41C277F4B5368300, 0x5FDC69EAAB289D1E
+Lk_dksb:	//	decryption key schedule: invskew x*B
+.quad	0x9A4FCA1F8550D500, 0x03D653861CC94C99
+.quad	0x115BEDA7B6FC4A00, 0xD993256F7E3482C8
+Lk_dkse:	//	decryption key schedule: invskew x*E + 0x63
+.quad	0xD5031CCA1FC9D600, 0x53859A4C994F5086
+.quad	0xA23196054FDC7BE8, 0xCD5EF96A20B31487
+Lk_dks9:	//	decryption key schedule: invskew x*9
+.quad	0xB6116FC87ED9A700, 0x4AED933482255BFC
+.quad	0x4576516227143300, 0x8BB89FACE9DAFDCE
+
+Lk_rcon:	//	rcon
+.quad	0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81
+
+Lk_opt:	//	output transform
+.quad	0xFF9F4929D6B66000, 0xF7974121DEBE6808
+.quad	0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0
+Lk_deskew:	//	deskew tables: inverts the sbox's "skew"
+.quad	0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
+.quad	0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77
+
+.byte	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,65,82,77,118,56,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0	// ASCII: "Vector Permutation AES for ARMv8, Mike Hamburg (Stanford University)"
+.align	2
+
+.align	6
+
+.text
+##
+##  _vpaes_encrypt_preheat
+##
+##  Sets v17 to 0x0f in every byte and, using x10 as a cursor that starts at Lk_inv,
+##  loads v18-v19 = Lk_inv, v20-v21 = Lk_ipt, v22-v23 = Lk_sbo, v24-v25 = Lk_sb1, v26-v27 = Lk_sb2.
+##
+.def _vpaes_encrypt_preheat
+   .type 32
+.endef
+.align	4
+_vpaes_encrypt_preheat:
+	adrp	x10, Lk_inv			// x10 = address of Lk_inv (page part) ...
+	add	x10, x10, :lo12:Lk_inv		// ... plus its low 12 bits
+	movi	v17.16b, #0x0f			// v17 = low-nibble mask, used by the "and ..., v17.16b" steps of the encrypt cores
+	ld1	{v18.2d,v19.2d}, [x10],#32	// Lk_inv
+	ld1	{v20.2d,v21.2d,v22.2d,v23.2d}, [x10],#64	// Lk_ipt, Lk_sbo
+	ld1	{v24.2d,v25.2d,v26.2d,v27.2d}, [x10]		// Lk_sb1, Lk_sb2
+	ret
+
+
+##
+##  _vpaes_encrypt_core
+##
+##  AES-encrypt the block in v7.
+##
+##  Inputs:
+##     v7 = input block
+##     v17-v27 as loaded by _vpaes_encrypt_preheat
+##     x2 = scheduled keys; the round count is read from offset 240
+##
+##  Output in v0
+##  Writes v0-v5, v16, w8, x9, x10, x11; v7 is only read
+##  Trailing comments give the x86 instruction each AArch64 line was ported from
+##
+##
+.def _vpaes_encrypt_core
+   .type 32
+.endef
+.align	4
+_vpaes_encrypt_core:
+	mov	x9, x2				// x9 = round-key cursor, starts at the key schedule in x2
+	ldr	w8, [x2,#240]			// pull rounds
+	adrp	x11, Lk_mc_forward+16		// x11 = address of Lk_mc_forward+16 (page part) ...
+	add	x11, x11, :lo12:Lk_mc_forward+16	// ... plus its low 12 bits
+						// vmovdqa	.Lk_ipt(%rip),	%xmm2	# iptlo
+	ld1	{v16.2d}, [x9], #16		// vmovdqu	(%r9),	%xmm5		# round0 key
+	and	v1.16b, v7.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1
+	ushr	v0.16b, v7.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0
+	tbl	v1.16b, {v20.16b}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm1
+						// vmovdqa	.Lk_ipt+16(%rip), %xmm3	# ipthi
+	tbl	v2.16b, {v21.16b}, v0.16b	// vpshufb	%xmm0,	%xmm3,	%xmm2
+	eor	v0.16b, v1.16b, v16.16b		// vpxor	%xmm5,	%xmm1,	%xmm0
+	eor	v0.16b, v0.16b, v2.16b		// vpxor	%xmm2,	%xmm0,	%xmm0
+	b	Lenc_entry			// join the round loop at its top-of-round half
+
+.align	4
+Lenc_loop:
+	// middle of middle round
+	add	x10, x11, #0x40			// x10 = x11 + 0x40, address for the Lk_mc_backward[] load below
+	tbl	v4.16b, {v25.16b}, v2.16b		// vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
+	ld1	{v1.2d}, [x11], #16		// vmovdqa	-0x40(%r11,%r10), %xmm1	# Lk_mc_forward[]
+	tbl	v0.16b, {v24.16b}, v3.16b		// vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
+	eor	v4.16b, v4.16b, v16.16b		// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
+	tbl	v5.16b,	{v27.16b}, v2.16b		// vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
+	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
+	tbl	v2.16b, {v26.16b}, v3.16b		// vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
+	ld1	{v4.2d}, [x10]			// vmovdqa	(%r11,%r10), %xmm4	# Lk_mc_backward[]
+	tbl	v3.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
+	eor	v2.16b, v2.16b, v5.16b		// vpxor	%xmm5,	%xmm2,	%xmm2	# 2 = 2A
+	tbl	v0.16b, {v0.16b}, v4.16b	// vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
+	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
+	tbl	v4.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
+	eor	v0.16b, v0.16b, v3.16b		// vpxor	%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
+	and	x11, x11, #~(1<<6)		// and		$0x30,	%r11		# ... mod 4
+	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
+	sub	w8, w8, #1			// nr--
+
+Lenc_entry:
+	// top of round
+	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm0,	%xmm9,	%xmm1   # 0 = k
+	ushr	v0.16b, v0.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0	# 1 = i
+	tbl	v5.16b, {v19.16b}, v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
+	eor	v1.16b, v1.16b, v0.16b		// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
+	tbl	v3.16b, {v18.16b}, v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
+	tbl	v4.16b, {v18.16b}, v1.16b	// vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
+	eor	v3.16b, v3.16b, v5.16b		// vpxor	%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
+	eor	v4.16b, v4.16b, v5.16b		// vpxor	%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
+	tbl	v2.16b, {v18.16b}, v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
+	tbl	v3.16b, {v18.16b}, v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
+	eor	v2.16b, v2.16b, v1.16b		// vpxor	%xmm1,	%xmm2,	%xmm2  	# 2 = io
+	eor	v3.16b, v3.16b, v0.16b		// vpxor	%xmm0,	%xmm3,	%xmm3	# 3 = jo
+	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm5
+	cbnz	w8, Lenc_loop			// run another middle round while the counter w8 is non-zero
+
+	// middle of last round
+	add	x10, x11, #0x80			// x10 = x11 + 0x80, address for the Lk_sr[] load below
+						// vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou	.Lk_sbo
+						// vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
+	tbl	v4.16b, {v22.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
+	ld1	{v1.2d}, [x10]			// vmovdqa	0x40(%r11,%r10), %xmm1	# Lk_sr[]
+	tbl	v0.16b, {v23.16b}, v3.16b		// vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sbot
+	eor	v4.16b, v4.16b, v16.16b		// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sbou + k
+	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
+	tbl	v0.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0
+	ret
+
+
+.globl	vpaes_encrypt
+// vpaes_encrypt: encrypts the 16 bytes at [x0] with the key schedule at x2 and stores the 16 result bytes at [x1].
+.def vpaes_encrypt
+   .type 32
+.endef
+.align	4
+vpaes_encrypt:
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29,x30,[sp,#-16]!		// push frame pointer and link register
+	add	x29,sp,#0			// x29 = sp
+
+	ld1	{v7.16b}, [x0]			// v7 = input block, the register _vpaes_encrypt_core reads
+	bl	_vpaes_encrypt_preheat		// load the constant tables into v17-v27
+	bl	_vpaes_encrypt_core		// v0 = result; the core takes the key schedule from x2
+	st1	{v0.16b}, [x1]			// write the result block
+
+	ldp	x29,x30,[sp],#16		// pop frame pointer and link register
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+// _vpaes_encrypt_2x: two-block form of _vpaes_encrypt_core; inputs v14 and v15, results in v0 and v1, key schedule in x2.
+.def _vpaes_encrypt_2x
+   .type 32
+.endef
+.align	4
+_vpaes_encrypt_2x:	// writes v0-v5, v8-v13, v16, w8, x9, x10, x11
+	mov	x9, x2				// x9 = round-key cursor, starts at the key schedule in x2
+	ldr	w8, [x2,#240]			// pull rounds
+	adrp	x11, Lk_mc_forward+16		// x11 = address of Lk_mc_forward+16 (page part) ...
+	add	x11, x11, :lo12:Lk_mc_forward+16	// ... plus its low 12 bits
+						// vmovdqa	.Lk_ipt(%rip),	%xmm2	# iptlo
+	ld1	{v16.2d}, [x9], #16		// vmovdqu	(%r9),	%xmm5		# round0 key
+	and	v1.16b,  v14.16b,  v17.16b	// vpand	%xmm9,	%xmm0,	%xmm1
+	ushr	v0.16b,  v14.16b,  #4		// vpsrlb	$4,	%xmm0,	%xmm0
+	and	v9.16b,  v15.16b,  v17.16b	// second block: the uncommented vector ops repeat the first-block step, keeping state in v8-v13
+	ushr	v8.16b,  v15.16b,  #4
+	tbl	v1.16b,  {v20.16b}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm1
+	tbl	v9.16b,  {v20.16b}, v9.16b
+						// vmovdqa	.Lk_ipt+16(%rip), %xmm3	# ipthi
+	tbl	v2.16b,  {v21.16b}, v0.16b	// vpshufb	%xmm0,	%xmm3,	%xmm2
+	tbl	v10.16b, {v21.16b}, v8.16b
+	eor	v0.16b,  v1.16b,   v16.16b	// vpxor	%xmm5,	%xmm1,	%xmm0
+	eor	v8.16b,  v9.16b,   v16.16b
+	eor	v0.16b,  v0.16b,   v2.16b	// vpxor	%xmm2,	%xmm0,	%xmm0
+	eor	v8.16b,  v8.16b,   v10.16b
+	b	Lenc_2x_entry			// join the round loop at its top-of-round half
+
+.align	4
+Lenc_2x_loop:
+	// middle of middle round
+	add	x10, x11, #0x40			// x10 = x11 + 0x40, address for the Lk_mc_backward[] load below
+	tbl	v4.16b,  {v25.16b}, v2.16b	// vpshufb	%xmm2,	%xmm13,	%xmm4	# 4 = sb1u
+	tbl	v12.16b, {v25.16b}, v10.16b
+	ld1	{v1.2d}, [x11], #16		// vmovdqa	-0x40(%r11,%r10), %xmm1	# Lk_mc_forward[]
+	tbl	v0.16b,  {v24.16b}, v3.16b	// vpshufb	%xmm3,	%xmm12,	%xmm0	# 0 = sb1t
+	tbl	v8.16b,  {v24.16b}, v11.16b
+	eor	v4.16b,  v4.16b,  v16.16b	// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sb1u + k
+	eor	v12.16b, v12.16b, v16.16b
+	tbl	v5.16b,	 {v27.16b}, v2.16b	// vpshufb	%xmm2,	%xmm15,	%xmm5	# 4 = sb2u
+	tbl	v13.16b, {v27.16b}, v10.16b
+	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
+	eor	v8.16b,  v8.16b,  v12.16b
+	tbl	v2.16b,  {v26.16b}, v3.16b	// vpshufb	%xmm3,	%xmm14,	%xmm2	# 2 = sb2t
+	tbl	v10.16b, {v26.16b}, v11.16b
+	ld1	{v4.2d}, [x10]			// vmovdqa	(%r11,%r10), %xmm4	# Lk_mc_backward[]
+	tbl	v3.16b,  {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm3	# 0 = B
+	tbl	v11.16b, {v8.16b}, v1.16b
+	eor	v2.16b,  v2.16b,  v5.16b	// vpxor	%xmm5,	%xmm2,	%xmm2	# 2 = 2A
+	eor	v10.16b, v10.16b, v13.16b
+	tbl	v0.16b,  {v0.16b}, v4.16b	// vpshufb	%xmm4,	%xmm0,	%xmm0	# 3 = D
+	tbl	v8.16b,  {v8.16b}, v4.16b
+	eor	v3.16b,  v3.16b,  v2.16b	// vpxor	%xmm2,	%xmm3,	%xmm3	# 0 = 2A+B
+	eor	v11.16b, v11.16b, v10.16b
+	tbl	v4.16b,  {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm4	# 0 = 2B+C
+	tbl	v12.16b, {v11.16b},v1.16b
+	eor	v0.16b,  v0.16b,  v3.16b	// vpxor	%xmm3,	%xmm0,	%xmm0	# 3 = 2A+B+D
+	eor	v8.16b,  v8.16b,  v11.16b
+	and	x11, x11, #~(1<<6)		// and		$0x30,	%r11		# ... mod 4
+	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0, %xmm0	# 0 = 2A+3B+C+D
+	eor	v8.16b,  v8.16b,  v12.16b
+	sub	w8, w8, #1			// nr--
+
+Lenc_2x_entry:
+	// top of round
+	and	v1.16b,  v0.16b, v17.16b	// vpand	%xmm0,	%xmm9,	%xmm1   # 0 = k
+	ushr	v0.16b,  v0.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0	# 1 = i
+	and	v9.16b,  v8.16b, v17.16b
+	ushr	v8.16b,  v8.16b, #4
+	tbl	v5.16b,  {v19.16b},v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm5	# 2 = a/k
+	tbl	v13.16b, {v19.16b},v9.16b
+	eor	v1.16b,  v1.16b,  v0.16b	// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
+	eor	v9.16b,  v9.16b,  v8.16b
+	tbl	v3.16b,  {v18.16b},v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3  	# 3 = 1/i
+	tbl	v11.16b, {v18.16b},v8.16b
+	tbl	v4.16b,  {v18.16b},v1.16b	// vpshufb	%xmm1, 	%xmm10,	%xmm4  	# 4 = 1/j
+	tbl	v12.16b, {v18.16b},v9.16b
+	eor	v3.16b,  v3.16b,  v5.16b	// vpxor	%xmm5,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
+	eor	v11.16b, v11.16b, v13.16b
+	eor	v4.16b,  v4.16b,  v5.16b	// vpxor	%xmm5,	%xmm4,	%xmm4  	# 4 = jak = 1/j + a/k
+	eor	v12.16b, v12.16b, v13.16b
+	tbl	v2.16b,  {v18.16b},v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2  	# 2 = 1/iak
+	tbl	v10.16b, {v18.16b},v11.16b
+	tbl	v3.16b,  {v18.16b},v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm3	# 3 = 1/jak
+	tbl	v11.16b, {v18.16b},v12.16b
+	eor	v2.16b,  v2.16b,  v1.16b	// vpxor	%xmm1,	%xmm2,	%xmm2  	# 2 = io
+	eor	v10.16b, v10.16b, v9.16b
+	eor	v3.16b,  v3.16b,  v0.16b	// vpxor	%xmm0,	%xmm3,	%xmm3	# 3 = jo
+	eor	v11.16b, v11.16b, v8.16b
+	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm5
+	cbnz	w8, Lenc_2x_loop		// run another middle round while the counter w8 is non-zero
+
+	// middle of last round
+	add	x10, x11, #0x80			// x10 = x11 + 0x80, address for the Lk_sr[] load below
+						// vmovdqa	-0x60(%r10), %xmm4	# 3 : sbou	.Lk_sbo
+						// vmovdqa	-0x50(%r10), %xmm0	# 0 : sbot	.Lk_sbo+16
+	tbl	v4.16b,  {v22.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
+	tbl	v12.16b, {v22.16b}, v10.16b
+	ld1	{v1.2d}, [x10]			// vmovdqa	0x40(%r11,%r10), %xmm1	# Lk_sr[]
+	tbl	v0.16b,  {v23.16b}, v3.16b	// vpshufb	%xmm3,	%xmm0,	%xmm0	# 0 = sbot
+	tbl	v8.16b,  {v23.16b}, v11.16b
+	eor	v4.16b,  v4.16b,  v16.16b	// vpxor	%xmm5,	%xmm4,	%xmm4	# 4 = sbou + k
+	eor	v12.16b, v12.16b, v16.16b
+	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0	# 0 = A
+	eor	v8.16b,  v8.16b,  v12.16b
+	tbl	v0.16b,  {v0.16b},v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0
+	tbl	v1.16b,  {v8.16b},v1.16b	// second result lands in v1, replacing the Lk_sr[] row it was permuted with
+	ret					// v0 = result for the v14 input, v1 = result for the v15 input
+
+// _vpaes_decrypt_preheat: sets v17 to 0x0f bytes and loads v18-v31 from Lk_inv and the tables at Lk_dipt (x10/x11 are scratch).
+.def _vpaes_decrypt_preheat
+   .type 32
+.endef
+.align	4
+_vpaes_decrypt_preheat:
+	adrp	x10, Lk_inv
+	add	x10, x10, :lo12:Lk_inv
+	movi	v17.16b, #0x0f			// 0x0f in every byte
+	adrp	x11, Lk_dipt
+	add	x11, x11, :lo12:Lk_dipt
+	ld1	{v18.2d,v19.2d}, [x10],#32	// Lk_inv
+	ld1	{v20.2d,v21.2d,v22.2d,v23.2d}, [x11],#64	// Lk_dipt, Lk_dsbo
+	ld1	{v24.2d,v25.2d,v26.2d,v27.2d}, [x11],#64	// Lk_dsb9, Lk_dsbd
+	ld1	{v28.2d,v29.2d,v30.2d,v31.2d}, [x11]		// Lk_dsbb, Lk_dsbe
+	ret
+
+
+##
+##  Decryption core
+##
+##  Same API as encryption core.
+##  In: v7 = block, x2 = key (rounds at [x2,#240]); out: v0; scratch: x8-x11, v1-v5, v16.
+.def _vpaes_decrypt_core
+   .type 32
+.endef
+.align	4
+_vpaes_decrypt_core:
+	mov	x9, x2
+	ldr	w8, [x2,#240]			// pull rounds
+
+						// vmovdqa	.Lk_dipt(%rip), %xmm2	# iptlo
+	lsl	x11, x8, #4			// mov	%rax,	%r11;	shl	$4, %r11
+	eor	x11, x11, #0x30			// xor		$0x30,	%r11
+	adrp	x10, Lk_sr
+	add	x10, x10, :lo12:Lk_sr
+	and	x11, x11, #0x30			// and		$0x30,	%r11
+	add	x11, x11, x10
+	adrp	x10, Lk_mc_forward+48
+	add	x10, x10, :lo12:Lk_mc_forward+48
+
+	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm4		# round0 key
+	and	v1.16b, v7.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1
+	ushr	v0.16b, v7.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0
+	tbl	v2.16b, {v20.16b}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm2
+	ld1	{v5.2d}, [x10]			// vmovdqa	Lk_mc_forward+48(%rip), %xmm5
+						// vmovdqa	.Lk_dipt+16(%rip), %xmm1 # ipthi
+	tbl	v0.16b, {v21.16b}, v0.16b	// vpshufb	%xmm0,	%xmm1,	%xmm0
+	eor	v2.16b, v2.16b, v16.16b		// vpxor	%xmm4,	%xmm2,	%xmm2
+	eor	v0.16b, v0.16b, v2.16b		// vpxor	%xmm2,	%xmm0,	%xmm0
+	b	Ldec_entry
+
+.align	4
+Ldec_loop:
+//
+//  Inverse mix columns
+//
+						// vmovdqa	-0x20(%r10),%xmm4		# 4 : sb9u
+						// vmovdqa	-0x10(%r10),%xmm1		# 0 : sb9t
+	tbl	v4.16b, {v24.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sb9u
+	tbl	v1.16b, {v25.16b}, v3.16b		// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sb9t
+	eor	v0.16b, v4.16b, v16.16b		// vpxor	%xmm4,	%xmm0,	%xmm0
+						// vmovdqa	0x00(%r10),%xmm4		# 4 : sbdu
+	eor	v0.16b, v0.16b, v1.16b		// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
+						// vmovdqa	0x10(%r10),%xmm1		# 0 : sbdt
+
+	tbl	v4.16b, {v26.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbdu
+	tbl	v0.16b, {v0.16b}, v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
+	tbl	v1.16b, {v27.16b}, v3.16b		// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbdt
+	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
+						// vmovdqa	0x20(%r10),	%xmm4		# 4 : sbbu
+	eor	v0.16b, v0.16b, v1.16b		// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
+						// vmovdqa	0x30(%r10),	%xmm1		# 0 : sbbt
+
+	tbl	v4.16b, {v28.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbbu
+	tbl	v0.16b, {v0.16b}, v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
+	tbl	v1.16b, {v29.16b}, v3.16b		// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbbt
+	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
+						// vmovdqa	0x40(%r10),	%xmm4		# 4 : sbeu
+	eor	v0.16b, v0.16b, v1.16b		// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
+						// vmovdqa	0x50(%r10),	%xmm1		# 0 : sbet
+
+	tbl	v4.16b, {v30.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbeu
+	tbl	v0.16b, {v0.16b}, v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
+	tbl	v1.16b, {v31.16b}, v3.16b		// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbet
+	eor	v0.16b, v0.16b, v4.16b		// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
+	ext	v5.16b, v5.16b, v5.16b, #12	// vpalignr $12,	%xmm5,	%xmm5,	%xmm5
+	eor	v0.16b, v0.16b, v1.16b		// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
+	sub	w8, w8, #1			// sub		$1,%rax			# nr--
+
+Ldec_entry:
+	// top of round
+	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1	# 0 = k
+	ushr	v0.16b, v0.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0	# 1 = i
+	tbl	v2.16b, {v19.16b}, v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm2	# 2 = a/k
+	eor	v1.16b,	v1.16b, v0.16b		// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
+	tbl	v3.16b, {v18.16b}, v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3	# 3 = 1/i
+	tbl	v4.16b, {v18.16b}, v1.16b	// vpshufb	%xmm1,	%xmm10,	%xmm4	# 4 = 1/j
+	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
+	eor	v4.16b, v4.16b, v2.16b		// vpxor	%xmm2, 	%xmm4,	%xmm4	# 4 = jak = 1/j + a/k
+	tbl	v2.16b, {v18.16b}, v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2	# 2 = 1/iak
+	tbl	v3.16b, {v18.16b}, v4.16b	// vpshufb	%xmm4,  %xmm10,	%xmm3	# 3 = 1/jak
+	eor	v2.16b, v2.16b, v1.16b		// vpxor	%xmm1,	%xmm2,	%xmm2	# 2 = io
+	eor	v3.16b, v3.16b, v0.16b		// vpxor	%xmm0,  %xmm3,	%xmm3	# 3 = jo
+	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm0
+	cbnz	w8, Ldec_loop
+
+	// middle of last round
+						// vmovdqa	0x60(%r10),	%xmm4	# 3 : sbou
+	tbl	v4.16b, {v22.16b}, v2.16b		// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
+						// vmovdqa	0x70(%r10),	%xmm1	# 0 : sbot
+	ld1	{v2.2d}, [x11]			// vmovdqa	-0x160(%r11),	%xmm2	# Lk_sr-Lk_dsbd=-0x160
+	tbl	v1.16b, {v23.16b}, v3.16b		// vpshufb	%xmm3,	%xmm1,	%xmm1	# 0 = sb1t
+	eor	v4.16b, v4.16b, v16.16b		// vpxor	%xmm0,	%xmm4,	%xmm4	# 4 = sb1u + k
+	eor	v0.16b, v1.16b, v4.16b		// vpxor	%xmm4,	%xmm1,	%xmm0	# 0 = A
+	tbl	v0.16b, {v0.16b}, v2.16b	// vpshufb	%xmm2,	%xmm0,	%xmm0
+	ret
+
+// vpaes_decrypt: decrypts the 16-byte block at [x0] with the key schedule at x2 and stores the result at [x1].
+.globl	vpaes_decrypt
+
+.def vpaes_decrypt
+   .type 32
+.endef
+.align	4
+vpaes_decrypt:
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+
+	ld1	{v7.16b}, [x0]			// core takes its input in v7
+	bl	_vpaes_decrypt_preheat
+	bl	_vpaes_decrypt_core
+	st1	{v0.16b}, [x1]			// core leaves its result in v0
+
+	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+// _vpaes_decrypt_2x: two-block decryption; each step is issued twice, on v0-v5 for the first block and v8-v13 for the second.
+// v14-v15 input, v0-v1 output
+.def _vpaes_decrypt_2x
+   .type 32
+.endef
+.align	4
+_vpaes_decrypt_2x:
+	mov	x9, x2
+	ldr	w8, [x2,#240]			// pull rounds
+
+						// vmovdqa	.Lk_dipt(%rip), %xmm2	# iptlo
+	lsl	x11, x8, #4			// mov	%rax,	%r11;	shl	$4, %r11
+	eor	x11, x11, #0x30			// xor		$0x30,	%r11
+	adrp	x10, Lk_sr
+	add	x10, x10, :lo12:Lk_sr
+	and	x11, x11, #0x30			// and		$0x30,	%r11
+	add	x11, x11, x10
+	adrp	x10, Lk_mc_forward+48
+	add	x10, x10, :lo12:Lk_mc_forward+48
+
+	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm4		# round0 key
+	and	v1.16b,  v14.16b, v17.16b	// vpand	%xmm9,	%xmm0,	%xmm1
+	ushr	v0.16b,  v14.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0
+	and	v9.16b,  v15.16b, v17.16b
+	ushr	v8.16b,  v15.16b, #4
+	tbl	v2.16b,  {v20.16b},v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm2
+	tbl	v10.16b, {v20.16b},v9.16b
+	ld1	{v5.2d}, [x10]			// vmovdqa	Lk_mc_forward+48(%rip), %xmm5
+						// vmovdqa	.Lk_dipt+16(%rip), %xmm1 # ipthi
+	tbl	v0.16b,  {v21.16b},v0.16b	// vpshufb	%xmm0,	%xmm1,	%xmm0
+	tbl	v8.16b,  {v21.16b},v8.16b
+	eor	v2.16b,  v2.16b,  v16.16b	// vpxor	%xmm4,	%xmm2,	%xmm2
+	eor	v10.16b, v10.16b, v16.16b
+	eor	v0.16b,  v0.16b,  v2.16b	// vpxor	%xmm2,	%xmm0,	%xmm0
+	eor	v8.16b,  v8.16b,  v10.16b
+	b	Ldec_2x_entry
+
+.align	4
+Ldec_2x_loop:
+//
+//  Inverse mix columns
+//
+						// vmovdqa	-0x20(%r10),%xmm4		# 4 : sb9u
+						// vmovdqa	-0x10(%r10),%xmm1		# 0 : sb9t
+	tbl	v4.16b,  {v24.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sb9u
+	tbl	v12.16b, {v24.16b}, v10.16b
+	tbl	v1.16b,  {v25.16b}, v3.16b	// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sb9t
+	tbl	v9.16b,  {v25.16b}, v11.16b
+	eor	v0.16b,  v4.16b,  v16.16b	// vpxor	%xmm4,	%xmm0,	%xmm0
+	eor	v8.16b,  v12.16b, v16.16b
+						// vmovdqa	0x00(%r10),%xmm4		# 4 : sbdu
+	eor	v0.16b,  v0.16b,  v1.16b	// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
+	eor	v8.16b,  v8.16b,  v9.16b	// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
+						// vmovdqa	0x10(%r10),%xmm1		# 0 : sbdt
+
+	tbl	v4.16b,  {v26.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbdu
+	tbl	v12.16b, {v26.16b}, v10.16b
+	tbl	v0.16b,  {v0.16b},v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
+	tbl	v8.16b,  {v8.16b},v5.16b
+	tbl	v1.16b,  {v27.16b}, v3.16b	// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbdt
+	tbl	v9.16b,  {v27.16b}, v11.16b
+	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
+	eor	v8.16b,  v8.16b,  v12.16b
+						// vmovdqa	0x20(%r10),	%xmm4		# 4 : sbbu
+	eor	v0.16b,  v0.16b,  v1.16b	// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
+	eor	v8.16b,  v8.16b,  v9.16b
+						// vmovdqa	0x30(%r10),	%xmm1		# 0 : sbbt
+
+	tbl	v4.16b,  {v28.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbbu
+	tbl	v12.16b, {v28.16b}, v10.16b
+	tbl	v0.16b,  {v0.16b},v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
+	tbl	v8.16b,  {v8.16b},v5.16b
+	tbl	v1.16b,  {v29.16b}, v3.16b	// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbbt
+	tbl	v9.16b,  {v29.16b}, v11.16b
+	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
+	eor	v8.16b,  v8.16b,  v12.16b
+						// vmovdqa	0x40(%r10),	%xmm4		# 4 : sbeu
+	eor	v0.16b,  v0.16b,  v1.16b	// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
+	eor	v8.16b,  v8.16b,  v9.16b
+						// vmovdqa	0x50(%r10),	%xmm1		# 0 : sbet
+
+	tbl	v4.16b,  {v30.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4		# 4 = sbeu
+	tbl	v12.16b, {v30.16b}, v10.16b
+	tbl	v0.16b,  {v0.16b},v5.16b	// vpshufb	%xmm5,	%xmm0,	%xmm0		# MC ch
+	tbl	v8.16b,  {v8.16b},v5.16b
+	tbl	v1.16b,  {v31.16b}, v3.16b	// vpshufb	%xmm3,	%xmm1,	%xmm1		# 0 = sbet
+	tbl	v9.16b,  {v31.16b}, v11.16b
+	eor	v0.16b,  v0.16b,  v4.16b	// vpxor	%xmm4,	%xmm0,	%xmm0		# 4 = ch
+	eor	v8.16b,  v8.16b,  v12.16b
+	ext	v5.16b,  v5.16b,  v5.16b, #12	// vpalignr $12,	%xmm5,	%xmm5,	%xmm5
+	eor	v0.16b,  v0.16b,  v1.16b	// vpxor	%xmm1,	%xmm0,	%xmm0		# 0 = ch
+	eor	v8.16b,  v8.16b,  v9.16b
+	sub	w8, w8, #1			// sub		$1,%rax			# nr--
+
+Ldec_2x_entry:
+	// top of round
+	and	v1.16b,  v0.16b,  v17.16b	// vpand	%xmm9,	%xmm0,	%xmm1	# 0 = k
+	ushr	v0.16b,  v0.16b,  #4		// vpsrlb	$4,	%xmm0,	%xmm0	# 1 = i
+	and	v9.16b,  v8.16b,  v17.16b
+	ushr	v8.16b,  v8.16b,  #4
+	tbl	v2.16b,  {v19.16b},v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm2	# 2 = a/k
+	tbl	v10.16b, {v19.16b},v9.16b
+	eor	v1.16b,	 v1.16b,  v0.16b	// vpxor	%xmm0,	%xmm1,	%xmm1	# 0 = j
+	eor	v9.16b,	 v9.16b,  v8.16b
+	tbl	v3.16b,  {v18.16b},v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3	# 3 = 1/i
+	tbl	v11.16b, {v18.16b},v8.16b
+	tbl	v4.16b,  {v18.16b},v1.16b	// vpshufb	%xmm1,	%xmm10,	%xmm4	# 4 = 1/j
+	tbl	v12.16b, {v18.16b},v9.16b
+	eor	v3.16b,  v3.16b,  v2.16b	// vpxor	%xmm2,	%xmm3,	%xmm3	# 3 = iak = 1/i + a/k
+	eor	v11.16b, v11.16b, v10.16b
+	eor	v4.16b,  v4.16b,  v2.16b	// vpxor	%xmm2, 	%xmm4,	%xmm4	# 4 = jak = 1/j + a/k
+	eor	v12.16b, v12.16b, v10.16b
+	tbl	v2.16b,  {v18.16b},v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm2	# 2 = 1/iak
+	tbl	v10.16b, {v18.16b},v11.16b
+	tbl	v3.16b,  {v18.16b},v4.16b	// vpshufb	%xmm4,  %xmm10,	%xmm3	# 3 = 1/jak
+	tbl	v11.16b, {v18.16b},v12.16b
+	eor	v2.16b,  v2.16b,  v1.16b	// vpxor	%xmm1,	%xmm2,	%xmm2	# 2 = io
+	eor	v10.16b, v10.16b, v9.16b
+	eor	v3.16b,  v3.16b,  v0.16b	// vpxor	%xmm0,  %xmm3,	%xmm3	# 3 = jo
+	eor	v11.16b, v11.16b, v8.16b
+	ld1	{v16.2d}, [x9],#16		// vmovdqu	(%r9),	%xmm0
+	cbnz	w8, Ldec_2x_loop
+
+	// middle of last round
+						// vmovdqa	0x60(%r10),	%xmm4	# 3 : sbou
+	tbl	v4.16b,  {v22.16b}, v2.16b	// vpshufb	%xmm2,	%xmm4,	%xmm4	# 4 = sbou
+	tbl	v12.16b, {v22.16b}, v10.16b
+						// vmovdqa	0x70(%r10),	%xmm1	# 0 : sbot
+	tbl	v1.16b,  {v23.16b}, v3.16b	// vpshufb	%xmm3,	%xmm1,	%xmm1	# 0 = sb1t
+	tbl	v9.16b,  {v23.16b}, v11.16b
+	ld1	{v2.2d}, [x11]			// vmovdqa	-0x160(%r11),	%xmm2	# Lk_sr-Lk_dsbd=-0x160
+	eor	v4.16b,  v4.16b,  v16.16b	// vpxor	%xmm0,	%xmm4,	%xmm4	# 4 = sb1u + k
+	eor	v12.16b, v12.16b, v16.16b
+	eor	v0.16b,  v1.16b,  v4.16b	// vpxor	%xmm4,	%xmm1,	%xmm0	# 0 = A
+	eor	v8.16b,  v9.16b,  v12.16b
+	tbl	v0.16b,  {v0.16b},v2.16b	// vpshufb	%xmm2,	%xmm0,	%xmm0
+	tbl	v1.16b,  {v8.16b},v2.16b
+	ret
+
+########################################################
+##                                                    ##
+##                  AES key schedule                  ##
+##                                                    ##
+########################################################
+.def _vpaes_key_preheat
+   .type 32
+.endef
+.align	4
+_vpaes_key_preheat:	// loads the key-schedule constants into v8, v9 and v16-v31 (x10/x11 scratch)
+	adrp	x10, Lk_inv
+	add	x10, x10, :lo12:Lk_inv
+	movi	v16.16b, #0x5b			// Lk_s63
+	adrp	x11, Lk_sb1
+	add	x11, x11, :lo12:Lk_sb1
+	movi	v17.16b, #0x0f			// Lk_s0F
+	ld1	{v18.2d,v19.2d,v20.2d,v21.2d}, [x10]		// Lk_inv, Lk_ipt
+	adrp	x10, Lk_dksd
+	add	x10, x10, :lo12:Lk_dksd
+	ld1	{v22.2d,v23.2d}, [x11]		// Lk_sb1
+	adrp	x11, Lk_mc_forward
+	add	x11, x11, :lo12:Lk_mc_forward
+	ld1	{v24.2d,v25.2d,v26.2d,v27.2d}, [x10],#64	// Lk_dksd, Lk_dksb
+	ld1	{v28.2d,v29.2d,v30.2d,v31.2d}, [x10],#64	// Lk_dkse, Lk_dks9
+	ld1	{v8.2d}, [x10]			// Lk_rcon
+	ld1	{v9.2d}, [x11]			// Lk_mc_forward[0]
+	ret
+
+// _vpaes_schedule_core: x0 = user key, w1 = key bits, x2 = round-key output pointer, w3 = nonzero when decrypting, x8 = offset added to Lk_sr.
+.def _vpaes_schedule_core
+   .type 32
+.endef
+.align	4
+_vpaes_schedule_core:
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29, x30, [sp,#-16]!
+	add	x29,sp,#0
+
+	bl	_vpaes_key_preheat		// load the tables
+
+	ld1	{v0.16b}, [x0],#16		// vmovdqu	(%rdi),	%xmm0		# load key (unaligned)
+
+	// input transform
+	mov	v3.16b, v0.16b			// vmovdqa	%xmm0,	%xmm3
+	bl	_vpaes_schedule_transform
+	mov	v7.16b, v0.16b			// vmovdqa	%xmm0,	%xmm7
+
+	adrp	x10, Lk_sr		// lea	Lk_sr(%rip),%r10
+	add	x10, x10, :lo12:Lk_sr
+
+	add	x8, x8, x10
+	cbnz	w3, Lschedule_am_decrypting
+
+	// encrypting, output zeroth round key after transform
+	st1	{v0.2d}, [x2]			// vmovdqu	%xmm0,	(%rdx)
+	b	Lschedule_go
+
+Lschedule_am_decrypting:
+	// decrypting, output zeroth round key after shiftrows
+	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),	%xmm1
+	tbl	v3.16b, {v3.16b}, v1.16b	// vpshufb  %xmm1,	%xmm3,	%xmm3
+	st1	{v3.2d}, [x2]			// vmovdqu	%xmm3,	(%rdx)
+	eor	x8, x8, #0x30			// xor	$0x30, %r8
+
+Lschedule_go:
+	cmp	w1, #192			// cmp	$192,	%esi
+	b.hi	Lschedule_256
+	b.eq	Lschedule_192
+	// 128: fall through
+
+##
+##  .schedule_128
+##
+##  128-bit specific part of key schedule.
+##
+##  This schedule is really simple, because all its parts
+##  are accomplished by the subroutines.
+##
+Lschedule_128:
+	mov	x0, #10			// mov	$10, %esi
+
+Loop_schedule_128:
+	sub	x0, x0, #1			// dec	%esi
+	bl	_vpaes_schedule_round
+	cbz	x0, Lschedule_mangle_last
+	bl	_vpaes_schedule_mangle		// write output
+	b	Loop_schedule_128
+
+##
+##  .aes_schedule_192
+##
+##  192-bit specific part of key schedule.
+##
+##  The main body of this schedule is the same as the 128-bit
+##  schedule, but with more smearing.  The long, high side is
+##  stored in %xmm7 as before, and the short, low side is in
+##  the high bits of %xmm6.
+##
+##  This schedule is somewhat nastier, however, because each
+##  round produces 192 bits of key material, or 1.5 round keys.
+##  Therefore, on each cycle we do 2 rounds and produce 3 round
+##  keys.
+##
+.align	4
+Lschedule_192:
+	sub	x0, x0, #8			// x0 was post-incremented by 16 above; back up to key+8
+	ld1	{v0.16b}, [x0]		// vmovdqu	8(%rdi),%xmm0		# load key part 2 (very unaligned)
+	bl	_vpaes_schedule_transform	// input transform
+	mov	v6.16b, v0.16b			// vmovdqa	%xmm0,	%xmm6		# save short part
+	eor	v4.16b, v4.16b, v4.16b		// vpxor	%xmm4,	%xmm4, %xmm4	# clear 4
+	ins	v6.d[0], v4.d[0]		// vmovhlps	%xmm4,	%xmm6,	%xmm6		# clobber low side with zeros
+	mov	x0, #4			// mov	$4,	%esi
+
+Loop_schedule_192:
+	sub	x0, x0, #1			// dec	%esi
+	bl	_vpaes_schedule_round
+	ext	v0.16b, v6.16b, v0.16b, #8	// vpalignr	$8,%xmm6,%xmm0,%xmm0
+	bl	_vpaes_schedule_mangle		// save key n
+	bl	_vpaes_schedule_192_smear
+	bl	_vpaes_schedule_mangle		// save key n+1
+	bl	_vpaes_schedule_round
+	cbz	x0, Lschedule_mangle_last
+	bl	_vpaes_schedule_mangle		// save key n+2
+	bl	_vpaes_schedule_192_smear
+	b	Loop_schedule_192
+
+##
+##  .aes_schedule_256
+##
+##  256-bit specific part of key schedule.
+##
+##  The structure here is very similar to the 128-bit
+##  schedule, but with an additional "low side" in
+##  %xmm6.  The low side's rounds are the same as the
+##  high side's, except no rcon and no rotation.
+##
+.align	4
+Lschedule_256:
+	ld1	{v0.16b}, [x0]		// vmovdqu	16(%rdi),%xmm0		# load key part 2 (unaligned)
+	bl	_vpaes_schedule_transform	// input transform
+	mov	x0, #7			// mov	$7, %esi
+
+Loop_schedule_256:
+	sub	x0, x0, #1			// dec	%esi
+	bl	_vpaes_schedule_mangle		// output low result
+	mov	v6.16b, v0.16b			// vmovdqa	%xmm0,	%xmm6		# save cur_lo in xmm6
+
+	// high round
+	bl	_vpaes_schedule_round
+	cbz	x0, Lschedule_mangle_last
+	bl	_vpaes_schedule_mangle
+
+	// low round. swap xmm7 and xmm6
+	dup	v0.4s, v0.s[3]			// vpshufd	$0xFF,	%xmm0,	%xmm0
+	movi	v4.16b, #0
+	mov	v5.16b, v7.16b			// vmovdqa	%xmm7,	%xmm5
+	mov	v7.16b, v6.16b			// vmovdqa	%xmm6,	%xmm7
+	bl	_vpaes_schedule_low_round
+	mov	v7.16b, v5.16b			// vmovdqa	%xmm5,	%xmm7
+
+	b	Loop_schedule_256
+
+##
+##  .aes_schedule_mangle_last
+##
+##  Mangler for last round of key schedule
+##  Mangles %xmm0
+##    when encrypting, outputs out(%xmm0) ^ 63
+##    when decrypting, outputs unskew(%xmm0)
+##
+##  Always called right before return... jumps to cleanup and exits
+##
+.align	4
+Lschedule_mangle_last:
+	// schedule last round key from xmm0
+	adrp	x11, Lk_deskew	// lea	Lk_deskew(%rip),%r11	# prepare to deskew
+	add	x11, x11, :lo12:Lk_deskew
+
+	cbnz	w3, Lschedule_mangle_last_dec
+
+	// encrypting
+	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),%xmm1
+	adrp	x11, Lk_opt		// lea	Lk_opt(%rip),	%r11		# prepare to output transform
+	add	x11, x11, :lo12:Lk_opt
+	add	x2, x2, #32			// add	$32,	%rdx
+	tbl	v0.16b, {v0.16b}, v1.16b	// vpshufb	%xmm1,	%xmm0,	%xmm0		# output permute
+
+Lschedule_mangle_last_dec:
+	ld1	{v20.2d,v21.2d}, [x11]		// reload constants
+	sub	x2, x2, #16			// add	$-16,	%rdx
+	eor	v0.16b, v0.16b, v16.16b		// vpxor	Lk_s63(%rip),	%xmm0,	%xmm0
+	bl	_vpaes_schedule_transform	// output transform
+	st1	{v0.2d}, [x2]			// vmovdqu	%xmm0,	(%rdx)		# save last key
+
+	// cleanup
+	eor	v0.16b, v0.16b, v0.16b		// vpxor	%xmm0,	%xmm0,	%xmm0
+	eor	v1.16b, v1.16b, v1.16b		// vpxor	%xmm1,	%xmm1,	%xmm1
+	eor	v2.16b, v2.16b, v2.16b		// vpxor	%xmm2,	%xmm2,	%xmm2
+	eor	v3.16b, v3.16b, v3.16b		// vpxor	%xmm3,	%xmm3,	%xmm3
+	eor	v4.16b, v4.16b, v4.16b		// vpxor	%xmm4,	%xmm4,	%xmm4
+	eor	v5.16b, v5.16b, v5.16b		// vpxor	%xmm5,	%xmm5,	%xmm5
+	eor	v6.16b, v6.16b, v6.16b		// vpxor	%xmm6,	%xmm6,	%xmm6
+	eor	v7.16b, v7.16b, v7.16b		// vpxor	%xmm7,	%xmm7,	%xmm7
+	ldp	x29, x30, [sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+
+##
+##  .aes_schedule_192_smear
+##
+##  Smear the short, low side in the 192-bit key schedule.
+##
+##  Inputs:
+##    %xmm7: high side, b  a  x  y
+##    %xmm6:  low side, d  c  0  0
+##    %xmm13: 0
+##  (AArch64 port: v7 = high side, v6 = low side; the zero is built in v1 here.)
+##  Outputs:
+##    %xmm6: b+c+d  b+c  0  0
+##    %xmm0: b+c+d  b+c  b  a
+##
+.def _vpaes_schedule_192_smear
+   .type 32
+.endef
+.align	4
+_vpaes_schedule_192_smear:
+	movi	v1.16b, #0
+	dup	v0.4s, v7.s[3]
+	ins	v1.s[3], v6.s[2]	// vpshufd	$0x80,	%xmm6,	%xmm1	# d c 0 0 -> c 0 0 0
+	ins	v0.s[0], v7.s[2]	// vpshufd	$0xFE,	%xmm7,	%xmm0	# b a _ _ -> b b b a
+	eor	v6.16b, v6.16b, v1.16b	// vpxor	%xmm1,	%xmm6,	%xmm6	# -> c+d c 0 0
+	eor	v1.16b, v1.16b, v1.16b	// vpxor	%xmm1,	%xmm1,	%xmm1
+	eor	v6.16b, v6.16b, v0.16b	// vpxor	%xmm0,	%xmm6,	%xmm6	# -> b+c+d b+c b a
+	mov	v0.16b, v6.16b		// vmovdqa	%xmm6,	%xmm0
+	ins	v6.d[0], v1.d[0]	// vmovhlps	%xmm1,	%xmm6,	%xmm6	# clobber low side with zeros
+	ret
+
+
+##
+##  .aes_schedule_round
+##
+##  Runs one main round of the key schedule on %xmm0, %xmm7
+##
+##  Specifically, runs subbytes on the high dword of %xmm0
+##  then rotates it by one byte and xors into the low dword of
+##  %xmm7.
+##
+##  Adds rcon from low byte of %xmm8, then rotates %xmm8 for
+##  next rcon.
+##
+##  Smears the dwords of %xmm7 by xoring the low into the
+##  second low, result into third, result into highest.
+##
+##  Returns results in %xmm7 = %xmm0.
+##  Clobbers %xmm1-%xmm4, %r11.
+##  AArch64 port: state in v0/v7, rcon in v8, scratch v1-v4; no general register is written.
+.def _vpaes_schedule_round
+   .type 32
+.endef
+.align	4
+_vpaes_schedule_round:
+	// extract rcon from xmm8
+	movi	v4.16b, #0			// vpxor	%xmm4,	%xmm4,	%xmm4
+	ext	v1.16b, v8.16b, v4.16b, #15	// vpalignr	$15,	%xmm8,	%xmm4,	%xmm1
+	ext	v8.16b, v8.16b, v8.16b, #15	// vpalignr	$15,	%xmm8,	%xmm8,	%xmm8
+	eor	v7.16b, v7.16b, v1.16b		// vpxor	%xmm1,	%xmm7,	%xmm7
+
+	// rotate
+	dup	v0.4s, v0.s[3]			// vpshufd	$0xFF,	%xmm0,	%xmm0
+	ext	v0.16b, v0.16b, v0.16b, #1	// vpalignr	$1,	%xmm0,	%xmm0,	%xmm0
+
+	// fall through...
+
+	// low round: same as high round, but no rotation and no rcon.
+_vpaes_schedule_low_round:
+	// smear xmm7
+	ext	v1.16b, v4.16b, v7.16b, #12	// vpslldq	$4,	%xmm7,	%xmm1
+	eor	v7.16b, v7.16b, v1.16b		// vpxor	%xmm1,	%xmm7,	%xmm7
+	ext	v4.16b, v4.16b, v7.16b, #8	// vpslldq	$8,	%xmm7,	%xmm4
+
+	// subbytes
+	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1		# 0 = k
+	ushr	v0.16b, v0.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0		# 1 = i
+	eor	v7.16b, v7.16b, v4.16b		// vpxor	%xmm4,	%xmm7,	%xmm7
+	tbl	v2.16b, {v19.16b}, v1.16b	// vpshufb	%xmm1,	%xmm11,	%xmm2		# 2 = a/k
+	eor	v1.16b, v1.16b, v0.16b		// vpxor	%xmm0,	%xmm1,	%xmm1		# 0 = j
+	tbl	v3.16b, {v18.16b}, v0.16b	// vpshufb	%xmm0, 	%xmm10,	%xmm3		# 3 = 1/i
+	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3		# 3 = iak = 1/i + a/k
+	tbl	v4.16b, {v18.16b}, v1.16b	// vpshufb	%xmm1,	%xmm10,	%xmm4		# 4 = 1/j
+	eor	v7.16b, v7.16b, v16.16b		// vpxor	Lk_s63(%rip),	%xmm7,	%xmm7
+	tbl	v3.16b, {v18.16b}, v3.16b	// vpshufb	%xmm3,	%xmm10,	%xmm3		# 2 = 1/iak
+	eor	v4.16b, v4.16b, v2.16b		// vpxor	%xmm2,	%xmm4,	%xmm4		# 4 = jak = 1/j + a/k
+	tbl	v2.16b, {v18.16b}, v4.16b	// vpshufb	%xmm4,	%xmm10,	%xmm2		# 3 = 1/jak
+	eor	v3.16b, v3.16b, v1.16b		// vpxor	%xmm1,	%xmm3,	%xmm3		# 2 = io
+	eor	v2.16b, v2.16b, v0.16b		// vpxor	%xmm0,	%xmm2,	%xmm2		# 3 = jo
+	tbl	v4.16b, {v23.16b}, v3.16b	// vpshufb	%xmm3,	%xmm13,	%xmm4		# 4 = sbou
+	tbl	v1.16b, {v22.16b}, v2.16b	// vpshufb	%xmm2,	%xmm12,	%xmm1		# 0 = sb1t
+	eor	v1.16b, v1.16b, v4.16b		// vpxor	%xmm4,	%xmm1,	%xmm1		# 0 = sbox output
+
+	// add in smeared stuff
+	eor	v0.16b, v1.16b, v7.16b		// vpxor	%xmm7,	%xmm1,	%xmm0
+	eor	v7.16b, v1.16b, v7.16b		// vmovdqa	%xmm0,	%xmm7
+	ret
+
+
+##
+##  .aes_schedule_transform
+##
+##  Linear-transform %xmm0 according to tables at (%r11)
+##
+##  Requires that %xmm9 = 0x0F0F... as in preheat
+##  Output in %xmm0
+##  Clobbers %xmm1, %xmm2
+##  AArch64 port: in/out v0, lo/hi tables taken from v20/v21, 0x0F mask from v17.
+.def _vpaes_schedule_transform
+   .type 32
+.endef
+.align	4
+_vpaes_schedule_transform:
+	and	v1.16b, v0.16b, v17.16b		// vpand	%xmm9,	%xmm0,	%xmm1
+	ushr	v0.16b, v0.16b, #4		// vpsrlb	$4,	%xmm0,	%xmm0
+						// vmovdqa	(%r11),	%xmm2 	# lo
+	tbl	v2.16b, {v20.16b}, v1.16b	// vpshufb	%xmm1,	%xmm2,	%xmm2
+						// vmovdqa	16(%r11),	%xmm1 # hi
+	tbl	v0.16b, {v21.16b}, v0.16b	// vpshufb	%xmm0,	%xmm1,	%xmm0
+	eor	v0.16b, v0.16b, v2.16b		// vpxor	%xmm2,	%xmm0,	%xmm0
+	ret
+
+
+##
+##  .aes_schedule_mangle
+##
+##  Mangle xmm0 from (basis-transformed) standard version
+##  to our version.
+##
+##  On encrypt,
+##    xor with 0x63
+##    multiply by circulant 0,1,1,1
+##    apply shiftrows transform
+##
+##  On decrypt,
+##    xor with 0x63
+##    multiply by "inverse mixcolumns" circulant E,B,D,9
+##    deskew
+##    apply shiftrows transform
+##
+##
+##  Writes out to (%rdx), and increments or decrements it
+##  Keeps track of round number mod 4 in %r8
+##  Preserves xmm0
+##  Clobbers xmm1-xmm5
+##  AArch64 port: w3 nonzero selects decrypt, x2 is the output pointer, x8 points into Lk_sr, v9 holds Lk_mc_forward.
+.def _vpaes_schedule_mangle
+   .type 32
+.endef
+.align	4
+_vpaes_schedule_mangle:
+	mov	v4.16b, v0.16b			// vmovdqa	%xmm0,	%xmm4	# save xmm0 for later
+						// vmovdqa	.Lk_mc_forward(%rip),%xmm5
+	cbnz	w3, Lschedule_mangle_dec
+
+	// encrypting
+	eor	v4.16b, v0.16b, v16.16b		// vpxor	Lk_s63(%rip),	%xmm0,	%xmm4
+	add	x2, x2, #16			// add	$16,	%rdx
+	tbl	v4.16b, {v4.16b}, v9.16b	// vpshufb	%xmm5,	%xmm4,	%xmm4
+	tbl	v1.16b, {v4.16b}, v9.16b	// vpshufb	%xmm5,	%xmm4,	%xmm1
+	tbl	v3.16b, {v1.16b}, v9.16b	// vpshufb	%xmm5,	%xmm1,	%xmm3
+	eor	v4.16b, v4.16b, v1.16b		// vpxor	%xmm1,	%xmm4,	%xmm4
+	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),	%xmm1
+	eor	v3.16b, v3.16b, v4.16b		// vpxor	%xmm4,	%xmm3,	%xmm3
+
+	b	Lschedule_mangle_both
+.align	4
+Lschedule_mangle_dec:
+	// inverse mix columns
+						// lea	.Lk_dksd(%rip),%r11
+	ushr	v1.16b, v4.16b, #4		// vpsrlb	$4,	%xmm4,	%xmm1	# 1 = hi
+	and	v4.16b, v4.16b, v17.16b		// vpand	%xmm9,	%xmm4,	%xmm4	# 4 = lo
+
+						// vmovdqa	0x00(%r11),	%xmm2
+	tbl	v2.16b, {v24.16b}, v4.16b	// vpshufb	%xmm4,	%xmm2,	%xmm2
+						// vmovdqa	0x10(%r11),	%xmm3
+	tbl	v3.16b,	{v25.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
+	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3
+	tbl	v3.16b, {v3.16b}, v9.16b	// vpshufb	%xmm5,	%xmm3,	%xmm3
+
+						// vmovdqa	0x20(%r11),	%xmm2
+	tbl	v2.16b, {v26.16b}, v4.16b	// vpshufb	%xmm4,	%xmm2,	%xmm2
+	eor	v2.16b, v2.16b, v3.16b		// vpxor	%xmm3,	%xmm2,	%xmm2
+						// vmovdqa	0x30(%r11),	%xmm3
+	tbl	v3.16b, {v27.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
+	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3
+	tbl	v3.16b, {v3.16b}, v9.16b	// vpshufb	%xmm5,	%xmm3,	%xmm3
+
+						// vmovdqa	0x40(%r11),	%xmm2
+	tbl	v2.16b, {v28.16b}, v4.16b	// vpshufb	%xmm4,	%xmm2,	%xmm2
+	eor	v2.16b, v2.16b, v3.16b		// vpxor	%xmm3,	%xmm2,	%xmm2
+						// vmovdqa	0x50(%r11),	%xmm3
+	tbl	v3.16b, {v29.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
+	eor	v3.16b, v3.16b, v2.16b		// vpxor	%xmm2,	%xmm3,	%xmm3
+
+						// vmovdqa	0x60(%r11),	%xmm2
+	tbl	v2.16b, {v30.16b}, v4.16b	// vpshufb	%xmm4,	%xmm2,	%xmm2
+	tbl	v3.16b, {v3.16b}, v9.16b	// vpshufb	%xmm5,	%xmm3,	%xmm3
+						// vmovdqa	0x70(%r11),	%xmm4
+	tbl	v4.16b, {v31.16b}, v1.16b	// vpshufb	%xmm1,	%xmm4,	%xmm4
+	ld1	{v1.2d}, [x8]			// vmovdqa	(%r8,%r10),	%xmm1
+	eor	v2.16b, v2.16b, v3.16b		// vpxor	%xmm3,	%xmm2,	%xmm2
+	eor	v3.16b, v4.16b, v2.16b		// vpxor	%xmm2,	%xmm4,	%xmm3
+
+	sub	x2, x2, #16			// add	$-16,	%rdx
+
+Lschedule_mangle_both:
+	tbl	v3.16b, {v3.16b}, v1.16b	// vpshufb	%xmm1,	%xmm3,	%xmm3
+	add	x8, x8, #48			// add	$-16,	%r8
+	and	x8, x8, #~(1<<6)		// and	$0x30,	%r8
+	st1	{v3.2d}, [x2]			// vmovdqu	%xmm3,	(%rdx)
+	ret
+
+// vpaes_set_encrypt_key: x0 = user key, w1 = key bits, x2 = AES_KEY to fill; runs the schedule with w3 = 0 and returns 0 in x0.
+.globl	vpaes_set_encrypt_key
+
+.def vpaes_set_encrypt_key
+   .type 32
+.endef
+.align	4
+vpaes_set_encrypt_key:
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+	stp	d8,d9,[sp,#-16]!	// ABI spec says so
+
+	lsr	w9, w1, #5		// shr	$5,%eax
+	add	w9, w9, #5		// $5,%eax
+	str	w9, [x2,#240]		// mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
+
+	mov	w3, #0		// mov	$0,%ecx
+	mov	x8, #0x30		// mov	$0x30,%r8d
+	bl	_vpaes_schedule_core
+	eor	x0, x0, x0		// return value 0
+
+	ldp	d8,d9,[sp],#16
+	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+// vpaes_set_decrypt_key: stores the round count at [x2,#240], advances x2 by 16+16*rounds and runs the schedule with w3 = 1.
+.globl	vpaes_set_decrypt_key
+
+.def vpaes_set_decrypt_key
+   .type 32
+.endef
+.align	4
+vpaes_set_decrypt_key:
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+	stp	d8,d9,[sp,#-16]!	// ABI spec says so
+
+	lsr	w9, w1, #5		// shr	$5,%eax
+	add	w9, w9, #5		// $5,%eax
+	str	w9, [x2,#240]		// mov	%eax,240(%rdx)	# AES_KEY->rounds = nbits/32+5;
+	lsl	w9, w9, #4		// shl	$4,%eax
+	add	x2, x2, #16		// lea	16(%rdx,%rax),%rdx
+	add	x2, x2, x9
+
+	mov	w3, #1		// mov	$1,%ecx
+	lsr	w8, w1, #1		// shr	$1,%r8d
+	and	x8, x8, #32		// and	$32,%r8d
+	eor	x8, x8, #32		// xor	$32,%r8d	# nbits==192?0:32
+	bl	_vpaes_schedule_core
+
+	ldp	d8,d9,[sp],#16
+	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+.globl	vpaes_cbc_encrypt
+// vpaes_cbc_encrypt: x0 = in, x1 = out, x2 = byte length, x3 = key, x4 = ivec, w5 = direction (0 branches to vpaes_cbc_decrypt).
+.def vpaes_cbc_encrypt
+   .type 32
+.endef
+.align	4
+vpaes_cbc_encrypt:
+	AARCH64_SIGN_LINK_REGISTER
+	cbz	x2, Lcbc_abort		// nothing to do for zero length
+	cmp	w5, #0			// check direction
+	b.eq	vpaes_cbc_decrypt
+
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+
+	mov	x17, x2		// reassign
+	mov	x2,  x3		// reassign
+
+	ld1	{v0.16b}, [x4]	// load ivec
+	bl	_vpaes_encrypt_preheat
+	b	Lcbc_enc_loop
+
+.align	4
+Lcbc_enc_loop:
+	ld1	{v7.16b}, [x0],#16	// load input
+	eor	v7.16b, v7.16b, v0.16b	// xor with ivec
+	bl	_vpaes_encrypt_core
+	st1	{v0.16b}, [x1],#16	// save output
+	subs	x17, x17, #16
+	b.hi	Lcbc_enc_loop
+
+	st1	{v0.16b}, [x4]	// write ivec
+
+	ldp	x29,x30,[sp],#16
+Lcbc_abort:	// the zero-length exit joins here: x30 was signed on entry and must be validated before ret
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+// vpaes_cbc_decrypt: CBC decryption path reached from vpaes_cbc_encrypt with the same argument registers; saves and restores d8-d15.
+.def vpaes_cbc_decrypt
+   .type 32
+.endef
+.align	4
+vpaes_cbc_decrypt:
+	// Not adding AARCH64_SIGN_LINK_REGISTER here because vpaes_cbc_decrypt is jumped to
+	// only from vpaes_cbc_encrypt which has already signed the return address.
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+	stp	d8,d9,[sp,#-16]!	// ABI spec says so
+	stp	d10,d11,[sp,#-16]!
+	stp	d12,d13,[sp,#-16]!
+	stp	d14,d15,[sp,#-16]!
+
+	mov	x17, x2		// reassign
+	mov	x2,  x3		// reassign
+	ld1	{v6.16b}, [x4]	// load ivec
+	bl	_vpaes_decrypt_preheat
+	tst	x17, #16		// bit 4 of the byte count set: do one block before the 2x loop
+	b.eq	Lcbc_dec_loop2x
+
+	ld1	{v7.16b}, [x0], #16	// load input
+	bl	_vpaes_decrypt_core
+	eor	v0.16b, v0.16b, v6.16b	// xor with ivec
+	orr	v6.16b, v7.16b, v7.16b	// next ivec value
+	st1	{v0.16b}, [x1], #16
+	subs	x17, x17, #16
+	b.ls	Lcbc_dec_done
+
+.align	4
+Lcbc_dec_loop2x:
+	ld1	{v14.16b,v15.16b}, [x0], #32
+	bl	_vpaes_decrypt_2x
+	eor	v0.16b, v0.16b, v6.16b	// xor with ivec
+	eor	v1.16b, v1.16b, v14.16b	// second block is xored with the first ciphertext block
+	orr	v6.16b, v15.16b, v15.16b	// second ciphertext block becomes the next ivec
+	st1	{v0.16b,v1.16b}, [x1], #32
+	subs	x17, x17, #32
+	b.hi	Lcbc_dec_loop2x
+
+Lcbc_dec_done:
+	st1	{v6.16b}, [x4]
+
+	ldp	d14,d15,[sp],#16
+	ldp	d12,d13,[sp],#16
+	ldp	d10,d11,[sp],#16
+	ldp	d8,d9,[sp],#16
+	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+.globl	vpaes_ctr32_encrypt_blocks
+// vpaes_ctr32_encrypt_blocks: x0 = in, x1 = out, x2 = block count, x3 = key, x4 = 16-byte IV whose last word is the big-endian counter.
+.def vpaes_ctr32_encrypt_blocks
+   .type 32
+.endef
+.align	4
+vpaes_ctr32_encrypt_blocks:
+	AARCH64_SIGN_LINK_REGISTER
+	stp	x29,x30,[sp,#-16]!
+	add	x29,sp,#0
+	stp	d8,d9,[sp,#-16]!	// ABI spec says so
+	stp	d10,d11,[sp,#-16]!
+	stp	d12,d13,[sp,#-16]!
+	stp	d14,d15,[sp,#-16]!
+
+	cbz	x2, Lctr32_done
+
+	// Note, unlike the other functions, x2 here is measured in blocks,
+	// not bytes.
+	mov	x17, x2
+	mov	x2,  x3
+
+	// Load the IV and counter portion.
+	ldr	w6, [x4, #12]
+	ld1	{v7.16b}, [x4]
+
+	bl	_vpaes_encrypt_preheat
+	tst	x17, #1
+	rev	w6, w6		// The counter is big-endian.
+	b.eq	Lctr32_prep_loop
+
+	// Handle one block so the remaining block count is even for
+	// _vpaes_encrypt_2x.
+	ld1	{v6.16b}, [x0], #16	// Load input ahead of time
+	bl	_vpaes_encrypt_core
+	eor	v0.16b, v0.16b, v6.16b	// XOR input and result
+	st1	{v0.16b}, [x1], #16
+	subs	x17, x17, #1
+	// Update the counter.
+	add	w6, w6, #1
+	rev	w7, w6
+	mov	v7.s[3], w7
+	b.ls	Lctr32_done
+
+Lctr32_prep_loop:
+	// _vpaes_encrypt_core takes its input from v7, while _vpaes_encrypt_2x
+	// uses v14 and v15.
+	mov	v15.16b, v7.16b
+	mov	v14.16b, v7.16b
+	add	w6, w6, #1
+	rev	w7, w6
+	mov	v15.s[3], w7
+
+Lctr32_loop:
+	ld1	{v6.16b,v7.16b}, [x0], #32	// Load input ahead of time
+	bl	_vpaes_encrypt_2x
+	eor	v0.16b, v0.16b, v6.16b		// XOR input and result
+	eor	v1.16b, v1.16b, v7.16b		// XOR input and result (#2)
+	st1	{v0.16b,v1.16b}, [x1], #32
+	subs	x17, x17, #2
+	// Update the counter.
+	add	w7, w6, #1
+	add	w6, w6, #2
+	rev	w7, w7
+	mov	v14.s[3], w7
+	rev	w7, w6
+	mov	v15.s[3], w7
+	b.hi	Lctr32_loop
+
+Lctr32_done:
+	ldp	d14,d15,[sp],#16
+	ldp	d12,d13,[sp],#16
+	ldp	d10,d11,[sp],#16
+	ldp	d8,d9,[sp],#16
+	ldp	x29,x30,[sp],#16
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+#endif
+#endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/win-aarch64/crypto/test/trampoline-armv8.S b/deps/boringssl/win-aarch64/crypto/test/trampoline-armv8.S
new file mode 100644
index 0000000..4e17d4b
--- /dev/null
+++ b/deps/boringssl/win-aarch64/crypto/test/trampoline-armv8.S
@@ -0,0 +1,760 @@
+// This file is generated from a similarly-named Perl script in the BoringSSL
+// source tree. Do not edit by hand.
+
+#if !defined(__has_feature)
+#define __has_feature(x) 0
+#endif
+#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
+#define OPENSSL_NO_ASM
+#endif
+
+#if !defined(OPENSSL_NO_ASM)
+#if defined(__aarch64__)
+#if defined(BORINGSSL_PREFIX)
+#include <boringssl_prefix_symbols_asm.h>
+#endif
+#include <openssl/arm_arch.h>
+
+.text
+
+// abi_test_trampoline loads callee-saved registers from |state|, calls |func|
+// with |argv|, then saves the callee-saved registers into |state|. It returns
+// the result of |func|. The |unwind| argument is unused.
+// uint64_t abi_test_trampoline(void (*func)(...), CallerState *state,
+//                              const uint64_t *argv, size_t argc,
+//                              uint64_t unwind);
+
+.globl	abi_test_trampoline
+
+.align	4
+abi_test_trampoline:	// x0 = func, x1 = state, x2 = argv, x3 = argc (see prototype above)
+Labi_test_trampoline_begin:
+	AARCH64_SIGN_LINK_REGISTER
+	// Stack layout (low to high addresses)
+	//   x29,x30 (16 bytes)
+	//    d8-d15 (64 bytes)
+	//   x19-x28 (80 bytes)
+	//    x1 (8 bytes)
+	//   padding (8 bytes)
+	stp	x29, x30, [sp, #-176]!	// allocate the whole 176-byte frame in one step
+	mov	x29, sp
+
+	// Saved callee-saved registers and |state|.
+	stp	d8, d9, [sp, #16]
+	stp	d10, d11, [sp, #32]
+	stp	d12, d13, [sp, #48]
+	stp	d14, d15, [sp, #64]
+	stp	x19, x20, [sp, #80]
+	stp	x21, x22, [sp, #96]
+	stp	x23, x24, [sp, #112]
+	stp	x25, x26, [sp, #128]
+	stp	x27, x28, [sp, #144]
+	str	x1, [sp, #160]	// keep |state| so it can be reloaded after the call
+
+	// Load registers from |state|, with the exception of x29. x29 is the
+	// frame pointer and also callee-saved, but AAPCS64 allows platforms to
+	// mandate that x29 always point to a frame. iOS64 does so, which means
+	// we cannot fill x29 with entropy without violating ABI rules
+	// ourselves. x29 is tested separately below.
+	ldp	d8, d9, [x1], #16	// post-increment walks x1 through |state|
+	ldp	d10, d11, [x1], #16
+	ldp	d12, d13, [x1], #16
+	ldp	d14, d15, [x1], #16
+	ldp	x19, x20, [x1], #16
+	ldp	x21, x22, [x1], #16
+	ldp	x23, x24, [x1], #16
+	ldp	x25, x26, [x1], #16
+	ldp	x27, x28, [x1], #16
+
+	// Move parameters into temporary registers.
+	mov	x9, x0	// x9 = func
+	mov	x10, x2	// x10 = cursor into argv
+	mov	x11, x3	// x11 = arguments still to load
+
+	// Load parameters into registers.
+	cbz	x11, Largs_done	// argc == 0: nothing to load
+	ldr	x0, [x10], #8
+	subs	x11, x11, #1
+	b.eq	Largs_done
+	ldr	x1, [x10], #8
+	subs	x11, x11, #1
+	b.eq	Largs_done
+	ldr	x2, [x10], #8
+	subs	x11, x11, #1
+	b.eq	Largs_done
+	ldr	x3, [x10], #8
+	subs	x11, x11, #1
+	b.eq	Largs_done
+	ldr	x4, [x10], #8
+	subs	x11, x11, #1
+	b.eq	Largs_done
+	ldr	x5, [x10], #8
+	subs	x11, x11, #1
+	b.eq	Largs_done
+	ldr	x6, [x10], #8
+	subs	x11, x11, #1
+	b.eq	Largs_done
+	ldr	x7, [x10], #8	// at most eight arguments (x0-x7) are loaded
+
+Largs_done:
+	blr	x9	// call |func|; its x0 result is left untouched below
+
+	// Reload |state| and store registers.
+	ldr	x1, [sp, #160]
+	stp	d8, d9, [x1], #16
+	stp	d10, d11, [x1], #16
+	stp	d12, d13, [x1], #16
+	stp	d14, d15, [x1], #16
+	stp	x19, x20, [x1], #16
+	stp	x21, x22, [x1], #16
+	stp	x23, x24, [x1], #16
+	stp	x25, x26, [x1], #16
+	stp	x27, x28, [x1], #16
+
+	// |func| is required to preserve x29, the frame pointer. We cannot load
+	// random values into x29 (see comment above), so compare it against the
+	// expected value and zero the field of |state| if corrupted.
+	mov	x9, sp	// expected x29: it was set equal to sp in the prologue
+	cmp	x29, x9
+	b.eq	Lx29_ok
+	str	xzr, [x1]	// x1 now points just past the x27/x28 pair in |state|
+
+Lx29_ok:
+	// Restore callee-saved registers.
+	ldp	d8, d9, [sp, #16]
+	ldp	d10, d11, [sp, #32]
+	ldp	d12, d13, [sp, #48]
+	ldp	d14, d15, [sp, #64]
+	ldp	x19, x20, [sp, #80]
+	ldp	x21, x22, [sp, #96]
+	ldp	x23, x24, [sp, #112]
+	ldp	x25, x26, [sp, #128]
+	ldp	x27, x28, [sp, #144]
+
+	ldp	x29, x30, [sp], #176	// pop the frame
+	AARCH64_VALIDATE_LINK_REGISTER
+	ret
+
+
+.globl	abi_test_clobber_x0
+
+.align	4
+abi_test_clobber_x0:
+	AARCH64_VALID_CALL_TARGET
+	mov	x0, xzr	// deliberately overwrite x0 with zero
+	ret
+
+
+.globl	abi_test_clobber_x1
+
+.align	4
+abi_test_clobber_x1:
+	AARCH64_VALID_CALL_TARGET
+	mov	x1, xzr	// deliberately overwrite x1 with zero
+	ret
+
+
+.globl	abi_test_clobber_x2
+
+.align	4
+abi_test_clobber_x2:
+	AARCH64_VALID_CALL_TARGET
+	mov	x2, xzr	// deliberately overwrite x2 with zero
+	ret
+
+
+.globl	abi_test_clobber_x3
+
+.align	4
+abi_test_clobber_x3:
+	AARCH64_VALID_CALL_TARGET
+	mov	x3, xzr	// deliberately overwrite x3 with zero
+	ret
+
+
+.globl	abi_test_clobber_x4
+
+.align	4
+abi_test_clobber_x4:
+	AARCH64_VALID_CALL_TARGET
+	mov	x4, xzr	// deliberately overwrite x4 with zero
+	ret
+
+
+.globl	abi_test_clobber_x5
+
+.align	4
+abi_test_clobber_x5:
+	AARCH64_VALID_CALL_TARGET
+	mov	x5, xzr	// deliberately overwrite x5 with zero
+	ret
+
+
+.globl	abi_test_clobber_x6
+
+.align	4
+abi_test_clobber_x6:
+	AARCH64_VALID_CALL_TARGET
+	mov	x6, xzr	// deliberately overwrite x6 with zero
+	ret
+
+
+.globl	abi_test_clobber_x7
+
+.align	4
+abi_test_clobber_x7:
+	AARCH64_VALID_CALL_TARGET
+	mov	x7, xzr	// deliberately overwrite x7 with zero
+	ret
+
+
+.globl	abi_test_clobber_x8
+
+.align	4
+abi_test_clobber_x8:
+	AARCH64_VALID_CALL_TARGET
+	mov	x8, xzr	// deliberately overwrite x8 with zero
+	ret
+
+
+.globl	abi_test_clobber_x9
+
+.align	4
+abi_test_clobber_x9:
+	AARCH64_VALID_CALL_TARGET
+	mov	x9, xzr	// deliberately overwrite x9 with zero
+	ret
+
+
+.globl	abi_test_clobber_x10
+
+.align	4
+abi_test_clobber_x10:
+	AARCH64_VALID_CALL_TARGET
+	mov	x10, xzr	// deliberately overwrite x10 with zero
+	ret
+
+
+.globl	abi_test_clobber_x11
+
+.align	4
+abi_test_clobber_x11:
+	AARCH64_VALID_CALL_TARGET
+	mov	x11, xzr	// deliberately overwrite x11 with zero
+	ret
+
+
+.globl	abi_test_clobber_x12
+
+.align	4
+abi_test_clobber_x12:
+	AARCH64_VALID_CALL_TARGET
+	mov	x12, xzr	// deliberately overwrite x12 with zero
+	ret
+
+
+.globl	abi_test_clobber_x13
+
+.align	4
+abi_test_clobber_x13:
+	AARCH64_VALID_CALL_TARGET
+	mov	x13, xzr	// deliberately overwrite x13 with zero
+	ret
+
+
+.globl	abi_test_clobber_x14
+
+.align	4
+abi_test_clobber_x14:
+	AARCH64_VALID_CALL_TARGET
+	mov	x14, xzr	// deliberately overwrite x14 with zero
+	ret
+
+
+.globl	abi_test_clobber_x15
+
+.align	4
+abi_test_clobber_x15:
+	AARCH64_VALID_CALL_TARGET
+	mov	x15, xzr	// deliberately overwrite x15 with zero
+	ret
+
+
+.globl	abi_test_clobber_x16
+
+.align	4
+abi_test_clobber_x16:
+	AARCH64_VALID_CALL_TARGET
+	mov	x16, xzr	// deliberately overwrite x16 with zero
+	ret
+
+
+.globl	abi_test_clobber_x17
+
+.align	4
+abi_test_clobber_x17:
+	AARCH64_VALID_CALL_TARGET
+	mov	x17, xzr	// deliberately overwrite x17 with zero
+	ret
+
+
+.globl	abi_test_clobber_x19
+
+.align	4
+abi_test_clobber_x19:
+	AARCH64_VALID_CALL_TARGET
+	mov	x19, xzr	// deliberately zero x19, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_x20
+
+.align	4
+abi_test_clobber_x20:
+	AARCH64_VALID_CALL_TARGET
+	mov	x20, xzr	// deliberately zero x20, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_x21
+
+.align	4
+abi_test_clobber_x21:
+	AARCH64_VALID_CALL_TARGET
+	mov	x21, xzr	// deliberately zero x21, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_x22
+
+.align	4
+abi_test_clobber_x22:
+	AARCH64_VALID_CALL_TARGET
+	mov	x22, xzr	// deliberately zero x22, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_x23
+
+.align	4
+abi_test_clobber_x23:
+	AARCH64_VALID_CALL_TARGET
+	mov	x23, xzr	// deliberately zero x23, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_x24
+
+.align	4
+abi_test_clobber_x24:
+	AARCH64_VALID_CALL_TARGET
+	mov	x24, xzr	// deliberately zero x24, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_x25
+
+.align	4
+abi_test_clobber_x25:
+	AARCH64_VALID_CALL_TARGET
+	mov	x25, xzr	// deliberately zero x25, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_x26
+
+.align	4
+abi_test_clobber_x26:
+	AARCH64_VALID_CALL_TARGET
+	mov	x26, xzr	// deliberately zero x26, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_x27
+
+.align	4
+abi_test_clobber_x27:
+	AARCH64_VALID_CALL_TARGET
+	mov	x27, xzr	// deliberately zero x27, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_x28
+
+.align	4
+abi_test_clobber_x28:
+	AARCH64_VALID_CALL_TARGET
+	mov	x28, xzr	// deliberately zero x28, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_x29
+
+.align	4
+abi_test_clobber_x29:
+	AARCH64_VALID_CALL_TARGET
+	mov	x29, xzr	// deliberately zero the frame pointer; abi_test_trampoline checks x29 against sp after the call
+	ret
+
+
+.globl	abi_test_clobber_d0
+
+.align	4
+abi_test_clobber_d0:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d0, xzr	// deliberately write zero to d0
+	ret
+
+
+.globl	abi_test_clobber_d1
+
+.align	4
+abi_test_clobber_d1:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d1, xzr	// deliberately write zero to d1
+	ret
+
+
+.globl	abi_test_clobber_d2
+
+.align	4
+abi_test_clobber_d2:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d2, xzr	// deliberately write zero to d2
+	ret
+
+
+.globl	abi_test_clobber_d3
+
+.align	4
+abi_test_clobber_d3:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d3, xzr	// deliberately write zero to d3
+	ret
+
+
+.globl	abi_test_clobber_d4
+
+.align	4
+abi_test_clobber_d4:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d4, xzr	// deliberately write zero to d4
+	ret
+
+
+.globl	abi_test_clobber_d5
+
+.align	4
+abi_test_clobber_d5:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d5, xzr	// deliberately write zero to d5
+	ret
+
+
+.globl	abi_test_clobber_d6
+
+.align	4
+abi_test_clobber_d6:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d6, xzr	// deliberately write zero to d6
+	ret
+
+
+.globl	abi_test_clobber_d7
+
+.align	4
+abi_test_clobber_d7:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d7, xzr	// deliberately write zero to d7
+	ret
+
+
+.globl	abi_test_clobber_d8
+
+.align	4
+abi_test_clobber_d8:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d8, xzr	// deliberately zero d8, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_d9
+
+.align	4
+abi_test_clobber_d9:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d9, xzr	// deliberately zero d9, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_d10
+
+.align	4
+abi_test_clobber_d10:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d10, xzr	// deliberately zero d10, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_d11
+
+.align	4
+abi_test_clobber_d11:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d11, xzr	// deliberately zero d11, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_d12
+
+.align	4
+abi_test_clobber_d12:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d12, xzr	// deliberately zero d12, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_d13
+
+.align	4
+abi_test_clobber_d13:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d13, xzr	// deliberately zero d13, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_d14
+
+.align	4
+abi_test_clobber_d14:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d14, xzr	// deliberately zero d14, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_d15
+
+.align	4
+abi_test_clobber_d15:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d15, xzr	// deliberately zero d15, which abi_test_trampoline treats as callee-saved
+	ret
+
+
+.globl	abi_test_clobber_d16
+
+.align	4
+abi_test_clobber_d16:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d16, xzr	// deliberately write zero to d16
+	ret
+
+
+.globl	abi_test_clobber_d17
+
+.align	4
+abi_test_clobber_d17:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d17, xzr	// deliberately write zero to d17
+	ret
+
+
+.globl	abi_test_clobber_d18
+
+.align	4
+abi_test_clobber_d18:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d18, xzr	// deliberately write zero to d18
+	ret
+
+
+.globl	abi_test_clobber_d19
+
+.align	4
+abi_test_clobber_d19:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d19, xzr	// deliberately write zero to d19
+	ret
+
+
+.globl	abi_test_clobber_d20
+
+.align	4
+abi_test_clobber_d20:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d20, xzr	// deliberately write zero to d20
+	ret
+
+
+.globl	abi_test_clobber_d21
+
+.align	4
+abi_test_clobber_d21:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d21, xzr	// deliberately write zero to d21
+	ret
+
+
+.globl	abi_test_clobber_d22
+
+.align	4
+abi_test_clobber_d22:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d22, xzr	// deliberately write zero to d22
+	ret
+
+
+.globl	abi_test_clobber_d23
+
+.align	4
+abi_test_clobber_d23:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d23, xzr	// deliberately write zero to d23
+	ret
+
+
+.globl	abi_test_clobber_d24
+
+.align	4
+abi_test_clobber_d24:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d24, xzr	// deliberately write zero to d24
+	ret
+
+
+.globl	abi_test_clobber_d25
+
+.align	4
+abi_test_clobber_d25:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d25, xzr	// deliberately write zero to d25
+	ret
+
+
+.globl	abi_test_clobber_d26
+
+.align	4
+abi_test_clobber_d26:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d26, xzr	// deliberately write zero to d26
+	ret
+
+
+.globl	abi_test_clobber_d27
+
+.align	4
+abi_test_clobber_d27:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d27, xzr	// deliberately write zero to d27
+	ret
+
+
+.globl	abi_test_clobber_d28
+
+.align	4
+abi_test_clobber_d28:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d28, xzr	// deliberately write zero to d28
+	ret
+
+
+.globl	abi_test_clobber_d29
+
+.align	4
+abi_test_clobber_d29:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d29, xzr	// deliberately write zero to d29
+	ret
+
+
+.globl	abi_test_clobber_d30
+
+.align	4
+abi_test_clobber_d30:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d30, xzr	// deliberately write zero to d30
+	ret
+
+
+.globl	abi_test_clobber_d31
+
+.align	4
+abi_test_clobber_d31:
+	AARCH64_VALID_CALL_TARGET
+	fmov	d31, xzr	// deliberately write zero to d31
+	ret
+
+
+.globl	abi_test_clobber_v8_upper
+
+.align	4
+abi_test_clobber_v8_upper:
+	AARCH64_VALID_CALL_TARGET
+	fmov	v8.d[1], xzr	// zero only the upper 64-bit lane of v8
+	ret
+
+
+.globl	abi_test_clobber_v9_upper
+
+.align	4
+abi_test_clobber_v9_upper:
+	AARCH64_VALID_CALL_TARGET
+	fmov	v9.d[1], xzr	// zero only the upper 64-bit lane of v9
+	ret
+
+
+.globl	abi_test_clobber_v10_upper
+
+.align	4
+abi_test_clobber_v10_upper:
+	AARCH64_VALID_CALL_TARGET
+	fmov	v10.d[1], xzr	// zero only the upper 64-bit lane of v10
+	ret
+
+
+.globl	abi_test_clobber_v11_upper
+
+.align	4
+abi_test_clobber_v11_upper:
+	AARCH64_VALID_CALL_TARGET
+	fmov	v11.d[1], xzr	// zero only the upper 64-bit lane of v11
+	ret
+
+
+.globl	abi_test_clobber_v12_upper
+
+.align	4
+abi_test_clobber_v12_upper:
+	AARCH64_VALID_CALL_TARGET
+	fmov	v12.d[1], xzr	// zero only the upper 64-bit lane of v12
+	ret
+
+
+.globl	abi_test_clobber_v13_upper
+
+.align	4
+abi_test_clobber_v13_upper:
+	AARCH64_VALID_CALL_TARGET
+	fmov	v13.d[1], xzr	// zero only the upper 64-bit lane of v13
+	ret
+
+
+.globl	abi_test_clobber_v14_upper
+
+.align	4
+abi_test_clobber_v14_upper:
+	AARCH64_VALID_CALL_TARGET
+	fmov	v14.d[1], xzr	// zero only the upper 64-bit lane of v14
+	ret
+
+
+.globl	abi_test_clobber_v15_upper
+
+.align	4
+abi_test_clobber_v15_upper:
+	AARCH64_VALID_CALL_TARGET
+	fmov	v15.d[1], xzr	// zero only the upper 64-bit lane of v15
+	ret
+
+#endif
+#endif  // !OPENSSL_NO_ASM
diff --git a/deps/boringssl/win-x86/crypto/chacha/chacha-x86.asm b/deps/boringssl/win-x86/crypto/chacha/chacha-x86.asm
new file mode 100644
index 0000000..34393af
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/chacha/chacha-x86.asm
@@ -0,0 +1,975 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+global	_ChaCha20_ctr32
+align	16
+_ChaCha20_ctr32:	; entry: five stack arguments, read at 20..36+esp once the four pushes below are done
+L$_ChaCha20_ctr32_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	xor	eax,eax
+	cmp	eax,DWORD [28+esp]	; third stack argument (the byte count) == 0 ?
+	je	NEAR L$000no_data	; yes: just restore registers and return
+	call	L$pic_point	; call/pop pair: leaves the address of L$pic_point in eax
+L$pic_point:
+	pop	eax	; eax = &L$pic_point; the SSSE3 path later rebases it onto L$ssse3_data
+	lea	ebp,[_OPENSSL_ia32cap_P]
+	test	DWORD [ebp],16777216	; bit 24 of capability word 0
+	jz	NEAR L$001x86
+	test	DWORD [4+ebp],512	; bit 9 of capability word 1 (presumably the SSSE3 flag -- confirm against the ia32cap layout)
+	jz	NEAR L$001x86
+	jmp	NEAR L$ssse3_shortcut	; both bits set: continue inside _ChaCha20_ssse3, after its pushes
+L$001x86:	; scalar path: copy the key and counter block into the stack frame
+	mov	esi,DWORD [32+esp]	; esi = fourth stack argument; 32 bytes are read from it below
+	mov	edi,DWORD [36+esp]	; edi = fifth stack argument; 16 bytes are read from it below
+	sub	esp,132	; scratch frame; the stack arguments now sit at 152..168+esp
+	mov	eax,DWORD [esi]
+	mov	ebx,DWORD [4+esi]
+	mov	ecx,DWORD [8+esi]
+	mov	edx,DWORD [12+esi]
+	mov	DWORD [80+esp],eax	; 80..108+esp = the eight dwords read from esi
+	mov	DWORD [84+esp],ebx
+	mov	DWORD [88+esp],ecx
+	mov	DWORD [92+esp],edx
+	mov	eax,DWORD [16+esi]
+	mov	ebx,DWORD [20+esi]
+	mov	ecx,DWORD [24+esi]
+	mov	edx,DWORD [28+esi]
+	mov	DWORD [96+esp],eax
+	mov	DWORD [100+esp],ebx
+	mov	DWORD [104+esp],ecx
+	mov	DWORD [108+esp],edx
+	mov	eax,DWORD [edi]
+	mov	ebx,DWORD [4+edi]
+	mov	ecx,DWORD [8+edi]
+	mov	edx,DWORD [12+edi]
+	sub	eax,1	; pre-decrement dword 0; L$002entry adds 1 back before every block
+	mov	DWORD [112+esp],eax	; 112..124+esp = the four dwords read from edi
+	mov	DWORD [116+esp],ebx
+	mov	DWORD [120+esp],ecx
+	mov	DWORD [124+esp],edx
+	jmp	NEAR L$002entry
+align	16
+L$003outer_loop:	; re-entered after each full 64-byte block
+	mov	DWORD [156+esp],ebx	; save the advanced input pointer
+	mov	DWORD [152+esp],eax	; save the advanced output pointer
+	mov	DWORD [160+esp],ecx	; save the remaining byte count
+L$002entry:	; build the working state for one block
+	mov	eax,1634760805	; 0x61707865, ASCII "expa"
+	mov	DWORD [4+esp],857760878	; 0x3320646e, ASCII "nd 3"
+	mov	DWORD [8+esp],2036477234	; 0x79622d32, ASCII "2-by"
+	mov	DWORD [12+esp],1797285236	; 0x6b206574, ASCII "te k"
+	mov	ebx,DWORD [84+esp]
+	mov	ebp,DWORD [88+esp]
+	mov	ecx,DWORD [104+esp]
+	mov	esi,DWORD [108+esp]
+	mov	edx,DWORD [116+esp]
+	mov	edi,DWORD [120+esp]
+	mov	DWORD [20+esp],ebx	; working state lives at 0..60+esp; some words stay in registers
+	mov	DWORD [24+esp],ebp
+	mov	DWORD [40+esp],ecx
+	mov	DWORD [44+esp],esi
+	mov	DWORD [52+esp],edx
+	mov	DWORD [56+esp],edi
+	mov	ebx,DWORD [92+esp]
+	mov	edi,DWORD [124+esp]
+	mov	edx,DWORD [112+esp]
+	mov	ebp,DWORD [80+esp]
+	mov	ecx,DWORD [96+esp]
+	mov	esi,DWORD [100+esp]
+	add	edx,1	; advance the block counter word
+	mov	DWORD [28+esp],ebx
+	mov	DWORD [60+esp],edi
+	mov	DWORD [112+esp],edx	; write the incremented counter back
+	mov	ebx,10	; ten passes through L$004loop
+	jmp	NEAR L$004loop
+align	16
+L$004loop:	; one pass of add / xor / rotate (16, 12, 8, 7) steps over the working state
+	add	eax,ebp
+	mov	DWORD [128+esp],ebx	; spill the pass counter; ebx is needed as a work register
+	mov	ebx,ebp
+	xor	edx,eax
+	rol	edx,16
+	add	ecx,edx
+	xor	ebx,ecx
+	mov	edi,DWORD [52+esp]
+	rol	ebx,12
+	mov	ebp,DWORD [20+esp]
+	add	eax,ebx
+	xor	edx,eax
+	mov	DWORD [esp],eax
+	rol	edx,8
+	mov	eax,DWORD [4+esp]
+	add	ecx,edx
+	mov	DWORD [48+esp],edx
+	xor	ebx,ecx
+	add	eax,ebp
+	rol	ebx,7
+	xor	edi,eax
+	mov	DWORD [32+esp],ecx
+	rol	edi,16
+	mov	DWORD [16+esp],ebx
+	add	esi,edi
+	mov	ecx,DWORD [40+esp]
+	xor	ebp,esi
+	mov	edx,DWORD [56+esp]
+	rol	ebp,12
+	mov	ebx,DWORD [24+esp]
+	add	eax,ebp
+	xor	edi,eax
+	mov	DWORD [4+esp],eax
+	rol	edi,8
+	mov	eax,DWORD [8+esp]
+	add	esi,edi
+	mov	DWORD [52+esp],edi
+	xor	ebp,esi
+	add	eax,ebx
+	rol	ebp,7
+	xor	edx,eax
+	mov	DWORD [36+esp],esi
+	rol	edx,16
+	mov	DWORD [20+esp],ebp
+	add	ecx,edx
+	mov	esi,DWORD [44+esp]
+	xor	ebx,ecx
+	mov	edi,DWORD [60+esp]
+	rol	ebx,12
+	mov	ebp,DWORD [28+esp]
+	add	eax,ebx
+	xor	edx,eax
+	mov	DWORD [8+esp],eax
+	rol	edx,8
+	mov	eax,DWORD [12+esp]
+	add	ecx,edx
+	mov	DWORD [56+esp],edx
+	xor	ebx,ecx
+	add	eax,ebp
+	rol	ebx,7
+	xor	edi,eax
+	rol	edi,16
+	mov	DWORD [24+esp],ebx
+	add	esi,edi
+	xor	ebp,esi
+	rol	ebp,12
+	mov	ebx,DWORD [20+esp]
+	add	eax,ebp
+	xor	edi,eax
+	mov	DWORD [12+esp],eax
+	rol	edi,8
+	mov	eax,DWORD [esp]
+	add	esi,edi
+	mov	edx,edi
+	xor	ebp,esi
+	add	eax,ebx
+	rol	ebp,7
+	xor	edx,eax
+	rol	edx,16
+	mov	DWORD [28+esp],ebp
+	add	ecx,edx
+	xor	ebx,ecx
+	mov	edi,DWORD [48+esp]
+	rol	ebx,12
+	mov	ebp,DWORD [24+esp]
+	add	eax,ebx
+	xor	edx,eax
+	mov	DWORD [esp],eax
+	rol	edx,8
+	mov	eax,DWORD [4+esp]
+	add	ecx,edx
+	mov	DWORD [60+esp],edx
+	xor	ebx,ecx
+	add	eax,ebp
+	rol	ebx,7
+	xor	edi,eax
+	mov	DWORD [40+esp],ecx
+	rol	edi,16
+	mov	DWORD [20+esp],ebx
+	add	esi,edi
+	mov	ecx,DWORD [32+esp]
+	xor	ebp,esi
+	mov	edx,DWORD [52+esp]
+	rol	ebp,12
+	mov	ebx,DWORD [28+esp]
+	add	eax,ebp
+	xor	edi,eax
+	mov	DWORD [4+esp],eax
+	rol	edi,8
+	mov	eax,DWORD [8+esp]
+	add	esi,edi
+	mov	DWORD [48+esp],edi
+	xor	ebp,esi
+	add	eax,ebx
+	rol	ebp,7
+	xor	edx,eax
+	mov	DWORD [44+esp],esi
+	rol	edx,16
+	mov	DWORD [24+esp],ebp
+	add	ecx,edx
+	mov	esi,DWORD [36+esp]
+	xor	ebx,ecx
+	mov	edi,DWORD [56+esp]
+	rol	ebx,12
+	mov	ebp,DWORD [16+esp]
+	add	eax,ebx
+	xor	edx,eax
+	mov	DWORD [8+esp],eax
+	rol	edx,8
+	mov	eax,DWORD [12+esp]
+	add	ecx,edx
+	mov	DWORD [52+esp],edx
+	xor	ebx,ecx
+	add	eax,ebp
+	rol	ebx,7
+	xor	edi,eax
+	rol	edi,16
+	mov	DWORD [28+esp],ebx
+	add	esi,edi
+	xor	ebp,esi
+	mov	edx,DWORD [48+esp]
+	rol	ebp,12
+	mov	ebx,DWORD [128+esp]	; reload the pass counter spilled at the top
+	add	eax,ebp
+	xor	edi,eax
+	mov	DWORD [12+esp],eax
+	rol	edi,8
+	mov	eax,DWORD [esp]
+	add	esi,edi
+	mov	DWORD [56+esp],edi
+	xor	ebp,esi
+	rol	ebp,7
+	dec	ebx
+	jnz	NEAR L$004loop	; repeat until the counter set to 10 at L$002entry reaches zero
+	mov	ebx,DWORD [160+esp]	; ebx = remaining byte count
+	add	eax,1634760805	; add the initial state back in, starting with the first constant
+	add	ebp,DWORD [80+esp]
+	add	ecx,DWORD [96+esp]
+	add	esi,DWORD [100+esp]
+	cmp	ebx,64
+	jb	NEAR L$005tail	; fewer than 64 bytes left: handle them byte by byte
+	mov	ebx,DWORD [156+esp]	; ebx = input pointer
+	add	edx,DWORD [112+esp]
+	add	edi,DWORD [120+esp]
+	xor	eax,DWORD [ebx]	; XOR each keystream word with the matching input word
+	xor	ebp,DWORD [16+ebx]
+	mov	DWORD [esp],eax	; park output word 0; eax is needed for the output pointer
+	mov	eax,DWORD [152+esp]	; eax = output pointer
+	xor	ecx,DWORD [32+ebx]
+	xor	esi,DWORD [36+ebx]
+	xor	edx,DWORD [48+ebx]
+	xor	edi,DWORD [56+ebx]
+	mov	DWORD [16+eax],ebp
+	mov	DWORD [32+eax],ecx
+	mov	DWORD [36+eax],esi
+	mov	DWORD [48+eax],edx
+	mov	DWORD [56+eax],edi
+	mov	ebp,DWORD [4+esp]
+	mov	ecx,DWORD [8+esp]
+	mov	esi,DWORD [12+esp]
+	mov	edx,DWORD [20+esp]
+	mov	edi,DWORD [24+esp]
+	add	ebp,857760878
+	add	ecx,2036477234
+	add	esi,1797285236
+	add	edx,DWORD [84+esp]
+	add	edi,DWORD [88+esp]
+	xor	ebp,DWORD [4+ebx]
+	xor	ecx,DWORD [8+ebx]
+	xor	esi,DWORD [12+ebx]
+	xor	edx,DWORD [20+ebx]
+	xor	edi,DWORD [24+ebx]
+	mov	DWORD [4+eax],ebp
+	mov	DWORD [8+eax],ecx
+	mov	DWORD [12+eax],esi
+	mov	DWORD [20+eax],edx
+	mov	DWORD [24+eax],edi
+	mov	ebp,DWORD [28+esp]
+	mov	ecx,DWORD [40+esp]
+	mov	esi,DWORD [44+esp]
+	mov	edx,DWORD [52+esp]
+	mov	edi,DWORD [60+esp]
+	add	ebp,DWORD [92+esp]
+	add	ecx,DWORD [104+esp]
+	add	esi,DWORD [108+esp]
+	add	edx,DWORD [116+esp]
+	add	edi,DWORD [124+esp]
+	xor	ebp,DWORD [28+ebx]
+	xor	ecx,DWORD [40+ebx]
+	xor	esi,DWORD [44+ebx]
+	xor	edx,DWORD [52+ebx]
+	xor	edi,DWORD [60+ebx]
+	lea	ebx,[64+ebx]	; advance the input pointer by one block
+	mov	DWORD [28+eax],ebp
+	mov	ebp,DWORD [esp]	; fetch the parked output word 0
+	mov	DWORD [40+eax],ecx
+	mov	ecx,DWORD [160+esp]	; ecx = remaining byte count
+	mov	DWORD [44+eax],esi
+	mov	DWORD [52+eax],edx
+	mov	DWORD [60+eax],edi
+	mov	DWORD [eax],ebp
+	lea	eax,[64+eax]	; advance the output pointer by one block
+	sub	ecx,64
+	jnz	NEAR L$003outer_loop	; bytes remain: produce another block
+	jmp	NEAR L$006done
+L$005tail:	; final partial block: finish the keystream at 0..60+esp, then XOR byte by byte
+	add	edx,DWORD [112+esp]
+	add	edi,DWORD [120+esp]
+	mov	DWORD [esp],eax
+	mov	DWORD [16+esp],ebp
+	mov	DWORD [32+esp],ecx
+	mov	DWORD [36+esp],esi
+	mov	DWORD [48+esp],edx
+	mov	DWORD [56+esp],edi
+	mov	ebp,DWORD [4+esp]
+	mov	ecx,DWORD [8+esp]
+	mov	esi,DWORD [12+esp]
+	mov	edx,DWORD [20+esp]
+	mov	edi,DWORD [24+esp]
+	add	ebp,857760878
+	add	ecx,2036477234
+	add	esi,1797285236
+	add	edx,DWORD [84+esp]
+	add	edi,DWORD [88+esp]
+	mov	DWORD [4+esp],ebp
+	mov	DWORD [8+esp],ecx
+	mov	DWORD [12+esp],esi
+	mov	DWORD [20+esp],edx
+	mov	DWORD [24+esp],edi
+	mov	ebp,DWORD [28+esp]
+	mov	ecx,DWORD [40+esp]
+	mov	esi,DWORD [44+esp]
+	mov	edx,DWORD [52+esp]
+	mov	edi,DWORD [60+esp]
+	add	ebp,DWORD [92+esp]
+	add	ecx,DWORD [104+esp]
+	add	esi,DWORD [108+esp]
+	add	edx,DWORD [116+esp]
+	add	edi,DWORD [124+esp]
+	mov	DWORD [28+esp],ebp
+	mov	ebp,DWORD [156+esp]	; ebp = input pointer
+	mov	DWORD [40+esp],ecx
+	mov	ecx,DWORD [152+esp]	; ecx = output pointer
+	mov	DWORD [44+esp],esi
+	xor	esi,esi	; esi = byte index, starting at 0
+	mov	DWORD [52+esp],edx
+	mov	DWORD [60+esp],edi
+	xor	eax,eax
+	xor	edx,edx
+L$007tail_loop:	; ebx still holds the remaining byte count loaded before the jb
+	mov	al,BYTE [ebp*1+esi]	; input byte
+	mov	dl,BYTE [esi*1+esp]	; keystream byte
+	lea	esi,[1+esi]	; index++ via lea, which leaves the flags alone
+	xor	al,dl
+	mov	BYTE [esi*1+ecx-1],al	; -1 compensates for the increment above
+	dec	ebx
+	jnz	NEAR L$007tail_loop
+L$006done:
+	add	esp,132	; release the scratch frame allocated at L$001x86
+L$000no_data:	; the zero-length path lands here; it never allocated the frame
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_ChaCha20_ssse3
+align	16
+_ChaCha20_ssse3:
+L$_ChaCha20_ssse3_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+L$ssse3_shortcut:	; _ChaCha20_ctr32 jumps here with its own four pushes already done
+	mov	edi,DWORD [20+esp]	; edi = output pointer (first stack argument)
+	mov	esi,DWORD [24+esp]	; esi = input pointer
+	mov	ecx,DWORD [28+esp]	; ecx = byte count
+	mov	edx,DWORD [32+esp]	; edx = fourth stack argument (32 bytes are read from it)
+	mov	ebx,DWORD [36+esp]	; ebx = fifth stack argument (16 bytes are read from it)
+	mov	ebp,esp	; remember the incoming esp
+	sub	esp,524
+	and	esp,-64	; 64-byte-align the scratch area so movdqa can be used on it
+	mov	DWORD [512+esp],ebp	; saved esp, restored at L$011done
+	lea	eax,[(L$ssse3_data-L$pic_point)+eax]	; eax = &L$ssse3_data, given eax = &L$pic_point from _ChaCha20_ctr32. NOTE(review): nothing between _ChaCha20_ssse3 and here sets eax, so a direct call to the global entry would not have it -- confirm no caller does that
+	movdqu	xmm3,[ebx]
+	cmp	ecx,256
+	jb	NEAR L$0081x	; under 256 bytes: use the one-block-at-a-time path
+	mov	DWORD [516+esp],edx	; keep both pointers for the later drop to the 1x path
+	mov	DWORD [520+esp],ebx
+	sub	ecx,256
+	lea	ebp,[384+esp]	; ebp = base of the broadcast initial state
+	movdqu	xmm7,[edx]
+	pshufd	xmm0,xmm3,0	; broadcast each dword of xmm3 into its own register
+	pshufd	xmm1,xmm3,85
+	pshufd	xmm2,xmm3,170
+	pshufd	xmm3,xmm3,255
+	paddd	xmm0,[48+eax]	; add lane offsets 0,1,2,3 to the counter lanes
+	pshufd	xmm4,xmm7,0
+	pshufd	xmm5,xmm7,85
+	psubd	xmm0,[64+eax]	; pre-subtract 4; L$009outer_loop adds 4 before each batch
+	pshufd	xmm6,xmm7,170
+	pshufd	xmm7,xmm7,255
+	movdqa	[64+ebp],xmm0
+	movdqa	[80+ebp],xmm1
+	movdqa	[96+ebp],xmm2
+	movdqa	[112+ebp],xmm3
+	movdqu	xmm3,[16+edx]
+	movdqa	[ebp-64],xmm4
+	movdqa	[ebp-48],xmm5
+	movdqa	[ebp-32],xmm6
+	movdqa	[ebp-16],xmm7
+	movdqa	xmm7,[32+eax]	; the four constant words from L$ssse3_data
+	lea	ebx,[128+esp]	; ebx = base of the working state
+	pshufd	xmm0,xmm3,0
+	pshufd	xmm1,xmm3,85
+	pshufd	xmm2,xmm3,170
+	pshufd	xmm3,xmm3,255
+	pshufd	xmm4,xmm7,0
+	pshufd	xmm5,xmm7,85
+	pshufd	xmm6,xmm7,170
+	pshufd	xmm7,xmm7,255
+	movdqa	[ebp],xmm0
+	movdqa	[16+ebp],xmm1
+	movdqa	[32+ebp],xmm2
+	movdqa	[48+ebp],xmm3
+	movdqa	[ebp-128],xmm4
+	movdqa	[ebp-112],xmm5
+	movdqa	[ebp-96],xmm6
+	movdqa	[ebp-80],xmm7
+	lea	esi,[128+esi]	; bias both pointers by 128 so offsets -128..+64 reach all four blocks
+	lea	edi,[128+edi]
+	jmp	NEAR L$009outer_loop
+align	16
+L$009outer_loop:	; once per 256-byte batch: copy initial state (ebp) to working state (ebx)
+	movdqa	xmm1,[ebp-112]
+	movdqa	xmm2,[ebp-96]
+	movdqa	xmm3,[ebp-80]
+	movdqa	xmm5,[ebp-48]
+	movdqa	xmm6,[ebp-32]
+	movdqa	xmm7,[ebp-16]
+	movdqa	[ebx-112],xmm1
+	movdqa	[ebx-96],xmm2
+	movdqa	[ebx-80],xmm3
+	movdqa	[ebx-48],xmm5
+	movdqa	[ebx-32],xmm6
+	movdqa	[ebx-16],xmm7
+	movdqa	xmm2,[32+ebp]
+	movdqa	xmm3,[48+ebp]
+	movdqa	xmm4,[64+ebp]
+	movdqa	xmm5,[80+ebp]
+	movdqa	xmm6,[96+ebp]
+	movdqa	xmm7,[112+ebp]
+	paddd	xmm4,[64+eax]	; advance all four counter lanes by 4
+	movdqa	[32+ebx],xmm2
+	movdqa	[48+ebx],xmm3
+	movdqa	[64+ebx],xmm4
+	movdqa	[80+ebx],xmm5
+	movdqa	[96+ebx],xmm6
+	movdqa	[112+ebx],xmm7
+	movdqa	[64+ebp],xmm4	; persist the advanced counters in the initial state too
+	movdqa	xmm0,[ebp-128]
+	movdqa	xmm6,xmm4
+	movdqa	xmm3,[ebp-64]
+	movdqa	xmm4,[ebp]
+	movdqa	xmm5,[16+ebp]
+	mov	edx,10	; ten passes through L$010loop
+	nop
+align	16
+L$010loop:	; one pass over four blocks at once; [eax] and [16+eax] are the rotate-by-16 / rotate-by-8 pshufb masks
+	paddd	xmm0,xmm3
+	movdqa	xmm2,xmm3
+	pxor	xmm6,xmm0
+	pshufb	xmm6,[eax]
+	paddd	xmm4,xmm6
+	pxor	xmm2,xmm4
+	movdqa	xmm3,[ebx-48]
+	movdqa	xmm1,xmm2
+	pslld	xmm2,12	; pslld 12 / psrld 20 / por = rotate left by 12
+	psrld	xmm1,20
+	por	xmm2,xmm1
+	movdqa	xmm1,[ebx-112]
+	paddd	xmm0,xmm2
+	movdqa	xmm7,[80+ebx]
+	pxor	xmm6,xmm0
+	movdqa	[ebx-128],xmm0
+	pshufb	xmm6,[16+eax]
+	paddd	xmm4,xmm6
+	movdqa	[64+ebx],xmm6
+	pxor	xmm2,xmm4
+	paddd	xmm1,xmm3
+	movdqa	xmm0,xmm2
+	pslld	xmm2,7	; pslld 7 / psrld 25 / por = rotate left by 7
+	psrld	xmm0,25
+	pxor	xmm7,xmm1
+	por	xmm2,xmm0
+	movdqa	[ebx],xmm4
+	pshufb	xmm7,[eax]
+	movdqa	[ebx-64],xmm2
+	paddd	xmm5,xmm7
+	movdqa	xmm4,[32+ebx]
+	pxor	xmm3,xmm5
+	movdqa	xmm2,[ebx-32]
+	movdqa	xmm0,xmm3
+	pslld	xmm3,12
+	psrld	xmm0,20
+	por	xmm3,xmm0
+	movdqa	xmm0,[ebx-96]
+	paddd	xmm1,xmm3
+	movdqa	xmm6,[96+ebx]
+	pxor	xmm7,xmm1
+	movdqa	[ebx-112],xmm1
+	pshufb	xmm7,[16+eax]
+	paddd	xmm5,xmm7
+	movdqa	[80+ebx],xmm7
+	pxor	xmm3,xmm5
+	paddd	xmm0,xmm2
+	movdqa	xmm1,xmm3
+	pslld	xmm3,7
+	psrld	xmm1,25
+	pxor	xmm6,xmm0
+	por	xmm3,xmm1
+	movdqa	[16+ebx],xmm5
+	pshufb	xmm6,[eax]
+	movdqa	[ebx-48],xmm3
+	paddd	xmm4,xmm6
+	movdqa	xmm5,[48+ebx]
+	pxor	xmm2,xmm4
+	movdqa	xmm3,[ebx-16]
+	movdqa	xmm1,xmm2
+	pslld	xmm2,12
+	psrld	xmm1,20
+	por	xmm2,xmm1
+	movdqa	xmm1,[ebx-80]
+	paddd	xmm0,xmm2
+	movdqa	xmm7,[112+ebx]
+	pxor	xmm6,xmm0
+	movdqa	[ebx-96],xmm0
+	pshufb	xmm6,[16+eax]
+	paddd	xmm4,xmm6
+	movdqa	[96+ebx],xmm6
+	pxor	xmm2,xmm4
+	paddd	xmm1,xmm3
+	movdqa	xmm0,xmm2
+	pslld	xmm2,7
+	psrld	xmm0,25
+	pxor	xmm7,xmm1
+	por	xmm2,xmm0
+	pshufb	xmm7,[eax]
+	movdqa	[ebx-32],xmm2
+	paddd	xmm5,xmm7
+	pxor	xmm3,xmm5
+	movdqa	xmm2,[ebx-48]
+	movdqa	xmm0,xmm3
+	pslld	xmm3,12
+	psrld	xmm0,20
+	por	xmm3,xmm0
+	movdqa	xmm0,[ebx-128]
+	paddd	xmm1,xmm3
+	pxor	xmm7,xmm1
+	movdqa	[ebx-80],xmm1
+	pshufb	xmm7,[16+eax]
+	paddd	xmm5,xmm7
+	movdqa	xmm6,xmm7
+	pxor	xmm3,xmm5
+	paddd	xmm0,xmm2
+	movdqa	xmm1,xmm3
+	pslld	xmm3,7
+	psrld	xmm1,25
+	pxor	xmm6,xmm0
+	por	xmm3,xmm1
+	pshufb	xmm6,[eax]
+	movdqa	[ebx-16],xmm3
+	paddd	xmm4,xmm6
+	pxor	xmm2,xmm4
+	movdqa	xmm3,[ebx-32]
+	movdqa	xmm1,xmm2
+	pslld	xmm2,12
+	psrld	xmm1,20
+	por	xmm2,xmm1
+	movdqa	xmm1,[ebx-112]
+	paddd	xmm0,xmm2
+	movdqa	xmm7,[64+ebx]
+	pxor	xmm6,xmm0
+	movdqa	[ebx-128],xmm0
+	pshufb	xmm6,[16+eax]
+	paddd	xmm4,xmm6
+	movdqa	[112+ebx],xmm6
+	pxor	xmm2,xmm4
+	paddd	xmm1,xmm3
+	movdqa	xmm0,xmm2
+	pslld	xmm2,7
+	psrld	xmm0,25
+	pxor	xmm7,xmm1
+	por	xmm2,xmm0
+	movdqa	[32+ebx],xmm4
+	pshufb	xmm7,[eax]
+	movdqa	[ebx-48],xmm2
+	paddd	xmm5,xmm7
+	movdqa	xmm4,[ebx]
+	pxor	xmm3,xmm5
+	movdqa	xmm2,[ebx-16]
+	movdqa	xmm0,xmm3
+	pslld	xmm3,12
+	psrld	xmm0,20
+	por	xmm3,xmm0
+	movdqa	xmm0,[ebx-96]
+	paddd	xmm1,xmm3
+	movdqa	xmm6,[80+ebx]
+	pxor	xmm7,xmm1
+	movdqa	[ebx-112],xmm1
+	pshufb	xmm7,[16+eax]
+	paddd	xmm5,xmm7
+	movdqa	[64+ebx],xmm7
+	pxor	xmm3,xmm5
+	paddd	xmm0,xmm2
+	movdqa	xmm1,xmm3
+	pslld	xmm3,7
+	psrld	xmm1,25
+	pxor	xmm6,xmm0
+	por	xmm3,xmm1
+	movdqa	[48+ebx],xmm5
+	pshufb	xmm6,[eax]
+	movdqa	[ebx-32],xmm3
+	paddd	xmm4,xmm6
+	movdqa	xmm5,[16+ebx]
+	pxor	xmm2,xmm4
+	movdqa	xmm3,[ebx-64]
+	movdqa	xmm1,xmm2
+	pslld	xmm2,12
+	psrld	xmm1,20
+	por	xmm2,xmm1
+	movdqa	xmm1,[ebx-80]
+	paddd	xmm0,xmm2
+	movdqa	xmm7,[96+ebx]
+	pxor	xmm6,xmm0
+	movdqa	[ebx-96],xmm0
+	pshufb	xmm6,[16+eax]
+	paddd	xmm4,xmm6
+	movdqa	[80+ebx],xmm6
+	pxor	xmm2,xmm4
+	paddd	xmm1,xmm3
+	movdqa	xmm0,xmm2
+	pslld	xmm2,7
+	psrld	xmm0,25
+	pxor	xmm7,xmm1
+	por	xmm2,xmm0
+	pshufb	xmm7,[eax]
+	movdqa	[ebx-16],xmm2
+	paddd	xmm5,xmm7
+	pxor	xmm3,xmm5
+	movdqa	xmm0,xmm3
+	pslld	xmm3,12
+	psrld	xmm0,20
+	por	xmm3,xmm0
+	movdqa	xmm0,[ebx-128]
+	paddd	xmm1,xmm3
+	movdqa	xmm6,[64+ebx]
+	pxor	xmm7,xmm1
+	movdqa	[ebx-80],xmm1
+	pshufb	xmm7,[16+eax]
+	paddd	xmm5,xmm7
+	movdqa	[96+ebx],xmm7
+	pxor	xmm3,xmm5
+	movdqa	xmm1,xmm3
+	pslld	xmm3,7
+	psrld	xmm1,25
+	por	xmm3,xmm1
+	dec	edx
+	jnz	NEAR L$010loop	; repeat until the counter set to 10 reaches zero
+	movdqa	[ebx-64],xmm3	; flush the words still held in registers to the working state
+	movdqa	[ebx],xmm4
+	movdqa	[16+ebx],xmm5
+	movdqa	[64+ebx],xmm6
+	movdqa	[96+ebx],xmm7
+	movdqa	xmm1,[ebx-112]
+	movdqa	xmm2,[ebx-96]
+	movdqa	xmm3,[ebx-80]
+	paddd	xmm0,[ebp-128]	; add the initial state back in (words at ebp-128..-80)
+	paddd	xmm1,[ebp-112]
+	paddd	xmm2,[ebp-96]
+	paddd	xmm3,[ebp-80]
+	movdqa	xmm6,xmm0	; punpck* sequence: 4x4 dword transpose, one 16-byte chunk per block
+	punpckldq	xmm0,xmm1
+	movdqa	xmm7,xmm2
+	punpckldq	xmm2,xmm3
+	punpckhdq	xmm6,xmm1
+	punpckhdq	xmm7,xmm3
+	movdqa	xmm1,xmm0
+	punpcklqdq	xmm0,xmm2
+	movdqa	xmm3,xmm6
+	punpcklqdq	xmm6,xmm7
+	punpckhqdq	xmm1,xmm2
+	punpckhqdq	xmm3,xmm7
+	movdqu	xmm4,[esi-128]	; the same 16-byte slot of each of the four input blocks (64 bytes apart)
+	movdqu	xmm5,[esi-64]
+	movdqu	xmm2,[esi]
+	movdqu	xmm7,[64+esi]
+	lea	esi,[16+esi]
+	pxor	xmm4,xmm0
+	movdqa	xmm0,[ebx-64]
+	pxor	xmm5,xmm1
+	movdqa	xmm1,[ebx-48]
+	pxor	xmm6,xmm2
+	movdqa	xmm2,[ebx-32]
+	pxor	xmm7,xmm3
+	movdqa	xmm3,[ebx-16]
+	movdqu	[edi-128],xmm4
+	movdqu	[edi-64],xmm5
+	movdqu	[edi],xmm6
+	movdqu	[64+edi],xmm7
+	lea	edi,[16+edi]
+	paddd	xmm0,[ebp-64]	; second group: words at ebp-64..-16
+	paddd	xmm1,[ebp-48]
+	paddd	xmm2,[ebp-32]
+	paddd	xmm3,[ebp-16]
+	movdqa	xmm6,xmm0
+	punpckldq	xmm0,xmm1
+	movdqa	xmm7,xmm2
+	punpckldq	xmm2,xmm3
+	punpckhdq	xmm6,xmm1
+	punpckhdq	xmm7,xmm3
+	movdqa	xmm1,xmm0
+	punpcklqdq	xmm0,xmm2
+	movdqa	xmm3,xmm6
+	punpcklqdq	xmm6,xmm7
+	punpckhqdq	xmm1,xmm2
+	punpckhqdq	xmm3,xmm7
+	movdqu	xmm4,[esi-128]
+	movdqu	xmm5,[esi-64]
+	movdqu	xmm2,[esi]
+	movdqu	xmm7,[64+esi]
+	lea	esi,[16+esi]
+	pxor	xmm4,xmm0
+	movdqa	xmm0,[ebx]
+	pxor	xmm5,xmm1
+	movdqa	xmm1,[16+ebx]
+	pxor	xmm6,xmm2
+	movdqa	xmm2,[32+ebx]
+	pxor	xmm7,xmm3
+	movdqa	xmm3,[48+ebx]
+	movdqu	[edi-128],xmm4
+	movdqu	[edi-64],xmm5
+	movdqu	[edi],xmm6
+	movdqu	[64+edi],xmm7
+	lea	edi,[16+edi]
+	paddd	xmm0,[ebp]	; third group: words at ebp..ebp+48
+	paddd	xmm1,[16+ebp]
+	paddd	xmm2,[32+ebp]
+	paddd	xmm3,[48+ebp]
+	movdqa	xmm6,xmm0
+	punpckldq	xmm0,xmm1
+	movdqa	xmm7,xmm2
+	punpckldq	xmm2,xmm3
+	punpckhdq	xmm6,xmm1
+	punpckhdq	xmm7,xmm3
+	movdqa	xmm1,xmm0
+	punpcklqdq	xmm0,xmm2
+	movdqa	xmm3,xmm6
+	punpcklqdq	xmm6,xmm7
+	punpckhqdq	xmm1,xmm2
+	punpckhqdq	xmm3,xmm7
+	movdqu	xmm4,[esi-128]
+	movdqu	xmm5,[esi-64]
+	movdqu	xmm2,[esi]
+	movdqu	xmm7,[64+esi]
+	lea	esi,[16+esi]
+	pxor	xmm4,xmm0
+	movdqa	xmm0,[64+ebx]
+	pxor	xmm5,xmm1
+	movdqa	xmm1,[80+ebx]
+	pxor	xmm6,xmm2
+	movdqa	xmm2,[96+ebx]
+	pxor	xmm7,xmm3
+	movdqa	xmm3,[112+ebx]
+	movdqu	[edi-128],xmm4
+	movdqu	[edi-64],xmm5
+	movdqu	[edi],xmm6
+	movdqu	[64+edi],xmm7
+	lea	edi,[16+edi]
+	paddd	xmm0,[64+ebp]	; fourth group: words at ebp+64..ebp+112 (includes the counters)
+	paddd	xmm1,[80+ebp]
+	paddd	xmm2,[96+ebp]
+	paddd	xmm3,[112+ebp]
+	movdqa	xmm6,xmm0
+	punpckldq	xmm0,xmm1
+	movdqa	xmm7,xmm2
+	punpckldq	xmm2,xmm3
+	punpckhdq	xmm6,xmm1
+	punpckhdq	xmm7,xmm3
+	movdqa	xmm1,xmm0
+	punpcklqdq	xmm0,xmm2
+	movdqa	xmm3,xmm6
+	punpcklqdq	xmm6,xmm7
+	punpckhqdq	xmm1,xmm2
+	punpckhqdq	xmm3,xmm7
+	movdqu	xmm4,[esi-128]
+	movdqu	xmm5,[esi-64]
+	movdqu	xmm2,[esi]
+	movdqu	xmm7,[64+esi]
+	lea	esi,[208+esi]	; 16+16+16+208 = 256: input pointer now at the next batch
+	pxor	xmm4,xmm0
+	pxor	xmm5,xmm1
+	pxor	xmm6,xmm2
+	pxor	xmm7,xmm3
+	movdqu	[edi-128],xmm4
+	movdqu	[edi-64],xmm5
+	movdqu	[edi],xmm6
+	movdqu	[64+edi],xmm7
+	lea	edi,[208+edi]	; output pointer likewise advanced by 256 in total
+	sub	ecx,256
+	jnc	NEAR L$009outer_loop	; no borrow: at least another 256 bytes were pending
+	add	ecx,256	; undo the over-subtraction; ecx = leftover byte count
+	jz	NEAR L$011done
+	mov	ebx,DWORD [520+esp]	; reload the pointer saved from the fifth argument
+	lea	esi,[esi-128]	; remove the +128 bias from both data pointers
+	mov	edx,DWORD [516+esp]	; reload the pointer saved from the fourth argument
+	lea	edi,[edi-128]
+	movd	xmm2,DWORD [64+ebp]	; lane-0 counter of the batch just finished
+	movdqu	xmm3,[ebx]
+	paddd	xmm2,[96+eax]	; +4 in dword 0: first counter not yet used
+	pand	xmm3,[112+eax]	; clear dword 0 of the original block, keep dwords 1-3
+	por	xmm3,xmm2	; xmm3 = counter block for the 1x path that follows
+L$0081x:	; one-block-at-a-time path; expects edx -> 32-byte input, xmm3 = counter block, eax = &L$ssse3_data
+	movdqa	xmm0,[32+eax]	; xmm0 = the four constant words
+	movdqu	xmm1,[edx]
+	movdqu	xmm2,[16+edx]
+	movdqa	xmm6,[eax]	; xmm6 = rotate-left-16 pshufb mask
+	movdqa	xmm7,[16+eax]	; xmm7 = rotate-left-8 pshufb mask
+	mov	DWORD [48+esp],ebp	; NOTE(review): this slot is overwritten by the movdqa to [48+esp] four lines below
+	movdqa	[esp],xmm0	; 0..63+esp = initial state for this block
+	movdqa	[16+esp],xmm1
+	movdqa	[32+esp],xmm2
+	movdqa	[48+esp],xmm3
+	mov	edx,10	; ten passes through L$012loop1x
+	jmp	NEAR L$012loop1x
+align	16
+L$013outer1x:	; set up the next block: reload the initial state and bump the counter
+	movdqa	xmm3,[80+eax]	; {1,0,0,0}
+	movdqa	xmm0,[esp]
+	movdqa	xmm1,[16+esp]
+	movdqa	xmm2,[32+esp]
+	paddd	xmm3,[48+esp]	; xmm3 = previous counter row with dword 0 incremented
+	mov	edx,10
+	movdqa	[48+esp],xmm3	; store the incremented row back as the new initial state
+	jmp	NEAR L$012loop1x
+align	16
+L$012loop1x:	; one pass on a single block held as four rows in xmm0..xmm3
+	paddd	xmm0,xmm1
+	pxor	xmm3,xmm0
+db	102,15,56,0,222	; pshufb xmm3,xmm6 (rotate each dword left by 16)
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,20
+	pslld	xmm4,12
+	por	xmm1,xmm4	; rotate left by 12
+	paddd	xmm0,xmm1
+	pxor	xmm3,xmm0
+db	102,15,56,0,223	; pshufb xmm3,xmm7 (rotate each dword left by 8)
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,25
+	pslld	xmm4,7
+	por	xmm1,xmm4	; rotate left by 7
+	pshufd	xmm2,xmm2,78	; rotate the dword lanes of rows 1-3 so the next half works on diagonals
+	pshufd	xmm1,xmm1,57
+	pshufd	xmm3,xmm3,147
+	nop
+	paddd	xmm0,xmm1
+	pxor	xmm3,xmm0
+db	102,15,56,0,222	; pshufb xmm3,xmm6
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,20
+	pslld	xmm4,12
+	por	xmm1,xmm4
+	paddd	xmm0,xmm1
+	pxor	xmm3,xmm0
+db	102,15,56,0,223	; pshufb xmm3,xmm7
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,25
+	pslld	xmm4,7
+	por	xmm1,xmm4
+	pshufd	xmm2,xmm2,78	; inverse lane rotations (57 and 147 swapped) restore the row layout
+	pshufd	xmm1,xmm1,147
+	pshufd	xmm3,xmm3,57
+	dec	edx
+	jnz	NEAR L$012loop1x
+	paddd	xmm0,[esp]	; add the initial state back in: xmm0..xmm3 = 64 bytes of keystream
+	paddd	xmm1,[16+esp]
+	paddd	xmm2,[32+esp]
+	paddd	xmm3,[48+esp]
+	cmp	ecx,64
+	jb	NEAR L$014tail	; fewer than 64 bytes left: handle them byte by byte
+	movdqu	xmm4,[esi]
+	movdqu	xmm5,[16+esi]
+	pxor	xmm0,xmm4	; XOR keystream with input
+	movdqu	xmm4,[32+esi]
+	pxor	xmm1,xmm5
+	movdqu	xmm5,[48+esi]
+	pxor	xmm2,xmm4
+	pxor	xmm3,xmm5
+	lea	esi,[64+esi]
+	movdqu	[edi],xmm0
+	movdqu	[16+edi],xmm1
+	movdqu	[32+edi],xmm2
+	movdqu	[48+edi],xmm3
+	lea	edi,[64+edi]
+	sub	ecx,64
+	jnz	NEAR L$013outer1x	; bytes remain: produce another block
+	jmp	NEAR L$011done
+L$014tail:	; final partial block: spill the keystream to the stack and XOR ecx bytes one at a time
+	movdqa	[esp],xmm0
+	movdqa	[16+esp],xmm1
+	movdqa	[32+esp],xmm2
+	movdqa	[48+esp],xmm3
+	xor	eax,eax	; eax (the table pointer) is not needed from here on
+	xor	edx,edx
+	xor	ebp,ebp	; ebp = byte index, starting at 0
+L$015tail_loop:
+	mov	al,BYTE [ebp*1+esp]	; keystream byte
+	mov	dl,BYTE [ebp*1+esi]	; input byte
+	lea	ebp,[1+ebp]	; index++ via lea, which leaves the flags alone
+	xor	al,dl
+	mov	BYTE [ebp*1+edi-1],al	; -1 compensates for the increment above
+	dec	ecx
+	jnz	NEAR L$015tail_loop
+L$011done:
+	mov	esp,DWORD [512+esp]	; restore the esp saved before the frame was aligned
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	64
+L$ssse3_data:	; constant table addressed through eax in _ChaCha20_ssse3
+db	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13	; +0: pshufb mask that rotates each dword left by 16 bits
+db	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14	; +16: pshufb mask that rotates each dword left by 8 bits
+dd	1634760805,857760878,2036477234,1797285236	; +32: ASCII "expand 32-byte k"
+dd	0,1,2,3	; +48: per-lane counter offsets for the four-block path
+dd	4,4,4,4	; +64: counter step per four-block batch
+dd	1,0,0,0	; +80: counter step for the one-block path
+dd	4,0,0,0	; +96: added to the lane-0 counter when dropping from the four-block to the one-block path
+dd	0,-1,-1,-1	; +112: mask that clears dword 0 and keeps dwords 1-3
+align	64
+db	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54	; ASCII "ChaCha20 for x86"
+db	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32	; ASCII ", CRYPTOGAMS by "
+db	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111	; ASCII "<appro@openssl.o"
+db	114,103,62,0	; ASCII "rg>" plus NUL terminator
+segment	.bss
+common	_OPENSSL_ia32cap_P 16	; 16-byte common symbol read by the feature tests in _ChaCha20_ctr32
diff --git a/deps/boringssl/win-x86/crypto/fipsmodule/aesni-x86.asm b/deps/boringssl/win-x86/crypto/fipsmodule/aesni-x86.asm
new file mode 100644
index 0000000..9ea1375
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/fipsmodule/aesni-x86.asm
@@ -0,0 +1,2461 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+;extern	_OPENSSL_ia32cap_P
+%ifdef BORINGSSL_DISPATCH_TEST
+extern	_BORINGSSL_function_hit
+%endif
+global	_aes_hw_encrypt	; encrypt one 16-byte block; stack args: 1=input ptr, 2=output ptr, 3=key schedule ptr
+align	16
+_aes_hw_encrypt:
+L$_aes_hw_encrypt_begin:
+%ifdef BORINGSSL_DISPATCH_TEST
+	push	ebx	; test-only hook: mark that this routine was entered
+	push	edx
+	call	L$000pic	; call/pop pair obtains the current EIP for position-independent addressing
+L$000pic:
+	pop	ebx
+	lea	ebx,[(_BORINGSSL_function_hit+1-L$000pic)+ebx]	; ebx = address of _BORINGSSL_function_hit+1
+	mov	edx,1
+	mov	BYTE [ebx],dl	; set that flag byte to 1
+	pop	edx
+	pop	ebx
+%endif
+	mov	eax,DWORD [4+esp]	; eax = arg 1 (input block)
+	mov	edx,DWORD [12+esp]	; edx = arg 3 (key schedule)
+	movups	xmm2,[eax]	; xmm2 = 16 input bytes (unaligned load)
+	mov	ecx,DWORD [240+edx]	; ecx = round counter stored at offset 240 of the key structure
+	mov	eax,DWORD [8+esp]	; eax = arg 2 (output block)
+	movups	xmm0,[edx]	; xmm0 = round key 0
+	movups	xmm1,[16+edx]	; xmm1 = round key 1
+	lea	edx,[32+edx]	; edx -> round key 2
+	xorps	xmm2,xmm0	; initial whitening with round key 0
+L$001enc1_loop_1:
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	ecx	; ZF from this dec drives the jnz below (movups/lea leave flags untouched)
+	movups	xmm1,[edx]	; fetch the next round key
+	lea	edx,[16+edx]
+	jnz	NEAR L$001enc1_loop_1
+db	102,15,56,221,209	; aesenclast xmm2,xmm1
+	pxor	xmm0,xmm0	; clear key material from the registers
+	pxor	xmm1,xmm1
+	movups	[eax],xmm2	; store the result block
+	pxor	xmm2,xmm2	; clear the data register too
+	ret
+global	_aes_hw_decrypt	; decrypt one 16-byte block; stack args: 1=input ptr, 2=output ptr, 3=key schedule ptr
+align	16
+_aes_hw_decrypt:
+L$_aes_hw_decrypt_begin:
+	mov	eax,DWORD [4+esp]	; eax = arg 1 (input block)
+	mov	edx,DWORD [12+esp]	; edx = arg 3 (key schedule)
+	movups	xmm2,[eax]	; xmm2 = 16 input bytes (unaligned load)
+	mov	ecx,DWORD [240+edx]	; ecx = round counter stored at offset 240 of the key structure
+	mov	eax,DWORD [8+esp]	; eax = arg 2 (output block)
+	movups	xmm0,[edx]	; xmm0 = round key 0
+	movups	xmm1,[16+edx]	; xmm1 = round key 1
+	lea	edx,[32+edx]	; edx -> round key 2
+	xorps	xmm2,xmm0	; initial whitening with round key 0
+L$002dec1_loop_2:
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	ecx	; ZF from this dec drives the jnz below (movups/lea leave flags untouched)
+	movups	xmm1,[edx]	; fetch the next round key
+	lea	edx,[16+edx]
+	jnz	NEAR L$002dec1_loop_2
+db	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	pxor	xmm0,xmm0	; clear key material from the registers
+	pxor	xmm1,xmm1
+	movups	[eax],xmm2	; store the result block
+	pxor	xmm2,xmm2	; clear the data register too
+	ret
+align	16
+__aesni_encrypt2:	; internal helper: encrypt xmm2 and xmm3 in parallel; in: edx=key schedule, ecx=round counter; clobbers xmm0,xmm1,ecx,edx
+	movups	xmm0,[edx]	; round key 0
+	shl	ecx,4	; ecx = counter*16 (byte offset)
+	movups	xmm1,[16+edx]	; round key 1
+	xorps	xmm2,xmm0	; whiten both blocks with round key 0
+	pxor	xmm3,xmm0
+	movups	xmm0,[32+edx]	; round key 2
+	lea	edx,[32+ecx*1+edx]	; edx = key+32+counter*16; keys are read at negative offsets from here
+	neg	ecx
+	add	ecx,16	; ecx = 16-counter*16: negative byte index that counts up to zero
+L$003enc2_loop:
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+	movups	xmm1,[ecx*1+edx]
+	add	ecx,32	; two round keys per iteration; ZF from this add drives the jnz
+db	102,15,56,220,208	; aesenc xmm2,xmm0
+db	102,15,56,220,216	; aesenc xmm3,xmm0
+	movups	xmm0,[ecx*1+edx-16]
+	jnz	NEAR L$003enc2_loop
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+db	102,15,56,221,208	; aesenclast xmm2,xmm0
+db	102,15,56,221,216	; aesenclast xmm3,xmm0
+	ret
+align	16
+__aesni_decrypt2:	; internal helper: decrypt xmm2 and xmm3 in parallel; in: edx=key schedule, ecx=round counter; clobbers xmm0,xmm1,ecx,edx
+	movups	xmm0,[edx]	; round key 0
+	shl	ecx,4	; ecx = counter*16 (byte offset)
+	movups	xmm1,[16+edx]	; round key 1
+	xorps	xmm2,xmm0	; whiten both blocks with round key 0
+	pxor	xmm3,xmm0
+	movups	xmm0,[32+edx]	; round key 2
+	lea	edx,[32+ecx*1+edx]	; edx = key+32+counter*16; keys are read at negative offsets from here
+	neg	ecx
+	add	ecx,16	; ecx = 16-counter*16: negative byte index that counts up to zero
+L$004dec2_loop:
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+db	102,15,56,222,217	; aesdec xmm3,xmm1
+	movups	xmm1,[ecx*1+edx]
+	add	ecx,32	; two round keys per iteration; ZF from this add drives the jnz
+db	102,15,56,222,208	; aesdec xmm2,xmm0
+db	102,15,56,222,216	; aesdec xmm3,xmm0
+	movups	xmm0,[ecx*1+edx-16]
+	jnz	NEAR L$004dec2_loop
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+db	102,15,56,222,217	; aesdec xmm3,xmm1
+db	102,15,56,223,208	; aesdeclast xmm2,xmm0
+db	102,15,56,223,216	; aesdeclast xmm3,xmm0
+	ret
+align	16
+__aesni_encrypt3:	; internal helper: encrypt xmm2..xmm4 in parallel; in: edx=key schedule, ecx=round counter; clobbers xmm0,xmm1,ecx,edx
+	movups	xmm0,[edx]	; round key 0
+	shl	ecx,4	; ecx = counter*16 (byte offset)
+	movups	xmm1,[16+edx]	; round key 1
+	xorps	xmm2,xmm0	; whiten the three blocks with round key 0
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm0
+	movups	xmm0,[32+edx]	; round key 2
+	lea	edx,[32+ecx*1+edx]	; edx = key+32+counter*16; keys are read at negative offsets from here
+	neg	ecx
+	add	ecx,16	; ecx = 16-counter*16: negative byte index that counts up to zero
+L$005enc3_loop:
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+db	102,15,56,220,225	; aesenc xmm4,xmm1
+	movups	xmm1,[ecx*1+edx]
+	add	ecx,32	; two round keys per iteration; ZF from this add drives the jnz
+db	102,15,56,220,208	; aesenc xmm2,xmm0
+db	102,15,56,220,216	; aesenc xmm3,xmm0
+db	102,15,56,220,224	; aesenc xmm4,xmm0
+	movups	xmm0,[ecx*1+edx-16]
+	jnz	NEAR L$005enc3_loop
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+db	102,15,56,220,225	; aesenc xmm4,xmm1
+db	102,15,56,221,208	; aesenclast xmm2,xmm0
+db	102,15,56,221,216	; aesenclast xmm3,xmm0
+db	102,15,56,221,224	; aesenclast xmm4,xmm0
+	ret
+align	16
+__aesni_decrypt3:	; internal helper: decrypt xmm2..xmm4 in parallel; in: edx=key schedule, ecx=round counter; clobbers xmm0,xmm1,ecx,edx
+	movups	xmm0,[edx]	; round key 0
+	shl	ecx,4	; ecx = counter*16 (byte offset)
+	movups	xmm1,[16+edx]	; round key 1
+	xorps	xmm2,xmm0	; whiten the three blocks with round key 0
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm0
+	movups	xmm0,[32+edx]	; round key 2
+	lea	edx,[32+ecx*1+edx]	; edx = key+32+counter*16; keys are read at negative offsets from here
+	neg	ecx
+	add	ecx,16	; ecx = 16-counter*16: negative byte index that counts up to zero
+L$006dec3_loop:
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+db	102,15,56,222,217	; aesdec xmm3,xmm1
+db	102,15,56,222,225	; aesdec xmm4,xmm1
+	movups	xmm1,[ecx*1+edx]
+	add	ecx,32	; two round keys per iteration; ZF from this add drives the jnz
+db	102,15,56,222,208	; aesdec xmm2,xmm0
+db	102,15,56,222,216	; aesdec xmm3,xmm0
+db	102,15,56,222,224	; aesdec xmm4,xmm0
+	movups	xmm0,[ecx*1+edx-16]
+	jnz	NEAR L$006dec3_loop
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+db	102,15,56,222,217	; aesdec xmm3,xmm1
+db	102,15,56,222,225	; aesdec xmm4,xmm1
+db	102,15,56,223,208	; aesdeclast xmm2,xmm0
+db	102,15,56,223,216	; aesdeclast xmm3,xmm0
+db	102,15,56,223,224	; aesdeclast xmm4,xmm0
+	ret
+align	16
+__aesni_encrypt4:	; internal helper: encrypt xmm2..xmm5 in parallel; in: edx=key schedule, ecx=round counter; clobbers xmm0,xmm1,ecx,edx
+	movups	xmm0,[edx]	; round key 0
+	movups	xmm1,[16+edx]	; round key 1
+	shl	ecx,4	; ecx = counter*16 (byte offset)
+	xorps	xmm2,xmm0	; whiten the four blocks with round key 0
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm0
+	pxor	xmm5,xmm0
+	movups	xmm0,[32+edx]	; round key 2
+	lea	edx,[32+ecx*1+edx]	; edx = key+32+counter*16; keys are read at negative offsets from here
+	neg	ecx
+db	15,31,64,0	; 4-byte NOP (nop dword [eax+0]), padding only
+	add	ecx,16	; ecx = 16-counter*16: negative byte index that counts up to zero
+L$007enc4_loop:
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+db	102,15,56,220,225	; aesenc xmm4,xmm1
+db	102,15,56,220,233	; aesenc xmm5,xmm1
+	movups	xmm1,[ecx*1+edx]
+	add	ecx,32	; two round keys per iteration; ZF from this add drives the jnz
+db	102,15,56,220,208	; aesenc xmm2,xmm0
+db	102,15,56,220,216	; aesenc xmm3,xmm0
+db	102,15,56,220,224	; aesenc xmm4,xmm0
+db	102,15,56,220,232	; aesenc xmm5,xmm0
+	movups	xmm0,[ecx*1+edx-16]
+	jnz	NEAR L$007enc4_loop
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+db	102,15,56,220,225	; aesenc xmm4,xmm1
+db	102,15,56,220,233	; aesenc xmm5,xmm1
+db	102,15,56,221,208	; aesenclast xmm2,xmm0
+db	102,15,56,221,216	; aesenclast xmm3,xmm0
+db	102,15,56,221,224	; aesenclast xmm4,xmm0
+db	102,15,56,221,232	; aesenclast xmm5,xmm0
+	ret
+align	16
+__aesni_decrypt4:	; internal helper: decrypt xmm2..xmm5 in parallel; in: edx=key schedule, ecx=round counter; clobbers xmm0,xmm1,ecx,edx
+	movups	xmm0,[edx]	; round key 0
+	movups	xmm1,[16+edx]	; round key 1
+	shl	ecx,4	; ecx = counter*16 (byte offset)
+	xorps	xmm2,xmm0	; whiten the four blocks with round key 0
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm0
+	pxor	xmm5,xmm0
+	movups	xmm0,[32+edx]	; round key 2
+	lea	edx,[32+ecx*1+edx]	; edx = key+32+counter*16; keys are read at negative offsets from here
+	neg	ecx
+db	15,31,64,0	; 4-byte NOP (nop dword [eax+0]), padding only
+	add	ecx,16	; ecx = 16-counter*16: negative byte index that counts up to zero
+L$008dec4_loop:
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+db	102,15,56,222,217	; aesdec xmm3,xmm1
+db	102,15,56,222,225	; aesdec xmm4,xmm1
+db	102,15,56,222,233	; aesdec xmm5,xmm1
+	movups	xmm1,[ecx*1+edx]
+	add	ecx,32	; two round keys per iteration; ZF from this add drives the jnz
+db	102,15,56,222,208	; aesdec xmm2,xmm0
+db	102,15,56,222,216	; aesdec xmm3,xmm0
+db	102,15,56,222,224	; aesdec xmm4,xmm0
+db	102,15,56,222,232	; aesdec xmm5,xmm0
+	movups	xmm0,[ecx*1+edx-16]
+	jnz	NEAR L$008dec4_loop
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+db	102,15,56,222,217	; aesdec xmm3,xmm1
+db	102,15,56,222,225	; aesdec xmm4,xmm1
+db	102,15,56,222,233	; aesdec xmm5,xmm1
+db	102,15,56,223,208	; aesdeclast xmm2,xmm0
+db	102,15,56,223,216	; aesdeclast xmm3,xmm0
+db	102,15,56,223,224	; aesdeclast xmm4,xmm0
+db	102,15,56,223,232	; aesdeclast xmm5,xmm0
+	ret
+align	16
+__aesni_encrypt6:	; internal helper: encrypt xmm2..xmm7 in parallel; in: edx=key schedule, ecx=round counter; clobbers xmm0,xmm1,ecx,edx
+	movups	xmm0,[edx]	; round key 0
+	shl	ecx,4	; ecx = counter*16 (byte offset)
+	movups	xmm1,[16+edx]	; round key 1
+	xorps	xmm2,xmm0	; whitening is interleaved with the first aesenc round below
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm0
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+	lea	edx,[32+ecx*1+edx]	; edx = key+32+counter*16; keys are read at negative offsets from here
+	neg	ecx
+db	102,15,56,220,225	; aesenc xmm4,xmm1
+	pxor	xmm7,xmm0
+	movups	xmm0,[ecx*1+edx]	; xmm0 = round key 2 (address works out to key+32)
+	add	ecx,16
+	jmp	NEAR L$009_aesni_encrypt6_inner	; xmm2..xmm4 already had round 1; finish it for xmm5..xmm7
+align	16
+L$010enc6_loop:
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+db	102,15,56,220,225	; aesenc xmm4,xmm1
+L$009_aesni_encrypt6_inner:
+db	102,15,56,220,233	; aesenc xmm5,xmm1
+db	102,15,56,220,241	; aesenc xmm6,xmm1
+db	102,15,56,220,249	; aesenc xmm7,xmm1
+L$_aesni_encrypt6_enter:	; secondary entry: other routines in this file call here after issuing round 1 themselves
+	movups	xmm1,[ecx*1+edx]
+	add	ecx,32	; two round keys per iteration; ZF from this add drives the jnz
+db	102,15,56,220,208	; aesenc xmm2,xmm0
+db	102,15,56,220,216	; aesenc xmm3,xmm0
+db	102,15,56,220,224	; aesenc xmm4,xmm0
+db	102,15,56,220,232	; aesenc xmm5,xmm0
+db	102,15,56,220,240	; aesenc xmm6,xmm0
+db	102,15,56,220,248	; aesenc xmm7,xmm0
+	movups	xmm0,[ecx*1+edx-16]
+	jnz	NEAR L$010enc6_loop
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+db	102,15,56,220,225	; aesenc xmm4,xmm1
+db	102,15,56,220,233	; aesenc xmm5,xmm1
+db	102,15,56,220,241	; aesenc xmm6,xmm1
+db	102,15,56,220,249	; aesenc xmm7,xmm1
+db	102,15,56,221,208	; aesenclast xmm2,xmm0
+db	102,15,56,221,216	; aesenclast xmm3,xmm0
+db	102,15,56,221,224	; aesenclast xmm4,xmm0
+db	102,15,56,221,232	; aesenclast xmm5,xmm0
+db	102,15,56,221,240	; aesenclast xmm6,xmm0
+db	102,15,56,221,248	; aesenclast xmm7,xmm0
+	ret
+align	16
+__aesni_decrypt6:	; internal helper: decrypt xmm2..xmm7 in parallel; in: edx=key schedule, ecx=round counter; clobbers xmm0,xmm1,ecx,edx
+	movups	xmm0,[edx]	; round key 0
+	shl	ecx,4	; ecx = counter*16 (byte offset)
+	movups	xmm1,[16+edx]	; round key 1
+	xorps	xmm2,xmm0	; whitening is interleaved with the first aesdec round below
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm0
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+db	102,15,56,222,217	; aesdec xmm3,xmm1
+	lea	edx,[32+ecx*1+edx]	; edx = key+32+counter*16; keys are read at negative offsets from here
+	neg	ecx
+db	102,15,56,222,225	; aesdec xmm4,xmm1
+	pxor	xmm7,xmm0
+	movups	xmm0,[ecx*1+edx]	; xmm0 = round key 2 (address works out to key+32)
+	add	ecx,16
+	jmp	NEAR L$011_aesni_decrypt6_inner	; xmm2..xmm4 already had round 1; finish it for xmm5..xmm7
+align	16
+L$012dec6_loop:
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+db	102,15,56,222,217	; aesdec xmm3,xmm1
+db	102,15,56,222,225	; aesdec xmm4,xmm1
+L$011_aesni_decrypt6_inner:
+db	102,15,56,222,233	; aesdec xmm5,xmm1
+db	102,15,56,222,241	; aesdec xmm6,xmm1
+db	102,15,56,222,249	; aesdec xmm7,xmm1
+L$_aesni_decrypt6_enter:	; secondary entry: other routines in this file call here after issuing round 1 themselves
+	movups	xmm1,[ecx*1+edx]
+	add	ecx,32	; two round keys per iteration; ZF from this add drives the jnz
+db	102,15,56,222,208	; aesdec xmm2,xmm0
+db	102,15,56,222,216	; aesdec xmm3,xmm0
+db	102,15,56,222,224	; aesdec xmm4,xmm0
+db	102,15,56,222,232	; aesdec xmm5,xmm0
+db	102,15,56,222,240	; aesdec xmm6,xmm0
+db	102,15,56,222,248	; aesdec xmm7,xmm0
+	movups	xmm0,[ecx*1+edx-16]
+	jnz	NEAR L$012dec6_loop
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+db	102,15,56,222,217	; aesdec xmm3,xmm1
+db	102,15,56,222,225	; aesdec xmm4,xmm1
+db	102,15,56,222,233	; aesdec xmm5,xmm1
+db	102,15,56,222,241	; aesdec xmm6,xmm1
+db	102,15,56,222,249	; aesdec xmm7,xmm1
+db	102,15,56,223,208	; aesdeclast xmm2,xmm0
+db	102,15,56,223,216	; aesdeclast xmm3,xmm0
+db	102,15,56,223,224	; aesdeclast xmm4,xmm0
+db	102,15,56,223,232	; aesdeclast xmm5,xmm0
+db	102,15,56,223,240	; aesdeclast xmm6,xmm0
+db	102,15,56,223,248	; aesdeclast xmm7,xmm0
+	ret
+global	_aes_hw_ecb_encrypt	; ECB en/decrypt; stack args: 1=in, 2=out, 3=length in bytes, 4=key schedule, 5=flag (nonzero=encrypt, zero=decrypt)
+align	16
+_aes_hw_ecb_encrypt:
+L$_aes_hw_ecb_encrypt_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	esi,DWORD [20+esp]	; esi = in (args start at 20 after four pushes + return address)
+	mov	edi,DWORD [24+esp]	; edi = out
+	mov	eax,DWORD [28+esp]	; eax = length
+	mov	edx,DWORD [32+esp]	; edx = key schedule
+	mov	ebx,DWORD [36+esp]	; ebx = direction flag
+	and	eax,-16	; round the length down to whole 16-byte blocks
+	jz	NEAR L$013ecb_ret	; nothing to do
+	mov	ecx,DWORD [240+edx]	; ecx = round counter from the key structure
+	test	ebx,ebx
+	jz	NEAR L$014ecb_decrypt
+	mov	ebp,edx	; keep key pointer and counter: the helpers clobber edx/ecx
+	mov	ebx,ecx
+	cmp	eax,96
+	jb	NEAR L$015ecb_enc_tail	; fewer than six blocks
+	movdqu	xmm2,[esi]	; preload the first six blocks
+	movdqu	xmm3,[16+esi]
+	movdqu	xmm4,[32+esi]
+	movdqu	xmm5,[48+esi]
+	movdqu	xmm6,[64+esi]
+	movdqu	xmm7,[80+esi]
+	lea	esi,[96+esi]
+	sub	eax,96
+	jmp	NEAR L$016ecb_enc_loop6_enter
+align	16
+L$017ecb_enc_loop6:	; store the previous six results while loading the next six inputs
+	movups	[edi],xmm2
+	movdqu	xmm2,[esi]
+	movups	[16+edi],xmm3
+	movdqu	xmm3,[16+esi]
+	movups	[32+edi],xmm4
+	movdqu	xmm4,[32+esi]
+	movups	[48+edi],xmm5
+	movdqu	xmm5,[48+esi]
+	movups	[64+edi],xmm6
+	movdqu	xmm6,[64+esi]
+	movups	[80+edi],xmm7
+	lea	edi,[96+edi]
+	movdqu	xmm7,[80+esi]
+	lea	esi,[96+esi]
+L$016ecb_enc_loop6_enter:
+	call	__aesni_encrypt6
+	mov	edx,ebp	; restore key pointer and counter for the next call
+	mov	ecx,ebx
+	sub	eax,96
+	jnc	NEAR L$017ecb_enc_loop6	; no borrow: at least six more blocks remain
+	movups	[edi],xmm2	; flush the last full group of six
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+	movups	[64+edi],xmm6
+	movups	[80+edi],xmm7
+	lea	edi,[96+edi]
+	add	eax,96	; undo the over-subtraction: eax = remaining bytes (0..80)
+	jz	NEAR L$013ecb_ret
+L$015ecb_enc_tail:	; 1..5 blocks remain
+	movups	xmm2,[esi]
+	cmp	eax,32
+	jb	NEAR L$018ecb_enc_one
+	movups	xmm3,[16+esi]	; movups does not alter flags, so je still tests the cmp above
+	je	NEAR L$019ecb_enc_two
+	movups	xmm4,[32+esi]
+	cmp	eax,64
+	jb	NEAR L$020ecb_enc_three
+	movups	xmm5,[48+esi]
+	je	NEAR L$021ecb_enc_four
+	movups	xmm6,[64+esi]	; five blocks: run the 6-way helper with a zeroed sixth lane
+	xorps	xmm7,xmm7
+	call	__aesni_encrypt6
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+	movups	[64+edi],xmm6
+	jmp	NEAR L$013ecb_ret
+align	16
+L$018ecb_enc_one:	; single block, rounds done inline
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$022enc1_loop_3:
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$022enc1_loop_3
+db	102,15,56,221,209	; aesenclast xmm2,xmm1
+	movups	[edi],xmm2
+	jmp	NEAR L$013ecb_ret
+align	16
+L$019ecb_enc_two:
+	call	__aesni_encrypt2
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	jmp	NEAR L$013ecb_ret
+align	16
+L$020ecb_enc_three:
+	call	__aesni_encrypt3
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	jmp	NEAR L$013ecb_ret
+align	16
+L$021ecb_enc_four:
+	call	__aesni_encrypt4
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+	jmp	NEAR L$013ecb_ret
+align	16
+L$014ecb_decrypt:	; decrypt path: same structure as the encrypt path above
+	mov	ebp,edx	; keep key pointer and counter: the helpers clobber edx/ecx
+	mov	ebx,ecx
+	cmp	eax,96
+	jb	NEAR L$023ecb_dec_tail	; fewer than six blocks
+	movdqu	xmm2,[esi]	; preload the first six blocks
+	movdqu	xmm3,[16+esi]
+	movdqu	xmm4,[32+esi]
+	movdqu	xmm5,[48+esi]
+	movdqu	xmm6,[64+esi]
+	movdqu	xmm7,[80+esi]
+	lea	esi,[96+esi]
+	sub	eax,96
+	jmp	NEAR L$024ecb_dec_loop6_enter
+align	16
+L$025ecb_dec_loop6:	; store the previous six results while loading the next six inputs
+	movups	[edi],xmm2
+	movdqu	xmm2,[esi]
+	movups	[16+edi],xmm3
+	movdqu	xmm3,[16+esi]
+	movups	[32+edi],xmm4
+	movdqu	xmm4,[32+esi]
+	movups	[48+edi],xmm5
+	movdqu	xmm5,[48+esi]
+	movups	[64+edi],xmm6
+	movdqu	xmm6,[64+esi]
+	movups	[80+edi],xmm7
+	lea	edi,[96+edi]
+	movdqu	xmm7,[80+esi]
+	lea	esi,[96+esi]
+L$024ecb_dec_loop6_enter:
+	call	__aesni_decrypt6
+	mov	edx,ebp	; restore key pointer and counter for the next call
+	mov	ecx,ebx
+	sub	eax,96
+	jnc	NEAR L$025ecb_dec_loop6	; no borrow: at least six more blocks remain
+	movups	[edi],xmm2	; flush the last full group of six
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+	movups	[64+edi],xmm6
+	movups	[80+edi],xmm7
+	lea	edi,[96+edi]
+	add	eax,96	; undo the over-subtraction: eax = remaining bytes (0..80)
+	jz	NEAR L$013ecb_ret
+L$023ecb_dec_tail:	; 1..5 blocks remain
+	movups	xmm2,[esi]
+	cmp	eax,32
+	jb	NEAR L$026ecb_dec_one
+	movups	xmm3,[16+esi]	; movups does not alter flags, so je still tests the cmp above
+	je	NEAR L$027ecb_dec_two
+	movups	xmm4,[32+esi]
+	cmp	eax,64
+	jb	NEAR L$028ecb_dec_three
+	movups	xmm5,[48+esi]
+	je	NEAR L$029ecb_dec_four
+	movups	xmm6,[64+esi]	; five blocks: run the 6-way helper with a zeroed sixth lane
+	xorps	xmm7,xmm7
+	call	__aesni_decrypt6
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+	movups	[64+edi],xmm6
+	jmp	NEAR L$013ecb_ret
+align	16
+L$026ecb_dec_one:	; single block, rounds done inline
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$030dec1_loop_4:
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$030dec1_loop_4
+db	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	movups	[edi],xmm2
+	jmp	NEAR L$013ecb_ret
+align	16
+L$027ecb_dec_two:
+	call	__aesni_decrypt2
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	jmp	NEAR L$013ecb_ret
+align	16
+L$028ecb_dec_three:
+	call	__aesni_decrypt3
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	jmp	NEAR L$013ecb_ret
+align	16
+L$029ecb_dec_four:
+	call	__aesni_decrypt4
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+L$013ecb_ret:	; common exit: clear all xmm registers, restore callee-saved registers
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_aes_hw_ccm64_encrypt_blocks	; stack args: 1=in, 2=out, 3=block count, 4=key schedule, 5=16-byte counter block, 6=16-byte running value updated in place (presumably the CCM MAC -- confirm)
+align	16
+_aes_hw_ccm64_encrypt_blocks:
+L$_aes_hw_ccm64_encrypt_blocks_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	esi,DWORD [20+esp]	; esi = in
+	mov	edi,DWORD [24+esp]	; edi = out
+	mov	eax,DWORD [28+esp]	; eax = number of 16-byte blocks
+	mov	edx,DWORD [32+esp]	; edx = key schedule
+	mov	ebx,DWORD [36+esp]	; ebx = arg 5 pointer
+	mov	ecx,DWORD [40+esp]	; ecx = arg 6 pointer
+	mov	ebp,esp
+	sub	esp,60	; carve out a 16-byte-aligned scratch frame
+	and	esp,-16
+	mov	DWORD [48+esp],ebp	; remember the original esp inside the frame
+	movdqu	xmm7,[ebx]	; xmm7 = counter block
+	movdqu	xmm3,[ecx]	; xmm3 = running value from arg 6
+	mov	ecx,DWORD [240+edx]	; ecx = round counter from the key structure
+	mov	DWORD [esp],202182159	; [esp..esp+15] = bytes 0F,0E,...,01,00: pshufb mask that reverses all 16 bytes
+	mov	DWORD [4+esp],134810123	; 0x08090A0B
+	mov	DWORD [8+esp],67438087	; 0x04050607
+	mov	DWORD [12+esp],66051	; 0x00010203
+	mov	ebx,1
+	xor	ebp,ebp
+	mov	DWORD [16+esp],ebx	; [16+esp] = dwords {1,0,0,0}: the counter increment
+	mov	DWORD [20+esp],ebp
+	mov	DWORD [24+esp],ebp
+	mov	DWORD [28+esp],ebp
+	shl	ecx,4
+	mov	ebx,16
+	lea	ebp,[edx]	; ebp = start of key schedule
+	movdqa	xmm5,[esp]	; xmm5 = byte-reversal mask
+	movdqa	xmm2,xmm7	; xmm2 = counter block in memory byte order
+	lea	edx,[32+ecx*1+edx]	; edx = key+32+counter*16 (end-relative key addressing)
+	sub	ebx,ecx	; ebx = 16-counter*16: initial negative key index, reloaded each block
+db	102,15,56,0,253	; pshufb xmm7,xmm5 -- keep the counter byte-reversed so paddq can increment it
+L$031ccm64_enc_outer:
+	movups	xmm0,[ebp]	; round key 0
+	mov	ecx,ebx
+	movups	xmm6,[esi]	; xmm6 = input block
+	xorps	xmm2,xmm0	; whiten the counter block
+	movups	xmm1,[16+ebp]	; round key 1
+	xorps	xmm0,xmm6	; key0 ^ input
+	xorps	xmm3,xmm0	; fold the input into xmm3 and whiten it in one step
+	movups	xmm0,[32+ebp]	; round key 2
+L$032ccm64_enc2_loop:	; two-lane AES: xmm2 (counter) and xmm3 (running value)
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+	movups	xmm1,[ecx*1+edx]
+	add	ecx,32
+db	102,15,56,220,208	; aesenc xmm2,xmm0
+db	102,15,56,220,216	; aesenc xmm3,xmm0
+	movups	xmm0,[ecx*1+edx-16]
+	jnz	NEAR L$032ccm64_enc2_loop
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+	paddq	xmm7,[16+esp]	; advance the byte-reversed counter by one
+	dec	eax	; one block consumed; ZF survives until the jnz at the loop bottom (no flag writers in between)
+db	102,15,56,221,208	; aesenclast xmm2,xmm0
+db	102,15,56,221,216	; aesenclast xmm3,xmm0
+	lea	esi,[16+esi]
+	xorps	xmm6,xmm2	; output = input ^ encrypted counter
+	movdqa	xmm2,xmm7	; next counter ...
+	movups	[edi],xmm6
+db	102,15,56,0,213	; pshufb xmm2,xmm5 -- ... converted back to memory byte order
+	lea	edi,[16+edi]
+	jnz	NEAR L$031ccm64_enc_outer
+	mov	esp,DWORD [48+esp]	; drop the scratch frame
+	mov	edi,DWORD [40+esp]	; edi = arg 6 pointer
+	movups	[edi],xmm3	; write the updated running value back
+	pxor	xmm0,xmm0	; clear all xmm registers before returning
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_aes_hw_ccm64_decrypt_blocks	; stack args: 1=in, 2=out, 3=block count, 4=key schedule, 5=16-byte counter block, 6=16-byte running value updated in place (presumably the CCM MAC -- confirm)
+align	16
+_aes_hw_ccm64_decrypt_blocks:
+L$_aes_hw_ccm64_decrypt_blocks_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	esi,DWORD [20+esp]	; esi = in
+	mov	edi,DWORD [24+esp]	; edi = out
+	mov	eax,DWORD [28+esp]	; eax = number of 16-byte blocks
+	mov	edx,DWORD [32+esp]	; edx = key schedule
+	mov	ebx,DWORD [36+esp]	; ebx = arg 5 pointer
+	mov	ecx,DWORD [40+esp]	; ecx = arg 6 pointer
+	mov	ebp,esp
+	sub	esp,60	; carve out a 16-byte-aligned scratch frame
+	and	esp,-16
+	mov	DWORD [48+esp],ebp	; remember the original esp inside the frame
+	movdqu	xmm7,[ebx]	; xmm7 = counter block
+	movdqu	xmm3,[ecx]	; xmm3 = running value from arg 6
+	mov	ecx,DWORD [240+edx]	; ecx = round counter from the key structure
+	mov	DWORD [esp],202182159	; [esp..esp+15] = bytes 0F,0E,...,01,00: pshufb mask that reverses all 16 bytes
+	mov	DWORD [4+esp],134810123	; 0x08090A0B
+	mov	DWORD [8+esp],67438087	; 0x04050607
+	mov	DWORD [12+esp],66051	; 0x00010203
+	mov	ebx,1
+	xor	ebp,ebp
+	mov	DWORD [16+esp],ebx	; [16+esp] = dwords {1,0,0,0}: the counter increment
+	mov	DWORD [20+esp],ebp
+	mov	DWORD [24+esp],ebp
+	mov	DWORD [28+esp],ebp
+	movdqa	xmm5,[esp]	; xmm5 = byte-reversal mask
+	movdqa	xmm2,xmm7	; xmm2 = counter block in memory byte order
+	mov	ebp,edx	; ebp = start of key schedule
+	mov	ebx,ecx	; ebx = round counter
+db	102,15,56,0,253	; pshufb xmm7,xmm5 -- keep the counter byte-reversed so paddq can increment it
+	movups	xmm0,[edx]	; encrypt the first counter block on its own
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$033enc1_loop_5:
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$033enc1_loop_5
+db	102,15,56,221,209	; aesenclast xmm2,xmm1
+	shl	ebx,4
+	mov	ecx,16
+	movups	xmm6,[esi]	; xmm6 = first input block
+	paddq	xmm7,[16+esp]	; advance the byte-reversed counter by one
+	lea	esi,[16+esi]
+	sub	ecx,ebx	; ecx = 16-counter*16: negative key index
+	lea	edx,[32+ebx*1+ebp]	; edx = key+32+counter*16 (end-relative key addressing)
+	mov	ebx,ecx	; ebx keeps the initial index for reloading each block
+	jmp	NEAR L$034ccm64_dec_outer
+align	16
+L$034ccm64_dec_outer:
+	xorps	xmm6,xmm2	; output = input ^ encrypted counter
+	movdqa	xmm2,xmm7	; next counter ...
+	movups	[edi],xmm6
+	lea	edi,[16+edi]
+db	102,15,56,0,213	; pshufb xmm2,xmm5 -- ... converted back to memory byte order
+	sub	eax,1
+	jz	NEAR L$035ccm64_dec_break	; that was the last block
+	movups	xmm0,[ebp]	; round key 0
+	mov	ecx,ebx
+	movups	xmm1,[16+ebp]	; round key 1
+	xorps	xmm6,xmm0	; key0 ^ output block
+	xorps	xmm2,xmm0	; whiten the counter block
+	xorps	xmm3,xmm6	; fold the output block into xmm3 and whiten it in one step
+	movups	xmm0,[32+ebp]	; round key 2
+L$036ccm64_dec2_loop:	; two-lane AES: xmm2 (counter) and xmm3 (running value)
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+	movups	xmm1,[ecx*1+edx]
+	add	ecx,32
+db	102,15,56,220,208	; aesenc xmm2,xmm0
+db	102,15,56,220,216	; aesenc xmm3,xmm0
+	movups	xmm0,[ecx*1+edx-16]
+	jnz	NEAR L$036ccm64_dec2_loop
+	movups	xmm6,[esi]	; load the next input block while the last rounds complete
+	paddq	xmm7,[16+esp]	; advance the byte-reversed counter by one
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+db	102,15,56,221,208	; aesenclast xmm2,xmm0
+db	102,15,56,221,216	; aesenclast xmm3,xmm0
+	lea	esi,[16+esi]
+	jmp	NEAR L$034ccm64_dec_outer
+align	16
+L$035ccm64_dec_break:	; fold the final output block into xmm3 and encrypt xmm3 once more
+	mov	ecx,DWORD [240+ebp]
+	mov	edx,ebp
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	xorps	xmm6,xmm0
+	lea	edx,[32+edx]
+	xorps	xmm3,xmm6
+L$037enc1_loop_6:
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$037enc1_loop_6
+db	102,15,56,221,217	; aesenclast xmm3,xmm1
+	mov	esp,DWORD [48+esp]	; drop the scratch frame
+	mov	edi,DWORD [40+esp]	; edi = arg 6 pointer
+	movups	[edi],xmm3	; write the updated running value back
+	pxor	xmm0,xmm0	; clear all xmm registers before returning
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_aes_hw_ctr32_encrypt_blocks	; CTR mode with a 32-bit counter in the last dword; stack args: 1=in, 2=out, 3=block count, 4=key schedule, 5=16-byte counter block
+align	16
+_aes_hw_ctr32_encrypt_blocks:
+L$_aes_hw_ctr32_encrypt_blocks_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+%ifdef BORINGSSL_DISPATCH_TEST
+	push	ebx	; test-only hook: mark that this routine was entered
+	push	edx
+	call	L$038pic	; call/pop pair obtains the current EIP for position-independent addressing
+L$038pic:
+	pop	ebx
+	lea	ebx,[(_BORINGSSL_function_hit+0-L$038pic)+ebx]	; ebx = address of _BORINGSSL_function_hit+0
+	mov	edx,1
+	mov	BYTE [ebx],dl	; set that flag byte to 1
+	pop	edx
+	pop	ebx
+%endif
+	mov	esi,DWORD [20+esp]	; esi = in
+	mov	edi,DWORD [24+esp]	; edi = out
+	mov	eax,DWORD [28+esp]	; eax = number of 16-byte blocks
+	mov	edx,DWORD [32+esp]	; edx = key schedule
+	mov	ebx,DWORD [36+esp]	; ebx = counter block pointer
+	mov	ebp,esp
+	sub	esp,88	; carve out a 16-byte-aligned scratch frame
+	and	esp,-16
+	mov	DWORD [80+esp],ebp	; remember the original esp inside the frame
+	cmp	eax,1
+	je	NEAR L$039ctr32_one_shortcut	; exactly one block: skip all counter-vector setup
+	movdqu	xmm7,[ebx]	; xmm7 = counter block
+	mov	DWORD [esp],202182159	; [esp..esp+15] = bytes 0F,0E,...,01,00: pshufb mask that reverses all 16 bytes
+	mov	DWORD [4+esp],134810123	; 0x08090A0B
+	mov	DWORD [8+esp],67438087	; 0x04050607
+	mov	DWORD [12+esp],66051	; 0x00010203
+	mov	ecx,6
+	xor	ebp,ebp
+	mov	DWORD [16+esp],ecx	; [16+esp] = dwords {6,6,6,0}: per-iteration counter step
+	mov	DWORD [20+esp],ecx
+	mov	DWORD [24+esp],ecx
+	mov	DWORD [28+esp],ebp
+db	102,15,58,22,251,3	; pextrd ebx,xmm7,3 -- ebx = last dword of the counter block
+db	102,15,58,34,253,3	; pinsrd xmm7,ebp,3 -- zero that dword in xmm7 (ebp is 0)
+	mov	ecx,DWORD [240+edx]	; ecx = round counter from the key structure
+	bswap	ebx	; counter to host byte order for arithmetic
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	movdqa	xmm2,[esp]	; xmm2 = byte-reversal mask
+db	102,15,58,34,195,0	; pinsrd xmm0,ebx,0 -- xmm0 dwords 0..2 = ctr, ctr+1, ctr+2
+	lea	ebp,[3+ebx]
+db	102,15,58,34,205,0	; pinsrd xmm1,ebp,0 -- xmm1 dwords 0..2 = ctr+3, ctr+4, ctr+5
+	inc	ebx
+db	102,15,58,34,195,1	; pinsrd xmm0,ebx,1
+	inc	ebp
+db	102,15,58,34,205,1	; pinsrd xmm1,ebp,1
+	inc	ebx
+db	102,15,58,34,195,2	; pinsrd xmm0,ebx,2
+	inc	ebp
+db	102,15,58,34,205,2	; pinsrd xmm1,ebp,2
+	movdqa	[48+esp],xmm0	; keep host-order counters for the next paddd
+db	102,15,56,0,194	; pshufb xmm0,xmm2 -- reverse all bytes: byte-swapped counters now sit in dwords 3,2,1
+	movdqu	xmm6,[edx]	; xmm6 = round key 0
+	movdqa	[64+esp],xmm1
+db	102,15,56,0,202	; pshufb xmm1,xmm2
+	pshufd	xmm2,xmm0,192	; xmm2 = {0,0,0,counter 0} (dword 0 of xmm0 is zero)
+	pshufd	xmm3,xmm0,128	; xmm3 = {0,0,0,counter 1}
+	cmp	eax,6
+	jb	NEAR L$040ctr32_tail
+	pxor	xmm7,xmm6	; pre-whiten the fixed part of the counter block with round key 0
+	shl	ecx,4
+	mov	ebx,16
+	movdqa	[32+esp],xmm7	; [32+esp] = fixed part ^ key0
+	mov	ebp,edx	; ebp = start of key schedule
+	sub	ebx,ecx	; ebx = 16-counter*16: negative key index
+	lea	edx,[32+ecx*1+edx]	; edx = key+32+counter*16 (end-relative key addressing)
+	sub	eax,6
+	jmp	NEAR L$041ctr32_loop6
+align	16
+L$041ctr32_loop6:	; six counter blocks per iteration
+	pshufd	xmm4,xmm0,64	; xmm4 = {0,0,0,counter 2}
+	movdqa	xmm0,[32+esp]
+	pshufd	xmm5,xmm1,192	; counters 3..5 from xmm1
+	pxor	xmm2,xmm0	; merge in the whitened fixed part
+	pshufd	xmm6,xmm1,128
+	pxor	xmm3,xmm0
+	pshufd	xmm7,xmm1,64
+	movups	xmm1,[16+ebp]	; round key 1
+	pxor	xmm4,xmm0
+	pxor	xmm5,xmm0
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+	pxor	xmm6,xmm0
+	pxor	xmm7,xmm0
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+	movups	xmm0,[32+ebp]	; round key 2
+	mov	ecx,ebx
+db	102,15,56,220,225	; aesenc xmm4,xmm1
+db	102,15,56,220,233	; aesenc xmm5,xmm1
+db	102,15,56,220,241	; aesenc xmm6,xmm1
+db	102,15,56,220,249	; aesenc xmm7,xmm1
+	call	L$_aesni_encrypt6_enter	; round 1 was issued inline above; join the shared routine for the rest
+	movups	xmm1,[esi]	; xor the keystream with six input blocks, store, and prepare the next counters
+	movups	xmm0,[16+esi]
+	xorps	xmm2,xmm1
+	movups	xmm1,[32+esi]
+	xorps	xmm3,xmm0
+	movups	[edi],xmm2
+	movdqa	xmm0,[16+esp]	; xmm0 = {6,6,6,0}
+	xorps	xmm4,xmm1
+	movdqa	xmm1,[64+esp]
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	paddd	xmm1,xmm0	; counters 3..5 += 6
+	paddd	xmm0,[48+esp]	; counters 0..2 += 6
+	movdqa	xmm2,[esp]	; xmm2 = byte-reversal mask
+	movups	xmm3,[48+esi]
+	movups	xmm4,[64+esi]
+	xorps	xmm5,xmm3
+	movups	xmm3,[80+esi]
+	lea	esi,[96+esi]
+	movdqa	[48+esp],xmm0
+db	102,15,56,0,194	; pshufb xmm0,xmm2
+	xorps	xmm6,xmm4
+	movups	[48+edi],xmm5
+	xorps	xmm7,xmm3
+	movdqa	[64+esp],xmm1
+db	102,15,56,0,202	; pshufb xmm1,xmm2
+	movups	[64+edi],xmm6
+	pshufd	xmm2,xmm0,192
+	movups	[80+edi],xmm7
+	lea	edi,[96+edi]
+	pshufd	xmm3,xmm0,128
+	sub	eax,6
+	jnc	NEAR L$041ctr32_loop6	; no borrow: at least six more blocks remain
+	add	eax,6	; eax = remaining blocks (0..5)
+	jz	NEAR L$042ctr32_ret
+	movdqu	xmm7,[ebp]	; xmm7 = key0 ^ (fixed part ^ key0) = unwhitened fixed part again
+	mov	edx,ebp
+	pxor	xmm7,[32+esp]
+	mov	ecx,DWORD [240+ebp]
+L$040ctr32_tail:	; 1..5 blocks (or 2..5 when entered directly)
+	por	xmm2,xmm7	; combine counter dword with the fixed part
+	cmp	eax,2
+	jb	NEAR L$043ctr32_one
+	pshufd	xmm4,xmm0,64	; SSE ops leave flags alone, so the je below still tests cmp eax,2
+	por	xmm3,xmm7
+	je	NEAR L$044ctr32_two
+	pshufd	xmm5,xmm1,192
+	por	xmm4,xmm7
+	cmp	eax,4
+	jb	NEAR L$045ctr32_three
+	pshufd	xmm6,xmm1,128
+	por	xmm5,xmm7
+	je	NEAR L$046ctr32_four
+	por	xmm6,xmm7	; five blocks: use the 6-way helper; the xmm7 lane's result is never stored
+	call	__aesni_encrypt6
+	movups	xmm1,[esi]
+	movups	xmm0,[16+esi]
+	xorps	xmm2,xmm1
+	movups	xmm1,[32+esi]
+	xorps	xmm3,xmm0
+	movups	xmm0,[48+esi]
+	xorps	xmm4,xmm1
+	movups	xmm1,[64+esi]
+	xorps	xmm5,xmm0
+	movups	[edi],xmm2
+	xorps	xmm6,xmm1
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+	movups	[64+edi],xmm6
+	jmp	NEAR L$042ctr32_ret
+align	16
+L$039ctr32_one_shortcut:	; single block: use the counter block exactly as given
+	movups	xmm2,[ebx]
+	mov	ecx,DWORD [240+edx]
+L$043ctr32_one:
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$047enc1_loop_7:
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$047enc1_loop_7
+db	102,15,56,221,209	; aesenclast xmm2,xmm1
+	movups	xmm6,[esi]
+	xorps	xmm6,xmm2	; output = input ^ keystream
+	movups	[edi],xmm6
+	jmp	NEAR L$042ctr32_ret
+align	16
+L$044ctr32_two:
+	call	__aesni_encrypt2
+	movups	xmm5,[esi]
+	movups	xmm6,[16+esi]
+	xorps	xmm2,xmm5
+	xorps	xmm3,xmm6
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	jmp	NEAR L$042ctr32_ret
+align	16
+L$045ctr32_three:
+	call	__aesni_encrypt3
+	movups	xmm5,[esi]
+	movups	xmm6,[16+esi]
+	xorps	xmm2,xmm5
+	movups	xmm7,[32+esi]
+	xorps	xmm3,xmm6
+	movups	[edi],xmm2
+	xorps	xmm4,xmm7
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	jmp	NEAR L$042ctr32_ret
+align	16
+L$046ctr32_four:
+	call	__aesni_encrypt4
+	movups	xmm6,[esi]
+	movups	xmm7,[16+esi]
+	movups	xmm1,[32+esi]
+	xorps	xmm2,xmm6
+	movups	xmm0,[48+esi]
+	xorps	xmm3,xmm7
+	movups	[edi],xmm2
+	xorps	xmm4,xmm1
+	movups	[16+edi],xmm3
+	xorps	xmm5,xmm0
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+L$042ctr32_ret:	; common exit: clear xmm registers and the scratch slots that held counter data
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	movdqa	[32+esp],xmm0
+	pxor	xmm5,xmm5
+	movdqa	[48+esp],xmm0
+	pxor	xmm6,xmm6
+	movdqa	[64+esp],xmm0
+	pxor	xmm7,xmm7
+	mov	esp,DWORD [80+esp]	; drop the scratch frame
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_aes_hw_xts_encrypt	; XTS-style encryption; stack args: 1=in, 2=out, 3=length in bytes, 4=data key schedule, 5=tweak key schedule, 6=16-byte initial tweak input
+align	16
+_aes_hw_xts_encrypt:
+L$_aes_hw_xts_encrypt_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	edx,DWORD [36+esp]	; edx = arg 5 (second key schedule)
+	mov	esi,DWORD [40+esp]	; esi = arg 6 (16-byte block to turn into the first tweak)
+	mov	ecx,DWORD [240+edx]
+	movups	xmm2,[esi]
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$048enc1_loop_8:	; encrypt arg 6 with the arg 5 key: xmm2 becomes the first tweak
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$048enc1_loop_8
+db	102,15,56,221,209	; aesenclast xmm2,xmm1
+	mov	esi,DWORD [20+esp]	; esi = in
+	mov	edi,DWORD [24+esp]	; edi = out
+	mov	eax,DWORD [28+esp]	; eax = length in bytes
+	mov	edx,DWORD [32+esp]	; edx = data key schedule
+	mov	ebp,esp
+	sub	esp,120	; carve out a 16-byte-aligned scratch frame
+	mov	ecx,DWORD [240+edx]
+	and	esp,-16
+	mov	DWORD [96+esp],135	; [96+esp] = dwords {0x87,0,1,0}: feedback constant and inter-qword carry bit for tweak doubling
+	mov	DWORD [100+esp],0
+	mov	DWORD [104+esp],1
+	mov	DWORD [108+esp],0
+	mov	DWORD [112+esp],eax	; save the full length (low 4 bits are needed at the end)
+	mov	DWORD [116+esp],ebp	; remember the original esp inside the frame
+	movdqa	xmm1,xmm2	; xmm1 = current tweak
+	pxor	xmm0,xmm0
+	movdqa	xmm3,[96+esp]	; xmm3 = doubling constant
+	pcmpgtd	xmm0,xmm1	; xmm0 = per-dword sign mask of the tweak (all-ones where the top bit is set)
+	and	eax,-16	; whole blocks only
+	mov	ebp,edx	; ebp = start of data key schedule
+	mov	ebx,ecx	; ebx = round counter
+	sub	eax,96
+	jc	NEAR L$049xts_enc_short	; fewer than six blocks
+	shl	ecx,4
+	mov	ebx,16
+	sub	ebx,ecx	; ebx = 16-counter*16: negative key index
+	lea	edx,[32+ecx*1+edx]	; edx = key+32+counter*16 (end-relative key addressing)
+	jmp	NEAR L$050xts_enc_loop6
+align	16
+L$050xts_enc_loop6:	; derive six tweaks into [esp..esp+95], then process six blocks
+	pshufd	xmm2,xmm0,19	; route the sign masks: dword3 -> lane 0, dword1 -> lane 2
+	pxor	xmm0,xmm0
+	movdqa	[esp],xmm1	; tweak 0
+	paddq	xmm1,xmm1	; shift each 64-bit half left by one
+	pand	xmm2,xmm3	; keep 0x87 if bit 127 was set, and 1 (carry into the high half) if bit 63 was set
+	pcmpgtd	xmm0,xmm1	; sign mask of the new value, for the next doubling
+	pxor	xmm1,xmm2	; apply carry and feedback: xmm1 = doubled tweak
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	[16+esp],xmm1	; tweak 1
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	[32+esp],xmm1	; tweak 2
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	[48+esp],xmm1	; tweak 3
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+	pshufd	xmm7,xmm0,19
+	movdqa	[64+esp],xmm1	; tweak 4
+	paddq	xmm1,xmm1
+	movups	xmm0,[ebp]	; round key 0
+	pand	xmm7,xmm3
+	movups	xmm2,[esi]
+	pxor	xmm7,xmm1	; xmm7 = tweak 5
+	mov	ecx,ebx
+	movdqu	xmm3,[16+esi]
+	xorps	xmm2,xmm0	; inputs ^ key0 ...
+	movdqu	xmm4,[32+esi]
+	pxor	xmm3,xmm0
+	movdqu	xmm5,[48+esi]
+	pxor	xmm4,xmm0
+	movdqu	xmm6,[64+esi]
+	pxor	xmm5,xmm0
+	movdqu	xmm1,[80+esi]
+	pxor	xmm6,xmm0
+	lea	esi,[96+esi]
+	pxor	xmm2,[esp]	; ... ^ tweak
+	movdqa	[80+esp],xmm7	; tweak 5
+	pxor	xmm7,xmm1
+	movups	xmm1,[16+ebp]	; round key 1
+	pxor	xmm3,[16+esp]
+	pxor	xmm4,[32+esp]
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+	pxor	xmm5,[48+esp]
+	pxor	xmm6,[64+esp]
+db	102,15,56,220,217	; aesenc xmm3,xmm1
+	pxor	xmm7,xmm0
+	movups	xmm0,[32+ebp]	; round key 2
+db	102,15,56,220,225	; aesenc xmm4,xmm1
+db	102,15,56,220,233	; aesenc xmm5,xmm1
+db	102,15,56,220,241	; aesenc xmm6,xmm1
+db	102,15,56,220,249	; aesenc xmm7,xmm1
+	call	L$_aesni_encrypt6_enter	; round 1 was issued inline above; join the shared routine for the rest
+	movdqa	xmm1,[80+esp]	; xmm1 = tweak 5, the base for the next doubling
+	pxor	xmm0,xmm0
+	xorps	xmm2,[esp]	; xor each result with its tweak again and store
+	pcmpgtd	xmm0,xmm1
+	xorps	xmm3,[16+esp]
+	movups	[edi],xmm2
+	xorps	xmm4,[32+esp]
+	movups	[16+edi],xmm3
+	xorps	xmm5,[48+esp]
+	movups	[32+edi],xmm4
+	xorps	xmm6,[64+esp]
+	movups	[48+edi],xmm5
+	xorps	xmm7,xmm1
+	movups	[64+edi],xmm6
+	pshufd	xmm2,xmm0,19
+	movups	[80+edi],xmm7
+	lea	edi,[96+edi]
+	movdqa	xmm3,[96+esp]	; reload the doubling constant (xmm3 was used for data)
+	pxor	xmm0,xmm0
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2	; xmm1 = first tweak of the next group
+	sub	eax,96
+	jnc	NEAR L$050xts_enc_loop6	; no borrow: at least six more blocks remain
+	mov	ecx,DWORD [240+ebp]	; restore plain key pointer and round counter for the tail helpers
+	mov	edx,ebp
+	mov	ebx,ecx
+L$049xts_enc_short:
+	add	eax,96	; eax = remaining whole-block bytes (0..80)
+	jz	NEAR L$051xts_enc_done6x
+	movdqa	xmm5,xmm1	; xmm5 = tweak for block 0
+	cmp	eax,32
+	jb	NEAR L$052xts_enc_one
+	pshufd	xmm2,xmm0,19	; SSE ops leave flags alone, so the je below still tests cmp eax,32
+	pxor	xmm0,xmm0
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2	; xmm1 = tweak for block 1
+	je	NEAR L$053xts_enc_two
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	xmm6,xmm1	; xmm6 = tweak for block 1
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2	; xmm1 = tweak for block 2
+	cmp	eax,64
+	jb	NEAR L$054xts_enc_three
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	xmm7,xmm1	; xmm7 = tweak for block 2
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2	; xmm1 = tweak for block 3
+	movdqa	[esp],xmm5
+	movdqa	[16+esp],xmm6
+	je	NEAR L$055xts_enc_four
+	movdqa	[32+esp],xmm7	; five blocks: spill tweaks 0..4 and use the 6-way helper
+	pshufd	xmm7,xmm0,19
+	movdqa	[48+esp],xmm1
+	paddq	xmm1,xmm1
+	pand	xmm7,xmm3
+	pxor	xmm7,xmm1	; xmm7 = tweak for block 4
+	movdqu	xmm2,[esi]
+	movdqu	xmm3,[16+esi]
+	movdqu	xmm4,[32+esi]
+	pxor	xmm2,[esp]
+	movdqu	xmm5,[48+esi]
+	pxor	xmm3,[16+esp]
+	movdqu	xmm6,[64+esi]
+	pxor	xmm4,[32+esp]
+	lea	esi,[80+esi]
+	pxor	xmm5,[48+esp]
+	movdqa	[64+esp],xmm7
+	pxor	xmm6,xmm7
+	call	__aesni_encrypt6
+	movaps	xmm1,[64+esp]	; xmm1 = last tweak used
+	xorps	xmm2,[esp]
+	xorps	xmm3,[16+esp]
+	xorps	xmm4,[32+esp]
+	movups	[edi],xmm2
+	xorps	xmm5,[48+esp]
+	movups	[16+edi],xmm3
+	xorps	xmm6,xmm1
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+	movups	[64+edi],xmm6
+	lea	edi,[80+edi]
+	jmp	NEAR L$056xts_enc_done
+align	16
+L$052xts_enc_one:
+	movups	xmm2,[esi]
+	lea	esi,[16+esi]
+	xorps	xmm2,xmm5	; input ^ tweak
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$057enc1_loop_9:
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$057enc1_loop_9
+db	102,15,56,221,209	; aesenclast xmm2,xmm1
+	xorps	xmm2,xmm5	; result ^ tweak
+	movups	[edi],xmm2
+	lea	edi,[16+edi]
+	movdqa	xmm1,xmm5	; xmm1 = last tweak used
+	jmp	NEAR L$056xts_enc_done
+align	16
+L$053xts_enc_two:
+	movaps	xmm6,xmm1	; xmm6 = tweak for block 1
+	movups	xmm2,[esi]
+	movups	xmm3,[16+esi]
+	lea	esi,[32+esi]
+	xorps	xmm2,xmm5
+	xorps	xmm3,xmm6
+	call	__aesni_encrypt2
+	xorps	xmm2,xmm5
+	xorps	xmm3,xmm6
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	lea	edi,[32+edi]
+	movdqa	xmm1,xmm6	; xmm1 = last tweak used
+	jmp	NEAR L$056xts_enc_done
+align	16
+L$054xts_enc_three:
+	movaps	xmm7,xmm1	; xmm7 = tweak for block 2
+	movups	xmm2,[esi]
+	movups	xmm3,[16+esi]
+	movups	xmm4,[32+esi]
+	lea	esi,[48+esi]
+	xorps	xmm2,xmm5
+	xorps	xmm3,xmm6
+	xorps	xmm4,xmm7
+	call	__aesni_encrypt3
+	xorps	xmm2,xmm5
+	xorps	xmm3,xmm6
+	xorps	xmm4,xmm7
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	lea	edi,[48+edi]
+	movdqa	xmm1,xmm7	; xmm1 = last tweak used
+	jmp	NEAR L$056xts_enc_done
+align	16
+L$055xts_enc_four:
+	movaps	xmm6,xmm1	; xmm6 = tweak for block 3; tweaks 0 and 1 are at [esp],[16+esp], tweak 2 in xmm7
+	movups	xmm2,[esi]
+	movups	xmm3,[16+esi]
+	movups	xmm4,[32+esi]
+	xorps	xmm2,[esp]
+	movups	xmm5,[48+esi]
+	lea	esi,[64+esi]
+	xorps	xmm3,[16+esp]
+	xorps	xmm4,xmm7
+	xorps	xmm5,xmm6
+	call	__aesni_encrypt4
+	xorps	xmm2,[esp]
+	xorps	xmm3,[16+esp]
+	xorps	xmm4,xmm7
+	movups	[edi],xmm2
+	xorps	xmm5,xmm6
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+	lea	edi,[64+edi]
+	movdqa	xmm1,xmm6	; xmm1 = last tweak used
+	jmp	NEAR L$056xts_enc_done
+align	16
+L$051xts_enc_done6x:	; no tail blocks: xmm1 already holds the next unused tweak
+	mov	eax,DWORD [112+esp]
+	and	eax,15	; eax = trailing bytes beyond the last whole block
+	jz	NEAR L$058xts_enc_ret
+	movdqa	xmm5,xmm1
+	mov	DWORD [112+esp],eax
+	jmp	NEAR L$059xts_enc_steal
+align	16
+L$056xts_enc_done:	; tail path: xmm1 holds the last tweak used, so double it once more if bytes remain
+	mov	eax,DWORD [112+esp]
+	pxor	xmm0,xmm0
+	and	eax,15	; eax = trailing bytes beyond the last whole block
+	jz	NEAR L$058xts_enc_ret
+	pcmpgtd	xmm0,xmm1
+	mov	DWORD [112+esp],eax
+	pshufd	xmm5,xmm0,19
+	paddq	xmm1,xmm1
+	pand	xmm5,[96+esp]
+	pxor	xmm5,xmm1	; xmm5 = tweak for the final block
+L$059xts_enc_steal:	; byte-wise swap: trailing input bytes replace the head of the last output block, whose old bytes become the partial output
+	movzx	ecx,BYTE [esi]
+	movzx	edx,BYTE [edi-16]
+	lea	esi,[1+esi]
+	mov	BYTE [edi-16],cl
+	mov	BYTE [edi],dl
+	lea	edi,[1+edi]
+	sub	eax,1
+	jnz	NEAR L$059xts_enc_steal
+	sub	edi,DWORD [112+esp]	; rewind edi by the number of bytes just copied
+	mov	edx,ebp
+	mov	ecx,ebx
+	movups	xmm2,[edi-16]	; re-encrypt the patched last full block with tweak xmm5
+	xorps	xmm2,xmm5
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$060enc1_loop_10:
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$060enc1_loop_10
+db	102,15,56,221,209	; aesenclast xmm2,xmm1
+	xorps	xmm2,xmm5
+	movups	[edi-16],xmm2
+L$058xts_enc_ret:	; common exit: clear xmm registers and the six tweak slots on the stack
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	movdqa	[esp],xmm0
+	pxor	xmm3,xmm3
+	movdqa	[16+esp],xmm0
+	pxor	xmm4,xmm4
+	movdqa	[32+esp],xmm0
+	pxor	xmm5,xmm5
+	movdqa	[48+esp],xmm0
+	pxor	xmm6,xmm6
+	movdqa	[64+esp],xmm0
+	pxor	xmm7,xmm7
+	movdqa	[80+esp],xmm0
+	mov	esp,DWORD [116+esp]	; drop the scratch frame
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_aes_hw_xts_decrypt
+align	16
+_aes_hw_xts_decrypt:
+L$_aes_hw_xts_decrypt_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	edx,DWORD [36+esp]
+	mov	esi,DWORD [40+esp]
+	mov	ecx,DWORD [240+edx]
+	movups	xmm2,[esi]
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$061enc1_loop_11:
+db	102,15,56,220,209
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$061enc1_loop_11
+db	102,15,56,221,209
+	mov	esi,DWORD [20+esp]
+	mov	edi,DWORD [24+esp]
+	mov	eax,DWORD [28+esp]
+	mov	edx,DWORD [32+esp]
+	mov	ebp,esp
+	sub	esp,120
+	and	esp,-16
+	xor	ebx,ebx
+	test	eax,15
+	setnz	bl
+	shl	ebx,4
+	sub	eax,ebx
+	mov	DWORD [96+esp],135
+	mov	DWORD [100+esp],0
+	mov	DWORD [104+esp],1
+	mov	DWORD [108+esp],0
+	mov	DWORD [112+esp],eax
+	mov	DWORD [116+esp],ebp
+	mov	ecx,DWORD [240+edx]
+	mov	ebp,edx
+	mov	ebx,ecx
+	movdqa	xmm1,xmm2
+	pxor	xmm0,xmm0
+	movdqa	xmm3,[96+esp]
+	pcmpgtd	xmm0,xmm1
+	and	eax,-16
+	sub	eax,96
+	jc	NEAR L$062xts_dec_short
+	shl	ecx,4
+	mov	ebx,16
+	sub	ebx,ecx
+	lea	edx,[32+ecx*1+edx]
+	jmp	NEAR L$063xts_dec_loop6
+align	16
+L$063xts_dec_loop6:
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	[esp],xmm1
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	[16+esp],xmm1
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	[32+esp],xmm1
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	[48+esp],xmm1
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+	pshufd	xmm7,xmm0,19
+	movdqa	[64+esp],xmm1
+	paddq	xmm1,xmm1
+	movups	xmm0,[ebp]
+	pand	xmm7,xmm3
+	movups	xmm2,[esi]
+	pxor	xmm7,xmm1
+	mov	ecx,ebx
+	movdqu	xmm3,[16+esi]
+	xorps	xmm2,xmm0
+	movdqu	xmm4,[32+esi]
+	pxor	xmm3,xmm0
+	movdqu	xmm5,[48+esi]
+	pxor	xmm4,xmm0
+	movdqu	xmm6,[64+esi]
+	pxor	xmm5,xmm0
+	movdqu	xmm1,[80+esi]
+	pxor	xmm6,xmm0
+	lea	esi,[96+esi]
+	pxor	xmm2,[esp]
+	movdqa	[80+esp],xmm7
+	pxor	xmm7,xmm1
+	movups	xmm1,[16+ebp]
+	pxor	xmm3,[16+esp]
+	pxor	xmm4,[32+esp]
+db	102,15,56,222,209
+	pxor	xmm5,[48+esp]
+	pxor	xmm6,[64+esp]
+db	102,15,56,222,217
+	pxor	xmm7,xmm0
+	movups	xmm0,[32+ebp]
+db	102,15,56,222,225
+db	102,15,56,222,233
+db	102,15,56,222,241
+db	102,15,56,222,249
+	call	L$_aesni_decrypt6_enter
+	movdqa	xmm1,[80+esp]
+	pxor	xmm0,xmm0
+	xorps	xmm2,[esp]
+	pcmpgtd	xmm0,xmm1
+	xorps	xmm3,[16+esp]
+	movups	[edi],xmm2
+	xorps	xmm4,[32+esp]
+	movups	[16+edi],xmm3
+	xorps	xmm5,[48+esp]
+	movups	[32+edi],xmm4
+	xorps	xmm6,[64+esp]
+	movups	[48+edi],xmm5
+	xorps	xmm7,xmm1
+	movups	[64+edi],xmm6
+	pshufd	xmm2,xmm0,19
+	movups	[80+edi],xmm7
+	lea	edi,[96+edi]
+	movdqa	xmm3,[96+esp]
+	pxor	xmm0,xmm0
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+	sub	eax,96
+	jnc	NEAR L$063xts_dec_loop6
+	mov	ecx,DWORD [240+ebp]
+	mov	edx,ebp
+	mov	ebx,ecx
+L$062xts_dec_short:
+	add	eax,96
+	jz	NEAR L$064xts_dec_done6x
+	movdqa	xmm5,xmm1
+	cmp	eax,32
+	jb	NEAR L$065xts_dec_one
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+	je	NEAR L$066xts_dec_two
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	xmm6,xmm1
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+	cmp	eax,64
+	jb	NEAR L$067xts_dec_three
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	xmm7,xmm1
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+	movdqa	[esp],xmm5
+	movdqa	[16+esp],xmm6
+	je	NEAR L$068xts_dec_four
+	movdqa	[32+esp],xmm7
+	pshufd	xmm7,xmm0,19
+	movdqa	[48+esp],xmm1
+	paddq	xmm1,xmm1
+	pand	xmm7,xmm3
+	pxor	xmm7,xmm1
+	movdqu	xmm2,[esi]
+	movdqu	xmm3,[16+esi]
+	movdqu	xmm4,[32+esi]
+	pxor	xmm2,[esp]
+	movdqu	xmm5,[48+esi]
+	pxor	xmm3,[16+esp]
+	movdqu	xmm6,[64+esi]
+	pxor	xmm4,[32+esp]
+	lea	esi,[80+esi]
+	pxor	xmm5,[48+esp]
+	movdqa	[64+esp],xmm7
+	pxor	xmm6,xmm7
+	call	__aesni_decrypt6
+	movaps	xmm1,[64+esp]
+	xorps	xmm2,[esp]
+	xorps	xmm3,[16+esp]
+	xorps	xmm4,[32+esp]
+	movups	[edi],xmm2
+	xorps	xmm5,[48+esp]
+	movups	[16+edi],xmm3
+	xorps	xmm6,xmm1
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+	movups	[64+edi],xmm6
+	lea	edi,[80+edi]
+	jmp	NEAR L$069xts_dec_done
+align	16
+L$065xts_dec_one:
+	movups	xmm2,[esi]
+	lea	esi,[16+esi]
+	xorps	xmm2,xmm5
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$070dec1_loop_12:
+db	102,15,56,222,209
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$070dec1_loop_12
+db	102,15,56,223,209
+	xorps	xmm2,xmm5
+	movups	[edi],xmm2
+	lea	edi,[16+edi]
+	movdqa	xmm1,xmm5
+	jmp	NEAR L$069xts_dec_done
+align	16
+L$066xts_dec_two:
+	movaps	xmm6,xmm1
+	movups	xmm2,[esi]
+	movups	xmm3,[16+esi]
+	lea	esi,[32+esi]
+	xorps	xmm2,xmm5
+	xorps	xmm3,xmm6
+	call	__aesni_decrypt2
+	xorps	xmm2,xmm5
+	xorps	xmm3,xmm6
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	lea	edi,[32+edi]
+	movdqa	xmm1,xmm6
+	jmp	NEAR L$069xts_dec_done
+align	16
+L$067xts_dec_three:
+	movaps	xmm7,xmm1
+	movups	xmm2,[esi]
+	movups	xmm3,[16+esi]
+	movups	xmm4,[32+esi]
+	lea	esi,[48+esi]
+	xorps	xmm2,xmm5
+	xorps	xmm3,xmm6
+	xorps	xmm4,xmm7
+	call	__aesni_decrypt3
+	xorps	xmm2,xmm5
+	xorps	xmm3,xmm6
+	xorps	xmm4,xmm7
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	lea	edi,[48+edi]
+	movdqa	xmm1,xmm7
+	jmp	NEAR L$069xts_dec_done
+align	16
+L$068xts_dec_four:
+	movaps	xmm6,xmm1
+	movups	xmm2,[esi]
+	movups	xmm3,[16+esi]
+	movups	xmm4,[32+esi]
+	xorps	xmm2,[esp]
+	movups	xmm5,[48+esi]
+	lea	esi,[64+esi]
+	xorps	xmm3,[16+esp]
+	xorps	xmm4,xmm7
+	xorps	xmm5,xmm6
+	call	__aesni_decrypt4
+	xorps	xmm2,[esp]
+	xorps	xmm3,[16+esp]
+	xorps	xmm4,xmm7
+	movups	[edi],xmm2
+	xorps	xmm5,xmm6
+	movups	[16+edi],xmm3
+	movups	[32+edi],xmm4
+	movups	[48+edi],xmm5
+	lea	edi,[64+edi]
+	movdqa	xmm1,xmm6
+	jmp	NEAR L$069xts_dec_done
+align	16
+L$064xts_dec_done6x:
+	mov	eax,DWORD [112+esp]
+	and	eax,15
+	jz	NEAR L$071xts_dec_ret
+	mov	DWORD [112+esp],eax
+	jmp	NEAR L$072xts_dec_only_one_more
+align	16
+L$069xts_dec_done:
+	mov	eax,DWORD [112+esp]
+	pxor	xmm0,xmm0
+	and	eax,15
+	jz	NEAR L$071xts_dec_ret
+	pcmpgtd	xmm0,xmm1
+	mov	DWORD [112+esp],eax
+	pshufd	xmm2,xmm0,19
+	pxor	xmm0,xmm0
+	movdqa	xmm3,[96+esp]
+	paddq	xmm1,xmm1
+	pand	xmm2,xmm3
+	pcmpgtd	xmm0,xmm1
+	pxor	xmm1,xmm2
+L$072xts_dec_only_one_more:
+	pshufd	xmm5,xmm0,19
+	movdqa	xmm6,xmm1
+	paddq	xmm1,xmm1
+	pand	xmm5,xmm3
+	pxor	xmm5,xmm1
+	mov	edx,ebp
+	mov	ecx,ebx
+	movups	xmm2,[esi]
+	xorps	xmm2,xmm5
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$073dec1_loop_13:
+db	102,15,56,222,209
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$073dec1_loop_13
+db	102,15,56,223,209
+	xorps	xmm2,xmm5
+	movups	[edi],xmm2
+L$074xts_dec_steal:
+	movzx	ecx,BYTE [16+esi]
+	movzx	edx,BYTE [edi]
+	lea	esi,[1+esi]
+	mov	BYTE [edi],cl
+	mov	BYTE [16+edi],dl
+	lea	edi,[1+edi]
+	sub	eax,1
+	jnz	NEAR L$074xts_dec_steal
+	sub	edi,DWORD [112+esp]
+	mov	edx,ebp
+	mov	ecx,ebx
+	movups	xmm2,[edi]
+	xorps	xmm2,xmm6
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$075dec1_loop_14:
+db	102,15,56,222,209
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$075dec1_loop_14
+db	102,15,56,223,209
+	xorps	xmm2,xmm6
+	movups	[edi],xmm2
+L$071xts_dec_ret:
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	movdqa	[esp],xmm0
+	pxor	xmm3,xmm3
+	movdqa	[16+esp],xmm0
+	pxor	xmm4,xmm4
+	movdqa	[32+esp],xmm0
+	pxor	xmm5,xmm5
+	movdqa	[48+esp],xmm0
+	pxor	xmm6,xmm6
+	movdqa	[64+esp],xmm0
+	pxor	xmm7,xmm7
+	movdqa	[80+esp],xmm0
+	mov	esp,DWORD [116+esp]
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_aes_hw_cbc_encrypt	; AES-CBC encrypt/decrypt; stack args read below: in, out, byte length, key schedule, IV pointer, enc flag
+align	16
+_aes_hw_cbc_encrypt:
+L$_aes_hw_cbc_encrypt_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	esi,DWORD [20+esp]	; arg 1: input pointer
+	mov	ebx,esp
+	mov	edi,DWORD [24+esp]	; arg 2: output pointer
+	sub	ebx,24
+	mov	eax,DWORD [28+esp]	; arg 3: length in bytes
+	and	ebx,-16	; ebx = 16-byte-aligned scratch frame below the current stack top
+	mov	edx,DWORD [32+esp]	; arg 4: key schedule (loop count is read from offset 240)
+	mov	ebp,DWORD [36+esp]	; arg 5: IV pointer
+	test	eax,eax
+	jz	NEAR L$076cbc_abort	; zero length: return without touching the IV
+	cmp	DWORD [40+esp],0	; arg 6: zero selects decryption (flags are consumed by the je below)
+	xchg	ebx,esp	; switch to the aligned frame; ebx now holds the caller-side esp
+	movups	xmm7,[ebp]	; xmm7 = IV
+	mov	ecx,DWORD [240+edx]
+	mov	ebp,edx	; ebp keeps the key pointer so edx can be advanced and reset
+	mov	DWORD [16+esp],ebx	; remember the old esp for L$081cbc_ret
+	mov	ebx,ecx	; ebx keeps the loop count so ecx can be counted down and reset
+	je	NEAR L$077cbc_decrypt
+	movaps	xmm2,xmm7	; encryption: xmm2 carries the chaining value
+	cmp	eax,16
+	jb	NEAR L$078cbc_enc_tail
+	sub	eax,16
+	jmp	NEAR L$079cbc_enc_loop
+align	16
+L$079cbc_enc_loop:	; one 16-byte block per iteration
+	movups	xmm7,[esi]
+	lea	esi,[16+esi]
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	xorps	xmm7,xmm0	; input ^ round key 0
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm7	; ... ^ previous ciphertext (or IV)
+L$080enc1_loop_15:
+db	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$080enc1_loop_15
+db	102,15,56,221,209	; aesenclast xmm2,xmm1
+	mov	ecx,ebx	; reset loop count and key pointer for the next block
+	mov	edx,ebp
+	movups	[edi],xmm2
+	lea	edi,[16+edi]
+	sub	eax,16
+	jnc	NEAR L$079cbc_enc_loop
+	add	eax,16
+	jnz	NEAR L$078cbc_enc_tail	; 1..15 bytes left over
+	movaps	xmm7,xmm2	; last ciphertext block becomes the IV written back at L$081cbc_ret
+	pxor	xmm2,xmm2
+	jmp	NEAR L$081cbc_ret
+L$078cbc_enc_tail:	; partial final block: copy it to the output, zero-fill to 16 bytes, encrypt in place
+	mov	ecx,eax
+dd	2767451785	; bytes 89 F6 F3 A4: mov esi,esi / rep movsb (copy eax bytes from [esi] to [edi])
+	mov	ecx,16
+	sub	ecx,eax
+	xor	eax,eax
+dd	2868115081	; bytes 89 F6 F3 AA: mov esi,esi / rep stosb (store 16-len zero bytes, al = 0)
+	lea	edi,[edi-16]	; back to the start of the padded block
+	mov	ecx,ebx
+	mov	esi,edi	; input = output: the loop encrypts the padded block in place
+	mov	edx,ebp
+	jmp	NEAR L$079cbc_enc_loop	; eax is 0 here, so the loop runs exactly once more
+align	16
+L$077cbc_decrypt:
+	cmp	eax,80
+	jbe	NEAR L$082cbc_dec_tail	; 80 bytes or fewer: handled by the tail code
+	movaps	[esp],xmm7	; [esp] holds the chaining value for the first block of each batch
+	sub	eax,80
+	jmp	NEAR L$083cbc_dec_loop6_enter
+align	16
+L$084cbc_dec_loop6:
+	movaps	[esp],xmm0	; xmm0 = last ciphertext block of the previous batch
+	movups	[edi],xmm7	; deferred store of the previous batch's sixth output block
+	lea	edi,[16+edi]
+L$083cbc_dec_loop6_enter:	; six blocks (96 bytes) per iteration
+	movdqu	xmm2,[esi]
+	movdqu	xmm3,[16+esi]
+	movdqu	xmm4,[32+esi]
+	movdqu	xmm5,[48+esi]
+	movdqu	xmm6,[64+esi]
+	movdqu	xmm7,[80+esi]
+	call	__aesni_decrypt6	; defined outside this excerpt; presumably decrypts xmm2..xmm7 in place - confirm
+	movups	xmm1,[esi]	; reload ciphertext blocks to use as chaining values
+	movups	xmm0,[16+esi]
+	xorps	xmm2,[esp]
+	xorps	xmm3,xmm1
+	movups	xmm1,[32+esi]
+	xorps	xmm4,xmm0
+	movups	xmm0,[48+esi]
+	xorps	xmm5,xmm1
+	movups	xmm1,[64+esi]
+	xorps	xmm6,xmm0
+	movups	xmm0,[80+esi]
+	xorps	xmm7,xmm1
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	lea	esi,[96+esi]
+	movups	[32+edi],xmm4
+	mov	ecx,ebx
+	movups	[48+edi],xmm5
+	mov	edx,ebp
+	movups	[64+edi],xmm6
+	lea	edi,[80+edi]	; only five blocks stored so far; the sixth stays in xmm7
+	sub	eax,96
+	ja	NEAR L$084cbc_dec_loop6
+	movaps	xmm2,xmm7	; xmm2 = pending sixth output block
+	movaps	xmm7,xmm0	; xmm7 = last ciphertext block = next chaining value
+	add	eax,80
+	jle	NEAR L$085cbc_dec_clear_tail_collected
+	movups	[edi],xmm2
+	lea	edi,[16+edi]
+L$082cbc_dec_tail:	; 1..80 bytes remain: dispatch on how many blocks that covers
+	movups	xmm2,[esi]
+	movaps	xmm6,xmm2
+	cmp	eax,16
+	jbe	NEAR L$086cbc_dec_one
+	movups	xmm3,[16+esi]
+	movaps	xmm5,xmm3
+	cmp	eax,32
+	jbe	NEAR L$087cbc_dec_two
+	movups	xmm4,[32+esi]
+	cmp	eax,48
+	jbe	NEAR L$088cbc_dec_three
+	movups	xmm5,[48+esi]
+	cmp	eax,64
+	jbe	NEAR L$089cbc_dec_four
+	movups	xmm6,[64+esi]	; five blocks: run the six-block routine with a zeroed sixth input
+	movaps	[esp],xmm7
+	movups	xmm2,[esi]
+	xorps	xmm7,xmm7
+	call	__aesni_decrypt6
+	movups	xmm1,[esi]
+	movups	xmm0,[16+esi]
+	xorps	xmm2,[esp]
+	xorps	xmm3,xmm1
+	movups	xmm1,[32+esi]
+	xorps	xmm4,xmm0
+	movups	xmm0,[48+esi]
+	xorps	xmm5,xmm1
+	movups	xmm7,[64+esi]	; fifth ciphertext block = next chaining value
+	xorps	xmm6,xmm0
+	movups	[edi],xmm2
+	movups	[16+edi],xmm3
+	pxor	xmm3,xmm3
+	movups	[32+edi],xmm4
+	pxor	xmm4,xmm4
+	movups	[48+edi],xmm5
+	pxor	xmm5,xmm5
+	lea	edi,[64+edi]
+	movaps	xmm2,xmm6	; last output block is left in xmm2 for L$090cbc_dec_tail_collected
+	pxor	xmm6,xmm6
+	sub	eax,80
+	jmp	NEAR L$090cbc_dec_tail_collected
+align	16
+L$086cbc_dec_one:
+	movups	xmm0,[edx]
+	movups	xmm1,[16+edx]
+	lea	edx,[32+edx]
+	xorps	xmm2,xmm0
+L$091dec1_loop_16:
+db	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	ecx
+	movups	xmm1,[edx]
+	lea	edx,[16+edx]
+	jnz	NEAR L$091dec1_loop_16
+db	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	xorps	xmm2,xmm7	; ^ chaining value
+	movaps	xmm7,xmm6	; xmm6 held a copy of this ciphertext block
+	sub	eax,16
+	jmp	NEAR L$090cbc_dec_tail_collected
+align	16
+L$087cbc_dec_two:
+	call	__aesni_decrypt2	; defined outside this excerpt
+	xorps	xmm2,xmm7
+	xorps	xmm3,xmm6
+	movups	[edi],xmm2
+	movaps	xmm2,xmm3
+	pxor	xmm3,xmm3
+	lea	edi,[16+edi]
+	movaps	xmm7,xmm5	; xmm5 held a copy of the second ciphertext block
+	sub	eax,32
+	jmp	NEAR L$090cbc_dec_tail_collected
+align	16
+L$088cbc_dec_three:
+	call	__aesni_decrypt3	; defined outside this excerpt
+	xorps	xmm2,xmm7
+	xorps	xmm3,xmm6
+	xorps	xmm4,xmm5
+	movups	[edi],xmm2
+	movaps	xmm2,xmm4
+	pxor	xmm4,xmm4
+	movups	[16+edi],xmm3
+	pxor	xmm3,xmm3
+	lea	edi,[32+edi]
+	movups	xmm7,[32+esi]
+	sub	eax,48
+	jmp	NEAR L$090cbc_dec_tail_collected
+align	16
+L$089cbc_dec_four:
+	call	__aesni_decrypt4	; defined outside this excerpt
+	movups	xmm1,[16+esi]
+	movups	xmm0,[32+esi]
+	xorps	xmm2,xmm7
+	movups	xmm7,[48+esi]
+	xorps	xmm3,xmm6
+	movups	[edi],xmm2
+	xorps	xmm4,xmm1
+	movups	[16+edi],xmm3
+	pxor	xmm3,xmm3
+	xorps	xmm5,xmm0
+	movups	[32+edi],xmm4
+	pxor	xmm4,xmm4
+	lea	edi,[48+edi]
+	movaps	xmm2,xmm5
+	pxor	xmm5,xmm5
+	sub	eax,64
+	jmp	NEAR L$090cbc_dec_tail_collected
+align	16
+L$085cbc_dec_clear_tail_collected:
+	pxor	xmm3,xmm3	; clear registers that held output blocks
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+L$090cbc_dec_tail_collected:	; xmm2 = final output block not yet stored, xmm7 = next IV
+	and	eax,15
+	jnz	NEAR L$092cbc_dec_tail_partial
+	movups	[edi],xmm2
+	pxor	xmm0,xmm0
+	jmp	NEAR L$081cbc_ret
+align	16
+L$092cbc_dec_tail_partial:
+	movaps	[esp],xmm2	; stage the final block in the aligned scratch area
+	pxor	xmm0,xmm0
+	mov	ecx,16
+	mov	esi,esp
+	sub	ecx,eax
+dd	2767451785	; bytes 89 F6 F3 A4: mov esi,esi / rep movsb (copy ecx = 16-eax bytes from scratch to [edi])
+	movdqa	[esp],xmm2	; NOTE(review): re-stores xmm2, so scratch still holds the block; xmm0 was just zeroed - confirm whether a wipe was intended
+L$081cbc_ret:
+	mov	esp,DWORD [16+esp]	; back to the stack pointer saved at entry
+	mov	ebp,DWORD [36+esp]	; reload the IV pointer argument
+	pxor	xmm2,xmm2
+	pxor	xmm1,xmm1
+	movups	[ebp],xmm7	; write the updated IV back to the caller
+	pxor	xmm7,xmm7
+L$076cbc_abort:
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	16
+__aesni_set_encrypt_key:	; internal key expansion; register args: eax = user key, ecx = key size in bits, edx = output schedule; eax returns 0 / -1 / -2
+	push	ebp
+	push	ebx
+	test	eax,eax
+	jz	NEAR L$093bad_pointer	; null user key
+	test	edx,edx
+	jz	NEAR L$093bad_pointer	; null output schedule
+	call	L$094pic	; call/pop pair obtains the current code address for position-independent addressing
+L$094pic:
+	pop	ebx
+	lea	ebx,[(L$key_const-L$094pic)+ebx]	; ebx = address of L$key_const
+	lea	ebp,[_OPENSSL_ia32cap_P]
+	movups	xmm0,[eax]	; xmm0 = first 16 bytes of the user key
+	xorps	xmm4,xmm4
+	mov	ebp,DWORD [4+ebp]	; second dword of the capability vector
+	lea	edx,[16+edx]
+	and	ebp,268437504	; keep bits 28 and 11 (0x10000800); presumably AVX and XOP per OpenSSL's ia32cap layout - confirm
+	cmp	ecx,256
+	je	NEAR L$09514rounds
+	cmp	ecx,192
+	je	NEAR L$09612rounds
+	cmp	ecx,128
+	jne	NEAR L$097bad_keybits
+align	16
+L$09810rounds:
+	cmp	ebp,268435456	; bit 28 set and bit 11 clear selects the pshufb/aesenclast variant
+	je	NEAR L$09910rounds_alt
+	mov	ecx,9	; value stored at schedule offset 240 below
+	movups	[edx-16],xmm0	; round key 0 = user key
+db	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,1
+	call	L$100key_128_cold
+db	102,15,58,223,200,2	; aeskeygenassist xmm1,xmm0,2
+	call	L$101key_128
+db	102,15,58,223,200,4	; aeskeygenassist xmm1,xmm0,4
+	call	L$101key_128
+db	102,15,58,223,200,8	; aeskeygenassist xmm1,xmm0,8
+	call	L$101key_128
+db	102,15,58,223,200,16	; aeskeygenassist xmm1,xmm0,16
+	call	L$101key_128
+db	102,15,58,223,200,32	; aeskeygenassist xmm1,xmm0,32
+	call	L$101key_128
+db	102,15,58,223,200,64	; aeskeygenassist xmm1,xmm0,64
+	call	L$101key_128
+db	102,15,58,223,200,128	; aeskeygenassist xmm1,xmm0,128
+	call	L$101key_128
+db	102,15,58,223,200,27	; aeskeygenassist xmm1,xmm0,27
+	call	L$101key_128
+db	102,15,58,223,200,54	; aeskeygenassist xmm1,xmm0,54
+	call	L$101key_128
+	movups	[edx],xmm0	; final round key
+	mov	DWORD [80+edx],ecx	; edx = schedule+160 here, so this lands at schedule+240
+	jmp	NEAR L$102good_key
+align	16
+L$101key_128:	; store the previous round key, then derive the next one
+	movups	[edx],xmm0
+	lea	edx,[16+edx]
+L$100key_128_cold:	; entry used for the first round (nothing to store yet)
+	shufps	xmm4,xmm0,16
+	xorps	xmm0,xmm4
+	shufps	xmm4,xmm0,140
+	xorps	xmm0,xmm4
+	shufps	xmm1,xmm1,255	; broadcast the top dword of the aeskeygenassist result
+	xorps	xmm0,xmm1
+	ret
+align	16
+L$09910rounds_alt:
+	movdqa	xmm5,[ebx]	; shuffle mask (first row of L$key_const)
+	mov	ecx,8
+	movdqa	xmm4,[32+ebx]	; round constant, starts at 1
+	movdqa	xmm2,xmm0
+	movdqu	[edx-16],xmm0	; round key 0 = user key
+L$103loop_key128:
+db	102,15,56,0,197	; pshufb xmm0,xmm5
+db	102,15,56,221,196	; aesenclast xmm0,xmm4
+	pslld	xmm4,1	; double the round constant
+	lea	edx,[16+edx]
+	movdqa	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm2,xmm3
+	pxor	xmm0,xmm2
+	movdqu	[edx-16],xmm0
+	movdqa	xmm2,xmm0
+	dec	ecx
+	jnz	NEAR L$103loop_key128
+	movdqa	xmm4,[48+ebx]	; switch to round constant 27 for the last two rounds
+db	102,15,56,0,197	; pshufb xmm0,xmm5
+db	102,15,56,221,196	; aesenclast xmm0,xmm4
+	pslld	xmm4,1	; 27 -> 54
+	movdqa	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm2,xmm3
+	pxor	xmm0,xmm2
+	movdqu	[edx],xmm0
+	movdqa	xmm2,xmm0
+db	102,15,56,0,197	; pshufb xmm0,xmm5
+db	102,15,56,221,196	; aesenclast xmm0,xmm4
+	movdqa	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm2,xmm3
+	pxor	xmm0,xmm2
+	movdqu	[16+edx],xmm0
+	mov	ecx,9
+	mov	DWORD [96+edx],ecx	; edx = schedule+144 here, so this lands at schedule+240
+	jmp	NEAR L$102good_key
+align	16
+L$09612rounds:
+	movq	xmm2,[16+eax]	; remaining 8 bytes of the 192-bit user key
+	cmp	ebp,268435456
+	je	NEAR L$10412rounds_alt
+	mov	ecx,11	; value stored at schedule offset 240 below
+	movups	[edx-16],xmm0
+db	102,15,58,223,202,1	; aeskeygenassist xmm1,xmm2,1
+	call	L$105key_192a_cold
+db	102,15,58,223,202,2	; aeskeygenassist xmm1,xmm2,2
+	call	L$106key_192b
+db	102,15,58,223,202,4	; aeskeygenassist xmm1,xmm2,4
+	call	L$107key_192a
+db	102,15,58,223,202,8	; aeskeygenassist xmm1,xmm2,8
+	call	L$106key_192b
+db	102,15,58,223,202,16	; aeskeygenassist xmm1,xmm2,16
+	call	L$107key_192a
+db	102,15,58,223,202,32	; aeskeygenassist xmm1,xmm2,32
+	call	L$106key_192b
+db	102,15,58,223,202,64	; aeskeygenassist xmm1,xmm2,64
+	call	L$107key_192a
+db	102,15,58,223,202,128	; aeskeygenassist xmm1,xmm2,128
+	call	L$106key_192b
+	movups	[edx],xmm0
+	mov	DWORD [48+edx],ecx	; edx = schedule+192 here, so this lands at schedule+240
+	jmp	NEAR L$102good_key
+align	16
+L$107key_192a:	; stores 16 bytes, then derives the next 24 bytes of key material
+	movups	[edx],xmm0
+	lea	edx,[16+edx]
+align	16
+L$105key_192a_cold:
+	movaps	xmm5,xmm2	; keep the current 8-byte half for key_192b to store
+L$108key_192b_warm:
+	shufps	xmm4,xmm0,16
+	movdqa	xmm3,xmm2
+	xorps	xmm0,xmm4
+	shufps	xmm4,xmm0,140
+	pslldq	xmm3,4
+	xorps	xmm0,xmm4
+	pshufd	xmm1,xmm1,85	; broadcast dword 1 of the aeskeygenassist result
+	pxor	xmm2,xmm3
+	pxor	xmm0,xmm1
+	pshufd	xmm3,xmm0,255
+	pxor	xmm2,xmm3
+	ret
+align	16
+L$106key_192b:	; stores 32 bytes assembled from xmm5/xmm0/xmm2, then continues in key_192b_warm
+	movaps	xmm3,xmm0
+	shufps	xmm5,xmm0,68
+	movups	[edx],xmm5
+	shufps	xmm3,xmm2,78
+	movups	[16+edx],xmm3
+	lea	edx,[32+edx]
+	jmp	NEAR L$108key_192b_warm
+align	16
+L$10412rounds_alt:
+	movdqa	xmm5,[16+ebx]	; shuffle mask (second row of L$key_const)
+	movdqa	xmm4,[32+ebx]	; round constant, starts at 1
+	mov	ecx,8
+	movdqu	[edx-16],xmm0
+L$109loop_key192:	; emits 24 bytes of schedule per iteration
+	movq	[edx],xmm2
+	movdqa	xmm1,xmm2
+db	102,15,56,0,213	; pshufb xmm2,xmm5
+db	102,15,56,221,212	; aesenclast xmm2,xmm4
+	pslld	xmm4,1
+	lea	edx,[24+edx]
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm0,xmm3
+	pshufd	xmm3,xmm0,255
+	pxor	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm3,xmm1
+	pxor	xmm0,xmm2
+	pxor	xmm2,xmm3
+	movdqu	[edx-16],xmm0
+	dec	ecx
+	jnz	NEAR L$109loop_key192
+	mov	ecx,11
+	mov	DWORD [32+edx],ecx	; edx = schedule+208 here, so this lands at schedule+240
+	jmp	NEAR L$102good_key
+align	16
+L$09514rounds:
+	movups	xmm2,[16+eax]	; second 16 bytes of the 256-bit user key
+	lea	edx,[16+edx]
+	cmp	ebp,268435456
+	je	NEAR L$11014rounds_alt
+	mov	ecx,13	; value stored at schedule offset 240 below
+	movups	[edx-32],xmm0
+	movups	[edx-16],xmm2
+db	102,15,58,223,202,1	; aeskeygenassist xmm1,xmm2,1
+	call	L$111key_256a_cold
+db	102,15,58,223,200,1	; aeskeygenassist xmm1,xmm0,1
+	call	L$112key_256b
+db	102,15,58,223,202,2	; aeskeygenassist xmm1,xmm2,2
+	call	L$113key_256a
+db	102,15,58,223,200,2	; aeskeygenassist xmm1,xmm0,2
+	call	L$112key_256b
+db	102,15,58,223,202,4	; aeskeygenassist xmm1,xmm2,4
+	call	L$113key_256a
+db	102,15,58,223,200,4	; aeskeygenassist xmm1,xmm0,4
+	call	L$112key_256b
+db	102,15,58,223,202,8	; aeskeygenassist xmm1,xmm2,8
+	call	L$113key_256a
+db	102,15,58,223,200,8	; aeskeygenassist xmm1,xmm0,8
+	call	L$112key_256b
+db	102,15,58,223,202,16	; aeskeygenassist xmm1,xmm2,16
+	call	L$113key_256a
+db	102,15,58,223,200,16	; aeskeygenassist xmm1,xmm0,16
+	call	L$112key_256b
+db	102,15,58,223,202,32	; aeskeygenassist xmm1,xmm2,32
+	call	L$113key_256a
+db	102,15,58,223,200,32	; aeskeygenassist xmm1,xmm0,32
+	call	L$112key_256b
+db	102,15,58,223,202,64	; aeskeygenassist xmm1,xmm2,64
+	call	L$113key_256a
+	movups	[edx],xmm0
+	mov	DWORD [16+edx],ecx	; edx = schedule+224 here, so this lands at schedule+240
+	xor	eax,eax
+	jmp	NEAR L$102good_key
+align	16
+L$113key_256a:	; store xmm2, then update the even half (xmm0)
+	movups	[edx],xmm2
+	lea	edx,[16+edx]
+L$111key_256a_cold:
+	shufps	xmm4,xmm0,16
+	xorps	xmm0,xmm4
+	shufps	xmm4,xmm0,140
+	xorps	xmm0,xmm4
+	shufps	xmm1,xmm1,255	; broadcast dword 3 of the aeskeygenassist result
+	xorps	xmm0,xmm1
+	ret
+align	16
+L$112key_256b:	; store xmm0, then update the odd half (xmm2)
+	movups	[edx],xmm0
+	lea	edx,[16+edx]
+	shufps	xmm4,xmm2,16
+	xorps	xmm2,xmm4
+	shufps	xmm4,xmm2,140
+	xorps	xmm2,xmm4
+	shufps	xmm1,xmm1,170	; broadcast dword 2 of the aeskeygenassist result
+	xorps	xmm2,xmm1
+	ret
+align	16
+L$11014rounds_alt:
+	movdqa	xmm5,[ebx]	; shuffle mask (first row of L$key_const)
+	movdqa	xmm4,[32+ebx]	; round constant, starts at 1
+	mov	ecx,7
+	movdqu	[edx-32],xmm0
+	movdqa	xmm1,xmm2
+	movdqu	[edx-16],xmm2
+L$114loop_key256:	; emits two round keys per full iteration; the seventh pass exits after the first
+db	102,15,56,0,213	; pshufb xmm2,xmm5
+db	102,15,56,221,212	; aesenclast xmm2,xmm4
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm0,xmm3
+	pslld	xmm4,1
+	pxor	xmm0,xmm2
+	movdqu	[edx],xmm0
+	dec	ecx
+	jz	NEAR L$115done_key256
+	pshufd	xmm2,xmm0,255
+	pxor	xmm3,xmm3
+db	102,15,56,221,211	; aesenclast xmm2,xmm3 (xmm3 is zero here)
+	movdqa	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm1,xmm3
+	pxor	xmm2,xmm1
+	movdqu	[16+edx],xmm2
+	lea	edx,[32+edx]
+	movdqa	xmm1,xmm2
+	jmp	NEAR L$114loop_key256
+L$115done_key256:
+	mov	ecx,13
+	mov	DWORD [16+edx],ecx	; edx = schedule+224 here, so this lands at schedule+240
+L$102good_key:	; common success exit: clear key material from xmm0..xmm5, return 0 (ecx still holds the stored count)
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	xor	eax,eax
+	pop	ebx
+	pop	ebp
+	ret
+align	4
+L$093bad_pointer:
+	mov	eax,-1	; error: null key or schedule pointer
+	pop	ebx
+	pop	ebp
+	ret
+align	4
+L$097bad_keybits:
+	pxor	xmm0,xmm0	; drop the user key bytes loaded earlier
+	mov	eax,-2	; error: key size is not 128, 192 or 256
+	pop	ebx
+	pop	ebp
+	ret
+global	_aes_hw_set_encrypt_key	; public wrapper: moves three stack args into eax/ecx/edx and calls __aesni_set_encrypt_key
+align	16
+_aes_hw_set_encrypt_key:
+L$_aes_hw_set_encrypt_key_begin:
+%ifdef BORINGSSL_DISPATCH_TEST
+	push	ebx
+	push	edx
+	call	L$116pic	; call/pop pair obtains the current code address
+L$116pic:
+	pop	ebx
+	lea	ebx,[(_BORINGSSL_function_hit+3-L$116pic)+ebx]	; ebx = address of byte 3 of _BORINGSSL_function_hit
+	mov	edx,1
+	mov	BYTE [ebx],dl	; record that this routine was entered
+	pop	edx
+	pop	ebx
+%endif
+	mov	eax,DWORD [4+esp]	; arg 1: user key pointer
+	mov	ecx,DWORD [8+esp]	; arg 2: key size in bits
+	mov	edx,DWORD [12+esp]	; arg 3: output key schedule
+	call	__aesni_set_encrypt_key
+	ret	; eax carries the helper's result code (0, -1 or -2)
+global	_aes_hw_set_decrypt_key	; builds the encryption schedule, then converts it in place: round keys reversed, inner ones passed through aesimc
+align	16
+_aes_hw_set_decrypt_key:
+L$_aes_hw_set_decrypt_key_begin:
+	mov	eax,DWORD [4+esp]	; arg 1: user key pointer
+	mov	ecx,DWORD [8+esp]	; arg 2: key size in bits
+	mov	edx,DWORD [12+esp]	; arg 3: output key schedule
+	call	__aesni_set_encrypt_key
+	mov	edx,DWORD [12+esp]	; reload the schedule base (the helper advanced edx)
+	shl	ecx,4	; on success ecx is the count the helper stored (9/11/13); scale to a byte offset
+	test	eax,eax
+	jnz	NEAR L$117dec_key_ret	; propagate the helper's error code unchanged
+	lea	eax,[16+ecx*1+edx]	; eax = address of the last round key
+	movups	xmm0,[edx]
+	movups	xmm1,[eax]
+	movups	[eax],xmm0	; swap first and last round keys without transforming them
+	movups	[edx],xmm1
+	lea	edx,[16+edx]
+	lea	eax,[eax-16]
+L$118dec_key_inverse:	; walk inward from both ends, swapping pairs and applying aesimc to each
+	movups	xmm0,[edx]
+	movups	xmm1,[eax]
+db	102,15,56,219,192	; aesimc xmm0,xmm0
+db	102,15,56,219,201	; aesimc xmm1,xmm1
+	lea	edx,[16+edx]
+	lea	eax,[eax-16]
+	movups	[16+eax],xmm0
+	movups	[edx-16],xmm1
+	cmp	eax,edx
+	ja	NEAR L$118dec_key_inverse
+	movups	xmm0,[edx]	; middle round key: transformed in place
+db	102,15,56,219,192	; aesimc xmm0,xmm0
+	movups	[edx],xmm0
+	pxor	xmm0,xmm0	; clear key material from registers
+	pxor	xmm1,xmm1
+	xor	eax,eax	; return 0 on success
+L$117dec_key_ret:
+	ret
+align	64
+L$key_const:	; constants for the *_alt key-expansion paths, addressed relative to ebx
+dd	202313229,202313229,202313229,202313229	; 0x0C0F0E0D x4: pshufb mask loaded from [ebx] (128- and 256-bit alt paths)
+dd	67569157,67569157,67569157,67569157	; 0x04070605 x4: pshufb mask loaded from [16+ebx] (192-bit alt path)
+dd	1,1,1,1	; initial round constant at [32+ebx]; doubled per round with pslld
+dd	27,27,27,27	; round constant 27 at [48+ebx]; used for the last two 128-bit rounds
+db	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69	; ASCII "AES for Intel AE"
+db	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83	; ASCII "S-NI, CRYPTOGAMS"
+db	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115	; ASCII " by <appro@opens"
+db	115,108,46,111,114,103,62,0	; ASCII "sl.org>" plus NUL terminator
+segment	.bss
+common	_OPENSSL_ia32cap_P 16
diff --git a/deps/boringssl/win-x86/crypto/fipsmodule/bn-586.asm b/deps/boringssl/win-x86/crypto/fipsmodule/bn-586.asm
new file mode 100644
index 0000000..4d1b793
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/fipsmodule/bn-586.asm
@@ -0,0 +1,977 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+;extern	_OPENSSL_ia32cap_P
+global	_bn_mul_add_words	; r[i] += a[i]*w over a word array, returning the final carry word in eax; stack args: r, a, word count, w
+align	16
+_bn_mul_add_words:
+L$_bn_mul_add_words_begin:
+	lea	eax,[_OPENSSL_ia32cap_P]
+	bt	DWORD [eax],26	; capability bit 26 of the first dword selects the MMX/pmuludq path
+	jnc	NEAR L$000maw_non_sse2
+	mov	eax,DWORD [4+esp]	; arg 1: r (read and updated)
+	mov	edx,DWORD [8+esp]	; arg 2: a (read only)
+	mov	ecx,DWORD [12+esp]	; arg 3: number of 32-bit words
+	movd	mm0,DWORD [16+esp]	; arg 4: multiplier w
+	pxor	mm1,mm1	; mm1 = running carry
+	jmp	NEAR L$001maw_sse2_entry
+align	16
+L$002maw_sse2_unrolled:	; eight words per iteration
+	movd	mm3,DWORD [eax]
+	paddq	mm1,mm3
+	movd	mm2,DWORD [edx]
+	pmuludq	mm2,mm0
+	movd	mm4,DWORD [4+edx]
+	pmuludq	mm4,mm0
+	movd	mm6,DWORD [8+edx]
+	pmuludq	mm6,mm0
+	movd	mm7,DWORD [12+edx]
+	pmuludq	mm7,mm0
+	paddq	mm1,mm2
+	movd	mm3,DWORD [4+eax]
+	paddq	mm3,mm4
+	movd	mm5,DWORD [8+eax]
+	paddq	mm5,mm6
+	movd	mm4,DWORD [12+eax]
+	paddq	mm7,mm4
+	movd	DWORD [eax],mm1
+	movd	mm2,DWORD [16+edx]
+	pmuludq	mm2,mm0
+	psrlq	mm1,32	; keep only the carry (upper 32 bits) for the next word
+	movd	mm4,DWORD [20+edx]
+	pmuludq	mm4,mm0
+	paddq	mm1,mm3
+	movd	mm6,DWORD [24+edx]
+	pmuludq	mm6,mm0
+	movd	DWORD [4+eax],mm1
+	psrlq	mm1,32
+	movd	mm3,DWORD [28+edx]
+	add	edx,32
+	pmuludq	mm3,mm0
+	paddq	mm1,mm5
+	movd	mm5,DWORD [16+eax]
+	paddq	mm2,mm5
+	movd	DWORD [8+eax],mm1
+	psrlq	mm1,32
+	paddq	mm1,mm7
+	movd	mm5,DWORD [20+eax]
+	paddq	mm4,mm5
+	movd	DWORD [12+eax],mm1
+	psrlq	mm1,32
+	paddq	mm1,mm2
+	movd	mm5,DWORD [24+eax]
+	paddq	mm6,mm5
+	movd	DWORD [16+eax],mm1
+	psrlq	mm1,32
+	paddq	mm1,mm4
+	movd	mm5,DWORD [28+eax]
+	paddq	mm3,mm5
+	movd	DWORD [20+eax],mm1
+	psrlq	mm1,32
+	paddq	mm1,mm6
+	movd	DWORD [24+eax],mm1
+	psrlq	mm1,32
+	paddq	mm1,mm3
+	movd	DWORD [28+eax],mm1
+	lea	eax,[32+eax]
+	psrlq	mm1,32
+	sub	ecx,8
+	jz	NEAR L$003maw_sse2_exit
+L$001maw_sse2_entry:
+	test	ecx,4294967288	; 0xFFFFFFF8: non-zero when at least eight words remain
+	jnz	NEAR L$002maw_sse2_unrolled
+align	4
+L$004maw_sse2_loop:	; one word per iteration; NOTE(review): a word count of 0 would enter here and wrap ecx - callers presumably pass count > 0
+	movd	mm2,DWORD [edx]
+	movd	mm3,DWORD [eax]
+	pmuludq	mm2,mm0
+	lea	edx,[4+edx]
+	paddq	mm1,mm3
+	paddq	mm1,mm2
+	movd	DWORD [eax],mm1
+	sub	ecx,1
+	psrlq	mm1,32
+	lea	eax,[4+eax]
+	jnz	NEAR L$004maw_sse2_loop	; flags still come from the sub above (psrlq and lea leave them alone)
+L$003maw_sse2_exit:
+	movd	eax,mm1	; return the final carry
+	emms
+	ret
+align	16
+L$000maw_non_sse2:	; integer fallback using mul/adc; esi carries the running carry word
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	; 
+	xor	esi,esi
+	mov	edi,DWORD [20+esp]	; r
+	mov	ecx,DWORD [28+esp]	; word count
+	mov	ebx,DWORD [24+esp]	; a
+	and	ecx,4294967288	; round down to a multiple of eight for the unrolled loop
+	mov	ebp,DWORD [32+esp]	; w
+	push	ecx	; extra stack slot: the count argument is at [32+esp] from here on
+	jz	NEAR L$005maw_finish	; flags still come from the and above
+align	16
+L$006maw_loop:	; the "Round N" comments below give the byte offset of the word being processed
+	; Round 0
+	mov	eax,DWORD [ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [edi]
+	adc	edx,0
+	mov	DWORD [edi],eax
+	mov	esi,edx
+	; Round 4
+	mov	eax,DWORD [4+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [4+edi]
+	adc	edx,0
+	mov	DWORD [4+edi],eax
+	mov	esi,edx
+	; Round 8
+	mov	eax,DWORD [8+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [8+edi]
+	adc	edx,0
+	mov	DWORD [8+edi],eax
+	mov	esi,edx
+	; Round 12
+	mov	eax,DWORD [12+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [12+edi]
+	adc	edx,0
+	mov	DWORD [12+edi],eax
+	mov	esi,edx
+	; Round 16
+	mov	eax,DWORD [16+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [16+edi]
+	adc	edx,0
+	mov	DWORD [16+edi],eax
+	mov	esi,edx
+	; Round 20
+	mov	eax,DWORD [20+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [20+edi]
+	adc	edx,0
+	mov	DWORD [20+edi],eax
+	mov	esi,edx
+	; Round 24
+	mov	eax,DWORD [24+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [24+edi]
+	adc	edx,0
+	mov	DWORD [24+edi],eax
+	mov	esi,edx
+	; Round 28
+	mov	eax,DWORD [28+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [28+edi]
+	adc	edx,0
+	mov	DWORD [28+edi],eax
+	mov	esi,edx
+	; 
+	sub	ecx,8
+	lea	ebx,[32+ebx]
+	lea	edi,[32+edi]
+	jnz	NEAR L$006maw_loop	; lea does not touch flags, so this tests the sub above
+L$005maw_finish:
+	mov	ecx,DWORD [32+esp]	; original word count (offset shifted by the extra push)
+	and	ecx,7	; 0..7 leftover words
+	jnz	NEAR L$007maw_finish2
+	jmp	NEAR L$008maw_end
+L$007maw_finish2:
+	; Tail Round 0
+	mov	eax,DWORD [ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [edi]
+	adc	edx,0
+	dec	ecx
+	mov	DWORD [edi],eax
+	mov	esi,edx
+	jz	NEAR L$008maw_end
+	; Tail Round 1
+	mov	eax,DWORD [4+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [4+edi]
+	adc	edx,0
+	dec	ecx
+	mov	DWORD [4+edi],eax
+	mov	esi,edx
+	jz	NEAR L$008maw_end
+	; Tail Round 2
+	mov	eax,DWORD [8+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [8+edi]
+	adc	edx,0
+	dec	ecx
+	mov	DWORD [8+edi],eax
+	mov	esi,edx
+	jz	NEAR L$008maw_end
+	; Tail Round 3
+	mov	eax,DWORD [12+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [12+edi]
+	adc	edx,0
+	dec	ecx
+	mov	DWORD [12+edi],eax
+	mov	esi,edx
+	jz	NEAR L$008maw_end
+	; Tail Round 4
+	mov	eax,DWORD [16+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [16+edi]
+	adc	edx,0
+	dec	ecx
+	mov	DWORD [16+edi],eax
+	mov	esi,edx
+	jz	NEAR L$008maw_end
+	; Tail Round 5
+	mov	eax,DWORD [20+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [20+edi]
+	adc	edx,0
+	dec	ecx
+	mov	DWORD [20+edi],eax
+	mov	esi,edx
+	jz	NEAR L$008maw_end
+	; Tail Round 6
+	mov	eax,DWORD [24+ebx]
+	mul	ebp
+	add	eax,esi
+	adc	edx,0
+	add	eax,DWORD [24+edi]
+	adc	edx,0
+	mov	DWORD [24+edi],eax
+	mov	esi,edx
+L$008maw_end:
+	mov	eax,esi	; return the final carry
+	pop	ecx	; discard the extra slot pushed before the loop
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_bn_mul_words	; r[i] = low word of a[i]*w + carry over a word array, returning the final carry in eax; stack args: r, a, word count, w
+align	16
+_bn_mul_words:
+L$_bn_mul_words_begin:
+	lea	eax,[_OPENSSL_ia32cap_P]
+	bt	DWORD [eax],26	; capability bit 26 of the first dword selects the MMX/pmuludq path
+	jnc	NEAR L$009mw_non_sse2
+	mov	eax,DWORD [4+esp]	; arg 1: r (written)
+	mov	edx,DWORD [8+esp]	; arg 2: a (read)
+	mov	ecx,DWORD [12+esp]	; arg 3: number of 32-bit words
+	movd	mm0,DWORD [16+esp]	; arg 4: multiplier w
+	pxor	mm1,mm1	; mm1 = running carry
+align	16
+L$010mw_sse2_loop:	; one word per iteration; NOTE(review): no guard for a word count of 0 - callers presumably pass count > 0
+	movd	mm2,DWORD [edx]
+	pmuludq	mm2,mm0
+	lea	edx,[4+edx]
+	paddq	mm1,mm2
+	movd	DWORD [eax],mm1
+	sub	ecx,1
+	psrlq	mm1,32	; keep only the carry (upper 32 bits)
+	lea	eax,[4+eax]
+	jnz	NEAR L$010mw_sse2_loop	; flags still come from the sub above
+	movd	eax,mm1	; return the final carry
+	emms
+	ret
+align	16
+L$009mw_non_sse2:	; integer fallback using mul/adc; esi carries the running carry word
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	; 
+	xor	esi,esi
+	mov	edi,DWORD [20+esp]	; r
+	mov	ebx,DWORD [24+esp]	; a
+	mov	ebp,DWORD [28+esp]	; word count
+	mov	ecx,DWORD [32+esp]	; w
+	and	ebp,4294967288	; round down to a multiple of eight for the unrolled loop
+	jz	NEAR L$011mw_finish
+L$012mw_loop:	; the "Round N" comments below give the byte offset of the word being processed
+	; Round 0
+	mov	eax,DWORD [ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [edi],eax
+	mov	esi,edx
+	; Round 4
+	mov	eax,DWORD [4+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [4+edi],eax
+	mov	esi,edx
+	; Round 8
+	mov	eax,DWORD [8+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [8+edi],eax
+	mov	esi,edx
+	; Round 12
+	mov	eax,DWORD [12+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [12+edi],eax
+	mov	esi,edx
+	; Round 16
+	mov	eax,DWORD [16+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [16+edi],eax
+	mov	esi,edx
+	; Round 20
+	mov	eax,DWORD [20+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [20+edi],eax
+	mov	esi,edx
+	; Round 24
+	mov	eax,DWORD [24+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [24+edi],eax
+	mov	esi,edx
+	; Round 28
+	mov	eax,DWORD [28+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [28+edi],eax
+	mov	esi,edx
+	; 
+	add	ebx,32
+	add	edi,32
+	sub	ebp,8
+	jz	NEAR L$011mw_finish
+	jmp	NEAR L$012mw_loop
+L$011mw_finish:
+	mov	ebp,DWORD [28+esp]	; reload the original word count
+	and	ebp,7	; 0..7 leftover words
+	jnz	NEAR L$013mw_finish2
+	jmp	NEAR L$014mw_end
+L$013mw_finish2:
+	; Tail Round 0
+	mov	eax,DWORD [ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [edi],eax
+	mov	esi,edx
+	dec	ebp
+	jz	NEAR L$014mw_end
+	; Tail Round 1
+	mov	eax,DWORD [4+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [4+edi],eax
+	mov	esi,edx
+	dec	ebp
+	jz	NEAR L$014mw_end
+	; Tail Round 2
+	mov	eax,DWORD [8+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [8+edi],eax
+	mov	esi,edx
+	dec	ebp
+	jz	NEAR L$014mw_end
+	; Tail Round 3
+	mov	eax,DWORD [12+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [12+edi],eax
+	mov	esi,edx
+	dec	ebp
+	jz	NEAR L$014mw_end
+	; Tail Round 4
+	mov	eax,DWORD [16+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [16+edi],eax
+	mov	esi,edx
+	dec	ebp
+	jz	NEAR L$014mw_end
+	; Tail Round 5
+	mov	eax,DWORD [20+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [20+edi],eax
+	mov	esi,edx
+	dec	ebp
+	jz	NEAR L$014mw_end
+	; Tail Round 6
+	mov	eax,DWORD [24+ebx]
+	mul	ecx
+	add	eax,esi
+	adc	edx,0
+	mov	DWORD [24+edi],eax
+	mov	esi,edx
+L$014mw_end:
+	mov	eax,esi	; return the final carry
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_bn_sqr_words	; writes the 64-bit square of each input word as two output words; stack args: r, a, word count
+align	16
+_bn_sqr_words:
+L$_bn_sqr_words_begin:
+	lea	eax,[_OPENSSL_ia32cap_P]
+	bt	DWORD [eax],26	; capability bit 26 of the first dword selects the MMX/pmuludq path
+	jnc	NEAR L$015sqr_non_sse2
+	mov	eax,DWORD [4+esp]	; arg 1: r (two output words per input word)
+	mov	edx,DWORD [8+esp]	; arg 2: a
+	mov	ecx,DWORD [12+esp]	; arg 3: number of input words
+align	16
+L$016sqr_sse2_loop:	; NOTE(review): no guard for a word count of 0 - callers presumably pass count > 0
+	movd	mm0,DWORD [edx]
+	pmuludq	mm0,mm0
+	lea	edx,[4+edx]
+	movq	[eax],mm0	; store the full 64-bit square
+	sub	ecx,1
+	lea	eax,[8+eax]
+	jnz	NEAR L$016sqr_sse2_loop	; flags still come from the sub above
+	emms
+	ret
+align	16
+L$015sqr_non_sse2:	; integer fallback using mul
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	; 
+	mov	esi,DWORD [20+esp]	; r
+	mov	edi,DWORD [24+esp]	; a
+	mov	ebx,DWORD [28+esp]	; word count
+	and	ebx,4294967288	; round down to a multiple of eight for the unrolled loop
+	jz	NEAR L$017sw_finish
+L$018sw_loop:	; the "Round N" comments below give the input byte offset
+	; Round 0
+	mov	eax,DWORD [edi]
+	mul	eax
+	mov	DWORD [esi],eax
+	mov	DWORD [4+esi],edx
+	; Round 4
+	mov	eax,DWORD [4+edi]
+	mul	eax
+	mov	DWORD [8+esi],eax
+	mov	DWORD [12+esi],edx
+	; Round 8
+	mov	eax,DWORD [8+edi]
+	mul	eax
+	mov	DWORD [16+esi],eax
+	mov	DWORD [20+esi],edx
+	; Round 12
+	mov	eax,DWORD [12+edi]
+	mul	eax
+	mov	DWORD [24+esi],eax
+	mov	DWORD [28+esi],edx
+	; Round 16
+	mov	eax,DWORD [16+edi]
+	mul	eax
+	mov	DWORD [32+esi],eax
+	mov	DWORD [36+esi],edx
+	; Round 20
+	mov	eax,DWORD [20+edi]
+	mul	eax
+	mov	DWORD [40+esi],eax
+	mov	DWORD [44+esi],edx
+	; Round 24
+	mov	eax,DWORD [24+edi]
+	mul	eax
+	mov	DWORD [48+esi],eax
+	mov	DWORD [52+esi],edx
+	; Round 28
+	mov	eax,DWORD [28+edi]
+	mul	eax
+	mov	DWORD [56+esi],eax
+	mov	DWORD [60+esi],edx
+	; 
+	add	edi,32
+	add	esi,64
+	sub	ebx,8
+	jnz	NEAR L$018sw_loop
+L$017sw_finish:
+	mov	ebx,DWORD [28+esp]	; reload the original word count
+	and	ebx,7	; 0..7 leftover words
+	jz	NEAR L$019sw_end
+	; Tail Round 0
+	mov	eax,DWORD [edi]
+	mul	eax
+	mov	DWORD [esi],eax
+	dec	ebx
+	mov	DWORD [4+esi],edx
+	jz	NEAR L$019sw_end
+	; Tail Round 1
+	mov	eax,DWORD [4+edi]
+	mul	eax
+	mov	DWORD [8+esi],eax
+	dec	ebx
+	mov	DWORD [12+esi],edx
+	jz	NEAR L$019sw_end
+	; Tail Round 2
+	mov	eax,DWORD [8+edi]
+	mul	eax
+	mov	DWORD [16+esi],eax
+	dec	ebx
+	mov	DWORD [20+esi],edx
+	jz	NEAR L$019sw_end
+	; Tail Round 3
+	mov	eax,DWORD [12+edi]
+	mul	eax
+	mov	DWORD [24+esi],eax
+	dec	ebx
+	mov	DWORD [28+esi],edx
+	jz	NEAR L$019sw_end
+	; Tail Round 4
+	mov	eax,DWORD [16+edi]
+	mul	eax
+	mov	DWORD [32+esi],eax
+	dec	ebx
+	mov	DWORD [36+esi],edx
+	jz	NEAR L$019sw_end
+	; Tail Round 5
+	mov	eax,DWORD [20+edi]
+	mul	eax
+	mov	DWORD [40+esi],eax
+	dec	ebx
+	mov	DWORD [44+esi],edx
+	jz	NEAR L$019sw_end
+	; Tail Round 6
+	mov	eax,DWORD [24+edi]
+	mul	eax
+	mov	DWORD [48+esi],eax
+	mov	DWORD [52+esi],edx
+L$019sw_end:
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_bn_div_words	; divides a 64-bit value (two stack words) by a 32-bit word and returns the quotient in eax
+align	16
+_bn_div_words:
+L$_bn_div_words_begin:
+	mov	edx,DWORD [4+esp]	; arg 1: high word of the dividend
+	mov	eax,DWORD [8+esp]	; arg 2: low word of the dividend
+	mov	ecx,DWORD [12+esp]	; arg 3: divisor
+	div	ecx	; edx:eax / ecx -> quotient in eax; faults if the divisor is 0 or the quotient exceeds 32 bits
+	ret
+global	_bn_add_words	; r[i] = a[i] + b[i] + carry over a word array, returning the final carry (0 or 1) in eax; stack args: r, a, b, word count
+align	16
+_bn_add_words:
+L$_bn_add_words_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	; 
+	mov	ebx,DWORD [20+esp]	; arg 1: r (written)
+	mov	esi,DWORD [24+esp]	; arg 2: a
+	mov	edi,DWORD [28+esp]	; arg 3: b
+	mov	ebp,DWORD [32+esp]	; arg 4: number of 32-bit words
+	xor	eax,eax	; eax = carry between words
+	and	ebp,4294967288	; round down to a multiple of eight for the unrolled loop
+	jz	NEAR L$020aw_finish
+L$021aw_loop:
+	; Round 0
+	mov	ecx,DWORD [esi]
+	mov	edx,DWORD [edi]
+	add	ecx,eax	; add the incoming carry
+	mov	eax,0	; mov leaves the flags alone
+	adc	eax,eax	; eax = carry out of the first add
+	add	ecx,edx
+	adc	eax,0	; plus carry out of the second add
+	mov	DWORD [ebx],ecx
+	; Round 1
+	mov	ecx,DWORD [4+esi]
+	mov	edx,DWORD [4+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	mov	DWORD [4+ebx],ecx
+	; Round 2
+	mov	ecx,DWORD [8+esi]
+	mov	edx,DWORD [8+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	mov	DWORD [8+ebx],ecx
+	; Round 3
+	mov	ecx,DWORD [12+esi]
+	mov	edx,DWORD [12+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	mov	DWORD [12+ebx],ecx
+	; Round 4
+	mov	ecx,DWORD [16+esi]
+	mov	edx,DWORD [16+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	mov	DWORD [16+ebx],ecx
+	; Round 5
+	mov	ecx,DWORD [20+esi]
+	mov	edx,DWORD [20+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	mov	DWORD [20+ebx],ecx
+	; Round 6
+	mov	ecx,DWORD [24+esi]
+	mov	edx,DWORD [24+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	mov	DWORD [24+ebx],ecx
+	; Round 7
+	mov	ecx,DWORD [28+esi]
+	mov	edx,DWORD [28+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	mov	DWORD [28+ebx],ecx
+	; 
+	add	esi,32
+	add	edi,32
+	add	ebx,32
+	sub	ebp,8
+	jnz	NEAR L$021aw_loop
+L$020aw_finish:
+	mov	ebp,DWORD [32+esp]	; reload the original word count
+	and	ebp,7	; 0..7 leftover words
+	jz	NEAR L$022aw_end
+	; Tail Round 0
+	mov	ecx,DWORD [esi]
+	mov	edx,DWORD [edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [ebx],ecx
+	jz	NEAR L$022aw_end
+	; Tail Round 1
+	mov	ecx,DWORD [4+esi]
+	mov	edx,DWORD [4+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [4+ebx],ecx
+	jz	NEAR L$022aw_end
+	; Tail Round 2
+	mov	ecx,DWORD [8+esi]
+	mov	edx,DWORD [8+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [8+ebx],ecx
+	jz	NEAR L$022aw_end
+	; Tail Round 3
+	mov	ecx,DWORD [12+esi]
+	mov	edx,DWORD [12+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [12+ebx],ecx
+	jz	NEAR L$022aw_end
+	; Tail Round 4
+	mov	ecx,DWORD [16+esi]
+	mov	edx,DWORD [16+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [16+ebx],ecx
+	jz	NEAR L$022aw_end
+	; Tail Round 5
+	mov	ecx,DWORD [20+esi]
+	mov	edx,DWORD [20+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [20+ebx],ecx
+	jz	NEAR L$022aw_end
+	; Tail Round 6
+	mov	ecx,DWORD [24+esi]
+	mov	edx,DWORD [24+edi]
+	add	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	add	ecx,edx
+	adc	eax,0
+	mov	DWORD [24+ebx],ecx
+L$022aw_end:	; eax still holds the final carry and is the return value
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_bn_sub_words	; r[i] = a[i] - b[i] - borrow over a word array, returning the final borrow (0 or 1) in eax; stack args: r, a, b, word count
+align	16
+_bn_sub_words:
+L$_bn_sub_words_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	; 
+	mov	ebx,DWORD [20+esp]	; arg 1: r (written)
+	mov	esi,DWORD [24+esp]	; arg 2: a (minuend)
+	mov	edi,DWORD [28+esp]	; arg 3: b (subtrahend)
+	mov	ebp,DWORD [32+esp]	; arg 4: number of 32-bit words
+	xor	eax,eax	; eax = borrow between words
+	and	ebp,4294967288	; round down to a multiple of eight for the unrolled loop
+	jz	NEAR L$023aw_finish
+L$024aw_loop:
+	; Round 0
+	mov	ecx,DWORD [esi]
+	mov	edx,DWORD [edi]
+	sub	ecx,eax	; subtract the incoming borrow
+	mov	eax,0	; mov leaves the flags alone
+	adc	eax,eax	; eax = borrow (CF) out of the first sub
+	sub	ecx,edx
+	adc	eax,0	; plus borrow out of the second sub
+	mov	DWORD [ebx],ecx
+	; Round 1
+	mov	ecx,DWORD [4+esi]
+	mov	edx,DWORD [4+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	mov	DWORD [4+ebx],ecx
+	; Round 2
+	mov	ecx,DWORD [8+esi]
+	mov	edx,DWORD [8+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	mov	DWORD [8+ebx],ecx
+	; Round 3
+	mov	ecx,DWORD [12+esi]
+	mov	edx,DWORD [12+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	mov	DWORD [12+ebx],ecx
+	; Round 4
+	mov	ecx,DWORD [16+esi]
+	mov	edx,DWORD [16+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	mov	DWORD [16+ebx],ecx
+	; Round 5
+	mov	ecx,DWORD [20+esi]
+	mov	edx,DWORD [20+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	mov	DWORD [20+ebx],ecx
+	; Round 6
+	mov	ecx,DWORD [24+esi]
+	mov	edx,DWORD [24+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	mov	DWORD [24+ebx],ecx
+	; Round 7
+	mov	ecx,DWORD [28+esi]
+	mov	edx,DWORD [28+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	mov	DWORD [28+ebx],ecx
+	; 
+	add	esi,32
+	add	edi,32
+	add	ebx,32
+	sub	ebp,8
+	jnz	NEAR L$024aw_loop
+L$023aw_finish:
+	mov	ebp,DWORD [32+esp]	; reload the original word count
+	and	ebp,7	; 0..7 leftover words
+	jz	NEAR L$025aw_end
+	; Tail Round 0
+	mov	ecx,DWORD [esi]
+	mov	edx,DWORD [edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [ebx],ecx
+	jz	NEAR L$025aw_end
+	; Tail Round 1
+	mov	ecx,DWORD [4+esi]
+	mov	edx,DWORD [4+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [4+ebx],ecx
+	jz	NEAR L$025aw_end
+	; Tail Round 2
+	mov	ecx,DWORD [8+esi]
+	mov	edx,DWORD [8+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [8+ebx],ecx
+	jz	NEAR L$025aw_end
+	; Tail Round 3
+	mov	ecx,DWORD [12+esi]
+	mov	edx,DWORD [12+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [12+ebx],ecx
+	jz	NEAR L$025aw_end
+	; Tail Round 4
+	mov	ecx,DWORD [16+esi]
+	mov	edx,DWORD [16+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [16+ebx],ecx
+	jz	NEAR L$025aw_end
+	; Tail Round 5
+	mov	ecx,DWORD [20+esi]
+	mov	edx,DWORD [20+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	dec	ebp
+	mov	DWORD [20+ebx],ecx
+	jz	NEAR L$025aw_end
+	; Tail Round 6
+	mov	ecx,DWORD [24+esi]
+	mov	edx,DWORD [24+edi]
+	sub	ecx,eax
+	mov	eax,0
+	adc	eax,eax
+	sub	ecx,edx
+	adc	eax,0
+	mov	DWORD [24+ebx],ecx
+L$025aw_end:	; eax still holds the final borrow and is the return value
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+segment	.bss
+common	_OPENSSL_ia32cap_P 16
diff --git a/deps/boringssl/win-x86/crypto/fipsmodule/co-586.asm b/deps/boringssl/win-x86/crypto/fipsmodule/co-586.asm
new file mode 100644
index 0000000..7c2afe8
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/fipsmodule/co-586.asm
@@ -0,0 +1,1258 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+global	_bn_mul_comba8	; 8x8-word multiply: stores the 16-word product of a[0..7] and b[0..7] to r[0..15], one result word (column) at a time
+align	16
+_bn_mul_comba8:
+L$_bn_mul_comba8_begin:
+	push	esi
+	mov	esi,DWORD [12+esp]	; esi = a (2nd stack argument; one register pushed so far)
+	push	edi
+	mov	edi,DWORD [20+esp]	; edi = b (3rd stack argument; two registers pushed so far)
+	push	ebp
+	push	ebx
+	xor	ebx,ebx	; ebx/ecx/ebp act as a rotating three-word column accumulator
+	mov	eax,DWORD [esi]
+	xor	ecx,ecx
+	mov	edx,DWORD [edi]
+	; ################## Calculate word 0
+	xor	ebp,ebp
+	; mul a[0]*b[0]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [20+esp]	; eax = r (1st stack argument; reloaded before every store because mul clobbers eax)
+	adc	ecx,edx
+	mov	edx,DWORD [edi]
+	adc	ebp,0
+	mov	DWORD [eax],ebx
+	mov	eax,DWORD [4+esi]
+	; saved r[0]
+	; ################## Calculate word 1
+	xor	ebx,ebx
+	; mul a[1]*b[0]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [esi]
+	adc	ebp,edx
+	mov	edx,DWORD [4+edi]
+	adc	ebx,0
+	; mul a[0]*b[1]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebp,edx
+	mov	edx,DWORD [edi]
+	adc	ebx,0
+	mov	DWORD [4+eax],ecx
+	mov	eax,DWORD [8+esi]
+	; saved r[1]
+	; ################## Calculate word 2
+	xor	ecx,ecx
+	; mul a[2]*b[0]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [4+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [4+edi]
+	adc	ecx,0
+	; mul a[1]*b[1]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [esi]
+	adc	ebx,edx
+	mov	edx,DWORD [8+edi]
+	adc	ecx,0
+	; mul a[0]*b[2]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebx,edx
+	mov	edx,DWORD [edi]
+	adc	ecx,0
+	mov	DWORD [8+eax],ebp
+	mov	eax,DWORD [12+esi]
+	; saved r[2]
+	; ################## Calculate word 3
+	xor	ebp,ebp
+	; mul a[3]*b[0]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [8+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [4+edi]
+	adc	ebp,0
+	; mul a[2]*b[1]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [4+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [8+edi]
+	adc	ebp,0
+	; mul a[1]*b[2]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [esi]
+	adc	ecx,edx
+	mov	edx,DWORD [12+edi]
+	adc	ebp,0
+	; mul a[0]*b[3]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ecx,edx
+	mov	edx,DWORD [edi]
+	adc	ebp,0
+	mov	DWORD [12+eax],ebx
+	mov	eax,DWORD [16+esi]
+	; saved r[3]
+	; ################## Calculate word 4
+	xor	ebx,ebx
+	; mul a[4]*b[0]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [12+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [4+edi]
+	adc	ebx,0
+	; mul a[3]*b[1]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [8+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [8+edi]
+	adc	ebx,0
+	; mul a[2]*b[2]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [4+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [12+edi]
+	adc	ebx,0
+	; mul a[1]*b[3]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [esi]
+	adc	ebp,edx
+	mov	edx,DWORD [16+edi]
+	adc	ebx,0
+	; mul a[0]*b[4]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebp,edx
+	mov	edx,DWORD [edi]
+	adc	ebx,0
+	mov	DWORD [16+eax],ecx
+	mov	eax,DWORD [20+esi]
+	; saved r[4]
+	; ################## Calculate word 5
+	xor	ecx,ecx
+	; mul a[5]*b[0]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [16+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [4+edi]
+	adc	ecx,0
+	; mul a[4]*b[1]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [12+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [8+edi]
+	adc	ecx,0
+	; mul a[3]*b[2]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [8+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [12+edi]
+	adc	ecx,0
+	; mul a[2]*b[3]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [4+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [16+edi]
+	adc	ecx,0
+	; mul a[1]*b[4]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [esi]
+	adc	ebx,edx
+	mov	edx,DWORD [20+edi]
+	adc	ecx,0
+	; mul a[0]*b[5]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebx,edx
+	mov	edx,DWORD [edi]
+	adc	ecx,0
+	mov	DWORD [20+eax],ebp
+	mov	eax,DWORD [24+esi]
+	; saved r[5]
+	; ################## Calculate word 6
+	xor	ebp,ebp
+	; mul a[6]*b[0]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [20+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [4+edi]
+	adc	ebp,0
+	; mul a[5]*b[1]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [16+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [8+edi]
+	adc	ebp,0
+	; mul a[4]*b[2]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [12+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [12+edi]
+	adc	ebp,0
+	; mul a[3]*b[3]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [8+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [16+edi]
+	adc	ebp,0
+	; mul a[2]*b[4]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [4+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [20+edi]
+	adc	ebp,0
+	; mul a[1]*b[5]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [esi]
+	adc	ecx,edx
+	mov	edx,DWORD [24+edi]
+	adc	ebp,0
+	; mul a[0]*b[6]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ecx,edx
+	mov	edx,DWORD [edi]
+	adc	ebp,0
+	mov	DWORD [24+eax],ebx
+	mov	eax,DWORD [28+esi]
+	; saved r[6]
+	; ################## Calculate word 7
+	xor	ebx,ebx
+	; mul a[7]*b[0]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [24+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [4+edi]
+	adc	ebx,0
+	; mul a[6]*b[1]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [20+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [8+edi]
+	adc	ebx,0
+	; mul a[5]*b[2]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [16+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [12+edi]
+	adc	ebx,0
+	; mul a[4]*b[3]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [12+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [16+edi]
+	adc	ebx,0
+	; mul a[3]*b[4]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [8+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [20+edi]
+	adc	ebx,0
+	; mul a[2]*b[5]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [4+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [24+edi]
+	adc	ebx,0
+	; mul a[1]*b[6]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [esi]
+	adc	ebp,edx
+	mov	edx,DWORD [28+edi]
+	adc	ebx,0
+	; mul a[0]*b[7]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebp,edx
+	mov	edx,DWORD [4+edi]
+	adc	ebx,0
+	mov	DWORD [28+eax],ecx
+	mov	eax,DWORD [28+esi]
+	; saved r[7]
+	; ################## Calculate word 8
+	xor	ecx,ecx
+	; mul a[7]*b[1]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [24+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [8+edi]
+	adc	ecx,0
+	; mul a[6]*b[2]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [20+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [12+edi]
+	adc	ecx,0
+	; mul a[5]*b[3]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [16+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [16+edi]
+	adc	ecx,0
+	; mul a[4]*b[4]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [12+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [20+edi]
+	adc	ecx,0
+	; mul a[3]*b[5]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [8+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [24+edi]
+	adc	ecx,0
+	; mul a[2]*b[6]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [4+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [28+edi]
+	adc	ecx,0
+	; mul a[1]*b[7]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebx,edx
+	mov	edx,DWORD [8+edi]
+	adc	ecx,0
+	mov	DWORD [32+eax],ebp
+	mov	eax,DWORD [28+esi]
+	; saved r[8]
+	; ################## Calculate word 9
+	xor	ebp,ebp
+	; mul a[7]*b[2]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [24+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [12+edi]
+	adc	ebp,0
+	; mul a[6]*b[3]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [20+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [16+edi]
+	adc	ebp,0
+	; mul a[5]*b[4]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [16+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [20+edi]
+	adc	ebp,0
+	; mul a[4]*b[5]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [12+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [24+edi]
+	adc	ebp,0
+	; mul a[3]*b[6]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [8+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [28+edi]
+	adc	ebp,0
+	; mul a[2]*b[7]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ecx,edx
+	mov	edx,DWORD [12+edi]
+	adc	ebp,0
+	mov	DWORD [36+eax],ebx
+	mov	eax,DWORD [28+esi]
+	; saved r[9]
+	; ################## Calculate word 10
+	xor	ebx,ebx
+	; mul a[7]*b[3]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [24+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [16+edi]
+	adc	ebx,0
+	; mul a[6]*b[4]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [20+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [20+edi]
+	adc	ebx,0
+	; mul a[5]*b[5]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [16+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [24+edi]
+	adc	ebx,0
+	; mul a[4]*b[6]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [12+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [28+edi]
+	adc	ebx,0
+	; mul a[3]*b[7]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebp,edx
+	mov	edx,DWORD [16+edi]
+	adc	ebx,0
+	mov	DWORD [40+eax],ecx
+	mov	eax,DWORD [28+esi]
+	; saved r[10]
+	; ################## Calculate word 11
+	xor	ecx,ecx
+	; mul a[7]*b[4]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [24+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [20+edi]
+	adc	ecx,0
+	; mul a[6]*b[5]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [20+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [24+edi]
+	adc	ecx,0
+	; mul a[5]*b[6]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [16+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [28+edi]
+	adc	ecx,0
+	; mul a[4]*b[7]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebx,edx
+	mov	edx,DWORD [20+edi]
+	adc	ecx,0
+	mov	DWORD [44+eax],ebp
+	mov	eax,DWORD [28+esi]
+	; saved r[11]
+	; ################## Calculate word 12
+	xor	ebp,ebp
+	; mul a[7]*b[5]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [24+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [24+edi]
+	adc	ebp,0
+	; mul a[6]*b[6]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [20+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [28+edi]
+	adc	ebp,0
+	; mul a[5]*b[7]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ecx,edx
+	mov	edx,DWORD [24+edi]
+	adc	ebp,0
+	mov	DWORD [48+eax],ebx
+	mov	eax,DWORD [28+esi]
+	; saved r[12]
+	; ################## Calculate word 13
+	xor	ebx,ebx
+	; mul a[7]*b[6]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [24+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [28+edi]
+	adc	ebx,0
+	; mul a[6]*b[7]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebp,edx
+	mov	edx,DWORD [28+edi]
+	adc	ebx,0
+	mov	DWORD [52+eax],ecx
+	mov	eax,DWORD [28+esi]
+	; saved r[13]
+	; ################## Calculate word 14
+	xor	ecx,ecx
+	; mul a[7]*b[7]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebx,edx
+	adc	ecx,0	; this final carry word is computed but never stored
+	mov	DWORD [56+eax],ebp
+	; saved r[14]
+	; save r[15]
+	mov	DWORD [60+eax],ebx
+	pop	ebx	; restore the four registers pushed in the prologue, in reverse order
+	pop	ebp
+	pop	edi
+	pop	esi
+	ret
+global	_bn_mul_comba4	; 4x4-word multiply: stores the 8-word product of a[0..3] and b[0..3] to r[0..7], one result word (column) at a time
+align	16
+_bn_mul_comba4:
+L$_bn_mul_comba4_begin:
+	push	esi
+	mov	esi,DWORD [12+esp]	; esi = a (2nd stack argument; one register pushed so far)
+	push	edi
+	mov	edi,DWORD [20+esp]	; edi = b (3rd stack argument; two registers pushed so far)
+	push	ebp
+	push	ebx
+	xor	ebx,ebx	; ebx/ecx/ebp act as a rotating three-word column accumulator
+	mov	eax,DWORD [esi]
+	xor	ecx,ecx
+	mov	edx,DWORD [edi]
+	; ################## Calculate word 0
+	xor	ebp,ebp
+	; mul a[0]*b[0]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [20+esp]	; eax = r (1st stack argument; reloaded before every store because mul clobbers eax)
+	adc	ecx,edx
+	mov	edx,DWORD [edi]
+	adc	ebp,0
+	mov	DWORD [eax],ebx
+	mov	eax,DWORD [4+esi]
+	; saved r[0]
+	; ################## Calculate word 1
+	xor	ebx,ebx
+	; mul a[1]*b[0]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [esi]
+	adc	ebp,edx
+	mov	edx,DWORD [4+edi]
+	adc	ebx,0
+	; mul a[0]*b[1]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebp,edx
+	mov	edx,DWORD [edi]
+	adc	ebx,0
+	mov	DWORD [4+eax],ecx
+	mov	eax,DWORD [8+esi]
+	; saved r[1]
+	; ################## Calculate word 2
+	xor	ecx,ecx
+	; mul a[2]*b[0]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [4+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [4+edi]
+	adc	ecx,0
+	; mul a[1]*b[1]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [esi]
+	adc	ebx,edx
+	mov	edx,DWORD [8+edi]
+	adc	ecx,0
+	; mul a[0]*b[2]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebx,edx
+	mov	edx,DWORD [edi]
+	adc	ecx,0
+	mov	DWORD [8+eax],ebp
+	mov	eax,DWORD [12+esi]
+	; saved r[2]
+	; ################## Calculate word 3
+	xor	ebp,ebp
+	; mul a[3]*b[0]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [8+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [4+edi]
+	adc	ebp,0
+	; mul a[2]*b[1]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [4+esi]
+	adc	ecx,edx
+	mov	edx,DWORD [8+edi]
+	adc	ebp,0
+	; mul a[1]*b[2]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [esi]
+	adc	ecx,edx
+	mov	edx,DWORD [12+edi]
+	adc	ebp,0
+	; mul a[0]*b[3]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ecx,edx
+	mov	edx,DWORD [4+edi]
+	adc	ebp,0
+	mov	DWORD [12+eax],ebx
+	mov	eax,DWORD [12+esi]
+	; saved r[3]
+	; ################## Calculate word 4
+	xor	ebx,ebx
+	; mul a[3]*b[1]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [8+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [8+edi]
+	adc	ebx,0
+	; mul a[2]*b[2]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [4+esi]
+	adc	ebp,edx
+	mov	edx,DWORD [12+edi]
+	adc	ebx,0
+	; mul a[1]*b[3]
+	mul	edx
+	add	ecx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebp,edx
+	mov	edx,DWORD [8+edi]
+	adc	ebx,0
+	mov	DWORD [16+eax],ecx
+	mov	eax,DWORD [12+esi]
+	; saved r[4]
+	; ################## Calculate word 5
+	xor	ecx,ecx
+	; mul a[3]*b[2]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [8+esi]
+	adc	ebx,edx
+	mov	edx,DWORD [12+edi]
+	adc	ecx,0
+	; mul a[2]*b[3]
+	mul	edx
+	add	ebp,eax
+	mov	eax,DWORD [20+esp]
+	adc	ebx,edx
+	mov	edx,DWORD [12+edi]
+	adc	ecx,0
+	mov	DWORD [20+eax],ebp
+	mov	eax,DWORD [12+esi]
+	; saved r[5]
+	; ################## Calculate word 6
+	xor	ebp,ebp
+	; mul a[3]*b[3]
+	mul	edx
+	add	ebx,eax
+	mov	eax,DWORD [20+esp]
+	adc	ecx,edx
+	adc	ebp,0	; this final carry word is computed but never stored
+	mov	DWORD [24+eax],ebx
+	; saved r[6]
+	; save r[7]
+	mov	DWORD [28+eax],ecx
+	pop	ebx	; restore the four registers pushed in the prologue, in reverse order
+	pop	ebp
+	pop	edi
+	pop	esi
+	ret
+global	_bn_sqr_comba8	; 8-word square: stores the 16-word square of a[0..7] to r[0..15], one result word (column) at a time
+align	16
+_bn_sqr_comba8:
+L$_bn_sqr_comba8_begin:
+	push	esi
+	push	edi
+	push	ebp
+	push	ebx
+	mov	edi,DWORD [20+esp]	; edi = r (1st stack argument, after four pushes)
+	mov	esi,DWORD [24+esp]	; esi = a (2nd stack argument)
+	xor	ebx,ebx	; ebx/ecx/ebp act as a rotating three-word column accumulator
+	xor	ecx,ecx
+	mov	eax,DWORD [esi]
+	; ############### Calculate word 0
+	xor	ebp,ebp
+	; sqr a[0]*a[0]
+	mul	eax
+	add	ebx,eax
+	adc	ecx,edx
+	mov	edx,DWORD [esi]
+	adc	ebp,0
+	mov	DWORD [edi],ebx
+	mov	eax,DWORD [4+esi]
+	; saved r[0]
+	; ############### Calculate word 1
+	xor	ebx,ebx
+	; sqr a[1]*a[0]
+	mul	edx
+	add	eax,eax	; cross terms (i != j) are doubled: edx:eax is shifted left by one bit ...
+	adc	edx,edx
+	adc	ebx,0	; ... and the bit shifted out lands in the top accumulator word
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [8+esi]
+	adc	ebx,0
+	mov	DWORD [4+edi],ecx
+	mov	edx,DWORD [esi]
+	; saved r[1]
+	; ############### Calculate word 2
+	xor	ecx,ecx
+	; sqr a[2]*a[0]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ecx,0
+	add	ebp,eax
+	adc	ebx,edx
+	mov	eax,DWORD [4+esi]
+	adc	ecx,0
+	; sqr a[1]*a[1]
+	mul	eax	; diagonal terms (i == j) are added once, without doubling
+	add	ebp,eax
+	adc	ebx,edx
+	mov	edx,DWORD [esi]
+	adc	ecx,0
+	mov	DWORD [8+edi],ebp
+	mov	eax,DWORD [12+esi]
+	; saved r[2]
+	; ############### Calculate word 3
+	xor	ebp,ebp
+	; sqr a[3]*a[0]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebp,0
+	add	ebx,eax
+	adc	ecx,edx
+	mov	eax,DWORD [8+esi]
+	adc	ebp,0
+	mov	edx,DWORD [4+esi]
+	; sqr a[2]*a[1]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebp,0
+	add	ebx,eax
+	adc	ecx,edx
+	mov	eax,DWORD [16+esi]
+	adc	ebp,0
+	mov	DWORD [12+edi],ebx
+	mov	edx,DWORD [esi]
+	; saved r[3]
+	; ############### Calculate word 4
+	xor	ebx,ebx
+	; sqr a[4]*a[0]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebx,0
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [12+esi]
+	adc	ebx,0
+	mov	edx,DWORD [4+esi]
+	; sqr a[3]*a[1]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebx,0
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [8+esi]
+	adc	ebx,0
+	; sqr a[2]*a[2]
+	mul	eax
+	add	ecx,eax
+	adc	ebp,edx
+	mov	edx,DWORD [esi]
+	adc	ebx,0
+	mov	DWORD [16+edi],ecx
+	mov	eax,DWORD [20+esi]
+	; saved r[4]
+	; ############### Calculate word 5
+	xor	ecx,ecx
+	; sqr a[5]*a[0]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ecx,0
+	add	ebp,eax
+	adc	ebx,edx
+	mov	eax,DWORD [16+esi]
+	adc	ecx,0
+	mov	edx,DWORD [4+esi]
+	; sqr a[4]*a[1]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ecx,0
+	add	ebp,eax
+	adc	ebx,edx
+	mov	eax,DWORD [12+esi]
+	adc	ecx,0
+	mov	edx,DWORD [8+esi]
+	; sqr a[3]*a[2]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ecx,0
+	add	ebp,eax
+	adc	ebx,edx
+	mov	eax,DWORD [24+esi]
+	adc	ecx,0
+	mov	DWORD [20+edi],ebp
+	mov	edx,DWORD [esi]
+	; saved r[5]
+	; ############### Calculate word 6
+	xor	ebp,ebp
+	; sqr a[6]*a[0]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebp,0
+	add	ebx,eax
+	adc	ecx,edx
+	mov	eax,DWORD [20+esi]
+	adc	ebp,0
+	mov	edx,DWORD [4+esi]
+	; sqr a[5]*a[1]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebp,0
+	add	ebx,eax
+	adc	ecx,edx
+	mov	eax,DWORD [16+esi]
+	adc	ebp,0
+	mov	edx,DWORD [8+esi]
+	; sqr a[4]*a[2]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebp,0
+	add	ebx,eax
+	adc	ecx,edx
+	mov	eax,DWORD [12+esi]
+	adc	ebp,0
+	; sqr a[3]*a[3]
+	mul	eax
+	add	ebx,eax
+	adc	ecx,edx
+	mov	edx,DWORD [esi]
+	adc	ebp,0
+	mov	DWORD [24+edi],ebx
+	mov	eax,DWORD [28+esi]
+	; saved r[6]
+	; ############### Calculate word 7
+	xor	ebx,ebx
+	; sqr a[7]*a[0]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebx,0
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [24+esi]
+	adc	ebx,0
+	mov	edx,DWORD [4+esi]
+	; sqr a[6]*a[1]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebx,0
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [20+esi]
+	adc	ebx,0
+	mov	edx,DWORD [8+esi]
+	; sqr a[5]*a[2]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebx,0
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [16+esi]
+	adc	ebx,0
+	mov	edx,DWORD [12+esi]
+	; sqr a[4]*a[3]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebx,0
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [28+esi]
+	adc	ebx,0
+	mov	DWORD [28+edi],ecx
+	mov	edx,DWORD [4+esi]
+	; saved r[7]
+	; ############### Calculate word 8
+	xor	ecx,ecx
+	; sqr a[7]*a[1]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ecx,0
+	add	ebp,eax
+	adc	ebx,edx
+	mov	eax,DWORD [24+esi]
+	adc	ecx,0
+	mov	edx,DWORD [8+esi]
+	; sqr a[6]*a[2]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ecx,0
+	add	ebp,eax
+	adc	ebx,edx
+	mov	eax,DWORD [20+esi]
+	adc	ecx,0
+	mov	edx,DWORD [12+esi]
+	; sqr a[5]*a[3]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ecx,0
+	add	ebp,eax
+	adc	ebx,edx
+	mov	eax,DWORD [16+esi]
+	adc	ecx,0
+	; sqr a[4]*a[4]
+	mul	eax
+	add	ebp,eax
+	adc	ebx,edx
+	mov	edx,DWORD [8+esi]
+	adc	ecx,0
+	mov	DWORD [32+edi],ebp
+	mov	eax,DWORD [28+esi]
+	; saved r[8]
+	; ############### Calculate word 9
+	xor	ebp,ebp
+	; sqr a[7]*a[2]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebp,0
+	add	ebx,eax
+	adc	ecx,edx
+	mov	eax,DWORD [24+esi]
+	adc	ebp,0
+	mov	edx,DWORD [12+esi]
+	; sqr a[6]*a[3]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebp,0
+	add	ebx,eax
+	adc	ecx,edx
+	mov	eax,DWORD [20+esi]
+	adc	ebp,0
+	mov	edx,DWORD [16+esi]
+	; sqr a[5]*a[4]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebp,0
+	add	ebx,eax
+	adc	ecx,edx
+	mov	eax,DWORD [28+esi]
+	adc	ebp,0
+	mov	DWORD [36+edi],ebx
+	mov	edx,DWORD [12+esi]
+	; saved r[9]
+	; ############### Calculate word 10
+	xor	ebx,ebx
+	; sqr a[7]*a[3]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebx,0
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [24+esi]
+	adc	ebx,0
+	mov	edx,DWORD [16+esi]
+	; sqr a[6]*a[4]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebx,0
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [20+esi]
+	adc	ebx,0
+	; sqr a[5]*a[5]
+	mul	eax
+	add	ecx,eax
+	adc	ebp,edx
+	mov	edx,DWORD [16+esi]
+	adc	ebx,0
+	mov	DWORD [40+edi],ecx
+	mov	eax,DWORD [28+esi]
+	; saved r[10]
+	; ############### Calculate word 11
+	xor	ecx,ecx
+	; sqr a[7]*a[4]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ecx,0
+	add	ebp,eax
+	adc	ebx,edx
+	mov	eax,DWORD [24+esi]
+	adc	ecx,0
+	mov	edx,DWORD [20+esi]
+	; sqr a[6]*a[5]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ecx,0
+	add	ebp,eax
+	adc	ebx,edx
+	mov	eax,DWORD [28+esi]
+	adc	ecx,0
+	mov	DWORD [44+edi],ebp
+	mov	edx,DWORD [20+esi]
+	; saved r[11]
+	; ############### Calculate word 12
+	xor	ebp,ebp
+	; sqr a[7]*a[5]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebp,0
+	add	ebx,eax
+	adc	ecx,edx
+	mov	eax,DWORD [24+esi]
+	adc	ebp,0
+	; sqr a[6]*a[6]
+	mul	eax
+	add	ebx,eax
+	adc	ecx,edx
+	mov	edx,DWORD [24+esi]
+	adc	ebp,0
+	mov	DWORD [48+edi],ebx
+	mov	eax,DWORD [28+esi]
+	; saved r[12]
+	; ############### Calculate word 13
+	xor	ebx,ebx
+	; sqr a[7]*a[6]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebx,0
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [28+esi]
+	adc	ebx,0
+	mov	DWORD [52+edi],ecx
+	; saved r[13]
+	; ############### Calculate word 14
+	xor	ecx,ecx
+	; sqr a[7]*a[7]
+	mul	eax
+	add	ebp,eax
+	adc	ebx,edx
+	adc	ecx,0	; this final carry word is computed but never stored
+	mov	DWORD [56+edi],ebp
+	; saved r[14]
+	mov	DWORD [60+edi],ebx	; r[15] = top word of the square
+	pop	ebx	; restore the four registers pushed in the prologue, in reverse order
+	pop	ebp
+	pop	edi
+	pop	esi
+	ret
+global	_bn_sqr_comba4	; 4-word square: stores the 8-word square of a[0..3] to r[0..7], one result word (column) at a time
+align	16
+_bn_sqr_comba4:
+L$_bn_sqr_comba4_begin:
+	push	esi
+	push	edi
+	push	ebp
+	push	ebx
+	mov	edi,DWORD [20+esp]	; edi = r (1st stack argument, after four pushes)
+	mov	esi,DWORD [24+esp]	; esi = a (2nd stack argument)
+	xor	ebx,ebx	; ebx/ecx/ebp act as a rotating three-word column accumulator
+	xor	ecx,ecx
+	mov	eax,DWORD [esi]
+	; ############### Calculate word 0
+	xor	ebp,ebp
+	; sqr a[0]*a[0]
+	mul	eax
+	add	ebx,eax
+	adc	ecx,edx
+	mov	edx,DWORD [esi]
+	adc	ebp,0
+	mov	DWORD [edi],ebx
+	mov	eax,DWORD [4+esi]
+	; saved r[0]
+	; ############### Calculate word 1
+	xor	ebx,ebx
+	; sqr a[1]*a[0]
+	mul	edx
+	add	eax,eax	; cross terms (i != j) are doubled: edx:eax is shifted left by one bit ...
+	adc	edx,edx
+	adc	ebx,0	; ... and the bit shifted out lands in the top accumulator word
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [8+esi]
+	adc	ebx,0
+	mov	DWORD [4+edi],ecx
+	mov	edx,DWORD [esi]
+	; saved r[1]
+	; ############### Calculate word 2
+	xor	ecx,ecx
+	; sqr a[2]*a[0]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ecx,0
+	add	ebp,eax
+	adc	ebx,edx
+	mov	eax,DWORD [4+esi]
+	adc	ecx,0
+	; sqr a[1]*a[1]
+	mul	eax	; diagonal terms (i == j) are added once, without doubling
+	add	ebp,eax
+	adc	ebx,edx
+	mov	edx,DWORD [esi]
+	adc	ecx,0
+	mov	DWORD [8+edi],ebp
+	mov	eax,DWORD [12+esi]
+	; saved r[2]
+	; ############### Calculate word 3
+	xor	ebp,ebp
+	; sqr a[3]*a[0]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebp,0
+	add	ebx,eax
+	adc	ecx,edx
+	mov	eax,DWORD [8+esi]
+	adc	ebp,0
+	mov	edx,DWORD [4+esi]
+	; sqr a[2]*a[1]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebp,0
+	add	ebx,eax
+	adc	ecx,edx
+	mov	eax,DWORD [12+esi]
+	adc	ebp,0
+	mov	DWORD [12+edi],ebx
+	mov	edx,DWORD [4+esi]
+	; saved r[3]
+	; ############### Calculate word 4
+	xor	ebx,ebx
+	; sqr a[3]*a[1]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ebx,0
+	add	ecx,eax
+	adc	ebp,edx
+	mov	eax,DWORD [8+esi]
+	adc	ebx,0
+	; sqr a[2]*a[2]
+	mul	eax
+	add	ecx,eax
+	adc	ebp,edx
+	mov	edx,DWORD [8+esi]
+	adc	ebx,0
+	mov	DWORD [16+edi],ecx
+	mov	eax,DWORD [12+esi]
+	; saved r[4]
+	; ############### Calculate word 5
+	xor	ecx,ecx
+	; sqr a[3]*a[2]
+	mul	edx
+	add	eax,eax
+	adc	edx,edx
+	adc	ecx,0
+	add	ebp,eax
+	adc	ebx,edx
+	mov	eax,DWORD [12+esi]
+	adc	ecx,0
+	mov	DWORD [20+edi],ebp
+	; saved r[5]
+	; ############### Calculate word 6
+	xor	ebp,ebp
+	; sqr a[3]*a[3]
+	mul	eax
+	add	ebx,eax
+	adc	ecx,edx
+	adc	ebp,0	; this final carry word is computed but never stored
+	mov	DWORD [24+edi],ebx
+	; saved r[6]
+	mov	DWORD [28+edi],ecx	; r[7] = top word of the square
+	pop	ebx	; restore the four registers pushed in the prologue, in reverse order
+	pop	ebp
+	pop	edi
+	pop	esi
+	ret
diff --git a/deps/boringssl/win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm b/deps/boringssl/win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm
new file mode 100644
index 0000000..e0192fc
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/fipsmodule/ghash-ssse3-x86.asm
@@ -0,0 +1,292 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+global	_gcm_gmult_ssse3	; updates the 16-byte value at the 1st argument in place, using sixteen 16-byte table rows read from the 2nd argument (presumably the GHASH multiply by H - inferred from the name)
+align	16
+_gcm_gmult_ssse3:
+L$_gcm_gmult_ssse3_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	edi,DWORD [20+esp]	; edi = 1st stack argument: 16-byte value, loaded below and overwritten at the end
+	mov	esi,DWORD [24+esp]	; esi = 2nd stack argument: table, consumed 16 bytes per row
+	movdqu	xmm0,[edi]
+	call	L$000pic_point	; call/pop leaves the address of L$000pic_point in eax for position-independent constant loads
+L$000pic_point:
+	pop	eax
+	movdqa	xmm7,[(L$reverse_bytes-L$000pic_point)+eax]
+	movdqa	xmm2,[(L$low4_mask-L$000pic_point)+eax]
+db	102,15,56,0,199	; pshufb xmm0,xmm7 (reverse the 16 bytes of the value)
+	movdqa	xmm1,xmm2
+	pandn	xmm1,xmm0
+	psrld	xmm1,4	; xmm1 = high nibble of every byte
+	pand	xmm0,xmm2	; xmm0 = low nibble of every byte
+	pxor	xmm2,xmm2	; xmm2 / xmm3 are the accumulators, started at zero
+	pxor	xmm3,xmm3
+	mov	eax,5	; first pass: table rows 0-4
+L$001loop_row_1:
+	movdqa	xmm4,[esi]	; load the next 16-byte table row and advance esi
+	lea	esi,[16+esi]
+	movdqa	xmm6,xmm2
+db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+	movdqa	xmm5,xmm4
+db	102,15,56,0,224	; pshufb xmm4,xmm0 (row looked up by the low nibbles)
+db	102,15,56,0,233	; pshufb xmm5,xmm1 (row looked up by the high nibbles)
+	pxor	xmm2,xmm5
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+	sub	eax,1
+	jnz	NEAR L$001loop_row_1
+	pxor	xmm2,xmm3	; fold xmm3 into xmm2 at shifts of 0, 1, 2 and 7 bits, then clear xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+	mov	eax,5	; second pass: table rows 5-9
+L$002loop_row_2:
+	movdqa	xmm4,[esi]
+	lea	esi,[16+esi]
+	movdqa	xmm6,xmm2
+db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+	movdqa	xmm5,xmm4
+db	102,15,56,0,224	; pshufb xmm4,xmm0
+db	102,15,56,0,233	; pshufb xmm5,xmm1
+	pxor	xmm2,xmm5
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+	sub	eax,1
+	jnz	NEAR L$002loop_row_2
+	pxor	xmm2,xmm3	; same fold of xmm3 into xmm2 as after the first pass
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+	mov	eax,6	; third pass: table rows 10-15
+L$003loop_row_3:
+	movdqa	xmm4,[esi]
+	lea	esi,[16+esi]
+	movdqa	xmm6,xmm2
+db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+	movdqa	xmm5,xmm4
+db	102,15,56,0,224	; pshufb xmm4,xmm0
+db	102,15,56,0,233	; pshufb xmm5,xmm1
+	pxor	xmm2,xmm5
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+	sub	eax,1
+	jnz	NEAR L$003loop_row_3
+	pxor	xmm2,xmm3	; same fold of xmm3 into xmm2 as after the earlier passes
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+db	102,15,56,0,215	; pshufb xmm2,xmm7 (reverse the bytes back before storing)
+	movdqu	[edi],xmm2	; write the result over the input value
+	pxor	xmm0,xmm0	; zero xmm0-xmm6 before returning (presumably so no intermediate values stay in registers)
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_gcm_ghash_ssse3	; for each 16-byte input block: XORs it into the 16-byte value at the 1st argument, then runs the same 16-row table pass as _gcm_gmult_ssse3
+align	16
+_gcm_ghash_ssse3:
+L$_gcm_ghash_ssse3_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	edi,DWORD [20+esp]	; edi = 1st stack argument: 16-byte value, loaded below and stored back at the end
+	mov	esi,DWORD [24+esp]	; esi = 2nd stack argument: table, consumed 16 bytes per row
+	mov	edx,DWORD [28+esp]	; edx = 3rd stack argument: input pointer, read 16 bytes per iteration
+	mov	ecx,DWORD [32+esp]	; ecx = 4th stack argument: input length in bytes
+	movdqu	xmm0,[edi]
+	call	L$004pic_point	; call/pop leaves the address of L$004pic_point in ebx for position-independent constant loads
+L$004pic_point:
+	pop	ebx
+	movdqa	xmm7,[(L$reverse_bytes-L$004pic_point)+ebx]
+	and	ecx,-16	; round the length down to a whole number of 16-byte blocks
+db	102,15,56,0,199	; pshufb xmm0,xmm7 (reverse the 16 bytes of the value)
+	pxor	xmm3,xmm3
+L$005loop_ghash:
+	movdqa	xmm2,[(L$low4_mask-L$004pic_point)+ebx]
+	movdqu	xmm1,[edx]	; load the next input block
+db	102,15,56,0,207	; pshufb xmm1,xmm7 (reverse the input block's bytes)
+	pxor	xmm0,xmm1	; XOR the block into the running value
+	movdqa	xmm1,xmm2
+	pandn	xmm1,xmm0
+	psrld	xmm1,4	; xmm1 = high nibble of every byte
+	pand	xmm0,xmm2	; xmm0 = low nibble of every byte
+	pxor	xmm2,xmm2
+	mov	eax,5	; first pass: table rows 0-4
+L$006loop_row_4:
+	movdqa	xmm4,[esi]
+	lea	esi,[16+esi]
+	movdqa	xmm6,xmm2
+db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+	movdqa	xmm5,xmm4
+db	102,15,56,0,224	; pshufb xmm4,xmm0 (row looked up by the low nibbles)
+db	102,15,56,0,233	; pshufb xmm5,xmm1 (row looked up by the high nibbles)
+	pxor	xmm2,xmm5
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+	sub	eax,1
+	jnz	NEAR L$006loop_row_4
+	pxor	xmm2,xmm3	; fold xmm3 into xmm2 at shifts of 0, 1, 2 and 7 bits, then clear xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+	mov	eax,5	; second pass: table rows 5-9
+L$007loop_row_5:
+	movdqa	xmm4,[esi]
+	lea	esi,[16+esi]
+	movdqa	xmm6,xmm2
+db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+	movdqa	xmm5,xmm4
+db	102,15,56,0,224	; pshufb xmm4,xmm0
+db	102,15,56,0,233	; pshufb xmm5,xmm1
+	pxor	xmm2,xmm5
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+	sub	eax,1
+	jnz	NEAR L$007loop_row_5
+	pxor	xmm2,xmm3	; same fold of xmm3 into xmm2 as after the first pass
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+	mov	eax,6	; third pass: table rows 10-15
+L$008loop_row_6:
+	movdqa	xmm4,[esi]
+	lea	esi,[16+esi]
+	movdqa	xmm6,xmm2
+db	102,15,58,15,243,1	; palignr xmm6,xmm3,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+	movdqa	xmm5,xmm4
+db	102,15,56,0,224	; pshufb xmm4,xmm0
+db	102,15,56,0,233	; pshufb xmm5,xmm1
+	pxor	xmm2,xmm5
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+	sub	eax,1
+	jnz	NEAR L$008loop_row_6
+	pxor	xmm2,xmm3	; same fold of xmm3 into xmm2 as after the earlier passes
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+	movdqa	xmm0,xmm2	; the result becomes the running value for the next block
+	lea	esi,[esi-256]	; rewind the table pointer (16 rows x 16 bytes were consumed)
+	lea	edx,[16+edx]	; advance to the next input block
+	sub	ecx,16
+	jnz	NEAR L$005loop_ghash	; NOTE(review): the length is tested only after a block has been processed, so callers presumably never pass fewer than 16 bytes - confirm
+db	102,15,56,0,199	; pshufb xmm0,xmm7 (reverse the bytes back before storing)
+	movdqu	[edi],xmm0	; write the final value back through the 1st argument
+	pxor	xmm0,xmm0	; zero xmm0-xmm6 before returning (presumably so no intermediate values stay in registers)
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	16	; both tables are loaded with movdqa, which requires 16-byte alignment
+L$reverse_bytes:	; pshufb control mask: destination byte i takes source byte 15-i (full 16-byte reversal)
+db	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+align	16
+L$low4_mask:	; 0x0F in every byte: selects the low nibble of each byte
+dd	252645135,252645135,252645135,252645135	; 252645135 = 0x0F0F0F0F
diff --git a/deps/boringssl/win-x86/crypto/fipsmodule/ghash-x86.asm b/deps/boringssl/win-x86/crypto/fipsmodule/ghash-x86.asm
new file mode 100644
index 0000000..3703cb5
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/fipsmodule/ghash-x86.asm
@@ -0,0 +1,325 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+global	_gcm_init_clmul	; reads 16 bytes from the 2nd argument and writes three 16-byte table entries to the 1st argument (presumably the GHASH key table for the PCLMULQDQ path - inferred from the name)
+align	16
+_gcm_init_clmul:
+L$_gcm_init_clmul_begin:
+	mov	edx,DWORD [4+esp]	; edx = 1st stack argument: output table (48 bytes are written)
+	mov	eax,DWORD [8+esp]	; eax = 2nd stack argument: 16-byte input
+	call	L$000pic	; call/pop leaves the address of L$000pic in ecx ...
+L$000pic:
+	pop	ecx
+	lea	ecx,[(L$bswap-L$000pic)+ecx]	; ... which is then turned into the address of L$bswap
+	movdqu	xmm2,[eax]
+	pshufd	xmm2,xmm2,78	; swap the two 64-bit halves
+	pshufd	xmm4,xmm2,255	; broadcast the top dword (its sign bit drives the mask below)
+	movdqa	xmm3,xmm2
+	psllq	xmm2,1	; 128-bit left shift by one bit, built from two 64-bit shifts plus the carried bit
+	pxor	xmm5,xmm5
+	psrlq	xmm3,63
+	pcmpgtd	xmm5,xmm4	; xmm5 = all-ones if the original top bit was set, else zero
+	pslldq	xmm3,8
+	por	xmm2,xmm3
+	pand	xmm5,[16+ecx]	; keep the constant from the second row of L$bswap only when the top bit was set
+	pxor	xmm2,xmm5
+	movdqa	xmm0,xmm2
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pshufd	xmm4,xmm2,78
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm2
+db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00 (low qword x low qword)
+db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11 (high qword x high qword)
+db	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00 (product of the xor-ed halves)
+	xorps	xmm3,xmm0
+	xorps	xmm3,xmm1
+	movdqa	xmm4,xmm3
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,xmm0	; from here to the final pxor: shift-and-xor folding of xmm1:xmm0 down to 128 bits in xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	pshufd	xmm3,xmm2,78
+	pshufd	xmm4,xmm0,78
+	pxor	xmm3,xmm2
+	movdqu	[edx],xmm2	; table entry 0: the adjusted input value
+	pxor	xmm4,xmm0
+	movdqu	[16+edx],xmm0	; table entry 1: the folded product of that value with itself
+db	102,15,58,15,227,8	; palignr xmm4,xmm3,8
+	movdqu	[32+edx],xmm4	; table entry 2: the xor-ed halves of entries 0 and 1, packed together
+	ret
+global	_gcm_gmult_clmul	; updates the 16-byte value at the 1st argument in place with one carry-less multiply and fold against table entries from the 2nd argument
+align	16
+_gcm_gmult_clmul:
+L$_gcm_gmult_clmul_begin:
+	mov	eax,DWORD [4+esp]	; eax = 1st stack argument: 16-byte value (read, then overwritten)
+	mov	edx,DWORD [8+esp]	; edx = 2nd stack argument: table; offsets 0 and 32 are read
+	call	L$001pic	; call/pop leaves the address of L$001pic in ecx ...
+L$001pic:
+	pop	ecx
+	lea	ecx,[(L$bswap-L$001pic)+ecx]	; ... which is then turned into the address of L$bswap
+	movdqu	xmm0,[eax]
+	movdqa	xmm5,[ecx]	; xmm5 = byte-reversal mask (first row of L$bswap)
+	movups	xmm2,[edx]
+db	102,15,56,0,197	; pshufb xmm0,xmm5 (reverse the 16 bytes of the value)
+	movups	xmm4,[32+edx]
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pxor	xmm3,xmm0
+db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00 (low qword x low qword)
+db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11 (high qword x high qword)
+db	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00 (product of the xor-ed halves)
+	xorps	xmm3,xmm0
+	xorps	xmm3,xmm1
+	movdqa	xmm4,xmm3
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,xmm0	; from here to the final pxor: shift-and-xor folding of xmm1:xmm0 down to 128 bits in xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+db	102,15,56,0,197	; pshufb xmm0,xmm5 (reverse the bytes back before storing)
+	movdqu	[eax],xmm0	; write the result over the input value
+	ret
+global	_gcm_ghash_clmul	; folds a run of 16-byte input blocks into the 16-byte value at the 1st argument, two blocks per main-loop iteration plus an optional single-block tail
+align	16
+_gcm_ghash_clmul:
+L$_gcm_ghash_clmul_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	eax,DWORD [20+esp]	; eax = 1st stack argument: 16-byte value (read, then stored back at L$006done)
+	mov	edx,DWORD [24+esp]	; edx = 2nd stack argument: table; offsets 0, 16 and 32 are read
+	mov	esi,DWORD [28+esp]	; esi = 3rd stack argument: input pointer
+	mov	ebx,DWORD [32+esp]	; ebx = 4th stack argument: input length in bytes
+	call	L$002pic	; call/pop leaves the address of L$002pic in ecx ...
+L$002pic:
+	pop	ecx
+	lea	ecx,[(L$bswap-L$002pic)+ecx]	; ... which is then turned into the address of L$bswap
+	movdqu	xmm0,[eax]
+	movdqa	xmm5,[ecx]	; xmm5 = byte-reversal mask (first row of L$bswap)
+	movdqu	xmm2,[edx]
+db	102,15,56,0,197	; pshufb xmm0,xmm5 (reverse the 16 bytes of the value)
+	sub	ebx,16
+	jz	NEAR L$003odd_tail	; exactly one block: go straight to the single-block path
+	movdqu	xmm3,[esi]	; load the first two input blocks
+	movdqu	xmm6,[16+esi]
+db	102,15,56,0,221	; pshufb xmm3,xmm5
+db	102,15,56,0,245	; pshufb xmm6,xmm5
+	movdqu	xmm5,[32+edx]
+	pxor	xmm0,xmm3
+	pshufd	xmm3,xmm6,78
+	movdqa	xmm7,xmm6
+	pxor	xmm3,xmm6
+	lea	esi,[32+esi]
+db	102,15,58,68,242,0	; pclmulqdq xmm6,xmm2,0x00
+db	102,15,58,68,250,17	; pclmulqdq xmm7,xmm2,0x11
+db	102,15,58,68,221,0	; pclmulqdq xmm3,xmm5,0x00
+	movups	xmm2,[16+edx]
+	nop
+	sub	ebx,32
+	jbe	NEAR L$004even_tail	; no further pair of blocks: finish the pair already started
+	jmp	NEAR L$005mod_loop
+align	32
+L$005mod_loop:	; main loop: completes the previous pair while starting the multiplies for the next pair
+	pshufd	xmm4,xmm0,78
+	movdqa	xmm1,xmm0
+	pxor	xmm4,xmm0
+	nop
+db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
+db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
+db	102,15,58,68,229,16	; pclmulqdq xmm4,xmm5,0x10
+	movups	xmm2,[edx]
+	xorps	xmm0,xmm6
+	movdqa	xmm5,[ecx]
+	xorps	xmm1,xmm7
+	movdqu	xmm7,[esi]
+	pxor	xmm3,xmm0
+	movdqu	xmm6,[16+esi]
+	pxor	xmm3,xmm1
+db	102,15,56,0,253	; pshufb xmm7,xmm5
+	pxor	xmm4,xmm3
+	movdqa	xmm3,xmm4
+	psrldq	xmm4,8
+	pslldq	xmm3,8
+	pxor	xmm1,xmm4
+	pxor	xmm0,xmm3
+db	102,15,56,0,245	; pshufb xmm6,xmm5
+	pxor	xmm1,xmm7
+	movdqa	xmm7,xmm6
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+db	102,15,58,68,242,0	; pclmulqdq xmm6,xmm2,0x00
+	movups	xmm5,[32+edx]
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+	pshufd	xmm3,xmm7,78
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm3,xmm7
+	pxor	xmm1,xmm4
+db	102,15,58,68,250,17	; pclmulqdq xmm7,xmm2,0x11
+	movups	xmm2,[16+edx]
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+db	102,15,58,68,221,0	; pclmulqdq xmm3,xmm5,0x00
+	lea	esi,[32+esi]
+	sub	ebx,32
+	ja	NEAR L$005mod_loop	; loop while more than one further block remains
+L$004even_tail:	; finishes the last pair of blocks without loading any more input
+	pshufd	xmm4,xmm0,78
+	movdqa	xmm1,xmm0
+	pxor	xmm4,xmm0
+db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
+db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
+db	102,15,58,68,229,16	; pclmulqdq xmm4,xmm5,0x10
+	movdqa	xmm5,[ecx]
+	xorps	xmm0,xmm6
+	xorps	xmm1,xmm7
+	pxor	xmm3,xmm0
+	pxor	xmm3,xmm1
+	pxor	xmm4,xmm3
+	movdqa	xmm3,xmm4
+	psrldq	xmm4,8
+	pslldq	xmm3,8
+	pxor	xmm1,xmm4
+	pxor	xmm0,xmm3
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	test	ebx,ebx
+	jnz	NEAR L$006done	; ebx == 0 here means exactly one more 16-byte block is left; otherwise finished
+	movups	xmm2,[edx]
+L$003odd_tail:	; single-block path: one multiply and fold, same sequence as _gcm_gmult_clmul after XOR-ing in the block
+	movdqu	xmm3,[esi]
+db	102,15,56,0,221	; pshufb xmm3,xmm5
+	pxor	xmm0,xmm3
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pshufd	xmm4,xmm2,78
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm2
+db	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
+db	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
+db	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00
+	xorps	xmm3,xmm0
+	xorps	xmm3,xmm1
+	movdqa	xmm4,xmm3
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+L$006done:
+db	102,15,56,0,197	; pshufb xmm0,xmm5 (reverse the bytes back before storing)
+	movdqu	[eax],xmm0	; write the final value back through the 1st argument
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	64
+L$bswap:	; constant table shared by the three *_clmul routines in this file
+db	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	; row 0: pshufb mask that reverses the 16 bytes of a vector
+db	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194	; row 1: constant 0xC2 (top byte) ... 0x01 (bottom byte), read at [16+ecx] by _gcm_init_clmul
+db	71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67	; ASCII, NUL-terminated: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
+db	82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
+db	112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
+db	0
diff --git a/deps/boringssl/win-x86/crypto/fipsmodule/md5-586.asm b/deps/boringssl/win-x86/crypto/fipsmodule/md5-586.asm
new file mode 100644
index 0000000..e09bd0c
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/fipsmodule/md5-586.asm
@@ -0,0 +1,689 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+global	_md5_block_asm_data_order	; exported entry point; takes three stack arguments (see loads below)
+align	16
+_md5_block_asm_data_order:
+L$_md5_block_asm_data_order_begin:
+	push	esi
+	push	edi
+	mov	edi,DWORD [12+esp]	; 1st stack argument (two pushes + return address = 12 bytes): pointer to four dwords, presumably the MD5 state
+	mov	esi,DWORD [16+esp]	; 2nd stack argument: input pointer, consumed 64 bytes per loop iteration
+	mov	ecx,DWORD [20+esp]	; 3rd stack argument: count of 64-byte units (scaled by 64 below)
+	push	ebp
+	shl	ecx,6	; ecx = count * 64
+	push	ebx
+	add	ecx,esi
+	sub	ecx,64	; ecx = esi + count*64 - 64, i.e. the address of the last 64-byte unit
+	mov	eax,DWORD [edi]	; eax/ebx/ecx/edx <- the four dwords at [edi]
+	push	ecx	; keep the bound on the stack; the loop reloads it from [esp] for its exit test
+	mov	ebx,DWORD [4+edi]
+	mov	ecx,DWORD [8+edi]
+	mov	edx,DWORD [12+edi]
+L$000start:	; top of the per-block loop; reached by fall-through first, so the body runs once before any bound check
+	; 
+	; R0 section: rounds 0-15. Each round adds ((x ^ y) & z) ^ y, an input dword and a constant to one of eax/edx/ecx/ebx, rotates it left, then adds another register; the register roles rotate every round.
+	mov	edi,ecx
+	mov	ebp,DWORD [esi]	; ebp = input dword at offset 0
+	; R0 0
+	xor	edi,edx
+	and	edi,ebx
+	lea	eax,[3614090360+ebp*1+eax]	; eax += input dword + round constant (lea used as a three-operand add)
+	xor	edi,edx	; edi = ((ecx ^ edx) & ebx) ^ edx
+	add	eax,edi
+	mov	edi,ebx	; seed edi for the next round's mixing function
+	rol	eax,7
+	mov	ebp,DWORD [4+esi]	; load the next round's input dword early
+	add	eax,ebx
+	; R0 1
+	xor	edi,ecx
+	and	edi,eax
+	lea	edx,[3905402710+ebp*1+edx]
+	xor	edi,ecx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,12
+	mov	ebp,DWORD [8+esi]
+	add	edx,eax
+	; R0 2
+	xor	edi,ebx
+	and	edi,edx
+	lea	ecx,[606105819+ebp*1+ecx]
+	xor	edi,ebx
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,17
+	mov	ebp,DWORD [12+esi]
+	add	ecx,edx
+	; R0 3
+	xor	edi,eax
+	and	edi,ecx
+	lea	ebx,[3250441966+ebp*1+ebx]
+	xor	edi,eax
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,22
+	mov	ebp,DWORD [16+esi]
+	add	ebx,ecx
+	; R0 4
+	xor	edi,edx
+	and	edi,ebx
+	lea	eax,[4118548399+ebp*1+eax]
+	xor	edi,edx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,7
+	mov	ebp,DWORD [20+esi]
+	add	eax,ebx
+	; R0 5
+	xor	edi,ecx
+	and	edi,eax
+	lea	edx,[1200080426+ebp*1+edx]
+	xor	edi,ecx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,12
+	mov	ebp,DWORD [24+esi]
+	add	edx,eax
+	; R0 6
+	xor	edi,ebx
+	and	edi,edx
+	lea	ecx,[2821735955+ebp*1+ecx]
+	xor	edi,ebx
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,17
+	mov	ebp,DWORD [28+esi]
+	add	ecx,edx
+	; R0 7
+	xor	edi,eax
+	and	edi,ecx
+	lea	ebx,[4249261313+ebp*1+ebx]
+	xor	edi,eax
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,22
+	mov	ebp,DWORD [32+esi]
+	add	ebx,ecx
+	; R0 8
+	xor	edi,edx
+	and	edi,ebx
+	lea	eax,[1770035416+ebp*1+eax]
+	xor	edi,edx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,7
+	mov	ebp,DWORD [36+esi]
+	add	eax,ebx
+	; R0 9
+	xor	edi,ecx
+	and	edi,eax
+	lea	edx,[2336552879+ebp*1+edx]
+	xor	edi,ecx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,12
+	mov	ebp,DWORD [40+esi]
+	add	edx,eax
+	; R0 10
+	xor	edi,ebx
+	and	edi,edx
+	lea	ecx,[4294925233+ebp*1+ecx]
+	xor	edi,ebx
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,17
+	mov	ebp,DWORD [44+esi]
+	add	ecx,edx
+	; R0 11
+	xor	edi,eax
+	and	edi,ecx
+	lea	ebx,[2304563134+ebp*1+ebx]
+	xor	edi,eax
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,22
+	mov	ebp,DWORD [48+esi]
+	add	ebx,ecx
+	; R0 12
+	xor	edi,edx
+	and	edi,ebx
+	lea	eax,[1804603682+ebp*1+eax]
+	xor	edi,edx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,7
+	mov	ebp,DWORD [52+esi]
+	add	eax,ebx
+	; R0 13
+	xor	edi,ecx
+	and	edi,eax
+	lea	edx,[4254626195+ebp*1+edx]
+	xor	edi,ecx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,12
+	mov	ebp,DWORD [56+esi]
+	add	edx,eax
+	; R0 14
+	xor	edi,ebx
+	and	edi,edx
+	lea	ecx,[2792965006+ebp*1+ecx]
+	xor	edi,ebx
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,17
+	mov	ebp,DWORD [60+esi]
+	add	ecx,edx
+	; R0 15
+	xor	edi,eax
+	and	edi,ecx
+	lea	ebx,[1236535329+ebp*1+ebx]
+	xor	edi,eax
+	add	ebx,edi
+	mov	edi,ecx	; edi = ecx is the starting value expected by round 16
+	rol	ebx,22
+	mov	ebp,DWORD [4+esi]	; round 16 consumes the input dword at offset 4, not the next sequential one
+	add	ebx,ecx
+	; 
+	; R1 section: rounds 16-31. On entry edi = ecx and ebp = input dword at offset 4 (both set at the end of round 15). Input dwords are read in a permuted order (offsets 4, 24, 44, 0, 20, ...).
+	; R1 16
+	lea	eax,[4129170786+ebp*1+eax]	; eax += input dword + round constant
+	xor	edi,ebx
+	and	edi,edx
+	mov	ebp,DWORD [24+esi]	; load the next round's input dword early
+	xor	edi,ecx	; edi = ((ecx ^ ebx) & edx) ^ ecx
+	add	eax,edi
+	mov	edi,ebx	; seed edi for the next round's mixing function
+	rol	eax,5
+	add	eax,ebx
+	; R1 17
+	lea	edx,[3225465664+ebp*1+edx]
+	xor	edi,eax
+	and	edi,ecx
+	mov	ebp,DWORD [44+esi]
+	xor	edi,ebx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,9
+	add	edx,eax
+	; R1 18
+	lea	ecx,[643717713+ebp*1+ecx]
+	xor	edi,edx
+	and	edi,ebx
+	mov	ebp,DWORD [esi]
+	xor	edi,eax
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,14
+	add	ecx,edx
+	; R1 19
+	lea	ebx,[3921069994+ebp*1+ebx]
+	xor	edi,ecx
+	and	edi,eax
+	mov	ebp,DWORD [20+esi]
+	xor	edi,edx
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,20
+	add	ebx,ecx
+	; R1 20
+	lea	eax,[3593408605+ebp*1+eax]
+	xor	edi,ebx
+	and	edi,edx
+	mov	ebp,DWORD [40+esi]
+	xor	edi,ecx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,5
+	add	eax,ebx
+	; R1 21
+	lea	edx,[38016083+ebp*1+edx]
+	xor	edi,eax
+	and	edi,ecx
+	mov	ebp,DWORD [60+esi]
+	xor	edi,ebx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,9
+	add	edx,eax
+	; R1 22
+	lea	ecx,[3634488961+ebp*1+ecx]
+	xor	edi,edx
+	and	edi,ebx
+	mov	ebp,DWORD [16+esi]
+	xor	edi,eax
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,14
+	add	ecx,edx
+	; R1 23
+	lea	ebx,[3889429448+ebp*1+ebx]
+	xor	edi,ecx
+	and	edi,eax
+	mov	ebp,DWORD [36+esi]
+	xor	edi,edx
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,20
+	add	ebx,ecx
+	; R1 24
+	lea	eax,[568446438+ebp*1+eax]
+	xor	edi,ebx
+	and	edi,edx
+	mov	ebp,DWORD [56+esi]
+	xor	edi,ecx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,5
+	add	eax,ebx
+	; R1 25
+	lea	edx,[3275163606+ebp*1+edx]
+	xor	edi,eax
+	and	edi,ecx
+	mov	ebp,DWORD [12+esi]
+	xor	edi,ebx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,9
+	add	edx,eax
+	; R1 26
+	lea	ecx,[4107603335+ebp*1+ecx]
+	xor	edi,edx
+	and	edi,ebx
+	mov	ebp,DWORD [32+esi]
+	xor	edi,eax
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,14
+	add	ecx,edx
+	; R1 27
+	lea	ebx,[1163531501+ebp*1+ebx]
+	xor	edi,ecx
+	and	edi,eax
+	mov	ebp,DWORD [52+esi]
+	xor	edi,edx
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,20
+	add	ebx,ecx
+	; R1 28
+	lea	eax,[2850285829+ebp*1+eax]
+	xor	edi,ebx
+	and	edi,edx
+	mov	ebp,DWORD [8+esi]
+	xor	edi,ecx
+	add	eax,edi
+	mov	edi,ebx
+	rol	eax,5
+	add	eax,ebx
+	; R1 29
+	lea	edx,[4243563512+ebp*1+edx]
+	xor	edi,eax
+	and	edi,ecx
+	mov	ebp,DWORD [28+esi]
+	xor	edi,ebx
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,9
+	add	edx,eax
+	; R1 30
+	lea	ecx,[1735328473+ebp*1+ecx]
+	xor	edi,edx
+	and	edi,ebx
+	mov	ebp,DWORD [48+esi]
+	xor	edi,eax
+	add	ecx,edi
+	mov	edi,edx
+	rol	ecx,14
+	add	ecx,edx
+	; R1 31
+	lea	ebx,[2368359562+ebp*1+ebx]
+	xor	edi,ecx
+	and	edi,eax
+	mov	ebp,DWORD [20+esi]	; input dword at offset 20 is consumed by round 32
+	xor	edi,edx
+	add	ebx,edi
+	mov	edi,ecx	; edi = ecx is the starting value expected by round 32
+	rol	ebx,20
+	add	ebx,ecx
+	; 
+	; R2 section: rounds 32-47, mixing function is a three-way xor. On entry edi = ecx and ebp = input dword at offset 20 (both set in round 31). Rounds are emitted in interleaved pairs: the final add of each even round appears among the odd round's instructions.
+	; R2 32
+	xor	edi,edx
+	xor	edi,ebx	; edi = ecx ^ edx ^ ebx
+	lea	eax,[4294588738+ebp*1+eax]	; eax += input dword + round constant
+	add	eax,edi
+	rol	eax,4
+	mov	ebp,DWORD [32+esi]	; load the next round's input dword early
+	mov	edi,ebx
+	; R2 33
+	lea	edx,[2272392833+ebp*1+edx]
+	add	eax,ebx	; completes round 32 (deferred add after the rotate)
+	xor	edi,ecx
+	xor	edi,eax
+	mov	ebp,DWORD [44+esi]
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,11
+	add	edx,eax
+	; R2 34
+	xor	edi,ebx
+	xor	edi,edx
+	lea	ecx,[1839030562+ebp*1+ecx]
+	add	ecx,edi
+	rol	ecx,16
+	mov	ebp,DWORD [56+esi]
+	mov	edi,edx
+	; R2 35
+	lea	ebx,[4259657740+ebp*1+ebx]
+	add	ecx,edx
+	xor	edi,eax
+	xor	edi,ecx
+	mov	ebp,DWORD [4+esi]
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,23
+	add	ebx,ecx
+	; R2 36
+	xor	edi,edx
+	xor	edi,ebx
+	lea	eax,[2763975236+ebp*1+eax]
+	add	eax,edi
+	rol	eax,4
+	mov	ebp,DWORD [16+esi]
+	mov	edi,ebx
+	; R2 37
+	lea	edx,[1272893353+ebp*1+edx]
+	add	eax,ebx
+	xor	edi,ecx
+	xor	edi,eax
+	mov	ebp,DWORD [28+esi]
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,11
+	add	edx,eax
+	; R2 38
+	xor	edi,ebx
+	xor	edi,edx
+	lea	ecx,[4139469664+ebp*1+ecx]
+	add	ecx,edi
+	rol	ecx,16
+	mov	ebp,DWORD [40+esi]
+	mov	edi,edx
+	; R2 39
+	lea	ebx,[3200236656+ebp*1+ebx]
+	add	ecx,edx
+	xor	edi,eax
+	xor	edi,ecx
+	mov	ebp,DWORD [52+esi]
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,23
+	add	ebx,ecx
+	; R2 40
+	xor	edi,edx
+	xor	edi,ebx
+	lea	eax,[681279174+ebp*1+eax]
+	add	eax,edi
+	rol	eax,4
+	mov	ebp,DWORD [esi]
+	mov	edi,ebx
+	; R2 41
+	lea	edx,[3936430074+ebp*1+edx]
+	add	eax,ebx
+	xor	edi,ecx
+	xor	edi,eax
+	mov	ebp,DWORD [12+esi]
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,11
+	add	edx,eax
+	; R2 42
+	xor	edi,ebx
+	xor	edi,edx
+	lea	ecx,[3572445317+ebp*1+ecx]
+	add	ecx,edi
+	rol	ecx,16
+	mov	ebp,DWORD [24+esi]
+	mov	edi,edx
+	; R2 43
+	lea	ebx,[76029189+ebp*1+ebx]
+	add	ecx,edx
+	xor	edi,eax
+	xor	edi,ecx
+	mov	ebp,DWORD [36+esi]
+	add	ebx,edi
+	mov	edi,ecx
+	rol	ebx,23
+	add	ebx,ecx
+	; R2 44
+	xor	edi,edx
+	xor	edi,ebx
+	lea	eax,[3654602809+ebp*1+eax]
+	add	eax,edi
+	rol	eax,4
+	mov	ebp,DWORD [48+esi]
+	mov	edi,ebx
+	; R2 45
+	lea	edx,[3873151461+ebp*1+edx]
+	add	eax,ebx
+	xor	edi,ecx
+	xor	edi,eax
+	mov	ebp,DWORD [60+esi]
+	add	edx,edi
+	mov	edi,eax
+	rol	edx,11
+	add	edx,eax
+	; R2 46
+	xor	edi,ebx
+	xor	edi,edx
+	lea	ecx,[530742520+ebp*1+ecx]
+	add	ecx,edi
+	rol	ecx,16
+	mov	ebp,DWORD [8+esi]
+	mov	edi,edx
+	; R2 47
+	lea	ebx,[3299628645+ebp*1+ebx]
+	add	ecx,edx
+	xor	edi,eax
+	xor	edi,ecx
+	mov	ebp,DWORD [esi]	; input dword at offset 0 is consumed by round 48
+	add	ebx,edi
+	mov	edi,-1	; all-ones seed: the next section xors it with a register to form a bitwise NOT
+	rol	ebx,23
+	add	ebx,ecx
+	; 
+	; R3 section: rounds 48-63, then fold the result into the four dwords at the 1st argument, advance the input pointer and loop. On entry edi = -1 and ebp = input dword at offset 0 (both set in round 47).
+	; R3 48
+	xor	edi,edx	; edi = ~edx (edi was -1)
+	or	edi,ebx
+	lea	eax,[4096336452+ebp*1+eax]	; eax += input dword + round constant
+	xor	edi,ecx	; edi = (~edx | ebx) ^ ecx
+	mov	ebp,DWORD [28+esi]	; load the next round's input dword early
+	add	eax,edi
+	mov	edi,-1
+	rol	eax,6
+	xor	edi,ecx	; edi = ~ecx, prepared for the next round
+	add	eax,ebx
+	; R3 49
+	or	edi,eax
+	lea	edx,[1126891415+ebp*1+edx]
+	xor	edi,ebx
+	mov	ebp,DWORD [56+esi]
+	add	edx,edi
+	mov	edi,-1
+	rol	edx,10
+	xor	edi,ebx
+	add	edx,eax
+	; R3 50
+	or	edi,edx
+	lea	ecx,[2878612391+ebp*1+ecx]
+	xor	edi,eax
+	mov	ebp,DWORD [20+esi]
+	add	ecx,edi
+	mov	edi,-1
+	rol	ecx,15
+	xor	edi,eax
+	add	ecx,edx
+	; R3 51
+	or	edi,ecx
+	lea	ebx,[4237533241+ebp*1+ebx]
+	xor	edi,edx
+	mov	ebp,DWORD [48+esi]
+	add	ebx,edi
+	mov	edi,-1
+	rol	ebx,21
+	xor	edi,edx
+	add	ebx,ecx
+	; R3 52
+	or	edi,ebx
+	lea	eax,[1700485571+ebp*1+eax]
+	xor	edi,ecx
+	mov	ebp,DWORD [12+esi]
+	add	eax,edi
+	mov	edi,-1
+	rol	eax,6
+	xor	edi,ecx
+	add	eax,ebx
+	; R3 53
+	or	edi,eax
+	lea	edx,[2399980690+ebp*1+edx]
+	xor	edi,ebx
+	mov	ebp,DWORD [40+esi]
+	add	edx,edi
+	mov	edi,-1
+	rol	edx,10
+	xor	edi,ebx
+	add	edx,eax
+	; R3 54
+	or	edi,edx
+	lea	ecx,[4293915773+ebp*1+ecx]
+	xor	edi,eax
+	mov	ebp,DWORD [4+esi]
+	add	ecx,edi
+	mov	edi,-1
+	rol	ecx,15
+	xor	edi,eax
+	add	ecx,edx
+	; R3 55
+	or	edi,ecx
+	lea	ebx,[2240044497+ebp*1+ebx]
+	xor	edi,edx
+	mov	ebp,DWORD [32+esi]
+	add	ebx,edi
+	mov	edi,-1
+	rol	ebx,21
+	xor	edi,edx
+	add	ebx,ecx
+	; R3 56
+	or	edi,ebx
+	lea	eax,[1873313359+ebp*1+eax]
+	xor	edi,ecx
+	mov	ebp,DWORD [60+esi]
+	add	eax,edi
+	mov	edi,-1
+	rol	eax,6
+	xor	edi,ecx
+	add	eax,ebx
+	; R3 57
+	or	edi,eax
+	lea	edx,[4264355552+ebp*1+edx]
+	xor	edi,ebx
+	mov	ebp,DWORD [24+esi]
+	add	edx,edi
+	mov	edi,-1
+	rol	edx,10
+	xor	edi,ebx
+	add	edx,eax
+	; R3 58
+	or	edi,edx
+	lea	ecx,[2734768916+ebp*1+ecx]
+	xor	edi,eax
+	mov	ebp,DWORD [52+esi]
+	add	ecx,edi
+	mov	edi,-1
+	rol	ecx,15
+	xor	edi,eax
+	add	ecx,edx
+	; R3 59
+	or	edi,ecx
+	lea	ebx,[1309151649+ebp*1+ebx]
+	xor	edi,edx
+	mov	ebp,DWORD [16+esi]
+	add	ebx,edi
+	mov	edi,-1
+	rol	ebx,21
+	xor	edi,edx
+	add	ebx,ecx
+	; R3 60
+	or	edi,ebx
+	lea	eax,[4149444226+ebp*1+eax]
+	xor	edi,ecx
+	mov	ebp,DWORD [44+esi]
+	add	eax,edi
+	mov	edi,-1
+	rol	eax,6
+	xor	edi,ecx
+	add	eax,ebx
+	; R3 61
+	or	edi,eax
+	lea	edx,[3174756917+ebp*1+edx]
+	xor	edi,ebx
+	mov	ebp,DWORD [8+esi]
+	add	edx,edi
+	mov	edi,-1
+	rol	edx,10
+	xor	edi,ebx
+	add	edx,eax
+	; R3 62
+	or	edi,edx
+	lea	ecx,[718787259+ebp*1+ecx]
+	xor	edi,eax
+	mov	ebp,DWORD [36+esi]
+	add	ecx,edi
+	mov	edi,-1
+	rol	ecx,15
+	xor	edi,eax
+	add	ecx,edx
+	; R3 63
+	or	edi,ecx
+	lea	ebx,[3951481745+ebp*1+ebx]
+	xor	edi,edx
+	mov	ebp,DWORD [24+esp]	; reload the 1st stack argument (five pushes + return address = 24 bytes)
+	add	ebx,edi
+	add	esi,64	; advance the input pointer to the next 64-byte unit
+	rol	ebx,21
+	mov	edi,DWORD [ebp]
+	add	ebx,ecx
+	add	eax,edi	; add each stored dword to its working register ...
+	mov	edi,DWORD [4+ebp]
+	add	ebx,edi
+	mov	edi,DWORD [8+ebp]
+	add	ecx,edi
+	mov	edi,DWORD [12+ebp]
+	add	edx,edi
+	mov	DWORD [ebp],eax	; ... and write the sums back through the 1st argument
+	mov	DWORD [4+ebp],ebx
+	mov	edi,DWORD [esp]	; edi = bound pushed in the prologue (address of the last 64-byte unit)
+	mov	DWORD [8+ebp],ecx
+	mov	DWORD [12+ebp],edx
+	cmp	edi,esi
+	jae	NEAR L$000start	; unsigned: loop again while bound >= current input pointer
+	pop	eax	; remove the saved bound from the stack (its value lands in eax)
+	pop	ebx
+	pop	ebp
+	pop	edi
+	pop	esi
+	ret
diff --git a/deps/boringssl/win-x86/crypto/fipsmodule/sha1-586.asm b/deps/boringssl/win-x86/crypto/fipsmodule/sha1-586.asm
new file mode 100644
index 0000000..4b05c9d
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/fipsmodule/sha1-586.asm
@@ -0,0 +1,3806 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+;extern	_OPENSSL_ia32cap_P
+global	_sha1_block_data_order
+align	16
+_sha1_block_data_order:
+L$_sha1_block_data_order_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	call	L$000pic_point
+L$000pic_point:
+	pop	ebp
+	lea	esi,[_OPENSSL_ia32cap_P]
+	lea	ebp,[(L$K_XX_XX-L$000pic_point)+ebp]
+	mov	eax,DWORD [esi]
+	mov	edx,DWORD [4+esi]
+	test	edx,512
+	jz	NEAR L$001x86
+	mov	ecx,DWORD [8+esi]
+	test	eax,16777216
+	jz	NEAR L$001x86
+	and	edx,268435456
+	and	eax,1073741824
+	or	eax,edx
+	cmp	eax,1342177280
+	je	NEAR L$avx_shortcut
+	jmp	NEAR L$ssse3_shortcut
+align	16
+L$001x86:
+	mov	ebp,DWORD [20+esp]
+	mov	esi,DWORD [24+esp]
+	mov	eax,DWORD [28+esp]
+	sub	esp,76
+	shl	eax,6
+	add	eax,esi
+	mov	DWORD [104+esp],eax
+	mov	edi,DWORD [16+ebp]
+	jmp	NEAR L$002loop
+align	16
+L$002loop:
+	mov	eax,DWORD [esi]
+	mov	ebx,DWORD [4+esi]
+	mov	ecx,DWORD [8+esi]
+	mov	edx,DWORD [12+esi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	mov	DWORD [esp],eax
+	mov	DWORD [4+esp],ebx
+	mov	DWORD [8+esp],ecx
+	mov	DWORD [12+esp],edx
+	mov	eax,DWORD [16+esi]
+	mov	ebx,DWORD [20+esi]
+	mov	ecx,DWORD [24+esi]
+	mov	edx,DWORD [28+esi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	mov	DWORD [16+esp],eax
+	mov	DWORD [20+esp],ebx
+	mov	DWORD [24+esp],ecx
+	mov	DWORD [28+esp],edx
+	mov	eax,DWORD [32+esi]
+	mov	ebx,DWORD [36+esi]
+	mov	ecx,DWORD [40+esi]
+	mov	edx,DWORD [44+esi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	mov	DWORD [32+esp],eax
+	mov	DWORD [36+esp],ebx
+	mov	DWORD [40+esp],ecx
+	mov	DWORD [44+esp],edx
+	mov	eax,DWORD [48+esi]
+	mov	ebx,DWORD [52+esi]
+	mov	ecx,DWORD [56+esi]
+	mov	edx,DWORD [60+esi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	mov	DWORD [48+esp],eax
+	mov	DWORD [52+esp],ebx
+	mov	DWORD [56+esp],ecx
+	mov	DWORD [60+esp],edx
+	mov	DWORD [100+esp],esi
+	mov	eax,DWORD [ebp]
+	mov	ebx,DWORD [4+ebp]
+	mov	ecx,DWORD [8+ebp]
+	mov	edx,DWORD [12+ebp]
+	; 00_15 0
+	mov	esi,ecx
+	mov	ebp,eax
+	rol	ebp,5
+	xor	esi,edx
+	add	ebp,edi
+	mov	edi,DWORD [esp]
+	and	esi,ebx
+	ror	ebx,2
+	xor	esi,edx
+	lea	ebp,[1518500249+edi*1+ebp]
+	add	ebp,esi
+	; 00_15 1
+	mov	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	xor	edi,ecx
+	add	ebp,edx
+	mov	edx,DWORD [4+esp]
+	and	edi,eax
+	ror	eax,2
+	xor	edi,ecx
+	lea	ebp,[1518500249+edx*1+ebp]
+	add	ebp,edi
+	; 00_15 2
+	mov	edx,eax
+	mov	edi,ebp
+	rol	ebp,5
+	xor	edx,ebx
+	add	ebp,ecx
+	mov	ecx,DWORD [8+esp]
+	and	edx,esi
+	ror	esi,2
+	xor	edx,ebx
+	lea	ebp,[1518500249+ecx*1+ebp]
+	add	ebp,edx
+	; 00_15 3
+	mov	ecx,esi
+	mov	edx,ebp
+	rol	ebp,5
+	xor	ecx,eax
+	add	ebp,ebx
+	mov	ebx,DWORD [12+esp]
+	and	ecx,edi
+	ror	edi,2
+	xor	ecx,eax
+	lea	ebp,[1518500249+ebx*1+ebp]
+	add	ebp,ecx
+	; 00_15 4
+	mov	ebx,edi
+	mov	ecx,ebp
+	rol	ebp,5
+	xor	ebx,esi
+	add	ebp,eax
+	mov	eax,DWORD [16+esp]
+	and	ebx,edx
+	ror	edx,2
+	xor	ebx,esi
+	lea	ebp,[1518500249+eax*1+ebp]
+	add	ebp,ebx
+	; 00_15 5
+	mov	eax,edx
+	mov	ebx,ebp
+	rol	ebp,5
+	xor	eax,edi
+	add	ebp,esi
+	mov	esi,DWORD [20+esp]
+	and	eax,ecx
+	ror	ecx,2
+	xor	eax,edi
+	lea	ebp,[1518500249+esi*1+ebp]
+	add	ebp,eax
+	; 00_15 6
+	mov	esi,ecx
+	mov	eax,ebp
+	rol	ebp,5
+	xor	esi,edx
+	add	ebp,edi
+	mov	edi,DWORD [24+esp]
+	and	esi,ebx
+	ror	ebx,2
+	xor	esi,edx
+	lea	ebp,[1518500249+edi*1+ebp]
+	add	ebp,esi
+	; 00_15 7
+	mov	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	xor	edi,ecx
+	add	ebp,edx
+	mov	edx,DWORD [28+esp]
+	and	edi,eax
+	ror	eax,2
+	xor	edi,ecx
+	lea	ebp,[1518500249+edx*1+ebp]
+	add	ebp,edi
+	; 00_15 8
+	mov	edx,eax
+	mov	edi,ebp
+	rol	ebp,5
+	xor	edx,ebx
+	add	ebp,ecx
+	mov	ecx,DWORD [32+esp]
+	and	edx,esi
+	ror	esi,2
+	xor	edx,ebx
+	lea	ebp,[1518500249+ecx*1+ebp]
+	add	ebp,edx
+	; 00_15 9
+	mov	ecx,esi
+	mov	edx,ebp
+	rol	ebp,5
+	xor	ecx,eax
+	add	ebp,ebx
+	mov	ebx,DWORD [36+esp]
+	and	ecx,edi
+	ror	edi,2
+	xor	ecx,eax
+	lea	ebp,[1518500249+ebx*1+ebp]
+	add	ebp,ecx
+	; 00_15 10
+	mov	ebx,edi
+	mov	ecx,ebp
+	rol	ebp,5
+	xor	ebx,esi
+	add	ebp,eax
+	mov	eax,DWORD [40+esp]
+	and	ebx,edx
+	ror	edx,2
+	xor	ebx,esi
+	lea	ebp,[1518500249+eax*1+ebp]
+	add	ebp,ebx
+	; 00_15 11
+	mov	eax,edx
+	mov	ebx,ebp
+	rol	ebp,5
+	xor	eax,edi
+	add	ebp,esi
+	mov	esi,DWORD [44+esp]
+	and	eax,ecx
+	ror	ecx,2
+	xor	eax,edi
+	lea	ebp,[1518500249+esi*1+ebp]
+	add	ebp,eax
+	; 00_15 12
+	mov	esi,ecx
+	mov	eax,ebp
+	rol	ebp,5
+	xor	esi,edx
+	add	ebp,edi
+	mov	edi,DWORD [48+esp]
+	and	esi,ebx
+	ror	ebx,2
+	xor	esi,edx
+	lea	ebp,[1518500249+edi*1+ebp]
+	add	ebp,esi
+	; 00_15 13
+	mov	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	xor	edi,ecx
+	add	ebp,edx
+	mov	edx,DWORD [52+esp]
+	and	edi,eax
+	ror	eax,2
+	xor	edi,ecx
+	lea	ebp,[1518500249+edx*1+ebp]
+	add	ebp,edi
+	; 00_15 14
+	mov	edx,eax
+	mov	edi,ebp
+	rol	ebp,5
+	xor	edx,ebx
+	add	ebp,ecx
+	mov	ecx,DWORD [56+esp]
+	and	edx,esi
+	ror	esi,2
+	xor	edx,ebx
+	lea	ebp,[1518500249+ecx*1+ebp]
+	add	ebp,edx
+	; 00_15 15
+	mov	ecx,esi
+	mov	edx,ebp
+	rol	ebp,5
+	xor	ecx,eax
+	add	ebp,ebx
+	mov	ebx,DWORD [60+esp]
+	and	ecx,edi
+	ror	edi,2
+	xor	ecx,eax
+	lea	ebp,[1518500249+ebx*1+ebp]
+	mov	ebx,DWORD [esp]
+	add	ecx,ebp
+	; 16_19 16
+	mov	ebp,edi
+	xor	ebx,DWORD [8+esp]
+	xor	ebp,esi
+	xor	ebx,DWORD [32+esp]
+	and	ebp,edx
+	xor	ebx,DWORD [52+esp]
+	rol	ebx,1
+	xor	ebp,esi
+	add	eax,ebp
+	mov	ebp,ecx
+	ror	edx,2
+	mov	DWORD [esp],ebx
+	rol	ebp,5
+	lea	ebx,[1518500249+eax*1+ebx]
+	mov	eax,DWORD [4+esp]
+	add	ebx,ebp
+	; 16_19 17
+	mov	ebp,edx
+	xor	eax,DWORD [12+esp]
+	xor	ebp,edi
+	xor	eax,DWORD [36+esp]
+	and	ebp,ecx
+	xor	eax,DWORD [56+esp]
+	rol	eax,1
+	xor	ebp,edi
+	add	esi,ebp
+	mov	ebp,ebx
+	ror	ecx,2
+	mov	DWORD [4+esp],eax
+	rol	ebp,5
+	lea	eax,[1518500249+esi*1+eax]
+	mov	esi,DWORD [8+esp]
+	add	eax,ebp
+	; 16_19 18
+	mov	ebp,ecx
+	xor	esi,DWORD [16+esp]
+	xor	ebp,edx
+	xor	esi,DWORD [40+esp]
+	and	ebp,ebx
+	xor	esi,DWORD [60+esp]
+	rol	esi,1
+	xor	ebp,edx
+	add	edi,ebp
+	mov	ebp,eax
+	ror	ebx,2
+	mov	DWORD [8+esp],esi
+	rol	ebp,5
+	lea	esi,[1518500249+edi*1+esi]
+	mov	edi,DWORD [12+esp]
+	add	esi,ebp
+	; 16_19 19
+	mov	ebp,ebx
+	xor	edi,DWORD [20+esp]
+	xor	ebp,ecx
+	xor	edi,DWORD [44+esp]
+	and	ebp,eax
+	xor	edi,DWORD [esp]
+	rol	edi,1
+	xor	ebp,ecx
+	add	edx,ebp
+	mov	ebp,esi
+	ror	eax,2
+	mov	DWORD [12+esp],edi
+	rol	ebp,5
+	lea	edi,[1518500249+edx*1+edi]
+	mov	edx,DWORD [16+esp]
+	add	edi,ebp
+	; 20_39 20
+	mov	ebp,esi
+	xor	edx,DWORD [24+esp]
+	xor	ebp,eax
+	xor	edx,DWORD [48+esp]
+	xor	ebp,ebx
+	xor	edx,DWORD [4+esp]
+	rol	edx,1
+	add	ecx,ebp
+	ror	esi,2
+	mov	ebp,edi
+	rol	ebp,5
+	mov	DWORD [16+esp],edx
+	lea	edx,[1859775393+ecx*1+edx]
+	mov	ecx,DWORD [20+esp]
+	add	edx,ebp
+	; 20_39 21
+	mov	ebp,edi
+	xor	ecx,DWORD [28+esp]
+	xor	ebp,esi
+	xor	ecx,DWORD [52+esp]
+	xor	ebp,eax
+	xor	ecx,DWORD [8+esp]
+	rol	ecx,1
+	add	ebx,ebp
+	ror	edi,2
+	mov	ebp,edx
+	rol	ebp,5
+	mov	DWORD [20+esp],ecx
+	lea	ecx,[1859775393+ebx*1+ecx]
+	mov	ebx,DWORD [24+esp]
+	add	ecx,ebp
+	; 20_39 22
+	mov	ebp,edx
+	xor	ebx,DWORD [32+esp]
+	xor	ebp,edi
+	xor	ebx,DWORD [56+esp]
+	xor	ebp,esi
+	xor	ebx,DWORD [12+esp]
+	rol	ebx,1
+	add	eax,ebp
+	ror	edx,2
+	mov	ebp,ecx
+	rol	ebp,5
+	mov	DWORD [24+esp],ebx
+	lea	ebx,[1859775393+eax*1+ebx]
+	mov	eax,DWORD [28+esp]
+	add	ebx,ebp
+	; 20_39 23
+	mov	ebp,ecx
+	xor	eax,DWORD [36+esp]
+	xor	ebp,edx
+	xor	eax,DWORD [60+esp]
+	xor	ebp,edi
+	xor	eax,DWORD [16+esp]
+	rol	eax,1
+	add	esi,ebp
+	ror	ecx,2
+	mov	ebp,ebx
+	rol	ebp,5
+	mov	DWORD [28+esp],eax
+	lea	eax,[1859775393+esi*1+eax]
+	mov	esi,DWORD [32+esp]
+	add	eax,ebp
+	; 20_39 24
+	mov	ebp,ebx
+	xor	esi,DWORD [40+esp]
+	xor	ebp,ecx
+	xor	esi,DWORD [esp]
+	xor	ebp,edx
+	xor	esi,DWORD [20+esp]
+	rol	esi,1
+	add	edi,ebp
+	ror	ebx,2
+	mov	ebp,eax
+	rol	ebp,5
+	mov	DWORD [32+esp],esi
+	lea	esi,[1859775393+edi*1+esi]
+	mov	edi,DWORD [36+esp]
+	add	esi,ebp
+	; 20_39 25
+	mov	ebp,eax
+	xor	edi,DWORD [44+esp]
+	xor	ebp,ebx
+	xor	edi,DWORD [4+esp]
+	xor	ebp,ecx
+	xor	edi,DWORD [24+esp]
+	rol	edi,1
+	add	edx,ebp
+	ror	eax,2
+	mov	ebp,esi
+	rol	ebp,5
+	mov	DWORD [36+esp],edi
+	lea	edi,[1859775393+edx*1+edi]
+	mov	edx,DWORD [40+esp]
+	add	edi,ebp
+	; 20_39 26
+	mov	ebp,esi
+	xor	edx,DWORD [48+esp]
+	xor	ebp,eax
+	xor	edx,DWORD [8+esp]
+	xor	ebp,ebx
+	xor	edx,DWORD [28+esp]
+	rol	edx,1
+	add	ecx,ebp
+	ror	esi,2
+	mov	ebp,edi
+	rol	ebp,5
+	mov	DWORD [40+esp],edx
+	lea	edx,[1859775393+ecx*1+edx]
+	mov	ecx,DWORD [44+esp]
+	add	edx,ebp
+	; 20_39 27
+	mov	ebp,edi
+	xor	ecx,DWORD [52+esp]
+	xor	ebp,esi
+	xor	ecx,DWORD [12+esp]
+	xor	ebp,eax
+	xor	ecx,DWORD [32+esp]
+	rol	ecx,1
+	add	ebx,ebp
+	ror	edi,2
+	mov	ebp,edx
+	rol	ebp,5
+	mov	DWORD [44+esp],ecx
+	lea	ecx,[1859775393+ebx*1+ecx]
+	mov	ebx,DWORD [48+esp]
+	add	ecx,ebp
+	; 20_39 28
+	mov	ebp,edx
+	xor	ebx,DWORD [56+esp]
+	xor	ebp,edi
+	xor	ebx,DWORD [16+esp]
+	xor	ebp,esi
+	xor	ebx,DWORD [36+esp]
+	rol	ebx,1
+	add	eax,ebp
+	ror	edx,2
+	mov	ebp,ecx
+	rol	ebp,5
+	mov	DWORD [48+esp],ebx
+	lea	ebx,[1859775393+eax*1+ebx]
+	mov	eax,DWORD [52+esp]
+	add	ebx,ebp
+	; 20_39 29
+	mov	ebp,ecx
+	xor	eax,DWORD [60+esp]
+	xor	ebp,edx
+	xor	eax,DWORD [20+esp]
+	xor	ebp,edi
+	xor	eax,DWORD [40+esp]
+	rol	eax,1
+	add	esi,ebp
+	ror	ecx,2
+	mov	ebp,ebx
+	rol	ebp,5
+	mov	DWORD [52+esp],eax
+	lea	eax,[1859775393+esi*1+eax]
+	mov	esi,DWORD [56+esp]
+	add	eax,ebp
+	; 20_39 30
+	mov	ebp,ebx
+	xor	esi,DWORD [esp]
+	xor	ebp,ecx
+	xor	esi,DWORD [24+esp]
+	xor	ebp,edx
+	xor	esi,DWORD [44+esp]
+	rol	esi,1
+	add	edi,ebp
+	ror	ebx,2
+	mov	ebp,eax
+	rol	ebp,5
+	mov	DWORD [56+esp],esi
+	lea	esi,[1859775393+edi*1+esi]
+	mov	edi,DWORD [60+esp]
+	add	esi,ebp
+	; 20_39 31
+	mov	ebp,eax
+	xor	edi,DWORD [4+esp]
+	xor	ebp,ebx
+	xor	edi,DWORD [28+esp]
+	xor	ebp,ecx
+	xor	edi,DWORD [48+esp]
+	rol	edi,1
+	add	edx,ebp
+	ror	eax,2
+	mov	ebp,esi
+	rol	ebp,5
+	mov	DWORD [60+esp],edi
+	lea	edi,[1859775393+edx*1+edi]
+	mov	edx,DWORD [esp]
+	add	edi,ebp
+	; 20_39 32
+	mov	ebp,esi
+	xor	edx,DWORD [8+esp]
+	xor	ebp,eax
+	xor	edx,DWORD [32+esp]
+	xor	ebp,ebx
+	xor	edx,DWORD [52+esp]
+	rol	edx,1
+	add	ecx,ebp
+	ror	esi,2
+	mov	ebp,edi
+	rol	ebp,5
+	mov	DWORD [esp],edx
+	lea	edx,[1859775393+ecx*1+edx]
+	mov	ecx,DWORD [4+esp]
+	add	edx,ebp
+	; 20_39 33
+	mov	ebp,edi
+	xor	ecx,DWORD [12+esp]
+	xor	ebp,esi
+	xor	ecx,DWORD [36+esp]
+	xor	ebp,eax
+	xor	ecx,DWORD [56+esp]
+	rol	ecx,1
+	add	ebx,ebp
+	ror	edi,2
+	mov	ebp,edx
+	rol	ebp,5
+	mov	DWORD [4+esp],ecx
+	lea	ecx,[1859775393+ebx*1+ecx]
+	mov	ebx,DWORD [8+esp]
+	add	ecx,ebp
+	; 20_39 34
+	mov	ebp,edx
+	xor	ebx,DWORD [16+esp]
+	xor	ebp,edi
+	xor	ebx,DWORD [40+esp]
+	xor	ebp,esi
+	xor	ebx,DWORD [60+esp]
+	rol	ebx,1
+	add	eax,ebp
+	ror	edx,2
+	mov	ebp,ecx
+	rol	ebp,5
+	mov	DWORD [8+esp],ebx
+	lea	ebx,[1859775393+eax*1+ebx]
+	mov	eax,DWORD [12+esp]
+	add	ebx,ebp
+	; 20_39 35
+	mov	ebp,ecx
+	xor	eax,DWORD [20+esp]
+	xor	ebp,edx
+	xor	eax,DWORD [44+esp]
+	xor	ebp,edi
+	xor	eax,DWORD [esp]
+	rol	eax,1
+	add	esi,ebp
+	ror	ecx,2
+	mov	ebp,ebx
+	rol	ebp,5
+	mov	DWORD [12+esp],eax
+	lea	eax,[1859775393+esi*1+eax]
+	mov	esi,DWORD [16+esp]
+	add	eax,ebp
+	; 20_39 36
+	mov	ebp,ebx
+	xor	esi,DWORD [24+esp]
+	xor	ebp,ecx
+	xor	esi,DWORD [48+esp]
+	xor	ebp,edx
+	xor	esi,DWORD [4+esp]
+	rol	esi,1
+	add	edi,ebp
+	ror	ebx,2
+	mov	ebp,eax
+	rol	ebp,5
+	mov	DWORD [16+esp],esi
+	lea	esi,[1859775393+edi*1+esi]
+	mov	edi,DWORD [20+esp]
+	add	esi,ebp
+	; 20_39 37
+	mov	ebp,eax
+	xor	edi,DWORD [28+esp]
+	xor	ebp,ebx
+	xor	edi,DWORD [52+esp]
+	xor	ebp,ecx
+	xor	edi,DWORD [8+esp]
+	rol	edi,1
+	add	edx,ebp
+	ror	eax,2
+	mov	ebp,esi
+	rol	ebp,5
+	mov	DWORD [20+esp],edi
+	lea	edi,[1859775393+edx*1+edi]
+	mov	edx,DWORD [24+esp]
+	add	edi,ebp
+	; 20_39 38
+	mov	ebp,esi
+	xor	edx,DWORD [32+esp]
+	xor	ebp,eax
+	xor	edx,DWORD [56+esp]
+	xor	ebp,ebx
+	xor	edx,DWORD [12+esp]
+	rol	edx,1
+	add	ecx,ebp
+	ror	esi,2
+	mov	ebp,edi
+	rol	ebp,5
+	mov	DWORD [24+esp],edx
+	lea	edx,[1859775393+ecx*1+edx]
+	mov	ecx,DWORD [28+esp]
+	add	edx,ebp
+	; 20_39 39
+	mov	ebp,edi
+	xor	ecx,DWORD [36+esp]
+	xor	ebp,esi
+	xor	ecx,DWORD [60+esp]
+	xor	ebp,eax
+	xor	ecx,DWORD [16+esp]
+	rol	ecx,1
+	add	ebx,ebp
+	ror	edi,2
+	mov	ebp,edx
+	rol	ebp,5
+	mov	DWORD [28+esp],ecx
+	lea	ecx,[1859775393+ebx*1+ecx]
+	mov	ebx,DWORD [32+esp]
+	add	ecx,ebp
+	; 40_59 40
+	mov	ebp,edi
+	xor	ebx,DWORD [40+esp]
+	xor	ebp,esi
+	xor	ebx,DWORD [esp]
+	and	ebp,edx
+	xor	ebx,DWORD [20+esp]
+	rol	ebx,1
+	add	ebp,eax
+	ror	edx,2
+	mov	eax,ecx
+	rol	eax,5
+	mov	DWORD [32+esp],ebx
+	lea	ebx,[2400959708+ebp*1+ebx]
+	mov	ebp,edi
+	add	ebx,eax
+	and	ebp,esi
+	mov	eax,DWORD [36+esp]
+	add	ebx,ebp
+	; 40_59 41
+	mov	ebp,edx
+	xor	eax,DWORD [44+esp]
+	xor	ebp,edi
+	xor	eax,DWORD [4+esp]
+	and	ebp,ecx
+	xor	eax,DWORD [24+esp]
+	rol	eax,1
+	add	ebp,esi
+	ror	ecx,2
+	mov	esi,ebx
+	rol	esi,5
+	mov	DWORD [36+esp],eax
+	lea	eax,[2400959708+ebp*1+eax]
+	mov	ebp,edx
+	add	eax,esi
+	and	ebp,edi
+	mov	esi,DWORD [40+esp]
+	add	eax,ebp
+	; 40_59 42
+	mov	ebp,ecx
+	xor	esi,DWORD [48+esp]
+	xor	ebp,edx
+	xor	esi,DWORD [8+esp]
+	and	ebp,ebx
+	xor	esi,DWORD [28+esp]
+	rol	esi,1
+	add	ebp,edi
+	ror	ebx,2
+	mov	edi,eax
+	rol	edi,5
+	mov	DWORD [40+esp],esi
+	lea	esi,[2400959708+ebp*1+esi]
+	mov	ebp,ecx
+	add	esi,edi
+	and	ebp,edx
+	mov	edi,DWORD [44+esp]
+	add	esi,ebp
+	; 40_59 43
+	mov	ebp,ebx
+	xor	edi,DWORD [52+esp]
+	xor	ebp,ecx
+	xor	edi,DWORD [12+esp]
+	and	ebp,eax
+	xor	edi,DWORD [32+esp]
+	rol	edi,1
+	add	ebp,edx
+	ror	eax,2
+	mov	edx,esi
+	rol	edx,5
+	mov	DWORD [44+esp],edi
+	lea	edi,[2400959708+ebp*1+edi]
+	mov	ebp,ebx
+	add	edi,edx
+	and	ebp,ecx
+	mov	edx,DWORD [48+esp]
+	add	edi,ebp
+	; 40_59 44
+	mov	ebp,eax
+	xor	edx,DWORD [56+esp]
+	xor	ebp,ebx
+	xor	edx,DWORD [16+esp]
+	and	ebp,esi
+	xor	edx,DWORD [36+esp]
+	rol	edx,1
+	add	ebp,ecx
+	ror	esi,2
+	mov	ecx,edi
+	rol	ecx,5
+	mov	DWORD [48+esp],edx
+	lea	edx,[2400959708+ebp*1+edx]
+	mov	ebp,eax
+	add	edx,ecx
+	and	ebp,ebx
+	mov	ecx,DWORD [52+esp]
+	add	edx,ebp
+	; 40_59 45
+	mov	ebp,esi
+	xor	ecx,DWORD [60+esp]
+	xor	ebp,eax
+	xor	ecx,DWORD [20+esp]
+	and	ebp,edi
+	xor	ecx,DWORD [40+esp]
+	rol	ecx,1
+	add	ebp,ebx
+	ror	edi,2
+	mov	ebx,edx
+	rol	ebx,5
+	mov	DWORD [52+esp],ecx
+	lea	ecx,[2400959708+ebp*1+ecx]
+	mov	ebp,esi
+	add	ecx,ebx
+	and	ebp,eax
+	mov	ebx,DWORD [56+esp]
+	add	ecx,ebp
+	; 40_59 46
+	mov	ebp,edi
+	xor	ebx,DWORD [esp]
+	xor	ebp,esi
+	xor	ebx,DWORD [24+esp]
+	and	ebp,edx
+	xor	ebx,DWORD [44+esp]
+	rol	ebx,1
+	add	ebp,eax
+	ror	edx,2
+	mov	eax,ecx
+	rol	eax,5
+	mov	DWORD [56+esp],ebx
+	lea	ebx,[2400959708+ebp*1+ebx]
+	mov	ebp,edi
+	add	ebx,eax
+	and	ebp,esi
+	mov	eax,DWORD [60+esp]
+	add	ebx,ebp
+	; 40_59 47
+	mov	ebp,edx
+	xor	eax,DWORD [4+esp]
+	xor	ebp,edi
+	xor	eax,DWORD [28+esp]
+	and	ebp,ecx
+	xor	eax,DWORD [48+esp]
+	rol	eax,1
+	add	ebp,esi
+	ror	ecx,2
+	mov	esi,ebx
+	rol	esi,5
+	mov	DWORD [60+esp],eax
+	lea	eax,[2400959708+ebp*1+eax]
+	mov	ebp,edx
+	add	eax,esi
+	and	ebp,edi
+	mov	esi,DWORD [esp]
+	add	eax,ebp
+	; 40_59 48
+	mov	ebp,ecx
+	xor	esi,DWORD [8+esp]
+	xor	ebp,edx
+	xor	esi,DWORD [32+esp]
+	and	ebp,ebx
+	xor	esi,DWORD [52+esp]
+	rol	esi,1
+	add	ebp,edi
+	ror	ebx,2
+	mov	edi,eax
+	rol	edi,5
+	mov	DWORD [esp],esi
+	lea	esi,[2400959708+ebp*1+esi]
+	mov	ebp,ecx
+	add	esi,edi
+	and	ebp,edx
+	mov	edi,DWORD [4+esp]
+	add	esi,ebp
+	; 40_59 49
+	mov	ebp,ebx
+	xor	edi,DWORD [12+esp]
+	xor	ebp,ecx
+	xor	edi,DWORD [36+esp]
+	and	ebp,eax
+	xor	edi,DWORD [56+esp]
+	rol	edi,1
+	add	ebp,edx
+	ror	eax,2
+	mov	edx,esi
+	rol	edx,5
+	mov	DWORD [4+esp],edi
+	lea	edi,[2400959708+ebp*1+edi]
+	mov	ebp,ebx
+	add	edi,edx
+	and	ebp,ecx
+	mov	edx,DWORD [8+esp]
+	add	edi,ebp
+	; 40_59 50
+	mov	ebp,eax
+	xor	edx,DWORD [16+esp]
+	xor	ebp,ebx
+	xor	edx,DWORD [40+esp]
+	and	ebp,esi
+	xor	edx,DWORD [60+esp]
+	rol	edx,1
+	add	ebp,ecx
+	ror	esi,2
+	mov	ecx,edi
+	rol	ecx,5
+	mov	DWORD [8+esp],edx
+	lea	edx,[2400959708+ebp*1+edx]
+	mov	ebp,eax
+	add	edx,ecx
+	and	ebp,ebx
+	mov	ecx,DWORD [12+esp]
+	add	edx,ebp
+	; 40_59 51
+	mov	ebp,esi
+	xor	ecx,DWORD [20+esp]
+	xor	ebp,eax
+	xor	ecx,DWORD [44+esp]
+	and	ebp,edi
+	xor	ecx,DWORD [esp]
+	rol	ecx,1
+	add	ebp,ebx
+	ror	edi,2
+	mov	ebx,edx
+	rol	ebx,5
+	mov	DWORD [12+esp],ecx
+	lea	ecx,[2400959708+ebp*1+ecx]
+	mov	ebp,esi
+	add	ecx,ebx
+	and	ebp,eax
+	mov	ebx,DWORD [16+esp]
+	add	ecx,ebp
+	; 40_59 52
+	mov	ebp,edi
+	xor	ebx,DWORD [24+esp]
+	xor	ebp,esi
+	xor	ebx,DWORD [48+esp]
+	and	ebp,edx
+	xor	ebx,DWORD [4+esp]
+	rol	ebx,1
+	add	ebp,eax
+	ror	edx,2
+	mov	eax,ecx
+	rol	eax,5
+	mov	DWORD [16+esp],ebx
+	lea	ebx,[2400959708+ebp*1+ebx]
+	mov	ebp,edi
+	add	ebx,eax
+	and	ebp,esi
+	mov	eax,DWORD [20+esp]
+	add	ebx,ebp
+	; 40_59 53
+	mov	ebp,edx
+	xor	eax,DWORD [28+esp]
+	xor	ebp,edi
+	xor	eax,DWORD [52+esp]
+	and	ebp,ecx
+	xor	eax,DWORD [8+esp]
+	rol	eax,1
+	add	ebp,esi
+	ror	ecx,2
+	mov	esi,ebx
+	rol	esi,5
+	mov	DWORD [20+esp],eax
+	lea	eax,[2400959708+ebp*1+eax]
+	mov	ebp,edx
+	add	eax,esi
+	and	ebp,edi
+	mov	esi,DWORD [24+esp]
+	add	eax,ebp
+	; 40_59 54
+	mov	ebp,ecx
+	xor	esi,DWORD [32+esp]
+	xor	ebp,edx
+	xor	esi,DWORD [56+esp]
+	and	ebp,ebx
+	xor	esi,DWORD [12+esp]
+	rol	esi,1
+	add	ebp,edi
+	ror	ebx,2
+	mov	edi,eax
+	rol	edi,5
+	mov	DWORD [24+esp],esi
+	lea	esi,[2400959708+ebp*1+esi]
+	mov	ebp,ecx
+	add	esi,edi
+	and	ebp,edx
+	mov	edi,DWORD [28+esp]
+	add	esi,ebp
+	; 40_59 55
+	mov	ebp,ebx
+	xor	edi,DWORD [36+esp]
+	xor	ebp,ecx
+	xor	edi,DWORD [60+esp]
+	and	ebp,eax
+	xor	edi,DWORD [16+esp]
+	rol	edi,1
+	add	ebp,edx
+	ror	eax,2
+	mov	edx,esi
+	rol	edx,5
+	mov	DWORD [28+esp],edi
+	lea	edi,[2400959708+ebp*1+edi]
+	mov	ebp,ebx
+	add	edi,edx
+	and	ebp,ecx
+	mov	edx,DWORD [32+esp]
+	add	edi,ebp
+	; 40_59 56
+	mov	ebp,eax
+	xor	edx,DWORD [40+esp]
+	xor	ebp,ebx
+	xor	edx,DWORD [esp]
+	and	ebp,esi
+	xor	edx,DWORD [20+esp]
+	rol	edx,1
+	add	ebp,ecx
+	ror	esi,2
+	mov	ecx,edi
+	rol	ecx,5
+	mov	DWORD [32+esp],edx
+	lea	edx,[2400959708+ebp*1+edx]
+	mov	ebp,eax
+	add	edx,ecx
+	and	ebp,ebx
+	mov	ecx,DWORD [36+esp]
+	add	edx,ebp
+	; 40_59 57
+	mov	ebp,esi
+	xor	ecx,DWORD [44+esp]
+	xor	ebp,eax
+	xor	ecx,DWORD [4+esp]
+	and	ebp,edi
+	xor	ecx,DWORD [24+esp]
+	rol	ecx,1
+	add	ebp,ebx
+	ror	edi,2
+	mov	ebx,edx
+	rol	ebx,5
+	mov	DWORD [36+esp],ecx
+	lea	ecx,[2400959708+ebp*1+ecx]
+	mov	ebp,esi
+	add	ecx,ebx
+	and	ebp,eax
+	mov	ebx,DWORD [40+esp]
+	add	ecx,ebp
+	; 40_59 58
+	mov	ebp,edi
+	xor	ebx,DWORD [48+esp]
+	xor	ebp,esi
+	xor	ebx,DWORD [8+esp]
+	and	ebp,edx
+	xor	ebx,DWORD [28+esp]
+	rol	ebx,1
+	add	ebp,eax
+	ror	edx,2
+	mov	eax,ecx
+	rol	eax,5
+	mov	DWORD [40+esp],ebx
+	lea	ebx,[2400959708+ebp*1+ebx]
+	mov	ebp,edi
+	add	ebx,eax
+	and	ebp,esi
+	mov	eax,DWORD [44+esp]
+	add	ebx,ebp
+	; 40_59 59
+	mov	ebp,edx
+	xor	eax,DWORD [52+esp]
+	xor	ebp,edi
+	xor	eax,DWORD [12+esp]
+	and	ebp,ecx
+	xor	eax,DWORD [32+esp]
+	rol	eax,1
+	add	ebp,esi
+	ror	ecx,2
+	mov	esi,ebx
+	rol	esi,5
+	mov	DWORD [44+esp],eax
+	lea	eax,[2400959708+ebp*1+eax]
+	mov	ebp,edx
+	add	eax,esi
+	and	ebp,edi
+	mov	esi,DWORD [48+esp]
+	add	eax,ebp
+	; 20_39 60
+	mov	ebp,ebx
+	xor	esi,DWORD [56+esp]
+	xor	ebp,ecx
+	xor	esi,DWORD [16+esp]
+	xor	ebp,edx
+	xor	esi,DWORD [36+esp]
+	rol	esi,1
+	add	edi,ebp
+	ror	ebx,2
+	mov	ebp,eax
+	rol	ebp,5
+	mov	DWORD [48+esp],esi
+	lea	esi,[3395469782+edi*1+esi]
+	mov	edi,DWORD [52+esp]
+	add	esi,ebp
+	; 20_39 61
+	mov	ebp,eax
+	xor	edi,DWORD [60+esp]
+	xor	ebp,ebx
+	xor	edi,DWORD [20+esp]
+	xor	ebp,ecx
+	xor	edi,DWORD [40+esp]
+	rol	edi,1
+	add	edx,ebp
+	ror	eax,2
+	mov	ebp,esi
+	rol	ebp,5
+	mov	DWORD [52+esp],edi
+	lea	edi,[3395469782+edx*1+edi]
+	mov	edx,DWORD [56+esp]
+	add	edi,ebp
+	; 20_39 62
+	mov	ebp,esi
+	xor	edx,DWORD [esp]
+	xor	ebp,eax
+	xor	edx,DWORD [24+esp]
+	xor	ebp,ebx
+	xor	edx,DWORD [44+esp]
+	rol	edx,1
+	add	ecx,ebp
+	ror	esi,2
+	mov	ebp,edi
+	rol	ebp,5
+	mov	DWORD [56+esp],edx
+	lea	edx,[3395469782+ecx*1+edx]
+	mov	ecx,DWORD [60+esp]
+	add	edx,ebp
+	; 20_39 63
+	mov	ebp,edi
+	xor	ecx,DWORD [4+esp]
+	xor	ebp,esi
+	xor	ecx,DWORD [28+esp]
+	xor	ebp,eax
+	xor	ecx,DWORD [48+esp]
+	rol	ecx,1
+	add	ebx,ebp
+	ror	edi,2
+	mov	ebp,edx
+	rol	ebp,5
+	mov	DWORD [60+esp],ecx
+	lea	ecx,[3395469782+ebx*1+ecx]
+	mov	ebx,DWORD [esp]
+	add	ecx,ebp
+	; 20_39 64
+	mov	ebp,edx
+	xor	ebx,DWORD [8+esp]
+	xor	ebp,edi
+	xor	ebx,DWORD [32+esp]
+	xor	ebp,esi
+	xor	ebx,DWORD [52+esp]
+	rol	ebx,1
+	add	eax,ebp
+	ror	edx,2
+	mov	ebp,ecx
+	rol	ebp,5
+	mov	DWORD [esp],ebx
+	lea	ebx,[3395469782+eax*1+ebx]
+	mov	eax,DWORD [4+esp]
+	add	ebx,ebp
+	; 20_39 65
+	mov	ebp,ecx
+	xor	eax,DWORD [12+esp]
+	xor	ebp,edx
+	xor	eax,DWORD [36+esp]
+	xor	ebp,edi
+	xor	eax,DWORD [56+esp]
+	rol	eax,1
+	add	esi,ebp
+	ror	ecx,2
+	mov	ebp,ebx
+	rol	ebp,5
+	mov	DWORD [4+esp],eax
+	lea	eax,[3395469782+esi*1+eax]
+	mov	esi,DWORD [8+esp]
+	add	eax,ebp
+	; 20_39 66
+	mov	ebp,ebx
+	xor	esi,DWORD [16+esp]
+	xor	ebp,ecx
+	xor	esi,DWORD [40+esp]
+	xor	ebp,edx
+	xor	esi,DWORD [60+esp]
+	rol	esi,1
+	add	edi,ebp
+	ror	ebx,2
+	mov	ebp,eax
+	rol	ebp,5
+	mov	DWORD [8+esp],esi
+	lea	esi,[3395469782+edi*1+esi]
+	mov	edi,DWORD [12+esp]
+	add	esi,ebp
+	; 20_39 67
+	mov	ebp,eax
+	xor	edi,DWORD [20+esp]
+	xor	ebp,ebx
+	xor	edi,DWORD [44+esp]
+	xor	ebp,ecx
+	xor	edi,DWORD [esp]
+	rol	edi,1
+	add	edx,ebp
+	ror	eax,2
+	mov	ebp,esi
+	rol	ebp,5
+	mov	DWORD [12+esp],edi
+	lea	edi,[3395469782+edx*1+edi]
+	mov	edx,DWORD [16+esp]
+	add	edi,ebp
+	; 20_39 68
+	mov	ebp,esi
+	xor	edx,DWORD [24+esp]
+	xor	ebp,eax
+	xor	edx,DWORD [48+esp]
+	xor	ebp,ebx
+	xor	edx,DWORD [4+esp]
+	rol	edx,1
+	add	ecx,ebp
+	ror	esi,2
+	mov	ebp,edi
+	rol	ebp,5
+	mov	DWORD [16+esp],edx
+	lea	edx,[3395469782+ecx*1+edx]
+	mov	ecx,DWORD [20+esp]
+	add	edx,ebp
+	; 20_39 69
+	mov	ebp,edi
+	xor	ecx,DWORD [28+esp]
+	xor	ebp,esi
+	xor	ecx,DWORD [52+esp]
+	xor	ebp,eax
+	xor	ecx,DWORD [8+esp]
+	rol	ecx,1
+	add	ebx,ebp
+	ror	edi,2
+	mov	ebp,edx
+	rol	ebp,5
+	mov	DWORD [20+esp],ecx
+	lea	ecx,[3395469782+ebx*1+ecx]
+	mov	ebx,DWORD [24+esp]
+	add	ecx,ebp
+	; 20_39 70
+	mov	ebp,edx
+	xor	ebx,DWORD [32+esp]
+	xor	ebp,edi
+	xor	ebx,DWORD [56+esp]
+	xor	ebp,esi
+	xor	ebx,DWORD [12+esp]
+	rol	ebx,1
+	add	eax,ebp
+	ror	edx,2
+	mov	ebp,ecx
+	rol	ebp,5
+	mov	DWORD [24+esp],ebx
+	lea	ebx,[3395469782+eax*1+ebx]
+	mov	eax,DWORD [28+esp]
+	add	ebx,ebp
+	; 20_39 71
+	mov	ebp,ecx
+	xor	eax,DWORD [36+esp]
+	xor	ebp,edx
+	xor	eax,DWORD [60+esp]
+	xor	ebp,edi
+	xor	eax,DWORD [16+esp]
+	rol	eax,1
+	add	esi,ebp
+	ror	ecx,2
+	mov	ebp,ebx
+	rol	ebp,5
+	mov	DWORD [28+esp],eax
+	lea	eax,[3395469782+esi*1+eax]
+	mov	esi,DWORD [32+esp]
+	add	eax,ebp
+	; 20_39 72
+	mov	ebp,ebx
+	xor	esi,DWORD [40+esp]
+	xor	ebp,ecx
+	xor	esi,DWORD [esp]
+	xor	ebp,edx
+	xor	esi,DWORD [20+esp]
+	rol	esi,1
+	add	edi,ebp
+	ror	ebx,2
+	mov	ebp,eax
+	rol	ebp,5
+	mov	DWORD [32+esp],esi
+	lea	esi,[3395469782+edi*1+esi]
+	mov	edi,DWORD [36+esp]
+	add	esi,ebp
+	; 20_39 73
+	mov	ebp,eax
+	xor	edi,DWORD [44+esp]
+	xor	ebp,ebx
+	xor	edi,DWORD [4+esp]
+	xor	ebp,ecx
+	xor	edi,DWORD [24+esp]
+	rol	edi,1
+	add	edx,ebp
+	ror	eax,2
+	mov	ebp,esi
+	rol	ebp,5
+	mov	DWORD [36+esp],edi
+	lea	edi,[3395469782+edx*1+edi]
+	mov	edx,DWORD [40+esp]
+	add	edi,ebp
+	; 20_39 74
+	mov	ebp,esi
+	xor	edx,DWORD [48+esp]
+	xor	ebp,eax
+	xor	edx,DWORD [8+esp]
+	xor	ebp,ebx
+	xor	edx,DWORD [28+esp]
+	rol	edx,1
+	add	ecx,ebp
+	ror	esi,2
+	mov	ebp,edi
+	rol	ebp,5
+	mov	DWORD [40+esp],edx
+	lea	edx,[3395469782+ecx*1+edx]
+	mov	ecx,DWORD [44+esp]
+	add	edx,ebp
+	; 20_39 75
+	mov	ebp,edi
+	xor	ecx,DWORD [52+esp]
+	xor	ebp,esi
+	xor	ecx,DWORD [12+esp]
+	xor	ebp,eax
+	xor	ecx,DWORD [32+esp]
+	rol	ecx,1
+	add	ebx,ebp
+	ror	edi,2
+	mov	ebp,edx
+	rol	ebp,5
+	mov	DWORD [44+esp],ecx
+	lea	ecx,[3395469782+ebx*1+ecx]
+	mov	ebx,DWORD [48+esp]
+	add	ecx,ebp
+	; 20_39 76
+	mov	ebp,edx
+	xor	ebx,DWORD [56+esp]
+	xor	ebp,edi
+	xor	ebx,DWORD [16+esp]
+	xor	ebp,esi
+	xor	ebx,DWORD [36+esp]
+	rol	ebx,1
+	add	eax,ebp
+	ror	edx,2
+	mov	ebp,ecx
+	rol	ebp,5
+	mov	DWORD [48+esp],ebx
+	lea	ebx,[3395469782+eax*1+ebx]
+	mov	eax,DWORD [52+esp]
+	add	ebx,ebp
+	; 20_39 77
+	mov	ebp,ecx
+	xor	eax,DWORD [60+esp]
+	xor	ebp,edx
+	xor	eax,DWORD [20+esp]
+	xor	ebp,edi
+	xor	eax,DWORD [40+esp]
+	rol	eax,1
+	add	esi,ebp
+	ror	ecx,2
+	mov	ebp,ebx
+	rol	ebp,5
+	lea	eax,[3395469782+esi*1+eax]
+	mov	esi,DWORD [56+esp]
+	add	eax,ebp
+	; 20_39 78
+	mov	ebp,ebx
+	xor	esi,DWORD [esp]
+	xor	ebp,ecx
+	xor	esi,DWORD [24+esp]
+	xor	ebp,edx
+	xor	esi,DWORD [44+esp]
+	rol	esi,1
+	add	edi,ebp
+	ror	ebx,2
+	mov	ebp,eax
+	rol	ebp,5
+	lea	esi,[3395469782+edi*1+esi]
+	mov	edi,DWORD [60+esp]
+	add	esi,ebp
+	; 20_39 79
+	mov	ebp,eax
+	xor	edi,DWORD [4+esp]
+	xor	ebp,ebx
+	xor	edi,DWORD [28+esp]
+	xor	ebp,ecx
+	xor	edi,DWORD [48+esp]
+	rol	edi,1
+	add	edx,ebp
+	ror	eax,2
+	mov	ebp,esi
+	rol	ebp,5
+	lea	edi,[3395469782+edx*1+edi]
+	add	edi,ebp
+	mov	ebp,DWORD [96+esp]
+	mov	edx,DWORD [100+esp]
+	add	edi,DWORD [ebp]
+	add	esi,DWORD [4+ebp]
+	add	eax,DWORD [8+ebp]
+	add	ebx,DWORD [12+ebp]
+	add	ecx,DWORD [16+ebp]
+	mov	DWORD [ebp],edi
+	add	edx,64
+	mov	DWORD [4+ebp],esi
+	cmp	edx,DWORD [104+esp]
+	mov	DWORD [8+ebp],eax
+	mov	edi,ecx
+	mov	DWORD [12+ebp],ebx
+	mov	esi,edx
+	mov	DWORD [16+ebp],ecx
+	jb	NEAR L$002loop
+	add	esp,76
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	16
+__sha1_block_data_order_ssse3:	; SSSE3 SHA-1 block routine: sets up a scratch frame, loads state and first block, then enters L$004loop
+	push	ebp	; save ebp/ebx/esi/edi (popped in reverse order before ret)
+	push	ebx
+	push	esi
+	push	edi
+	call	L$003pic_point	; call/pop pair obtains the current address for position-independent addressing
+L$003pic_point:
+	pop	ebp
+	lea	ebp,[(L$K_XX_XX-L$003pic_point)+ebp]	; ebp = run-time address of the L$K_XX_XX table
+L$ssse3_shortcut:	; NOTE(review): presumably an alternate entry reached with ebp already pointing at L$K_XX_XX - confirm against the caller
+	movdqa	xmm7,[ebp]	; load five 16-byte table entries (offsets 0,16,32,48,64)
+	movdqa	xmm0,[16+ebp]
+	movdqa	xmm1,[32+ebp]
+	movdqa	xmm2,[48+ebp]
+	movdqa	xmm6,[64+ebp]	; xmm6 is the shuffle control used by the pshufb bytes below (presumably a byte-swap mask - table not visible here)
+	mov	edi,DWORD [20+esp]	; 1st stack argument (past 4 pushes + return address): pointer to the five 32-bit state words
+	mov	ebp,DWORD [24+esp]	; 2nd stack argument: input data pointer
+	mov	edx,DWORD [28+esp]	; 3rd stack argument: number of 64-byte blocks (scaled by shl 6 below)
+	mov	esi,esp	; remember the caller's esp; kept at [204+esp] and restored at exit
+	sub	esp,208	; reserve the scratch frame
+	and	esp,-64	; align the frame down to a 64-byte boundary
+	movdqa	[112+esp],xmm0	; spill the table entries into the frame (offsets 112..191)
+	movdqa	[128+esp],xmm1
+	movdqa	[144+esp],xmm2
+	shl	edx,6	; block count -> byte length
+	movdqa	[160+esp],xmm7
+	add	edx,ebp	; edx = end-of-input pointer
+	movdqa	[176+esp],xmm6
+	add	ebp,64	; ebp = address of the block after the current one
+	mov	DWORD [192+esp],edi	; frame slot: state pointer
+	mov	DWORD [196+esp],ebp	; frame slot: next input block
+	mov	DWORD [200+esp],edx	; frame slot: end of input
+	mov	DWORD [204+esp],esi	; frame slot: caller's esp
+	mov	eax,DWORD [edi]	; load the five state words into eax, ebx, ecx, edx, edi
+	mov	ebx,DWORD [4+edi]
+	mov	ecx,DWORD [8+edi]
+	mov	edx,DWORD [12+edi]
+	mov	edi,DWORD [16+edi]
+	mov	esi,ebx	; esi starts as a copy of the second state word (masked below)
+	movdqu	xmm0,[ebp-64]	; unaligned load of the current 64-byte block into xmm0..xmm3
+	movdqu	xmm1,[ebp-48]
+	movdqu	xmm2,[ebp-32]
+	movdqu	xmm3,[ebp-16]
+db	102,15,56,0,198	; hand-encoded pshufb xmm0,xmm6
+db	102,15,56,0,206	; hand-encoded pshufb xmm1,xmm6
+db	102,15,56,0,214	; hand-encoded pshufb xmm2,xmm6
+	movdqa	[96+esp],xmm7	; keep a copy of the xmm7 table entry at [96+esp]; the loop reloads it from there
+db	102,15,56,0,222	; hand-encoded pshufb xmm3,xmm6
+	paddd	xmm0,xmm7	; add the xmm7 table entry to the first three vectors ...
+	paddd	xmm1,xmm7
+	paddd	xmm2,xmm7
+	movdqa	[esp],xmm0	; ... and park the sums at [0..47+esp], where the scalar rounds read them
+	psubd	xmm0,xmm7	; undo the add so the register holds the plain words again
+	movdqa	[16+esp],xmm1
+	psubd	xmm1,xmm7
+	movdqa	[32+esp],xmm2
+	mov	ebp,ecx
+	psubd	xmm2,xmm7
+	xor	ebp,edx
+	pshufd	xmm4,xmm0,238
+	and	esi,ebp	; esi = ebx & (ecx ^ edx); the loop's first round finishes it with xor esi,edx
+	jmp	NEAR L$004loop
+align	16
+L$004loop:	; one pass per 64-byte block: scalar round arithmetic interleaved with SSE updates of the xmm0..xmm7 vectors
+	ror	ebx,2
+	xor	esi,edx	; completes (ebx & (ecx ^ edx)) ^ edx from the value prepared before entry
+	mov	ebp,eax
+	punpcklqdq	xmm4,xmm1
+	movdqa	xmm6,xmm3
+	add	edi,DWORD [esp]	; scalar rounds consume the precomputed sums stored in the [0..63+esp] slots
+	xor	ebx,ecx
+	paddd	xmm7,xmm3
+	movdqa	[64+esp],xmm0
+	rol	eax,5
+	add	edi,esi
+	psrldq	xmm6,4
+	and	ebp,ebx
+	xor	ebx,ecx
+	pxor	xmm4,xmm0
+	add	edi,eax
+	ror	eax,7	; rol 5 followed by ror 7 leaves eax rotated right by 2 overall
+	pxor	xmm6,xmm2
+	xor	ebp,ecx
+	mov	esi,edi
+	add	edx,DWORD [4+esp]
+	pxor	xmm4,xmm6
+	xor	eax,ebx
+	rol	edi,5
+	movdqa	[48+esp],xmm7	; store a fresh vector sum for later rounds
+	add	edx,ebp
+	and	esi,eax
+	movdqa	xmm0,xmm4
+	xor	eax,ebx
+	add	edx,edi
+	ror	edi,7
+	movdqa	xmm6,xmm4
+	xor	esi,ebx
+	pslldq	xmm0,12
+	paddd	xmm4,xmm4
+	mov	ebp,edx
+	add	ecx,DWORD [8+esp]
+	psrld	xmm6,31
+	xor	edi,eax
+	rol	edx,5
+	movdqa	xmm7,xmm0
+	add	ecx,esi
+	and	ebp,edi
+	xor	edi,eax
+	psrld	xmm0,30
+	add	ecx,edx
+	ror	edx,7
+	por	xmm4,xmm6
+	xor	ebp,eax
+	mov	esi,ecx
+	add	ebx,DWORD [12+esp]
+	pslld	xmm7,2
+	xor	edx,edi
+	rol	ecx,5
+	pxor	xmm4,xmm0
+	movdqa	xmm0,[96+esp]
+	add	ebx,ebp
+	and	esi,edx
+	pxor	xmm4,xmm7
+	pshufd	xmm5,xmm1,238
+	xor	edx,edi
+	add	ebx,ecx
+	ror	ecx,7
+	xor	esi,edi
+	mov	ebp,ebx
+	punpcklqdq	xmm5,xmm2
+	movdqa	xmm7,xmm4
+	add	eax,DWORD [16+esp]
+	xor	ecx,edx
+	paddd	xmm0,xmm4
+	movdqa	[80+esp],xmm1
+	rol	ebx,5
+	add	eax,esi
+	psrldq	xmm7,4
+	and	ebp,ecx
+	xor	ecx,edx
+	pxor	xmm5,xmm1
+	add	eax,ebx
+	ror	ebx,7
+	pxor	xmm7,xmm3
+	xor	ebp,edx
+	mov	esi,eax
+	add	edi,DWORD [20+esp]
+	pxor	xmm5,xmm7
+	xor	ebx,ecx
+	rol	eax,5
+	movdqa	[esp],xmm0
+	add	edi,ebp
+	and	esi,ebx
+	movdqa	xmm1,xmm5
+	xor	ebx,ecx
+	add	edi,eax
+	ror	eax,7
+	movdqa	xmm7,xmm5
+	xor	esi,ecx
+	pslldq	xmm1,12
+	paddd	xmm5,xmm5
+	mov	ebp,edi
+	add	edx,DWORD [24+esp]
+	psrld	xmm7,31
+	xor	eax,ebx
+	rol	edi,5
+	movdqa	xmm0,xmm1
+	add	edx,esi
+	and	ebp,eax
+	xor	eax,ebx
+	psrld	xmm1,30
+	add	edx,edi
+	ror	edi,7
+	por	xmm5,xmm7
+	xor	ebp,ebx
+	mov	esi,edx
+	add	ecx,DWORD [28+esp]
+	pslld	xmm0,2
+	xor	edi,eax
+	rol	edx,5
+	pxor	xmm5,xmm1
+	movdqa	xmm1,[112+esp]
+	add	ecx,ebp
+	and	esi,edi
+	pxor	xmm5,xmm0
+	pshufd	xmm6,xmm2,238
+	xor	edi,eax
+	add	ecx,edx
+	ror	edx,7
+	xor	esi,eax
+	mov	ebp,ecx
+	punpcklqdq	xmm6,xmm3
+	movdqa	xmm0,xmm5
+	add	ebx,DWORD [32+esp]
+	xor	edx,edi
+	paddd	xmm1,xmm5
+	movdqa	[96+esp],xmm2
+	rol	ecx,5
+	add	ebx,esi
+	psrldq	xmm0,4
+	and	ebp,edx
+	xor	edx,edi
+	pxor	xmm6,xmm2
+	add	ebx,ecx
+	ror	ecx,7
+	pxor	xmm0,xmm4
+	xor	ebp,edi
+	mov	esi,ebx
+	add	eax,DWORD [36+esp]
+	pxor	xmm6,xmm0
+	xor	ecx,edx
+	rol	ebx,5
+	movdqa	[16+esp],xmm1
+	add	eax,ebp
+	and	esi,ecx
+	movdqa	xmm2,xmm6
+	xor	ecx,edx
+	add	eax,ebx
+	ror	ebx,7
+	movdqa	xmm0,xmm6
+	xor	esi,edx
+	pslldq	xmm2,12
+	paddd	xmm6,xmm6
+	mov	ebp,eax
+	add	edi,DWORD [40+esp]
+	psrld	xmm0,31
+	xor	ebx,ecx
+	rol	eax,5
+	movdqa	xmm1,xmm2
+	add	edi,esi
+	and	ebp,ebx
+	xor	ebx,ecx
+	psrld	xmm2,30
+	add	edi,eax
+	ror	eax,7
+	por	xmm6,xmm0
+	xor	ebp,ecx
+	movdqa	xmm0,[64+esp]
+	mov	esi,edi
+	add	edx,DWORD [44+esp]
+	pslld	xmm1,2
+	xor	eax,ebx
+	rol	edi,5
+	pxor	xmm6,xmm2
+	movdqa	xmm2,[112+esp]
+	add	edx,ebp
+	and	esi,eax
+	pxor	xmm6,xmm1
+	pshufd	xmm7,xmm3,238
+	xor	eax,ebx
+	add	edx,edi
+	ror	edi,7
+	xor	esi,ebx
+	mov	ebp,edx
+	punpcklqdq	xmm7,xmm4
+	movdqa	xmm1,xmm6
+	add	ecx,DWORD [48+esp]
+	xor	edi,eax
+	paddd	xmm2,xmm6
+	movdqa	[64+esp],xmm3
+	rol	edx,5
+	add	ecx,esi
+	psrldq	xmm1,4
+	and	ebp,edi
+	xor	edi,eax
+	pxor	xmm7,xmm3
+	add	ecx,edx
+	ror	edx,7
+	pxor	xmm1,xmm5
+	xor	ebp,eax
+	mov	esi,ecx
+	add	ebx,DWORD [52+esp]
+	pxor	xmm7,xmm1
+	xor	edx,edi
+	rol	ecx,5
+	movdqa	[32+esp],xmm2
+	add	ebx,ebp
+	and	esi,edx
+	movdqa	xmm3,xmm7
+	xor	edx,edi
+	add	ebx,ecx
+	ror	ecx,7
+	movdqa	xmm1,xmm7
+	xor	esi,edi
+	pslldq	xmm3,12
+	paddd	xmm7,xmm7
+	mov	ebp,ebx
+	add	eax,DWORD [56+esp]
+	psrld	xmm1,31
+	xor	ecx,edx
+	rol	ebx,5
+	movdqa	xmm2,xmm3
+	add	eax,esi
+	and	ebp,ecx
+	xor	ecx,edx
+	psrld	xmm3,30
+	add	eax,ebx
+	ror	ebx,7
+	por	xmm7,xmm1
+	xor	ebp,edx
+	movdqa	xmm1,[80+esp]
+	mov	esi,eax
+	add	edi,DWORD [60+esp]
+	pslld	xmm2,2
+	xor	ebx,ecx
+	rol	eax,5
+	pxor	xmm7,xmm3
+	movdqa	xmm3,[112+esp]
+	add	edi,ebp
+	and	esi,ebx
+	pxor	xmm7,xmm2
+	pshufd	xmm2,xmm6,238
+	xor	ebx,ecx
+	add	edi,eax
+	ror	eax,7
+	pxor	xmm0,xmm4
+	punpcklqdq	xmm2,xmm7
+	xor	esi,ecx
+	mov	ebp,edi
+	add	edx,DWORD [esp]
+	pxor	xmm0,xmm1
+	movdqa	[80+esp],xmm4
+	xor	eax,ebx
+	rol	edi,5
+	movdqa	xmm4,xmm3
+	add	edx,esi
+	paddd	xmm3,xmm7
+	and	ebp,eax
+	pxor	xmm0,xmm2
+	xor	eax,ebx
+	add	edx,edi
+	ror	edi,7
+	xor	ebp,ebx
+	movdqa	xmm2,xmm0
+	movdqa	[48+esp],xmm3
+	mov	esi,edx
+	add	ecx,DWORD [4+esp]
+	xor	edi,eax
+	rol	edx,5
+	pslld	xmm0,2
+	add	ecx,ebp
+	and	esi,edi
+	psrld	xmm2,30
+	xor	edi,eax
+	add	ecx,edx
+	ror	edx,7
+	xor	esi,eax
+	mov	ebp,ecx
+	add	ebx,DWORD [8+esp]
+	xor	edx,edi
+	rol	ecx,5
+	por	xmm0,xmm2
+	add	ebx,esi
+	and	ebp,edx
+	movdqa	xmm2,[96+esp]
+	xor	edx,edi
+	add	ebx,ecx
+	add	eax,DWORD [12+esp]
+	xor	ebp,edi
+	mov	esi,ebx
+	pshufd	xmm3,xmm7,238
+	rol	ebx,5
+	add	eax,ebp
+	xor	esi,edx
+	ror	ecx,7
+	add	eax,ebx
+	add	edi,DWORD [16+esp]
+	pxor	xmm1,xmm5
+	punpcklqdq	xmm3,xmm0
+	xor	esi,ecx
+	mov	ebp,eax
+	rol	eax,5
+	pxor	xmm1,xmm2
+	movdqa	[96+esp],xmm5
+	add	edi,esi
+	xor	ebp,ecx
+	movdqa	xmm5,xmm4
+	ror	ebx,7
+	paddd	xmm4,xmm0
+	add	edi,eax
+	pxor	xmm1,xmm3
+	add	edx,DWORD [20+esp]
+	xor	ebp,ebx
+	mov	esi,edi
+	rol	edi,5
+	movdqa	xmm3,xmm1
+	movdqa	[esp],xmm4
+	add	edx,ebp
+	xor	esi,ebx
+	ror	eax,7
+	add	edx,edi
+	pslld	xmm1,2
+	add	ecx,DWORD [24+esp]
+	xor	esi,eax
+	psrld	xmm3,30
+	mov	ebp,edx
+	rol	edx,5
+	add	ecx,esi
+	xor	ebp,eax
+	ror	edi,7
+	add	ecx,edx
+	por	xmm1,xmm3
+	add	ebx,DWORD [28+esp]
+	xor	ebp,edi
+	movdqa	xmm3,[64+esp]
+	mov	esi,ecx
+	rol	ecx,5
+	add	ebx,ebp
+	xor	esi,edi
+	ror	edx,7
+	pshufd	xmm4,xmm0,238
+	add	ebx,ecx
+	add	eax,DWORD [32+esp]
+	pxor	xmm2,xmm6
+	punpcklqdq	xmm4,xmm1
+	xor	esi,edx
+	mov	ebp,ebx
+	rol	ebx,5
+	pxor	xmm2,xmm3
+	movdqa	[64+esp],xmm6
+	add	eax,esi
+	xor	ebp,edx
+	movdqa	xmm6,[128+esp]
+	ror	ecx,7
+	paddd	xmm5,xmm1
+	add	eax,ebx
+	pxor	xmm2,xmm4
+	add	edi,DWORD [36+esp]
+	xor	ebp,ecx
+	mov	esi,eax
+	rol	eax,5
+	movdqa	xmm4,xmm2
+	movdqa	[16+esp],xmm5
+	add	edi,ebp
+	xor	esi,ecx
+	ror	ebx,7
+	add	edi,eax
+	pslld	xmm2,2
+	add	edx,DWORD [40+esp]
+	xor	esi,ebx
+	psrld	xmm4,30
+	mov	ebp,edi
+	rol	edi,5
+	add	edx,esi
+	xor	ebp,ebx
+	ror	eax,7
+	add	edx,edi
+	por	xmm2,xmm4
+	add	ecx,DWORD [44+esp]
+	xor	ebp,eax
+	movdqa	xmm4,[80+esp]
+	mov	esi,edx
+	rol	edx,5
+	add	ecx,ebp
+	xor	esi,eax
+	ror	edi,7
+	pshufd	xmm5,xmm1,238
+	add	ecx,edx
+	add	ebx,DWORD [48+esp]
+	pxor	xmm3,xmm7
+	punpcklqdq	xmm5,xmm2
+	xor	esi,edi
+	mov	ebp,ecx
+	rol	ecx,5
+	pxor	xmm3,xmm4
+	movdqa	[80+esp],xmm7
+	add	ebx,esi
+	xor	ebp,edi
+	movdqa	xmm7,xmm6
+	ror	edx,7
+	paddd	xmm6,xmm2
+	add	ebx,ecx
+	pxor	xmm3,xmm5
+	add	eax,DWORD [52+esp]
+	xor	ebp,edx
+	mov	esi,ebx
+	rol	ebx,5
+	movdqa	xmm5,xmm3
+	movdqa	[32+esp],xmm6
+	add	eax,ebp
+	xor	esi,edx
+	ror	ecx,7
+	add	eax,ebx
+	pslld	xmm3,2
+	add	edi,DWORD [56+esp]
+	xor	esi,ecx
+	psrld	xmm5,30
+	mov	ebp,eax
+	rol	eax,5
+	add	edi,esi
+	xor	ebp,ecx
+	ror	ebx,7
+	add	edi,eax
+	por	xmm3,xmm5
+	add	edx,DWORD [60+esp]
+	xor	ebp,ebx
+	movdqa	xmm5,[96+esp]
+	mov	esi,edi
+	rol	edi,5
+	add	edx,ebp
+	xor	esi,ebx
+	ror	eax,7
+	pshufd	xmm6,xmm2,238
+	add	edx,edi
+	add	ecx,DWORD [esp]
+	pxor	xmm4,xmm0
+	punpcklqdq	xmm6,xmm3
+	xor	esi,eax
+	mov	ebp,edx
+	rol	edx,5
+	pxor	xmm4,xmm5
+	movdqa	[96+esp],xmm0
+	add	ecx,esi
+	xor	ebp,eax
+	movdqa	xmm0,xmm7
+	ror	edi,7
+	paddd	xmm7,xmm3
+	add	ecx,edx
+	pxor	xmm4,xmm6
+	add	ebx,DWORD [4+esp]
+	xor	ebp,edi
+	mov	esi,ecx
+	rol	ecx,5
+	movdqa	xmm6,xmm4
+	movdqa	[48+esp],xmm7
+	add	ebx,ebp
+	xor	esi,edi
+	ror	edx,7
+	add	ebx,ecx
+	pslld	xmm4,2
+	add	eax,DWORD [8+esp]
+	xor	esi,edx
+	psrld	xmm6,30
+	mov	ebp,ebx
+	rol	ebx,5
+	add	eax,esi
+	xor	ebp,edx
+	ror	ecx,7
+	add	eax,ebx
+	por	xmm4,xmm6
+	add	edi,DWORD [12+esp]
+	xor	ebp,ecx
+	movdqa	xmm6,[64+esp]
+	mov	esi,eax
+	rol	eax,5
+	add	edi,ebp
+	xor	esi,ecx
+	ror	ebx,7
+	pshufd	xmm7,xmm3,238
+	add	edi,eax
+	add	edx,DWORD [16+esp]
+	pxor	xmm5,xmm1
+	punpcklqdq	xmm7,xmm4
+	xor	esi,ebx
+	mov	ebp,edi
+	rol	edi,5
+	pxor	xmm5,xmm6
+	movdqa	[64+esp],xmm1
+	add	edx,esi
+	xor	ebp,ebx
+	movdqa	xmm1,xmm0
+	ror	eax,7
+	paddd	xmm0,xmm4
+	add	edx,edi
+	pxor	xmm5,xmm7
+	add	ecx,DWORD [20+esp]
+	xor	ebp,eax
+	mov	esi,edx
+	rol	edx,5
+	movdqa	xmm7,xmm5
+	movdqa	[esp],xmm0
+	add	ecx,ebp
+	xor	esi,eax
+	ror	edi,7
+	add	ecx,edx
+	pslld	xmm5,2
+	add	ebx,DWORD [24+esp]
+	xor	esi,edi
+	psrld	xmm7,30
+	mov	ebp,ecx
+	rol	ecx,5
+	add	ebx,esi
+	xor	ebp,edi
+	ror	edx,7
+	add	ebx,ecx
+	por	xmm5,xmm7
+	add	eax,DWORD [28+esp]
+	movdqa	xmm7,[80+esp]
+	ror	ecx,7
+	mov	esi,ebx
+	xor	ebp,edx
+	rol	ebx,5
+	pshufd	xmm0,xmm4,238
+	add	eax,ebp
+	xor	esi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	edi,DWORD [32+esp]
+	pxor	xmm6,xmm2
+	punpcklqdq	xmm0,xmm5
+	and	esi,ecx
+	xor	ecx,edx
+	ror	ebx,7
+	pxor	xmm6,xmm7
+	movdqa	[80+esp],xmm2
+	mov	ebp,eax
+	xor	esi,ecx
+	rol	eax,5
+	movdqa	xmm2,xmm1
+	add	edi,esi
+	paddd	xmm1,xmm5
+	xor	ebp,ebx
+	pxor	xmm6,xmm0
+	xor	ebx,ecx
+	add	edi,eax
+	add	edx,DWORD [36+esp]
+	and	ebp,ebx
+	movdqa	xmm0,xmm6
+	movdqa	[16+esp],xmm1
+	xor	ebx,ecx
+	ror	eax,7
+	mov	esi,edi
+	xor	ebp,ebx
+	rol	edi,5
+	pslld	xmm6,2
+	add	edx,ebp
+	xor	esi,eax
+	psrld	xmm0,30
+	xor	eax,ebx
+	add	edx,edi
+	add	ecx,DWORD [40+esp]
+	and	esi,eax
+	xor	eax,ebx
+	ror	edi,7
+	por	xmm6,xmm0
+	mov	ebp,edx
+	xor	esi,eax
+	movdqa	xmm0,[96+esp]
+	rol	edx,5
+	add	ecx,esi
+	xor	ebp,edi
+	xor	edi,eax
+	add	ecx,edx
+	pshufd	xmm1,xmm5,238
+	add	ebx,DWORD [44+esp]
+	and	ebp,edi
+	xor	edi,eax
+	ror	edx,7
+	mov	esi,ecx
+	xor	ebp,edi
+	rol	ecx,5
+	add	ebx,ebp
+	xor	esi,edx
+	xor	edx,edi
+	add	ebx,ecx
+	add	eax,DWORD [48+esp]
+	pxor	xmm7,xmm3
+	punpcklqdq	xmm1,xmm6
+	and	esi,edx
+	xor	edx,edi
+	ror	ecx,7
+	pxor	xmm7,xmm0
+	movdqa	[96+esp],xmm3
+	mov	ebp,ebx
+	xor	esi,edx
+	rol	ebx,5
+	movdqa	xmm3,[144+esp]
+	add	eax,esi
+	paddd	xmm2,xmm6
+	xor	ebp,ecx
+	pxor	xmm7,xmm1
+	xor	ecx,edx
+	add	eax,ebx
+	add	edi,DWORD [52+esp]
+	and	ebp,ecx
+	movdqa	xmm1,xmm7
+	movdqa	[32+esp],xmm2
+	xor	ecx,edx
+	ror	ebx,7
+	mov	esi,eax
+	xor	ebp,ecx
+	rol	eax,5
+	pslld	xmm7,2
+	add	edi,ebp
+	xor	esi,ebx
+	psrld	xmm1,30
+	xor	ebx,ecx
+	add	edi,eax
+	add	edx,DWORD [56+esp]
+	and	esi,ebx
+	xor	ebx,ecx
+	ror	eax,7
+	por	xmm7,xmm1
+	mov	ebp,edi
+	xor	esi,ebx
+	movdqa	xmm1,[64+esp]
+	rol	edi,5
+	add	edx,esi
+	xor	ebp,eax
+	xor	eax,ebx
+	add	edx,edi
+	pshufd	xmm2,xmm6,238
+	add	ecx,DWORD [60+esp]
+	and	ebp,eax
+	xor	eax,ebx
+	ror	edi,7
+	mov	esi,edx
+	xor	ebp,eax
+	rol	edx,5
+	add	ecx,ebp
+	xor	esi,edi
+	xor	edi,eax
+	add	ecx,edx
+	add	ebx,DWORD [esp]
+	pxor	xmm0,xmm4
+	punpcklqdq	xmm2,xmm7
+	and	esi,edi
+	xor	edi,eax
+	ror	edx,7
+	pxor	xmm0,xmm1
+	movdqa	[64+esp],xmm4
+	mov	ebp,ecx
+	xor	esi,edi
+	rol	ecx,5
+	movdqa	xmm4,xmm3
+	add	ebx,esi
+	paddd	xmm3,xmm7
+	xor	ebp,edx
+	pxor	xmm0,xmm2
+	xor	edx,edi
+	add	ebx,ecx
+	add	eax,DWORD [4+esp]
+	and	ebp,edx
+	movdqa	xmm2,xmm0
+	movdqa	[48+esp],xmm3
+	xor	edx,edi
+	ror	ecx,7
+	mov	esi,ebx
+	xor	ebp,edx
+	rol	ebx,5
+	pslld	xmm0,2
+	add	eax,ebp
+	xor	esi,ecx
+	psrld	xmm2,30
+	xor	ecx,edx
+	add	eax,ebx
+	add	edi,DWORD [8+esp]
+	and	esi,ecx
+	xor	ecx,edx
+	ror	ebx,7
+	por	xmm0,xmm2
+	mov	ebp,eax
+	xor	esi,ecx
+	movdqa	xmm2,[80+esp]
+	rol	eax,5
+	add	edi,esi
+	xor	ebp,ebx
+	xor	ebx,ecx
+	add	edi,eax
+	pshufd	xmm3,xmm7,238
+	add	edx,DWORD [12+esp]
+	and	ebp,ebx
+	xor	ebx,ecx
+	ror	eax,7
+	mov	esi,edi
+	xor	ebp,ebx
+	rol	edi,5
+	add	edx,ebp
+	xor	esi,eax
+	xor	eax,ebx
+	add	edx,edi
+	add	ecx,DWORD [16+esp]
+	pxor	xmm1,xmm5
+	punpcklqdq	xmm3,xmm0
+	and	esi,eax
+	xor	eax,ebx
+	ror	edi,7
+	pxor	xmm1,xmm2
+	movdqa	[80+esp],xmm5
+	mov	ebp,edx
+	xor	esi,eax
+	rol	edx,5
+	movdqa	xmm5,xmm4
+	add	ecx,esi
+	paddd	xmm4,xmm0
+	xor	ebp,edi
+	pxor	xmm1,xmm3
+	xor	edi,eax
+	add	ecx,edx
+	add	ebx,DWORD [20+esp]
+	and	ebp,edi
+	movdqa	xmm3,xmm1
+	movdqa	[esp],xmm4
+	xor	edi,eax
+	ror	edx,7
+	mov	esi,ecx
+	xor	ebp,edi
+	rol	ecx,5
+	pslld	xmm1,2
+	add	ebx,ebp
+	xor	esi,edx
+	psrld	xmm3,30
+	xor	edx,edi
+	add	ebx,ecx
+	add	eax,DWORD [24+esp]
+	and	esi,edx
+	xor	edx,edi
+	ror	ecx,7
+	por	xmm1,xmm3
+	mov	ebp,ebx
+	xor	esi,edx
+	movdqa	xmm3,[96+esp]
+	rol	ebx,5
+	add	eax,esi
+	xor	ebp,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	pshufd	xmm4,xmm0,238
+	add	edi,DWORD [28+esp]
+	and	ebp,ecx
+	xor	ecx,edx
+	ror	ebx,7
+	mov	esi,eax
+	xor	ebp,ecx
+	rol	eax,5
+	add	edi,ebp
+	xor	esi,ebx
+	xor	ebx,ecx
+	add	edi,eax
+	add	edx,DWORD [32+esp]
+	pxor	xmm2,xmm6
+	punpcklqdq	xmm4,xmm1
+	and	esi,ebx
+	xor	ebx,ecx
+	ror	eax,7
+	pxor	xmm2,xmm3
+	movdqa	[96+esp],xmm6
+	mov	ebp,edi
+	xor	esi,ebx
+	rol	edi,5
+	movdqa	xmm6,xmm5
+	add	edx,esi
+	paddd	xmm5,xmm1
+	xor	ebp,eax
+	pxor	xmm2,xmm4
+	xor	eax,ebx
+	add	edx,edi
+	add	ecx,DWORD [36+esp]
+	and	ebp,eax
+	movdqa	xmm4,xmm2
+	movdqa	[16+esp],xmm5
+	xor	eax,ebx
+	ror	edi,7
+	mov	esi,edx
+	xor	ebp,eax
+	rol	edx,5
+	pslld	xmm2,2
+	add	ecx,ebp
+	xor	esi,edi
+	psrld	xmm4,30
+	xor	edi,eax
+	add	ecx,edx
+	add	ebx,DWORD [40+esp]
+	and	esi,edi
+	xor	edi,eax
+	ror	edx,7
+	por	xmm2,xmm4
+	mov	ebp,ecx
+	xor	esi,edi
+	movdqa	xmm4,[64+esp]
+	rol	ecx,5
+	add	ebx,esi
+	xor	ebp,edx
+	xor	edx,edi
+	add	ebx,ecx
+	pshufd	xmm5,xmm1,238
+	add	eax,DWORD [44+esp]
+	and	ebp,edx
+	xor	edx,edi
+	ror	ecx,7
+	mov	esi,ebx
+	xor	ebp,edx
+	rol	ebx,5
+	add	eax,ebp
+	xor	esi,edx
+	add	eax,ebx
+	add	edi,DWORD [48+esp]
+	pxor	xmm3,xmm7
+	punpcklqdq	xmm5,xmm2
+	xor	esi,ecx
+	mov	ebp,eax
+	rol	eax,5
+	pxor	xmm3,xmm4
+	movdqa	[64+esp],xmm7
+	add	edi,esi
+	xor	ebp,ecx
+	movdqa	xmm7,xmm6
+	ror	ebx,7
+	paddd	xmm6,xmm2
+	add	edi,eax
+	pxor	xmm3,xmm5
+	add	edx,DWORD [52+esp]
+	xor	ebp,ebx
+	mov	esi,edi
+	rol	edi,5
+	movdqa	xmm5,xmm3
+	movdqa	[32+esp],xmm6
+	add	edx,ebp
+	xor	esi,ebx
+	ror	eax,7
+	add	edx,edi
+	pslld	xmm3,2
+	add	ecx,DWORD [56+esp]
+	xor	esi,eax
+	psrld	xmm5,30
+	mov	ebp,edx
+	rol	edx,5
+	add	ecx,esi
+	xor	ebp,eax
+	ror	edi,7
+	add	ecx,edx
+	por	xmm3,xmm5
+	add	ebx,DWORD [60+esp]
+	xor	ebp,edi
+	mov	esi,ecx
+	rol	ecx,5
+	add	ebx,ebp
+	xor	esi,edi
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD [esp]
+	xor	esi,edx
+	mov	ebp,ebx
+	rol	ebx,5
+	add	eax,esi
+	xor	ebp,edx
+	ror	ecx,7
+	paddd	xmm7,xmm3
+	add	eax,ebx
+	add	edi,DWORD [4+esp]
+	xor	ebp,ecx
+	mov	esi,eax
+	movdqa	[48+esp],xmm7
+	rol	eax,5
+	add	edi,ebp
+	xor	esi,ecx
+	ror	ebx,7
+	add	edi,eax
+	add	edx,DWORD [8+esp]
+	xor	esi,ebx
+	mov	ebp,edi
+	rol	edi,5
+	add	edx,esi
+	xor	ebp,ebx
+	ror	eax,7
+	add	edx,edi
+	add	ecx,DWORD [12+esp]
+	xor	ebp,eax
+	mov	esi,edx
+	rol	edx,5
+	add	ecx,ebp
+	xor	esi,eax
+	ror	edi,7
+	add	ecx,edx
+	mov	ebp,DWORD [196+esp]	; ebp = next input block pointer saved in the frame
+	cmp	ebp,DWORD [200+esp]	; compare against the end-of-input pointer
+	je	NEAR L$005done	; no more input: finish this block's remaining rounds at L$005done
+	movdqa	xmm7,[160+esp]	; reload the table entries spilled at frame offsets 160 and 176
+	movdqa	xmm6,[176+esp]
+	movdqu	xmm0,[ebp]	; unaligned load of the next 64-byte block into xmm0..xmm3
+	movdqu	xmm1,[16+ebp]
+	movdqu	xmm2,[32+ebp]
+	movdqu	xmm3,[48+ebp]
+	add	ebp,64	; advance the input pointer past the block just loaded
+db	102,15,56,0,198	; hand-encoded pshufb xmm0,xmm6
+	mov	DWORD [196+esp],ebp	; save the advanced input pointer
+	movdqa	[96+esp],xmm7
+	add	ebx,DWORD [16+esp]	; remaining scalar rounds of the current block continue while the next block is prepared
+	xor	esi,edi
+	mov	ebp,ecx
+	rol	ecx,5
+	add	ebx,esi
+	xor	ebp,edi
+	ror	edx,7
+db	102,15,56,0,206	; hand-encoded pshufb xmm1,xmm6
+	add	ebx,ecx
+	add	eax,DWORD [20+esp]
+	xor	ebp,edx
+	mov	esi,ebx
+	paddd	xmm0,xmm7
+	rol	ebx,5
+	add	eax,ebp
+	xor	esi,edx
+	ror	ecx,7
+	movdqa	[esp],xmm0	; sum for the next block stored at [0..15+esp]; later rounds in this pass read slots from [24+esp] upward
+	add	eax,ebx
+	add	edi,DWORD [24+esp]
+	xor	esi,ecx
+	mov	ebp,eax
+	psubd	xmm0,xmm7	; undo the add so xmm0 holds the plain words again
+	rol	eax,5
+	add	edi,esi
+	xor	ebp,ecx
+	ror	ebx,7
+	add	edi,eax
+	add	edx,DWORD [28+esp]
+	xor	ebp,ebx
+	mov	esi,edi
+	rol	edi,5
+	add	edx,ebp
+	xor	esi,ebx
+	ror	eax,7
+	add	edx,edi
+	add	ecx,DWORD [32+esp]
+	xor	esi,eax
+	mov	ebp,edx
+	rol	edx,5
+	add	ecx,esi
+	xor	ebp,eax
+	ror	edi,7
+db	102,15,56,0,214	; hand-encoded pshufb xmm2,xmm6
+	add	ecx,edx
+	add	ebx,DWORD [36+esp]
+	xor	ebp,edi
+	mov	esi,ecx
+	paddd	xmm1,xmm7
+	rol	ecx,5
+	add	ebx,ebp
+	xor	esi,edi
+	ror	edx,7
+	movdqa	[16+esp],xmm1
+	add	ebx,ecx
+	add	eax,DWORD [40+esp]
+	xor	esi,edx
+	mov	ebp,ebx
+	psubd	xmm1,xmm7
+	rol	ebx,5
+	add	eax,esi
+	xor	ebp,edx
+	ror	ecx,7
+	add	eax,ebx
+	add	edi,DWORD [44+esp]
+	xor	ebp,ecx
+	mov	esi,eax
+	rol	eax,5
+	add	edi,ebp
+	xor	esi,ecx
+	ror	ebx,7
+	add	edi,eax
+	add	edx,DWORD [48+esp]
+	xor	esi,ebx
+	mov	ebp,edi
+	rol	edi,5
+	add	edx,esi
+	xor	ebp,ebx
+	ror	eax,7
+db	102,15,56,0,222	; hand-encoded pshufb xmm3,xmm6
+	add	edx,edi
+	add	ecx,DWORD [52+esp]
+	xor	ebp,eax
+	mov	esi,edx
+	paddd	xmm2,xmm7
+	rol	edx,5
+	add	ecx,ebp
+	xor	esi,eax
+	ror	edi,7
+	movdqa	[32+esp],xmm2
+	add	ecx,edx
+	add	ebx,DWORD [56+esp]
+	xor	esi,edi
+	mov	ebp,ecx
+	psubd	xmm2,xmm7
+	rol	ecx,5
+	add	ebx,esi
+	xor	ebp,edi
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD [60+esp]
+	xor	ebp,edx
+	mov	esi,ebx
+	rol	ebx,5
+	add	eax,ebp
+	ror	ecx,7
+	add	eax,ebx
+	mov	ebp,DWORD [192+esp]	; ebp = state pointer saved in the frame
+	add	eax,DWORD [ebp]	; add the previous state words to eax, esi, ecx, edx, edi ...
+	add	esi,DWORD [4+ebp]
+	add	ecx,DWORD [8+ebp]
+	mov	DWORD [ebp],eax	; ... and write the updated words back
+	add	edx,DWORD [12+ebp]
+	mov	DWORD [4+ebp],esi
+	add	edi,DWORD [16+ebp]
+	mov	DWORD [8+ebp],ecx
+	mov	ebx,ecx
+	mov	DWORD [12+ebp],edx
+	xor	ebx,edx
+	mov	DWORD [16+ebp],edi
+	mov	ebp,esi	; keep the new second state word while esi is masked
+	pshufd	xmm4,xmm0,238
+	and	esi,ebx	; esi = second word & (ecx ^ edx), the same precondition set up before the first pass
+	mov	ebx,ebp	; ebx = new second state word
+	jmp	NEAR L$004loop
+align	16
+L$005done:	; last block: run the remaining scalar rounds from the stored [16..63+esp] sums, with no further input loads
+	add	ebx,DWORD [16+esp]
+	xor	esi,edi
+	mov	ebp,ecx
+	rol	ecx,5
+	add	ebx,esi
+	xor	ebp,edi
+	ror	edx,7	; rol 5 earlier plus this ror 7 leaves the register rotated right by 2 overall
+	add	ebx,ecx
+	add	eax,DWORD [20+esp]
+	xor	ebp,edx
+	mov	esi,ebx
+	rol	ebx,5
+	add	eax,ebp
+	xor	esi,edx
+	ror	ecx,7
+	add	eax,ebx
+	add	edi,DWORD [24+esp]
+	xor	esi,ecx
+	mov	ebp,eax
+	rol	eax,5
+	add	edi,esi
+	xor	ebp,ecx
+	ror	ebx,7
+	add	edi,eax
+	add	edx,DWORD [28+esp]
+	xor	ebp,ebx
+	mov	esi,edi
+	rol	edi,5
+	add	edx,ebp
+	xor	esi,ebx
+	ror	eax,7
+	add	edx,edi
+	add	ecx,DWORD [32+esp]
+	xor	esi,eax
+	mov	ebp,edx
+	rol	edx,5
+	add	ecx,esi
+	xor	ebp,eax
+	ror	edi,7
+	add	ecx,edx
+	add	ebx,DWORD [36+esp]
+	xor	ebp,edi
+	mov	esi,ecx
+	rol	ecx,5
+	add	ebx,ebp
+	xor	esi,edi
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD [40+esp]
+	xor	esi,edx
+	mov	ebp,ebx
+	rol	ebx,5
+	add	eax,esi
+	xor	ebp,edx
+	ror	ecx,7
+	add	eax,ebx
+	add	edi,DWORD [44+esp]
+	xor	ebp,ecx
+	mov	esi,eax
+	rol	eax,5
+	add	edi,ebp
+	xor	esi,ecx
+	ror	ebx,7
+	add	edi,eax
+	add	edx,DWORD [48+esp]
+	xor	esi,ebx
+	mov	ebp,edi
+	rol	edi,5
+	add	edx,esi
+	xor	ebp,ebx
+	ror	eax,7
+	add	edx,edi
+	add	ecx,DWORD [52+esp]
+	xor	ebp,eax
+	mov	esi,edx
+	rol	edx,5
+	add	ecx,ebp
+	xor	esi,eax
+	ror	edi,7
+	add	ecx,edx
+	add	ebx,DWORD [56+esp]
+	xor	esi,edi
+	mov	ebp,ecx
+	rol	ecx,5
+	add	ebx,esi
+	xor	ebp,edi
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD [60+esp]
+	xor	ebp,edx
+	mov	esi,ebx
+	rol	ebx,5
+	add	eax,ebp
+	ror	ecx,7
+	add	eax,ebx
+	mov	ebp,DWORD [192+esp]	; ebp = state pointer saved in the frame
+	add	eax,DWORD [ebp]
+	mov	esp,DWORD [204+esp]	; restore the caller's esp saved in the frame, releasing the aligned scratch area
+	add	esi,DWORD [4+ebp]	; add the previous state words to esi, ecx, edx, edi (eax was done above)
+	add	ecx,DWORD [8+ebp]
+	mov	DWORD [ebp],eax	; write the five updated state words back
+	add	edx,DWORD [12+ebp]
+	mov	DWORD [4+ebp],esi
+	add	edi,DWORD [16+ebp]
+	mov	DWORD [8+ebp],ecx
+	mov	DWORD [12+ebp],edx
+	mov	DWORD [16+ebp],edi
+	pop	edi	; restore the registers pushed at entry, in reverse order
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	16
+__sha1_block_data_order_avx:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	call	L$006pic_point
+L$006pic_point:
+	pop	ebp
+	lea	ebp,[(L$K_XX_XX-L$006pic_point)+ebp]
+L$avx_shortcut:
+	vzeroall
+	vmovdqa	xmm7,[ebp]
+	vmovdqa	xmm0,[16+ebp]
+	vmovdqa	xmm1,[32+ebp]
+	vmovdqa	xmm2,[48+ebp]
+	vmovdqa	xmm6,[64+ebp]
+	mov	edi,DWORD [20+esp]
+	mov	ebp,DWORD [24+esp]
+	mov	edx,DWORD [28+esp]
+	mov	esi,esp
+	sub	esp,208
+	and	esp,-64
+	vmovdqa	[112+esp],xmm0
+	vmovdqa	[128+esp],xmm1
+	vmovdqa	[144+esp],xmm2
+	shl	edx,6
+	vmovdqa	[160+esp],xmm7
+	add	edx,ebp
+	vmovdqa	[176+esp],xmm6
+	add	ebp,64
+	mov	DWORD [192+esp],edi
+	mov	DWORD [196+esp],ebp
+	mov	DWORD [200+esp],edx
+	mov	DWORD [204+esp],esi
+	mov	eax,DWORD [edi]
+	mov	ebx,DWORD [4+edi]
+	mov	ecx,DWORD [8+edi]
+	mov	edx,DWORD [12+edi]
+	mov	edi,DWORD [16+edi]
+	mov	esi,ebx
+	vmovdqu	xmm0,[ebp-64]
+	vmovdqu	xmm1,[ebp-48]
+	vmovdqu	xmm2,[ebp-32]
+	vmovdqu	xmm3,[ebp-16]
+	vpshufb	xmm0,xmm0,xmm6
+	vpshufb	xmm1,xmm1,xmm6
+	vpshufb	xmm2,xmm2,xmm6
+	vmovdqa	[96+esp],xmm7
+	vpshufb	xmm3,xmm3,xmm6
+	vpaddd	xmm4,xmm0,xmm7
+	vpaddd	xmm5,xmm1,xmm7
+	vpaddd	xmm6,xmm2,xmm7
+	vmovdqa	[esp],xmm4
+	mov	ebp,ecx
+	vmovdqa	[16+esp],xmm5
+	xor	ebp,edx
+	vmovdqa	[32+esp],xmm6
+	and	esi,ebp
+	jmp	NEAR L$007loop
+align	16
+L$007loop:
+	shrd	ebx,ebx,2
+	xor	esi,edx
+	vpalignr	xmm4,xmm1,xmm0,8
+	mov	ebp,eax
+	add	edi,DWORD [esp]
+	vpaddd	xmm7,xmm7,xmm3
+	vmovdqa	[64+esp],xmm0
+	xor	ebx,ecx
+	shld	eax,eax,5
+	vpsrldq	xmm6,xmm3,4
+	add	edi,esi
+	and	ebp,ebx
+	vpxor	xmm4,xmm4,xmm0
+	xor	ebx,ecx
+	add	edi,eax
+	vpxor	xmm6,xmm6,xmm2
+	shrd	eax,eax,7
+	xor	ebp,ecx
+	vmovdqa	[48+esp],xmm7
+	mov	esi,edi
+	add	edx,DWORD [4+esp]
+	vpxor	xmm4,xmm4,xmm6
+	xor	eax,ebx
+	shld	edi,edi,5
+	add	edx,ebp
+	and	esi,eax
+	vpsrld	xmm6,xmm4,31
+	xor	eax,ebx
+	add	edx,edi
+	shrd	edi,edi,7
+	xor	esi,ebx
+	vpslldq	xmm0,xmm4,12
+	vpaddd	xmm4,xmm4,xmm4
+	mov	ebp,edx
+	add	ecx,DWORD [8+esp]
+	xor	edi,eax
+	shld	edx,edx,5
+	vpsrld	xmm7,xmm0,30
+	vpor	xmm4,xmm4,xmm6
+	add	ecx,esi
+	and	ebp,edi
+	xor	edi,eax
+	add	ecx,edx
+	vpslld	xmm0,xmm0,2
+	shrd	edx,edx,7
+	xor	ebp,eax
+	vpxor	xmm4,xmm4,xmm7
+	mov	esi,ecx
+	add	ebx,DWORD [12+esp]
+	xor	edx,edi
+	shld	ecx,ecx,5
+	vpxor	xmm4,xmm4,xmm0
+	add	ebx,ebp
+	and	esi,edx
+	vmovdqa	xmm0,[96+esp]
+	xor	edx,edi
+	add	ebx,ecx
+	shrd	ecx,ecx,7
+	xor	esi,edi
+	vpalignr	xmm5,xmm2,xmm1,8
+	mov	ebp,ebx
+	add	eax,DWORD [16+esp]
+	vpaddd	xmm0,xmm0,xmm4
+	vmovdqa	[80+esp],xmm1
+	xor	ecx,edx
+	shld	ebx,ebx,5
+	vpsrldq	xmm7,xmm4,4
+	add	eax,esi
+	and	ebp,ecx
+	vpxor	xmm5,xmm5,xmm1
+	xor	ecx,edx
+	add	eax,ebx
+	vpxor	xmm7,xmm7,xmm3
+	shrd	ebx,ebx,7
+	xor	ebp,edx
+	vmovdqa	[esp],xmm0
+	mov	esi,eax
+	add	edi,DWORD [20+esp]
+	vpxor	xmm5,xmm5,xmm7
+	xor	ebx,ecx
+	shld	eax,eax,5
+	add	edi,ebp
+	and	esi,ebx
+	vpsrld	xmm7,xmm5,31
+	xor	ebx,ecx
+	add	edi,eax
+	shrd	eax,eax,7
+	xor	esi,ecx
+	vpslldq	xmm1,xmm5,12
+	vpaddd	xmm5,xmm5,xmm5
+	mov	ebp,edi
+	add	edx,DWORD [24+esp]
+	xor	eax,ebx
+	shld	edi,edi,5
+	vpsrld	xmm0,xmm1,30
+	vpor	xmm5,xmm5,xmm7
+	add	edx,esi
+	and	ebp,eax
+	xor	eax,ebx
+	add	edx,edi
+	vpslld	xmm1,xmm1,2
+	shrd	edi,edi,7
+	xor	ebp,ebx
+	vpxor	xmm5,xmm5,xmm0
+	mov	esi,edx
+	add	ecx,DWORD [28+esp]
+	xor	edi,eax
+	shld	edx,edx,5
+	vpxor	xmm5,xmm5,xmm1
+	add	ecx,ebp
+	and	esi,edi
+	vmovdqa	xmm1,[112+esp]
+	xor	edi,eax
+	add	ecx,edx
+	shrd	edx,edx,7
+	xor	esi,eax
+	vpalignr	xmm6,xmm3,xmm2,8
+	mov	ebp,ecx
+	add	ebx,DWORD [32+esp]
+	vpaddd	xmm1,xmm1,xmm5
+	vmovdqa	[96+esp],xmm2
+	xor	edx,edi
+	shld	ecx,ecx,5
+	vpsrldq	xmm0,xmm5,4
+	add	ebx,esi
+	and	ebp,edx
+	vpxor	xmm6,xmm6,xmm2
+	xor	edx,edi
+	add	ebx,ecx
+	vpxor	xmm0,xmm0,xmm4
+	shrd	ecx,ecx,7
+	xor	ebp,edi
+	vmovdqa	[16+esp],xmm1
+	mov	esi,ebx
+	add	eax,DWORD [36+esp]
+	vpxor	xmm6,xmm6,xmm0
+	xor	ecx,edx
+	shld	ebx,ebx,5
+	add	eax,ebp
+	and	esi,ecx
+	vpsrld	xmm0,xmm6,31
+	xor	ecx,edx
+	add	eax,ebx
+	shrd	ebx,ebx,7
+	xor	esi,edx
+	vpslldq	xmm2,xmm6,12
+	vpaddd	xmm6,xmm6,xmm6
+	mov	ebp,eax
+	add	edi,DWORD [40+esp]
+	xor	ebx,ecx
+	shld	eax,eax,5
+	vpsrld	xmm1,xmm2,30
+	vpor	xmm6,xmm6,xmm0
+	add	edi,esi
+	and	ebp,ebx
+	xor	ebx,ecx
+	add	edi,eax
+	vpslld	xmm2,xmm2,2
+	vmovdqa	xmm0,[64+esp]
+	shrd	eax,eax,7
+	xor	ebp,ecx
+	vpxor	xmm6,xmm6,xmm1
+	mov	esi,edi
+	add	edx,DWORD [44+esp]
+	xor	eax,ebx
+	shld	edi,edi,5
+	vpxor	xmm6,xmm6,xmm2
+	add	edx,ebp
+	and	esi,eax
+	vmovdqa	xmm2,[112+esp]
+	xor	eax,ebx
+	add	edx,edi
+	shrd	edi,edi,7
+	xor	esi,ebx
+	vpalignr	xmm7,xmm4,xmm3,8
+	mov	ebp,edx
+	add	ecx,DWORD [48+esp]
+	vpaddd	xmm2,xmm2,xmm6
+	vmovdqa	[64+esp],xmm3
+	xor	edi,eax
+	shld	edx,edx,5
+	vpsrldq	xmm1,xmm6,4
+	add	ecx,esi
+	and	ebp,edi
+	vpxor	xmm7,xmm7,xmm3
+	xor	edi,eax
+	add	ecx,edx
+	vpxor	xmm1,xmm1,xmm5
+	shrd	edx,edx,7
+	xor	ebp,eax
+	vmovdqa	[32+esp],xmm2
+	mov	esi,ecx
+	add	ebx,DWORD [52+esp]
+	vpxor	xmm7,xmm7,xmm1
+	xor	edx,edi
+	shld	ecx,ecx,5
+	add	ebx,ebp
+	and	esi,edx
+	vpsrld	xmm1,xmm7,31
+	xor	edx,edi
+	add	ebx,ecx
+	shrd	ecx,ecx,7
+	xor	esi,edi
+	vpslldq	xmm3,xmm7,12
+	vpaddd	xmm7,xmm7,xmm7
+	mov	ebp,ebx
+	add	eax,DWORD [56+esp]
+	xor	ecx,edx
+	shld	ebx,ebx,5
+	vpsrld	xmm2,xmm3,30
+	vpor	xmm7,xmm7,xmm1
+	add	eax,esi
+	and	ebp,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	vpslld	xmm3,xmm3,2
+	vmovdqa	xmm1,[80+esp]
+	shrd	ebx,ebx,7
+	xor	ebp,edx
+	vpxor	xmm7,xmm7,xmm2
+	mov	esi,eax
+	add	edi,DWORD [60+esp]
+	xor	ebx,ecx
+	shld	eax,eax,5
+	vpxor	xmm7,xmm7,xmm3
+	add	edi,ebp
+	and	esi,ebx
+	vmovdqa	xmm3,[112+esp]
+	xor	ebx,ecx
+	add	edi,eax
+	vpalignr	xmm2,xmm7,xmm6,8
+	vpxor	xmm0,xmm0,xmm4
+	shrd	eax,eax,7
+	xor	esi,ecx
+	mov	ebp,edi
+	add	edx,DWORD [esp]
+	vpxor	xmm0,xmm0,xmm1
+	vmovdqa	[80+esp],xmm4
+	xor	eax,ebx
+	shld	edi,edi,5
+	vmovdqa	xmm4,xmm3
+	vpaddd	xmm3,xmm3,xmm7
+	add	edx,esi
+	and	ebp,eax
+	vpxor	xmm0,xmm0,xmm2
+	xor	eax,ebx
+	add	edx,edi
+	shrd	edi,edi,7
+	xor	ebp,ebx
+	vpsrld	xmm2,xmm0,30
+	vmovdqa	[48+esp],xmm3
+	mov	esi,edx
+	add	ecx,DWORD [4+esp]
+	xor	edi,eax
+	shld	edx,edx,5
+	vpslld	xmm0,xmm0,2
+	add	ecx,ebp
+	and	esi,edi
+	xor	edi,eax
+	add	ecx,edx
+	shrd	edx,edx,7
+	xor	esi,eax
+	mov	ebp,ecx
+	add	ebx,DWORD [8+esp]
+	vpor	xmm0,xmm0,xmm2
+	xor	edx,edi
+	shld	ecx,ecx,5
+	vmovdqa	xmm2,[96+esp]
+	add	ebx,esi
+	and	ebp,edx
+	xor	edx,edi
+	add	ebx,ecx
+	add	eax,DWORD [12+esp]
+	xor	ebp,edi
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,ebp
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpalignr	xmm3,xmm0,xmm7,8
+	vpxor	xmm1,xmm1,xmm5
+	add	edi,DWORD [16+esp]
+	xor	esi,ecx
+	mov	ebp,eax
+	shld	eax,eax,5
+	vpxor	xmm1,xmm1,xmm2
+	vmovdqa	[96+esp],xmm5
+	add	edi,esi
+	xor	ebp,ecx
+	vmovdqa	xmm5,xmm4
+	vpaddd	xmm4,xmm4,xmm0
+	shrd	ebx,ebx,7
+	add	edi,eax
+	vpxor	xmm1,xmm1,xmm3
+	add	edx,DWORD [20+esp]
+	xor	ebp,ebx
+	mov	esi,edi
+	shld	edi,edi,5
+	vpsrld	xmm3,xmm1,30
+	vmovdqa	[esp],xmm4
+	add	edx,ebp
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,edi
+	vpslld	xmm1,xmm1,2
+	add	ecx,DWORD [24+esp]
+	xor	esi,eax
+	mov	ebp,edx
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	ebp,eax
+	shrd	edi,edi,7
+	add	ecx,edx
+	vpor	xmm1,xmm1,xmm3
+	add	ebx,DWORD [28+esp]
+	xor	ebp,edi
+	vmovdqa	xmm3,[64+esp]
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,ebp
+	xor	esi,edi
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpalignr	xmm4,xmm1,xmm0,8
+	vpxor	xmm2,xmm2,xmm6
+	add	eax,DWORD [32+esp]
+	xor	esi,edx
+	mov	ebp,ebx
+	shld	ebx,ebx,5
+	vpxor	xmm2,xmm2,xmm3
+	vmovdqa	[64+esp],xmm6
+	add	eax,esi
+	xor	ebp,edx
+	vmovdqa	xmm6,[128+esp]
+	vpaddd	xmm5,xmm5,xmm1
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpxor	xmm2,xmm2,xmm4
+	add	edi,DWORD [36+esp]
+	xor	ebp,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	vpsrld	xmm4,xmm2,30
+	vmovdqa	[16+esp],xmm5
+	add	edi,ebp
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	edi,eax
+	vpslld	xmm2,xmm2,2
+	add	edx,DWORD [40+esp]
+	xor	esi,ebx
+	mov	ebp,edi
+	shld	edi,edi,5
+	add	edx,esi
+	xor	ebp,ebx
+	shrd	eax,eax,7
+	add	edx,edi
+	vpor	xmm2,xmm2,xmm4
+	add	ecx,DWORD [44+esp]
+	xor	ebp,eax
+	vmovdqa	xmm4,[80+esp]
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,ebp
+	xor	esi,eax
+	shrd	edi,edi,7
+	add	ecx,edx
+	vpalignr	xmm5,xmm2,xmm1,8
+	vpxor	xmm3,xmm3,xmm7
+	add	ebx,DWORD [48+esp]
+	xor	esi,edi
+	mov	ebp,ecx
+	shld	ecx,ecx,5
+	vpxor	xmm3,xmm3,xmm4
+	vmovdqa	[80+esp],xmm7
+	add	ebx,esi
+	xor	ebp,edi
+	vmovdqa	xmm7,xmm6
+	vpaddd	xmm6,xmm6,xmm2
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpxor	xmm3,xmm3,xmm5
+	add	eax,DWORD [52+esp]
+	xor	ebp,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	vpsrld	xmm5,xmm3,30
+	vmovdqa	[32+esp],xmm6
+	add	eax,ebp
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpslld	xmm3,xmm3,2
+	add	edi,DWORD [56+esp]
+	xor	esi,ecx
+	mov	ebp,eax
+	shld	eax,eax,5
+	add	edi,esi
+	xor	ebp,ecx
+	shrd	ebx,ebx,7
+	add	edi,eax
+	vpor	xmm3,xmm3,xmm5
+	add	edx,DWORD [60+esp]
+	xor	ebp,ebx
+	vmovdqa	xmm5,[96+esp]
+	mov	esi,edi
+	shld	edi,edi,5
+	add	edx,ebp
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,edi
+	vpalignr	xmm6,xmm3,xmm2,8
+	vpxor	xmm4,xmm4,xmm0
+	add	ecx,DWORD [esp]
+	xor	esi,eax
+	mov	ebp,edx
+	shld	edx,edx,5
+	vpxor	xmm4,xmm4,xmm5
+	vmovdqa	[96+esp],xmm0
+	add	ecx,esi
+	xor	ebp,eax
+	vmovdqa	xmm0,xmm7
+	vpaddd	xmm7,xmm7,xmm3
+	shrd	edi,edi,7
+	add	ecx,edx
+	vpxor	xmm4,xmm4,xmm6
+	add	ebx,DWORD [4+esp]
+	xor	ebp,edi
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	vpsrld	xmm6,xmm4,30
+	vmovdqa	[48+esp],xmm7
+	add	ebx,ebp
+	xor	esi,edi
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpslld	xmm4,xmm4,2
+	add	eax,DWORD [8+esp]
+	xor	esi,edx
+	mov	ebp,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	ebp,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpor	xmm4,xmm4,xmm6
+	add	edi,DWORD [12+esp]
+	xor	ebp,ecx
+	vmovdqa	xmm6,[64+esp]
+	mov	esi,eax
+	shld	eax,eax,5
+	add	edi,ebp
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	edi,eax
+	vpalignr	xmm7,xmm4,xmm3,8
+	vpxor	xmm5,xmm5,xmm1
+	add	edx,DWORD [16+esp]
+	xor	esi,ebx
+	mov	ebp,edi
+	shld	edi,edi,5
+	vpxor	xmm5,xmm5,xmm6
+	vmovdqa	[64+esp],xmm1
+	add	edx,esi
+	xor	ebp,ebx
+	vmovdqa	xmm1,xmm0
+	vpaddd	xmm0,xmm0,xmm4
+	shrd	eax,eax,7
+	add	edx,edi
+	vpxor	xmm5,xmm5,xmm7
+	add	ecx,DWORD [20+esp]
+	xor	ebp,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	vpsrld	xmm7,xmm5,30
+	vmovdqa	[esp],xmm0
+	add	ecx,ebp
+	xor	esi,eax
+	shrd	edi,edi,7
+	add	ecx,edx
+	vpslld	xmm5,xmm5,2
+	add	ebx,DWORD [24+esp]
+	xor	esi,edi
+	mov	ebp,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	ebp,edi
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpor	xmm5,xmm5,xmm7
+	add	eax,DWORD [28+esp]
+	vmovdqa	xmm7,[80+esp]
+	shrd	ecx,ecx,7
+	mov	esi,ebx
+	xor	ebp,edx
+	shld	ebx,ebx,5
+	add	eax,ebp
+	xor	esi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	vpalignr	xmm0,xmm5,xmm4,8
+	vpxor	xmm6,xmm6,xmm2
+	add	edi,DWORD [32+esp]
+	and	esi,ecx
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	vpxor	xmm6,xmm6,xmm7
+	vmovdqa	[80+esp],xmm2
+	mov	ebp,eax
+	xor	esi,ecx
+	vmovdqa	xmm2,xmm1
+	vpaddd	xmm1,xmm1,xmm5
+	shld	eax,eax,5
+	add	edi,esi
+	vpxor	xmm6,xmm6,xmm0
+	xor	ebp,ebx
+	xor	ebx,ecx
+	add	edi,eax
+	add	edx,DWORD [36+esp]
+	vpsrld	xmm0,xmm6,30
+	vmovdqa	[16+esp],xmm1
+	and	ebp,ebx
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	mov	esi,edi
+	vpslld	xmm6,xmm6,2
+	xor	ebp,ebx
+	shld	edi,edi,5
+	add	edx,ebp
+	xor	esi,eax
+	xor	eax,ebx
+	add	edx,edi
+	add	ecx,DWORD [40+esp]
+	and	esi,eax
+	vpor	xmm6,xmm6,xmm0
+	xor	eax,ebx
+	shrd	edi,edi,7
+	vmovdqa	xmm0,[96+esp]
+	mov	ebp,edx
+	xor	esi,eax
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	ebp,edi
+	xor	edi,eax
+	add	ecx,edx
+	add	ebx,DWORD [44+esp]
+	and	ebp,edi
+	xor	edi,eax
+	shrd	edx,edx,7
+	mov	esi,ecx
+	xor	ebp,edi
+	shld	ecx,ecx,5
+	add	ebx,ebp
+	xor	esi,edx
+	xor	edx,edi
+	add	ebx,ecx
+	vpalignr	xmm1,xmm6,xmm5,8
+	vpxor	xmm7,xmm7,xmm3
+	add	eax,DWORD [48+esp]
+	and	esi,edx
+	xor	edx,edi
+	shrd	ecx,ecx,7
+	vpxor	xmm7,xmm7,xmm0
+	vmovdqa	[96+esp],xmm3
+	mov	ebp,ebx
+	xor	esi,edx
+	vmovdqa	xmm3,[144+esp]
+	vpaddd	xmm2,xmm2,xmm6
+	shld	ebx,ebx,5
+	add	eax,esi
+	vpxor	xmm7,xmm7,xmm1
+	xor	ebp,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	edi,DWORD [52+esp]
+	vpsrld	xmm1,xmm7,30
+	vmovdqa	[32+esp],xmm2
+	and	ebp,ecx
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	mov	esi,eax
+	vpslld	xmm7,xmm7,2
+	xor	ebp,ecx
+	shld	eax,eax,5
+	add	edi,ebp
+	xor	esi,ebx
+	xor	ebx,ecx
+	add	edi,eax
+	add	edx,DWORD [56+esp]
+	and	esi,ebx
+	vpor	xmm7,xmm7,xmm1
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	vmovdqa	xmm1,[64+esp]
+	mov	ebp,edi
+	xor	esi,ebx
+	shld	edi,edi,5
+	add	edx,esi
+	xor	ebp,eax
+	xor	eax,ebx
+	add	edx,edi
+	add	ecx,DWORD [60+esp]
+	and	ebp,eax
+	xor	eax,ebx
+	shrd	edi,edi,7
+	mov	esi,edx
+	xor	ebp,eax
+	shld	edx,edx,5
+	add	ecx,ebp
+	xor	esi,edi
+	xor	edi,eax
+	add	ecx,edx
+	vpalignr	xmm2,xmm7,xmm6,8
+	vpxor	xmm0,xmm0,xmm4
+	add	ebx,DWORD [esp]
+	and	esi,edi
+	xor	edi,eax
+	shrd	edx,edx,7
+	vpxor	xmm0,xmm0,xmm1
+	vmovdqa	[64+esp],xmm4
+	mov	ebp,ecx
+	xor	esi,edi
+	vmovdqa	xmm4,xmm3
+	vpaddd	xmm3,xmm3,xmm7
+	shld	ecx,ecx,5
+	add	ebx,esi
+	vpxor	xmm0,xmm0,xmm2
+	xor	ebp,edx
+	xor	edx,edi
+	add	ebx,ecx
+	add	eax,DWORD [4+esp]
+	vpsrld	xmm2,xmm0,30
+	vmovdqa	[48+esp],xmm3
+	and	ebp,edx
+	xor	edx,edi
+	shrd	ecx,ecx,7
+	mov	esi,ebx
+	vpslld	xmm0,xmm0,2
+	xor	ebp,edx
+	shld	ebx,ebx,5
+	add	eax,ebp
+	xor	esi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	edi,DWORD [8+esp]
+	and	esi,ecx
+	vpor	xmm0,xmm0,xmm2
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	vmovdqa	xmm2,[80+esp]
+	mov	ebp,eax
+	xor	esi,ecx
+	shld	eax,eax,5
+	add	edi,esi
+	xor	ebp,ebx
+	xor	ebx,ecx
+	add	edi,eax
+	add	edx,DWORD [12+esp]
+	and	ebp,ebx
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	mov	esi,edi
+	xor	ebp,ebx
+	shld	edi,edi,5
+	add	edx,ebp
+	xor	esi,eax
+	xor	eax,ebx
+	add	edx,edi
+	vpalignr	xmm3,xmm0,xmm7,8
+	vpxor	xmm1,xmm1,xmm5
+	add	ecx,DWORD [16+esp]
+	and	esi,eax
+	xor	eax,ebx
+	shrd	edi,edi,7
+	vpxor	xmm1,xmm1,xmm2
+	vmovdqa	[80+esp],xmm5
+	mov	ebp,edx
+	xor	esi,eax
+	vmovdqa	xmm5,xmm4
+	vpaddd	xmm4,xmm4,xmm0
+	shld	edx,edx,5
+	add	ecx,esi
+	vpxor	xmm1,xmm1,xmm3
+	xor	ebp,edi
+	xor	edi,eax
+	add	ecx,edx
+	add	ebx,DWORD [20+esp]
+	vpsrld	xmm3,xmm1,30
+	vmovdqa	[esp],xmm4
+	and	ebp,edi
+	xor	edi,eax
+	shrd	edx,edx,7
+	mov	esi,ecx
+	vpslld	xmm1,xmm1,2
+	xor	ebp,edi
+	shld	ecx,ecx,5
+	add	ebx,ebp
+	xor	esi,edx
+	xor	edx,edi
+	add	ebx,ecx
+	add	eax,DWORD [24+esp]
+	and	esi,edx
+	vpor	xmm1,xmm1,xmm3
+	xor	edx,edi
+	shrd	ecx,ecx,7
+	vmovdqa	xmm3,[96+esp]
+	mov	ebp,ebx
+	xor	esi,edx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	ebp,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	edi,DWORD [28+esp]
+	and	ebp,ecx
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	mov	esi,eax
+	xor	ebp,ecx
+	shld	eax,eax,5
+	add	edi,ebp
+	xor	esi,ebx
+	xor	ebx,ecx
+	add	edi,eax
+	vpalignr	xmm4,xmm1,xmm0,8
+	vpxor	xmm2,xmm2,xmm6
+	add	edx,DWORD [32+esp]
+	and	esi,ebx
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	vpxor	xmm2,xmm2,xmm3
+	vmovdqa	[96+esp],xmm6
+	mov	ebp,edi
+	xor	esi,ebx
+	vmovdqa	xmm6,xmm5
+	vpaddd	xmm5,xmm5,xmm1
+	shld	edi,edi,5
+	add	edx,esi
+	vpxor	xmm2,xmm2,xmm4
+	xor	ebp,eax
+	xor	eax,ebx
+	add	edx,edi
+	add	ecx,DWORD [36+esp]
+	vpsrld	xmm4,xmm2,30
+	vmovdqa	[16+esp],xmm5
+	and	ebp,eax
+	xor	eax,ebx
+	shrd	edi,edi,7
+	mov	esi,edx
+	vpslld	xmm2,xmm2,2
+	xor	ebp,eax
+	shld	edx,edx,5
+	add	ecx,ebp
+	xor	esi,edi
+	xor	edi,eax
+	add	ecx,edx
+	add	ebx,DWORD [40+esp]
+	and	esi,edi
+	vpor	xmm2,xmm2,xmm4
+	xor	edi,eax
+	shrd	edx,edx,7
+	vmovdqa	xmm4,[64+esp]
+	mov	ebp,ecx
+	xor	esi,edi
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	ebp,edx
+	xor	edx,edi
+	add	ebx,ecx
+	add	eax,DWORD [44+esp]
+	and	ebp,edx
+	xor	edx,edi
+	shrd	ecx,ecx,7
+	mov	esi,ebx
+	xor	ebp,edx
+	shld	ebx,ebx,5
+	add	eax,ebp
+	xor	esi,edx
+	add	eax,ebx
+	vpalignr	xmm5,xmm2,xmm1,8
+	vpxor	xmm3,xmm3,xmm7
+	add	edi,DWORD [48+esp]
+	xor	esi,ecx
+	mov	ebp,eax
+	shld	eax,eax,5
+	vpxor	xmm3,xmm3,xmm4
+	vmovdqa	[64+esp],xmm7
+	add	edi,esi
+	xor	ebp,ecx
+	vmovdqa	xmm7,xmm6
+	vpaddd	xmm6,xmm6,xmm2
+	shrd	ebx,ebx,7
+	add	edi,eax
+	vpxor	xmm3,xmm3,xmm5
+	add	edx,DWORD [52+esp]
+	xor	ebp,ebx
+	mov	esi,edi
+	shld	edi,edi,5
+	vpsrld	xmm5,xmm3,30
+	vmovdqa	[32+esp],xmm6
+	add	edx,ebp
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,edi
+	vpslld	xmm3,xmm3,2
+	add	ecx,DWORD [56+esp]
+	xor	esi,eax
+	mov	ebp,edx
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	ebp,eax
+	shrd	edi,edi,7
+	add	ecx,edx
+	vpor	xmm3,xmm3,xmm5
+	add	ebx,DWORD [60+esp]
+	xor	ebp,edi
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,ebp
+	xor	esi,edi
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD [esp]
+	vpaddd	xmm7,xmm7,xmm3
+	xor	esi,edx
+	mov	ebp,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	vmovdqa	[48+esp],xmm7
+	xor	ebp,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	edi,DWORD [4+esp]
+	xor	ebp,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	add	edi,ebp
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	edi,eax
+	add	edx,DWORD [8+esp]
+	xor	esi,ebx
+	mov	ebp,edi
+	shld	edi,edi,5
+	add	edx,esi
+	xor	ebp,ebx
+	shrd	eax,eax,7
+	add	edx,edi
+	add	ecx,DWORD [12+esp]
+	xor	ebp,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,ebp
+	xor	esi,eax
+	shrd	edi,edi,7
+	add	ecx,edx
+	mov	ebp,DWORD [196+esp]
+	cmp	ebp,DWORD [200+esp]
+	je	NEAR L$008done
+	vmovdqa	xmm7,[160+esp]
+	vmovdqa	xmm6,[176+esp]
+	vmovdqu	xmm0,[ebp]
+	vmovdqu	xmm1,[16+ebp]
+	vmovdqu	xmm2,[32+ebp]
+	vmovdqu	xmm3,[48+ebp]
+	add	ebp,64
+	vpshufb	xmm0,xmm0,xmm6
+	mov	DWORD [196+esp],ebp
+	vmovdqa	[96+esp],xmm7
+	add	ebx,DWORD [16+esp]
+	xor	esi,edi
+	vpshufb	xmm1,xmm1,xmm6
+	mov	ebp,ecx
+	shld	ecx,ecx,5
+	vpaddd	xmm4,xmm0,xmm7
+	add	ebx,esi
+	xor	ebp,edi
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vmovdqa	[esp],xmm4
+	add	eax,DWORD [20+esp]
+	xor	ebp,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,ebp
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	edi,DWORD [24+esp]
+	xor	esi,ecx
+	mov	ebp,eax
+	shld	eax,eax,5
+	add	edi,esi
+	xor	ebp,ecx
+	shrd	ebx,ebx,7
+	add	edi,eax
+	add	edx,DWORD [28+esp]
+	xor	ebp,ebx
+	mov	esi,edi
+	shld	edi,edi,5
+	add	edx,ebp
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,edi
+	add	ecx,DWORD [32+esp]
+	xor	esi,eax
+	vpshufb	xmm2,xmm2,xmm6
+	mov	ebp,edx
+	shld	edx,edx,5
+	vpaddd	xmm5,xmm1,xmm7
+	add	ecx,esi
+	xor	ebp,eax
+	shrd	edi,edi,7
+	add	ecx,edx
+	vmovdqa	[16+esp],xmm5
+	add	ebx,DWORD [36+esp]
+	xor	ebp,edi
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,ebp
+	xor	esi,edi
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD [40+esp]
+	xor	esi,edx
+	mov	ebp,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	ebp,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	edi,DWORD [44+esp]
+	xor	ebp,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	add	edi,ebp
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	edi,eax
+	add	edx,DWORD [48+esp]
+	xor	esi,ebx
+	vpshufb	xmm3,xmm3,xmm6
+	mov	ebp,edi
+	shld	edi,edi,5
+	vpaddd	xmm6,xmm2,xmm7
+	add	edx,esi
+	xor	ebp,ebx
+	shrd	eax,eax,7
+	add	edx,edi
+	vmovdqa	[32+esp],xmm6
+	add	ecx,DWORD [52+esp]
+	xor	ebp,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,ebp
+	xor	esi,eax
+	shrd	edi,edi,7
+	add	ecx,edx
+	add	ebx,DWORD [56+esp]
+	xor	esi,edi
+	mov	ebp,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	ebp,edi
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD [60+esp]
+	xor	ebp,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,ebp
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	mov	ebp,DWORD [192+esp]
+	add	eax,DWORD [ebp]
+	add	esi,DWORD [4+ebp]
+	add	ecx,DWORD [8+ebp]
+	mov	DWORD [ebp],eax
+	add	edx,DWORD [12+ebp]
+	mov	DWORD [4+ebp],esi
+	add	edi,DWORD [16+ebp]
+	mov	ebx,ecx
+	mov	DWORD [8+ebp],ecx
+	xor	ebx,edx
+	mov	DWORD [12+ebp],edx
+	mov	DWORD [16+ebp],edi
+	mov	ebp,esi
+	and	esi,ebx
+	mov	ebx,ebp
+	jmp	NEAR L$007loop
+align	16
+L$008done:
+	add	ebx,DWORD [16+esp]
+	xor	esi,edi
+	mov	ebp,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	ebp,edi
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD [20+esp]
+	xor	ebp,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,ebp
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	edi,DWORD [24+esp]
+	xor	esi,ecx
+	mov	ebp,eax
+	shld	eax,eax,5
+	add	edi,esi
+	xor	ebp,ecx
+	shrd	ebx,ebx,7
+	add	edi,eax
+	add	edx,DWORD [28+esp]
+	xor	ebp,ebx
+	mov	esi,edi
+	shld	edi,edi,5
+	add	edx,ebp
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,edi
+	add	ecx,DWORD [32+esp]
+	xor	esi,eax
+	mov	ebp,edx
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	ebp,eax
+	shrd	edi,edi,7
+	add	ecx,edx
+	add	ebx,DWORD [36+esp]
+	xor	ebp,edi
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,ebp
+	xor	esi,edi
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD [40+esp]
+	xor	esi,edx
+	mov	ebp,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	ebp,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	edi,DWORD [44+esp]
+	xor	ebp,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	add	edi,ebp
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	edi,eax
+	add	edx,DWORD [48+esp]
+	xor	esi,ebx
+	mov	ebp,edi
+	shld	edi,edi,5
+	add	edx,esi
+	xor	ebp,ebx
+	shrd	eax,eax,7
+	add	edx,edi
+	add	ecx,DWORD [52+esp]
+	xor	ebp,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,ebp
+	xor	esi,eax
+	shrd	edi,edi,7
+	add	ecx,edx
+	add	ebx,DWORD [56+esp]
+	xor	esi,edi
+	mov	ebp,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	ebp,edi
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD [60+esp]
+	xor	ebp,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,ebp
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vzeroall
+	mov	ebp,DWORD [192+esp]
+	add	eax,DWORD [ebp]
+	mov	esp,DWORD [204+esp]
+	add	esi,DWORD [4+ebp]
+	add	ecx,DWORD [8+ebp]
+	mov	DWORD [ebp],eax
+	add	edx,DWORD [12+ebp]
+	mov	DWORD [4+ebp],esi
+	add	edi,DWORD [16+ebp]
+	mov	DWORD [8+ebp],ecx
+	mov	DWORD [12+ebp],edx
+	mov	DWORD [16+ebp],edi
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	64
+L$K_XX_XX:	; read-only constant pool; the ASCII tag at its end reads "SHA1 block transform for x86"
+dd	1518500249,1518500249,1518500249,1518500249	; 0x5A827999 repeated in 4 dword lanes; equals the SHA-1 constant K for rounds 0-19 (FIPS 180-4)
+dd	1859775393,1859775393,1859775393,1859775393	; 0x6ED9EBA1 repeated in 4 dword lanes; equals the SHA-1 constant K for rounds 20-39
+dd	2400959708,2400959708,2400959708,2400959708	; 0x8F1BBCDC repeated in 4 dword lanes; equals the SHA-1 constant K for rounds 40-59
+dd	3395469782,3395469782,3395469782,3395469782	; 0xCA62C1D6 repeated in 4 dword lanes; equals the SHA-1 constant K for rounds 60-79
+dd	66051,67438087,134810123,202182159	; 0x00010203,0x04050607,0x08090A0B,0x0C0D0E0F; NOTE(review): presumably the pshufb mask that byte-swaps each dword of the input block - confirm against the loads earlier in the file
+db	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	; byte indices 15..0 in descending order; NOTE(review): looks like a full 16-byte reversal shuffle mask - its user is not visible here, confirm
+db	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115	; ASCII "SHA1 block trans"
+db	102,111,114,109,32,102,111,114,32,120,56,54,44,32,67,82	; ASCII "form for x86, CR"
+db	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112	; ASCII "YPTOGAMS by <app"
+db	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	; ASCII "ro@openssl.org>" followed by the NUL terminator
+segment	.bss
+common	_OPENSSL_ia32cap_P 16
diff --git a/deps/boringssl/win-x86/crypto/fipsmodule/sha256-586.asm b/deps/boringssl/win-x86/crypto/fipsmodule/sha256-586.asm
new file mode 100644
index 0000000..5d6661d
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/fipsmodule/sha256-586.asm
@@ -0,0 +1,5571 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+;extern	_OPENSSL_ia32cap_P
+global	_sha256_block_data_order
+align	16
+_sha256_block_data_order:
+L$_sha256_block_data_order_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	esi,DWORD [20+esp]
+	mov	edi,DWORD [24+esp]
+	mov	eax,DWORD [28+esp]
+	mov	ebx,esp
+	call	L$000pic_point
+L$000pic_point:
+	pop	ebp
+	lea	ebp,[(L$001K256-L$000pic_point)+ebp]
+	sub	esp,16
+	and	esp,-64
+	shl	eax,6
+	add	eax,edi
+	mov	DWORD [esp],esi
+	mov	DWORD [4+esp],edi
+	mov	DWORD [8+esp],eax
+	mov	DWORD [12+esp],ebx
+	lea	edx,[_OPENSSL_ia32cap_P]
+	mov	ecx,DWORD [edx]
+	mov	ebx,DWORD [4+edx]
+	test	ecx,1048576
+	jnz	NEAR L$002loop
+	mov	edx,DWORD [8+edx]
+	test	ecx,16777216
+	jz	NEAR L$003no_xmm
+	and	ecx,1073741824
+	and	ebx,268435968
+	or	ecx,ebx
+	and	ecx,1342177280
+	cmp	ecx,1342177280
+	je	NEAR L$004AVX
+	test	ebx,512
+	jnz	NEAR L$005SSSE3
+L$003no_xmm:
+	sub	eax,edi
+	cmp	eax,256
+	jae	NEAR L$006unrolled
+	jmp	NEAR L$002loop
+align	16
+L$002loop:
+	mov	eax,DWORD [edi]
+	mov	ebx,DWORD [4+edi]
+	mov	ecx,DWORD [8+edi]
+	bswap	eax
+	mov	edx,DWORD [12+edi]
+	bswap	ebx
+	push	eax
+	bswap	ecx
+	push	ebx
+	bswap	edx
+	push	ecx
+	push	edx
+	mov	eax,DWORD [16+edi]
+	mov	ebx,DWORD [20+edi]
+	mov	ecx,DWORD [24+edi]
+	bswap	eax
+	mov	edx,DWORD [28+edi]
+	bswap	ebx
+	push	eax
+	bswap	ecx
+	push	ebx
+	bswap	edx
+	push	ecx
+	push	edx
+	mov	eax,DWORD [32+edi]
+	mov	ebx,DWORD [36+edi]
+	mov	ecx,DWORD [40+edi]
+	bswap	eax
+	mov	edx,DWORD [44+edi]
+	bswap	ebx
+	push	eax
+	bswap	ecx
+	push	ebx
+	bswap	edx
+	push	ecx
+	push	edx
+	mov	eax,DWORD [48+edi]
+	mov	ebx,DWORD [52+edi]
+	mov	ecx,DWORD [56+edi]
+	bswap	eax
+	mov	edx,DWORD [60+edi]
+	bswap	ebx
+	push	eax
+	bswap	ecx
+	push	ebx
+	bswap	edx
+	push	ecx
+	push	edx
+	add	edi,64
+	lea	esp,[esp-36]
+	mov	DWORD [104+esp],edi
+	mov	eax,DWORD [esi]
+	mov	ebx,DWORD [4+esi]
+	mov	ecx,DWORD [8+esi]
+	mov	edi,DWORD [12+esi]
+	mov	DWORD [8+esp],ebx
+	xor	ebx,ecx
+	mov	DWORD [12+esp],ecx
+	mov	DWORD [16+esp],edi
+	mov	DWORD [esp],ebx
+	mov	edx,DWORD [16+esi]
+	mov	ebx,DWORD [20+esi]
+	mov	ecx,DWORD [24+esi]
+	mov	edi,DWORD [28+esi]
+	mov	DWORD [24+esp],ebx
+	mov	DWORD [28+esp],ecx
+	mov	DWORD [32+esp],edi
+align	16
+L$00700_15:
+	mov	ecx,edx
+	mov	esi,DWORD [24+esp]
+	ror	ecx,14
+	mov	edi,DWORD [28+esp]
+	xor	ecx,edx
+	xor	esi,edi
+	mov	ebx,DWORD [96+esp]
+	ror	ecx,5
+	and	esi,edx
+	mov	DWORD [20+esp],edx
+	xor	edx,ecx
+	add	ebx,DWORD [32+esp]
+	xor	esi,edi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,esi
+	ror	ecx,9
+	add	ebx,edx
+	mov	edi,DWORD [8+esp]
+	xor	ecx,eax
+	mov	DWORD [4+esp],eax
+	lea	esp,[esp-4]
+	ror	ecx,11
+	mov	esi,DWORD [ebp]
+	xor	ecx,eax
+	mov	edx,DWORD [20+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	ebx,esi
+	mov	DWORD [esp],eax
+	add	edx,ebx
+	and	eax,DWORD [4+esp]
+	add	ebx,ecx
+	xor	eax,edi
+	add	ebp,4
+	add	eax,ebx
+	cmp	esi,3248222580
+	jne	NEAR L$00700_15
+	mov	ecx,DWORD [156+esp]
+	jmp	NEAR L$00816_63
+align	16
+L$00816_63:
+	mov	ebx,ecx
+	mov	esi,DWORD [104+esp]
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [160+esp]
+	shr	edi,10
+	add	ebx,DWORD [124+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [24+esp]
+	ror	ecx,14
+	add	ebx,edi
+	mov	edi,DWORD [28+esp]
+	xor	ecx,edx
+	xor	esi,edi
+	mov	DWORD [96+esp],ebx
+	ror	ecx,5
+	and	esi,edx
+	mov	DWORD [20+esp],edx
+	xor	edx,ecx
+	add	ebx,DWORD [32+esp]
+	xor	esi,edi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,esi
+	ror	ecx,9
+	add	ebx,edx
+	mov	edi,DWORD [8+esp]
+	xor	ecx,eax
+	mov	DWORD [4+esp],eax
+	lea	esp,[esp-4]
+	ror	ecx,11
+	mov	esi,DWORD [ebp]
+	xor	ecx,eax
+	mov	edx,DWORD [20+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	ebx,esi
+	mov	DWORD [esp],eax
+	add	edx,ebx
+	and	eax,DWORD [4+esp]
+	add	ebx,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [156+esp]
+	add	ebp,4
+	add	eax,ebx
+	cmp	esi,3329325298
+	jne	NEAR L$00816_63
+	mov	esi,DWORD [356+esp]
+	mov	ebx,DWORD [8+esp]
+	mov	ecx,DWORD [16+esp]
+	add	eax,DWORD [esi]
+	add	ebx,DWORD [4+esi]
+	add	edi,DWORD [8+esi]
+	add	ecx,DWORD [12+esi]
+	mov	DWORD [esi],eax
+	mov	DWORD [4+esi],ebx
+	mov	DWORD [8+esi],edi
+	mov	DWORD [12+esi],ecx
+	mov	eax,DWORD [24+esp]
+	mov	ebx,DWORD [28+esp]
+	mov	ecx,DWORD [32+esp]
+	mov	edi,DWORD [360+esp]
+	add	edx,DWORD [16+esi]
+	add	eax,DWORD [20+esi]
+	add	ebx,DWORD [24+esi]
+	add	ecx,DWORD [28+esi]
+	mov	DWORD [16+esi],edx
+	mov	DWORD [20+esi],eax
+	mov	DWORD [24+esi],ebx
+	mov	DWORD [28+esi],ecx
+	lea	esp,[356+esp]
+	sub	ebp,256
+	cmp	edi,DWORD [8+esp]
+	jb	NEAR L$002loop
+	mov	esp,DWORD [12+esp]
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	64
+L$001K256:	; constant table addressed PC-relatively: ebp is pointed here by the lea that follows L$000pic_point
+dd	1116352408,1899447441,3049323471,3921009573,961987163,1508970993,2453635748,2870763221,3624381080,310598401,607225278,1426881987,1925078388,2162078206,2614888103,3248222580,3835390401,4022224774,264347078,604807628,770255983,1249150122,1555081692,1996064986,2554220882,2821834349,2952996808,3210313671,3336571891,3584528711,113926993,338241895,666307205,773529912,1294757372,1396182291,1695183700,1986661051,2177026350,2456956037,2730485921,2820302411,3259730800,3345764771,3516065817,3600352804,4094571909,275423344,430227734,506948616,659060556,883997877,958139571,1322822218,1537002063,1747873779,1955562222,2024104815,2227730452,2361852424,2428436474,2756734187,3204031479,3329325298	; 64 dwords, 0x428A2F98 .. 0xC67178F2, matching the SHA-256 round constants K[0..63] of FIPS 180-4. The rolled loops read one entry per round through ebp and use the value itself as the loop sentinel: L$00700_15 stops after loading entry 15 (3248222580), L$00816_63 after entry 63 (3329325298), then ebp is rewound by 256 bytes. Changing either entry requires changing the matching cmp immediate.
+dd	66051,67438087,134810123,202182159	; 0x00010203,0x04050607,0x08090A0B,0x0C0D0E0F; NOTE(review): presumably the pshufb byte-swap mask for the SSSE3/AVX paths, which lie outside this view - confirm
+db	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97	; ASCII "SHA256 block tra"
+db	110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32	; ASCII "nsform for x86, "
+db	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97	; ASCII "CRYPTOGAMS by <a"
+db	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103	; ASCII "ppro@openssl.org"
+db	62,0	; ASCII ">" followed by the NUL terminator
+align	16
+L$006unrolled:
+	lea	esp,[esp-96]
+	mov	eax,DWORD [esi]
+	mov	ebp,DWORD [4+esi]
+	mov	ecx,DWORD [8+esi]
+	mov	ebx,DWORD [12+esi]
+	mov	DWORD [4+esp],ebp
+	xor	ebp,ecx
+	mov	DWORD [8+esp],ecx
+	mov	DWORD [12+esp],ebx
+	mov	edx,DWORD [16+esi]
+	mov	ebx,DWORD [20+esi]
+	mov	ecx,DWORD [24+esi]
+	mov	esi,DWORD [28+esi]
+	mov	DWORD [20+esp],ebx
+	mov	DWORD [24+esp],ecx
+	mov	DWORD [28+esp],esi
+	jmp	NEAR L$009grand_loop
+align	16
+L$009grand_loop:
+	mov	ebx,DWORD [edi]
+	mov	ecx,DWORD [4+edi]
+	bswap	ebx
+	mov	esi,DWORD [8+edi]
+	bswap	ecx
+	mov	DWORD [32+esp],ebx
+	bswap	esi
+	mov	DWORD [36+esp],ecx
+	mov	DWORD [40+esp],esi
+	mov	ebx,DWORD [12+edi]
+	mov	ecx,DWORD [16+edi]
+	bswap	ebx
+	mov	esi,DWORD [20+edi]
+	bswap	ecx
+	mov	DWORD [44+esp],ebx
+	bswap	esi
+	mov	DWORD [48+esp],ecx
+	mov	DWORD [52+esp],esi
+	mov	ebx,DWORD [24+edi]
+	mov	ecx,DWORD [28+edi]
+	bswap	ebx
+	mov	esi,DWORD [32+edi]
+	bswap	ecx
+	mov	DWORD [56+esp],ebx
+	bswap	esi
+	mov	DWORD [60+esp],ecx
+	mov	DWORD [64+esp],esi
+	mov	ebx,DWORD [36+edi]
+	mov	ecx,DWORD [40+edi]
+	bswap	ebx
+	mov	esi,DWORD [44+edi]
+	bswap	ecx
+	mov	DWORD [68+esp],ebx
+	bswap	esi
+	mov	DWORD [72+esp],ecx
+	mov	DWORD [76+esp],esi
+	mov	ebx,DWORD [48+edi]
+	mov	ecx,DWORD [52+edi]
+	bswap	ebx
+	mov	esi,DWORD [56+edi]
+	bswap	ecx
+	mov	DWORD [80+esp],ebx
+	bswap	esi
+	mov	DWORD [84+esp],ecx
+	mov	DWORD [88+esp],esi
+	mov	ebx,DWORD [60+edi]
+	add	edi,64
+	bswap	ebx
+	mov	DWORD [100+esp],edi
+	mov	DWORD [92+esp],ebx
+	mov	ecx,edx
+	mov	esi,DWORD [20+esp]
+	ror	edx,14
+	mov	edi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	ebx,DWORD [32+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [28+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [4+esp]
+	xor	ecx,eax
+	mov	DWORD [esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[1116352408+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [12+esp]
+	add	ebp,ecx
+	mov	esi,edx
+	mov	ecx,DWORD [16+esp]
+	ror	edx,14
+	mov	edi,DWORD [20+esp]
+	xor	edx,esi
+	mov	ebx,DWORD [36+esp]
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [12+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [24+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [esp]
+	xor	esi,ebp
+	mov	DWORD [28+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[1899447441+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,esi
+	mov	ecx,edx
+	mov	esi,DWORD [12+esp]
+	ror	edx,14
+	mov	edi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	ebx,DWORD [40+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [20+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [28+esp]
+	xor	ecx,eax
+	mov	DWORD [24+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[3049323471+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [4+esp]
+	add	ebp,ecx
+	mov	esi,edx
+	mov	ecx,DWORD [8+esp]
+	ror	edx,14
+	mov	edi,DWORD [12+esp]
+	xor	edx,esi
+	mov	ebx,DWORD [44+esp]
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [4+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [16+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [24+esp]
+	xor	esi,ebp
+	mov	DWORD [20+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[3921009573+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,esi
+	mov	ecx,edx
+	mov	esi,DWORD [4+esp]
+	ror	edx,14
+	mov	edi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	ebx,DWORD [48+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [12+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [20+esp]
+	xor	ecx,eax
+	mov	DWORD [16+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[961987163+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [28+esp]
+	add	ebp,ecx
+	mov	esi,edx
+	mov	ecx,DWORD [esp]
+	ror	edx,14
+	mov	edi,DWORD [4+esp]
+	xor	edx,esi
+	mov	ebx,DWORD [52+esp]
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [28+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [8+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [16+esp]
+	xor	esi,ebp
+	mov	DWORD [12+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[1508970993+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,esi
+	mov	ecx,edx
+	mov	esi,DWORD [28+esp]
+	ror	edx,14
+	mov	edi,DWORD [esp]
+	xor	edx,ecx
+	mov	ebx,DWORD [56+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [4+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [12+esp]
+	xor	ecx,eax
+	mov	DWORD [8+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[2453635748+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [20+esp]
+	add	ebp,ecx
+	mov	esi,edx
+	mov	ecx,DWORD [24+esp]
+	ror	edx,14
+	mov	edi,DWORD [28+esp]
+	xor	edx,esi
+	mov	ebx,DWORD [60+esp]
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [20+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [8+esp]
+	xor	esi,ebp
+	mov	DWORD [4+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[2870763221+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,esi
+	mov	ecx,edx
+	mov	esi,DWORD [20+esp]
+	ror	edx,14
+	mov	edi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	ebx,DWORD [64+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [28+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [4+esp]
+	xor	ecx,eax
+	mov	DWORD [esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[3624381080+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [12+esp]
+	add	ebp,ecx
+	mov	esi,edx
+	mov	ecx,DWORD [16+esp]
+	ror	edx,14
+	mov	edi,DWORD [20+esp]
+	xor	edx,esi
+	mov	ebx,DWORD [68+esp]
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [12+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [24+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [esp]
+	xor	esi,ebp
+	mov	DWORD [28+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[310598401+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,esi
+	mov	ecx,edx
+	mov	esi,DWORD [12+esp]
+	ror	edx,14
+	mov	edi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	ebx,DWORD [72+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [20+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [28+esp]
+	xor	ecx,eax
+	mov	DWORD [24+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[607225278+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [4+esp]
+	add	ebp,ecx
+	mov	esi,edx
+	mov	ecx,DWORD [8+esp]
+	ror	edx,14
+	mov	edi,DWORD [12+esp]
+	xor	edx,esi
+	mov	ebx,DWORD [76+esp]
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [4+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [16+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [24+esp]
+	xor	esi,ebp
+	mov	DWORD [20+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[1426881987+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,esi
+	mov	ecx,edx
+	mov	esi,DWORD [4+esp]
+	ror	edx,14
+	mov	edi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	ebx,DWORD [80+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [12+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [20+esp]
+	xor	ecx,eax
+	mov	DWORD [16+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[1925078388+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [28+esp]
+	add	ebp,ecx
+	mov	esi,edx
+	mov	ecx,DWORD [esp]
+	ror	edx,14
+	mov	edi,DWORD [4+esp]
+	xor	edx,esi
+	mov	ebx,DWORD [84+esp]
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [28+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [8+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [16+esp]
+	xor	esi,ebp
+	mov	DWORD [12+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[2162078206+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,esi
+	mov	ecx,edx
+	mov	esi,DWORD [28+esp]
+	ror	edx,14
+	mov	edi,DWORD [esp]
+	xor	edx,ecx
+	mov	ebx,DWORD [88+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [4+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [12+esp]
+	xor	ecx,eax
+	mov	DWORD [8+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[2614888103+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [20+esp]
+	add	ebp,ecx
+	mov	esi,edx
+	mov	ecx,DWORD [24+esp]
+	ror	edx,14
+	mov	edi,DWORD [28+esp]
+	xor	edx,esi
+	mov	ebx,DWORD [92+esp]
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [20+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [8+esp]
+	xor	esi,ebp
+	mov	DWORD [4+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[3248222580+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [36+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,esi
+	mov	esi,DWORD [88+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [32+esp]
+	shr	edi,10
+	add	ebx,DWORD [68+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [20+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	DWORD [32+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [28+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [4+esp]
+	xor	ecx,eax
+	mov	DWORD [esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[3835390401+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [40+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [12+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [92+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [36+esp]
+	shr	edi,10
+	add	ebx,DWORD [72+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [16+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [20+esp]
+	xor	edx,esi
+	mov	DWORD [36+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [12+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [24+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [esp]
+	xor	esi,ebp
+	mov	DWORD [28+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[4022224774+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [44+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,esi
+	mov	esi,DWORD [32+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [40+esp]
+	shr	edi,10
+	add	ebx,DWORD [76+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [12+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	DWORD [40+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [20+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [28+esp]
+	xor	ecx,eax
+	mov	DWORD [24+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[264347078+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [48+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [4+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [36+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [44+esp]
+	shr	edi,10
+	add	ebx,DWORD [80+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [8+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [12+esp]
+	xor	edx,esi
+	mov	DWORD [44+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [4+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [16+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [24+esp]
+	xor	esi,ebp
+	mov	DWORD [20+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[604807628+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [52+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,esi
+	mov	esi,DWORD [40+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [48+esp]
+	shr	edi,10
+	add	ebx,DWORD [84+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [4+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	DWORD [48+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [12+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [20+esp]
+	xor	ecx,eax
+	mov	DWORD [16+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[770255983+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [56+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [28+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [44+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [52+esp]
+	shr	edi,10
+	add	ebx,DWORD [88+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [4+esp]
+	xor	edx,esi
+	mov	DWORD [52+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [28+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [8+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [16+esp]
+	xor	esi,ebp
+	mov	DWORD [12+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[1249150122+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [60+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,esi
+	mov	esi,DWORD [48+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [56+esp]
+	shr	edi,10
+	add	ebx,DWORD [92+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [28+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [esp]
+	xor	edx,ecx
+	mov	DWORD [56+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [4+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [12+esp]
+	xor	ecx,eax
+	mov	DWORD [8+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[1555081692+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [64+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [20+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [52+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [60+esp]
+	shr	edi,10
+	add	ebx,DWORD [32+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [24+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [28+esp]
+	xor	edx,esi
+	mov	DWORD [60+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [20+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [8+esp]
+	xor	esi,ebp
+	mov	DWORD [4+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[1996064986+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [68+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,esi
+	mov	esi,DWORD [56+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [64+esp]
+	shr	edi,10
+	add	ebx,DWORD [36+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [20+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	DWORD [64+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [28+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [4+esp]
+	xor	ecx,eax
+	mov	DWORD [esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[2554220882+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [72+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [12+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [60+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [68+esp]
+	shr	edi,10
+	add	ebx,DWORD [40+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [16+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [20+esp]
+	xor	edx,esi
+	mov	DWORD [68+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [12+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [24+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [esp]
+	xor	esi,ebp
+	mov	DWORD [28+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[2821834349+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [76+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,esi
+	mov	esi,DWORD [64+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [72+esp]
+	shr	edi,10
+	add	ebx,DWORD [44+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [12+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	DWORD [72+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [20+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [28+esp]
+	xor	ecx,eax
+	mov	DWORD [24+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[2952996808+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [80+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [4+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [68+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [76+esp]
+	shr	edi,10
+	add	ebx,DWORD [48+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [8+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [12+esp]
+	xor	edx,esi
+	mov	DWORD [76+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [4+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [16+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [24+esp]
+	xor	esi,ebp
+	mov	DWORD [20+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[3210313671+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [84+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,esi
+	mov	esi,DWORD [72+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [80+esp]
+	shr	edi,10
+	add	ebx,DWORD [52+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [4+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	DWORD [80+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [12+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [20+esp]
+	xor	ecx,eax
+	mov	DWORD [16+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[3336571891+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [88+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [28+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [76+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [84+esp]
+	shr	edi,10
+	add	ebx,DWORD [56+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [4+esp]
+	xor	edx,esi
+	mov	DWORD [84+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [28+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [8+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [16+esp]
+	xor	esi,ebp
+	mov	DWORD [12+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[3584528711+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [92+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,esi
+	mov	esi,DWORD [80+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [88+esp]
+	shr	edi,10
+	add	ebx,DWORD [60+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [28+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [esp]
+	xor	edx,ecx
+	mov	DWORD [88+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [4+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [12+esp]
+	xor	ecx,eax
+	mov	DWORD [8+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[113926993+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [32+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [20+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [84+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [92+esp]
+	shr	edi,10
+	add	ebx,DWORD [64+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [24+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [28+esp]
+	xor	edx,esi
+	mov	DWORD [92+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [20+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [8+esp]
+	xor	esi,ebp
+	mov	DWORD [4+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[338241895+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [36+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,esi
+	mov	esi,DWORD [88+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [32+esp]
+	shr	edi,10
+	add	ebx,DWORD [68+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [20+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	DWORD [32+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [28+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [4+esp]
+	xor	ecx,eax
+	mov	DWORD [esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[666307205+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [40+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [12+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [92+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [36+esp]
+	shr	edi,10
+	add	ebx,DWORD [72+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [16+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [20+esp]
+	xor	edx,esi
+	mov	DWORD [36+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [12+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [24+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [esp]
+	xor	esi,ebp
+	mov	DWORD [28+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[773529912+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [44+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,esi
+	mov	esi,DWORD [32+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [40+esp]
+	shr	edi,10
+	add	ebx,DWORD [76+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [12+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	DWORD [40+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [20+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [28+esp]
+	xor	ecx,eax
+	mov	DWORD [24+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[1294757372+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [48+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [4+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [36+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [44+esp]
+	shr	edi,10
+	add	ebx,DWORD [80+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [8+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [12+esp]
+	xor	edx,esi
+	mov	DWORD [44+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [4+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [16+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [24+esp]
+	xor	esi,ebp
+	mov	DWORD [20+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[1396182291+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [52+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,esi
+	mov	esi,DWORD [40+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [48+esp]
+	shr	edi,10
+	add	ebx,DWORD [84+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [4+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	DWORD [48+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [12+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [20+esp]
+	xor	ecx,eax
+	mov	DWORD [16+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[1695183700+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [56+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [28+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [44+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [52+esp]
+	shr	edi,10
+	add	ebx,DWORD [88+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [4+esp]
+	xor	edx,esi
+	mov	DWORD [52+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [28+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [8+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [16+esp]
+	xor	esi,ebp
+	mov	DWORD [12+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[1986661051+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [60+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,esi
+	mov	esi,DWORD [48+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [56+esp]
+	shr	edi,10
+	add	ebx,DWORD [92+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [28+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [esp]
+	xor	edx,ecx
+	mov	DWORD [56+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [4+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [12+esp]
+	xor	ecx,eax
+	mov	DWORD [8+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[2177026350+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [64+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [20+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [52+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [60+esp]
+	shr	edi,10
+	add	ebx,DWORD [32+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [24+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [28+esp]
+	xor	edx,esi
+	mov	DWORD [60+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [20+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [8+esp]
+	xor	esi,ebp
+	mov	DWORD [4+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[2456956037+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [68+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,esi
+	mov	esi,DWORD [56+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [64+esp]
+	shr	edi,10
+	add	ebx,DWORD [36+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [20+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	DWORD [64+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [28+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [4+esp]
+	xor	ecx,eax
+	mov	DWORD [esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[2730485921+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [72+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [12+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [60+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [68+esp]
+	shr	edi,10
+	add	ebx,DWORD [40+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [16+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [20+esp]
+	xor	edx,esi
+	mov	DWORD [68+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [12+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [24+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [esp]
+	xor	esi,ebp
+	mov	DWORD [28+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[2820302411+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [76+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,esi
+	mov	esi,DWORD [64+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [72+esp]
+	shr	edi,10
+	add	ebx,DWORD [44+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [12+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	DWORD [72+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [20+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [28+esp]
+	xor	ecx,eax
+	mov	DWORD [24+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[3259730800+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [80+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [4+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [68+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [76+esp]
+	shr	edi,10
+	add	ebx,DWORD [48+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [8+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [12+esp]
+	xor	edx,esi
+	mov	DWORD [76+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [4+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [16+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [24+esp]
+	xor	esi,ebp
+	mov	DWORD [20+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[3345764771+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [84+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,esi
+	mov	esi,DWORD [72+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [80+esp]
+	shr	edi,10
+	add	ebx,DWORD [52+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [4+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	DWORD [80+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [12+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [20+esp]
+	xor	ecx,eax
+	mov	DWORD [16+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[3516065817+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [88+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [28+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [76+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [84+esp]
+	shr	edi,10
+	add	ebx,DWORD [56+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [4+esp]
+	xor	edx,esi
+	mov	DWORD [84+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [28+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [8+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [16+esp]
+	xor	esi,ebp
+	mov	DWORD [12+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[3600352804+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [92+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,esi
+	mov	esi,DWORD [80+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [88+esp]
+	shr	edi,10
+	add	ebx,DWORD [60+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [28+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [esp]
+	xor	edx,ecx
+	mov	DWORD [88+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [4+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [12+esp]
+	xor	ecx,eax
+	mov	DWORD [8+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[4094571909+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [32+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [20+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [84+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [92+esp]
+	shr	edi,10
+	add	ebx,DWORD [64+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [24+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [28+esp]
+	xor	edx,esi
+	mov	DWORD [92+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [20+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [8+esp]
+	xor	esi,ebp
+	mov	DWORD [4+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[275423344+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [36+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,esi
+	mov	esi,DWORD [88+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [32+esp]
+	shr	edi,10
+	add	ebx,DWORD [68+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [20+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	DWORD [32+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [28+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [4+esp]
+	xor	ecx,eax
+	mov	DWORD [esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[430227734+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [40+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [12+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [92+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [36+esp]
+	shr	edi,10
+	add	ebx,DWORD [72+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [16+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [20+esp]
+	xor	edx,esi
+	mov	DWORD [36+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [12+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [24+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [esp]
+	xor	esi,ebp
+	mov	DWORD [28+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[506948616+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [44+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,esi
+	mov	esi,DWORD [32+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [40+esp]
+	shr	edi,10
+	add	ebx,DWORD [76+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [12+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	DWORD [40+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [20+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [28+esp]
+	xor	ecx,eax
+	mov	DWORD [24+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[659060556+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [48+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [4+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [36+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [44+esp]
+	shr	edi,10
+	add	ebx,DWORD [80+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [8+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [12+esp]
+	xor	edx,esi
+	mov	DWORD [44+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [4+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [16+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [24+esp]
+	xor	esi,ebp
+	mov	DWORD [20+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[883997877+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [52+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,esi
+	mov	esi,DWORD [40+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [48+esp]
+	shr	edi,10
+	add	ebx,DWORD [84+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [4+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	DWORD [48+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [12+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [20+esp]
+	xor	ecx,eax
+	mov	DWORD [16+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[958139571+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [56+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [28+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [44+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [52+esp]
+	shr	edi,10
+	add	ebx,DWORD [88+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [4+esp]
+	xor	edx,esi
+	mov	DWORD [52+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [28+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [8+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [16+esp]
+	xor	esi,ebp
+	mov	DWORD [12+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[1322822218+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [60+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,esi
+	mov	esi,DWORD [48+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [56+esp]
+	shr	edi,10
+	add	ebx,DWORD [92+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [28+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [esp]
+	xor	edx,ecx
+	mov	DWORD [56+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [4+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [12+esp]
+	xor	ecx,eax
+	mov	DWORD [8+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[1537002063+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [64+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [20+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [52+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [60+esp]
+	shr	edi,10
+	add	ebx,DWORD [32+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [24+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [28+esp]
+	xor	edx,esi
+	mov	DWORD [60+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [20+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [8+esp]
+	xor	esi,ebp
+	mov	DWORD [4+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[1747873779+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [68+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,esi
+	mov	esi,DWORD [56+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [64+esp]
+	shr	edi,10
+	add	ebx,DWORD [36+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [20+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	DWORD [64+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [28+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [4+esp]
+	xor	ecx,eax
+	mov	DWORD [esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[1955562222+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [72+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [12+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [60+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [68+esp]
+	shr	edi,10
+	add	ebx,DWORD [40+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [16+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [20+esp]
+	xor	edx,esi
+	mov	DWORD [68+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [12+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [24+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [esp]
+	xor	esi,ebp
+	mov	DWORD [28+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[2024104815+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [76+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,esi
+	mov	esi,DWORD [64+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [72+esp]
+	shr	edi,10
+	add	ebx,DWORD [44+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [12+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	DWORD [72+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [20+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [28+esp]
+	xor	ecx,eax
+	mov	DWORD [24+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[2227730452+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [80+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [4+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [68+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [76+esp]
+	shr	edi,10
+	add	ebx,DWORD [48+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [8+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [12+esp]
+	xor	edx,esi
+	mov	DWORD [76+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [4+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [16+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [24+esp]
+	xor	esi,ebp
+	mov	DWORD [20+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[2361852424+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [84+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,esi
+	mov	esi,DWORD [72+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [80+esp]
+	shr	edi,10
+	add	ebx,DWORD [52+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [4+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	DWORD [80+esp],ebx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [12+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [20+esp]
+	xor	ecx,eax
+	mov	DWORD [16+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[2428436474+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [88+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [28+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [76+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [84+esp]
+	shr	edi,10
+	add	ebx,DWORD [56+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [4+esp]
+	xor	edx,esi
+	mov	DWORD [84+esp],ebx
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [28+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [8+esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [16+esp]
+	xor	esi,ebp
+	mov	DWORD [12+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[2756734187+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	mov	ecx,DWORD [92+esp]
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,esi
+	mov	esi,DWORD [80+esp]
+	mov	ebx,ecx
+	ror	ecx,11
+	mov	edi,esi
+	ror	esi,2
+	xor	ecx,ebx
+	shr	ebx,3
+	ror	ecx,7
+	xor	esi,edi
+	xor	ebx,ecx
+	ror	esi,17
+	add	ebx,DWORD [88+esp]
+	shr	edi,10
+	add	ebx,DWORD [60+esp]
+	mov	ecx,edx
+	xor	edi,esi
+	mov	esi,DWORD [28+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [esp]
+	xor	edx,ecx
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	add	ebx,DWORD [4+esp]
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	ebx,edi
+	ror	ecx,9
+	mov	esi,eax
+	mov	edi,DWORD [12+esp]
+	xor	ecx,eax
+	mov	DWORD [8+esp],eax
+	xor	eax,edi
+	ror	ecx,11
+	and	ebp,eax
+	lea	edx,[3204031479+edx*1+ebx]
+	xor	ecx,esi
+	xor	ebp,edi
+	mov	esi,DWORD [32+esp]
+	ror	ecx,2
+	add	ebp,edx
+	add	edx,DWORD [20+esp]
+	add	ebp,ecx
+	mov	ecx,DWORD [84+esp]
+	mov	ebx,esi
+	ror	esi,11
+	mov	edi,ecx
+	ror	ecx,2
+	xor	esi,ebx
+	shr	ebx,3
+	ror	esi,7
+	xor	ecx,edi
+	xor	ebx,esi
+	ror	ecx,17
+	add	ebx,DWORD [92+esp]
+	shr	edi,10
+	add	ebx,DWORD [64+esp]
+	mov	esi,edx
+	xor	edi,ecx
+	mov	ecx,DWORD [24+esp]
+	ror	edx,14
+	add	ebx,edi
+	mov	edi,DWORD [28+esp]
+	xor	edx,esi
+	xor	ecx,edi
+	ror	edx,5
+	and	ecx,esi
+	mov	DWORD [20+esp],esi
+	xor	edx,esi
+	add	ebx,DWORD [esp]
+	xor	edi,ecx
+	ror	edx,6
+	mov	esi,ebp
+	add	ebx,edi
+	ror	esi,9
+	mov	ecx,ebp
+	mov	edi,DWORD [8+esp]
+	xor	esi,ebp
+	mov	DWORD [4+esp],ebp
+	xor	ebp,edi
+	ror	esi,11
+	and	eax,ebp
+	lea	edx,[3329325298+edx*1+ebx]
+	xor	esi,ecx
+	xor	eax,edi
+	ror	esi,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,esi
+	mov	esi,DWORD [96+esp]
+	xor	ebp,edi
+	mov	ecx,DWORD [12+esp]
+	add	eax,DWORD [esi]
+	add	ebp,DWORD [4+esi]
+	add	edi,DWORD [8+esi]
+	add	ecx,DWORD [12+esi]
+	mov	DWORD [esi],eax
+	mov	DWORD [4+esi],ebp
+	mov	DWORD [8+esi],edi
+	mov	DWORD [12+esi],ecx
+	mov	DWORD [4+esp],ebp
+	xor	ebp,edi
+	mov	DWORD [8+esp],edi
+	mov	DWORD [12+esp],ecx
+	mov	edi,DWORD [20+esp]
+	mov	ebx,DWORD [24+esp]
+	mov	ecx,DWORD [28+esp]
+	add	edx,DWORD [16+esi]
+	add	edi,DWORD [20+esi]
+	add	ebx,DWORD [24+esi]
+	add	ecx,DWORD [28+esi]
+	mov	DWORD [16+esi],edx
+	mov	DWORD [20+esi],edi
+	mov	DWORD [24+esi],ebx
+	mov	DWORD [28+esi],ecx
+	mov	DWORD [20+esp],edi
+	mov	edi,DWORD [100+esp]
+	mov	DWORD [24+esp],ebx
+	mov	DWORD [28+esp],ecx
+	cmp	edi,DWORD [104+esp]
+	jb	NEAR L$009grand_loop
+	mov	esp,DWORD [108+esp]
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	32
+L$005SSSE3:
+	lea	esp,[esp-96]
+	mov	eax,DWORD [esi]
+	mov	ebx,DWORD [4+esi]
+	mov	ecx,DWORD [8+esi]
+	mov	edi,DWORD [12+esi]
+	mov	DWORD [4+esp],ebx
+	xor	ebx,ecx
+	mov	DWORD [8+esp],ecx
+	mov	DWORD [12+esp],edi
+	mov	edx,DWORD [16+esi]
+	mov	edi,DWORD [20+esi]
+	mov	ecx,DWORD [24+esi]
+	mov	esi,DWORD [28+esi]
+	mov	DWORD [20+esp],edi
+	mov	edi,DWORD [100+esp]
+	mov	DWORD [24+esp],ecx
+	mov	DWORD [28+esp],esi
+	movdqa	xmm7,[256+ebp]
+	jmp	NEAR L$010grand_ssse3
+align	16
+L$010grand_ssse3:
+	movdqu	xmm0,[edi]
+	movdqu	xmm1,[16+edi]
+	movdqu	xmm2,[32+edi]
+	movdqu	xmm3,[48+edi]
+	add	edi,64
+db	102,15,56,0,199
+	mov	DWORD [100+esp],edi
+db	102,15,56,0,207
+	movdqa	xmm4,[ebp]
+db	102,15,56,0,215
+	movdqa	xmm5,[16+ebp]
+	paddd	xmm4,xmm0
+db	102,15,56,0,223
+	movdqa	xmm6,[32+ebp]
+	paddd	xmm5,xmm1
+	movdqa	xmm7,[48+ebp]
+	movdqa	[32+esp],xmm4
+	paddd	xmm6,xmm2
+	movdqa	[48+esp],xmm5
+	paddd	xmm7,xmm3
+	movdqa	[64+esp],xmm6
+	movdqa	[80+esp],xmm7
+	jmp	NEAR L$011ssse3_00_47
+align	16
+L$011ssse3_00_47:
+	add	ebp,64
+	mov	ecx,edx
+	movdqa	xmm4,xmm1
+	ror	edx,14
+	mov	esi,DWORD [20+esp]
+	movdqa	xmm7,xmm3
+	xor	edx,ecx
+	mov	edi,DWORD [24+esp]
+db	102,15,58,15,224,4
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+db	102,15,58,15,250,4
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	movdqa	xmm5,xmm4
+	ror	edx,6
+	mov	ecx,eax
+	movdqa	xmm6,xmm4
+	add	edx,edi
+	mov	edi,DWORD [4+esp]
+	psrld	xmm4,3
+	mov	esi,eax
+	ror	ecx,9
+	paddd	xmm0,xmm7
+	mov	DWORD [esp],eax
+	xor	ecx,eax
+	psrld	xmm6,7
+	xor	eax,edi
+	add	edx,DWORD [28+esp]
+	ror	ecx,11
+	and	ebx,eax
+	pshufd	xmm7,xmm3,250
+	xor	ecx,esi
+	add	edx,DWORD [32+esp]
+	pslld	xmm5,14
+	xor	ebx,edi
+	ror	ecx,2
+	pxor	xmm4,xmm6
+	add	ebx,edx
+	add	edx,DWORD [12+esp]
+	psrld	xmm6,11
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	pxor	xmm4,xmm5
+	mov	esi,DWORD [16+esp]
+	xor	edx,ecx
+	pslld	xmm5,11
+	mov	edi,DWORD [20+esp]
+	xor	esi,edi
+	ror	edx,5
+	pxor	xmm4,xmm6
+	and	esi,ecx
+	mov	DWORD [12+esp],ecx
+	movdqa	xmm6,xmm7
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	pxor	xmm4,xmm5
+	mov	ecx,ebx
+	add	edx,edi
+	psrld	xmm7,10
+	mov	edi,DWORD [esp]
+	mov	esi,ebx
+	ror	ecx,9
+	paddd	xmm0,xmm4
+	mov	DWORD [28+esp],ebx
+	xor	ecx,ebx
+	psrlq	xmm6,17
+	xor	ebx,edi
+	add	edx,DWORD [24+esp]
+	ror	ecx,11
+	pxor	xmm7,xmm6
+	and	eax,ebx
+	xor	ecx,esi
+	psrlq	xmm6,2
+	add	edx,DWORD [36+esp]
+	xor	eax,edi
+	ror	ecx,2
+	pxor	xmm7,xmm6
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	pshufd	xmm7,xmm7,128
+	add	eax,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [12+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [16+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	psrldq	xmm7,8
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	paddd	xmm0,xmm7
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [28+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [24+esp],eax
+	pshufd	xmm7,xmm0,80
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [20+esp]
+	movdqa	xmm6,xmm7
+	ror	ecx,11
+	psrld	xmm7,10
+	and	ebx,eax
+	psrlq	xmm6,17
+	xor	ecx,esi
+	add	edx,DWORD [40+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	pxor	xmm7,xmm6
+	add	ebx,edx
+	add	edx,DWORD [4+esp]
+	psrlq	xmm6,2
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	pxor	xmm7,xmm6
+	mov	esi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [12+esp]
+	pshufd	xmm7,xmm7,8
+	xor	esi,edi
+	ror	edx,5
+	movdqa	xmm6,[ebp]
+	and	esi,ecx
+	mov	DWORD [4+esp],ecx
+	pslldq	xmm7,8
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [24+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	paddd	xmm0,xmm7
+	mov	DWORD [20+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [16+esp]
+	paddd	xmm6,xmm0
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [44+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,ecx
+	movdqa	[32+esp],xmm6
+	mov	ecx,edx
+	movdqa	xmm4,xmm2
+	ror	edx,14
+	mov	esi,DWORD [4+esp]
+	movdqa	xmm7,xmm0
+	xor	edx,ecx
+	mov	edi,DWORD [8+esp]
+db	102,15,58,15,225,4
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+db	102,15,58,15,251,4
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	movdqa	xmm5,xmm4
+	ror	edx,6
+	mov	ecx,eax
+	movdqa	xmm6,xmm4
+	add	edx,edi
+	mov	edi,DWORD [20+esp]
+	psrld	xmm4,3
+	mov	esi,eax
+	ror	ecx,9
+	paddd	xmm1,xmm7
+	mov	DWORD [16+esp],eax
+	xor	ecx,eax
+	psrld	xmm6,7
+	xor	eax,edi
+	add	edx,DWORD [12+esp]
+	ror	ecx,11
+	and	ebx,eax
+	pshufd	xmm7,xmm0,250
+	xor	ecx,esi
+	add	edx,DWORD [48+esp]
+	pslld	xmm5,14
+	xor	ebx,edi
+	ror	ecx,2
+	pxor	xmm4,xmm6
+	add	ebx,edx
+	add	edx,DWORD [28+esp]
+	psrld	xmm6,11
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	pxor	xmm4,xmm5
+	mov	esi,DWORD [esp]
+	xor	edx,ecx
+	pslld	xmm5,11
+	mov	edi,DWORD [4+esp]
+	xor	esi,edi
+	ror	edx,5
+	pxor	xmm4,xmm6
+	and	esi,ecx
+	mov	DWORD [28+esp],ecx
+	movdqa	xmm6,xmm7
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	pxor	xmm4,xmm5
+	mov	ecx,ebx
+	add	edx,edi
+	psrld	xmm7,10
+	mov	edi,DWORD [16+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	paddd	xmm1,xmm4
+	mov	DWORD [12+esp],ebx
+	xor	ecx,ebx
+	psrlq	xmm6,17
+	xor	ebx,edi
+	add	edx,DWORD [8+esp]
+	ror	ecx,11
+	pxor	xmm7,xmm6
+	and	eax,ebx
+	xor	ecx,esi
+	psrlq	xmm6,2
+	add	edx,DWORD [52+esp]
+	xor	eax,edi
+	ror	ecx,2
+	pxor	xmm7,xmm6
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	pshufd	xmm7,xmm7,128
+	add	eax,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [28+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	psrldq	xmm7,8
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	paddd	xmm1,xmm7
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [12+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [8+esp],eax
+	pshufd	xmm7,xmm1,80
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [4+esp]
+	movdqa	xmm6,xmm7
+	ror	ecx,11
+	psrld	xmm7,10
+	and	ebx,eax
+	psrlq	xmm6,17
+	xor	ecx,esi
+	add	edx,DWORD [56+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	pxor	xmm7,xmm6
+	add	ebx,edx
+	add	edx,DWORD [20+esp]
+	psrlq	xmm6,2
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	pxor	xmm7,xmm6
+	mov	esi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [28+esp]
+	pshufd	xmm7,xmm7,8
+	xor	esi,edi
+	ror	edx,5
+	movdqa	xmm6,[16+ebp]
+	and	esi,ecx
+	mov	DWORD [20+esp],ecx
+	pslldq	xmm7,8
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [8+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	paddd	xmm1,xmm7
+	mov	DWORD [4+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [esp]
+	paddd	xmm6,xmm1
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [60+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,ecx
+	movdqa	[48+esp],xmm6
+	mov	ecx,edx
+	movdqa	xmm4,xmm3
+	ror	edx,14
+	mov	esi,DWORD [20+esp]
+	movdqa	xmm7,xmm1
+	xor	edx,ecx
+	mov	edi,DWORD [24+esp]
+db	102,15,58,15,226,4
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+db	102,15,58,15,248,4
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	movdqa	xmm5,xmm4
+	ror	edx,6
+	mov	ecx,eax
+	movdqa	xmm6,xmm4
+	add	edx,edi
+	mov	edi,DWORD [4+esp]
+	psrld	xmm4,3
+	mov	esi,eax
+	ror	ecx,9
+	paddd	xmm2,xmm7
+	mov	DWORD [esp],eax
+	xor	ecx,eax
+	psrld	xmm6,7
+	xor	eax,edi
+	add	edx,DWORD [28+esp]
+	ror	ecx,11
+	and	ebx,eax
+	pshufd	xmm7,xmm1,250
+	xor	ecx,esi
+	add	edx,DWORD [64+esp]
+	pslld	xmm5,14
+	xor	ebx,edi
+	ror	ecx,2
+	pxor	xmm4,xmm6
+	add	ebx,edx
+	add	edx,DWORD [12+esp]
+	psrld	xmm6,11
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	pxor	xmm4,xmm5
+	mov	esi,DWORD [16+esp]
+	xor	edx,ecx
+	pslld	xmm5,11
+	mov	edi,DWORD [20+esp]
+	xor	esi,edi
+	ror	edx,5
+	pxor	xmm4,xmm6
+	and	esi,ecx
+	mov	DWORD [12+esp],ecx
+	movdqa	xmm6,xmm7
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	pxor	xmm4,xmm5
+	mov	ecx,ebx
+	add	edx,edi
+	psrld	xmm7,10
+	mov	edi,DWORD [esp]
+	mov	esi,ebx
+	ror	ecx,9
+	paddd	xmm2,xmm4
+	mov	DWORD [28+esp],ebx
+	xor	ecx,ebx
+	psrlq	xmm6,17
+	xor	ebx,edi
+	add	edx,DWORD [24+esp]
+	ror	ecx,11
+	pxor	xmm7,xmm6
+	and	eax,ebx
+	xor	ecx,esi
+	psrlq	xmm6,2
+	add	edx,DWORD [68+esp]
+	xor	eax,edi
+	ror	ecx,2
+	pxor	xmm7,xmm6
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	pshufd	xmm7,xmm7,128
+	add	eax,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [12+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [16+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	psrldq	xmm7,8
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	paddd	xmm2,xmm7
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [28+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [24+esp],eax
+	pshufd	xmm7,xmm2,80
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [20+esp]
+	movdqa	xmm6,xmm7
+	ror	ecx,11
+	psrld	xmm7,10
+	and	ebx,eax
+	psrlq	xmm6,17
+	xor	ecx,esi
+	add	edx,DWORD [72+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	pxor	xmm7,xmm6
+	add	ebx,edx
+	add	edx,DWORD [4+esp]
+	psrlq	xmm6,2
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	pxor	xmm7,xmm6
+	mov	esi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [12+esp]
+	pshufd	xmm7,xmm7,8
+	xor	esi,edi
+	ror	edx,5
+	movdqa	xmm6,[32+ebp]
+	and	esi,ecx
+	mov	DWORD [4+esp],ecx
+	pslldq	xmm7,8
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [24+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	paddd	xmm2,xmm7
+	mov	DWORD [20+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [16+esp]
+	paddd	xmm6,xmm2
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [76+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,ecx
+	movdqa	[64+esp],xmm6
+	mov	ecx,edx
+	movdqa	xmm4,xmm0
+	ror	edx,14
+	mov	esi,DWORD [4+esp]
+	movdqa	xmm7,xmm2
+	xor	edx,ecx
+	mov	edi,DWORD [8+esp]
+db	102,15,58,15,227,4
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+db	102,15,58,15,249,4
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	movdqa	xmm5,xmm4
+	ror	edx,6
+	mov	ecx,eax
+	movdqa	xmm6,xmm4
+	add	edx,edi
+	mov	edi,DWORD [20+esp]
+	psrld	xmm4,3
+	mov	esi,eax
+	ror	ecx,9
+	paddd	xmm3,xmm7
+	mov	DWORD [16+esp],eax
+	xor	ecx,eax
+	psrld	xmm6,7
+	xor	eax,edi
+	add	edx,DWORD [12+esp]
+	ror	ecx,11
+	and	ebx,eax
+	pshufd	xmm7,xmm2,250
+	xor	ecx,esi
+	add	edx,DWORD [80+esp]
+	pslld	xmm5,14
+	xor	ebx,edi
+	ror	ecx,2
+	pxor	xmm4,xmm6
+	add	ebx,edx
+	add	edx,DWORD [28+esp]
+	psrld	xmm6,11
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	pxor	xmm4,xmm5
+	mov	esi,DWORD [esp]
+	xor	edx,ecx
+	pslld	xmm5,11
+	mov	edi,DWORD [4+esp]
+	xor	esi,edi
+	ror	edx,5
+	pxor	xmm4,xmm6
+	and	esi,ecx
+	mov	DWORD [28+esp],ecx
+	movdqa	xmm6,xmm7
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	pxor	xmm4,xmm5
+	mov	ecx,ebx
+	add	edx,edi
+	psrld	xmm7,10
+	mov	edi,DWORD [16+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	paddd	xmm3,xmm4
+	mov	DWORD [12+esp],ebx
+	xor	ecx,ebx
+	psrlq	xmm6,17
+	xor	ebx,edi
+	add	edx,DWORD [8+esp]
+	ror	ecx,11
+	pxor	xmm7,xmm6
+	and	eax,ebx
+	xor	ecx,esi
+	psrlq	xmm6,2
+	add	edx,DWORD [84+esp]
+	xor	eax,edi
+	ror	ecx,2
+	pxor	xmm7,xmm6
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	pshufd	xmm7,xmm7,128
+	add	eax,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [28+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	psrldq	xmm7,8
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	paddd	xmm3,xmm7
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [12+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [8+esp],eax
+	pshufd	xmm7,xmm3,80
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [4+esp]
+	movdqa	xmm6,xmm7
+	ror	ecx,11
+	psrld	xmm7,10
+	and	ebx,eax
+	psrlq	xmm6,17
+	xor	ecx,esi
+	add	edx,DWORD [88+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	pxor	xmm7,xmm6
+	add	ebx,edx
+	add	edx,DWORD [20+esp]
+	psrlq	xmm6,2
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	pxor	xmm7,xmm6
+	mov	esi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [28+esp]
+	pshufd	xmm7,xmm7,8
+	xor	esi,edi
+	ror	edx,5
+	movdqa	xmm6,[48+ebp]
+	and	esi,ecx
+	mov	DWORD [20+esp],ecx
+	pslldq	xmm7,8
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [8+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	paddd	xmm3,xmm7
+	mov	DWORD [4+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [esp]
+	paddd	xmm6,xmm3
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [92+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,ecx
+	movdqa	[80+esp],xmm6
+	cmp	DWORD [64+ebp],66051
+	jne	NEAR L$011ssse3_00_47
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [20+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [24+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [4+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [28+esp]
+	ror	ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [32+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	add	ebx,edx
+	add	edx,DWORD [12+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [20+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [12+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [esp]
+	mov	esi,ebx
+	ror	ecx,9
+	mov	DWORD [28+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [24+esp]
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [36+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [12+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [16+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [28+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [24+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [20+esp]
+	ror	ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [40+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	add	ebx,edx
+	add	edx,DWORD [4+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [12+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [4+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [24+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	mov	DWORD [20+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [16+esp]
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [44+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [4+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [8+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [20+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [16+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [12+esp]
+	ror	ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [48+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	add	ebx,edx
+	add	edx,DWORD [28+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [esp]
+	xor	edx,ecx
+	mov	edi,DWORD [4+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [28+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [16+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	mov	DWORD [12+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [8+esp]
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [52+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [28+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [12+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [8+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [4+esp]
+	ror	ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [56+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	add	ebx,edx
+	add	edx,DWORD [20+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [28+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [20+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [8+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	mov	DWORD [4+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [esp]
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [60+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [20+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [24+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [4+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [28+esp]
+	ror	ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [64+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	add	ebx,edx
+	add	edx,DWORD [12+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [20+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [12+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [esp]
+	mov	esi,ebx
+	ror	ecx,9
+	mov	DWORD [28+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [24+esp]
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [68+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [12+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [16+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [28+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [24+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [20+esp]
+	ror	ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [72+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	add	ebx,edx
+	add	edx,DWORD [4+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [12+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [4+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [24+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	mov	DWORD [20+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [16+esp]
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [76+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [4+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [8+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [20+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [16+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [12+esp]
+	ror	ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [80+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	add	ebx,edx
+	add	edx,DWORD [28+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [esp]
+	xor	edx,ecx
+	mov	edi,DWORD [4+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [28+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [16+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	mov	DWORD [12+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [8+esp]
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [84+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [28+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [12+esp]
+	mov	esi,eax
+	ror	ecx,9
+	mov	DWORD [8+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [4+esp]
+	ror	ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [88+esp]
+	xor	ebx,edi
+	ror	ecx,2
+	add	ebx,edx
+	add	edx,DWORD [20+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	ror	edx,14
+	mov	esi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [28+esp]
+	xor	esi,edi
+	ror	edx,5
+	and	esi,ecx
+	mov	DWORD [20+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	ror	edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [8+esp]
+	mov	esi,ebx
+	ror	ecx,9
+	mov	DWORD [4+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [esp]
+	ror	ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [92+esp]
+	xor	eax,edi
+	ror	ecx,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,ecx
+	mov	esi,DWORD [96+esp]
+	xor	ebx,edi
+	mov	ecx,DWORD [12+esp]
+	add	eax,DWORD [esi]
+	add	ebx,DWORD [4+esi]
+	add	edi,DWORD [8+esi]
+	add	ecx,DWORD [12+esi]
+	mov	DWORD [esi],eax
+	mov	DWORD [4+esi],ebx
+	mov	DWORD [8+esi],edi
+	mov	DWORD [12+esi],ecx
+	mov	DWORD [4+esp],ebx
+	xor	ebx,edi
+	mov	DWORD [8+esp],edi
+	mov	DWORD [12+esp],ecx
+	mov	edi,DWORD [20+esp]
+	mov	ecx,DWORD [24+esp]
+	add	edx,DWORD [16+esi]
+	add	edi,DWORD [20+esi]
+	add	ecx,DWORD [24+esi]
+	mov	DWORD [16+esi],edx
+	mov	DWORD [20+esi],edi
+	mov	DWORD [20+esp],edi
+	mov	edi,DWORD [28+esp]
+	mov	DWORD [24+esi],ecx
+	add	edi,DWORD [28+esi]
+	mov	DWORD [24+esp],ecx
+	mov	DWORD [28+esi],edi
+	mov	DWORD [28+esp],edi
+	mov	edi,DWORD [100+esp]
+	movdqa	xmm7,[64+ebp]
+	sub	ebp,192
+	cmp	edi,DWORD [104+esp]
+	jb	NEAR L$010grand_ssse3
+	mov	esp,DWORD [108+esp]
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	32
+L$004AVX:
+	lea	esp,[esp-96]
+	vzeroall
+	mov	eax,DWORD [esi]
+	mov	ebx,DWORD [4+esi]
+	mov	ecx,DWORD [8+esi]
+	mov	edi,DWORD [12+esi]
+	mov	DWORD [4+esp],ebx
+	xor	ebx,ecx
+	mov	DWORD [8+esp],ecx
+	mov	DWORD [12+esp],edi
+	mov	edx,DWORD [16+esi]
+	mov	edi,DWORD [20+esi]
+	mov	ecx,DWORD [24+esi]
+	mov	esi,DWORD [28+esi]
+	mov	DWORD [20+esp],edi
+	mov	edi,DWORD [100+esp]
+	mov	DWORD [24+esp],ecx
+	mov	DWORD [28+esp],esi
+	vmovdqa	xmm7,[256+ebp]
+	jmp	NEAR L$012grand_avx
+align	32
+L$012grand_avx:
+	vmovdqu	xmm0,[edi]
+	vmovdqu	xmm1,[16+edi]
+	vmovdqu	xmm2,[32+edi]
+	vmovdqu	xmm3,[48+edi]
+	add	edi,64
+	vpshufb	xmm0,xmm0,xmm7
+	mov	DWORD [100+esp],edi
+	vpshufb	xmm1,xmm1,xmm7
+	vpshufb	xmm2,xmm2,xmm7
+	vpaddd	xmm4,xmm0,[ebp]
+	vpshufb	xmm3,xmm3,xmm7
+	vpaddd	xmm5,xmm1,[16+ebp]
+	vpaddd	xmm6,xmm2,[32+ebp]
+	vpaddd	xmm7,xmm3,[48+ebp]
+	vmovdqa	[32+esp],xmm4
+	vmovdqa	[48+esp],xmm5
+	vmovdqa	[64+esp],xmm6
+	vmovdqa	[80+esp],xmm7
+	jmp	NEAR L$013avx_00_47
+align	16
+L$013avx_00_47:
+	add	ebp,64
+	vpalignr	xmm4,xmm1,xmm0,4
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [20+esp]
+	vpalignr	xmm7,xmm3,xmm2,4
+	xor	edx,ecx
+	mov	edi,DWORD [24+esp]
+	xor	esi,edi
+	vpsrld	xmm6,xmm4,7
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	vpaddd	xmm0,xmm0,xmm7
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrld	xmm7,xmm4,3
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [4+esp]
+	vpslld	xmm5,xmm4,14
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [esp],eax
+	vpxor	xmm4,xmm7,xmm6
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [28+esp]
+	vpshufd	xmm7,xmm3,250
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	vpsrld	xmm6,xmm6,11
+	add	edx,DWORD [32+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	vpxor	xmm4,xmm4,xmm5
+	add	ebx,edx
+	add	edx,DWORD [12+esp]
+	add	ebx,ecx
+	vpslld	xmm5,xmm5,11
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [16+esp]
+	vpxor	xmm4,xmm4,xmm6
+	xor	edx,ecx
+	mov	edi,DWORD [20+esp]
+	xor	esi,edi
+	vpsrld	xmm6,xmm7,10
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [12+esp],ecx
+	vpxor	xmm4,xmm4,xmm5
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrlq	xmm5,xmm7,17
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [esp]
+	vpaddd	xmm0,xmm0,xmm4
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [28+esp],ebx
+	vpxor	xmm6,xmm6,xmm5
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [24+esp]
+	vpsrlq	xmm7,xmm7,19
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	vpxor	xmm6,xmm6,xmm7
+	add	edx,DWORD [36+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	vpshufd	xmm7,xmm6,132
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,ecx
+	vpsrldq	xmm7,xmm7,8
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [12+esp]
+	vpaddd	xmm0,xmm0,xmm7
+	xor	edx,ecx
+	mov	edi,DWORD [16+esp]
+	xor	esi,edi
+	vpshufd	xmm7,xmm0,80
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	vpsrld	xmm6,xmm7,10
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrlq	xmm5,xmm7,17
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [28+esp]
+	vpxor	xmm6,xmm6,xmm5
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [24+esp],eax
+	vpsrlq	xmm7,xmm7,19
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [20+esp]
+	vpxor	xmm6,xmm6,xmm7
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	vpshufd	xmm7,xmm6,232
+	add	edx,DWORD [40+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	vpslldq	xmm7,xmm7,8
+	add	ebx,edx
+	add	edx,DWORD [4+esp]
+	add	ebx,ecx
+	vpaddd	xmm0,xmm0,xmm7
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [8+esp]
+	vpaddd	xmm6,xmm0,[ebp]
+	xor	edx,ecx
+	mov	edi,DWORD [12+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [4+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [24+esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [20+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [16+esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [44+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,ecx
+	vmovdqa	[32+esp],xmm6
+	vpalignr	xmm4,xmm2,xmm1,4
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [4+esp]
+	vpalignr	xmm7,xmm0,xmm3,4
+	xor	edx,ecx
+	mov	edi,DWORD [8+esp]
+	xor	esi,edi
+	vpsrld	xmm6,xmm4,7
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	vpaddd	xmm1,xmm1,xmm7
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrld	xmm7,xmm4,3
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [20+esp]
+	vpslld	xmm5,xmm4,14
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [16+esp],eax
+	vpxor	xmm4,xmm7,xmm6
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [12+esp]
+	vpshufd	xmm7,xmm0,250
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	vpsrld	xmm6,xmm6,11
+	add	edx,DWORD [48+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	vpxor	xmm4,xmm4,xmm5
+	add	ebx,edx
+	add	edx,DWORD [28+esp]
+	add	ebx,ecx
+	vpslld	xmm5,xmm5,11
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [esp]
+	vpxor	xmm4,xmm4,xmm6
+	xor	edx,ecx
+	mov	edi,DWORD [4+esp]
+	xor	esi,edi
+	vpsrld	xmm6,xmm7,10
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [28+esp],ecx
+	vpxor	xmm4,xmm4,xmm5
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrlq	xmm5,xmm7,17
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [16+esp]
+	vpaddd	xmm1,xmm1,xmm4
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [12+esp],ebx
+	vpxor	xmm6,xmm6,xmm5
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [8+esp]
+	vpsrlq	xmm7,xmm7,19
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	vpxor	xmm6,xmm6,xmm7
+	add	edx,DWORD [52+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	vpshufd	xmm7,xmm6,132
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,ecx
+	vpsrldq	xmm7,xmm7,8
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [28+esp]
+	vpaddd	xmm1,xmm1,xmm7
+	xor	edx,ecx
+	mov	edi,DWORD [esp]
+	xor	esi,edi
+	vpshufd	xmm7,xmm1,80
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	vpsrld	xmm6,xmm7,10
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrlq	xmm5,xmm7,17
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [12+esp]
+	vpxor	xmm6,xmm6,xmm5
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [8+esp],eax
+	vpsrlq	xmm7,xmm7,19
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [4+esp]
+	vpxor	xmm6,xmm6,xmm7
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	vpshufd	xmm7,xmm6,232
+	add	edx,DWORD [56+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	vpslldq	xmm7,xmm7,8
+	add	ebx,edx
+	add	edx,DWORD [20+esp]
+	add	ebx,ecx
+	vpaddd	xmm1,xmm1,xmm7
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [24+esp]
+	vpaddd	xmm6,xmm1,[16+ebp]
+	xor	edx,ecx
+	mov	edi,DWORD [28+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [20+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [8+esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [4+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [60+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,ecx
+	vmovdqa	[48+esp],xmm6
+	vpalignr	xmm4,xmm3,xmm2,4
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [20+esp]
+	vpalignr	xmm7,xmm1,xmm0,4
+	xor	edx,ecx
+	mov	edi,DWORD [24+esp]
+	xor	esi,edi
+	vpsrld	xmm6,xmm4,7
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	vpaddd	xmm2,xmm2,xmm7
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrld	xmm7,xmm4,3
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [4+esp]
+	vpslld	xmm5,xmm4,14
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [esp],eax
+	vpxor	xmm4,xmm7,xmm6
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [28+esp]
+	vpshufd	xmm7,xmm1,250
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	vpsrld	xmm6,xmm6,11
+	add	edx,DWORD [64+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	vpxor	xmm4,xmm4,xmm5
+	add	ebx,edx
+	add	edx,DWORD [12+esp]
+	add	ebx,ecx
+	vpslld	xmm5,xmm5,11
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [16+esp]
+	vpxor	xmm4,xmm4,xmm6
+	xor	edx,ecx
+	mov	edi,DWORD [20+esp]
+	xor	esi,edi
+	vpsrld	xmm6,xmm7,10
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [12+esp],ecx
+	vpxor	xmm4,xmm4,xmm5
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrlq	xmm5,xmm7,17
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [esp]
+	vpaddd	xmm2,xmm2,xmm4
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [28+esp],ebx
+	vpxor	xmm6,xmm6,xmm5
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [24+esp]
+	vpsrlq	xmm7,xmm7,19
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	vpxor	xmm6,xmm6,xmm7
+	add	edx,DWORD [68+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	vpshufd	xmm7,xmm6,132
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,ecx
+	vpsrldq	xmm7,xmm7,8
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [12+esp]
+	vpaddd	xmm2,xmm2,xmm7
+	xor	edx,ecx
+	mov	edi,DWORD [16+esp]
+	xor	esi,edi
+	vpshufd	xmm7,xmm2,80
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	vpsrld	xmm6,xmm7,10
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrlq	xmm5,xmm7,17
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [28+esp]
+	vpxor	xmm6,xmm6,xmm5
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [24+esp],eax
+	vpsrlq	xmm7,xmm7,19
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [20+esp]
+	vpxor	xmm6,xmm6,xmm7
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	vpshufd	xmm7,xmm6,232
+	add	edx,DWORD [72+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	vpslldq	xmm7,xmm7,8
+	add	ebx,edx
+	add	edx,DWORD [4+esp]
+	add	ebx,ecx
+	vpaddd	xmm2,xmm2,xmm7
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [8+esp]
+	vpaddd	xmm6,xmm2,[32+ebp]
+	xor	edx,ecx
+	mov	edi,DWORD [12+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [4+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [24+esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [20+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [16+esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [76+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,ecx
+	vmovdqa	[64+esp],xmm6
+	vpalignr	xmm4,xmm0,xmm3,4
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [4+esp]
+	vpalignr	xmm7,xmm2,xmm1,4
+	xor	edx,ecx
+	mov	edi,DWORD [8+esp]
+	xor	esi,edi
+	vpsrld	xmm6,xmm4,7
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	vpaddd	xmm3,xmm3,xmm7
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrld	xmm7,xmm4,3
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [20+esp]
+	vpslld	xmm5,xmm4,14
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [16+esp],eax
+	vpxor	xmm4,xmm7,xmm6
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [12+esp]
+	vpshufd	xmm7,xmm2,250
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	vpsrld	xmm6,xmm6,11
+	add	edx,DWORD [80+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	vpxor	xmm4,xmm4,xmm5
+	add	ebx,edx
+	add	edx,DWORD [28+esp]
+	add	ebx,ecx
+	vpslld	xmm5,xmm5,11
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [esp]
+	vpxor	xmm4,xmm4,xmm6
+	xor	edx,ecx
+	mov	edi,DWORD [4+esp]
+	xor	esi,edi
+	vpsrld	xmm6,xmm7,10
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [28+esp],ecx
+	vpxor	xmm4,xmm4,xmm5
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrlq	xmm5,xmm7,17
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [16+esp]
+	vpaddd	xmm3,xmm3,xmm4
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [12+esp],ebx
+	vpxor	xmm6,xmm6,xmm5
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [8+esp]
+	vpsrlq	xmm7,xmm7,19
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	vpxor	xmm6,xmm6,xmm7
+	add	edx,DWORD [84+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	vpshufd	xmm7,xmm6,132
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,ecx
+	vpsrldq	xmm7,xmm7,8
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [28+esp]
+	vpaddd	xmm3,xmm3,xmm7
+	xor	edx,ecx
+	mov	edi,DWORD [esp]
+	xor	esi,edi
+	vpshufd	xmm7,xmm3,80
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	vpsrld	xmm6,xmm7,10
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	vpsrlq	xmm5,xmm7,17
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [12+esp]
+	vpxor	xmm6,xmm6,xmm5
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [8+esp],eax
+	vpsrlq	xmm7,xmm7,19
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [4+esp]
+	vpxor	xmm6,xmm6,xmm7
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	vpshufd	xmm7,xmm6,232
+	add	edx,DWORD [88+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	vpslldq	xmm7,xmm7,8
+	add	ebx,edx
+	add	edx,DWORD [20+esp]
+	add	ebx,ecx
+	vpaddd	xmm3,xmm3,xmm7
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [24+esp]
+	vpaddd	xmm6,xmm3,[48+ebp]
+	xor	edx,ecx
+	mov	edi,DWORD [28+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [20+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [8+esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [4+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [92+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,ecx
+	vmovdqa	[80+esp],xmm6
+	cmp	DWORD [64+ebp],66051
+	jne	NEAR L$013avx_00_47
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [20+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [24+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [4+esp]
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [28+esp]
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [32+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	add	ebx,edx
+	add	edx,DWORD [12+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [20+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [12+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [28+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [24+esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [36+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [12+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [16+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [28+esp]
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [24+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [20+esp]
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [40+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	add	ebx,edx
+	add	edx,DWORD [4+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [12+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [4+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [24+esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [20+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [16+esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [44+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [4+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [8+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [20+esp]
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [16+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [12+esp]
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [48+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	add	ebx,edx
+	add	edx,DWORD [28+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [esp]
+	xor	edx,ecx
+	mov	edi,DWORD [4+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [28+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [16+esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [12+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [8+esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [52+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [28+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [12+esp]
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [8+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [4+esp]
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [56+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	add	ebx,edx
+	add	edx,DWORD [20+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [28+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [20+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [8+esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [4+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [60+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [20+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [24+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [16+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [4+esp]
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [28+esp]
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [64+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	add	ebx,edx
+	add	edx,DWORD [12+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [16+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [20+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [12+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [28+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [24+esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [68+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [8+esp]
+	add	eax,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [12+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [16+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [8+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [28+esp]
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [24+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [20+esp]
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [72+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	add	ebx,edx
+	add	edx,DWORD [4+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [8+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [12+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [4+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [24+esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [20+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [16+esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [76+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [esp]
+	add	eax,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [4+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [8+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [20+esp]
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [16+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [12+esp]
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [80+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	add	ebx,edx
+	add	edx,DWORD [28+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [esp]
+	xor	edx,ecx
+	mov	edi,DWORD [4+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [28+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [16+esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [12+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [8+esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [84+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [24+esp]
+	add	eax,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [28+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [24+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,eax
+	add	edx,edi
+	mov	edi,DWORD [12+esp]
+	mov	esi,eax
+	shrd	ecx,ecx,9
+	mov	DWORD [8+esp],eax
+	xor	ecx,eax
+	xor	eax,edi
+	add	edx,DWORD [4+esp]
+	shrd	ecx,ecx,11
+	and	ebx,eax
+	xor	ecx,esi
+	add	edx,DWORD [88+esp]
+	xor	ebx,edi
+	shrd	ecx,ecx,2
+	add	ebx,edx
+	add	edx,DWORD [20+esp]
+	add	ebx,ecx
+	mov	ecx,edx
+	shrd	edx,edx,14
+	mov	esi,DWORD [24+esp]
+	xor	edx,ecx
+	mov	edi,DWORD [28+esp]
+	xor	esi,edi
+	shrd	edx,edx,5
+	and	esi,ecx
+	mov	DWORD [20+esp],ecx
+	xor	edx,ecx
+	xor	edi,esi
+	shrd	edx,edx,6
+	mov	ecx,ebx
+	add	edx,edi
+	mov	edi,DWORD [8+esp]
+	mov	esi,ebx
+	shrd	ecx,ecx,9
+	mov	DWORD [4+esp],ebx
+	xor	ecx,ebx
+	xor	ebx,edi
+	add	edx,DWORD [esp]
+	shrd	ecx,ecx,11
+	and	eax,ebx
+	xor	ecx,esi
+	add	edx,DWORD [92+esp]
+	xor	eax,edi
+	shrd	ecx,ecx,2
+	add	eax,edx
+	add	edx,DWORD [16+esp]
+	add	eax,ecx
+	mov	esi,DWORD [96+esp]
+	xor	ebx,edi
+	mov	ecx,DWORD [12+esp]
+	add	eax,DWORD [esi]
+	add	ebx,DWORD [4+esi]
+	add	edi,DWORD [8+esi]
+	add	ecx,DWORD [12+esi]
+	mov	DWORD [esi],eax
+	mov	DWORD [4+esi],ebx
+	mov	DWORD [8+esi],edi
+	mov	DWORD [12+esi],ecx
+	mov	DWORD [4+esp],ebx
+	xor	ebx,edi
+	mov	DWORD [8+esp],edi
+	mov	DWORD [12+esp],ecx
+	mov	edi,DWORD [20+esp]
+	mov	ecx,DWORD [24+esp]
+	add	edx,DWORD [16+esi]
+	add	edi,DWORD [20+esi]
+	add	ecx,DWORD [24+esi]
+	mov	DWORD [16+esi],edx
+	mov	DWORD [20+esi],edi
+	mov	DWORD [20+esp],edi
+	mov	edi,DWORD [28+esp]
+	mov	DWORD [24+esi],ecx
+	add	edi,DWORD [28+esi]
+	mov	DWORD [24+esp],ecx
+	mov	DWORD [28+esi],edi
+	mov	DWORD [28+esp],edi
+	mov	edi,DWORD [100+esp]
+	vmovdqa	xmm7,[64+ebp]
+	sub	ebp,192
+	cmp	edi,DWORD [104+esp]
+	jb	NEAR L$012grand_avx
+	mov	esp,DWORD [108+esp]
+	vzeroall
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+segment	.bss
+common	_OPENSSL_ia32cap_P 16
diff --git a/deps/boringssl/win-x86/crypto/fipsmodule/sha512-586.asm b/deps/boringssl/win-x86/crypto/fipsmodule/sha512-586.asm
new file mode 100644
index 0000000..f2c47a7
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/fipsmodule/sha512-586.asm
@@ -0,0 +1,2841 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+;extern	_OPENSSL_ia32cap_P
+global	_sha512_block_data_order
+align	16
+_sha512_block_data_order:
+L$_sha512_block_data_order_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	esi,DWORD [20+esp]
+	mov	edi,DWORD [24+esp]
+	mov	eax,DWORD [28+esp]
+	mov	ebx,esp
+	call	L$000pic_point
+L$000pic_point:
+	pop	ebp
+	lea	ebp,[(L$001K512-L$000pic_point)+ebp]
+	sub	esp,16
+	and	esp,-64
+	shl	eax,7
+	add	eax,edi
+	mov	DWORD [esp],esi
+	mov	DWORD [4+esp],edi
+	mov	DWORD [8+esp],eax
+	mov	DWORD [12+esp],ebx
+	lea	edx,[_OPENSSL_ia32cap_P]
+	mov	ecx,DWORD [edx]
+	test	ecx,67108864
+	jz	NEAR L$002loop_x86
+	mov	edx,DWORD [4+edx]
+	movq	mm0,[esi]
+	and	ecx,16777216
+	movq	mm1,[8+esi]
+	and	edx,512
+	movq	mm2,[16+esi]
+	or	ecx,edx
+	movq	mm3,[24+esi]
+	movq	mm4,[32+esi]
+	movq	mm5,[40+esi]
+	movq	mm6,[48+esi]
+	movq	mm7,[56+esi]
+	cmp	ecx,16777728
+	je	NEAR L$003SSSE3
+	sub	esp,80
+	jmp	NEAR L$004loop_sse2
+align	16
+L$004loop_sse2:
+	movq	[8+esp],mm1
+	movq	[16+esp],mm2
+	movq	[24+esp],mm3
+	movq	[40+esp],mm5
+	movq	[48+esp],mm6
+	pxor	mm2,mm1
+	movq	[56+esp],mm7
+	movq	mm3,mm0
+	mov	eax,DWORD [edi]
+	mov	ebx,DWORD [4+edi]
+	add	edi,8
+	mov	edx,15
+	bswap	eax
+	bswap	ebx
+	jmp	NEAR L$00500_14_sse2
+align	16
+L$00500_14_sse2:
+	movd	mm1,eax
+	mov	eax,DWORD [edi]
+	movd	mm7,ebx
+	mov	ebx,DWORD [4+edi]
+	add	edi,8
+	bswap	eax
+	bswap	ebx
+	punpckldq	mm7,mm1
+	movq	mm1,mm4
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[32+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	movq	mm0,mm3
+	movq	[72+esp],mm7
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[56+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	paddq	mm7,[ebp]
+	pxor	mm3,mm4
+	movq	mm4,[24+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[8+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	sub	esp,8
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[40+esp]
+	paddq	mm3,mm2
+	movq	mm2,mm0
+	add	ebp,8
+	paddq	mm3,mm6
+	movq	mm6,[48+esp]
+	dec	edx
+	jnz	NEAR L$00500_14_sse2
+	movd	mm1,eax
+	movd	mm7,ebx
+	punpckldq	mm7,mm1
+	movq	mm1,mm4
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[32+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	movq	mm0,mm3
+	movq	[72+esp],mm7
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[56+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	paddq	mm7,[ebp]
+	pxor	mm3,mm4
+	movq	mm4,[24+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[8+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	sub	esp,8
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm7,[192+esp]
+	paddq	mm3,mm2
+	movq	mm2,mm0
+	add	ebp,8
+	paddq	mm3,mm6
+	pxor	mm0,mm0
+	mov	edx,32
+	jmp	NEAR L$00616_79_sse2
+align	16
+L$00616_79_sse2:
+	movq	mm5,[88+esp]
+	movq	mm1,mm7
+	psrlq	mm7,1
+	movq	mm6,mm5
+	psrlq	mm5,6
+	psllq	mm1,56
+	paddq	mm0,mm3
+	movq	mm3,mm7
+	psrlq	mm7,6
+	pxor	mm3,mm1
+	psllq	mm1,7
+	pxor	mm3,mm7
+	psrlq	mm7,1
+	pxor	mm3,mm1
+	movq	mm1,mm5
+	psrlq	mm5,13
+	pxor	mm7,mm3
+	psllq	mm6,3
+	pxor	mm1,mm5
+	paddq	mm7,[200+esp]
+	pxor	mm1,mm6
+	psrlq	mm5,42
+	paddq	mm7,[128+esp]
+	pxor	mm1,mm5
+	psllq	mm6,42
+	movq	mm5,[40+esp]
+	pxor	mm1,mm6
+	movq	mm6,[48+esp]
+	paddq	mm7,mm1
+	movq	mm1,mm4
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[32+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	movq	[72+esp],mm7
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[56+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	paddq	mm7,[ebp]
+	pxor	mm3,mm4
+	movq	mm4,[24+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[8+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	sub	esp,8
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm7,[192+esp]
+	paddq	mm2,mm6
+	add	ebp,8
+	movq	mm5,[88+esp]
+	movq	mm1,mm7
+	psrlq	mm7,1
+	movq	mm6,mm5
+	psrlq	mm5,6
+	psllq	mm1,56
+	paddq	mm2,mm3
+	movq	mm3,mm7
+	psrlq	mm7,6
+	pxor	mm3,mm1
+	psllq	mm1,7
+	pxor	mm3,mm7
+	psrlq	mm7,1
+	pxor	mm3,mm1
+	movq	mm1,mm5
+	psrlq	mm5,13
+	pxor	mm7,mm3
+	psllq	mm6,3
+	pxor	mm1,mm5
+	paddq	mm7,[200+esp]
+	pxor	mm1,mm6
+	psrlq	mm5,42
+	paddq	mm7,[128+esp]
+	pxor	mm1,mm5
+	psllq	mm6,42
+	movq	mm5,[40+esp]
+	pxor	mm1,mm6
+	movq	mm6,[48+esp]
+	paddq	mm7,mm1
+	movq	mm1,mm4
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[32+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	movq	[72+esp],mm7
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[56+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	paddq	mm7,[ebp]
+	pxor	mm3,mm4
+	movq	mm4,[24+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[8+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	sub	esp,8
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm7,[192+esp]
+	paddq	mm0,mm6
+	add	ebp,8
+	dec	edx
+	jnz	NEAR L$00616_79_sse2
+	paddq	mm0,mm3
+	movq	mm1,[8+esp]
+	movq	mm3,[24+esp]
+	movq	mm5,[40+esp]
+	movq	mm6,[48+esp]
+	movq	mm7,[56+esp]
+	pxor	mm2,mm1
+	paddq	mm0,[esi]
+	paddq	mm1,[8+esi]
+	paddq	mm2,[16+esi]
+	paddq	mm3,[24+esi]
+	paddq	mm4,[32+esi]
+	paddq	mm5,[40+esi]
+	paddq	mm6,[48+esi]
+	paddq	mm7,[56+esi]
+	mov	eax,640
+	movq	[esi],mm0
+	movq	[8+esi],mm1
+	movq	[16+esi],mm2
+	movq	[24+esi],mm3
+	movq	[32+esi],mm4
+	movq	[40+esi],mm5
+	movq	[48+esi],mm6
+	movq	[56+esi],mm7
+	lea	esp,[eax*1+esp]
+	sub	ebp,eax
+	cmp	edi,DWORD [88+esp]
+	jb	NEAR L$004loop_sse2
+	mov	esp,DWORD [92+esp]
+	emms
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	32
+L$003SSSE3:
+	lea	edx,[esp-64]
+	sub	esp,256
+	movdqa	xmm1,[640+ebp]
+	movdqu	xmm0,[edi]
+db	102,15,56,0,193
+	movdqa	xmm3,[ebp]
+	movdqa	xmm2,xmm1
+	movdqu	xmm1,[16+edi]
+	paddq	xmm3,xmm0
+db	102,15,56,0,202
+	movdqa	[edx-128],xmm3
+	movdqa	xmm4,[16+ebp]
+	movdqa	xmm3,xmm2
+	movdqu	xmm2,[32+edi]
+	paddq	xmm4,xmm1
+db	102,15,56,0,211
+	movdqa	[edx-112],xmm4
+	movdqa	xmm5,[32+ebp]
+	movdqa	xmm4,xmm3
+	movdqu	xmm3,[48+edi]
+	paddq	xmm5,xmm2
+db	102,15,56,0,220
+	movdqa	[edx-96],xmm5
+	movdqa	xmm6,[48+ebp]
+	movdqa	xmm5,xmm4
+	movdqu	xmm4,[64+edi]
+	paddq	xmm6,xmm3
+db	102,15,56,0,229
+	movdqa	[edx-80],xmm6
+	movdqa	xmm7,[64+ebp]
+	movdqa	xmm6,xmm5
+	movdqu	xmm5,[80+edi]
+	paddq	xmm7,xmm4
+db	102,15,56,0,238
+	movdqa	[edx-64],xmm7
+	movdqa	[edx],xmm0
+	movdqa	xmm0,[80+ebp]
+	movdqa	xmm7,xmm6
+	movdqu	xmm6,[96+edi]
+	paddq	xmm0,xmm5
+db	102,15,56,0,247
+	movdqa	[edx-48],xmm0
+	movdqa	[16+edx],xmm1
+	movdqa	xmm1,[96+ebp]
+	movdqa	xmm0,xmm7
+	movdqu	xmm7,[112+edi]
+	paddq	xmm1,xmm6
+db	102,15,56,0,248
+	movdqa	[edx-32],xmm1
+	movdqa	[32+edx],xmm2
+	movdqa	xmm2,[112+ebp]
+	movdqa	xmm0,[edx]
+	paddq	xmm2,xmm7
+	movdqa	[edx-16],xmm2
+	nop
+align	32
+L$007loop_ssse3:
+	movdqa	xmm2,[16+edx]
+	movdqa	[48+edx],xmm3
+	lea	ebp,[128+ebp]
+	movq	[8+esp],mm1
+	mov	ebx,edi
+	movq	[16+esp],mm2
+	lea	edi,[128+edi]
+	movq	[24+esp],mm3
+	cmp	edi,eax
+	movq	[40+esp],mm5
+	cmovb	ebx,edi
+	movq	[48+esp],mm6
+	mov	ecx,4
+	pxor	mm2,mm1
+	movq	[56+esp],mm7
+	pxor	mm3,mm3
+	jmp	NEAR L$00800_47_ssse3
+align	32
+L$00800_47_ssse3:
+	movdqa	xmm3,xmm5
+	movdqa	xmm1,xmm2
+db	102,15,58,15,208,8
+	movdqa	[edx],xmm4
+db	102,15,58,15,220,8
+	movdqa	xmm4,xmm2
+	psrlq	xmm2,7
+	paddq	xmm0,xmm3
+	movdqa	xmm3,xmm4
+	psrlq	xmm4,1
+	psllq	xmm3,56
+	pxor	xmm2,xmm4
+	psrlq	xmm4,7
+	pxor	xmm2,xmm3
+	psllq	xmm3,7
+	pxor	xmm2,xmm4
+	movdqa	xmm4,xmm7
+	pxor	xmm2,xmm3
+	movdqa	xmm3,xmm7
+	psrlq	xmm4,6
+	paddq	xmm0,xmm2
+	movdqa	xmm2,xmm7
+	psrlq	xmm3,19
+	psllq	xmm2,3
+	pxor	xmm4,xmm3
+	psrlq	xmm3,42
+	pxor	xmm4,xmm2
+	psllq	xmm2,42
+	pxor	xmm4,xmm3
+	movdqa	xmm3,[32+edx]
+	pxor	xmm4,xmm2
+	movdqa	xmm2,[ebp]
+	movq	mm1,mm4
+	paddq	xmm0,xmm4
+	movq	mm7,[edx-128]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[32+esp],mm4
+	paddq	xmm2,xmm0
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[56+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[24+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[8+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[32+esp]
+	paddq	mm2,mm6
+	movq	mm6,[40+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-120]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[24+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[56+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[48+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[16+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[24+esp]
+	paddq	mm0,mm6
+	movq	mm6,[32+esp]
+	movdqa	[edx-128],xmm2
+	movdqa	xmm4,xmm6
+	movdqa	xmm2,xmm3
+db	102,15,58,15,217,8
+	movdqa	[16+edx],xmm5
+db	102,15,58,15,229,8
+	movdqa	xmm5,xmm3
+	psrlq	xmm3,7
+	paddq	xmm1,xmm4
+	movdqa	xmm4,xmm5
+	psrlq	xmm5,1
+	psllq	xmm4,56
+	pxor	xmm3,xmm5
+	psrlq	xmm5,7
+	pxor	xmm3,xmm4
+	psllq	xmm4,7
+	pxor	xmm3,xmm5
+	movdqa	xmm5,xmm0
+	pxor	xmm3,xmm4
+	movdqa	xmm4,xmm0
+	psrlq	xmm5,6
+	paddq	xmm1,xmm3
+	movdqa	xmm3,xmm0
+	psrlq	xmm4,19
+	psllq	xmm3,3
+	pxor	xmm5,xmm4
+	psrlq	xmm4,42
+	pxor	xmm5,xmm3
+	psllq	xmm3,42
+	pxor	xmm5,xmm4
+	movdqa	xmm4,[48+edx]
+	pxor	xmm5,xmm3
+	movdqa	xmm3,[16+ebp]
+	movq	mm1,mm4
+	paddq	xmm1,xmm5
+	movq	mm7,[edx-112]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[16+esp],mm4
+	paddq	xmm3,xmm1
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[48+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[40+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[8+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[56+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[16+esp]
+	paddq	mm2,mm6
+	movq	mm6,[24+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-104]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[8+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[40+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[32+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[48+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[8+esp]
+	paddq	mm0,mm6
+	movq	mm6,[16+esp]
+	movdqa	[edx-112],xmm3
+	movdqa	xmm5,xmm7
+	movdqa	xmm3,xmm4
+db	102,15,58,15,226,8
+	movdqa	[32+edx],xmm6
+db	102,15,58,15,238,8
+	movdqa	xmm6,xmm4
+	psrlq	xmm4,7
+	paddq	xmm2,xmm5
+	movdqa	xmm5,xmm6
+	psrlq	xmm6,1
+	psllq	xmm5,56
+	pxor	xmm4,xmm6
+	psrlq	xmm6,7
+	pxor	xmm4,xmm5
+	psllq	xmm5,7
+	pxor	xmm4,xmm6
+	movdqa	xmm6,xmm1
+	pxor	xmm4,xmm5
+	movdqa	xmm5,xmm1
+	psrlq	xmm6,6
+	paddq	xmm2,xmm4
+	movdqa	xmm4,xmm1
+	psrlq	xmm5,19
+	psllq	xmm4,3
+	pxor	xmm6,xmm5
+	psrlq	xmm5,42
+	pxor	xmm6,xmm4
+	psllq	xmm4,42
+	pxor	xmm6,xmm5
+	movdqa	xmm5,[edx]
+	pxor	xmm6,xmm4
+	movdqa	xmm4,[32+ebp]
+	movq	mm1,mm4
+	paddq	xmm2,xmm6
+	movq	mm7,[edx-96]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[esp],mm4
+	paddq	xmm4,xmm2
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[32+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[24+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[56+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[40+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[esp]
+	paddq	mm2,mm6
+	movq	mm6,[8+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-88]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[56+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[24+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[16+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[48+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[32+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[56+esp]
+	paddq	mm0,mm6
+	movq	mm6,[esp]
+	movdqa	[edx-96],xmm4
+	movdqa	xmm6,xmm0
+	movdqa	xmm4,xmm5
+db	102,15,58,15,235,8
+	movdqa	[48+edx],xmm7
+db	102,15,58,15,247,8
+	movdqa	xmm7,xmm5
+	psrlq	xmm5,7
+	paddq	xmm3,xmm6
+	movdqa	xmm6,xmm7
+	psrlq	xmm7,1
+	psllq	xmm6,56
+	pxor	xmm5,xmm7
+	psrlq	xmm7,7
+	pxor	xmm5,xmm6
+	psllq	xmm6,7
+	pxor	xmm5,xmm7
+	movdqa	xmm7,xmm2
+	pxor	xmm5,xmm6
+	movdqa	xmm6,xmm2
+	psrlq	xmm7,6
+	paddq	xmm3,xmm5
+	movdqa	xmm5,xmm2
+	psrlq	xmm6,19
+	psllq	xmm5,3
+	pxor	xmm7,xmm6
+	psrlq	xmm6,42
+	pxor	xmm7,xmm5
+	psllq	xmm5,42
+	pxor	xmm7,xmm6
+	movdqa	xmm6,[16+edx]
+	pxor	xmm7,xmm5
+	movdqa	xmm5,[48+ebp]
+	movq	mm1,mm4
+	paddq	xmm3,xmm7
+	movq	mm7,[edx-80]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[48+esp],mm4
+	paddq	xmm5,xmm3
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[16+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[8+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[40+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[24+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[48+esp]
+	paddq	mm2,mm6
+	movq	mm6,[56+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-72]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[40+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[8+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[32+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[16+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[40+esp]
+	paddq	mm0,mm6
+	movq	mm6,[48+esp]
+	movdqa	[edx-80],xmm5
+	movdqa	xmm7,xmm1
+	movdqa	xmm5,xmm6
+db	102,15,58,15,244,8
+	movdqa	[edx],xmm0
+db	102,15,58,15,248,8
+	movdqa	xmm0,xmm6
+	psrlq	xmm6,7
+	paddq	xmm4,xmm7
+	movdqa	xmm7,xmm0
+	psrlq	xmm0,1
+	psllq	xmm7,56
+	pxor	xmm6,xmm0
+	psrlq	xmm0,7
+	pxor	xmm6,xmm7
+	psllq	xmm7,7
+	pxor	xmm6,xmm0
+	movdqa	xmm0,xmm3
+	pxor	xmm6,xmm7
+	movdqa	xmm7,xmm3
+	psrlq	xmm0,6
+	paddq	xmm4,xmm6
+	movdqa	xmm6,xmm3
+	psrlq	xmm7,19
+	psllq	xmm6,3
+	pxor	xmm0,xmm7
+	psrlq	xmm7,42
+	pxor	xmm0,xmm6
+	psllq	xmm6,42
+	pxor	xmm0,xmm7
+	movdqa	xmm7,[32+edx]
+	pxor	xmm0,xmm6
+	movdqa	xmm6,[64+ebp]
+	movq	mm1,mm4
+	paddq	xmm4,xmm0
+	movq	mm7,[edx-64]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[32+esp],mm4
+	paddq	xmm6,xmm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[56+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[24+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[8+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[32+esp]
+	paddq	mm2,mm6
+	movq	mm6,[40+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-56]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[24+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[56+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[48+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[16+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[24+esp]
+	paddq	mm0,mm6
+	movq	mm6,[32+esp]
+	movdqa	[edx-64],xmm6
+	movdqa	xmm0,xmm2
+	movdqa	xmm6,xmm7
+db	102,15,58,15,253,8
+	movdqa	[16+edx],xmm1
+db	102,15,58,15,193,8
+	movdqa	xmm1,xmm7
+	psrlq	xmm7,7
+	paddq	xmm5,xmm0
+	movdqa	xmm0,xmm1
+	psrlq	xmm1,1
+	psllq	xmm0,56
+	pxor	xmm7,xmm1
+	psrlq	xmm1,7
+	pxor	xmm7,xmm0
+	psllq	xmm0,7
+	pxor	xmm7,xmm1
+	movdqa	xmm1,xmm4
+	pxor	xmm7,xmm0
+	movdqa	xmm0,xmm4
+	psrlq	xmm1,6
+	paddq	xmm5,xmm7
+	movdqa	xmm7,xmm4
+	psrlq	xmm0,19
+	psllq	xmm7,3
+	pxor	xmm1,xmm0
+	psrlq	xmm0,42
+	pxor	xmm1,xmm7
+	psllq	xmm7,42
+	pxor	xmm1,xmm0
+	movdqa	xmm0,[48+edx]
+	pxor	xmm1,xmm7
+	movdqa	xmm7,[80+ebp]
+	movq	mm1,mm4
+	paddq	xmm5,xmm1
+	movq	mm7,[edx-48]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[16+esp],mm4
+	paddq	xmm7,xmm5
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[48+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[40+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[8+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[56+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[16+esp]
+	paddq	mm2,mm6
+	movq	mm6,[24+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-40]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[8+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[40+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[32+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[48+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[8+esp]
+	paddq	mm0,mm6
+	movq	mm6,[16+esp]
+	movdqa	[edx-48],xmm7
+	movdqa	xmm1,xmm3
+	movdqa	xmm7,xmm0
+db	102,15,58,15,198,8
+	movdqa	[32+edx],xmm2
+db	102,15,58,15,202,8
+	movdqa	xmm2,xmm0
+	psrlq	xmm0,7
+	paddq	xmm6,xmm1
+	movdqa	xmm1,xmm2
+	psrlq	xmm2,1
+	psllq	xmm1,56
+	pxor	xmm0,xmm2
+	psrlq	xmm2,7
+	pxor	xmm0,xmm1
+	psllq	xmm1,7
+	pxor	xmm0,xmm2
+	movdqa	xmm2,xmm5
+	pxor	xmm0,xmm1
+	movdqa	xmm1,xmm5
+	psrlq	xmm2,6
+	paddq	xmm6,xmm0
+	movdqa	xmm0,xmm5
+	psrlq	xmm1,19
+	psllq	xmm0,3
+	pxor	xmm2,xmm1
+	psrlq	xmm1,42
+	pxor	xmm2,xmm0
+	psllq	xmm0,42
+	pxor	xmm2,xmm1
+	movdqa	xmm1,[edx]
+	pxor	xmm2,xmm0
+	movdqa	xmm0,[96+ebp]
+	movq	mm1,mm4
+	paddq	xmm6,xmm2
+	movq	mm7,[edx-32]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[esp],mm4
+	paddq	xmm0,xmm6
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[32+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[24+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[56+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[40+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[esp]
+	paddq	mm2,mm6
+	movq	mm6,[8+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-24]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[56+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[24+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[16+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[48+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[32+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[56+esp]
+	paddq	mm0,mm6
+	movq	mm6,[esp]
+	movdqa	[edx-32],xmm0
+	movdqa	xmm2,xmm4
+	movdqa	xmm0,xmm1
+db	102,15,58,15,207,8
+	movdqa	[48+edx],xmm3
+db	102,15,58,15,211,8
+	movdqa	xmm3,xmm1
+	psrlq	xmm1,7
+	paddq	xmm7,xmm2
+	movdqa	xmm2,xmm3
+	psrlq	xmm3,1
+	psllq	xmm2,56
+	pxor	xmm1,xmm3
+	psrlq	xmm3,7
+	pxor	xmm1,xmm2
+	psllq	xmm2,7
+	pxor	xmm1,xmm3
+	movdqa	xmm3,xmm6
+	pxor	xmm1,xmm2
+	movdqa	xmm2,xmm6
+	psrlq	xmm3,6
+	paddq	xmm7,xmm1
+	movdqa	xmm1,xmm6
+	psrlq	xmm2,19
+	psllq	xmm1,3
+	pxor	xmm3,xmm2
+	psrlq	xmm2,42
+	pxor	xmm3,xmm1
+	psllq	xmm1,42
+	pxor	xmm3,xmm2
+	movdqa	xmm2,[16+edx]
+	pxor	xmm3,xmm1
+	movdqa	xmm1,[112+ebp]
+	movq	mm1,mm4
+	paddq	xmm7,xmm3
+	movq	mm7,[edx-16]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[48+esp],mm4
+	paddq	xmm1,xmm7
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[16+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[8+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[40+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[24+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[48+esp]
+	paddq	mm2,mm6
+	movq	mm6,[56+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-8]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[40+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[8+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[32+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[16+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[40+esp]
+	paddq	mm0,mm6
+	movq	mm6,[48+esp]
+	movdqa	[edx-16],xmm1
+	lea	ebp,[128+ebp]
+	dec	ecx
+	jnz	NEAR L$00800_47_ssse3
+	movdqa	xmm1,[ebp]
+	lea	ebp,[ebp-640]
+	movdqu	xmm0,[ebx]
+db	102,15,56,0,193
+	movdqa	xmm3,[ebp]
+	movdqa	xmm2,xmm1
+	movdqu	xmm1,[16+ebx]
+	paddq	xmm3,xmm0
+db	102,15,56,0,202
+	movq	mm1,mm4
+	movq	mm7,[edx-128]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[32+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[56+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[24+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[8+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[32+esp]
+	paddq	mm2,mm6
+	movq	mm6,[40+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-120]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[24+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[56+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[48+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[16+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[24+esp]
+	paddq	mm0,mm6
+	movq	mm6,[32+esp]
+	movdqa	[edx-128],xmm3
+	movdqa	xmm4,[16+ebp]
+	movdqa	xmm3,xmm2
+	movdqu	xmm2,[32+ebx]
+	paddq	xmm4,xmm1
+db	102,15,56,0,211
+	movq	mm1,mm4
+	movq	mm7,[edx-112]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[16+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[48+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[40+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[8+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[56+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[16+esp]
+	paddq	mm2,mm6
+	movq	mm6,[24+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-104]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[8+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[40+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[32+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[48+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[8+esp]
+	paddq	mm0,mm6
+	movq	mm6,[16+esp]
+	movdqa	[edx-112],xmm4
+	movdqa	xmm5,[32+ebp]
+	movdqa	xmm4,xmm3
+	movdqu	xmm3,[48+ebx]
+	paddq	xmm5,xmm2
+db	102,15,56,0,220
+	movq	mm1,mm4
+	movq	mm7,[edx-96]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[32+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[24+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[56+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[40+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[esp]
+	paddq	mm2,mm6
+	movq	mm6,[8+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-88]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[56+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[24+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[16+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[48+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[32+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[56+esp]
+	paddq	mm0,mm6
+	movq	mm6,[esp]
+	movdqa	[edx-96],xmm5
+	movdqa	xmm6,[48+ebp]
+	movdqa	xmm5,xmm4
+	movdqu	xmm4,[64+ebx]
+	paddq	xmm6,xmm3
+db	102,15,56,0,229
+	movq	mm1,mm4
+	movq	mm7,[edx-80]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[48+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[16+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[8+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[40+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[24+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[48+esp]
+	paddq	mm2,mm6
+	movq	mm6,[56+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-72]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[40+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[8+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[32+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[16+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[40+esp]
+	paddq	mm0,mm6
+	movq	mm6,[48+esp]
+	movdqa	[edx-80],xmm6
+	movdqa	xmm7,[64+ebp]
+	movdqa	xmm6,xmm5
+	movdqu	xmm5,[80+ebx]
+	paddq	xmm7,xmm4
+db	102,15,56,0,238
+	movq	mm1,mm4
+	movq	mm7,[edx-64]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[32+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[56+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[24+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[8+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[32+esp]
+	paddq	mm2,mm6
+	movq	mm6,[40+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-56]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[24+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[56+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[48+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[16+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[24+esp]
+	paddq	mm0,mm6
+	movq	mm6,[32+esp]
+	movdqa	[edx-64],xmm7
+	movdqa	[edx],xmm0
+	movdqa	xmm0,[80+ebp]
+	movdqa	xmm7,xmm6
+	movdqu	xmm6,[96+ebx]
+	paddq	xmm0,xmm5
+db	102,15,56,0,247
+	movq	mm1,mm4
+	movq	mm7,[edx-48]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[16+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[48+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[40+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[8+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[56+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[16+esp]
+	paddq	mm2,mm6
+	movq	mm6,[24+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-40]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[8+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[40+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[32+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[48+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[8+esp]
+	paddq	mm0,mm6
+	movq	mm6,[16+esp]
+	movdqa	[edx-48],xmm0
+	movdqa	[16+edx],xmm1
+	movdqa	xmm1,[96+ebp]
+	movdqa	xmm0,xmm7
+	movdqu	xmm7,[112+ebx]
+	paddq	xmm1,xmm6
+db	102,15,56,0,248
+	movq	mm1,mm4
+	movq	mm7,[edx-32]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[32+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[24+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[56+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[40+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[esp]
+	paddq	mm2,mm6
+	movq	mm6,[8+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-24]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[56+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[24+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[16+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[48+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[32+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[56+esp]
+	paddq	mm0,mm6
+	movq	mm6,[esp]
+	movdqa	[edx-32],xmm1
+	movdqa	[32+edx],xmm2
+	movdqa	xmm2,[112+ebp]
+	movdqa	xmm0,[edx]
+	paddq	xmm2,xmm7
+	movq	mm1,mm4
+	movq	mm7,[edx-16]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[48+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm0,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[16+esp],mm0
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[8+esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[40+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm0
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm0
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[24+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm2,mm0
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	pxor	mm6,mm7
+	movq	mm5,[48+esp]
+	paddq	mm2,mm6
+	movq	mm6,[56+esp]
+	movq	mm1,mm4
+	movq	mm7,[edx-8]
+	pxor	mm5,mm6
+	psrlq	mm1,14
+	movq	[40+esp],mm4
+	pand	mm5,mm4
+	psllq	mm4,23
+	paddq	mm2,mm3
+	movq	mm3,mm1
+	psrlq	mm1,4
+	pxor	mm5,mm6
+	pxor	mm3,mm4
+	psllq	mm4,23
+	pxor	mm3,mm1
+	movq	[8+esp],mm2
+	paddq	mm7,mm5
+	pxor	mm3,mm4
+	psrlq	mm1,23
+	paddq	mm7,[esp]
+	pxor	mm3,mm1
+	psllq	mm4,4
+	pxor	mm3,mm4
+	movq	mm4,[32+esp]
+	paddq	mm3,mm7
+	movq	mm5,mm2
+	psrlq	mm5,28
+	paddq	mm4,mm3
+	movq	mm6,mm2
+	movq	mm7,mm5
+	psllq	mm6,25
+	movq	mm1,[16+esp]
+	psrlq	mm5,6
+	pxor	mm7,mm6
+	psllq	mm6,5
+	pxor	mm7,mm5
+	pxor	mm2,mm1
+	psrlq	mm5,5
+	pxor	mm7,mm6
+	pand	mm0,mm2
+	psllq	mm6,6
+	pxor	mm7,mm5
+	pxor	mm0,mm1
+	pxor	mm6,mm7
+	movq	mm5,[40+esp]
+	paddq	mm0,mm6
+	movq	mm6,[48+esp]
+	movdqa	[edx-16],xmm2
+	movq	mm1,[8+esp]
+	paddq	mm0,mm3
+	movq	mm3,[24+esp]
+	movq	mm7,[56+esp]
+	pxor	mm2,mm1
+	paddq	mm0,[esi]
+	paddq	mm1,[8+esi]
+	paddq	mm2,[16+esi]
+	paddq	mm3,[24+esi]
+	paddq	mm4,[32+esi]
+	paddq	mm5,[40+esi]
+	paddq	mm6,[48+esi]
+	paddq	mm7,[56+esi]
+	movq	[esi],mm0
+	movq	[8+esi],mm1
+	movq	[16+esi],mm2
+	movq	[24+esi],mm3
+	movq	[32+esi],mm4
+	movq	[40+esi],mm5
+	movq	[48+esi],mm6
+	movq	[56+esi],mm7
+	cmp	edi,eax
+	jb	NEAR L$007loop_ssse3
+	mov	esp,DWORD [76+edx]
+	emms
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	16
+L$002loop_x86:
+	mov	eax,DWORD [edi]
+	mov	ebx,DWORD [4+edi]
+	mov	ecx,DWORD [8+edi]
+	mov	edx,DWORD [12+edi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	push	eax
+	push	ebx
+	push	ecx
+	push	edx
+	mov	eax,DWORD [16+edi]
+	mov	ebx,DWORD [20+edi]
+	mov	ecx,DWORD [24+edi]
+	mov	edx,DWORD [28+edi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	push	eax
+	push	ebx
+	push	ecx
+	push	edx
+	mov	eax,DWORD [32+edi]
+	mov	ebx,DWORD [36+edi]
+	mov	ecx,DWORD [40+edi]
+	mov	edx,DWORD [44+edi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	push	eax
+	push	ebx
+	push	ecx
+	push	edx
+	mov	eax,DWORD [48+edi]
+	mov	ebx,DWORD [52+edi]
+	mov	ecx,DWORD [56+edi]
+	mov	edx,DWORD [60+edi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	push	eax
+	push	ebx
+	push	ecx
+	push	edx
+	mov	eax,DWORD [64+edi]
+	mov	ebx,DWORD [68+edi]
+	mov	ecx,DWORD [72+edi]
+	mov	edx,DWORD [76+edi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	push	eax
+	push	ebx
+	push	ecx
+	push	edx
+	mov	eax,DWORD [80+edi]
+	mov	ebx,DWORD [84+edi]
+	mov	ecx,DWORD [88+edi]
+	mov	edx,DWORD [92+edi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	push	eax
+	push	ebx
+	push	ecx
+	push	edx
+	mov	eax,DWORD [96+edi]
+	mov	ebx,DWORD [100+edi]
+	mov	ecx,DWORD [104+edi]
+	mov	edx,DWORD [108+edi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	push	eax
+	push	ebx
+	push	ecx
+	push	edx
+	mov	eax,DWORD [112+edi]
+	mov	ebx,DWORD [116+edi]
+	mov	ecx,DWORD [120+edi]
+	mov	edx,DWORD [124+edi]
+	bswap	eax
+	bswap	ebx
+	bswap	ecx
+	bswap	edx
+	push	eax
+	push	ebx
+	push	ecx
+	push	edx
+	add	edi,128
+	sub	esp,72
+	mov	DWORD [204+esp],edi
+	lea	edi,[8+esp]
+	mov	ecx,16
+dd	2784229001
+align	16
+L$00900_15_x86:
+	mov	ecx,DWORD [40+esp]
+	mov	edx,DWORD [44+esp]
+	mov	esi,ecx
+	shr	ecx,9
+	mov	edi,edx
+	shr	edx,9
+	mov	ebx,ecx
+	shl	esi,14
+	mov	eax,edx
+	shl	edi,14
+	xor	ebx,esi
+	shr	ecx,5
+	xor	eax,edi
+	shr	edx,5
+	xor	eax,ecx
+	shl	esi,4
+	xor	ebx,edx
+	shl	edi,4
+	xor	ebx,esi
+	shr	ecx,4
+	xor	eax,edi
+	shr	edx,4
+	xor	eax,ecx
+	shl	esi,5
+	xor	ebx,edx
+	shl	edi,5
+	xor	eax,esi
+	xor	ebx,edi
+	mov	ecx,DWORD [48+esp]
+	mov	edx,DWORD [52+esp]
+	mov	esi,DWORD [56+esp]
+	mov	edi,DWORD [60+esp]
+	add	eax,DWORD [64+esp]
+	adc	ebx,DWORD [68+esp]
+	xor	ecx,esi
+	xor	edx,edi
+	and	ecx,DWORD [40+esp]
+	and	edx,DWORD [44+esp]
+	add	eax,DWORD [192+esp]
+	adc	ebx,DWORD [196+esp]
+	xor	ecx,esi
+	xor	edx,edi
+	mov	esi,DWORD [ebp]
+	mov	edi,DWORD [4+ebp]
+	add	eax,ecx
+	adc	ebx,edx
+	mov	ecx,DWORD [32+esp]
+	mov	edx,DWORD [36+esp]
+	add	eax,esi
+	adc	ebx,edi
+	mov	DWORD [esp],eax
+	mov	DWORD [4+esp],ebx
+	add	eax,ecx
+	adc	ebx,edx
+	mov	ecx,DWORD [8+esp]
+	mov	edx,DWORD [12+esp]
+	mov	DWORD [32+esp],eax
+	mov	DWORD [36+esp],ebx
+	mov	esi,ecx
+	shr	ecx,2
+	mov	edi,edx
+	shr	edx,2
+	mov	ebx,ecx
+	shl	esi,4
+	mov	eax,edx
+	shl	edi,4
+	xor	ebx,esi
+	shr	ecx,5
+	xor	eax,edi
+	shr	edx,5
+	xor	ebx,ecx
+	shl	esi,21
+	xor	eax,edx
+	shl	edi,21
+	xor	eax,esi
+	shr	ecx,21
+	xor	ebx,edi
+	shr	edx,21
+	xor	eax,ecx
+	shl	esi,5
+	xor	ebx,edx
+	shl	edi,5
+	xor	eax,esi
+	xor	ebx,edi
+	mov	ecx,DWORD [8+esp]
+	mov	edx,DWORD [12+esp]
+	mov	esi,DWORD [16+esp]
+	mov	edi,DWORD [20+esp]
+	add	eax,DWORD [esp]
+	adc	ebx,DWORD [4+esp]
+	or	ecx,esi
+	or	edx,edi
+	and	ecx,DWORD [24+esp]
+	and	edx,DWORD [28+esp]
+	and	esi,DWORD [8+esp]
+	and	edi,DWORD [12+esp]
+	or	ecx,esi
+	or	edx,edi
+	add	eax,ecx
+	adc	ebx,edx
+	mov	DWORD [esp],eax
+	mov	DWORD [4+esp],ebx
+	mov	dl,BYTE [ebp]
+	sub	esp,8
+	lea	ebp,[8+ebp]
+	cmp	dl,148
+	jne	NEAR L$00900_15_x86
+align	16
+L$01016_79_x86:
+	mov	ecx,DWORD [312+esp]
+	mov	edx,DWORD [316+esp]
+	mov	esi,ecx
+	shr	ecx,1
+	mov	edi,edx
+	shr	edx,1
+	mov	eax,ecx
+	shl	esi,24
+	mov	ebx,edx
+	shl	edi,24
+	xor	ebx,esi
+	shr	ecx,6
+	xor	eax,edi
+	shr	edx,6
+	xor	eax,ecx
+	shl	esi,7
+	xor	ebx,edx
+	shl	edi,1
+	xor	ebx,esi
+	shr	ecx,1
+	xor	eax,edi
+	shr	edx,1
+	xor	eax,ecx
+	shl	edi,6
+	xor	ebx,edx
+	xor	eax,edi
+	mov	DWORD [esp],eax
+	mov	DWORD [4+esp],ebx
+	mov	ecx,DWORD [208+esp]
+	mov	edx,DWORD [212+esp]
+	mov	esi,ecx
+	shr	ecx,6
+	mov	edi,edx
+	shr	edx,6
+	mov	eax,ecx
+	shl	esi,3
+	mov	ebx,edx
+	shl	edi,3
+	xor	eax,esi
+	shr	ecx,13
+	xor	ebx,edi
+	shr	edx,13
+	xor	eax,ecx
+	shl	esi,10
+	xor	ebx,edx
+	shl	edi,10
+	xor	ebx,esi
+	shr	ecx,10
+	xor	eax,edi
+	shr	edx,10
+	xor	ebx,ecx
+	shl	edi,13
+	xor	eax,edx
+	xor	eax,edi
+	mov	ecx,DWORD [320+esp]
+	mov	edx,DWORD [324+esp]
+	add	eax,DWORD [esp]
+	adc	ebx,DWORD [4+esp]
+	mov	esi,DWORD [248+esp]
+	mov	edi,DWORD [252+esp]
+	add	eax,ecx
+	adc	ebx,edx
+	add	eax,esi
+	adc	ebx,edi
+	mov	DWORD [192+esp],eax
+	mov	DWORD [196+esp],ebx
+	mov	ecx,DWORD [40+esp]
+	mov	edx,DWORD [44+esp]
+	mov	esi,ecx
+	shr	ecx,9
+	mov	edi,edx
+	shr	edx,9
+	mov	ebx,ecx
+	shl	esi,14
+	mov	eax,edx
+	shl	edi,14
+	xor	ebx,esi
+	shr	ecx,5
+	xor	eax,edi
+	shr	edx,5
+	xor	eax,ecx
+	shl	esi,4
+	xor	ebx,edx
+	shl	edi,4
+	xor	ebx,esi
+	shr	ecx,4
+	xor	eax,edi
+	shr	edx,4
+	xor	eax,ecx
+	shl	esi,5
+	xor	ebx,edx
+	shl	edi,5
+	xor	eax,esi
+	xor	ebx,edi
+	mov	ecx,DWORD [48+esp]
+	mov	edx,DWORD [52+esp]
+	mov	esi,DWORD [56+esp]
+	mov	edi,DWORD [60+esp]
+	add	eax,DWORD [64+esp]
+	adc	ebx,DWORD [68+esp]
+	xor	ecx,esi
+	xor	edx,edi
+	and	ecx,DWORD [40+esp]
+	and	edx,DWORD [44+esp]
+	add	eax,DWORD [192+esp]
+	adc	ebx,DWORD [196+esp]
+	xor	ecx,esi
+	xor	edx,edi
+	mov	esi,DWORD [ebp]
+	mov	edi,DWORD [4+ebp]
+	add	eax,ecx
+	adc	ebx,edx
+	mov	ecx,DWORD [32+esp]
+	mov	edx,DWORD [36+esp]
+	add	eax,esi
+	adc	ebx,edi
+	mov	DWORD [esp],eax
+	mov	DWORD [4+esp],ebx
+	add	eax,ecx
+	adc	ebx,edx
+	mov	ecx,DWORD [8+esp]
+	mov	edx,DWORD [12+esp]
+	mov	DWORD [32+esp],eax
+	mov	DWORD [36+esp],ebx
+	mov	esi,ecx
+	shr	ecx,2
+	mov	edi,edx
+	shr	edx,2
+	mov	ebx,ecx
+	shl	esi,4
+	mov	eax,edx
+	shl	edi,4
+	xor	ebx,esi
+	shr	ecx,5
+	xor	eax,edi
+	shr	edx,5
+	xor	ebx,ecx
+	shl	esi,21
+	xor	eax,edx
+	shl	edi,21
+	xor	eax,esi
+	shr	ecx,21
+	xor	ebx,edi
+	shr	edx,21
+	xor	eax,ecx
+	shl	esi,5
+	xor	ebx,edx
+	shl	edi,5
+	xor	eax,esi
+	xor	ebx,edi
+	mov	ecx,DWORD [8+esp]
+	mov	edx,DWORD [12+esp]
+	mov	esi,DWORD [16+esp]
+	mov	edi,DWORD [20+esp]
+	add	eax,DWORD [esp]
+	adc	ebx,DWORD [4+esp]
+	or	ecx,esi
+	or	edx,edi
+	and	ecx,DWORD [24+esp]
+	and	edx,DWORD [28+esp]
+	and	esi,DWORD [8+esp]
+	and	edi,DWORD [12+esp]
+	or	ecx,esi
+	or	edx,edi
+	add	eax,ecx
+	adc	ebx,edx
+	mov	DWORD [esp],eax
+	mov	DWORD [4+esp],ebx
+	mov	dl,BYTE [ebp]
+	sub	esp,8
+	lea	ebp,[8+ebp]
+	cmp	dl,23
+	jne	NEAR L$01016_79_x86
+	mov	esi,DWORD [840+esp]
+	mov	edi,DWORD [844+esp]
+	mov	eax,DWORD [esi]
+	mov	ebx,DWORD [4+esi]
+	mov	ecx,DWORD [8+esi]
+	mov	edx,DWORD [12+esi]
+	add	eax,DWORD [8+esp]
+	adc	ebx,DWORD [12+esp]
+	mov	DWORD [esi],eax
+	mov	DWORD [4+esi],ebx
+	add	ecx,DWORD [16+esp]
+	adc	edx,DWORD [20+esp]
+	mov	DWORD [8+esi],ecx
+	mov	DWORD [12+esi],edx
+	mov	eax,DWORD [16+esi]
+	mov	ebx,DWORD [20+esi]
+	mov	ecx,DWORD [24+esi]
+	mov	edx,DWORD [28+esi]
+	add	eax,DWORD [24+esp]
+	adc	ebx,DWORD [28+esp]
+	mov	DWORD [16+esi],eax
+	mov	DWORD [20+esi],ebx
+	add	ecx,DWORD [32+esp]
+	adc	edx,DWORD [36+esp]
+	mov	DWORD [24+esi],ecx
+	mov	DWORD [28+esi],edx
+	mov	eax,DWORD [32+esi]
+	mov	ebx,DWORD [36+esi]
+	mov	ecx,DWORD [40+esi]
+	mov	edx,DWORD [44+esi]
+	add	eax,DWORD [40+esp]
+	adc	ebx,DWORD [44+esp]
+	mov	DWORD [32+esi],eax
+	mov	DWORD [36+esi],ebx
+	add	ecx,DWORD [48+esp]
+	adc	edx,DWORD [52+esp]
+	mov	DWORD [40+esi],ecx
+	mov	DWORD [44+esi],edx
+	mov	eax,DWORD [48+esi]
+	mov	ebx,DWORD [52+esi]
+	mov	ecx,DWORD [56+esi]
+	mov	edx,DWORD [60+esi]
+	add	eax,DWORD [56+esp]
+	adc	ebx,DWORD [60+esp]
+	mov	DWORD [48+esi],eax
+	mov	DWORD [52+esi],ebx
+	add	ecx,DWORD [64+esp]
+	adc	edx,DWORD [68+esp]
+	mov	DWORD [56+esi],ecx
+	mov	DWORD [60+esi],edx
+	add	esp,840
+	sub	ebp,640
+	cmp	edi,DWORD [8+esp]
+	jb	NEAR L$002loop_x86
+	mov	esp,DWORD [12+esp]
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+align	64
+L$001K512: ; SHA-512 round-constant table: 80 64-bit constants, each emitted as two dwords (low dword first), followed by a 16-byte mask and an ASCII banner
+dd	3609767458,1116352408 ; K[0] = 0x428a2f98d728ae22 (low dword 0xd728ae22, high dword 0x428a2f98)
+dd	602891725,1899447441
+dd	3964484399,3049323471
+dd	2173295548,3921009573
+dd	4081628472,961987163
+dd	3053834265,1508970993
+dd	2937671579,2453635748
+dd	3664609560,2870763221
+dd	2734883394,3624381080
+dd	1164996542,310598401
+dd	1323610764,607225278
+dd	3590304994,1426881987
+dd	4068182383,1925078388
+dd	991336113,2162078206
+dd	633803317,2614888103
+dd	3479774868,3248222580 ; K[15]; its lowest byte is 0x94 (148)
+dd	2666613458,3835390401
+dd	944711139,4022224774
+dd	2341262773,264347078
+dd	2007800933,604807628
+dd	1495990901,770255983
+dd	1856431235,1249150122
+dd	3175218132,1555081692
+dd	2198950837,1996064986
+dd	3999719339,2554220882
+dd	766784016,2821834349
+dd	2566594879,2952996808
+dd	3203337956,3210313671
+dd	1034457026,3336571891
+dd	2466948901,3584528711
+dd	3758326383,113926993
+dd	168717936,338241895
+dd	1188179964,666307205
+dd	1546045734,773529912
+dd	1522805485,1294757372
+dd	2643833823,1396182291
+dd	2343527390,1695183700
+dd	1014477480,1986661051
+dd	1206759142,2177026350
+dd	344077627,2456956037
+dd	1290863460,2730485921
+dd	3158454273,2820302411
+dd	3505952657,3259730800
+dd	106217008,3345764771
+dd	3606008344,3516065817
+dd	1432725776,3600352804
+dd	1467031594,4094571909
+dd	851169720,275423344
+dd	3100823752,430227734
+dd	1363258195,506948616
+dd	3750685593,659060556
+dd	3785050280,883997877
+dd	3318307427,958139571
+dd	3812723403,1322822218
+dd	2003034995,1537002063
+dd	3602036899,1747873779
+dd	1575990012,1955562222
+dd	1125592928,2024104815
+dd	2716904306,2227730452
+dd	442776044,2361852424
+dd	593698344,2428436474
+dd	3733110249,2756734187
+dd	2999351573,3204031479
+dd	3815920427,3329325298
+dd	3928383900,3391569614
+dd	566280711,3515267271
+dd	3454069534,3940187606
+dd	4000239992,4118630271
+dd	1914138554,116418474
+dd	2731055270,174292421
+dd	3203993006,289380356
+dd	320620315,460393269
+dd	587496836,685471733
+dd	1086792851,852142971
+dd	365543100,1017036298
+dd	2618297676,1126000580
+dd	3409855158,1288033470
+dd	4234509866,1501505948
+dd	987167468,1607167915
+dd	1246189591,1816402316 ; K[79] = 0x6c44198c4a475817; its lowest byte is 0x17 (23); table offset 640 ends here
+dd	67438087,66051 ; 0x04050607,0x00010203: in memory the bytes read 07 06 05 04 03 02 01 00
+dd	202182159,134810123 ; 0x0c0d0e0f,0x08090a0b: bytes 0f..08; NOTE(review): presumably the pshufb control that byte-reverses each 64-bit lane - confirm against the SSSE3 path
+db	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97 ; ASCII banner starts: "SHA512 block tra"
+db	110,115,102,111,114,109,32,102,111,114,32,120,56,54,44,32 ; "nsform for x86, "
+db	67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97 ; "CRYPTOGAMS by <a"
+db	112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103 ; "ppro@openssl.org"
+db	62,0 ; ">" followed by a terminating NUL byte
+segment	.bss
+common	_OPENSSL_ia32cap_P 16
diff --git a/deps/boringssl/win-x86/crypto/fipsmodule/vpaes-x86.asm b/deps/boringssl/win-x86/crypto/fipsmodule/vpaes-x86.asm
new file mode 100644
index 0000000..49f8866
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/fipsmodule/vpaes-x86.asm
@@ -0,0 +1,674 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+%ifdef BORINGSSL_DISPATCH_TEST
+extern	_BORINGSSL_function_hit
+%endif
+align	64
+L$_vpaes_consts:	; vpaes constant tables, 16 bytes per row; the entry points load ebp with L$_vpaes_consts+0x30, so rows are addressed as [ebp-48], [ebp-32], [ebp-16], [ebp], ...
+dd	218628480,235210255,168496130,67568393	; table offset 0x00 = [ebp-48] (loaded into xmm7 by __vpaes_preheat)
+dd	252381056,17041926,33884169,51187212	; table offset 0x10 = [ebp-32]
+dd	252645135,252645135,252645135,252645135	; table offset 0x20 = [ebp-16]: 0x0F0F0F0F x4, low-nibble mask for every byte (loaded into xmm6)
+dd	1512730624,3266504856,1377990664,3401244816	; table offset 0x30 = [ebp]
+dd	830229760,1275146365,2969422977,3447763452	; [16+ebp]
+dd	3411033600,2979783055,338359620,2782886510	; [32+ebp]
+dd	4209124096,907596821,221174255,1006095553	; [48+ebp]
+dd	191964160,3799684038,3164090317,1589111125	; [64+ebp]
+dd	182528256,1777043520,2877432650,3265356744	; [80+ebp]
+dd	1874708224,3503451415,3305285752,363511674	; [96+ebp]
+dd	1606117888,3487855781,1093350906,2384367825	; [112+ebp]
+dd	197121,67569157,134941193,202313229	; [128+ebp]; this and the next three rows are reached as [ecx+ebx-64] with ebx=ebp+192, ecx in {0,16,32,48}
+dd	67569157,134941193,202313229,197121
+dd	134941193,202313229,197121,67569157
+dd	202313229,197121,67569157,134941193	; [176+ebp]
+dd	33619971,100992007,168364043,235736079	; [192+ebp]; this and the next three rows are reached as [ecx+ebx] with ebx=ebp+192
+dd	235736079,33619971,100992007,168364043
+dd	168364043,235736079,33619971,100992007
+dd	100992007,168364043,235736079,33619971
+dd	50462976,117835012,185207048,252579084	; [256+ebp]; this and the next three rows are selected by [256+ecx+ebp]
+dd	252314880,51251460,117574920,184942860
+dd	184682752,252054788,50987272,118359308
+dd	118099200,185467140,251790600,50727180
+dd	2946363062,528716217,1300004225,1881839624	; [320+ebp]: copied to the stack by __vpaes_schedule_core
+dd	1532713819,1532713819,1532713819,1532713819	; [336+ebp]: 0x5B5B5B5B x4, XORed in by the key-schedule routines
+dd	3602276352,4288629033,3737020424,4153884961	; [352+ebp]
+dd	1354558464,32357713,2958822624,3775749553
+dd	1201988352,132424512,1572796698,503232858	; [384+ebp]
+dd	2213177600,1597421020,4103937655,675398315
+dd	2749646592,4273543773,1511898873,121693092	; [416+ebp]: start of the tables used by L$014schedule_mangle_dec
+dd	3040248576,1103263732,2871565598,1608280554
+dd	2236667136,2588920351,482954393,64377734
+dd	3069987328,291237287,2117370568,3650299247
+dd	533321216,3573750986,2572112006,1401264716
+dd	1339849704,2721158661,548607111,3445553514
+dd	2128193280,3054596040,2183486460,1257083700
+dd	655635200,1165381986,3923443150,2344132524
+dd	190078720,256924420,290342170,357187870	; [544+ebp]: first row read by __vpaes_decrypt_core as [ebx-64] with ebx=ebp+608
+dd	1610966272,2263057382,4103205268,309794674
+dd	2592527872,2233205587,1335446729,3402964816
+dd	3973531904,3225098121,3002836325,1918774430
+dd	3870401024,2102906079,2284471353,4117666579	; [608+ebp] = [ebx] in __vpaes_decrypt_core
+dd	617007872,1021508343,366931923,691083277
+dd	2528395776,3491914898,2968704004,1613121270
+dd	3445188352,3247741094,844474987,4093578302
+dd	651481088,1190302358,1689581232,574775300
+dd	4289380608,206939853,2555985458,2489840491
+dd	2130264064,327674451,3566485037,3349835193
+dd	2470714624,316102159,3636825756,3393945945
+db	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105	; NUL-terminated ASCII: "Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)"
+db	111,110,32,65,69,83,32,102,111,114,32,120,56,54,47,83
+db	83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117
+db	114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105
+db	118,101,114,115,105,116,121,41,0
+align	64
+align	16
+__vpaes_preheat:	; internal helper: turns the link-time offset in ebp into an absolute table pointer and preloads xmm6/xmm7
+	add	ebp,DWORD [esp]	; [esp] is the return address; callers pass ebp = L$_vpaes_consts+0x30 minus the label right after their call, so the sum is the run-time address of L$_vpaes_consts+0x30
+	movdqa	xmm7,[ebp-48]	; first row of L$_vpaes_consts
+	movdqa	xmm6,[ebp-16]	; third row of L$_vpaes_consts (0x0F in every byte)
+	ret
+align	16
+__vpaes_encrypt_core:	; internal: transforms the 16-byte block in xmm0 in place using the key schedule at edx; needs ebp/xmm6/xmm7 as set by __vpaes_preheat; overwrites eax, ebx, ecx, edx and xmm1-xmm5
+	mov	ecx,16	; ecx: row offset into the tables around ebx, kept in {0,16,32,48} by the add/and below
+	mov	eax,DWORD [240+edx]	; loop counter read from offset 240 of the key structure (written by the set_*_key routines)
+	movdqa	xmm1,xmm6
+	movdqa	xmm2,[ebp]
+	pandn	xmm1,xmm0	; xmm1 = xmm0 AND NOT xmm6 (high nibble of each byte)
+	pand	xmm0,xmm6	; xmm0 = low nibble of each byte
+	movdqu	xmm5,[edx]	; first 16 bytes of the key schedule
+db	102,15,56,0,208	; pshufb xmm2,xmm0
+	movdqa	xmm0,[16+ebp]
+	pxor	xmm2,xmm5
+	psrld	xmm1,4	; move the high nibbles down into the low-nibble position
+	add	edx,16	; advance to the next 16 bytes of the key schedule
+db	102,15,56,0,193	; pshufb xmm0,xmm1
+	lea	ebx,[192+ebp]
+	pxor	xmm0,xmm2
+	jmp	NEAR L$000enc_entry
+align	16
+L$001enc_loop:	; one pass per remaining count in eax
+	movdqa	xmm4,[32+ebp]
+	movdqa	xmm0,[48+ebp]
+db	102,15,56,0,226	; pshufb xmm4,xmm2
+db	102,15,56,0,195	; pshufb xmm0,xmm3
+	pxor	xmm4,xmm5	; mix in the 16 key bytes loaded at the end of the previous pass
+	movdqa	xmm5,[64+ebp]
+	pxor	xmm0,xmm4
+	movdqa	xmm1,[ecx*1+ebx-64]
+db	102,15,56,0,234	; pshufb xmm5,xmm2
+	movdqa	xmm2,[80+ebp]
+	movdqa	xmm4,[ecx*1+ebx]
+db	102,15,56,0,211	; pshufb xmm2,xmm3
+	movdqa	xmm3,xmm0
+	pxor	xmm2,xmm5
+db	102,15,56,0,193	; pshufb xmm0,xmm1
+	add	edx,16	; advance the key-schedule pointer
+	pxor	xmm0,xmm2
+db	102,15,56,0,220	; pshufb xmm3,xmm4
+	add	ecx,16
+	pxor	xmm3,xmm0
+db	102,15,56,0,193	; pshufb xmm0,xmm1
+	and	ecx,48	; wrap the row offset back into {0,16,32,48}
+	sub	eax,1	; sets ZF for the jnz at the end of L$000enc_entry; the SSE instructions in between do not modify EFLAGS
+	pxor	xmm0,xmm3
+L$000enc_entry:
+	movdqa	xmm1,xmm6
+	movdqa	xmm5,[ebp-32]
+	pandn	xmm1,xmm0
+	psrld	xmm1,4	; xmm1 = high nibbles shifted down
+	pand	xmm0,xmm6	; xmm0 = low nibbles
+db	102,15,56,0,232	; pshufb xmm5,xmm0
+	movdqa	xmm3,xmm7
+	pxor	xmm0,xmm1
+db	102,15,56,0,217	; pshufb xmm3,xmm1
+	movdqa	xmm4,xmm7
+	pxor	xmm3,xmm5
+db	102,15,56,0,224	; pshufb xmm4,xmm0
+	movdqa	xmm2,xmm7
+	pxor	xmm4,xmm5
+db	102,15,56,0,211	; pshufb xmm2,xmm3
+	movdqa	xmm3,xmm7
+	pxor	xmm2,xmm0
+db	102,15,56,0,220	; pshufb xmm3,xmm4
+	movdqu	xmm5,[edx]	; next 16 bytes of the key schedule
+	pxor	xmm3,xmm1
+	jnz	NEAR L$001enc_loop	; ZF comes from "sub eax,1" (on the initial jump here, from the earlier "add edx,16")
+	movdqa	xmm4,[96+ebp]	; final pass: uses the tables at [96+ebp] and [112+ebp] instead
+	movdqa	xmm0,[112+ebp]
+db	102,15,56,0,226	; pshufb xmm4,xmm2
+	pxor	xmm4,xmm5
+db	102,15,56,0,195	; pshufb xmm0,xmm3
+	movdqa	xmm1,[64+ecx*1+ebx]
+	pxor	xmm0,xmm4
+db	102,15,56,0,193	; pshufb xmm0,xmm1
+	ret	; result is left in xmm0
+align	16
+__vpaes_decrypt_core:	; internal: transforms the 16-byte block in xmm0 in place using the key schedule at edx; needs ebp/xmm6/xmm7 as set by __vpaes_preheat; overwrites eax, ebx, ecx, edx and xmm1-xmm5
+	lea	ebx,[608+ebp]	; ebx: base for the tables addressed as [ebx-64] .. [112+ebx]
+	mov	eax,DWORD [240+edx]	; loop counter read from offset 240 of the key structure
+	movdqa	xmm1,xmm6
+	movdqa	xmm2,[ebx-64]
+	pandn	xmm1,xmm0	; xmm1 = xmm0 AND NOT xmm6 (high nibble of each byte)
+	mov	ecx,eax
+	psrld	xmm1,4	; move the high nibbles down
+	movdqu	xmm5,[edx]	; first 16 bytes of the key schedule
+	shl	ecx,4
+	pand	xmm0,xmm6	; xmm0 = low nibble of each byte
+db	102,15,56,0,208	; pshufb xmm2,xmm0
+	movdqa	xmm0,[ebx-48]
+	xor	ecx,48
+db	102,15,56,0,193	; pshufb xmm0,xmm1
+	and	ecx,48	; ecx = ((count*16) XOR 48) AND 48
+	pxor	xmm2,xmm5
+	movdqa	xmm5,[176+ebp]
+	pxor	xmm0,xmm2
+	add	edx,16	; advance to the next 16 bytes of the key schedule
+	lea	ecx,[ecx*1+ebx-352]	; ecx now points at one of the four rows starting at [256+ebp]; read once in the final pass
+	jmp	NEAR L$002dec_entry
+align	16
+L$003dec_loop:	; one pass per remaining count in eax
+	movdqa	xmm4,[ebx-32]
+	movdqa	xmm1,[ebx-16]
+db	102,15,56,0,226	; pshufb xmm4,xmm2
+db	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,[ebx]
+	pxor	xmm0,xmm1
+	movdqa	xmm1,[16+ebx]
+db	102,15,56,0,226	; pshufb xmm4,xmm2
+db	102,15,56,0,197	; pshufb xmm0,xmm5
+db	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,[32+ebx]
+	pxor	xmm0,xmm1
+	movdqa	xmm1,[48+ebx]
+db	102,15,56,0,226	; pshufb xmm4,xmm2
+db	102,15,56,0,197	; pshufb xmm0,xmm5
+db	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,[64+ebx]
+	pxor	xmm0,xmm1
+	movdqa	xmm1,[80+ebx]
+db	102,15,56,0,226	; pshufb xmm4,xmm2
+db	102,15,56,0,197	; pshufb xmm0,xmm5
+db	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	add	edx,16	; advance the key-schedule pointer
+db	102,15,58,15,237,12	; palignr xmm5,xmm5,12 (byte-rotates xmm5 for the next pass)
+	pxor	xmm0,xmm1
+	sub	eax,1	; sets ZF for the jnz at the end of L$002dec_entry; the SSE instructions in between do not modify EFLAGS
+L$002dec_entry:
+	movdqa	xmm1,xmm6
+	movdqa	xmm2,[ebp-32]
+	pandn	xmm1,xmm0
+	pand	xmm0,xmm6	; xmm0 = low nibbles
+	psrld	xmm1,4	; xmm1 = high nibbles shifted down
+db	102,15,56,0,208	; pshufb xmm2,xmm0
+	movdqa	xmm3,xmm7
+	pxor	xmm0,xmm1
+db	102,15,56,0,217	; pshufb xmm3,xmm1
+	movdqa	xmm4,xmm7
+	pxor	xmm3,xmm2
+db	102,15,56,0,224	; pshufb xmm4,xmm0
+	pxor	xmm4,xmm2
+	movdqa	xmm2,xmm7
+db	102,15,56,0,211	; pshufb xmm2,xmm3
+	movdqa	xmm3,xmm7
+	pxor	xmm2,xmm0
+db	102,15,56,0,220	; pshufb xmm3,xmm4
+	movdqu	xmm0,[edx]	; next 16 bytes of the key schedule
+	pxor	xmm3,xmm1
+	jnz	NEAR L$003dec_loop	; ZF comes from "sub eax,1" (on the initial jump here, from the earlier "add edx,16")
+	movdqa	xmm4,[96+ebx]	; final pass
+db	102,15,56,0,226	; pshufb xmm4,xmm2
+	pxor	xmm4,xmm0
+	movdqa	xmm0,[112+ebx]
+	movdqa	xmm2,[ecx]	; row selected by the ecx pointer computed before the loop
+db	102,15,56,0,195	; pshufb xmm0,xmm3
+	pxor	xmm0,xmm4
+db	102,15,56,0,194	; pshufb xmm0,xmm2
+	ret	; result is left in xmm0
+align	16
+__vpaes_schedule_core:	; internal key-schedule driver. In: esi = user key, edx = output pointer, eax = key size in bits, edi = 0 for encryption / non-zero for decryption, ecx = row offset (0..48), ebp = link-time offset of L$_vpaes_consts+0x30
+	add	ebp,DWORD [esp]	; add the return address: ebp becomes the run-time address of L$_vpaes_consts+0x30 (same trick as __vpaes_preheat)
+	movdqu	xmm0,[esi]	; first 16 bytes of the user key
+	movdqa	xmm2,[320+ebp]
+	movdqa	xmm3,xmm0
+	lea	ebx,[ebp]	; ebx = table base consumed by __vpaes_schedule_transform
+	movdqa	[4+esp],xmm2	; callers align esp to 16 before the call, so skipping the 4-byte return address gives an aligned slot; __vpaes_schedule_round sees it as [8+esp]
+	call	__vpaes_schedule_transform
+	movdqa	xmm7,xmm0
+	test	edi,edi
+	jnz	NEAR L$004schedule_am_decrypting
+	movdqu	[edx],xmm0	; encrypting: store the transformed first 16 bytes
+	jmp	NEAR L$005schedule_go
+L$004schedule_am_decrypting:
+	movdqa	xmm1,[256+ecx*1+ebp]
+db	102,15,56,0,217	; pshufb xmm3,xmm1
+	movdqu	[edx],xmm3	; decrypting: store the untransformed key bytes, permuted
+	xor	ecx,48
+L$005schedule_go:
+	cmp	eax,192	; dispatch on key size in bits
+	ja	NEAR L$006schedule_256	; above 192
+	je	NEAR L$007schedule_192	; exactly 192
+L$008schedule_128:	; anything below 192 falls through to here
+	mov	eax,10	; eax is reused as the loop counter from here on
+L$009loop_schedule_128:
+	call	__vpaes_schedule_round
+	dec	eax
+	jz	NEAR L$010schedule_mangle_last
+	call	__vpaes_schedule_mangle
+	jmp	NEAR L$009loop_schedule_128
+align	16
+L$007schedule_192:
+	movdqu	xmm0,[8+esi]	; bytes 8..23 of the user key
+	call	__vpaes_schedule_transform
+	movdqa	xmm6,xmm0
+	pxor	xmm4,xmm4
+	movhlps	xmm6,xmm4	; clear the low 8 bytes of xmm6 (copies the high half of the zeroed xmm4 into them)
+	mov	eax,4
+L$011loop_schedule_192:
+	call	__vpaes_schedule_round
+db	102,15,58,15,198,8	; palignr xmm0,xmm6,8
+	call	__vpaes_schedule_mangle
+	call	__vpaes_schedule_192_smear
+	call	__vpaes_schedule_mangle
+	call	__vpaes_schedule_round
+	dec	eax
+	jz	NEAR L$010schedule_mangle_last
+	call	__vpaes_schedule_mangle
+	call	__vpaes_schedule_192_smear
+	jmp	NEAR L$011loop_schedule_192
+align	16
+L$006schedule_256:
+	movdqu	xmm0,[16+esi]	; bytes 16..31 of the user key
+	call	__vpaes_schedule_transform
+	mov	eax,7
+L$012loop_schedule_256:
+	call	__vpaes_schedule_mangle
+	movdqa	xmm6,xmm0
+	call	__vpaes_schedule_round
+	dec	eax
+	jz	NEAR L$010schedule_mangle_last
+	call	__vpaes_schedule_mangle
+	pshufd	xmm0,xmm0,255	; broadcast the top dword of xmm0
+	movdqa	[20+esp],xmm7	; park xmm7 in the second aligned stack slot
+	movdqa	xmm7,xmm6
+	call	L$_vpaes_schedule_low_round	; enters __vpaes_schedule_round at its second entry point
+	movdqa	xmm7,[20+esp]
+	jmp	NEAR L$012loop_schedule_256
+align	16
+L$010schedule_mangle_last:	; emit the final 16 bytes of the schedule
+	lea	ebx,[384+ebp]	; table base used when decrypting
+	test	edi,edi
+	jnz	NEAR L$013schedule_mangle_last_dec
+	movdqa	xmm1,[256+ecx*1+ebp]
+db	102,15,56,0,193	; pshufb xmm0,xmm1
+	lea	ebx,[352+ebp]	; table base used when encrypting
+	add	edx,32	; together with the -16 below: net +16 when encrypting, -16 when decrypting
+L$013schedule_mangle_last_dec:
+	add	edx,-16
+	pxor	xmm0,[336+ebp]
+	call	__vpaes_schedule_transform
+	movdqu	[edx],xmm0
+	pxor	xmm0,xmm0	; zero every xmm register before returning
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7
+	ret
+align	16
+__vpaes_schedule_192_smear:	; internal helper for the 192-bit schedule loop: folds dwords of xmm6 and xmm7 into xmm6, returns a copy in xmm0, then clears the low half of xmm6; overwrites xmm1
+	pshufd	xmm1,xmm6,128	; xmm1 dwords = xmm6[0], xmm6[0], xmm6[0], xmm6[2]
+	pshufd	xmm0,xmm7,254	; xmm0 dwords = xmm7[2], xmm7[3], xmm7[3], xmm7[3]
+	pxor	xmm6,xmm1
+	pxor	xmm1,xmm1
+	pxor	xmm6,xmm0
+	movdqa	xmm0,xmm6	; result returned in xmm0
+	movhlps	xmm6,xmm1	; zero the low 8 bytes of xmm6 (xmm1 is all zero here)
+	ret
+align	16
+__vpaes_schedule_round:	; internal: one key-schedule round. In/out: xmm7 (running state), xmm0; also updates the 16-byte stack slot set up by __vpaes_schedule_core. Overwrites xmm1-xmm5
+	movdqa	xmm2,[8+esp]	; the slot __vpaes_schedule_core wrote at its [4+esp], shifted by this call's return address
+	pxor	xmm1,xmm1
+db	102,15,58,15,202,15	; palignr xmm1,xmm2,15 (top byte of xmm2 becomes the low byte of xmm1)
+db	102,15,58,15,210,15	; palignr xmm2,xmm2,15 (rotate xmm2 by one byte)
+	pxor	xmm7,xmm1
+	pshufd	xmm0,xmm0,255	; broadcast the top dword of xmm0
+db	102,15,58,15,192,1	; palignr xmm0,xmm0,1 (rotate xmm0 by one byte)
+	movdqa	[8+esp],xmm2	; store the rotated value back for the next round
+L$_vpaes_schedule_low_round:	; second entry point, called directly from the 256-bit loop of __vpaes_schedule_core
+	movdqa	xmm1,xmm7
+	pslldq	xmm7,4
+	pxor	xmm7,xmm1	; xmm7 ^= xmm7 shifted left by 4 bytes
+	movdqa	xmm1,xmm7
+	pslldq	xmm7,8
+	pxor	xmm7,xmm1	; xmm7 ^= xmm7 shifted left by 8 bytes
+	pxor	xmm7,[336+ebp]
+	movdqa	xmm4,[ebp-16]	; 0x0F nibble mask
+	movdqa	xmm5,[ebp-48]
+	movdqa	xmm1,xmm4
+	pandn	xmm1,xmm0
+	psrld	xmm1,4	; xmm1 = high nibbles of xmm0 shifted down
+	pand	xmm0,xmm4	; xmm0 = low nibbles
+	movdqa	xmm2,[ebp-32]
+db	102,15,56,0,208	; pshufb xmm2,xmm0
+	pxor	xmm0,xmm1
+	movdqa	xmm3,xmm5
+db	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+	movdqa	xmm4,xmm5
+db	102,15,56,0,224	; pshufb xmm4,xmm0
+	pxor	xmm4,xmm2
+	movdqa	xmm2,xmm5
+db	102,15,56,0,211	; pshufb xmm2,xmm3
+	pxor	xmm2,xmm0
+	movdqa	xmm3,xmm5
+db	102,15,56,0,220	; pshufb xmm3,xmm4
+	pxor	xmm3,xmm1
+	movdqa	xmm4,[32+ebp]
+db	102,15,56,0,226	; pshufb xmm4,xmm2
+	movdqa	xmm0,[48+ebp]
+db	102,15,56,0,195	; pshufb xmm0,xmm3
+	pxor	xmm0,xmm4
+	pxor	xmm0,xmm7
+	movdqa	xmm7,xmm0	; the new state is returned in both xmm0 and xmm7
+	ret
+align	16
+__vpaes_schedule_transform:	; internal: per-byte table transform of xmm0 using the two 16-byte tables at [ebx] and [16+ebx]; needs ebp = L$_vpaes_consts+0x30; overwrites xmm1, xmm2
+	movdqa	xmm2,[ebp-16]	; 0x0F nibble mask
+	movdqa	xmm1,xmm2
+	pandn	xmm1,xmm0
+	psrld	xmm1,4	; xmm1 = high nibble of each byte, shifted down
+	pand	xmm0,xmm2	; xmm0 = low nibble of each byte
+	movdqa	xmm2,[ebx]
+db	102,15,56,0,208	; pshufb xmm2,xmm0: look up the low nibbles in the table at [ebx]
+	movdqa	xmm0,[16+ebx]
+db	102,15,56,0,193	; pshufb xmm0,xmm1: look up the high nibbles in the table at [16+ebx]
+	pxor	xmm0,xmm2	; result = XOR of the two lookups
+	ret
+align	16
+__vpaes_schedule_mangle:	; internal: derives one 16-byte schedule entry from xmm0 (which is preserved) and stores it at the updated edx. edi selects the encrypt/decrypt form; ecx is the row offset. Overwrites xmm1-xmm5 and, when decrypting, esi
+	movdqa	xmm4,xmm0	; work on a copy so xmm0 survives
+	movdqa	xmm5,[128+ebp]
+	test	edi,edi
+	jnz	NEAR L$014schedule_mangle_dec
+	add	edx,16	; encrypting: output pointer moves forward
+	pxor	xmm4,[336+ebp]
+db	102,15,56,0,229	; pshufb xmm4,xmm5
+	movdqa	xmm3,xmm4
+db	102,15,56,0,229	; pshufb xmm4,xmm5
+	pxor	xmm3,xmm4
+db	102,15,56,0,229	; pshufb xmm4,xmm5
+	pxor	xmm3,xmm4	; xmm3 = XOR of the one-, two- and three-times permuted values
+	jmp	NEAR L$015schedule_mangle_both
+align	16
+L$014schedule_mangle_dec:
+	movdqa	xmm2,[ebp-16]	; 0x0F nibble mask
+	lea	esi,[416+ebp]	; esi = base of eight 16-byte tables used below (this replaces the key pointer the caller had in esi)
+	movdqa	xmm1,xmm2
+	pandn	xmm1,xmm4
+	psrld	xmm1,4	; xmm1 = high nibbles shifted down
+	pand	xmm4,xmm2	; xmm4 = low nibbles
+	movdqa	xmm2,[esi]
+db	102,15,56,0,212	; pshufb xmm2,xmm4
+	movdqa	xmm3,[16+esi]
+db	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+db	102,15,56,0,221	; pshufb xmm3,xmm5
+	movdqa	xmm2,[32+esi]
+db	102,15,56,0,212	; pshufb xmm2,xmm4
+	pxor	xmm2,xmm3
+	movdqa	xmm3,[48+esi]
+db	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+db	102,15,56,0,221	; pshufb xmm3,xmm5
+	movdqa	xmm2,[64+esi]
+db	102,15,56,0,212	; pshufb xmm2,xmm4
+	pxor	xmm2,xmm3
+	movdqa	xmm3,[80+esi]
+db	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+db	102,15,56,0,221	; pshufb xmm3,xmm5
+	movdqa	xmm2,[96+esi]
+db	102,15,56,0,212	; pshufb xmm2,xmm4
+	pxor	xmm2,xmm3
+	movdqa	xmm3,[112+esi]
+db	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+	add	edx,-16	; decrypting: output pointer moves backward
+L$015schedule_mangle_both:
+	movdqa	xmm1,[256+ecx*1+ebp]	; permutation row selected by ecx
+db	102,15,56,0,217	; pshufb xmm3,xmm1
+	add	ecx,-16
+	and	ecx,48	; step the row offset down by 16, wrapping within {0,16,32,48}
+	movdqu	[edx],xmm3
+	ret
+global	_vpaes_set_encrypt_key	; exported, stack arguments: arg0 = user key pointer, arg1 = key size in bits, arg2 = key structure to fill; always returns 0 in eax
+align	16
+_vpaes_set_encrypt_key:
+L$_vpaes_set_encrypt_key_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+%ifdef BORINGSSL_DISPATCH_TEST
+	push	ebx
+	push	edx
+	call	L$016pic	; call/pop pair obtains the current address for position-independent access
+L$016pic:
+	pop	ebx
+	lea	ebx,[(_BORINGSSL_function_hit+5-L$016pic)+ebx]	; address of byte 5 of _BORINGSSL_function_hit
+	mov	edx,1
+	mov	BYTE [ebx],dl	; record that this function was reached
+	pop	edx
+	pop	ebx
+%endif
+	mov	esi,DWORD [20+esp]	; arg0 (four saved registers + return address = 20 bytes)
+	lea	ebx,[esp-56]
+	mov	eax,DWORD [24+esp]	; arg1
+	and	ebx,-16	; 16-byte-aligned scratch frame below the current stack
+	mov	edx,DWORD [28+esp]	; arg2
+	xchg	ebx,esp	; switch to the aligned frame; ebx = previous esp
+	mov	DWORD [48+esp],ebx	; remember the previous esp
+	mov	ebx,eax
+	shr	ebx,5
+	add	ebx,5
+	mov	DWORD [240+edx],ebx	; key[240] = bits/32 + 5; the cipher cores read this as their loop count
+	mov	ecx,48	; initial row offset for __vpaes_schedule_core
+	mov	edi,0	; 0 = encryption schedule
+	lea	ebp,[(L$_vpaes_consts+0x30-L$017pic_point)]	; link-time offset; __vpaes_schedule_core adds its return address (= L$017pic_point)
+	call	__vpaes_schedule_core
+L$017pic_point:
+	mov	esp,DWORD [48+esp]	; restore the caller's stack pointer
+	xor	eax,eax	; return 0
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_vpaes_set_decrypt_key	; exported, stack arguments: arg0 = user key pointer, arg1 = key size in bits, arg2 = key structure to fill; always returns 0 in eax
+align	16
+_vpaes_set_decrypt_key:
+L$_vpaes_set_decrypt_key_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	esi,DWORD [20+esp]	; arg0
+	lea	ebx,[esp-56]
+	mov	eax,DWORD [24+esp]	; arg1
+	and	ebx,-16	; 16-byte-aligned scratch frame below the current stack
+	mov	edx,DWORD [28+esp]	; arg2
+	xchg	ebx,esp	; switch to the aligned frame; ebx = previous esp
+	mov	DWORD [48+esp],ebx	; remember the previous esp
+	mov	ebx,eax
+	shr	ebx,5
+	add	ebx,5
+	mov	DWORD [240+edx],ebx	; key[240] = bits/32 + 5
+	shl	ebx,4
+	lea	edx,[16+ebx*1+edx]	; start writing at key + 16*count + 16; the decrypt path of the schedule then moves edx backward
+	mov	edi,1	; non-zero = decryption schedule
+	mov	ecx,eax
+	shr	ecx,1
+	and	ecx,32
+	xor	ecx,32	; ecx = 32 unless bit 6 of the bit count is set (as it is for 192), in which case 0
+	lea	ebp,[(L$_vpaes_consts+0x30-L$018pic_point)]	; link-time offset; __vpaes_schedule_core adds its return address (= L$018pic_point)
+	call	__vpaes_schedule_core
+L$018pic_point:
+	mov	esp,DWORD [48+esp]	; restore the caller's stack pointer
+	xor	eax,eax	; return 0
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_vpaes_encrypt	; exported, stack arguments: arg0 = 16-byte input, arg1 = 16-byte output, arg2 = key structure
+align	16
+_vpaes_encrypt:
+L$_vpaes_encrypt_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+%ifdef BORINGSSL_DISPATCH_TEST
+	push	ebx
+	push	edx
+	call	L$019pic	; call/pop pair obtains the current address for position-independent access
+L$019pic:
+	pop	ebx
+	lea	ebx,[(_BORINGSSL_function_hit+4-L$019pic)+ebx]	; address of byte 4 of _BORINGSSL_function_hit
+	mov	edx,1
+	mov	BYTE [ebx],dl	; record that this function was reached
+	pop	edx
+	pop	ebx
+%endif
+	lea	ebp,[(L$_vpaes_consts+0x30-L$020pic_point)]	; link-time offset; __vpaes_preheat adds its return address (= L$020pic_point)
+	call	__vpaes_preheat
+L$020pic_point:
+	mov	esi,DWORD [20+esp]	; arg0
+	lea	ebx,[esp-56]
+	mov	edi,DWORD [24+esp]	; arg1
+	and	ebx,-16	; 16-byte-aligned scratch frame below the current stack
+	mov	edx,DWORD [28+esp]	; arg2
+	xchg	ebx,esp	; switch to the aligned frame; ebx = previous esp
+	mov	DWORD [48+esp],ebx	; remember the previous esp
+	movdqu	xmm0,[esi]	; load the input block (unaligned load)
+	call	__vpaes_encrypt_core
+	movdqu	[edi],xmm0	; store the result
+	mov	esp,DWORD [48+esp]	; restore the caller's stack pointer
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_vpaes_decrypt	; exported, stack arguments: arg0 = 16-byte input, arg1 = 16-byte output, arg2 = key structure
+align	16
+_vpaes_decrypt:
+L$_vpaes_decrypt_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	lea	ebp,[(L$_vpaes_consts+0x30-L$021pic_point)]	; link-time offset; __vpaes_preheat adds its return address (= L$021pic_point)
+	call	__vpaes_preheat
+L$021pic_point:
+	mov	esi,DWORD [20+esp]	; arg0
+	lea	ebx,[esp-56]
+	mov	edi,DWORD [24+esp]	; arg1
+	and	ebx,-16	; 16-byte-aligned scratch frame below the current stack
+	mov	edx,DWORD [28+esp]	; arg2
+	xchg	ebx,esp	; switch to the aligned frame; ebx = previous esp
+	mov	DWORD [48+esp],ebx	; remember the previous esp
+	movdqu	xmm0,[esi]	; load the input block (unaligned load)
+	call	__vpaes_decrypt_core
+	movdqu	[edi],xmm0	; store the result
+	mov	esp,DWORD [48+esp]	; restore the caller's stack pointer
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_vpaes_cbc_encrypt	; exported, stack arguments: arg0 = input, arg1 = output, arg2 = length in bytes, arg3 = key structure, arg4 = 16-byte IV (updated on return), arg5 = 0 to decrypt / non-zero to encrypt. Only whole 16-byte blocks are processed
+align	16
+_vpaes_cbc_encrypt:
+L$_vpaes_cbc_encrypt_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	esi,DWORD [20+esp]	; arg0: input pointer
+	mov	edi,DWORD [24+esp]	; arg1: output pointer
+	mov	eax,DWORD [28+esp]	; arg2: length
+	mov	edx,DWORD [32+esp]	; arg3: key structure
+	sub	eax,16
+	jc	NEAR L$022cbc_abort	; length below 16: nothing to do, IV left untouched
+	lea	ebx,[esp-56]
+	mov	ebp,DWORD [36+esp]	; arg4: IV pointer
+	and	ebx,-16	; 16-byte-aligned scratch frame below the current stack
+	mov	ecx,DWORD [40+esp]	; arg5: direction flag
+	xchg	ebx,esp	; switch to the aligned frame; ebx = previous esp
+	movdqu	xmm1,[ebp]	; xmm1 = current IV / previous ciphertext block
+	sub	edi,esi	; keep (output - input) so a single moving pointer (esi) addresses both buffers
+	mov	DWORD [48+esp],ebx	; frame: [48] = previous esp
+	mov	DWORD [esp],edi	; frame: [0] = output - input
+	mov	DWORD [4+esp],edx	; frame: [4] = key structure (the cores advance edx, so it is reloaded per block)
+	mov	DWORD [8+esp],ebp	; frame: [8] = IV pointer
+	mov	edi,eax	; edi = length - 16, counted down per block
+	lea	ebp,[(L$_vpaes_consts+0x30-L$023pic_point)]	; link-time offset; __vpaes_preheat adds its return address (= L$023pic_point)
+	call	__vpaes_preheat
+L$023pic_point:
+	cmp	ecx,0
+	je	NEAR L$024cbc_dec_loop	; arg5 == 0: decrypt
+	jmp	NEAR L$025cbc_enc_loop
+align	16
+L$025cbc_enc_loop:
+	movdqu	xmm0,[esi]	; next input block
+	pxor	xmm0,xmm1	; XOR with the previous output block (or the IV)
+	call	__vpaes_encrypt_core
+	mov	ebx,DWORD [esp]	; output - input
+	mov	edx,DWORD [4+esp]	; reload the key pointer
+	movdqa	xmm1,xmm0	; this output chains into the next block
+	movdqu	[esi*1+ebx],xmm0	; store at output + offset
+	lea	esi,[16+esi]
+	sub	edi,16
+	jnc	NEAR L$025cbc_enc_loop	; continue while at least 16 more bytes remain
+	jmp	NEAR L$026cbc_done
+align	16
+L$024cbc_dec_loop:
+	movdqu	xmm0,[esi]	; next input block
+	movdqa	[16+esp],xmm1	; frame: [16] = block to XOR into the result (IV or previous input)
+	movdqa	[32+esp],xmm0	; frame: [32] = this input block, which becomes the next chaining value
+	call	__vpaes_decrypt_core
+	mov	ebx,DWORD [esp]	; output - input
+	mov	edx,DWORD [4+esp]	; reload the key pointer
+	pxor	xmm0,[16+esp]
+	movdqa	xmm1,[32+esp]
+	movdqu	[esi*1+ebx],xmm0	; store at output + offset
+	lea	esi,[16+esi]
+	sub	edi,16
+	jnc	NEAR L$024cbc_dec_loop	; continue while at least 16 more bytes remain
+L$026cbc_done:
+	mov	ebx,DWORD [8+esp]	; IV pointer
+	mov	esp,DWORD [48+esp]	; restore the caller's stack pointer
+	movdqu	[ebx],xmm1	; write the final chaining value back to the IV buffer
+L$022cbc_abort:
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
diff --git a/deps/boringssl/win-x86/crypto/fipsmodule/x86-mont.asm b/deps/boringssl/win-x86/crypto/fipsmodule/x86-mont.asm
new file mode 100644
index 0000000..14aa988
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/fipsmodule/x86-mont.asm
@@ -0,0 +1,485 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+;extern	_OPENSSL_ia32cap_P
+global	_bn_mul_mont	; exported, six stack arguments: arg0 = result vector, arg1 and arg2 = input vectors, arg3 = vector subtracted in the tail, arg4 = pointer to one dword, arg5 = word count. Returns 0 without touching anything if arg5 < 4, else 1. NOTE(review): presumably rp, ap, bp, np, n0, num of the C prototype - confirm against the header
+align	16
+_bn_mul_mont:
+L$_bn_mul_mont_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	xor	eax,eax	; return value 0 for the early exit
+	mov	edi,DWORD [40+esp]	; arg5: word count
+	cmp	edi,4
+	jl	NEAR L$000just_leave	; fewer than 4 words: return 0
+	lea	esi,[20+esp]	; esi = address of arg0
+	lea	edx,[24+esp]	; edx = address of arg1
+	add	edi,2
+	neg	edi
+	lea	ebp,[edi*4+esp-32]	; ebp = esp - 32 - 4*(count+2): first guess for the scratch frame
+	neg	edi	; edi = count+2 again
+	mov	eax,ebp
+	sub	eax,edx
+	and	eax,2047
+	sub	ebp,eax	; NOTE(review): these adjustments position the frame relative to the argument block modulo 2048 - confirm intent against the generating Perl script
+	xor	edx,ebp
+	and	edx,2048
+	xor	edx,2048
+	sub	ebp,edx
+	and	ebp,-64	; align the frame to 64 bytes
+	mov	eax,esp
+	sub	eax,ebp
+	and	eax,-4096
+	mov	edx,esp	; edx = original esp, saved into the frame below
+	lea	esp,[eax*1+ebp]
+	mov	eax,DWORD [esp]	; touch the stack at each step on the way down to ebp
+	cmp	esp,ebp
+	ja	NEAR L$001page_walk
+	jmp	NEAR L$002page_walk_done
+align	16
+L$001page_walk:
+	lea	esp,[esp-4096]	; step down one 4096-byte page at a time
+	mov	eax,DWORD [esp]
+	cmp	esp,ebp
+	ja	NEAR L$001page_walk
+L$002page_walk_done:
+	mov	eax,DWORD [esi]	; arg0
+	mov	ebx,DWORD [4+esi]	; arg1
+	mov	ecx,DWORD [8+esi]	; arg2
+	mov	ebp,DWORD [12+esi]	; arg3
+	mov	esi,DWORD [16+esi]	; arg4 (a pointer)
+	mov	esi,DWORD [esi]	; the dword arg4 points to
+	mov	DWORD [4+esp],eax	; frame: [4] = arg0
+	mov	DWORD [8+esp],ebx	; frame: [8] = arg1
+	mov	DWORD [12+esp],ecx	; frame: [12] = arg2
+	mov	DWORD [16+esp],ebp	; frame: [16] = arg3
+	mov	DWORD [20+esp],esi	; frame: [20] = *arg4
+	lea	ebx,[edi-3]	; ebx = count - 1
+	mov	DWORD [24+esp],edx	; frame: [24] = original esp; the temporary vector starts at [32+esp]
+	lea	eax,[_OPENSSL_ia32cap_P]
+	bt	DWORD [eax],26	; test bit 26 of the first capability word; the branch label indicates this is the SSE2 check
+	jnc	NEAR L$003non_sse2
+	mov	eax,-1
+	movd	mm7,eax	; mm7 = 0x00000000FFFFFFFF, mask for the low 32 bits
+	mov	esi,DWORD [8+esp]	; esi = arg1
+	mov	edi,DWORD [12+esp]	; edi = arg2
+	mov	ebp,DWORD [16+esp]	; ebp = arg3
+	xor	edx,edx	; edx = outer index
+	xor	ecx,ecx	; ecx = inner index
+	movd	mm4,DWORD [edi]	; arg2[0]
+	movd	mm5,DWORD [esi]	; arg1[0]
+	movd	mm3,DWORD [ebp]	; arg3[0]
+	pmuludq	mm5,mm4
+	movq	mm2,mm5
+	movq	mm0,mm5
+	pand	mm0,mm7
+	pmuludq	mm5,[20+esp]	; multiply the low 32 bits by *arg4
+	pmuludq	mm3,mm5
+	paddq	mm3,mm0
+	movd	mm1,DWORD [4+ebp]
+	movd	mm0,DWORD [4+esi]
+	psrlq	mm2,32	; keep the carries (upper 32 bits)
+	psrlq	mm3,32
+	inc	ecx
+align	16
+L$0041st:	; inner loop of the first outer pass (temporary vector not yet populated)
+	pmuludq	mm0,mm4
+	pmuludq	mm1,mm5
+	paddq	mm2,mm0
+	paddq	mm3,mm1
+	movq	mm0,mm2
+	pand	mm0,mm7
+	movd	mm1,DWORD [4+ecx*4+ebp]
+	paddq	mm3,mm0
+	movd	mm0,DWORD [4+ecx*4+esi]
+	psrlq	mm2,32
+	movd	DWORD [28+ecx*4+esp],mm3
+	psrlq	mm3,32
+	lea	ecx,[1+ecx]	; lea leaves EFLAGS alone
+	cmp	ecx,ebx
+	jl	NEAR L$0041st
+	pmuludq	mm0,mm4
+	pmuludq	mm1,mm5
+	paddq	mm2,mm0
+	paddq	mm3,mm1
+	movq	mm0,mm2
+	pand	mm0,mm7
+	paddq	mm3,mm0
+	movd	DWORD [28+ecx*4+esp],mm3
+	psrlq	mm2,32
+	psrlq	mm3,32
+	paddq	mm3,mm2
+	movq	[32+ebx*4+esp],mm3	; store the top two words of the temporary vector
+	inc	edx
+L$005outer:	; remaining outer passes: accumulate onto the temporary vector
+	xor	ecx,ecx
+	movd	mm4,DWORD [edx*4+edi]	; arg2[edx]
+	movd	mm5,DWORD [esi]
+	movd	mm6,DWORD [32+esp]
+	movd	mm3,DWORD [ebp]
+	pmuludq	mm5,mm4
+	paddq	mm5,mm6
+	movq	mm0,mm5
+	movq	mm2,mm5
+	pand	mm0,mm7
+	pmuludq	mm5,[20+esp]
+	pmuludq	mm3,mm5
+	paddq	mm3,mm0
+	movd	mm6,DWORD [36+esp]
+	movd	mm1,DWORD [4+ebp]
+	movd	mm0,DWORD [4+esi]
+	psrlq	mm2,32
+	psrlq	mm3,32
+	paddq	mm2,mm6
+	inc	ecx
+	dec	ebx	; ebx doubles as the inner countdown; it is restored from ecx after the loop
+L$006inner:
+	pmuludq	mm0,mm4
+	pmuludq	mm1,mm5
+	paddq	mm2,mm0
+	paddq	mm3,mm1
+	movq	mm0,mm2
+	movd	mm6,DWORD [36+ecx*4+esp]
+	pand	mm0,mm7
+	movd	mm1,DWORD [4+ecx*4+ebp]
+	paddq	mm3,mm0
+	movd	mm0,DWORD [4+ecx*4+esi]
+	psrlq	mm2,32
+	movd	DWORD [28+ecx*4+esp],mm3
+	psrlq	mm3,32
+	paddq	mm2,mm6
+	dec	ebx	; sets ZF for the jnz below (lea does not touch EFLAGS)
+	lea	ecx,[1+ecx]
+	jnz	NEAR L$006inner
+	mov	ebx,ecx	; restore ebx = count - 1
+	pmuludq	mm0,mm4
+	pmuludq	mm1,mm5
+	paddq	mm2,mm0
+	paddq	mm3,mm1
+	movq	mm0,mm2
+	pand	mm0,mm7
+	paddq	mm3,mm0
+	movd	DWORD [28+ecx*4+esp],mm3
+	psrlq	mm2,32
+	psrlq	mm3,32
+	movd	mm6,DWORD [36+ebx*4+esp]
+	paddq	mm3,mm2
+	paddq	mm3,mm6
+	movq	[32+ebx*4+esp],mm3
+	lea	edx,[1+edx]
+	cmp	edx,ebx
+	jle	NEAR L$005outer
+	emms	; leave MMX state before returning to code that may use x87
+	jmp	NEAR L$007common_tail
+align	16
+L$003non_sse2:	; integer-only path
+	mov	esi,DWORD [8+esp]	; esi = arg1
+	lea	ebp,[1+ebx]
+	mov	edi,DWORD [12+esp]	; edi = arg2
+	xor	ecx,ecx
+	mov	edx,esi
+	and	ebp,1	; ebp = count AND 1
+	sub	edx,edi	; edx = arg1 - arg2
+	lea	eax,[4+ebx*4+edi]	; eax = arg2 + 4*count, the end-of-vector sentinel
+	or	ebp,edx	; zero only if the count is even and arg1 == arg2
+	mov	edi,DWORD [edi]	; edi = arg2[0]; mov leaves the flags from "or" intact
+	jz	NEAR L$008bn_sqr_mont	; same vector with an even count: take the squaring path
+	mov	DWORD [28+esp],eax	; frame: [28] = end of arg2
+	mov	eax,DWORD [esi]
+	xor	edx,edx
+align	16
+L$009mull:	; temporary vector = arg1 * arg2[0]
+	mov	ebp,edx
+	mul	edi
+	add	ebp,eax
+	lea	ecx,[1+ecx]
+	adc	edx,0
+	mov	eax,DWORD [ecx*4+esi]
+	cmp	ecx,ebx
+	mov	DWORD [28+ecx*4+esp],ebp
+	jl	NEAR L$009mull
+	mov	ebp,edx
+	mul	edi
+	mov	edi,DWORD [20+esp]	; *arg4
+	add	eax,ebp
+	mov	esi,DWORD [16+esp]	; esi = arg3
+	adc	edx,0
+	imul	edi,DWORD [32+esp]	; edi = low word of the temporary vector times *arg4
+	mov	DWORD [32+ebx*4+esp],eax
+	xor	ecx,ecx
+	mov	DWORD [36+ebx*4+esp],edx
+	mov	DWORD [40+ebx*4+esp],ecx
+	mov	eax,DWORD [esi]
+	mul	edi
+	add	eax,DWORD [32+esp]	; only the carry out of this addition is needed
+	mov	eax,DWORD [4+esi]
+	adc	edx,0
+	inc	ecx
+	jmp	NEAR L$0102ndmadd
+align	16
+L$0111stmadd:	; temporary vector += arg1 * arg2[i]
+	mov	ebp,edx
+	mul	edi
+	add	ebp,DWORD [32+ecx*4+esp]
+	lea	ecx,[1+ecx]
+	adc	edx,0
+	add	ebp,eax
+	mov	eax,DWORD [ecx*4+esi]
+	adc	edx,0
+	cmp	ecx,ebx
+	mov	DWORD [28+ecx*4+esp],ebp
+	jl	NEAR L$0111stmadd
+	mov	ebp,edx
+	mul	edi
+	add	eax,DWORD [32+ebx*4+esp]
+	mov	edi,DWORD [20+esp]	; *arg4
+	adc	edx,0
+	mov	esi,DWORD [16+esp]	; esi = arg3
+	add	ebp,eax
+	adc	edx,0
+	imul	edi,DWORD [32+esp]
+	xor	ecx,ecx
+	add	edx,DWORD [36+ebx*4+esp]
+	mov	DWORD [32+ebx*4+esp],ebp
+	adc	ecx,0
+	mov	eax,DWORD [esi]
+	mov	DWORD [36+ebx*4+esp],edx
+	mov	DWORD [40+ebx*4+esp],ecx
+	mul	edi
+	add	eax,DWORD [32+esp]
+	mov	eax,DWORD [4+esi]
+	adc	edx,0
+	mov	ecx,1
+align	16
+L$0102ndmadd:	; temporary vector += arg3 * edi, stored one word lower (offset 24 instead of 28)
+	mov	ebp,edx
+	mul	edi
+	add	ebp,DWORD [32+ecx*4+esp]
+	lea	ecx,[1+ecx]
+	adc	edx,0
+	add	ebp,eax
+	mov	eax,DWORD [ecx*4+esi]
+	adc	edx,0
+	cmp	ecx,ebx
+	mov	DWORD [24+ecx*4+esp],ebp
+	jl	NEAR L$0102ndmadd
+	mov	ebp,edx
+	mul	edi
+	add	ebp,DWORD [32+ebx*4+esp]
+	adc	edx,0
+	add	ebp,eax
+	adc	edx,0
+	mov	DWORD [28+ebx*4+esp],ebp
+	xor	eax,eax
+	mov	ecx,DWORD [12+esp]	; current position in arg2
+	add	edx,DWORD [36+ebx*4+esp]
+	adc	eax,DWORD [40+ebx*4+esp]
+	lea	ecx,[4+ecx]	; advance to the next word of arg2
+	mov	DWORD [32+ebx*4+esp],edx
+	cmp	ecx,DWORD [28+esp]	; reached the end of arg2?
+	mov	DWORD [36+ebx*4+esp],eax
+	je	NEAR L$007common_tail
+	mov	edi,DWORD [ecx]	; next word of arg2
+	mov	esi,DWORD [8+esp]	; esi = arg1
+	mov	DWORD [12+esp],ecx	; frame [12] now tracks the current arg2 position
+	xor	ecx,ecx
+	xor	edx,edx
+	mov	eax,DWORD [esi]
+	jmp	NEAR L$0111stmadd
+align	16
+L$008bn_sqr_mont:	; squaring path (arg1 == arg2, even count)
+	mov	DWORD [esp],ebx	; frame: [0] = count - 1
+	mov	DWORD [12+esp],ecx	; frame: [12] = outer index, starting at 0
+	mov	eax,edi
+	mul	edi	; square of word 0
+	mov	DWORD [32+esp],eax
+	mov	ebx,edx
+	shr	edx,1
+	and	ebx,1	; ebx carries the bit shifted out, for the doubling below
+	inc	ecx
+align	16
+L$012sqr:	; doubled cross products with word 0
+	mov	eax,DWORD [ecx*4+esi]
+	mov	ebp,edx
+	mul	edi
+	add	eax,ebp
+	lea	ecx,[1+ecx]
+	adc	edx,0
+	lea	ebp,[eax*2+ebx]	; ebp = 2*eax + carried-in bit
+	shr	eax,31	; bit shifted out by the doubling
+	cmp	ecx,DWORD [esp]
+	mov	ebx,eax
+	mov	DWORD [28+ecx*4+esp],ebp
+	jl	NEAR L$012sqr
+	mov	eax,DWORD [ecx*4+esi]
+	mov	ebp,edx
+	mul	edi
+	add	eax,ebp
+	mov	edi,DWORD [20+esp]	; *arg4
+	adc	edx,0
+	mov	esi,DWORD [16+esp]	; esi = arg3
+	lea	ebp,[eax*2+ebx]
+	imul	edi,DWORD [32+esp]
+	shr	eax,31
+	mov	DWORD [32+ecx*4+esp],ebp
+	lea	ebp,[edx*2+eax]
+	mov	eax,DWORD [esi]
+	shr	edx,31
+	mov	DWORD [36+ecx*4+esp],ebp
+	mov	DWORD [40+ecx*4+esp],edx
+	mul	edi
+	add	eax,DWORD [32+esp]
+	mov	ebx,ecx	; ebx = count - 1 again
+	adc	edx,0
+	mov	eax,DWORD [4+esi]
+	mov	ecx,1
+align	16
+L$0133rdmadd:	; temporary vector += arg3 * edi, two words per iteration
+	mov	ebp,edx
+	mul	edi
+	add	ebp,DWORD [32+ecx*4+esp]
+	adc	edx,0
+	add	ebp,eax
+	mov	eax,DWORD [4+ecx*4+esi]
+	adc	edx,0
+	mov	DWORD [28+ecx*4+esp],ebp
+	mov	ebp,edx
+	mul	edi
+	add	ebp,DWORD [36+ecx*4+esp]
+	lea	ecx,[2+ecx]
+	adc	edx,0
+	add	ebp,eax
+	mov	eax,DWORD [ecx*4+esi]
+	adc	edx,0
+	cmp	ecx,ebx
+	mov	DWORD [24+ecx*4+esp],ebp
+	jl	NEAR L$0133rdmadd
+	mov	ebp,edx
+	mul	edi
+	add	ebp,DWORD [32+ebx*4+esp]
+	adc	edx,0
+	add	ebp,eax
+	adc	edx,0
+	mov	DWORD [28+ebx*4+esp],ebp
+	mov	ecx,DWORD [12+esp]	; outer index
+	xor	eax,eax
+	mov	esi,DWORD [8+esp]	; esi = arg1
+	add	edx,DWORD [36+ebx*4+esp]
+	adc	eax,DWORD [40+ebx*4+esp]
+	mov	DWORD [32+ebx*4+esp],edx
+	cmp	ecx,ebx
+	mov	DWORD [36+ebx*4+esp],eax
+	je	NEAR L$007common_tail	; outer index reached count - 1: done
+	mov	edi,DWORD [4+ecx*4+esi]	; next input word
+	lea	ecx,[1+ecx]
+	mov	eax,edi
+	mov	DWORD [12+esp],ecx	; save the incremented outer index
+	mul	edi	; square of that word
+	add	eax,DWORD [32+ecx*4+esp]
+	adc	edx,0
+	mov	DWORD [32+ecx*4+esp],eax
+	xor	ebp,ebp
+	cmp	ecx,ebx
+	lea	ecx,[1+ecx]	; lea keeps the flags from cmp
+	je	NEAR L$014sqrlast
+	mov	ebx,edx
+	shr	edx,1
+	and	ebx,1
+align	16
+L$015sqradd:	; doubled cross products with the current word, added onto the temporary vector
+	mov	eax,DWORD [ecx*4+esi]
+	mov	ebp,edx
+	mul	edi
+	add	eax,ebp
+	lea	ebp,[eax*1+eax]	; ebp = 2*eax (lea keeps the carry from the add above)
+	adc	edx,0
+	shr	eax,31
+	add	ebp,DWORD [32+ecx*4+esp]
+	lea	ecx,[1+ecx]
+	adc	eax,0
+	add	ebp,ebx
+	adc	eax,0
+	cmp	ecx,DWORD [esp]
+	mov	DWORD [28+ecx*4+esp],ebp
+	mov	ebx,eax
+	jle	NEAR L$015sqradd
+	mov	ebp,edx
+	add	edx,edx
+	shr	ebp,31
+	add	edx,ebx
+	adc	ebp,0
+L$014sqrlast:
+	mov	edi,DWORD [20+esp]	; *arg4
+	mov	esi,DWORD [16+esp]	; esi = arg3
+	imul	edi,DWORD [32+esp]
+	add	edx,DWORD [32+ecx*4+esp]
+	mov	eax,DWORD [esi]
+	adc	ebp,0
+	mov	DWORD [32+ecx*4+esp],edx
+	mov	DWORD [36+ecx*4+esp],ebp
+	mul	edi
+	add	eax,DWORD [32+esp]
+	lea	ebx,[ecx-1]
+	adc	edx,0
+	mov	ecx,1
+	mov	eax,DWORD [4+esi]
+	jmp	NEAR L$0133rdmadd
+align	16
+L$007common_tail:	; shared epilogue: result = temporary - arg3, then select between the two based on the final borrow
+	mov	ebp,DWORD [16+esp]	; ebp = arg3
+	mov	edi,DWORD [4+esp]	; edi = arg0 (result)
+	lea	esi,[32+esp]	; esi = temporary vector
+	mov	eax,DWORD [esi]
+	mov	ecx,ebx	; ecx = count - 1, loop countdown
+	xor	edx,edx	; edx = 0 and CF cleared for the first sbb
+align	16
+L$016sub:
+	sbb	eax,DWORD [edx*4+ebp]
+	mov	DWORD [edx*4+edi],eax	; result[i] = temporary[i] - arg3[i] - borrow
+	dec	ecx	; dec leaves CF alone, so the borrow chain survives
+	mov	eax,DWORD [4+edx*4+esi]
+	lea	edx,[1+edx]
+	jge	NEAR L$016sub	; flags from dec; the loop exits with ecx = -1
+	sbb	eax,0	; fold the final borrow into the top word: eax becomes the select mask
+	mov	edx,-1
+	xor	edx,eax	; edx = NOT eax
+	jmp	NEAR L$017copy
+align	16
+L$017copy:	; branch-free select, walking down from index count - 1
+	mov	esi,DWORD [32+ebx*4+esp]	; temporary word
+	mov	ebp,DWORD [ebx*4+edi]	; subtracted word already in the result
+	mov	DWORD [32+ebx*4+esp],ecx	; overwrite the temporary word with ecx (left at -1 by L$016sub)
+	and	esi,eax
+	and	ebp,edx
+	or	ebp,esi	; keep the temporary word where the mask eax is set, the subtracted word otherwise
+	mov	DWORD [ebx*4+edi],ebp
+	dec	ebx
+	jge	NEAR L$017copy
+	mov	esp,DWORD [24+esp]	; restore the original stack pointer
+	mov	eax,1	; return 1
+L$000just_leave:
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+db	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105	; NUL-terminated ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>"
+db	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
+db	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+db	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+db	111,114,103,62,0
+segment	.bss
+common	_OPENSSL_ia32cap_P 16
diff --git a/deps/boringssl/win-x86/crypto/test/trampoline-x86.asm b/deps/boringssl/win-x86/crypto/test/trampoline-x86.asm
new file mode 100644
index 0000000..5fb72c7
--- /dev/null
+++ b/deps/boringssl/win-x86/crypto/test/trampoline-x86.asm
@@ -0,0 +1,156 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+%ifidn __OUTPUT_FORMAT__,obj
+section	code	use32 class=code align=64
+%elifidn __OUTPUT_FORMAT__,win32
+$@feat.00 equ 1
+section	.text	code align=64
+%else
+section	.text	code
+%endif
+global	_abi_test_trampoline	; exported, stack arguments: arg0 = function to call, arg1 = pointer to four dwords holding esi/edi/ebx/ebp values, arg2 = array of argument words, arg3 = number of argument words. Returns whatever the called function leaves in eax
+align	16
+_abi_test_trampoline:
+L$_abi_test_trampoline_begin:
+	push	ebp
+	push	ebx
+	push	esi
+	push	edi
+	mov	ecx,DWORD [24+esp]	; arg1: register-state block
+	mov	esi,DWORD [ecx]	; load the callee-saved registers from the state block
+	mov	edi,DWORD [4+ecx]
+	mov	ebx,DWORD [8+ecx]
+	mov	ebp,DWORD [12+ecx]
+	sub	esp,44	; room for the outgoing arguments. NOTE(review): 44 bytes holds 11 dwords; a larger arg3 would overwrite the saved registers above - callers presumably bound it
+	mov	eax,DWORD [72+esp]	; arg2 (28 + the 44 just reserved)
+	xor	ecx,ecx
+L$000loop:	; copy arg3 dwords from arg2 to the top of the stack
+	cmp	ecx,DWORD [76+esp]	; arg3
+	jae	NEAR L$001loop_done
+	mov	edx,DWORD [ecx*4+eax]
+	mov	DWORD [ecx*4+esp],edx
+	add	ecx,1
+	jmp	NEAR L$000loop
+L$001loop_done:
+	call	DWORD [64+esp]	; arg0: the function under test
+	add	esp,44	; drop the outgoing-argument area
+	mov	ecx,DWORD [24+esp]	; arg1 again
+	mov	DWORD [ecx],esi	; write back the registers as the callee left them
+	mov	DWORD [4+ecx],edi
+	mov	DWORD [8+ecx],ebx
+	mov	DWORD [12+ecx],ebp
+	pop	edi
+	pop	esi
+	pop	ebx
+	pop	ebp
+	ret
+global	_abi_test_get_and_clear_direction_flag	; exported: returns the direction flag (0 or 1) in eax and leaves it cleared
+align	16
+_abi_test_get_and_clear_direction_flag:
+L$_abi_test_get_and_clear_direction_flag_begin:
+	pushfd
+	pop	eax	; eax = EFLAGS
+	and	eax,1024	; isolate bit 10 (DF)
+	shr	eax,10	; normalise to 0 or 1
+	cld	; clear DF
+	ret
+global	_abi_test_set_direction_flag	; exported: sets the direction flag and returns
+align	16
+_abi_test_set_direction_flag:
+L$_abi_test_set_direction_flag_begin:
+	std	; set DF
+	ret
+global	_abi_test_clobber_eax	; exported: zeroes eax and returns
+align	16
+_abi_test_clobber_eax:
+L$_abi_test_clobber_eax_begin:
+	xor	eax,eax	; eax = 0
+	ret
+global	_abi_test_clobber_ebx	; exported: zeroes ebx and returns without restoring it
+align	16
+_abi_test_clobber_ebx:
+L$_abi_test_clobber_ebx_begin:
+	xor	ebx,ebx	; ebx = 0
+	ret
+global	_abi_test_clobber_ecx	; exported: zeroes ecx and returns
+align	16
+_abi_test_clobber_ecx:
+L$_abi_test_clobber_ecx_begin:
+	xor	ecx,ecx	; ecx = 0
+	ret
+global	_abi_test_clobber_edx	; exported: zeroes edx and returns
+align	16
+_abi_test_clobber_edx:
+L$_abi_test_clobber_edx_begin:
+	xor	edx,edx	; edx = 0
+	ret
+global	_abi_test_clobber_edi	; exported: zeroes edi and returns without restoring it
+align	16
+_abi_test_clobber_edi:
+L$_abi_test_clobber_edi_begin:
+	xor	edi,edi	; edi = 0
+	ret
+global	_abi_test_clobber_esi	; exported: zeroes esi and returns without restoring it
+align	16
+_abi_test_clobber_esi:
+L$_abi_test_clobber_esi_begin:
+	xor	esi,esi	; esi = 0
+	ret
+global	_abi_test_clobber_ebp	; exported: zeroes ebp and returns without restoring it
+align	16
+_abi_test_clobber_ebp:
+L$_abi_test_clobber_ebp_begin:
+	xor	ebp,ebp	; ebp = 0
+	ret
+global	_abi_test_clobber_xmm0	; exported: zeroes xmm0 and returns
+align	16
+_abi_test_clobber_xmm0:
+L$_abi_test_clobber_xmm0_begin:
+	pxor	xmm0,xmm0	; xmm0 = 0
+	ret
+global	_abi_test_clobber_xmm1	; exported: zeroes xmm1 and returns
+align	16
+_abi_test_clobber_xmm1:
+L$_abi_test_clobber_xmm1_begin:
+	pxor	xmm1,xmm1	; xmm1 = 0
+	ret
+global	_abi_test_clobber_xmm2	; exported: zeroes xmm2 and returns
+align	16
+_abi_test_clobber_xmm2:
+L$_abi_test_clobber_xmm2_begin:
+	pxor	xmm2,xmm2	; xmm2 = 0
+	ret
+global	_abi_test_clobber_xmm3	; exported: zeroes xmm3 and returns
+align	16
+_abi_test_clobber_xmm3:
+L$_abi_test_clobber_xmm3_begin:
+	pxor	xmm3,xmm3	; xmm3 = 0
+	ret
+global	_abi_test_clobber_xmm4	; exported: zeroes xmm4 and returns
+align	16
+_abi_test_clobber_xmm4:
+L$_abi_test_clobber_xmm4_begin:
+	pxor	xmm4,xmm4	; xmm4 = 0
+	ret
+global	_abi_test_clobber_xmm5	; exported: zeroes xmm5 and returns
+align	16
+_abi_test_clobber_xmm5:
+L$_abi_test_clobber_xmm5_begin:
+	pxor	xmm5,xmm5	; xmm5 = 0
+	ret
+global	_abi_test_clobber_xmm6	; exported: zeroes xmm6 and returns
+align	16
+_abi_test_clobber_xmm6:
+L$_abi_test_clobber_xmm6_begin:
+	pxor	xmm6,xmm6	; xmm6 = 0
+	ret
+global	_abi_test_clobber_xmm7	; exported: zeroes xmm7 and returns
+align	16
+_abi_test_clobber_xmm7:
+L$_abi_test_clobber_xmm7_begin:
+	pxor	xmm7,xmm7	; xmm7 = 0
+	ret
diff --git a/deps/boringssl/win-x86_64/crypto/chacha/chacha-x86_64.asm b/deps/boringssl/win-x86_64/crypto/chacha/chacha-x86_64.asm
new file mode 100644
index 0000000..a3c2938
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/chacha/chacha-x86_64.asm
@@ -0,0 +1,1926 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+EXTERN	OPENSSL_ia32cap_P
+
+ALIGN	64	; read-only constant tables shared by the ChaCha20 code paths in this file
+$L$zero:
+	DD	0,0,0,0
+$L$one:
+	DD	1,0,0,0	; adds 1 to the low dword only (per-block counter step of the 1x paths)
+$L$inc:
+	DD	0,1,2,3	; per-lane counter offsets added once at the start of the 4x path
+$L$four:
+	DD	4,4,4,4	; counter step added per outer iteration of the 4x path
+$L$incy:
+	DD	0,2,4,6,1,3,5,7	; initial per-lane counter offsets added in the 8x path
+$L$eight:
+	DD	8,8,8,8,8,8,8,8	; counter step added per outer iteration of the 8x path
+$L$rot16:
+DB	0x2,0x3,0x0,0x1,0x6,0x7,0x4,0x5,0xa,0xb,0x8,0x9,0xe,0xf,0xc,0xd	; pshufb mask: rotates every dword by 16 bits
+$L$rot24:
+DB	0x3,0x0,0x1,0x2,0x7,0x4,0x5,0x6,0xb,0x8,0x9,0xa,0xf,0xc,0xd,0xe	; pshufb mask: rotates every dword left by 8 bits (i.e. right by 24)
+$L$sigma:
+DB	101,120,112,97,110,100,32,51,50,45,98,121,116,101,32,107	; ASCII "expand 32-byte k" (first 16 bytes of the state)
+DB	0
+ALIGN	64
+$L$zeroz:
+	DD	0,0,0,0,1,0,0,0,2,0,0,0,3,0,0,0
+$L$fourz:
+	DD	4,0,0,0,4,0,0,0,4,0,0,0,4,0,0,0
+$L$incz:
+	DD	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
+$L$sixteen:
+	DD	16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16
+DB	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54	; ASCII banner, NUL-terminated: "ChaCha20 for x86_64, CRYPTOGAMS by" + author e-mail
+DB	95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32
+DB	98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115
+DB	108,46,111,114,103,62,0
+global	ChaCha20_ctr32
+; ChaCha20_ctr32: Win64 entry. XORs rdx bytes read from rsi with a ChaCha20 keystream and writes them to rdi; dispatches to the SSSE3 path when available, otherwise runs the scalar code below.
+ALIGN	64
+ChaCha20_ctr32:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are parked at [8+rsp]/[16+rsp] and reloaded at $L$no_data
+	mov	rax,rsp
+$L$SEH_begin_ChaCha20_ctr32:
+	mov	rdi,rcx	; rdi = 1st argument (written through below: output pointer)
+	mov	rsi,rdx	; rsi = 2nd argument (read through below: input pointer)
+	mov	rdx,r8	; rdx = 3rd argument (byte count)
+	mov	rcx,r9	; rcx = 4th argument (32 bytes are loaded from it; presumably the key)
+	mov	r8,QWORD[40+rsp]	; r8 = 5th argument from the stack (16 bytes loaded from it: counter block)
+
+
+
+	cmp	rdx,0
+	je	NEAR $L$no_data	; nothing to do for a zero length
+	mov	r10,QWORD[((OPENSSL_ia32cap_P+4))]	; capability bits; r10 is also consumed later by the 4x path
+	test	r10d,512	; bit 9 of the dword at OPENSSL_ia32cap_P+4
+	jnz	NEAR $L$ChaCha20_ssse3	; taken before anything is pushed, so rsp is still the entry value
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	sub	rsp,64+24	; 64 bytes of block scratch + 24 bytes for three saved qwords at [64..87+rsp]
+
+$L$ctr32_body:
+
+
+	movdqu	xmm1,XMMWORD[rcx]	; state words 4..7
+	movdqu	xmm2,XMMWORD[16+rcx]	; state words 8..11
+	movdqu	xmm3,XMMWORD[r8]	; state words 12..15 (low dword is the block counter)
+	movdqa	xmm4,XMMWORD[$L$one]	; counter increment
+
+
+	movdqa	XMMWORD[16+rsp],xmm1
+	movdqa	XMMWORD[32+rsp],xmm2
+	movdqa	XMMWORD[48+rsp],xmm3
+	mov	rbp,rdx	; rbp = bytes remaining
+	jmp	NEAR $L$oop_outer
+
+ALIGN	32
+$L$oop_outer:	; one pass per 64-byte keystream block
+	mov	eax,0x61707865	; words 0..3: the same constants as $L$sigma ("expa", "nd 3", "2-by", "te k")
+	mov	ebx,0x3320646e
+	mov	ecx,0x79622d32
+	mov	edx,0x6b206574
+	mov	r8d,DWORD[16+rsp]	; words 4..7
+	mov	r9d,DWORD[20+rsp]
+	mov	r10d,DWORD[24+rsp]
+	mov	r11d,DWORD[28+rsp]
+	movd	r12d,xmm3	; word 12 (current counter)
+	mov	r13d,DWORD[52+rsp]	; words 13..15
+	mov	r14d,DWORD[56+rsp]
+	mov	r15d,DWORD[60+rsp]
+
+	mov	QWORD[((64+0))+rsp],rbp	; spill remaining length; ebp becomes the round counter
+	mov	ebp,10	; 10 iterations of two rounds each
+	mov	QWORD[((64+8))+rsp],rsi	; spill input pointer; esi/edi hold two of words 8..11 at a time
+DB	102,72,15,126,214	; encodes movq rsi,xmm2 (low 64 bits of xmm2 = words 8 and 9)
+	mov	QWORD[((64+16))+rsp],rdi	; spill output pointer
+	mov	rdi,rsi
+	shr	rdi,32	; esi = word 8, edi = word 9
+	jmp	NEAR $L$oop
+
+ALIGN	32
+$L$oop:	; each iteration: four column quarter-rounds, then four diagonal quarter-rounds
+	add	eax,r8d	; quarter-rounds on words (0,4,8,12) and (1,5,9,13), interleaved
+	xor	r12d,eax
+	rol	r12d,16
+	add	ebx,r9d
+	xor	r13d,ebx
+	rol	r13d,16
+	add	esi,r12d
+	xor	r8d,esi
+	rol	r8d,12
+	add	edi,r13d
+	xor	r9d,edi
+	rol	r9d,12
+	add	eax,r8d
+	xor	r12d,eax
+	rol	r12d,8
+	add	ebx,r9d
+	xor	r13d,ebx
+	rol	r13d,8
+	add	esi,r12d
+	xor	r8d,esi
+	rol	r8d,7
+	add	edi,r13d
+	xor	r9d,edi
+	rol	r9d,7
+	mov	DWORD[32+rsp],esi	; park words 8,9 ...
+	mov	DWORD[36+rsp],edi
+	mov	esi,DWORD[40+rsp]	; ... and bring in words 10,11
+	mov	edi,DWORD[44+rsp]
+	add	ecx,r10d	; quarter-rounds on words (2,6,10,14) and (3,7,11,15)
+	xor	r14d,ecx
+	rol	r14d,16
+	add	edx,r11d
+	xor	r15d,edx
+	rol	r15d,16
+	add	esi,r14d
+	xor	r10d,esi
+	rol	r10d,12
+	add	edi,r15d
+	xor	r11d,edi
+	rol	r11d,12
+	add	ecx,r10d
+	xor	r14d,ecx
+	rol	r14d,8
+	add	edx,r11d
+	xor	r15d,edx
+	rol	r15d,8
+	add	esi,r14d
+	xor	r10d,esi
+	rol	r10d,7
+	add	edi,r15d
+	xor	r11d,edi
+	rol	r11d,7
+	add	eax,r9d	; diagonal quarter-rounds on words (0,5,10,15) and (1,6,11,12)
+	xor	r15d,eax
+	rol	r15d,16
+	add	ebx,r10d
+	xor	r12d,ebx
+	rol	r12d,16
+	add	esi,r15d
+	xor	r9d,esi
+	rol	r9d,12
+	add	edi,r12d
+	xor	r10d,edi
+	rol	r10d,12
+	add	eax,r9d
+	xor	r15d,eax
+	rol	r15d,8
+	add	ebx,r10d
+	xor	r12d,ebx
+	rol	r12d,8
+	add	esi,r15d
+	xor	r9d,esi
+	rol	r9d,7
+	add	edi,r12d
+	xor	r10d,edi
+	rol	r10d,7
+	mov	DWORD[40+rsp],esi	; park words 10,11 ...
+	mov	DWORD[44+rsp],edi
+	mov	esi,DWORD[32+rsp]	; ... and bring back words 8,9
+	mov	edi,DWORD[36+rsp]
+	add	ecx,r11d	; diagonal quarter-rounds on words (2,7,8,13) and (3,4,9,14)
+	xor	r13d,ecx
+	rol	r13d,16
+	add	edx,r8d
+	xor	r14d,edx
+	rol	r14d,16
+	add	esi,r13d
+	xor	r11d,esi
+	rol	r11d,12
+	add	edi,r14d
+	xor	r8d,edi
+	rol	r8d,12
+	add	ecx,r11d
+	xor	r13d,ecx
+	rol	r13d,8
+	add	edx,r8d
+	xor	r14d,edx
+	rol	r14d,8
+	add	esi,r13d
+	xor	r11d,esi
+	rol	r11d,7
+	add	edi,r14d
+	xor	r8d,edi
+	rol	r8d,7
+	dec	ebp
+	jnz	NEAR $L$oop
+	mov	DWORD[36+rsp],edi	; [32..47+rsp] now holds the worked words 8..11
+	mov	DWORD[32+rsp],esi
+	mov	rbp,QWORD[64+rsp]	; reload remaining length
+	movdqa	xmm1,xmm2	; xmm1 = original words 8..11
+	mov	rsi,QWORD[((64+8))+rsp]	; reload input pointer
+	paddd	xmm3,xmm4	; advance the counter for the next block
+	mov	rdi,QWORD[((64+16))+rsp]	; reload output pointer
+
+	add	eax,0x61707865	; feed-forward: add the original state to the worked state
+	add	ebx,0x3320646e
+	add	ecx,0x79622d32
+	add	edx,0x6b206574
+	add	r8d,DWORD[16+rsp]
+	add	r9d,DWORD[20+rsp]
+	add	r10d,DWORD[24+rsp]
+	add	r11d,DWORD[28+rsp]
+	add	r12d,DWORD[48+rsp]	; [48+rsp] still holds this block's counter (updated only on the full-block path below)
+	add	r13d,DWORD[52+rsp]
+	add	r14d,DWORD[56+rsp]
+	add	r15d,DWORD[60+rsp]
+	paddd	xmm1,XMMWORD[32+rsp]	; keystream words 8..11
+
+	cmp	rbp,64
+	jb	NEAR $L$tail	; fewer than 64 bytes left: finish byte by byte
+
+	xor	eax,DWORD[rsi]	; full block: XOR keystream with 64 input bytes
+	xor	ebx,DWORD[4+rsi]
+	xor	ecx,DWORD[8+rsi]
+	xor	edx,DWORD[12+rsi]
+	xor	r8d,DWORD[16+rsi]
+	xor	r9d,DWORD[20+rsi]
+	xor	r10d,DWORD[24+rsi]
+	xor	r11d,DWORD[28+rsi]
+	movdqu	xmm0,XMMWORD[32+rsi]
+	xor	r12d,DWORD[48+rsi]
+	xor	r13d,DWORD[52+rsi]
+	xor	r14d,DWORD[56+rsi]
+	xor	r15d,DWORD[60+rsi]
+	lea	rsi,[64+rsi]
+	pxor	xmm0,xmm1
+
+	movdqa	XMMWORD[32+rsp],xmm2	; restore original words 8..11 for the next block
+	movd	DWORD[48+rsp],xmm3	; store the incremented counter
+
+	mov	DWORD[rdi],eax	; write 64 output bytes
+	mov	DWORD[4+rdi],ebx
+	mov	DWORD[8+rdi],ecx
+	mov	DWORD[12+rdi],edx
+	mov	DWORD[16+rdi],r8d
+	mov	DWORD[20+rdi],r9d
+	mov	DWORD[24+rdi],r10d
+	mov	DWORD[28+rdi],r11d
+	movdqu	XMMWORD[32+rdi],xmm0
+	mov	DWORD[48+rdi],r12d
+	mov	DWORD[52+rdi],r13d
+	mov	DWORD[56+rdi],r14d
+	mov	DWORD[60+rdi],r15d
+	lea	rdi,[64+rdi]
+
+	sub	rbp,64
+	jnz	NEAR $L$oop_outer
+
+	jmp	NEAR $L$done
+
+ALIGN	16
+$L$tail:	; partial final block: dump the keystream to [rsp..63+rsp], then XOR one byte at a time
+	mov	DWORD[rsp],eax
+	mov	DWORD[4+rsp],ebx
+	xor	rbx,rbx	; rbx = byte index (ebx was stored on the previous line)
+	mov	DWORD[8+rsp],ecx
+	mov	DWORD[12+rsp],edx
+	mov	DWORD[16+rsp],r8d
+	mov	DWORD[20+rsp],r9d
+	mov	DWORD[24+rsp],r10d
+	mov	DWORD[28+rsp],r11d
+	movdqa	XMMWORD[32+rsp],xmm1
+	mov	DWORD[48+rsp],r12d
+	mov	DWORD[52+rsp],r13d
+	mov	DWORD[56+rsp],r14d
+	mov	DWORD[60+rsp],r15d
+
+$L$oop_tail:
+	movzx	eax,BYTE[rbx*1+rsi]	; input byte
+	movzx	edx,BYTE[rbx*1+rsp]	; keystream byte
+	lea	rbx,[1+rbx]
+	xor	eax,edx
+	mov	BYTE[((-1))+rbx*1+rdi],al	; -1 compensates for the index already being advanced
+	dec	rbp
+	jnz	NEAR $L$oop_tail
+
+$L$done:
+	lea	rsi,[((64+24+48))+rsp]	; rsi = rsp above the local area and the six pushed registers
+	mov	r15,QWORD[((-48))+rsi]
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; rsp back to its entry value
+
+$L$no_data:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ChaCha20_ctr32:
+
+ALIGN	32
+ChaCha20_ssse3:	; SSSE3 path, one block at a time; ChaCha20_ctr32 enters at $L$ChaCha20_ssse3 with arguments already remapped
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ChaCha20_ssse3:
+	mov	rdi,rcx	; same argument remapping as ChaCha20_ctr32: rdi=out, rsi=in, rdx=len, rcx=key material, r8=counter block
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+
+
+$L$ChaCha20_ssse3:
+
+	mov	r9,rsp	; r9 = frame base, used to restore rsp and locate saved xmm registers
+
+	cmp	rdx,128
+	ja	NEAR $L$ChaCha20_4x	; more than 128 bytes: use the 4-block path
+
+$L$do_sse3_after_all:	; also reached from the 4x path, with r9 already set
+	sub	rsp,64+40	; 64 bytes for the saved state + 40 bytes covering the xmm6/xmm7 save slots
+	movaps	XMMWORD[(-40)+r9],xmm6	; preserve xmm6/xmm7; restored at $L$done_ssse3
+	movaps	XMMWORD[(-24)+r9],xmm7
+$L$ssse3_body:
+	movdqa	xmm0,XMMWORD[$L$sigma]	; row 0: constants
+	movdqu	xmm1,XMMWORD[rcx]	; row 1: words 4..7
+	movdqu	xmm2,XMMWORD[16+rcx]	; row 2: words 8..11
+	movdqu	xmm3,XMMWORD[r8]	; row 3: counter block
+	movdqa	xmm6,XMMWORD[$L$rot16]	; pshufb masks kept in registers for the whole function
+	movdqa	xmm7,XMMWORD[$L$rot24]
+
+	movdqa	XMMWORD[rsp],xmm0	; keep the input state at [rsp..63+rsp] for the feed-forward
+	movdqa	XMMWORD[16+rsp],xmm1
+	movdqa	XMMWORD[32+rsp],xmm2
+	movdqa	XMMWORD[48+rsp],xmm3
+	mov	r8,10	; r8 is reused as the round-pair counter
+	jmp	NEAR $L$oop_ssse3
+
+ALIGN	32
+$L$oop_outer_ssse3:	; subsequent blocks: reload the state and bump the counter
+	movdqa	xmm3,XMMWORD[$L$one]
+	movdqa	xmm0,XMMWORD[rsp]
+	movdqa	xmm1,XMMWORD[16+rsp]
+	movdqa	xmm2,XMMWORD[32+rsp]
+	paddd	xmm3,XMMWORD[48+rsp]	; counter + 1
+	mov	r8,10
+	movdqa	XMMWORD[48+rsp],xmm3
+	jmp	NEAR $L$oop_ssse3
+
+ALIGN	32
+$L$oop_ssse3:	; each iteration: one round on the rows as loaded, then one round on rotated rows
+	paddd	xmm0,xmm1
+	pxor	xmm3,xmm0
+DB	102,15,56,0,222	; encodes pshufb xmm3,xmm6 (rotate each dword by 16)
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,20
+	pslld	xmm4,12
+	por	xmm1,xmm4	; xmm1 rotated left by 12
+	paddd	xmm0,xmm1
+	pxor	xmm3,xmm0
+DB	102,15,56,0,223	; encodes pshufb xmm3,xmm7 (rotate each dword left by 8)
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,25
+	pslld	xmm4,7
+	por	xmm1,xmm4	; xmm1 rotated left by 7
+	pshufd	xmm2,xmm2,78	; rotate the dwords within rows 1..3 so the diagonals line up as columns
+	pshufd	xmm1,xmm1,57
+	pshufd	xmm3,xmm3,147
+	nop
+	paddd	xmm0,xmm1
+	pxor	xmm3,xmm0
+DB	102,15,56,0,222	; encodes pshufb xmm3,xmm6
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,20
+	pslld	xmm4,12
+	por	xmm1,xmm4
+	paddd	xmm0,xmm1
+	pxor	xmm3,xmm0
+DB	102,15,56,0,223	; encodes pshufb xmm3,xmm7
+	paddd	xmm2,xmm3
+	pxor	xmm1,xmm2
+	movdqa	xmm4,xmm1
+	psrld	xmm1,25
+	pslld	xmm4,7
+	por	xmm1,xmm4
+	pshufd	xmm2,xmm2,78	; undo the row rotation (57 and 147 swapped relative to above)
+	pshufd	xmm1,xmm1,147
+	pshufd	xmm3,xmm3,57
+	dec	r8
+	jnz	NEAR $L$oop_ssse3
+	paddd	xmm0,XMMWORD[rsp]	; feed-forward: add the saved input state
+	paddd	xmm1,XMMWORD[16+rsp]
+	paddd	xmm2,XMMWORD[32+rsp]
+	paddd	xmm3,XMMWORD[48+rsp]
+
+	cmp	rdx,64
+	jb	NEAR $L$tail_ssse3
+
+	movdqu	xmm4,XMMWORD[rsi]	; full block: XOR 64 input bytes with the keystream
+	movdqu	xmm5,XMMWORD[16+rsi]
+	pxor	xmm0,xmm4
+	movdqu	xmm4,XMMWORD[32+rsi]
+	pxor	xmm1,xmm5
+	movdqu	xmm5,XMMWORD[48+rsi]
+	lea	rsi,[64+rsi]
+	pxor	xmm2,xmm4
+	pxor	xmm3,xmm5
+
+	movdqu	XMMWORD[rdi],xmm0
+	movdqu	XMMWORD[16+rdi],xmm1
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	XMMWORD[48+rdi],xmm3
+	lea	rdi,[64+rdi]
+
+	sub	rdx,64
+	jnz	NEAR $L$oop_outer_ssse3
+
+	jmp	NEAR $L$done_ssse3
+
+ALIGN	16
+$L$tail_ssse3:	; partial final block: spill the keystream over the saved state, then XOR byte by byte
+	movdqa	XMMWORD[rsp],xmm0
+	movdqa	XMMWORD[16+rsp],xmm1
+	movdqa	XMMWORD[32+rsp],xmm2
+	movdqa	XMMWORD[48+rsp],xmm3
+	xor	r8,r8	; r8 = byte index
+
+$L$oop_tail_ssse3:
+	movzx	eax,BYTE[r8*1+rsi]	; input byte
+	movzx	ecx,BYTE[r8*1+rsp]	; keystream byte
+	lea	r8,[1+r8]
+	xor	eax,ecx
+	mov	BYTE[((-1))+r8*1+rdi],al
+	dec	rdx
+	jnz	NEAR $L$oop_tail_ssse3
+
+$L$done_ssse3:
+	movaps	xmm6,XMMWORD[((-40))+r9]	; restore the xmm registers saved in the prologue
+	movaps	xmm7,XMMWORD[((-24))+r9]
+	lea	rsp,[r9]
+
+$L$ssse3_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ChaCha20_ssse3:
+
+ALIGN	32
+ChaCha20_4x:	; SSSE3 path computing four 64-byte blocks per outer iteration; the SSSE3 path enters at $L$ChaCha20_4x
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ChaCha20_4x:
+	mov	rdi,rcx	; same argument remapping as ChaCha20_ctr32: rdi=out, rsi=in, rdx=len, rcx=key material, r8=counter block
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+
+
+$L$ChaCha20_4x:	; expects r10 = qword from OPENSSL_ia32cap_P+4 as loaded by ChaCha20_ctr32
+
+	mov	r9,rsp	; r9 = frame base for the epilogue
+
+	mov	r11,r10
+	shr	r10,32	; r10 = dword at OPENSSL_ia32cap_P+8
+	test	r10,32	; bit 5 of that dword selects the 8x (ymm) path
+	jnz	NEAR $L$ChaCha20_8x
+	cmp	rdx,192
+	ja	NEAR $L$proceed4x
+
+	and	r11,71303168	; keep bits 22 and 26 (0x4400000) of the dword at OPENSSL_ia32cap_P+4
+	cmp	r11,4194304	; only bit 22 set: for 192 bytes or fewer fall back to the 1x SSSE3 code (NOTE(review): presumably a CPU-model heuristic; confirm upstream)
+	je	NEAR $L$do_sse3_after_all
+
+$L$proceed4x:
+	sub	rsp,0x140+168	; 0x140 bytes of state scratch + 168 bytes covering the ten xmm save slots
+	movaps	XMMWORD[(-168)+r9],xmm6	; preserve xmm6..xmm15; restored at $L$done4x
+	movaps	XMMWORD[(-152)+r9],xmm7
+	movaps	XMMWORD[(-136)+r9],xmm8
+	movaps	XMMWORD[(-120)+r9],xmm9
+	movaps	XMMWORD[(-104)+r9],xmm10
+	movaps	XMMWORD[(-88)+r9],xmm11
+	movaps	XMMWORD[(-72)+r9],xmm12
+	movaps	XMMWORD[(-56)+r9],xmm13
+	movaps	XMMWORD[(-40)+r9],xmm14
+	movaps	XMMWORD[(-24)+r9],xmm15
+$L$4x_body:
+	movdqa	xmm11,XMMWORD[$L$sigma]	; words 0..3
+	movdqu	xmm15,XMMWORD[rcx]	; words 4..7
+	movdqu	xmm7,XMMWORD[16+rcx]	; words 8..11
+	movdqu	xmm3,XMMWORD[r8]	; words 12..15
+	lea	rcx,[256+rsp]	; rcx is reused as a base pointer into the scratch area
+	lea	r10,[$L$rot16]	; r10/r11 = addresses of the pshufb rotate masks
+	lea	r11,[$L$rot24]
+
+	pshufd	xmm8,xmm11,0x00	; broadcast each state word across all four lanes (one lane per block)
+	pshufd	xmm9,xmm11,0x55
+	movdqa	XMMWORD[64+rsp],xmm8	; and keep a copy on the stack for reloads and the feed-forward
+	pshufd	xmm10,xmm11,0xaa
+	movdqa	XMMWORD[80+rsp],xmm9
+	pshufd	xmm11,xmm11,0xff
+	movdqa	XMMWORD[96+rsp],xmm10
+	movdqa	XMMWORD[112+rsp],xmm11
+
+	pshufd	xmm12,xmm15,0x00
+	pshufd	xmm13,xmm15,0x55
+	movdqa	XMMWORD[(128-256)+rcx],xmm12
+	pshufd	xmm14,xmm15,0xaa
+	movdqa	XMMWORD[(144-256)+rcx],xmm13
+	pshufd	xmm15,xmm15,0xff
+	movdqa	XMMWORD[(160-256)+rcx],xmm14
+	movdqa	XMMWORD[(176-256)+rcx],xmm15
+
+	pshufd	xmm4,xmm7,0x00
+	pshufd	xmm5,xmm7,0x55
+	movdqa	XMMWORD[(192-256)+rcx],xmm4
+	pshufd	xmm6,xmm7,0xaa
+	movdqa	XMMWORD[(208-256)+rcx],xmm5
+	pshufd	xmm7,xmm7,0xff
+	movdqa	XMMWORD[(224-256)+rcx],xmm6
+	movdqa	XMMWORD[(240-256)+rcx],xmm7
+
+	pshufd	xmm0,xmm3,0x00
+	pshufd	xmm1,xmm3,0x55
+	paddd	xmm0,XMMWORD[$L$inc]	; lanes get counter+0, +1, +2, +3
+	pshufd	xmm2,xmm3,0xaa
+	movdqa	XMMWORD[(272-256)+rcx],xmm1
+	pshufd	xmm3,xmm3,0xff
+	movdqa	XMMWORD[(288-256)+rcx],xmm2
+	movdqa	XMMWORD[(304-256)+rcx],xmm3
+
+	jmp	NEAR $L$oop_enter4x	; the counters in xmm0 are stored at $L$oop_enter4x
+
+ALIGN	32
+$L$oop_outer4x:	; subsequent groups of four blocks: reload the broadcast state
+	movdqa	xmm8,XMMWORD[64+rsp]
+	movdqa	xmm9,XMMWORD[80+rsp]
+	movdqa	xmm10,XMMWORD[96+rsp]
+	movdqa	xmm11,XMMWORD[112+rsp]
+	movdqa	xmm12,XMMWORD[((128-256))+rcx]
+	movdqa	xmm13,XMMWORD[((144-256))+rcx]
+	movdqa	xmm14,XMMWORD[((160-256))+rcx]
+	movdqa	xmm15,XMMWORD[((176-256))+rcx]
+	movdqa	xmm4,XMMWORD[((192-256))+rcx]
+	movdqa	xmm5,XMMWORD[((208-256))+rcx]
+	movdqa	xmm6,XMMWORD[((224-256))+rcx]
+	movdqa	xmm7,XMMWORD[((240-256))+rcx]
+	movdqa	xmm0,XMMWORD[((256-256))+rcx]
+	movdqa	xmm1,XMMWORD[((272-256))+rcx]
+	movdqa	xmm2,XMMWORD[((288-256))+rcx]
+	movdqa	xmm3,XMMWORD[((304-256))+rcx]
+	paddd	xmm0,XMMWORD[$L$four]	; advance all four lane counters by 4
+
+$L$oop_enter4x:
+	movdqa	XMMWORD[32+rsp],xmm6	; spill words 10 and 11: xmm6/xmm7 serve as temporaries and mask registers in the loop
+	movdqa	XMMWORD[48+rsp],xmm7
+	movdqa	xmm7,XMMWORD[r10]	; xmm7 = rot16 mask
+	mov	eax,10	; 10 iterations of two rounds each
+	movdqa	XMMWORD[(256-256)+rcx],xmm0	; save the lane counters for the feed-forward and the next reload
+	jmp	NEAR $L$oop4x
+
+ALIGN	32
+$L$oop4x:	; each iteration: column round then diagonal round, two quarter-rounds interleaved at a time
+	paddd	xmm8,xmm12
+	paddd	xmm9,xmm13
+	pxor	xmm0,xmm8
+	pxor	xmm1,xmm9
+DB	102,15,56,0,199	; encodes pshufb xmm0,xmm7
+DB	102,15,56,0,207	; encodes pshufb xmm1,xmm7
+	paddd	xmm4,xmm0
+	paddd	xmm5,xmm1
+	pxor	xmm12,xmm4
+	pxor	xmm13,xmm5
+	movdqa	xmm6,xmm12
+	pslld	xmm12,12
+	psrld	xmm6,20
+	movdqa	xmm7,xmm13
+	pslld	xmm13,12
+	por	xmm12,xmm6
+	psrld	xmm7,20
+	movdqa	xmm6,XMMWORD[r11]	; xmm6 = rot24 mask
+	por	xmm13,xmm7
+	paddd	xmm8,xmm12
+	paddd	xmm9,xmm13
+	pxor	xmm0,xmm8
+	pxor	xmm1,xmm9
+DB	102,15,56,0,198	; encodes pshufb xmm0,xmm6
+DB	102,15,56,0,206	; encodes pshufb xmm1,xmm6
+	paddd	xmm4,xmm0
+	paddd	xmm5,xmm1
+	pxor	xmm12,xmm4
+	pxor	xmm13,xmm5
+	movdqa	xmm7,xmm12
+	pslld	xmm12,7
+	psrld	xmm7,25
+	movdqa	xmm6,xmm13
+	pslld	xmm13,7
+	por	xmm12,xmm7
+	psrld	xmm6,25
+	movdqa	xmm7,XMMWORD[r10]	; reload rot16 mask
+	por	xmm13,xmm6
+	movdqa	XMMWORD[rsp],xmm4	; park words 8,9 and bring in words 10,11
+	movdqa	XMMWORD[16+rsp],xmm5
+	movdqa	xmm4,XMMWORD[32+rsp]
+	movdqa	xmm5,XMMWORD[48+rsp]
+	paddd	xmm10,xmm14
+	paddd	xmm11,xmm15
+	pxor	xmm2,xmm10
+	pxor	xmm3,xmm11
+DB	102,15,56,0,215	; encodes pshufb xmm2,xmm7
+DB	102,15,56,0,223	; encodes pshufb xmm3,xmm7
+	paddd	xmm4,xmm2
+	paddd	xmm5,xmm3
+	pxor	xmm14,xmm4
+	pxor	xmm15,xmm5
+	movdqa	xmm6,xmm14
+	pslld	xmm14,12
+	psrld	xmm6,20
+	movdqa	xmm7,xmm15
+	pslld	xmm15,12
+	por	xmm14,xmm6
+	psrld	xmm7,20
+	movdqa	xmm6,XMMWORD[r11]
+	por	xmm15,xmm7
+	paddd	xmm10,xmm14
+	paddd	xmm11,xmm15
+	pxor	xmm2,xmm10
+	pxor	xmm3,xmm11
+DB	102,15,56,0,214	; encodes pshufb xmm2,xmm6
+DB	102,15,56,0,222	; encodes pshufb xmm3,xmm6
+	paddd	xmm4,xmm2
+	paddd	xmm5,xmm3
+	pxor	xmm14,xmm4
+	pxor	xmm15,xmm5
+	movdqa	xmm7,xmm14
+	pslld	xmm14,7
+	psrld	xmm7,25
+	movdqa	xmm6,xmm15
+	pslld	xmm15,7
+	por	xmm14,xmm7
+	psrld	xmm6,25
+	movdqa	xmm7,XMMWORD[r10]
+	por	xmm15,xmm6
+	paddd	xmm8,xmm13	; diagonal round starts here (word 0 now pairs with word 5, etc.)
+	paddd	xmm9,xmm14
+	pxor	xmm3,xmm8
+	pxor	xmm0,xmm9
+DB	102,15,56,0,223	; encodes pshufb xmm3,xmm7
+DB	102,15,56,0,199	; encodes pshufb xmm0,xmm7
+	paddd	xmm4,xmm3
+	paddd	xmm5,xmm0
+	pxor	xmm13,xmm4
+	pxor	xmm14,xmm5
+	movdqa	xmm6,xmm13
+	pslld	xmm13,12
+	psrld	xmm6,20
+	movdqa	xmm7,xmm14
+	pslld	xmm14,12
+	por	xmm13,xmm6
+	psrld	xmm7,20
+	movdqa	xmm6,XMMWORD[r11]
+	por	xmm14,xmm7
+	paddd	xmm8,xmm13
+	paddd	xmm9,xmm14
+	pxor	xmm3,xmm8
+	pxor	xmm0,xmm9
+DB	102,15,56,0,222	; encodes pshufb xmm3,xmm6
+DB	102,15,56,0,198	; encodes pshufb xmm0,xmm6
+	paddd	xmm4,xmm3
+	paddd	xmm5,xmm0
+	pxor	xmm13,xmm4
+	pxor	xmm14,xmm5
+	movdqa	xmm7,xmm13
+	pslld	xmm13,7
+	psrld	xmm7,25
+	movdqa	xmm6,xmm14
+	pslld	xmm14,7
+	por	xmm13,xmm7
+	psrld	xmm6,25
+	movdqa	xmm7,XMMWORD[r10]
+	por	xmm14,xmm6
+	movdqa	XMMWORD[32+rsp],xmm4	; park words 10,11 and bring back words 8,9
+	movdqa	XMMWORD[48+rsp],xmm5
+	movdqa	xmm4,XMMWORD[rsp]
+	movdqa	xmm5,XMMWORD[16+rsp]
+	paddd	xmm10,xmm15
+	paddd	xmm11,xmm12
+	pxor	xmm1,xmm10
+	pxor	xmm2,xmm11
+DB	102,15,56,0,207	; encodes pshufb xmm1,xmm7
+DB	102,15,56,0,215	; encodes pshufb xmm2,xmm7
+	paddd	xmm4,xmm1
+	paddd	xmm5,xmm2
+	pxor	xmm15,xmm4
+	pxor	xmm12,xmm5
+	movdqa	xmm6,xmm15
+	pslld	xmm15,12
+	psrld	xmm6,20
+	movdqa	xmm7,xmm12
+	pslld	xmm12,12
+	por	xmm15,xmm6
+	psrld	xmm7,20
+	movdqa	xmm6,XMMWORD[r11]
+	por	xmm12,xmm7
+	paddd	xmm10,xmm15
+	paddd	xmm11,xmm12
+	pxor	xmm1,xmm10
+	pxor	xmm2,xmm11
+DB	102,15,56,0,206	; encodes pshufb xmm1,xmm6
+DB	102,15,56,0,214	; encodes pshufb xmm2,xmm6
+	paddd	xmm4,xmm1
+	paddd	xmm5,xmm2
+	pxor	xmm15,xmm4
+	pxor	xmm12,xmm5
+	movdqa	xmm7,xmm15
+	pslld	xmm15,7
+	psrld	xmm7,25
+	movdqa	xmm6,xmm12
+	pslld	xmm12,7
+	por	xmm15,xmm7
+	psrld	xmm6,25
+	movdqa	xmm7,XMMWORD[r10]
+	por	xmm12,xmm6
+	dec	eax
+	jnz	NEAR $L$oop4x
+
+	paddd	xmm8,XMMWORD[64+rsp]	; feed-forward for words 0..3, then transpose so each register holds 16 contiguous bytes of one block
+	paddd	xmm9,XMMWORD[80+rsp]
+	paddd	xmm10,XMMWORD[96+rsp]
+	paddd	xmm11,XMMWORD[112+rsp]
+
+	movdqa	xmm6,xmm8
+	punpckldq	xmm8,xmm9
+	movdqa	xmm7,xmm10
+	punpckldq	xmm10,xmm11
+	punpckhdq	xmm6,xmm9
+	punpckhdq	xmm7,xmm11
+	movdqa	xmm9,xmm8
+	punpcklqdq	xmm8,xmm10
+	movdqa	xmm11,xmm6
+	punpcklqdq	xmm6,xmm7
+	punpckhqdq	xmm9,xmm10
+	punpckhqdq	xmm11,xmm7
+	paddd	xmm12,XMMWORD[((128-256))+rcx]	; feed-forward for words 4..7
+	paddd	xmm13,XMMWORD[((144-256))+rcx]
+	paddd	xmm14,XMMWORD[((160-256))+rcx]
+	paddd	xmm15,XMMWORD[((176-256))+rcx]
+
+	movdqa	XMMWORD[rsp],xmm8	; first 16 keystream bytes of blocks 0 and 1 go to the stack ...
+	movdqa	XMMWORD[16+rsp],xmm9
+	movdqa	xmm8,XMMWORD[32+rsp]	; ... freeing xmm8/xmm9 to pick up the parked words 10,11
+	movdqa	xmm9,XMMWORD[48+rsp]
+
+	movdqa	xmm10,xmm12
+	punpckldq	xmm12,xmm13
+	movdqa	xmm7,xmm14
+	punpckldq	xmm14,xmm15
+	punpckhdq	xmm10,xmm13
+	punpckhdq	xmm7,xmm15
+	movdqa	xmm13,xmm12
+	punpcklqdq	xmm12,xmm14
+	movdqa	xmm15,xmm10
+	punpcklqdq	xmm10,xmm7
+	punpckhqdq	xmm13,xmm14
+	punpckhqdq	xmm15,xmm7
+	paddd	xmm4,XMMWORD[((192-256))+rcx]	; feed-forward for words 8..11
+	paddd	xmm5,XMMWORD[((208-256))+rcx]
+	paddd	xmm8,XMMWORD[((224-256))+rcx]
+	paddd	xmm9,XMMWORD[((240-256))+rcx]
+
+	movdqa	XMMWORD[32+rsp],xmm6	; first 16 keystream bytes of blocks 2 and 3
+	movdqa	XMMWORD[48+rsp],xmm11
+
+	movdqa	xmm14,xmm4
+	punpckldq	xmm4,xmm5
+	movdqa	xmm7,xmm8
+	punpckldq	xmm8,xmm9
+	punpckhdq	xmm14,xmm5
+	punpckhdq	xmm7,xmm9
+	movdqa	xmm5,xmm4
+	punpcklqdq	xmm4,xmm8
+	movdqa	xmm9,xmm14
+	punpcklqdq	xmm14,xmm7
+	punpckhqdq	xmm5,xmm8
+	punpckhqdq	xmm9,xmm7
+	paddd	xmm0,XMMWORD[((256-256))+rcx]	; feed-forward for words 12..15
+	paddd	xmm1,XMMWORD[((272-256))+rcx]
+	paddd	xmm2,XMMWORD[((288-256))+rcx]
+	paddd	xmm3,XMMWORD[((304-256))+rcx]
+
+	movdqa	xmm8,xmm0
+	punpckldq	xmm0,xmm1
+	movdqa	xmm7,xmm2
+	punpckldq	xmm2,xmm3
+	punpckhdq	xmm8,xmm1
+	punpckhdq	xmm7,xmm3
+	movdqa	xmm1,xmm0
+	punpcklqdq	xmm0,xmm2
+	movdqa	xmm3,xmm8
+	punpcklqdq	xmm8,xmm7
+	punpckhqdq	xmm1,xmm2
+	punpckhqdq	xmm3,xmm7
+	cmp	rdx,64*4
+	jb	NEAR $L$tail4x	; fewer than 256 bytes left
+
+	movdqu	xmm6,XMMWORD[rsi]	; block 0 keystream: [rsp], xmm12, xmm4, xmm0
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[rsp]
+	pxor	xmm11,xmm12
+	pxor	xmm2,xmm4
+	pxor	xmm7,xmm0
+
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	xmm6,XMMWORD[64+rsi]	; block 1 keystream: [16+rsp], xmm13, xmm5, xmm1
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	xmm11,XMMWORD[80+rsi]
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	xmm2,XMMWORD[96+rsi]
+	movdqu	XMMWORD[48+rdi],xmm7
+	movdqu	xmm7,XMMWORD[112+rsi]
+	lea	rsi,[128+rsi]
+	pxor	xmm6,XMMWORD[16+rsp]
+	pxor	xmm11,xmm13
+	pxor	xmm2,xmm5
+	pxor	xmm7,xmm1
+
+	movdqu	XMMWORD[64+rdi],xmm6
+	movdqu	xmm6,XMMWORD[rsi]	; block 2 keystream: [32+rsp], xmm10, xmm14, xmm8
+	movdqu	XMMWORD[80+rdi],xmm11
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	XMMWORD[96+rdi],xmm2
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	XMMWORD[112+rdi],xmm7
+	lea	rdi,[128+rdi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[32+rsp]
+	pxor	xmm11,xmm10
+	pxor	xmm2,xmm14
+	pxor	xmm7,xmm8
+
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	xmm6,XMMWORD[64+rsi]	; block 3 keystream: [48+rsp], xmm15, xmm9, xmm3
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	xmm11,XMMWORD[80+rsi]
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	xmm2,XMMWORD[96+rsi]
+	movdqu	XMMWORD[48+rdi],xmm7
+	movdqu	xmm7,XMMWORD[112+rsi]
+	lea	rsi,[128+rsi]
+	pxor	xmm6,XMMWORD[48+rsp]
+	pxor	xmm11,xmm15
+	pxor	xmm2,xmm9
+	pxor	xmm7,xmm3
+	movdqu	XMMWORD[64+rdi],xmm6
+	movdqu	XMMWORD[80+rdi],xmm11
+	movdqu	XMMWORD[96+rdi],xmm2
+	movdqu	XMMWORD[112+rdi],xmm7
+	lea	rdi,[128+rdi]
+
+	sub	rdx,64*4
+	jnz	NEAR $L$oop_outer4x
+
+	jmp	NEAR $L$done4x
+
+$L$tail4x:	; fewer than 256 bytes left: emit whole blocks, then finish the remainder byte by byte
+	cmp	rdx,192
+	jae	NEAR $L$192_or_more4x
+	cmp	rdx,128
+	jae	NEAR $L$128_or_more4x
+	cmp	rdx,64
+	jae	NEAR $L$64_or_more4x
+
+
+	xor	r10,r10	; r10 = byte index for $L$oop_tail4x
+
+	movdqa	XMMWORD[16+rsp],xmm12	; complete block 0's keystream at [rsp..63+rsp] ([rsp] is already in place)
+	movdqa	XMMWORD[32+rsp],xmm4
+	movdqa	XMMWORD[48+rsp],xmm0
+	jmp	NEAR $L$oop_tail4x
+
+ALIGN	32
+$L$64_or_more4x:
+	movdqu	xmm6,XMMWORD[rsi]
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[rsp]
+	pxor	xmm11,xmm12
+	pxor	xmm2,xmm4
+	pxor	xmm7,xmm0
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	XMMWORD[48+rdi],xmm7
+	je	NEAR $L$done4x	; flags are still those of "cmp rdx,64" in $L$tail4x: exactly 64 bytes means done
+
+	movdqa	xmm6,XMMWORD[16+rsp]	; stage block 1's keystream at [rsp..63+rsp] for the byte loop
+	lea	rsi,[64+rsi]
+	xor	r10,r10
+	movdqa	XMMWORD[rsp],xmm6
+	movdqa	XMMWORD[16+rsp],xmm13
+	lea	rdi,[64+rdi]
+	movdqa	XMMWORD[32+rsp],xmm5
+	sub	rdx,64
+	movdqa	XMMWORD[48+rsp],xmm1
+	jmp	NEAR $L$oop_tail4x
+
+ALIGN	32
+$L$128_or_more4x:
+	movdqu	xmm6,XMMWORD[rsi]
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[rsp]
+	pxor	xmm11,xmm12
+	pxor	xmm2,xmm4
+	pxor	xmm7,xmm0
+
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	xmm6,XMMWORD[64+rsi]
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	xmm11,XMMWORD[80+rsi]
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	xmm2,XMMWORD[96+rsi]
+	movdqu	XMMWORD[48+rdi],xmm7
+	movdqu	xmm7,XMMWORD[112+rsi]
+	pxor	xmm6,XMMWORD[16+rsp]
+	pxor	xmm11,xmm13
+	pxor	xmm2,xmm5
+	pxor	xmm7,xmm1
+	movdqu	XMMWORD[64+rdi],xmm6
+	movdqu	XMMWORD[80+rdi],xmm11
+	movdqu	XMMWORD[96+rdi],xmm2
+	movdqu	XMMWORD[112+rdi],xmm7
+	je	NEAR $L$done4x	; flags are still those of "cmp rdx,128" in $L$tail4x
+
+	movdqa	xmm6,XMMWORD[32+rsp]	; stage block 2's keystream at [rsp..63+rsp] for the byte loop
+	lea	rsi,[128+rsi]
+	xor	r10,r10
+	movdqa	XMMWORD[rsp],xmm6
+	movdqa	XMMWORD[16+rsp],xmm10
+	lea	rdi,[128+rdi]
+	movdqa	XMMWORD[32+rsp],xmm14
+	sub	rdx,128
+	movdqa	XMMWORD[48+rsp],xmm8
+	jmp	NEAR $L$oop_tail4x
+
+ALIGN	32
+$L$192_or_more4x:
+	movdqu	xmm6,XMMWORD[rsi]
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[rsp]
+	pxor	xmm11,xmm12
+	pxor	xmm2,xmm4
+	pxor	xmm7,xmm0
+
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	xmm6,XMMWORD[64+rsi]
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	xmm11,XMMWORD[80+rsi]
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	xmm2,XMMWORD[96+rsi]
+	movdqu	XMMWORD[48+rdi],xmm7
+	movdqu	xmm7,XMMWORD[112+rsi]
+	lea	rsi,[128+rsi]
+	pxor	xmm6,XMMWORD[16+rsp]
+	pxor	xmm11,xmm13
+	pxor	xmm2,xmm5
+	pxor	xmm7,xmm1
+
+	movdqu	XMMWORD[64+rdi],xmm6
+	movdqu	xmm6,XMMWORD[rsi]
+	movdqu	XMMWORD[80+rdi],xmm11
+	movdqu	xmm11,XMMWORD[16+rsi]
+	movdqu	XMMWORD[96+rdi],xmm2
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	XMMWORD[112+rdi],xmm7
+	lea	rdi,[128+rdi]
+	movdqu	xmm7,XMMWORD[48+rsi]
+	pxor	xmm6,XMMWORD[32+rsp]
+	pxor	xmm11,xmm10
+	pxor	xmm2,xmm14
+	pxor	xmm7,xmm8
+	movdqu	XMMWORD[rdi],xmm6
+	movdqu	XMMWORD[16+rdi],xmm11
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	XMMWORD[48+rdi],xmm7
+	je	NEAR $L$done4x	; flags are still those of "cmp rdx,192" in $L$tail4x
+
+	movdqa	xmm6,XMMWORD[48+rsp]	; stage block 3's keystream at [rsp..63+rsp] for the byte loop
+	lea	rsi,[64+rsi]
+	xor	r10,r10
+	movdqa	XMMWORD[rsp],xmm6
+	movdqa	XMMWORD[16+rsp],xmm15
+	lea	rdi,[64+rdi]
+	movdqa	XMMWORD[32+rsp],xmm9
+	sub	rdx,192
+	movdqa	XMMWORD[48+rsp],xmm3
+
+$L$oop_tail4x:	; XOR the remaining rdx bytes with the keystream staged at [rsp]
+	movzx	eax,BYTE[r10*1+rsi]
+	movzx	ecx,BYTE[r10*1+rsp]
+	lea	r10,[1+r10]
+	xor	eax,ecx
+	mov	BYTE[((-1))+r10*1+rdi],al
+	dec	rdx
+	jnz	NEAR $L$oop_tail4x
+
+$L$done4x:
+	movaps	xmm6,XMMWORD[((-168))+r9]	; restore the xmm registers saved at $L$proceed4x
+	movaps	xmm7,XMMWORD[((-152))+r9]
+	movaps	xmm8,XMMWORD[((-136))+r9]
+	movaps	xmm9,XMMWORD[((-120))+r9]
+	movaps	xmm10,XMMWORD[((-104))+r9]
+	movaps	xmm11,XMMWORD[((-88))+r9]
+	movaps	xmm12,XMMWORD[((-72))+r9]
+	movaps	xmm13,XMMWORD[((-56))+r9]
+	movaps	xmm14,XMMWORD[((-40))+r9]
+	movaps	xmm15,XMMWORD[((-24))+r9]
+	lea	rsp,[r9]
+
+$L$4x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ChaCha20_4x:
+
+ALIGN	32
+ChaCha20_8x:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ChaCha20_8x:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+
+
+$L$ChaCha20_8x:
+
+	mov	r9,rsp
+
+	sub	rsp,0x280+168
+	and	rsp,-32
+	movaps	XMMWORD[(-168)+r9],xmm6
+	movaps	XMMWORD[(-152)+r9],xmm7
+	movaps	XMMWORD[(-136)+r9],xmm8
+	movaps	XMMWORD[(-120)+r9],xmm9
+	movaps	XMMWORD[(-104)+r9],xmm10
+	movaps	XMMWORD[(-88)+r9],xmm11
+	movaps	XMMWORD[(-72)+r9],xmm12
+	movaps	XMMWORD[(-56)+r9],xmm13
+	movaps	XMMWORD[(-40)+r9],xmm14
+	movaps	XMMWORD[(-24)+r9],xmm15
+$L$8x_body:
+	vzeroupper
+
+
+
+
+
+
+
+
+
+
+	vbroadcasti128	ymm11,XMMWORD[$L$sigma]
+	vbroadcasti128	ymm3,XMMWORD[rcx]
+	vbroadcasti128	ymm15,XMMWORD[16+rcx]
+	vbroadcasti128	ymm7,XMMWORD[r8]
+	lea	rcx,[256+rsp]
+	lea	rax,[512+rsp]
+	lea	r10,[$L$rot16]
+	lea	r11,[$L$rot24]
+
+	vpshufd	ymm8,ymm11,0x00
+	vpshufd	ymm9,ymm11,0x55
+	vmovdqa	YMMWORD[(128-256)+rcx],ymm8
+	vpshufd	ymm10,ymm11,0xaa
+	vmovdqa	YMMWORD[(160-256)+rcx],ymm9
+	vpshufd	ymm11,ymm11,0xff
+	vmovdqa	YMMWORD[(192-256)+rcx],ymm10
+	vmovdqa	YMMWORD[(224-256)+rcx],ymm11
+
+	vpshufd	ymm0,ymm3,0x00
+	vpshufd	ymm1,ymm3,0x55
+	vmovdqa	YMMWORD[(256-256)+rcx],ymm0
+	vpshufd	ymm2,ymm3,0xaa
+	vmovdqa	YMMWORD[(288-256)+rcx],ymm1
+	vpshufd	ymm3,ymm3,0xff
+	vmovdqa	YMMWORD[(320-256)+rcx],ymm2
+	vmovdqa	YMMWORD[(352-256)+rcx],ymm3
+
+	vpshufd	ymm12,ymm15,0x00
+	vpshufd	ymm13,ymm15,0x55
+	vmovdqa	YMMWORD[(384-512)+rax],ymm12
+	vpshufd	ymm14,ymm15,0xaa
+	vmovdqa	YMMWORD[(416-512)+rax],ymm13
+	vpshufd	ymm15,ymm15,0xff
+	vmovdqa	YMMWORD[(448-512)+rax],ymm14
+	vmovdqa	YMMWORD[(480-512)+rax],ymm15
+
+	vpshufd	ymm4,ymm7,0x00
+	vpshufd	ymm5,ymm7,0x55
+	vpaddd	ymm4,ymm4,YMMWORD[$L$incy]
+	vpshufd	ymm6,ymm7,0xaa
+	vmovdqa	YMMWORD[(544-512)+rax],ymm5
+	vpshufd	ymm7,ymm7,0xff
+	vmovdqa	YMMWORD[(576-512)+rax],ymm6
+	vmovdqa	YMMWORD[(608-512)+rax],ymm7
+
+	jmp	NEAR $L$oop_enter8x
+
+ALIGN	32
+$L$oop_outer8x:
+	vmovdqa	ymm8,YMMWORD[((128-256))+rcx]
+	vmovdqa	ymm9,YMMWORD[((160-256))+rcx]
+	vmovdqa	ymm10,YMMWORD[((192-256))+rcx]
+	vmovdqa	ymm11,YMMWORD[((224-256))+rcx]
+	vmovdqa	ymm0,YMMWORD[((256-256))+rcx]
+	vmovdqa	ymm1,YMMWORD[((288-256))+rcx]
+	vmovdqa	ymm2,YMMWORD[((320-256))+rcx]
+	vmovdqa	ymm3,YMMWORD[((352-256))+rcx]
+	vmovdqa	ymm12,YMMWORD[((384-512))+rax]
+	vmovdqa	ymm13,YMMWORD[((416-512))+rax]
+	vmovdqa	ymm14,YMMWORD[((448-512))+rax]
+	vmovdqa	ymm15,YMMWORD[((480-512))+rax]
+	vmovdqa	ymm4,YMMWORD[((512-512))+rax]
+	vmovdqa	ymm5,YMMWORD[((544-512))+rax]
+	vmovdqa	ymm6,YMMWORD[((576-512))+rax]
+	vmovdqa	ymm7,YMMWORD[((608-512))+rax]
+	vpaddd	ymm4,ymm4,YMMWORD[$L$eight]
+
+$L$oop_enter8x:
+	vmovdqa	YMMWORD[64+rsp],ymm14
+	vmovdqa	YMMWORD[96+rsp],ymm15
+	vbroadcasti128	ymm15,XMMWORD[r10]
+	vmovdqa	YMMWORD[(512-512)+rax],ymm4
+	mov	eax,10
+	jmp	NEAR $L$oop8x
+
+ALIGN	32
+$L$oop8x:
+	vpaddd	ymm8,ymm8,ymm0
+	vpxor	ymm4,ymm8,ymm4
+	vpshufb	ymm4,ymm4,ymm15
+	vpaddd	ymm9,ymm9,ymm1
+	vpxor	ymm5,ymm9,ymm5
+	vpshufb	ymm5,ymm5,ymm15
+	vpaddd	ymm12,ymm12,ymm4
+	vpxor	ymm0,ymm12,ymm0
+	vpslld	ymm14,ymm0,12
+	vpsrld	ymm0,ymm0,20
+	vpor	ymm0,ymm14,ymm0
+	vbroadcasti128	ymm14,XMMWORD[r11]
+	vpaddd	ymm13,ymm13,ymm5
+	vpxor	ymm1,ymm13,ymm1
+	vpslld	ymm15,ymm1,12
+	vpsrld	ymm1,ymm1,20
+	vpor	ymm1,ymm15,ymm1
+	vpaddd	ymm8,ymm8,ymm0
+	vpxor	ymm4,ymm8,ymm4
+	vpshufb	ymm4,ymm4,ymm14
+	vpaddd	ymm9,ymm9,ymm1
+	vpxor	ymm5,ymm9,ymm5
+	vpshufb	ymm5,ymm5,ymm14
+	vpaddd	ymm12,ymm12,ymm4
+	vpxor	ymm0,ymm12,ymm0
+	vpslld	ymm15,ymm0,7
+	vpsrld	ymm0,ymm0,25
+	vpor	ymm0,ymm15,ymm0
+	vbroadcasti128	ymm15,XMMWORD[r10]
+	vpaddd	ymm13,ymm13,ymm5
+	vpxor	ymm1,ymm13,ymm1
+	vpslld	ymm14,ymm1,7
+	vpsrld	ymm1,ymm1,25
+	vpor	ymm1,ymm14,ymm1
+	vmovdqa	YMMWORD[rsp],ymm12
+	vmovdqa	YMMWORD[32+rsp],ymm13
+	vmovdqa	ymm12,YMMWORD[64+rsp]
+	vmovdqa	ymm13,YMMWORD[96+rsp]
+	vpaddd	ymm10,ymm10,ymm2
+	vpxor	ymm6,ymm10,ymm6
+	vpshufb	ymm6,ymm6,ymm15
+	vpaddd	ymm11,ymm11,ymm3
+	vpxor	ymm7,ymm11,ymm7
+	vpshufb	ymm7,ymm7,ymm15
+	vpaddd	ymm12,ymm12,ymm6
+	vpxor	ymm2,ymm12,ymm2
+	vpslld	ymm14,ymm2,12
+	vpsrld	ymm2,ymm2,20
+	vpor	ymm2,ymm14,ymm2
+	vbroadcasti128	ymm14,XMMWORD[r11]
+	vpaddd	ymm13,ymm13,ymm7
+	vpxor	ymm3,ymm13,ymm3
+	vpslld	ymm15,ymm3,12
+	vpsrld	ymm3,ymm3,20
+	vpor	ymm3,ymm15,ymm3
+	vpaddd	ymm10,ymm10,ymm2
+	vpxor	ymm6,ymm10,ymm6
+	vpshufb	ymm6,ymm6,ymm14
+	vpaddd	ymm11,ymm11,ymm3
+	vpxor	ymm7,ymm11,ymm7
+	vpshufb	ymm7,ymm7,ymm14
+	vpaddd	ymm12,ymm12,ymm6
+	vpxor	ymm2,ymm12,ymm2
+	vpslld	ymm15,ymm2,7
+	vpsrld	ymm2,ymm2,25
+	vpor	ymm2,ymm15,ymm2
+	vbroadcasti128	ymm15,XMMWORD[r10]
+	vpaddd	ymm13,ymm13,ymm7
+	vpxor	ymm3,ymm13,ymm3
+	vpslld	ymm14,ymm3,7
+	vpsrld	ymm3,ymm3,25
+	vpor	ymm3,ymm14,ymm3
+	vpaddd	ymm8,ymm8,ymm1
+	vpxor	ymm7,ymm8,ymm7
+	vpshufb	ymm7,ymm7,ymm15
+	vpaddd	ymm9,ymm9,ymm2
+	vpxor	ymm4,ymm9,ymm4
+	vpshufb	ymm4,ymm4,ymm15
+	vpaddd	ymm12,ymm12,ymm7
+	vpxor	ymm1,ymm12,ymm1
+	vpslld	ymm14,ymm1,12
+	vpsrld	ymm1,ymm1,20
+	vpor	ymm1,ymm14,ymm1
+	vbroadcasti128	ymm14,XMMWORD[r11]
+	vpaddd	ymm13,ymm13,ymm4
+	vpxor	ymm2,ymm13,ymm2
+	vpslld	ymm15,ymm2,12
+	vpsrld	ymm2,ymm2,20
+	vpor	ymm2,ymm15,ymm2
+	vpaddd	ymm8,ymm8,ymm1
+	vpxor	ymm7,ymm8,ymm7
+	vpshufb	ymm7,ymm7,ymm14
+	vpaddd	ymm9,ymm9,ymm2
+	vpxor	ymm4,ymm9,ymm4
+	vpshufb	ymm4,ymm4,ymm14
+	vpaddd	ymm12,ymm12,ymm7
+	vpxor	ymm1,ymm12,ymm1
+	vpslld	ymm15,ymm1,7
+	vpsrld	ymm1,ymm1,25
+	vpor	ymm1,ymm15,ymm1
+	vbroadcasti128	ymm15,XMMWORD[r10]
+	vpaddd	ymm13,ymm13,ymm4
+	vpxor	ymm2,ymm13,ymm2
+	vpslld	ymm14,ymm2,7
+	vpsrld	ymm2,ymm2,25
+	vpor	ymm2,ymm14,ymm2
+	vmovdqa	YMMWORD[64+rsp],ymm12
+	vmovdqa	YMMWORD[96+rsp],ymm13
+	vmovdqa	ymm12,YMMWORD[rsp]
+	vmovdqa	ymm13,YMMWORD[32+rsp]
+	vpaddd	ymm10,ymm10,ymm3
+	vpxor	ymm5,ymm10,ymm5
+	vpshufb	ymm5,ymm5,ymm15
+	vpaddd	ymm11,ymm11,ymm0
+	vpxor	ymm6,ymm11,ymm6
+	vpshufb	ymm6,ymm6,ymm15
+	vpaddd	ymm12,ymm12,ymm5
+	vpxor	ymm3,ymm12,ymm3
+	vpslld	ymm14,ymm3,12
+	vpsrld	ymm3,ymm3,20
+	vpor	ymm3,ymm14,ymm3
+	vbroadcasti128	ymm14,XMMWORD[r11]
+	vpaddd	ymm13,ymm13,ymm6
+	vpxor	ymm0,ymm13,ymm0
+	vpslld	ymm15,ymm0,12
+	vpsrld	ymm0,ymm0,20
+	vpor	ymm0,ymm15,ymm0
+	vpaddd	ymm10,ymm10,ymm3
+	vpxor	ymm5,ymm10,ymm5
+	vpshufb	ymm5,ymm5,ymm14
+	vpaddd	ymm11,ymm11,ymm0
+	vpxor	ymm6,ymm11,ymm6
+	vpshufb	ymm6,ymm6,ymm14
+	vpaddd	ymm12,ymm12,ymm5
+	vpxor	ymm3,ymm12,ymm3
+	vpslld	ymm15,ymm3,7
+	vpsrld	ymm3,ymm3,25
+	vpor	ymm3,ymm15,ymm3
+	vbroadcasti128	ymm15,XMMWORD[r10]
+	vpaddd	ymm13,ymm13,ymm6
+	vpxor	ymm0,ymm13,ymm0
+	vpslld	ymm14,ymm0,7
+	vpsrld	ymm0,ymm0,25
+	vpor	ymm0,ymm14,ymm0
+	dec	eax
+	jnz	NEAR $L$oop8x
+
+	lea	rax,[512+rsp]
+	vpaddd	ymm8,ymm8,YMMWORD[((128-256))+rcx]
+	vpaddd	ymm9,ymm9,YMMWORD[((160-256))+rcx]
+	vpaddd	ymm10,ymm10,YMMWORD[((192-256))+rcx]
+	vpaddd	ymm11,ymm11,YMMWORD[((224-256))+rcx]
+
+	vpunpckldq	ymm14,ymm8,ymm9
+	vpunpckldq	ymm15,ymm10,ymm11
+	vpunpckhdq	ymm8,ymm8,ymm9
+	vpunpckhdq	ymm10,ymm10,ymm11
+	vpunpcklqdq	ymm9,ymm14,ymm15
+	vpunpckhqdq	ymm14,ymm14,ymm15
+	vpunpcklqdq	ymm11,ymm8,ymm10
+	vpunpckhqdq	ymm8,ymm8,ymm10
+	vpaddd	ymm0,ymm0,YMMWORD[((256-256))+rcx]
+	vpaddd	ymm1,ymm1,YMMWORD[((288-256))+rcx]
+	vpaddd	ymm2,ymm2,YMMWORD[((320-256))+rcx]
+	vpaddd	ymm3,ymm3,YMMWORD[((352-256))+rcx]
+
+	vpunpckldq	ymm10,ymm0,ymm1
+	vpunpckldq	ymm15,ymm2,ymm3
+	vpunpckhdq	ymm0,ymm0,ymm1
+	vpunpckhdq	ymm2,ymm2,ymm3
+	vpunpcklqdq	ymm1,ymm10,ymm15
+	vpunpckhqdq	ymm10,ymm10,ymm15
+	vpunpcklqdq	ymm3,ymm0,ymm2
+	vpunpckhqdq	ymm0,ymm0,ymm2
+	vperm2i128	ymm15,ymm9,ymm1,0x20
+	vperm2i128	ymm1,ymm9,ymm1,0x31
+	vperm2i128	ymm9,ymm14,ymm10,0x20
+	vperm2i128	ymm10,ymm14,ymm10,0x31
+	vperm2i128	ymm14,ymm11,ymm3,0x20
+	vperm2i128	ymm3,ymm11,ymm3,0x31
+	vperm2i128	ymm11,ymm8,ymm0,0x20
+	vperm2i128	ymm0,ymm8,ymm0,0x31
+	vmovdqa	YMMWORD[rsp],ymm15
+	vmovdqa	YMMWORD[32+rsp],ymm9
+	vmovdqa	ymm15,YMMWORD[64+rsp]
+	vmovdqa	ymm9,YMMWORD[96+rsp]
+
+	vpaddd	ymm12,ymm12,YMMWORD[((384-512))+rax]
+	vpaddd	ymm13,ymm13,YMMWORD[((416-512))+rax]
+	vpaddd	ymm15,ymm15,YMMWORD[((448-512))+rax]
+	vpaddd	ymm9,ymm9,YMMWORD[((480-512))+rax]
+
+	vpunpckldq	ymm2,ymm12,ymm13
+	vpunpckldq	ymm8,ymm15,ymm9
+	vpunpckhdq	ymm12,ymm12,ymm13
+	vpunpckhdq	ymm15,ymm15,ymm9
+	vpunpcklqdq	ymm13,ymm2,ymm8
+	vpunpckhqdq	ymm2,ymm2,ymm8
+	vpunpcklqdq	ymm9,ymm12,ymm15
+	vpunpckhqdq	ymm12,ymm12,ymm15
+	vpaddd	ymm4,ymm4,YMMWORD[((512-512))+rax]
+	vpaddd	ymm5,ymm5,YMMWORD[((544-512))+rax]
+	vpaddd	ymm6,ymm6,YMMWORD[((576-512))+rax]
+	vpaddd	ymm7,ymm7,YMMWORD[((608-512))+rax]
+
+	vpunpckldq	ymm15,ymm4,ymm5
+	vpunpckldq	ymm8,ymm6,ymm7
+	vpunpckhdq	ymm4,ymm4,ymm5
+	vpunpckhdq	ymm6,ymm6,ymm7
+	vpunpcklqdq	ymm5,ymm15,ymm8
+	vpunpckhqdq	ymm15,ymm15,ymm8
+	vpunpcklqdq	ymm7,ymm4,ymm6
+	vpunpckhqdq	ymm4,ymm4,ymm6
+	vperm2i128	ymm8,ymm13,ymm5,0x20
+	vperm2i128	ymm5,ymm13,ymm5,0x31
+	vperm2i128	ymm13,ymm2,ymm15,0x20
+	vperm2i128	ymm15,ymm2,ymm15,0x31
+	vperm2i128	ymm2,ymm9,ymm7,0x20
+	vperm2i128	ymm7,ymm9,ymm7,0x31
+	vperm2i128	ymm9,ymm12,ymm4,0x20
+	vperm2i128	ymm4,ymm12,ymm4,0x31
+	vmovdqa	ymm6,YMMWORD[rsp]
+	vmovdqa	ymm12,YMMWORD[32+rsp]
+
+	cmp	rdx,64*8
+	jb	NEAR $L$tail8x
+
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	lea	rsi,[128+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	lea	rdi,[128+rdi]
+
+	vpxor	ymm12,ymm12,YMMWORD[rsi]
+	vpxor	ymm13,ymm13,YMMWORD[32+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[64+rsi]
+	vpxor	ymm15,ymm15,YMMWORD[96+rsi]
+	lea	rsi,[128+rsi]
+	vmovdqu	YMMWORD[rdi],ymm12
+	vmovdqu	YMMWORD[32+rdi],ymm13
+	vmovdqu	YMMWORD[64+rdi],ymm10
+	vmovdqu	YMMWORD[96+rdi],ymm15
+	lea	rdi,[128+rdi]
+
+	vpxor	ymm14,ymm14,YMMWORD[rsi]
+	vpxor	ymm2,ymm2,YMMWORD[32+rsi]
+	vpxor	ymm3,ymm3,YMMWORD[64+rsi]
+	vpxor	ymm7,ymm7,YMMWORD[96+rsi]
+	lea	rsi,[128+rsi]
+	vmovdqu	YMMWORD[rdi],ymm14
+	vmovdqu	YMMWORD[32+rdi],ymm2
+	vmovdqu	YMMWORD[64+rdi],ymm3
+	vmovdqu	YMMWORD[96+rdi],ymm7
+	lea	rdi,[128+rdi]
+
+	vpxor	ymm11,ymm11,YMMWORD[rsi]
+	vpxor	ymm9,ymm9,YMMWORD[32+rsi]
+	vpxor	ymm0,ymm0,YMMWORD[64+rsi]
+	vpxor	ymm4,ymm4,YMMWORD[96+rsi]
+	lea	rsi,[128+rsi]
+	vmovdqu	YMMWORD[rdi],ymm11
+	vmovdqu	YMMWORD[32+rdi],ymm9
+	vmovdqu	YMMWORD[64+rdi],ymm0
+	vmovdqu	YMMWORD[96+rdi],ymm4
+	lea	rdi,[128+rdi]
+
+	sub	rdx,64*8
+	jnz	NEAR $L$oop_outer8x
+
+	jmp	NEAR $L$done8x
+
+$L$tail8x:
+	cmp	rdx,448
+	jae	NEAR $L$448_or_more8x
+	cmp	rdx,384
+	jae	NEAR $L$384_or_more8x
+	cmp	rdx,320
+	jae	NEAR $L$320_or_more8x
+	cmp	rdx,256
+	jae	NEAR $L$256_or_more8x
+	cmp	rdx,192
+	jae	NEAR $L$192_or_more8x
+	cmp	rdx,128
+	jae	NEAR $L$128_or_more8x
+	cmp	rdx,64
+	jae	NEAR $L$64_or_more8x
+
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm6
+	vmovdqa	YMMWORD[32+rsp],ymm8
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$64_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	je	NEAR $L$done8x
+
+	lea	rsi,[64+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm1
+	lea	rdi,[64+rdi]
+	sub	rdx,64
+	vmovdqa	YMMWORD[32+rsp],ymm5
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$128_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	je	NEAR $L$done8x
+
+	lea	rsi,[128+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm12
+	lea	rdi,[128+rdi]
+	sub	rdx,128
+	vmovdqa	YMMWORD[32+rsp],ymm13
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$192_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vpxor	ymm12,ymm12,YMMWORD[128+rsi]
+	vpxor	ymm13,ymm13,YMMWORD[160+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	vmovdqu	YMMWORD[128+rdi],ymm12
+	vmovdqu	YMMWORD[160+rdi],ymm13
+	je	NEAR $L$done8x
+
+	lea	rsi,[192+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm10
+	lea	rdi,[192+rdi]
+	sub	rdx,192
+	vmovdqa	YMMWORD[32+rsp],ymm15
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$256_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vpxor	ymm12,ymm12,YMMWORD[128+rsi]
+	vpxor	ymm13,ymm13,YMMWORD[160+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[192+rsi]
+	vpxor	ymm15,ymm15,YMMWORD[224+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	vmovdqu	YMMWORD[128+rdi],ymm12
+	vmovdqu	YMMWORD[160+rdi],ymm13
+	vmovdqu	YMMWORD[192+rdi],ymm10
+	vmovdqu	YMMWORD[224+rdi],ymm15
+	je	NEAR $L$done8x
+
+	lea	rsi,[256+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm14
+	lea	rdi,[256+rdi]
+	sub	rdx,256
+	vmovdqa	YMMWORD[32+rsp],ymm2
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$320_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vpxor	ymm12,ymm12,YMMWORD[128+rsi]
+	vpxor	ymm13,ymm13,YMMWORD[160+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[192+rsi]
+	vpxor	ymm15,ymm15,YMMWORD[224+rsi]
+	vpxor	ymm14,ymm14,YMMWORD[256+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[288+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	vmovdqu	YMMWORD[128+rdi],ymm12
+	vmovdqu	YMMWORD[160+rdi],ymm13
+	vmovdqu	YMMWORD[192+rdi],ymm10
+	vmovdqu	YMMWORD[224+rdi],ymm15
+	vmovdqu	YMMWORD[256+rdi],ymm14
+	vmovdqu	YMMWORD[288+rdi],ymm2
+	je	NEAR $L$done8x
+
+	lea	rsi,[320+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm3
+	lea	rdi,[320+rdi]
+	sub	rdx,320
+	vmovdqa	YMMWORD[32+rsp],ymm7
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$384_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vpxor	ymm12,ymm12,YMMWORD[128+rsi]
+	vpxor	ymm13,ymm13,YMMWORD[160+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[192+rsi]
+	vpxor	ymm15,ymm15,YMMWORD[224+rsi]
+	vpxor	ymm14,ymm14,YMMWORD[256+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[288+rsi]
+	vpxor	ymm3,ymm3,YMMWORD[320+rsi]
+	vpxor	ymm7,ymm7,YMMWORD[352+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	vmovdqu	YMMWORD[128+rdi],ymm12
+	vmovdqu	YMMWORD[160+rdi],ymm13
+	vmovdqu	YMMWORD[192+rdi],ymm10
+	vmovdqu	YMMWORD[224+rdi],ymm15
+	vmovdqu	YMMWORD[256+rdi],ymm14
+	vmovdqu	YMMWORD[288+rdi],ymm2
+	vmovdqu	YMMWORD[320+rdi],ymm3
+	vmovdqu	YMMWORD[352+rdi],ymm7
+	je	NEAR $L$done8x
+
+	lea	rsi,[384+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm11
+	lea	rdi,[384+rdi]
+	sub	rdx,384
+	vmovdqa	YMMWORD[32+rsp],ymm9
+	jmp	NEAR $L$oop_tail8x
+
+ALIGN	32
+$L$448_or_more8x:
+	vpxor	ymm6,ymm6,YMMWORD[rsi]
+	vpxor	ymm8,ymm8,YMMWORD[32+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[64+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[96+rsi]
+	vpxor	ymm12,ymm12,YMMWORD[128+rsi]
+	vpxor	ymm13,ymm13,YMMWORD[160+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[192+rsi]
+	vpxor	ymm15,ymm15,YMMWORD[224+rsi]
+	vpxor	ymm14,ymm14,YMMWORD[256+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[288+rsi]
+	vpxor	ymm3,ymm3,YMMWORD[320+rsi]
+	vpxor	ymm7,ymm7,YMMWORD[352+rsi]
+	vpxor	ymm11,ymm11,YMMWORD[384+rsi]
+	vpxor	ymm9,ymm9,YMMWORD[416+rsi]
+	vmovdqu	YMMWORD[rdi],ymm6
+	vmovdqu	YMMWORD[32+rdi],ymm8
+	vmovdqu	YMMWORD[64+rdi],ymm1
+	vmovdqu	YMMWORD[96+rdi],ymm5
+	vmovdqu	YMMWORD[128+rdi],ymm12
+	vmovdqu	YMMWORD[160+rdi],ymm13
+	vmovdqu	YMMWORD[192+rdi],ymm10
+	vmovdqu	YMMWORD[224+rdi],ymm15
+	vmovdqu	YMMWORD[256+rdi],ymm14
+	vmovdqu	YMMWORD[288+rdi],ymm2
+	vmovdqu	YMMWORD[320+rdi],ymm3
+	vmovdqu	YMMWORD[352+rdi],ymm7
+	vmovdqu	YMMWORD[384+rdi],ymm11
+	vmovdqu	YMMWORD[416+rdi],ymm9
+	je	NEAR $L$done8x
+
+	lea	rsi,[448+rsi]
+	xor	r10,r10
+	vmovdqa	YMMWORD[rsp],ymm0
+	lea	rdi,[448+rdi]
+	sub	rdx,448
+	vmovdqa	YMMWORD[32+rsp],ymm4
+
+$L$oop_tail8x:
+	movzx	eax,BYTE[r10*1+rsi]
+	movzx	ecx,BYTE[r10*1+rsp]
+	lea	r10,[1+r10]
+	xor	eax,ecx
+	mov	BYTE[((-1))+r10*1+rdi],al
+	dec	rdx
+	jnz	NEAR $L$oop_tail8x
+
+$L$done8x:
+	vzeroall
+	movaps	xmm6,XMMWORD[((-168))+r9]
+	movaps	xmm7,XMMWORD[((-152))+r9]
+	movaps	xmm8,XMMWORD[((-136))+r9]
+	movaps	xmm9,XMMWORD[((-120))+r9]
+	movaps	xmm10,XMMWORD[((-104))+r9]
+	movaps	xmm11,XMMWORD[((-88))+r9]
+	movaps	xmm12,XMMWORD[((-72))+r9]
+	movaps	xmm13,XMMWORD[((-56))+r9]
+	movaps	xmm14,XMMWORD[((-40))+r9]
+	movaps	xmm15,XMMWORD[((-24))+r9]
+	lea	rsp,[r9]
+
+$L$8x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ChaCha20_8x:
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	; SEH handler registered for ChaCha20_ctr32 in .xdata; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; scratch frame; slots 32..56 receive the stack arguments for RtlVirtualUnwind below
+
+	mov	rax,QWORD[120+r8]	; context->Rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	mov	rsi,QWORD[8+r9]	; disp->ImageBase
+	mov	r11,QWORD[56+r9]	; disp->HandlerData
+
+	lea	r10,[$L$ctr32_body]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip below $L$ctr32_body: keep context->Rax as the frame base
+
+	mov	rax,QWORD[152+r8]	; context->Rsp
+
+	lea	r10,[$L$no_data]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; Rip at or past $L$no_data: use context->Rsp unchanged
+
+	lea	rax,[((64+24+48))+rax]	; presumably skips ChaCha20_ctr32's 64+24-byte frame plus six pushed registers (prologue not visible here)
+
+	mov	rbx,QWORD[((-8))+rax]	; reload the six saved GPRs from just below the adjusted frame base
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	; context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[240+r8],r15	; context->R15
+
+$L$common_seh_tail:	; shared tail, also entered from ssse3_handler and full_handler; expects rax = frame base
+	mov	rdi,QWORD[8+rax]	; rdi/rsi as stored by the ";WIN64 prologue" at 8 and 16 above the frame base
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; destination: disp->ContextRecord
+	mov	rsi,r8	; source: the CONTEXT updated above
+	mov	ecx,154	; 154 qwords = 1232 bytes copied
+	DD	0xa548f3fc	; bytes fc f3 48 a5 = cld; rep movsq
+
+	mov	rsi,r9	; rsi = DISPATCHER_CONTEXT* (r9 is reused as an argument register next)
+	xor	rcx,rcx	; arg1 = 0
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5
+	mov	QWORD[40+rsp],r11	; arg6
+	mov	QWORD[48+rsp],r12	; arg7
+	mov	QWORD[56+rsp],rcx	; arg8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return 1 (ExceptionContinueSearch in the Win64 handler protocol)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+ssse3_handler:	; SEH handler registered for ChaCha20_ssse3 in .xdata; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; same frame layout as se_handler, whose tail this handler jumps into
+
+	mov	rax,QWORD[120+r8]	; context->Rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	mov	rsi,QWORD[8+r9]	; disp->ImageBase
+	mov	r11,QWORD[56+r9]	; disp->HandlerData: two image-relative labels (body, epilogue) from .xdata
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: body label offset
+	lea	r10,[r10*1+rsi]	; convert to an absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip before the body label: keep context->Rax as the frame base
+
+	mov	rax,QWORD[192+r8]	; context->R9, used as the frame base from here on
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: epilogue label offset
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; Rip at or past the epilogue label: nothing more to restore here
+
+	lea	rsi,[((-40))+rax]	; source: 32 bytes ending 8 bytes below the frame base
+	lea	rdi,[512+r8]	; destination: context->Xmm6
+	mov	ecx,4	; 4 qwords = two XMM registers (xmm6, xmm7)
+	DD	0xa548f3fc	; bytes fc f3 48 a5 = cld; rep movsq
+
+	jmp	NEAR $L$common_seh_tail
+
+
+
+ALIGN	16
+full_handler:	; SEH handler registered for ChaCha20_4x and ChaCha20_8x in .xdata; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; same frame layout as se_handler, whose tail this handler jumps into
+
+	mov	rax,QWORD[120+r8]	; context->Rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	mov	rsi,QWORD[8+r9]	; disp->ImageBase
+	mov	r11,QWORD[56+r9]	; disp->HandlerData: two image-relative labels (body, epilogue) from .xdata
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: body label offset
+	lea	r10,[r10*1+rsi]	; convert to an absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip before the body label: keep context->Rax as the frame base
+
+	mov	rax,QWORD[192+r8]	; context->R9, used as the frame base from here on
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: epilogue label offset
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; Rip at or past the epilogue label: nothing more to restore here
+
+	lea	rsi,[((-168))+rax]	; source: same -168 offset the ChaCha20_8x epilogue reloads xmm6 from
+	lea	rdi,[512+r8]	; destination: context->Xmm6
+	mov	ecx,20	; 20 qwords = ten XMM registers (xmm6..xmm15)
+	DD	0xa548f3fc	; bytes fc f3 48 a5 = cld; rep movsq
+
+	jmp	NEAR $L$common_seh_tail
+
+
+section	.pdata rdata align=4
+ALIGN	4	; function table: one (begin, end, unwind-info) triple of image-relative addresses per routine
+	DD	$L$SEH_begin_ChaCha20_ctr32 wrt ..imagebase
+	DD	$L$SEH_end_ChaCha20_ctr32 wrt ..imagebase
+	DD	$L$SEH_info_ChaCha20_ctr32 wrt ..imagebase
+; ChaCha20_ssse3
+	DD	$L$SEH_begin_ChaCha20_ssse3 wrt ..imagebase
+	DD	$L$SEH_end_ChaCha20_ssse3 wrt ..imagebase
+	DD	$L$SEH_info_ChaCha20_ssse3 wrt ..imagebase
+; ChaCha20_4x, followed by ChaCha20_8x
+	DD	$L$SEH_begin_ChaCha20_4x wrt ..imagebase
+	DD	$L$SEH_end_ChaCha20_4x wrt ..imagebase
+	DD	$L$SEH_info_ChaCha20_4x wrt ..imagebase
+	DD	$L$SEH_begin_ChaCha20_8x wrt ..imagebase
+	DD	$L$SEH_end_ChaCha20_8x wrt ..imagebase
+	DD	$L$SEH_info_ChaCha20_8x wrt ..imagebase
+section	.xdata rdata align=8
+ALIGN	8	; unwind-info records referenced by the third field of each .pdata triple
+$L$SEH_info_ChaCha20_ctr32:
+DB	9,0,0,0	; header byte 9 = version 1 with the exception-handler flag; no prologue unwind codes
+	DD	se_handler wrt ..imagebase	; se_handler compares Rip against fixed labels, so no handler data follows
+; ChaCha20_ssse3: handler plus the (body, epilogue) label pair it reads through disp->HandlerData
+$L$SEH_info_ChaCha20_ssse3:
+DB	9,0,0,0
+	DD	ssse3_handler wrt ..imagebase
+	DD	$L$ssse3_body wrt ..imagebase,$L$ssse3_epilogue wrt ..imagebase
+; ChaCha20_4x and ChaCha20_8x share full_handler, each with its own (body, epilogue) label pair
+$L$SEH_info_ChaCha20_4x:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$4x_body wrt ..imagebase,$L$4x_epilogue wrt ..imagebase
+$L$SEH_info_ChaCha20_8x:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$8x_body wrt ..imagebase,$L$8x_epilogue wrt ..imagebase
diff --git a/deps/boringssl/win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm b/deps/boringssl/win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
new file mode 100644
index 0000000..e711826
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/cipher_extra/aes128gcmsiv-x86_64.asm
@@ -0,0 +1,3277 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.data data align=8
+; Constants shared by the routines in this file.
+; one..eight: 128-bit values whose lowest dword is N; added to counter blocks with vpaddd.
+ALIGN	16
+one:
+	DQ	1,0
+two:
+	DQ	2,0
+three:
+	DQ	3,0
+four:
+	DQ	4,0
+five:
+	DQ	5,0
+six:
+	DQ	6,0
+seven:
+	DQ	7,0
+eight:
+	DQ	8,0
+; OR_MASK: only the top bit of the highest dword set; OR-ed into the initial counter block by the enc_msg routines
+OR_MASK:
+	DD	0x00000000,0x00000000,0x00000000,0x80000000
+poly:	; folding constant for GFMUL / polyval; only its high qword is selected (vpclmulqdq imm 0x10)
+	DQ	0x1,0xc200000000000000
+mask:	; vpshufb control: every output dword = source bytes 13,14,15,12 (highest dword rotated by one byte)
+	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+con1:	; first key-schedule round constant (1) in every dword; doubled per round with vpslld
+	DD	1,1,1,1
+con2:	; round constant 0x1b, loaded after con1 has been used for eight rounds
+	DD	0x1b,0x1b,0x1b,0x1b
+con3:	; vpshufb control: low 8 bytes zeroed (index -1), bytes 4..7 copied into both upper dwords
+DB	-1,-1,-1,-1,-1,-1,-1,-1,4,5,6,7,4,5,6,7
+and_mask:	; clears the lowest dword and keeps the upper three (used by aes128gcmsiv_kdf)
+	DD	0,0xffffffff,0xffffffff,0xffffffff
+section	.text code align=64
+
+
+ALIGN	16
+GFMUL:	; internal helper: xmm0 = carry-less product of xmm0 and xmm1, folded twice with [poly]
+; in: xmm0, xmm1 (xmm1 is preserved); out: xmm0; clobbers xmm2-xmm5
+	vpclmulqdq	xmm2,xmm0,xmm1,0x00	; low qword of xmm0 x low qword of xmm1
+	vpclmulqdq	xmm5,xmm0,xmm1,0x11	; high x high
+	vpclmulqdq	xmm3,xmm0,xmm1,0x10	; cross term: low(xmm0) x high(xmm1)
+	vpclmulqdq	xmm4,xmm0,xmm1,0x01	; cross term: high(xmm0) x low(xmm1)
+	vpxor	xmm3,xmm3,xmm4	; xmm3 = sum of both cross terms
+	vpslldq	xmm4,xmm3,8	; low half of the cross terms shifted up 64 bits
+	vpsrldq	xmm3,xmm3,8	; high half of the cross terms shifted down 64 bits
+	vpxor	xmm2,xmm2,xmm4	; xmm2 = low 128 bits of the 256-bit product
+	vpxor	xmm5,xmm5,xmm3	; xmm5 = high 128 bits of the 256-bit product
+; first fold of the low half
+	vpclmulqdq	xmm3,xmm2,XMMWORD[poly],0x10	; low qword of xmm2 x high qword of poly
+	vpshufd	xmm4,xmm2,78	; 78 = 0x4e: swap the two qwords of xmm2
+	vpxor	xmm2,xmm3,xmm4
+; second fold, same three steps
+	vpclmulqdq	xmm3,xmm2,XMMWORD[poly],0x10
+	vpshufd	xmm4,xmm2,78
+	vpxor	xmm2,xmm3,xmm4
+
+	vpxor	xmm0,xmm2,xmm5	; combine the folded low half with the high half
+	DB	0F3h,0C3h		;repret
+
+
+global	aesgcmsiv_htable_init
+; Fills a 128-byte table (arg1, rcx) with eight 16-byte entries derived from the value H at arg2 (rdx).
+ALIGN	16
+aesgcmsiv_htable_init:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesgcmsiv_htable_init:
+	mov	rdi,rcx	; rdi = table to write
+	mov	rsi,rdx	; rsi = H to read
+; Entry 0 is H itself; each later entry is GFMUL(previous entry, H).
+; Both pointers are accessed with vmovdqa, so they must be 16-byte aligned.
+
+	vmovdqa	xmm0,XMMWORD[rsi]
+	vmovdqa	xmm1,xmm0	; xmm1 keeps H as the fixed multiplier for every GFMUL call
+	vmovdqa	XMMWORD[rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[16+rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[32+rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[48+rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[64+rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[80+rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[96+rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[112+rdi],xmm0
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesgcmsiv_htable_init:
+global	aesgcmsiv_htable6_init
+; Six-entry variant of aesgcmsiv_htable_init: writes 96 bytes to arg1 (rcx) from the value H at arg2 (rdx).
+ALIGN	16
+aesgcmsiv_htable6_init:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesgcmsiv_htable6_init:
+	mov	rdi,rcx	; rdi = table to write
+	mov	rsi,rdx	; rsi = H to read
+; Entry 0 is H itself; each later entry is GFMUL(previous entry, H).
+; Both pointers are accessed with vmovdqa, so they must be 16-byte aligned.
+
+	vmovdqa	xmm0,XMMWORD[rsi]
+	vmovdqa	xmm1,xmm0	; xmm1 keeps H as the fixed multiplier for every GFMUL call
+	vmovdqa	XMMWORD[rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[16+rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[32+rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[48+rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[64+rdi],xmm0
+	call	GFMUL
+	vmovdqa	XMMWORD[80+rdi],xmm0
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesgcmsiv_htable6_init:
+global	aesgcmsiv_htable_polyval
+; Folds a byte buffer into a 16-byte accumulator using the 8-entry table built by aesgcmsiv_htable_init.
+ALIGN	16
+aesgcmsiv_htable_polyval:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesgcmsiv_htable_polyval:
+	mov	rdi,rcx	; rdi = table (arg1)
+	mov	rsi,rdx	; rsi = input buffer (arg2)
+	mov	rdx,r8	; rdx = input length in bytes (arg3); presumably a multiple of 16 -- TODO confirm with caller
+	mov	rcx,r9	; rcx = accumulator, read and written in place (arg4)
+
+
+
+	test	rdx,rdx
+	jnz	NEAR $L$htable_polyval_start	; zero length: return without touching the accumulator
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$htable_polyval_start:
+	vzeroall
+; NOTE(review): vzeroall and the xmm6/xmm7/xmm9 uses below have no save/restore in this routine,
+; while the Win64 calling convention lists xmm6-xmm15 as nonvolatile -- confirm callers tolerate this.
+
+	mov	r11,rdx
+	and	r11,127	; r11 = length mod 128: bytes handled by the prefix pass
+
+	jz	NEAR $L$htable_polyval_no_prefix
+
+	vpxor	xmm9,xmm9,xmm9
+	vmovdqa	xmm1,XMMWORD[rcx]	; current accumulator
+	sub	rdx,r11	; rdx = bytes left for the 128-byte main loop
+
+	sub	r11,16	; r11 = table offset paired with the first prefix block
+
+
+	vmovdqu	xmm0,XMMWORD[rsi]
+	vpxor	xmm0,xmm0,xmm1	; accumulator is xored into the first block only
+
+	vpclmulqdq	xmm5,xmm0,XMMWORD[r11*1+rdi],0x01	; xmm5 collects the cross terms
+	vpclmulqdq	xmm3,xmm0,XMMWORD[r11*1+rdi],0x00	; xmm3 collects low x low
+	vpclmulqdq	xmm4,xmm0,XMMWORD[r11*1+rdi],0x11	; xmm4 collects high x high
+	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+	lea	rsi,[16+rsi]
+	test	r11,r11
+	jnz	NEAR $L$htable_polyval_prefix_loop
+	jmp	NEAR $L$htable_polyval_prefix_complete
+
+
+ALIGN	64
+$L$htable_polyval_prefix_loop:	; remaining prefix blocks, each paired with the next lower table entry
+	sub	r11,16
+
+	vmovdqu	xmm0,XMMWORD[rsi]
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[r11*1+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+	test	r11,r11	; sets ZF for the jnz below; the lea in between leaves flags untouched
+
+	lea	rsi,[16+rsi]
+
+	jnz	NEAR $L$htable_polyval_prefix_loop
+
+$L$htable_polyval_prefix_complete:
+	vpsrldq	xmm6,xmm5,8	; split the cross-term sum across the two halves
+	vpslldq	xmm5,xmm5,8
+
+	vpxor	xmm9,xmm4,xmm6	; xmm9 = high 128 bits of the unreduced sum
+	vpxor	xmm1,xmm3,xmm5	; xmm1 = low 128 bits of the unreduced sum
+
+	jmp	NEAR $L$htable_polyval_main_loop
+
+$L$htable_polyval_no_prefix:
+; No prefix: start with low half = 0 and high half = accumulator, so the two folds
+; in the main loop leave xmm1 equal to the accumulator value.
+
+
+	vpxor	xmm1,xmm1,xmm1
+	vmovdqa	xmm9,XMMWORD[rcx]
+
+ALIGN	64
+$L$htable_polyval_main_loop:	; 128 bytes per pass; the fold of the previous sum is interleaved with the multiplies
+	sub	rdx,0x80
+	jb	NEAR $L$htable_polyval_out	; borrow: no full 128-byte chunk remains
+
+	vmovdqu	xmm0,XMMWORD[112+rsi]	; last block of the chunk pairs with table entry 0
+
+	vpclmulqdq	xmm5,xmm0,XMMWORD[rdi],0x01
+	vpclmulqdq	xmm3,xmm0,XMMWORD[rdi],0x00
+	vpclmulqdq	xmm4,xmm0,XMMWORD[rdi],0x11
+	vpclmulqdq	xmm6,xmm0,XMMWORD[rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vmovdqu	xmm0,XMMWORD[96+rsi]
+	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[16+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+
+	vmovdqu	xmm0,XMMWORD[80+rsi]
+
+	vpclmulqdq	xmm7,xmm1,XMMWORD[poly],0x10	; first fold of the previous low half (completed by the xor with xmm7 below)
+	vpalignr	xmm1,xmm1,xmm1,8	; rotate xmm1 by 8 bytes, i.e. swap its qwords
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[32+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vpxor	xmm1,xmm1,xmm7
+
+	vmovdqu	xmm0,XMMWORD[64+rsi]
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[48+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vmovdqu	xmm0,XMMWORD[48+rsi]
+
+	vpclmulqdq	xmm7,xmm1,XMMWORD[poly],0x10	; second fold of the previous low half
+	vpalignr	xmm1,xmm1,xmm1,8
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[64+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vpxor	xmm1,xmm1,xmm7
+
+	vmovdqu	xmm0,XMMWORD[32+rsi]
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[80+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vpxor	xmm1,xmm1,xmm9	; add the previous high half: xmm1 is now the folded running value
+
+	vmovdqu	xmm0,XMMWORD[16+rsi]
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[96+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vmovdqu	xmm0,XMMWORD[rsi]	; first block of the chunk pairs with table entry 7
+	vpxor	xmm0,xmm0,xmm1	; and absorbs the folded running value
+
+	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x01
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x00
+	vpxor	xmm3,xmm3,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x11
+	vpxor	xmm4,xmm4,xmm6
+	vpclmulqdq	xmm6,xmm0,XMMWORD[112+rdi],0x10
+	vpxor	xmm5,xmm5,xmm6
+
+
+	vpsrldq	xmm6,xmm5,8	; split the cross-term sum across the two halves
+	vpslldq	xmm5,xmm5,8
+
+	vpxor	xmm9,xmm4,xmm6	; xmm9 = new high half
+	vpxor	xmm1,xmm3,xmm5	; xmm1 = new low half (folded during the next pass or at the exit)
+
+	lea	rsi,[128+rsi]
+	jmp	NEAR $L$htable_polyval_main_loop
+
+
+; Exit: fold the pending low half twice, add the high half, and store the result.
+$L$htable_polyval_out:
+	vpclmulqdq	xmm6,xmm1,XMMWORD[poly],0x10
+	vpalignr	xmm1,xmm1,xmm1,8
+	vpxor	xmm1,xmm1,xmm6
+
+	vpclmulqdq	xmm6,xmm1,XMMWORD[poly],0x10
+	vpalignr	xmm1,xmm1,xmm1,8
+	vpxor	xmm1,xmm1,xmm6
+	vpxor	xmm1,xmm1,xmm9
+
+	vmovdqu	XMMWORD[rcx],xmm1	; write the updated accumulator back
+	vzeroupper
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesgcmsiv_htable_polyval:
+global	aesgcmsiv_polyval_horner
+; Block-at-a-time accumulator update: for each 16-byte input block, acc = GFMUL(acc xor block, H).
+ALIGN	16
+aesgcmsiv_polyval_horner:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesgcmsiv_polyval_horner:
+	mov	rdi,rcx	; rdi = accumulator, updated in place (arg1)
+	mov	rsi,rdx	; rsi = multiplier H (arg2)
+	mov	rdx,r8	; rdx = input blocks (arg3)
+	mov	rcx,r9	; rcx = number of 16-byte blocks (arg4)
+
+
+
+	test	rcx,rcx
+	jnz	NEAR $L$polyval_horner_start	; zero blocks: return without touching the accumulator
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$polyval_horner_start:
+; rdi and rsi are accessed with vmovdqa, so both must be 16-byte aligned.
+; The memory operand of the vpxor in the loop is a plain VEX load and carries no alignment requirement.
+
+	xor	r10,r10	; r10 = byte offset into the input
+	shl	rcx,4	; block count -> byte count
+
+	vmovdqa	xmm1,XMMWORD[rsi]	; xmm1 = H; GFMUL leaves it intact
+	vmovdqa	xmm0,XMMWORD[rdi]	; xmm0 = running accumulator
+
+$L$polyval_horner_loop:
+	vpxor	xmm0,xmm0,XMMWORD[r10*1+rdx]
+	call	GFMUL	; GFMUL clobbers xmm2-xmm5 only
+
+	add	r10,16
+	cmp	rcx,r10
+	jne	NEAR $L$polyval_horner_loop
+
+
+	vmovdqa	XMMWORD[rdi],xmm0
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesgcmsiv_polyval_horner:
+global	aes128gcmsiv_aes_ks
+; Expands a 16-byte key (arg1, rcx) into eleven 16-byte round keys written to arg2 (rdx).
+ALIGN	16
+aes128gcmsiv_aes_ks:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_aes_ks:
+	mov	rdi,rcx	; rdi = key (read with an unaligned load)
+	mov	rsi,rdx	; rsi = round-key output (aligned stores, so it must be 16-byte aligned)
+; NOTE(review): xmm15 is overwritten below with no save/restore, while the Win64 calling
+; convention lists xmm6-xmm15 as nonvolatile -- confirm callers tolerate this.
+
+	vmovdqu	xmm1,XMMWORD[rdi]
+	vmovdqa	XMMWORD[rsi],xmm1	; round key 0 is the key itself
+
+	vmovdqa	xmm0,XMMWORD[con1]	; round constant, starts at 1
+	vmovdqa	xmm15,XMMWORD[mask]
+
+	mov	rax,8	; eight iterations produce round keys 1..8
+
+$L$ks128_loop:
+	add	rsi,16
+	sub	rax,1	; sets ZF for the jne at the bottom; the SIMD instructions in between do not modify flags
+	vpshufb	xmm2,xmm1,xmm15	; broadcast the byte-rotated highest dword of the previous round key
+	vaesenclast	xmm2,xmm2,xmm0	; S-box substitution, then xor with the round constant
+	vpslld	xmm0,xmm0,1	; double the round constant for the next round
+	vpslldq	xmm3,xmm1,4	; three shift-and-xor steps: each dword becomes the xor of itself and all lower dwords
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+	vmovdqa	XMMWORD[rsi],xmm1
+	jne	NEAR $L$ks128_loop
+
+	vmovdqa	xmm0,XMMWORD[con2]	; round constant continues at 0x1b
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1	; 0x1b -> 0x36 for the final round key
+	vpslldq	xmm3,xmm1,4
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+	vmovdqa	XMMWORD[16+rsi],xmm1	; round key 9 (rsi still points at round key 8)
+
+	vpshufb	xmm2,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslldq	xmm3,xmm1,4
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpslldq	xmm3,xmm3,4
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+	vmovdqa	XMMWORD[32+rsi],xmm1	; round key 10
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_aes_ks:
+global	aes256gcmsiv_aes_ks
+; Expands a 32-byte key (arg1, rcx) into fifteen 16-byte round keys written to arg2 (rdx).
+ALIGN	16
+aes256gcmsiv_aes_ks:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_aes_ks:
+	mov	rdi,rcx	; rdi = key (read with unaligned loads)
+	mov	rsi,rdx	; rsi = round-key output (aligned stores, so it must be 16-byte aligned)
+; NOTE(review): xmm14 and xmm15 are overwritten below with no save/restore, while the Win64
+; calling convention lists xmm6-xmm15 as nonvolatile -- confirm callers tolerate this.
+
+	vmovdqu	xmm1,XMMWORD[rdi]
+	vmovdqu	xmm3,XMMWORD[16+rdi]
+	vmovdqa	XMMWORD[rsi],xmm1	; round keys 0 and 1 are the two key halves
+	vmovdqa	XMMWORD[16+rsi],xmm3
+	vmovdqa	xmm0,XMMWORD[con1]	; round constant, starts at 1
+	vmovdqa	xmm15,XMMWORD[mask]
+	vpxor	xmm14,xmm14,xmm14	; all-zero round key: makes vaesenclast a pure substitution step below
+	mov	rax,6	; six iterations, two round keys each (keys 2..13)
+
+$L$ks256_loop:
+	add	rsi,32
+	sub	rax,1	; sets ZF for the jne at the bottom; the SIMD instructions in between do not modify flags
+	vpshufb	xmm2,xmm3,xmm15	; broadcast the byte-rotated highest dword of the odd half
+	vaesenclast	xmm2,xmm2,xmm0	; S-box substitution, then xor with the round constant
+	vpslld	xmm0,xmm0,1	; double the round constant
+	vpsllq	xmm4,xmm1,32	; with the con3 shuffle: each dword becomes the xor of itself and all lower dwords
+	vpxor	xmm1,xmm1,xmm4
+	vpshufb	xmm4,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vmovdqa	XMMWORD[rsi],xmm1
+	vpshufd	xmm2,xmm1,0xff	; broadcast the highest dword of the new even key (no rotation)
+	vaesenclast	xmm2,xmm2,xmm14	; substitution only: round key is zero
+	vpsllq	xmm4,xmm3,32
+	vpxor	xmm3,xmm3,xmm4
+	vpshufb	xmm4,xmm3,XMMWORD[con3]
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vmovdqa	XMMWORD[16+rsi],xmm3
+	jne	NEAR $L$ks256_loop
+
+	vpshufb	xmm2,xmm3,xmm15	; final step: one more even key only
+	vaesenclast	xmm2,xmm2,xmm0
+	vpsllq	xmm4,xmm1,32
+	vpxor	xmm1,xmm1,xmm4
+	vpshufb	xmm4,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vmovdqa	XMMWORD[32+rsi],xmm1	; round key 14
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+; NOTE(review): unlike the neighbouring routines, no $L$SEH_end_aes256gcmsiv_aes_ks label follows this one.
+global	aes128gcmsiv_aes_ks_enc_x1
+; Expands a 16-byte key and encrypts one 16-byte block in the same pass, one round per new round key.
+ALIGN	16
+aes128gcmsiv_aes_ks_enc_x1:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_aes_ks_enc_x1:
+	mov	rdi,rcx	; rdi = input block (arg1)
+	mov	rsi,rdx	; rsi = output block (arg2)
+	mov	rdx,r8	; rdx = round-key output, 11 x 16 bytes (arg3)
+	mov	rcx,r9	; rcx = key (arg4)
+; All four pointers are accessed with vmovdqa, so they must be 16-byte aligned.
+; NOTE(review): xmm15 is overwritten below with no save/restore, while the Win64 calling
+; convention lists xmm6-xmm15 as nonvolatile -- confirm callers tolerate this.
+	vmovdqa	xmm1,XMMWORD[rcx]
+	vmovdqa	xmm4,XMMWORD[rdi]
+
+	vmovdqa	XMMWORD[rdx],xmm1	; round key 0 is the key itself
+	vpxor	xmm4,xmm4,xmm1	; initial whitening of the block
+
+	vmovdqa	xmm0,XMMWORD[con1]	; round constant, starts at 1
+	vmovdqa	xmm15,XMMWORD[mask]
+
+	vpshufb	xmm2,xmm1,xmm15	; round key 1: broadcast the byte-rotated highest dword
+	vaesenclast	xmm2,xmm2,xmm0	; S-box substitution, then xor with the round constant
+	vpslld	xmm0,xmm0,1	; double the round constant
+	vpsllq	xmm3,xmm1,32	; with the con3 shuffle: each dword becomes the xor of itself and all lower dwords
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1	; encryption round 1 with the key just derived
+	vmovdqa	XMMWORD[16+rdx],xmm1
+
+	vpshufb	xmm2,xmm1,xmm15	; round key 2
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1
+	vmovdqa	XMMWORD[32+rdx],xmm1
+
+	vpshufb	xmm2,xmm1,xmm15	; round key 3
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1
+	vmovdqa	XMMWORD[48+rdx],xmm1
+
+	vpshufb	xmm2,xmm1,xmm15	; round key 4
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1
+	vmovdqa	XMMWORD[64+rdx],xmm1
+
+	vpshufb	xmm2,xmm1,xmm15	; round key 5
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1
+	vmovdqa	XMMWORD[80+rdx],xmm1
+
+	vpshufb	xmm2,xmm1,xmm15	; round key 6
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1
+	vmovdqa	XMMWORD[96+rdx],xmm1
+
+	vpshufb	xmm2,xmm1,xmm15	; round key 7
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1
+	vmovdqa	XMMWORD[112+rdx],xmm1
+
+	vpshufb	xmm2,xmm1,xmm15	; round key 8
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1
+	vmovdqa	XMMWORD[128+rdx],xmm1
+
+
+	vmovdqa	xmm0,XMMWORD[con2]	; round constant continues at 0x1b
+
+	vpshufb	xmm2,xmm1,xmm15	; round key 9
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1	; 0x1b -> 0x36 for the final round key
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenc	xmm4,xmm4,xmm1
+	vmovdqa	XMMWORD[144+rdx],xmm1
+
+	vpshufb	xmm2,xmm1,xmm15	; round key 10
+	vaesenclast	xmm2,xmm2,xmm0
+	vpsllq	xmm3,xmm1,32
+	vpxor	xmm1,xmm1,xmm3
+	vpshufb	xmm3,xmm1,XMMWORD[con3]
+	vpxor	xmm1,xmm1,xmm3
+	vpxor	xmm1,xmm1,xmm2
+
+	vaesenclast	xmm4,xmm4,xmm1	; final encryption round
+	vmovdqa	XMMWORD[160+rdx],xmm1
+
+
+	vmovdqa	XMMWORD[rsi],xmm4	; store the encrypted block
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_aes_ks_enc_x1:
+global	aes128gcmsiv_kdf
+; Encrypts four counter blocks built from a nonce with an expanded AES-128 key; writes 64 bytes.
+ALIGN	16
+aes128gcmsiv_kdf:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_kdf:
+	mov	rdi,rcx	; rdi = nonce block (arg1)
+	mov	rsi,rdx	; rsi = 64-byte output (arg2)
+	mov	rdx,r8	; rdx = round keys, 11 x 16 bytes (arg3)
+; All three pointers are accessed with vmovdqa, so they must be 16-byte aligned.
+; Block layout after the shuffle and mask below: dword 0 = counter (0..3), dwords 1..3 = the
+; first three dwords of the nonce block.
+; NOTE(review): xmm9-xmm13 are overwritten with no save/restore, while the Win64 calling
+; convention lists xmm6-xmm15 as nonvolatile -- confirm callers tolerate this.
+
+
+	vmovdqa	xmm1,XMMWORD[rdx]	; round key 0
+	vmovdqa	xmm9,XMMWORD[rdi]
+	vmovdqa	xmm12,XMMWORD[and_mask]
+	vmovdqa	xmm13,XMMWORD[one]
+	vpshufd	xmm9,xmm9,0x90	; 0x90 selects source dwords (0,0,1,2): nonce dwords move up by one slot
+	vpand	xmm9,xmm9,xmm12	; clear dword 0 so the counter starts at 0
+	vpaddd	xmm10,xmm9,xmm13	; counter 1
+	vpaddd	xmm11,xmm10,xmm13	; counter 2
+	vpaddd	xmm12,xmm11,xmm13	; counter 3 (and_mask in xmm12 is no longer needed)
+
+	vpxor	xmm9,xmm9,xmm1	; initial whitening with round key 0
+	vpxor	xmm10,xmm10,xmm1
+	vpxor	xmm11,xmm11,xmm1
+	vpxor	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm1,XMMWORD[16+rdx]	; rounds 1..9 follow, alternating xmm1/xmm2 for the round key
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm2,XMMWORD[32+rdx]
+	vaesenc	xmm9,xmm9,xmm2
+	vaesenc	xmm10,xmm10,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+
+	vmovdqa	xmm1,XMMWORD[48+rdx]
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm2,XMMWORD[64+rdx]
+	vaesenc	xmm9,xmm9,xmm2
+	vaesenc	xmm10,xmm10,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+
+	vmovdqa	xmm1,XMMWORD[80+rdx]
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm2,XMMWORD[96+rdx]
+	vaesenc	xmm9,xmm9,xmm2
+	vaesenc	xmm10,xmm10,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+
+	vmovdqa	xmm1,XMMWORD[112+rdx]
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm2,XMMWORD[128+rdx]
+	vaesenc	xmm9,xmm9,xmm2
+	vaesenc	xmm10,xmm10,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+
+	vmovdqa	xmm1,XMMWORD[144+rdx]
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+
+	vmovdqa	xmm2,XMMWORD[160+rdx]	; final round with round key 10
+	vaesenclast	xmm9,xmm9,xmm2
+	vaesenclast	xmm10,xmm10,xmm2
+	vaesenclast	xmm11,xmm11,xmm2
+	vaesenclast	xmm12,xmm12,xmm2
+
+
+	vmovdqa	XMMWORD[rsi],xmm9	; four encrypted blocks, in counter order
+	vmovdqa	XMMWORD[16+rsi],xmm10
+	vmovdqa	XMMWORD[32+rsi],xmm11
+	vmovdqa	XMMWORD[48+rsi],xmm12
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_kdf:
+global	aes128gcmsiv_enc_msg_x4
+; Counter-mode AES-128 over a buffer: four blocks per pass, then one block at a time for the remainder.
+ALIGN	16
+aes128gcmsiv_enc_msg_x4:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_enc_msg_x4:
+	mov	rdi,rcx	; rdi = input buffer (arg1)
+	mov	rsi,rdx	; rsi = output buffer (arg2)
+	mov	rdx,r8	; rdx = initial counter block, 16-byte aligned (arg3)
+	mov	rcx,r9	; rcx = round keys, 11 x 16 bytes (arg4)
+	mov	r8,QWORD[40+rsp]	; r8 = length in bytes (arg5, first stack argument on Win64)
+; NOTE(review): xmm6-xmm8, xmm12 and xmm15 are overwritten below with no save/restore, while the
+; Win64 calling convention lists xmm6-xmm15 as nonvolatile -- confirm callers tolerate this.
+
+	test	r8,r8
+	jnz	NEAR $L$128_enc_msg_x4_start	; zero length: nothing to do
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$128_enc_msg_x4_start:
+	push	r12	; r12 and r13 are saved and restored but not otherwise used in this routine
+
+	push	r13
+
+
+	shr	r8,4	; r8 = number of whole 16-byte blocks (trailing partial bytes are ignored)
+	mov	r10,r8
+	shl	r10,62
+	shr	r10,62	; r10 = block count mod 4: blocks left for the single-block loop
+
+
+	vmovdqa	xmm15,XMMWORD[rdx]
+	vpor	xmm15,xmm15,XMMWORD[OR_MASK]	; force the top bit of the counter block's highest dword
+
+	vmovdqu	xmm4,XMMWORD[four]	; per-pass counter increment
+	vmovdqa	xmm0,xmm15	; xmm0..xmm3 = counter +0..+3
+	vpaddd	xmm1,xmm15,XMMWORD[one]
+	vpaddd	xmm2,xmm15,XMMWORD[two]
+	vpaddd	xmm3,xmm15,XMMWORD[three]
+
+	shr	r8,2	; r8 = number of 4-block groups; ZF set when there are none
+	je	NEAR $L$128_enc_msg_x4_check_remainder
+
+	sub	rsi,64	; pre-bias both pointers: the loop advances them at its top
+	sub	rdi,64
+
+$L$128_enc_msg_x4_loop1:
+	add	rsi,64
+	add	rdi,64
+
+	vmovdqa	xmm5,xmm0	; working copies of the four counters
+	vmovdqa	xmm6,xmm1
+	vmovdqa	xmm7,xmm2
+	vmovdqa	xmm8,xmm3
+
+	vpxor	xmm5,xmm5,XMMWORD[rcx]	; initial whitening with round key 0
+	vpxor	xmm6,xmm6,XMMWORD[rcx]
+	vpxor	xmm7,xmm7,XMMWORD[rcx]
+	vpxor	xmm8,xmm8,XMMWORD[rcx]
+
+	vmovdqu	xmm12,XMMWORD[16+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm0,xmm0,xmm4	; counter updates for the next pass are interleaved with the rounds
+	vmovdqu	xmm12,XMMWORD[32+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm1,xmm1,xmm4
+	vmovdqu	xmm12,XMMWORD[48+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm2,xmm2,xmm4
+	vmovdqu	xmm12,XMMWORD[64+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm3,xmm3,xmm4
+
+	vmovdqu	xmm12,XMMWORD[80+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[96+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[112+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[128+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[144+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[160+rcx]	; final round with round key 10
+	vaesenclast	xmm5,xmm5,xmm12
+	vaesenclast	xmm6,xmm6,xmm12
+	vaesenclast	xmm7,xmm7,xmm12
+	vaesenclast	xmm8,xmm8,xmm12
+
+
+
+	vpxor	xmm5,xmm5,XMMWORD[rdi]	; xor the keystream with four input blocks
+	vpxor	xmm6,xmm6,XMMWORD[16+rdi]
+	vpxor	xmm7,xmm7,XMMWORD[32+rdi]
+	vpxor	xmm8,xmm8,XMMWORD[48+rdi]
+
+	sub	r8,1	; sets ZF for the jne below; the stores in between do not modify flags
+
+	vmovdqu	XMMWORD[rsi],xmm5
+	vmovdqu	XMMWORD[16+rsi],xmm6
+	vmovdqu	XMMWORD[32+rsi],xmm7
+	vmovdqu	XMMWORD[48+rsi],xmm8
+
+	jne	NEAR $L$128_enc_msg_x4_loop1
+
+	add	rsi,64	; step past the last group so the remainder loop continues from here
+	add	rdi,64
+
+$L$128_enc_msg_x4_check_remainder:
+	cmp	r10,0
+	je	NEAR $L$128_enc_msg_x4_out
+
+$L$128_enc_msg_x4_loop2:
+; Remainder: one block per iteration, continuing from the counter in xmm0.
+
+	vmovdqa	xmm5,xmm0
+	vpaddd	xmm0,xmm0,XMMWORD[one]
+
+	vpxor	xmm5,xmm5,XMMWORD[rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[16+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[32+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[48+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[64+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[80+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[96+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[112+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[128+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[144+rcx]
+	vaesenclast	xmm5,xmm5,XMMWORD[160+rcx]
+
+
+	vpxor	xmm5,xmm5,XMMWORD[rdi]
+	vmovdqu	XMMWORD[rsi],xmm5
+
+	add	rdi,16
+	add	rsi,16
+
+	sub	r10,1
+	jne	NEAR $L$128_enc_msg_x4_loop2
+
+$L$128_enc_msg_x4_out:
+	pop	r13
+
+	pop	r12
+
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_enc_msg_x4:
+global	aes128gcmsiv_enc_msg_x8
+; aes128gcmsiv_enc_msg_x8: AES-128 counter-mode encryption of a buffer, eight blocks per main-loop pass, then one block at a time.
+ALIGN	16
+aes128gcmsiv_enc_msg_x8:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_enc_msg_x8:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+; After the shuffle: rdi = source, rsi = destination, rdx = 16-byte initial counter block, rcx = round keys (offsets 0..160), r8 = length.
+; The length is divided by 16 below, so it is presumably a byte count; a partial trailing block is not processed by this routine.
+; NOTE(review): xmm6-xmm15 are overwritten with no visible save/restore, yet Win64 treats them as callee-saved - confirm no Windows caller relies on them.
+	test	r8,r8
+	jnz	NEAR $L$128_enc_msg_x8_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+; Zero length returns above; otherwise fall into the real work.
+$L$128_enc_msg_x8_start:
+	push	r12
+; r12 and r13 are saved and restored but not otherwise referenced in this routine.
+	push	r13
+
+	push	rbp
+
+	mov	rbp,rsp
+; rbp remembers the stack pointer so the epilogue can undo the alignment below.
+; Reserve a 64-byte-aligned scratch area; [rsp] will hold the eighth counter,
+; because only xmm0 and xmm9-xmm14 keep counters in registers.
+	sub	rsp,128
+	and	rsp,-64
+
+	shr	r8,4	; r8 = number of whole 16-byte blocks
+	mov	r10,r8
+	shl	r10,61
+	shr	r10,61	; r10 = block count mod 8 (low three bits kept)
+
+; Load the initial counter block and OR it with the OR_MASK constant (defined outside this chunk).
+	vmovdqu	xmm1,XMMWORD[rdx]
+	vpor	xmm1,xmm1,XMMWORD[OR_MASK]
+
+; Counters +0..+6 live in xmm0 and xmm9..xmm14; counter +7 is kept at [rsp].
+	vpaddd	xmm0,xmm1,XMMWORD[seven]
+	vmovdqu	XMMWORD[rsp],xmm0
+	vpaddd	xmm9,xmm1,XMMWORD[one]
+	vpaddd	xmm10,xmm1,XMMWORD[two]
+	vpaddd	xmm11,xmm1,XMMWORD[three]
+	vpaddd	xmm12,xmm1,XMMWORD[four]
+	vpaddd	xmm13,xmm1,XMMWORD[five]
+	vpaddd	xmm14,xmm1,XMMWORD[six]
+	vmovdqa	xmm0,xmm1
+
+	shr	r8,3	; r8 = number of full eight-block groups
+	je	NEAR $L$128_enc_msg_x8_check_remainder
+; Pre-bias both pointers by -128 because the loop advances them at its top.
+	sub	rsi,128
+	sub	rdi,128
+
+$L$128_enc_msg_x8_loop1:
+	add	rsi,128
+	add	rdi,128
+; Working copies: xmm1..xmm7 take counters +0..+6, xmm8 takes counter +7 from the stack slot.
+	vmovdqa	xmm1,xmm0
+	vmovdqa	xmm2,xmm9
+	vmovdqa	xmm3,xmm10
+	vmovdqa	xmm4,xmm11
+	vmovdqa	xmm5,xmm12
+	vmovdqa	xmm6,xmm13
+	vmovdqa	xmm7,xmm14
+
+	vmovdqu	xmm8,XMMWORD[rsp]
+; Round 0: XOR every lane with the first round key.
+	vpxor	xmm1,xmm1,XMMWORD[rcx]
+	vpxor	xmm2,xmm2,XMMWORD[rcx]
+	vpxor	xmm3,xmm3,XMMWORD[rcx]
+	vpxor	xmm4,xmm4,XMMWORD[rcx]
+	vpxor	xmm5,xmm5,XMMWORD[rcx]
+	vpxor	xmm6,xmm6,XMMWORD[rcx]
+	vpxor	xmm7,xmm7,XMMWORD[rcx]
+	vpxor	xmm8,xmm8,XMMWORD[rcx]
+; Rounds 1-9 follow; the counters for the next pass are advanced by eight in between the rounds.
+	vmovdqu	xmm15,XMMWORD[16+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+; xmm14 was already copied into xmm7, so it is borrowed to advance the stack counter by eight...
+	vmovdqu	xmm14,XMMWORD[rsp]
+	vpaddd	xmm14,xmm14,XMMWORD[eight]
+	vmovdqu	XMMWORD[rsp],xmm14
+	vmovdqu	xmm15,XMMWORD[32+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+; ...and subtracting one turns that value into the next pass's counter +6.
+	vpsubd	xmm14,xmm14,XMMWORD[one]
+	vmovdqu	xmm15,XMMWORD[48+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm0,xmm0,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[64+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm9,xmm9,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[80+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm10,xmm10,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[96+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm11,xmm11,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[112+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm12,xmm12,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[128+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm13,xmm13,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[144+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+; Final round uses the key at offset 160.
+	vmovdqu	xmm15,XMMWORD[160+rcx]
+	vaesenclast	xmm1,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm15
+	vaesenclast	xmm3,xmm3,xmm15
+	vaesenclast	xmm4,xmm4,xmm15
+	vaesenclast	xmm5,xmm5,xmm15
+	vaesenclast	xmm6,xmm6,xmm15
+	vaesenclast	xmm7,xmm7,xmm15
+	vaesenclast	xmm8,xmm8,xmm15
+
+
+; XOR the eight keystream blocks with the source, then store them to the destination.
+	vpxor	xmm1,xmm1,XMMWORD[rdi]
+	vpxor	xmm2,xmm2,XMMWORD[16+rdi]
+	vpxor	xmm3,xmm3,XMMWORD[32+rdi]
+	vpxor	xmm4,xmm4,XMMWORD[48+rdi]
+	vpxor	xmm5,xmm5,XMMWORD[64+rdi]
+	vpxor	xmm6,xmm6,XMMWORD[80+rdi]
+	vpxor	xmm7,xmm7,XMMWORD[96+rdi]
+	vpxor	xmm8,xmm8,XMMWORD[112+rdi]
+
+	dec	r8	; the flags set here drive the jne after the stores
+
+	vmovdqu	XMMWORD[rsi],xmm1
+	vmovdqu	XMMWORD[16+rsi],xmm2
+	vmovdqu	XMMWORD[32+rsi],xmm3
+	vmovdqu	XMMWORD[48+rsi],xmm4
+	vmovdqu	XMMWORD[64+rsi],xmm5
+	vmovdqu	XMMWORD[80+rsi],xmm6
+	vmovdqu	XMMWORD[96+rsi],xmm7
+	vmovdqu	XMMWORD[112+rsi],xmm8
+
+	jne	NEAR $L$128_enc_msg_x8_loop1
+; Step past the last group so both pointers address the first unprocessed block.
+	add	rsi,128
+	add	rdi,128
+
+$L$128_enc_msg_x8_check_remainder:
+	cmp	r10,0
+	je	NEAR $L$128_enc_msg_x8_out
+
+$L$128_enc_msg_x8_loop2:
+; Tail: the remaining r10 (at most seven) blocks, one per iteration, using the counter kept in xmm0.
+
+	vmovdqa	xmm1,xmm0
+	vpaddd	xmm0,xmm0,XMMWORD[one]
+
+	vpxor	xmm1,xmm1,XMMWORD[rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[16+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[32+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[48+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[64+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[80+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[96+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[112+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[128+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[144+rcx]
+	vaesenclast	xmm1,xmm1,XMMWORD[160+rcx]
+
+
+	vpxor	xmm1,xmm1,XMMWORD[rdi]
+
+	vmovdqu	XMMWORD[rsi],xmm1
+
+	add	rdi,16
+	add	rsi,16
+
+	dec	r10
+	jne	NEAR $L$128_enc_msg_x8_loop2
+
+$L$128_enc_msg_x8_out:
+	mov	rsp,rbp	; drop the aligned scratch area
+
+	pop	rbp
+
+	pop	r13
+
+	pop	r12
+
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_enc_msg_x8:
+global	aes128gcmsiv_dec
+; aes128gcmsiv_dec: AES-128 counter-mode decryption fused with a carry-less-multiply hash update over the blocks it writes out.
+ALIGN	16
+aes128gcmsiv_dec:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_dec:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+; rdi = source, rsi = destination, rdx = state: running hash at [rdx] plus scratch at [rdx+16 .. rdx+96), accessed with aligned moves.
+; rcx = six 16-byte multipliers (presumably powers of the hash key - confirm with caller), r8 = round keys (0..160), r9 = length.
+; NOTE(review): xmm6-xmm15 are overwritten with no visible save/restore, yet Win64 treats them as callee-saved - confirm no Windows caller relies on them.
+	test	r9,~15
+	jnz	NEAR $L$128_dec_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+; Lengths below 16 return above without touching the state.
+$L$128_dec_start:
+	vzeroupper
+	vmovdqa	xmm0,XMMWORD[rdx]	; xmm0 = running hash value
+	mov	rax,rdx
+; rax = state+32 and rcx = table+32: biased bases for the (n-32) style offsets used below.
+	lea	rax,[32+rax]
+	lea	rcx,[32+rcx]
+
+; The initial counter is the 16 bytes at source+length (r9 is still unrounded here) - presumably the tag that follows the ciphertext.
+	vmovdqu	xmm15,XMMWORD[r9*1+rdi]
+	vpor	xmm15,xmm15,XMMWORD[OR_MASK]
+	and	r9,~15	; from here on r9 counts whole blocks only, in bytes
+
+; Fewer than six whole blocks: go straight to the single-block loop.
+	cmp	r9,96
+	jb	NEAR $L$128_dec_loop2
+
+; Prime the pipeline: decrypt the first six blocks without hashing them; loop 1 hashes each group one pass late.
+	sub	r9,96
+	vmovdqa	xmm7,xmm15
+	vpaddd	xmm8,xmm7,XMMWORD[one]
+	vpaddd	xmm9,xmm7,XMMWORD[two]
+	vpaddd	xmm10,xmm9,XMMWORD[one]
+	vpaddd	xmm11,xmm9,XMMWORD[two]
+	vpaddd	xmm12,xmm11,XMMWORD[one]
+	vpaddd	xmm15,xmm11,XMMWORD[two]	; xmm15 = counter for the next group (+6)
+
+	vpxor	xmm7,xmm7,XMMWORD[r8]
+	vpxor	xmm8,xmm8,XMMWORD[r8]
+	vpxor	xmm9,xmm9,XMMWORD[r8]
+	vpxor	xmm10,xmm10,XMMWORD[r8]
+	vpxor	xmm11,xmm11,XMMWORD[r8]
+	vpxor	xmm12,xmm12,XMMWORD[r8]
+
+	vmovdqu	xmm4,XMMWORD[16+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[32+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[48+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[64+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[80+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[96+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[112+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[128+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[144+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[160+r8]
+	vaesenclast	xmm7,xmm7,xmm4
+	vaesenclast	xmm8,xmm8,xmm4
+	vaesenclast	xmm9,xmm9,xmm4
+	vaesenclast	xmm10,xmm10,xmm4
+	vaesenclast	xmm11,xmm11,xmm4
+	vaesenclast	xmm12,xmm12,xmm4
+
+; XOR the keystream with the source and store the six output blocks; xmm7..xmm12 keep them for hashing.
+	vpxor	xmm7,xmm7,XMMWORD[rdi]
+	vpxor	xmm8,xmm8,XMMWORD[16+rdi]
+	vpxor	xmm9,xmm9,XMMWORD[32+rdi]
+	vpxor	xmm10,xmm10,XMMWORD[48+rdi]
+	vpxor	xmm11,xmm11,XMMWORD[64+rdi]
+	vpxor	xmm12,xmm12,XMMWORD[80+rdi]
+
+	vmovdqu	XMMWORD[rsi],xmm7
+	vmovdqu	XMMWORD[16+rsi],xmm8
+	vmovdqu	XMMWORD[32+rsi],xmm9
+	vmovdqu	XMMWORD[48+rsi],xmm10
+	vmovdqu	XMMWORD[64+rsi],xmm11
+	vmovdqu	XMMWORD[80+rsi],xmm12
+
+	add	rdi,96
+	add	rsi,96
+	jmp	NEAR $L$128_dec_loop1
+
+; Loop 1: decrypt six new blocks while hashing the six outputs of the previous pass.
+ALIGN	64
+$L$128_dec_loop1:
+	cmp	r9,96
+	jb	NEAR $L$128_dec_finish_96
+	sub	r9,96
+; Park the previous outputs: newest in xmm6, the other five in the scratch area with the oldest at rax+48.
+	vmovdqa	xmm6,xmm12
+	vmovdqa	XMMWORD[(16-32)+rax],xmm11
+	vmovdqa	XMMWORD[(32-32)+rax],xmm10
+	vmovdqa	XMMWORD[(48-32)+rax],xmm9
+	vmovdqa	XMMWORD[(64-32)+rax],xmm8
+	vmovdqa	XMMWORD[(80-32)+rax],xmm7
+; Counters for this pass; xmm15 again ends up six ahead.
+	vmovdqa	xmm7,xmm15
+	vpaddd	xmm8,xmm7,XMMWORD[one]
+	vpaddd	xmm9,xmm7,XMMWORD[two]
+	vpaddd	xmm10,xmm9,XMMWORD[one]
+	vpaddd	xmm11,xmm9,XMMWORD[two]
+	vpaddd	xmm12,xmm11,XMMWORD[one]
+	vpaddd	xmm15,xmm11,XMMWORD[two]
+
+	vmovdqa	xmm4,XMMWORD[r8]
+	vpxor	xmm7,xmm7,xmm4
+	vpxor	xmm8,xmm8,xmm4
+	vpxor	xmm9,xmm9,xmm4
+	vpxor	xmm10,xmm10,xmm4
+	vpxor	xmm11,xmm11,xmm4
+	vpxor	xmm12,xmm12,xmm4
+; Newest saved block times the first table entry starts the accumulators: xmm2 = high, xmm3 = low, xmm1 = middle terms.
+	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]
+	vpclmulqdq	xmm2,xmm6,xmm4,0x11
+	vpclmulqdq	xmm3,xmm6,xmm4,0x00
+	vpclmulqdq	xmm1,xmm6,xmm4,0x01
+	vpclmulqdq	xmm4,xmm6,xmm4,0x10
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm4,XMMWORD[16+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+; Each following group multiplies the next saved block by the next table entry and accumulates, alternating with AES rounds.
+	vmovdqu	xmm6,XMMWORD[((-16))+rax]
+	vmovdqu	xmm13,XMMWORD[((-16))+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[32+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[rax]
+	vmovdqu	xmm13,XMMWORD[rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[48+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[16+rax]
+	vmovdqu	xmm13,XMMWORD[16+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[64+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[32+rax]
+	vmovdqu	xmm13,XMMWORD[32+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[80+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[96+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[112+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+; The oldest saved block is first XORed with the running hash (xmm0), then multiplied by the last table entry.
+	vmovdqa	xmm6,XMMWORD[((80-32))+rax]
+	vpxor	xmm6,xmm6,xmm0
+	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm5,0x01
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x10
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm4,XMMWORD[128+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+; Split the middle terms across the two halves of the product: xmm5 = high half, xmm0 = low half.
+	vpsrldq	xmm4,xmm1,8
+	vpxor	xmm5,xmm2,xmm4
+	vpslldq	xmm4,xmm1,8
+	vpxor	xmm0,xmm3,xmm4
+; Two folding steps against the poly constant (defined outside this chunk) follow, interleaved with the last AES rounds.
+	vmovdqa	xmm3,XMMWORD[poly]
+
+	vmovdqu	xmm4,XMMWORD[144+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[160+r8]	; xmm6 = last round key
+	vpalignr	xmm2,xmm0,xmm0,8
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+; XORing the last round key with the source first lets vaesenclast produce the output block directly.
+	vpxor	xmm4,xmm6,XMMWORD[rdi]
+	vaesenclast	xmm7,xmm7,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[16+rdi]
+	vaesenclast	xmm8,xmm8,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[32+rdi]
+	vaesenclast	xmm9,xmm9,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[48+rdi]
+	vaesenclast	xmm10,xmm10,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[64+rdi]
+	vaesenclast	xmm11,xmm11,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[80+rdi]
+	vaesenclast	xmm12,xmm12,xmm4
+
+	vpalignr	xmm2,xmm0,xmm0,8
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vmovdqu	XMMWORD[rsi],xmm7
+	vmovdqu	XMMWORD[16+rsi],xmm8
+	vmovdqu	XMMWORD[32+rsi],xmm9
+	vmovdqu	XMMWORD[48+rsi],xmm10
+	vmovdqu	XMMWORD[64+rsi],xmm11
+	vmovdqu	XMMWORD[80+rsi],xmm12
+
+	vpxor	xmm0,xmm0,xmm5	; xmm0 = updated running hash
+
+	lea	rdi,[96+rdi]
+	lea	rsi,[96+rsi]
+	jmp	NEAR $L$128_dec_loop1
+; Drain: hash the final group of six outputs with the same multiply-and-fold sequence, with no AES work left to interleave.
+$L$128_dec_finish_96:
+	vmovdqa	xmm6,xmm12
+	vmovdqa	XMMWORD[(16-32)+rax],xmm11
+	vmovdqa	XMMWORD[(32-32)+rax],xmm10
+	vmovdqa	XMMWORD[(48-32)+rax],xmm9
+	vmovdqa	XMMWORD[(64-32)+rax],xmm8
+	vmovdqa	XMMWORD[(80-32)+rax],xmm7
+
+	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]
+	vpclmulqdq	xmm1,xmm6,xmm4,0x10
+	vpclmulqdq	xmm2,xmm6,xmm4,0x11
+	vpclmulqdq	xmm3,xmm6,xmm4,0x00
+	vpclmulqdq	xmm4,xmm6,xmm4,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[((-16))+rax]
+	vmovdqu	xmm13,XMMWORD[((-16))+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[rax]
+	vmovdqu	xmm13,XMMWORD[rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[16+rax]
+	vmovdqu	xmm13,XMMWORD[16+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[32+rax]
+	vmovdqu	xmm13,XMMWORD[32+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+; Oldest block XOR running hash, times the last table entry.
+	vmovdqu	xmm6,XMMWORD[((80-32))+rax]
+	vpxor	xmm6,xmm6,xmm0
+	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]
+	vpclmulqdq	xmm4,xmm6,xmm5,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vpsrldq	xmm4,xmm1,8
+	vpxor	xmm5,xmm2,xmm4
+	vpslldq	xmm4,xmm1,8
+	vpxor	xmm0,xmm3,xmm4
+
+	vmovdqa	xmm3,XMMWORD[poly]
+
+	vpalignr	xmm2,xmm0,xmm0,8
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vpalignr	xmm2,xmm0,xmm0,8
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vpxor	xmm0,xmm0,xmm5
+
+$L$128_dec_loop2:
+; Tail: one block per iteration while at least 16 bytes remain - decrypt it, XOR the output into the hash, then call GFMUL.
+; GFMUL is defined outside this chunk; presumably it multiplies xmm0 by xmm1 and reduces - confirm against its definition.
+
+	cmp	r9,16
+	jb	NEAR $L$128_dec_out
+	sub	r9,16
+
+	vmovdqa	xmm2,xmm15
+	vpaddd	xmm15,xmm15,XMMWORD[one]
+
+	vpxor	xmm2,xmm2,XMMWORD[r8]
+	vaesenc	xmm2,xmm2,XMMWORD[16+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[32+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[48+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[64+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[80+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[96+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[112+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[128+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[144+r8]
+	vaesenclast	xmm2,xmm2,XMMWORD[160+r8]
+	vpxor	xmm2,xmm2,XMMWORD[rdi]
+	vmovdqu	XMMWORD[rsi],xmm2
+	add	rdi,16
+	add	rsi,16
+
+	vpxor	xmm0,xmm0,xmm2
+	vmovdqa	xmm1,XMMWORD[((-32))+rcx]	; first table entry (rcx is biased by +32)
+	call	GFMUL
+
+	jmp	NEAR $L$128_dec_loop2
+
+$L$128_dec_out:
+	vmovdqu	XMMWORD[rdx],xmm0	; write the running hash back to the state
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_dec:
+global	aes128gcmsiv_ecb_enc_block
+; aes128gcmsiv_ecb_enc_block: encrypts a single 16-byte block with an expanded AES-128 key (ten rounds).
+ALIGN	16
+aes128gcmsiv_ecb_enc_block:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes128gcmsiv_ecb_enc_block:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+; rdi = input block, rsi = output block, rdx = round keys at offsets 0..160.
+; Input and output are accessed with vmovdqa, so both must be 16-byte aligned.
+
+	vmovdqa	xmm1,XMMWORD[rdi]
+; Round 0 is a plain XOR with the first key, followed by nine vaesenc rounds and the final vaesenclast.
+	vpxor	xmm1,xmm1,XMMWORD[rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[16+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[32+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[48+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[64+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[80+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[96+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[112+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[128+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[144+rdx]
+	vaesenclast	xmm1,xmm1,XMMWORD[160+rdx]
+
+	vmovdqa	XMMWORD[rsi],xmm1
+
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes128gcmsiv_ecb_enc_block:
+global	aes256gcmsiv_aes_ks_enc_x1
+; aes256gcmsiv_aes_ks_enc_x1: expands a 32-byte key into 15 round keys and encrypts one block with each key as it is produced.
+ALIGN	16
+aes256gcmsiv_aes_ks_enc_x1:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_aes_ks_enc_x1:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+; rdi = input block, rsi = output block, rdx = destination for the round keys (offsets 0..224), rcx = 32-byte key.
+; rdi, rsi and rcx are accessed with vmovdqa and so must be 16-byte aligned; the schedule at rdx is written with vmovdqu.
+; NOTE(review): xmm8, xmm14 and xmm15 are overwritten with no visible save/restore, yet Win64 treats them as callee-saved - confirm.
+	vmovdqa	xmm0,XMMWORD[con1]
+	vmovdqa	xmm15,XMMWORD[mask]
+	vmovdqa	xmm8,XMMWORD[rdi]
+	vmovdqa	xmm1,XMMWORD[rcx]
+	vmovdqa	xmm3,XMMWORD[16+rcx]
+	vpxor	xmm8,xmm8,xmm1
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[rdx],xmm1
+	vmovdqu	XMMWORD[16+rdx],xmm3
+	vpxor	xmm14,xmm14,xmm14	; xmm14 = zero, used as the key for the odd-key steps
+; Even key (offset 32): shuffle the latest odd key with mask, vaesenclast it against the round constant in xmm0, then double the constant.
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[32+rdx],xmm1
+; Odd key (offset 48): broadcast the top dword of the key just made, vaesenclast it against zero; no round constant is involved.
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[48+rdx],xmm3
+; The same even/odd pair repeats for offsets 64/80, 96/112, 128/144, 160/176 and 192/208.
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[64+rdx],xmm1
+
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[80+rdx],xmm3
+
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[96+rdx],xmm1
+
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[112+rdx],xmm3
+
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[128+rdx],xmm1
+
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[144+rdx],xmm3
+
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[160+rdx],xmm1
+
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[176+rdx],xmm3
+
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslld	xmm0,xmm0,1
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenc	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[192+rdx],xmm1
+
+	vpshufd	xmm2,xmm1,0xff
+	vaesenclast	xmm2,xmm2,xmm14
+	vpslldq	xmm4,xmm3,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm3,xmm3,xmm4
+	vpxor	xmm3,xmm3,xmm2
+	vaesenc	xmm8,xmm8,xmm3
+	vmovdqu	XMMWORD[208+rdx],xmm3
+; Last key (offset 224): the constant is not doubled again, and the block is finished with vaesenclast.
+	vpshufb	xmm2,xmm3,xmm15
+	vaesenclast	xmm2,xmm2,xmm0
+	vpslldq	xmm4,xmm1,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpslldq	xmm4,xmm4,4
+	vpxor	xmm1,xmm1,xmm4
+	vpxor	xmm1,xmm1,xmm2
+	vaesenclast	xmm8,xmm8,xmm1
+	vmovdqu	XMMWORD[224+rdx],xmm1
+
+	vmovdqa	XMMWORD[rsi],xmm8
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes256gcmsiv_aes_ks_enc_x1:
+global	aes256gcmsiv_ecb_enc_block
+; aes256gcmsiv_ecb_enc_block: encrypts a single 16-byte block with an expanded AES-256 key (fourteen rounds).
+ALIGN	16
+aes256gcmsiv_ecb_enc_block:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_ecb_enc_block:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+; rdi = input block, rsi = output block, rdx = round keys at offsets 0..224.
+; Input and output are accessed with vmovdqa, so both must be 16-byte aligned.
+; Round 0 is a plain XOR with the first key, followed by thirteen vaesenc rounds and the final vaesenclast.
+	vmovdqa	xmm1,XMMWORD[rdi]
+	vpxor	xmm1,xmm1,XMMWORD[rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[16+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[32+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[48+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[64+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[80+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[96+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[112+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[128+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[144+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[160+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[176+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[192+rdx]
+	vaesenc	xmm1,xmm1,XMMWORD[208+rdx]
+	vaesenclast	xmm1,xmm1,XMMWORD[224+rdx]
+	vmovdqa	XMMWORD[rsi],xmm1
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes256gcmsiv_ecb_enc_block:
+global	aes256gcmsiv_enc_msg_x4
+; aes256gcmsiv_enc_msg_x4: AES-256 counter-mode encryption of a buffer, four blocks per main-loop pass, then one block at a time.
+ALIGN	16
+aes256gcmsiv_enc_msg_x4:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_enc_msg_x4:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+; rdi = source, rsi = destination, rdx = 16-byte initial counter block (aligned load), rcx = round keys (0..224), r8 = length.
+; NOTE(review): xmm6-xmm8, xmm12 and xmm15 are overwritten with no visible save/restore, yet Win64 treats them as callee-saved - confirm.
+
+	test	r8,r8
+	jnz	NEAR $L$256_enc_msg_x4_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+; Convert the length to a block count, rounding up when the low four bits are non-zero.
+$L$256_enc_msg_x4_start:
+	mov	r10,r8
+	shr	r8,4
+	shl	r10,60	; ZF is set when the length is a multiple of 16
+	jz	NEAR $L$256_enc_msg_x4_start2
+	add	r8,1
+; NOTE(review): a partial final block is counted as a whole one and every block is read and written as 16 bytes - presumably callers pad; confirm.
+$L$256_enc_msg_x4_start2:
+	mov	r10,r8
+	shl	r10,62
+	shr	r10,62	; r10 = block count mod 4
+
+; Load the counter block and OR it with the OR_MASK constant (defined outside this chunk).
+	vmovdqa	xmm15,XMMWORD[rdx]
+	vpor	xmm15,xmm15,XMMWORD[OR_MASK]
+; xmm0..xmm3 = counters +0..+3; xmm4 holds the per-pass increment of four.
+	vmovdqa	xmm4,XMMWORD[four]
+	vmovdqa	xmm0,xmm15
+	vpaddd	xmm1,xmm15,XMMWORD[one]
+	vpaddd	xmm2,xmm15,XMMWORD[two]
+	vpaddd	xmm3,xmm15,XMMWORD[three]
+
+	shr	r8,2	; r8 = number of full four-block groups
+	je	NEAR $L$256_enc_msg_x4_check_remainder
+; Pre-bias both pointers by -64 because the loop advances them at its top.
+	sub	rsi,64
+	sub	rdi,64
+
+$L$256_enc_msg_x4_loop1:
+	add	rsi,64
+	add	rdi,64
+; Working copies of the four counters.
+	vmovdqa	xmm5,xmm0
+	vmovdqa	xmm6,xmm1
+	vmovdqa	xmm7,xmm2
+	vmovdqa	xmm8,xmm3
+; Round 0: XOR with the first round key.
+	vpxor	xmm5,xmm5,XMMWORD[rcx]
+	vpxor	xmm6,xmm6,XMMWORD[rcx]
+	vpxor	xmm7,xmm7,XMMWORD[rcx]
+	vpxor	xmm8,xmm8,XMMWORD[rcx]
+; Rounds 1-13; the counters for the next pass are advanced between the first few rounds.
+	vmovdqu	xmm12,XMMWORD[16+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm0,xmm0,xmm4
+	vmovdqu	xmm12,XMMWORD[32+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm1,xmm1,xmm4
+	vmovdqu	xmm12,XMMWORD[48+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm2,xmm2,xmm4
+	vmovdqu	xmm12,XMMWORD[64+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vpaddd	xmm3,xmm3,xmm4
+
+	vmovdqu	xmm12,XMMWORD[80+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[96+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[112+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[128+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[144+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[160+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[176+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[192+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+
+	vmovdqu	xmm12,XMMWORD[208+rcx]
+	vaesenc	xmm5,xmm5,xmm12
+	vaesenc	xmm6,xmm6,xmm12
+	vaesenc	xmm7,xmm7,xmm12
+	vaesenc	xmm8,xmm8,xmm12
+; Final round uses the key at offset 224.
+	vmovdqu	xmm12,XMMWORD[224+rcx]
+	vaesenclast	xmm5,xmm5,xmm12
+	vaesenclast	xmm6,xmm6,xmm12
+	vaesenclast	xmm7,xmm7,xmm12
+	vaesenclast	xmm8,xmm8,xmm12
+
+
+; XOR the four keystream blocks with the source, then store them to the destination.
+	vpxor	xmm5,xmm5,XMMWORD[rdi]
+	vpxor	xmm6,xmm6,XMMWORD[16+rdi]
+	vpxor	xmm7,xmm7,XMMWORD[32+rdi]
+	vpxor	xmm8,xmm8,XMMWORD[48+rdi]
+
+	sub	r8,1	; the flags set here drive the jne after the stores
+
+	vmovdqu	XMMWORD[rsi],xmm5
+	vmovdqu	XMMWORD[16+rsi],xmm6
+	vmovdqu	XMMWORD[32+rsi],xmm7
+	vmovdqu	XMMWORD[48+rsi],xmm8
+
+	jne	NEAR $L$256_enc_msg_x4_loop1
+; Step past the last group so both pointers address the first unprocessed block.
+	add	rsi,64
+	add	rdi,64
+
+$L$256_enc_msg_x4_check_remainder:
+	cmp	r10,0
+	je	NEAR $L$256_enc_msg_x4_out
+
+$L$256_enc_msg_x4_loop2:
+; Tail: the remaining r10 (at most three) blocks, one per iteration, using the counter kept in xmm0.
+
+
+	vmovdqa	xmm5,xmm0
+	vpaddd	xmm0,xmm0,XMMWORD[one]
+	vpxor	xmm5,xmm5,XMMWORD[rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[16+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[32+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[48+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[64+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[80+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[96+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[112+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[128+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[144+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[160+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[176+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[192+rcx]
+	vaesenc	xmm5,xmm5,XMMWORD[208+rcx]
+	vaesenclast	xmm5,xmm5,XMMWORD[224+rcx]
+
+
+	vpxor	xmm5,xmm5,XMMWORD[rdi]
+
+	vmovdqu	XMMWORD[rsi],xmm5
+
+	add	rdi,16
+	add	rsi,16
+
+	sub	r10,1
+	jne	NEAR $L$256_enc_msg_x4_loop2
+
+$L$256_enc_msg_x4_out:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes256gcmsiv_enc_msg_x4:
+global	aes256gcmsiv_enc_msg_x8
+; aes256gcmsiv_enc_msg_x8: AES-256 counter-mode encryption of a buffer, eight blocks per main-loop pass, then one block at a time.
+ALIGN	16
+aes256gcmsiv_enc_msg_x8:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_enc_msg_x8:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+; rdi = source, rsi = destination, rdx = 16-byte initial counter block (aligned load), rcx = round keys (0..224), r8 = length.
+; NOTE(review): xmm6-xmm15 are overwritten with no visible save/restore, yet Win64 treats them as callee-saved - confirm no Windows caller relies on them.
+
+	test	r8,r8
+	jnz	NEAR $L$256_enc_msg_x8_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$256_enc_msg_x8_start:
+; r11 = 64-byte-aligned slot just below rsp for the eighth counter. NOTE(review): rsp itself is never lowered, so the slot is below the stack pointer - confirm this is safe on the target ABI.
+	mov	r11,rsp
+	sub	r11,16
+	and	r11,-64
+; Convert the length to a block count, rounding up when the low four bits are non-zero.
+	mov	r10,r8
+	shr	r8,4
+	shl	r10,60	; ZF is set when the length is a multiple of 16
+	jz	NEAR $L$256_enc_msg_x8_start2
+	add	r8,1
+; NOTE(review): a partial final block is counted as a whole one and every block is read and written as 16 bytes - presumably callers pad; confirm.
+$L$256_enc_msg_x8_start2:
+	mov	r10,r8
+	shl	r10,61
+	shr	r10,61	; r10 = block count mod 8
+
+; Load the counter block and OR it with the OR_MASK constant (defined outside this chunk).
+	vmovdqa	xmm1,XMMWORD[rdx]
+	vpor	xmm1,xmm1,XMMWORD[OR_MASK]
+
+; Counters +0..+6 live in xmm0 and xmm9..xmm14; counter +7 is kept at [r11].
+	vpaddd	xmm0,xmm1,XMMWORD[seven]
+	vmovdqa	XMMWORD[r11],xmm0
+	vpaddd	xmm9,xmm1,XMMWORD[one]
+	vpaddd	xmm10,xmm1,XMMWORD[two]
+	vpaddd	xmm11,xmm1,XMMWORD[three]
+	vpaddd	xmm12,xmm1,XMMWORD[four]
+	vpaddd	xmm13,xmm1,XMMWORD[five]
+	vpaddd	xmm14,xmm1,XMMWORD[six]
+	vmovdqa	xmm0,xmm1
+
+	shr	r8,3	; r8 = number of full eight-block groups
+	jz	NEAR $L$256_enc_msg_x8_check_remainder
+; Pre-bias both pointers by -128 because the loop advances them at its top.
+	sub	rsi,128
+	sub	rdi,128
+
+$L$256_enc_msg_x8_loop1:
+	add	rsi,128
+	add	rdi,128
+; Working copies: xmm1..xmm7 take counters +0..+6, xmm8 takes counter +7 from the stack slot.
+	vmovdqa	xmm1,xmm0
+	vmovdqa	xmm2,xmm9
+	vmovdqa	xmm3,xmm10
+	vmovdqa	xmm4,xmm11
+	vmovdqa	xmm5,xmm12
+	vmovdqa	xmm6,xmm13
+	vmovdqa	xmm7,xmm14
+
+	vmovdqa	xmm8,XMMWORD[r11]
+; Round 0: XOR every lane with the first round key.
+	vpxor	xmm1,xmm1,XMMWORD[rcx]
+	vpxor	xmm2,xmm2,XMMWORD[rcx]
+	vpxor	xmm3,xmm3,XMMWORD[rcx]
+	vpxor	xmm4,xmm4,XMMWORD[rcx]
+	vpxor	xmm5,xmm5,XMMWORD[rcx]
+	vpxor	xmm6,xmm6,XMMWORD[rcx]
+	vpxor	xmm7,xmm7,XMMWORD[rcx]
+	vpxor	xmm8,xmm8,XMMWORD[rcx]
+; Rounds 1-13 follow; the counters for the next pass are advanced by eight in between the rounds.
+	vmovdqu	xmm15,XMMWORD[16+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+; xmm14 was already copied into xmm7, so it is borrowed to advance the stack counter by eight...
+	vmovdqa	xmm14,XMMWORD[r11]
+	vpaddd	xmm14,xmm14,XMMWORD[eight]
+	vmovdqa	XMMWORD[r11],xmm14
+	vmovdqu	xmm15,XMMWORD[32+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+; ...and subtracting one turns that value into the next pass's counter +6.
+	vpsubd	xmm14,xmm14,XMMWORD[one]
+	vmovdqu	xmm15,XMMWORD[48+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm0,xmm0,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[64+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm9,xmm9,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[80+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm10,xmm10,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[96+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm11,xmm11,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[112+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm12,xmm12,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[128+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vpaddd	xmm13,xmm13,XMMWORD[eight]
+	vmovdqu	xmm15,XMMWORD[144+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm15,XMMWORD[160+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm15,XMMWORD[176+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm15,XMMWORD[192+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm15,XMMWORD[208+rcx]
+	vaesenc	xmm1,xmm1,xmm15
+	vaesenc	xmm2,xmm2,xmm15
+	vaesenc	xmm3,xmm3,xmm15
+	vaesenc	xmm4,xmm4,xmm15
+	vaesenc	xmm5,xmm5,xmm15
+	vaesenc	xmm6,xmm6,xmm15
+	vaesenc	xmm7,xmm7,xmm15
+	vaesenc	xmm8,xmm8,xmm15
+; Final round uses the key at offset 224.
+	vmovdqu	xmm15,XMMWORD[224+rcx]
+	vaesenclast	xmm1,xmm1,xmm15
+	vaesenclast	xmm2,xmm2,xmm15
+	vaesenclast	xmm3,xmm3,xmm15
+	vaesenclast	xmm4,xmm4,xmm15
+	vaesenclast	xmm5,xmm5,xmm15
+	vaesenclast	xmm6,xmm6,xmm15
+	vaesenclast	xmm7,xmm7,xmm15
+	vaesenclast	xmm8,xmm8,xmm15
+
+
+; XOR the eight keystream blocks with the source, then store them to the destination.
+	vpxor	xmm1,xmm1,XMMWORD[rdi]
+	vpxor	xmm2,xmm2,XMMWORD[16+rdi]
+	vpxor	xmm3,xmm3,XMMWORD[32+rdi]
+	vpxor	xmm4,xmm4,XMMWORD[48+rdi]
+	vpxor	xmm5,xmm5,XMMWORD[64+rdi]
+	vpxor	xmm6,xmm6,XMMWORD[80+rdi]
+	vpxor	xmm7,xmm7,XMMWORD[96+rdi]
+	vpxor	xmm8,xmm8,XMMWORD[112+rdi]
+
+	sub	r8,1	; the flags set here drive the jne after the stores
+
+	vmovdqu	XMMWORD[rsi],xmm1
+	vmovdqu	XMMWORD[16+rsi],xmm2
+	vmovdqu	XMMWORD[32+rsi],xmm3
+	vmovdqu	XMMWORD[48+rsi],xmm4
+	vmovdqu	XMMWORD[64+rsi],xmm5
+	vmovdqu	XMMWORD[80+rsi],xmm6
+	vmovdqu	XMMWORD[96+rsi],xmm7
+	vmovdqu	XMMWORD[112+rsi],xmm8
+
+	jne	NEAR $L$256_enc_msg_x8_loop1
+; Step past the last group so both pointers address the first unprocessed block.
+	add	rsi,128
+	add	rdi,128
+
+$L$256_enc_msg_x8_check_remainder:
+	cmp	r10,0
+	je	NEAR $L$256_enc_msg_x8_out
+
+$L$256_enc_msg_x8_loop2:
+; Tail: the remaining r10 (at most seven) blocks, one per iteration, using the counter kept in xmm0.
+
+	vmovdqa	xmm1,xmm0
+	vpaddd	xmm0,xmm0,XMMWORD[one]
+
+	vpxor	xmm1,xmm1,XMMWORD[rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[16+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[32+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[48+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[64+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[80+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[96+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[112+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[128+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[144+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[160+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[176+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[192+rcx]
+	vaesenc	xmm1,xmm1,XMMWORD[208+rcx]
+	vaesenclast	xmm1,xmm1,XMMWORD[224+rcx]
+
+
+	vpxor	xmm1,xmm1,XMMWORD[rdi]
+
+	vmovdqu	XMMWORD[rsi],xmm1
+
+	add	rdi,16
+	add	rsi,16
+	sub	r10,1
+	jnz	NEAR $L$256_enc_msg_x8_loop2
+
+$L$256_enc_msg_x8_out:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+
+$L$SEH_end_aes256gcmsiv_enc_msg_x8:
+global	aes256gcmsiv_dec
+; Decrypts whole 16-byte blocks with an AES-256 counter keystream and folds every output block into the accumulator stored at [rdx].
+ALIGN	16
+aes256gcmsiv_dec:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_dec:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+; After the shuffle: rdi=input, rsi=output, rdx=accumulator+scratch buffer, rcx=multiplier table (six 16-byte entries are read),
+; r8=AES round keys (15 x 16 bytes are read), r9=byte count. NOTE(review): roles inferred from the uses below.
+; Byte counts below 16 (no bit set above the low four) return immediately without processing anything.
+	test	r9,~15
+	jnz	NEAR $L$256_dec_start
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$256_dec_start:
+	vzeroupper
+	vmovdqa	xmm0,XMMWORD[rdx]	; xmm0 = running accumulator (aligned load; written back at $L$256_dec_out)
+	mov	rax,rdx
+
+	lea	rax,[32+rax]	; rax = rdx+32: base for the scratch slots at rdx+16 .. rdx+95
+	lea	rcx,[32+rcx]	; bias rcx by 32 so the table entries are addressed at -32 .. +48
+
+
+	vmovdqu	xmm15,XMMWORD[r9*1+rdi]	; 16 bytes located r9 bytes past the input start (r9 not yet rounded down)
+	vpor	xmm15,xmm15,XMMWORD[OR_MASK]	; OR_MASK is defined outside this view; xmm15 becomes the first counter block
+	and	r9,~15	; round the byte count down to whole blocks
+
+
+	cmp	r9,96
+	jb	NEAR $L$256_dec_loop2	; fewer than six blocks: use the one-block loop only
+
+
+	sub	r9,96
+	vmovdqa	xmm7,xmm15	; xmm7..xmm12 = six counter blocks built with the one/two constants (defined outside this view)
+	vpaddd	xmm8,xmm7,XMMWORD[one]
+	vpaddd	xmm9,xmm7,XMMWORD[two]
+	vpaddd	xmm10,xmm9,XMMWORD[one]
+	vpaddd	xmm11,xmm9,XMMWORD[two]
+	vpaddd	xmm12,xmm11,XMMWORD[one]
+	vpaddd	xmm15,xmm11,XMMWORD[two]	; xmm15 = counter for the next batch
+
+	vpxor	xmm7,xmm7,XMMWORD[r8]	; whiten with round key 0
+	vpxor	xmm8,xmm8,XMMWORD[r8]
+	vpxor	xmm9,xmm9,XMMWORD[r8]
+	vpxor	xmm10,xmm10,XMMWORD[r8]
+	vpxor	xmm11,xmm11,XMMWORD[r8]
+	vpxor	xmm12,xmm12,XMMWORD[r8]
+
+	vmovdqu	xmm4,XMMWORD[16+r8]	; 13 vaesenc rounds (keys at 16..208+r8), then vaesenclast with the key at 224+r8
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[32+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[48+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[64+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[80+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[96+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[112+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[128+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[144+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[160+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[176+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[192+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[208+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[224+r8]
+	vaesenclast	xmm7,xmm7,xmm4
+	vaesenclast	xmm8,xmm8,xmm4
+	vaesenclast	xmm9,xmm9,xmm4
+	vaesenclast	xmm10,xmm10,xmm4
+	vaesenclast	xmm11,xmm11,xmm4
+	vaesenclast	xmm12,xmm12,xmm4
+
+
+	vpxor	xmm7,xmm7,XMMWORD[rdi]	; keystream XOR input = output blocks (kept in xmm7..xmm12 for hashing)
+	vpxor	xmm8,xmm8,XMMWORD[16+rdi]
+	vpxor	xmm9,xmm9,XMMWORD[32+rdi]
+	vpxor	xmm10,xmm10,XMMWORD[48+rdi]
+	vpxor	xmm11,xmm11,XMMWORD[64+rdi]
+	vpxor	xmm12,xmm12,XMMWORD[80+rdi]
+
+	vmovdqu	XMMWORD[rsi],xmm7
+	vmovdqu	XMMWORD[16+rsi],xmm8
+	vmovdqu	XMMWORD[32+rsi],xmm9
+	vmovdqu	XMMWORD[48+rsi],xmm10
+	vmovdqu	XMMWORD[64+rsi],xmm11
+	vmovdqu	XMMWORD[80+rsi],xmm12
+
+	add	rdi,96
+	add	rsi,96
+	jmp	NEAR $L$256_dec_loop1
+
+
+ALIGN	64
+$L$256_dec_loop1:	; each pass hashes the previous six output blocks while producing the next six
+	cmp	r9,96
+	jb	NEAR $L$256_dec_finish_96	; fewer than six blocks left: hash the pending six, then go block by block
+	sub	r9,96
+
+	vmovdqa	xmm6,xmm12	; newest previous output stays in xmm6; the other five are spilled to scratch
+	vmovdqa	XMMWORD[(16-32)+rax],xmm11
+	vmovdqa	XMMWORD[(32-32)+rax],xmm10
+	vmovdqa	XMMWORD[(48-32)+rax],xmm9
+	vmovdqa	XMMWORD[(64-32)+rax],xmm8
+	vmovdqa	XMMWORD[(80-32)+rax],xmm7
+
+	vmovdqa	xmm7,xmm15	; next six counter blocks, same pattern as the first batch
+	vpaddd	xmm8,xmm7,XMMWORD[one]
+	vpaddd	xmm9,xmm7,XMMWORD[two]
+	vpaddd	xmm10,xmm9,XMMWORD[one]
+	vpaddd	xmm11,xmm9,XMMWORD[two]
+	vpaddd	xmm12,xmm11,XMMWORD[one]
+	vpaddd	xmm15,xmm11,XMMWORD[two]
+
+	vmovdqa	xmm4,XMMWORD[r8]
+	vpxor	xmm7,xmm7,xmm4
+	vpxor	xmm8,xmm8,xmm4
+	vpxor	xmm9,xmm9,xmm4
+	vpxor	xmm10,xmm10,xmm4
+	vpxor	xmm11,xmm11,xmm4
+	vpxor	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]	; table entry 0 times the newest previous block
+	vpclmulqdq	xmm2,xmm6,xmm4,0x11	; xmm2 accumulates high*high products
+	vpclmulqdq	xmm3,xmm6,xmm4,0x00	; xmm3 accumulates low*low products
+	vpclmulqdq	xmm1,xmm6,xmm4,0x01	; xmm1 accumulates the two cross products
+	vpclmulqdq	xmm4,xmm6,xmm4,0x10
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm4,XMMWORD[16+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[((-16))+rax]	; spilled block (previous xmm11) times table entry 1
+	vmovdqu	xmm13,XMMWORD[((-16))+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[32+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[rax]	; spilled block (previous xmm10) times table entry 2
+	vmovdqu	xmm13,XMMWORD[rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[48+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[16+rax]	; spilled block (previous xmm9) times table entry 3
+	vmovdqu	xmm13,XMMWORD[16+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[64+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[32+rax]	; spilled block (previous xmm8) times table entry 4
+	vmovdqu	xmm13,XMMWORD[32+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm4,XMMWORD[80+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[96+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[112+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+
+	vmovdqa	xmm6,XMMWORD[((80-32))+rax]	; oldest spilled block (previous xmm7)
+	vpxor	xmm6,xmm6,xmm0	; fold the running accumulator into the oldest block before multiplying
+	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]	; table entry 5
+
+	vpclmulqdq	xmm4,xmm6,xmm5,0x01
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x10
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm4,XMMWORD[128+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+
+	vpsrldq	xmm4,xmm1,8	; split the cross-product sum across the two halves:
+	vpxor	xmm5,xmm2,xmm4	; xmm5 = high 128 bits of the unreduced product
+	vpslldq	xmm4,xmm1,8
+	vpxor	xmm0,xmm3,xmm4	; xmm0 = low 128 bits of the unreduced product
+
+	vmovdqa	xmm3,XMMWORD[poly]	; reduction constant (poly is defined outside this view)
+
+	vmovdqu	xmm4,XMMWORD[144+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[160+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[176+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[192+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm4,XMMWORD[208+r8]
+	vaesenc	xmm7,xmm7,xmm4
+	vaesenc	xmm8,xmm8,xmm4
+	vaesenc	xmm9,xmm9,xmm4
+	vaesenc	xmm10,xmm10,xmm4
+	vaesenc	xmm11,xmm11,xmm4
+	vaesenc	xmm12,xmm12,xmm4
+
+	vmovdqu	xmm6,XMMWORD[224+r8]	; last round key, combined with the input below
+	vpalignr	xmm2,xmm0,xmm0,8	; reduction step 1 of 2: swap halves, XOR with low qword times high qword of poly
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vpxor	xmm4,xmm6,XMMWORD[rdi]	; last key XOR input, so vaesenclast yields the output block directly
+	vaesenclast	xmm7,xmm7,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[16+rdi]
+	vaesenclast	xmm8,xmm8,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[32+rdi]
+	vaesenclast	xmm9,xmm9,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[48+rdi]
+	vaesenclast	xmm10,xmm10,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[64+rdi]
+	vaesenclast	xmm11,xmm11,xmm4
+	vpxor	xmm4,xmm6,XMMWORD[80+rdi]
+	vaesenclast	xmm12,xmm12,xmm4
+
+	vpalignr	xmm2,xmm0,xmm0,8	; reduction step 2 of 2
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vmovdqu	XMMWORD[rsi],xmm7
+	vmovdqu	XMMWORD[16+rsi],xmm8
+	vmovdqu	XMMWORD[32+rsi],xmm9
+	vmovdqu	XMMWORD[48+rsi],xmm10
+	vmovdqu	XMMWORD[64+rsi],xmm11
+	vmovdqu	XMMWORD[80+rsi],xmm12
+
+	vpxor	xmm0,xmm0,xmm5	; add the high half: xmm0 is the updated accumulator
+
+	lea	rdi,[96+rdi]
+	lea	rsi,[96+rsi]
+	jmp	NEAR $L$256_dec_loop1
+
+$L$256_dec_finish_96:	; hash the last six output blocks; no further keystream is generated here
+	vmovdqa	xmm6,xmm12
+	vmovdqa	XMMWORD[(16-32)+rax],xmm11
+	vmovdqa	XMMWORD[(32-32)+rax],xmm10
+	vmovdqa	XMMWORD[(48-32)+rax],xmm9
+	vmovdqa	XMMWORD[(64-32)+rax],xmm8
+	vmovdqa	XMMWORD[(80-32)+rax],xmm7
+
+	vmovdqu	xmm4,XMMWORD[((0-32))+rcx]	; same six multiply-accumulate steps as in $L$256_dec_loop1
+	vpclmulqdq	xmm1,xmm6,xmm4,0x10
+	vpclmulqdq	xmm2,xmm6,xmm4,0x11
+	vpclmulqdq	xmm3,xmm6,xmm4,0x00
+	vpclmulqdq	xmm4,xmm6,xmm4,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[((-16))+rax]
+	vmovdqu	xmm13,XMMWORD[((-16))+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[rax]
+	vmovdqu	xmm13,XMMWORD[rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[16+rax]
+	vmovdqu	xmm13,XMMWORD[16+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vmovdqu	xmm6,XMMWORD[32+rax]
+	vmovdqu	xmm13,XMMWORD[32+rcx]
+
+	vpclmulqdq	xmm4,xmm6,xmm13,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm13,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+
+	vmovdqu	xmm6,XMMWORD[((80-32))+rax]
+	vpxor	xmm6,xmm6,xmm0	; fold the running accumulator into the oldest block
+	vmovdqu	xmm5,XMMWORD[((80-32))+rcx]
+	vpclmulqdq	xmm4,xmm6,xmm5,0x11
+	vpxor	xmm2,xmm2,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x00
+	vpxor	xmm3,xmm3,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x10
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm4,xmm6,xmm5,0x01
+	vpxor	xmm1,xmm1,xmm4
+
+	vpsrldq	xmm4,xmm1,8
+	vpxor	xmm5,xmm2,xmm4
+	vpslldq	xmm4,xmm1,8
+	vpxor	xmm0,xmm3,xmm4
+
+	vmovdqa	xmm3,XMMWORD[poly]
+
+	vpalignr	xmm2,xmm0,xmm0,8	; two reduction steps, back to back
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vpalignr	xmm2,xmm0,xmm0,8
+	vpclmulqdq	xmm0,xmm0,xmm3,0x10
+	vpxor	xmm0,xmm2,xmm0
+
+	vpxor	xmm0,xmm0,xmm5
+
+$L$256_dec_loop2:	; one 16-byte block per pass until fewer than 16 bytes remain
+
+
+
+	cmp	r9,16
+	jb	NEAR $L$256_dec_out
+	sub	r9,16
+
+	vmovdqa	xmm2,xmm15	; current counter block
+	vpaddd	xmm15,xmm15,XMMWORD[one]
+
+	vpxor	xmm2,xmm2,XMMWORD[r8]
+	vaesenc	xmm2,xmm2,XMMWORD[16+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[32+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[48+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[64+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[80+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[96+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[112+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[128+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[144+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[160+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[176+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[192+r8]
+	vaesenc	xmm2,xmm2,XMMWORD[208+r8]
+	vaesenclast	xmm2,xmm2,XMMWORD[224+r8]
+	vpxor	xmm2,xmm2,XMMWORD[rdi]
+	vmovdqu	XMMWORD[rsi],xmm2
+	add	rdi,16
+	add	rsi,16
+
+	vpxor	xmm0,xmm0,xmm2	; fold the output block into the accumulator
+	vmovdqa	xmm1,XMMWORD[((-32))+rcx]	; table entry 0 (rcx was biased by 32 at $L$256_dec_start)
+	call	GFMUL	; GFMUL is defined outside this view; presumably xmm0 = xmm0 * xmm1 - confirm
+
+	jmp	NEAR $L$256_dec_loop2
+
+$L$256_dec_out:
+	vmovdqu	XMMWORD[rdx],xmm0	; write the accumulator back
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes256gcmsiv_dec:
+global	aes256gcmsiv_kdf
+; Encrypts six counter blocks derived from the 16 bytes at rdi with the AES-256 round keys at rdx and writes the 96 result bytes to rsi.
+ALIGN	16
+aes256gcmsiv_kdf:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes256gcmsiv_kdf:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+; After the shuffle: rdi = 16-byte input block, rsi = 96-byte output, rdx = round keys (15 x 16 bytes are read).
+; All three are accessed with vmovdqa, so each must be 16-byte aligned.
+
+
+
+
+
+	vmovdqa	xmm1,XMMWORD[rdx]	; round key 0
+	vmovdqa	xmm4,XMMWORD[rdi]
+	vmovdqa	xmm11,XMMWORD[and_mask]	; and_mask and one are constants defined outside this view
+	vmovdqa	xmm8,XMMWORD[one]
+	vpshufd	xmm4,xmm4,0x90	; dwords become [d0,d0,d1,d2]: shifted up one lane, d0 duplicated in lane 0
+	vpand	xmm4,xmm4,xmm11	; presumably clears lane 0 to make room for the counter - confirm against and_mask
+	vpaddd	xmm6,xmm4,xmm8	; xmm6,xmm7,xmm11,xmm12,xmm13 = base block plus one, applied 1..5 times
+	vpaddd	xmm7,xmm6,xmm8
+	vpaddd	xmm11,xmm7,xmm8
+	vpaddd	xmm12,xmm11,xmm8
+	vpaddd	xmm13,xmm12,xmm8
+
+	vpxor	xmm4,xmm4,xmm1	; whiten all six blocks with round key 0
+	vpxor	xmm6,xmm6,xmm1
+	vpxor	xmm7,xmm7,xmm1
+	vpxor	xmm11,xmm11,xmm1
+	vpxor	xmm12,xmm12,xmm1
+	vpxor	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm1,XMMWORD[16+rdx]	; 13 vaesenc rounds follow, alternating xmm1/xmm2 as the key register
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[32+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[48+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[64+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[80+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[96+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[112+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[128+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[144+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[160+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[176+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[192+rdx]
+	vaesenc	xmm4,xmm4,xmm2
+	vaesenc	xmm6,xmm6,xmm2
+	vaesenc	xmm7,xmm7,xmm2
+	vaesenc	xmm11,xmm11,xmm2
+	vaesenc	xmm12,xmm12,xmm2
+	vaesenc	xmm13,xmm13,xmm2
+
+	vmovdqa	xmm1,XMMWORD[208+rdx]
+	vaesenc	xmm4,xmm4,xmm1
+	vaesenc	xmm6,xmm6,xmm1
+	vaesenc	xmm7,xmm7,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+
+	vmovdqa	xmm2,XMMWORD[224+rdx]	; final round key
+	vaesenclast	xmm4,xmm4,xmm2
+	vaesenclast	xmm6,xmm6,xmm2
+	vaesenclast	xmm7,xmm7,xmm2
+	vaesenclast	xmm11,xmm11,xmm2
+	vaesenclast	xmm12,xmm12,xmm2
+	vaesenclast	xmm13,xmm13,xmm2
+
+
+	vmovdqa	XMMWORD[rsi],xmm4	; store the six encrypted blocks (aligned stores)
+	vmovdqa	XMMWORD[16+rsi],xmm6
+	vmovdqa	XMMWORD[32+rsi],xmm7
+	vmovdqa	XMMWORD[48+rsi],xmm11
+	vmovdqa	XMMWORD[64+rsi],xmm12
+	vmovdqa	XMMWORD[80+rsi],xmm13
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes256gcmsiv_kdf:
diff --git a/deps/boringssl/win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm b/deps/boringssl/win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
new file mode 100644
index 0000000..7e3d6dd
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/cipher_extra/chacha20_poly1305_x86_64.asm
@@ -0,0 +1,8946 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+EXTERN	OPENSSL_ia32cap_P
+
+chacha20_poly1305_constants:
+; Read-only constants shared by the routines in this file.
+ALIGN	64
+$L$chacha20_consts:	; ASCII "expand 32-byte k", stored twice (32 bytes)
+DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
+DB	'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
+$L$rol8:	; pshufb mask: rotates every 32-bit lane left by 8 bits
+DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
+DB	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
+$L$rol16:	; pshufb mask: rotates every 32-bit lane left by 16 bits
+DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
+DB	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
+$L$avx2_init:	; NOTE(review): a 32-byte load from here also covers $L$sse_inc - presumably intentional, confirm in the AVX2 path
+	DD	0,0,0,0
+$L$sse_inc:	; paddd operand: adds 1 to dword 0 of a 128-bit block
+	DD	1,0,0,0
+$L$avx2_inc:	; 2 in dword 0 of each 128-bit half
+	DD	2,0,0,0,2,0,0,0
+$L$clamp:	; first 16 bytes: mask applied with pand to the derived Poly1305 key half; second 16 bytes: all ones
+	DQ	0x0FFFFFFC0FFFFFFF,0x0FFFFFFC0FFFFFFC
+	DQ	0xFFFFFFFFFFFFFFFF,0xFFFFFFFFFFFFFFFF
+ALIGN	16
+$L$and_masks:	; 16 rows of 16 bytes: row n (0-based) keeps the first n+1 bytes and zeroes the rest
+DB	0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00
+DB	0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff
+
+
+ALIGN	64
+poly_hash_ad_internal:
+; In: rcx = data pointer, r8 = byte count, rbp = frame base (multiplier words read from [160+rbp] and [168+rbp]).
+; Out: r10:r11:r12 = accumulator (low, middle, top bits), started from zero. Clobbers rax, rdx, r9, r13, r14, r15; the general path also advances rcx and consumes r8.
+	xor	r10,r10
+	xor	r11,r11
+	xor	r12,r12
+	cmp	r8,13	; exactly 13 bytes takes the straight-line path (label name suggests the TLS header case)
+	jne	NEAR $L$hash_ad_loop
+$L$poly_fast_tls_ad:
+
+	mov	r10,QWORD[rcx]	; bytes 0..7
+	mov	r11,QWORD[5+rcx]	; bytes 5..12 ...
+	shr	r11,24	; ... drop bytes 5..7, leaving bytes 8..12 in the low 40 bits
+	mov	r12,1	; pad bit at 2^128
+	mov	rax,QWORD[((0+160+0))+rbp]	; 130-bit accumulator times the 128-bit multiplier, schoolbook style
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx	; product is now r13 + r14*2^64 + r15*2^128 + r9*2^192
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3	; keep bits 128..129 in the top limb
+	mov	r13,r15
+	and	r13,-4	; bits at and above 2^130, still scaled by 4 ...
+	mov	r14,r9
+	shrd	r15,r9,2	; ... plus the same bits shifted down by 2 ...
+	shr	r9,2
+	add	r15,r13	; ... sums to 5 times the overflow, folded back in (2^130 = 5 mod 2^130-5)
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	DB	0F3h,0C3h		;repret
+$L$hash_ad_loop:
+
+	cmp	r8,16
+	jb	NEAR $L$hash_ad_tail	; fewer than 16 bytes left
+	add	r10,QWORD[((0+0))+rcx]	; accumulator += next 16 bytes ...
+	adc	r11,QWORD[((8+0))+rcx]
+	adc	r12,1	; ... plus the pad bit at 2^128
+	mov	rax,QWORD[((0+160+0))+rbp]	; same multiply-and-reduce sequence as in the 13-byte path
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rcx,[16+rcx]
+	sub	r8,16
+	jmp	NEAR $L$hash_ad_loop
+$L$hash_ad_tail:
+	cmp	r8,0
+	je	NEAR $L$hash_ad_done
+
+	xor	r13,r13	; r14:r13 collects the remaining 1..15 bytes, zero-extended to 16 bytes
+	xor	r14,r14
+	xor	r15,r15
+	add	rcx,r8	; start one past the last byte and walk backwards
+$L$hash_ad_tail_loop:
+	shld	r14,r13,8	; shift the 128-bit value left one byte ...
+	shl	r13,8
+	movzx	r15,BYTE[((-1))+rcx]
+	xor	r13,r15	; ... and insert the next-lower byte at the bottom
+	dec	rcx
+	dec	r8
+	jne	NEAR $L$hash_ad_tail_loop
+
+	add	r10,r13
+	adc	r11,r14
+	adc	r12,1	; pad bit at 2^128, as for a full block
+	mov	rax,QWORD[((0+160+0))+rbp]	; same multiply-and-reduce sequence as in the 13-byte path
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+
+$L$hash_ad_done:
+	DB	0F3h,0C3h		;repret
+
+
+
+global	chacha20_poly1305_open
+
+ALIGN	64
+chacha20_poly1305_open:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_chacha20_poly1305_open:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+	push	rbp
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+
+
+	push	r9
+
+	sub	rsp,288 + 160 + 32
+
+
+	lea	rbp,[32+rsp]
+	and	rbp,-32
+
+	movaps	XMMWORD[(0+0)+rbp],xmm6
+	movaps	XMMWORD[(16+0)+rbp],xmm7
+	movaps	XMMWORD[(32+0)+rbp],xmm8
+	movaps	XMMWORD[(48+0)+rbp],xmm9
+	movaps	XMMWORD[(64+0)+rbp],xmm10
+	movaps	XMMWORD[(80+0)+rbp],xmm11
+	movaps	XMMWORD[(96+0)+rbp],xmm12
+	movaps	XMMWORD[(112+0)+rbp],xmm13
+	movaps	XMMWORD[(128+0)+rbp],xmm14
+	movaps	XMMWORD[(144+0)+rbp],xmm15
+
+	mov	rbx,rdx
+	mov	QWORD[((0+160+32))+rbp],r8
+	mov	QWORD[((8+160+32))+rbp],rbx
+
+	mov	eax,DWORD[((OPENSSL_ia32cap_P+8))]
+	and	eax,288
+	xor	eax,288
+	jz	NEAR chacha20_poly1305_open_avx2
+
+	cmp	rbx,128
+	jbe	NEAR $L$open_sse_128
+
+	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqu	xmm4,XMMWORD[r9]
+	movdqu	xmm8,XMMWORD[16+r9]
+	movdqu	xmm12,XMMWORD[32+r9]
+
+	movdqa	xmm7,xmm12
+
+	movdqa	XMMWORD[(160+48)+rbp],xmm4
+	movdqa	XMMWORD[(160+64)+rbp],xmm8
+	movdqa	XMMWORD[(160+96)+rbp],xmm12
+	mov	r10,10
+$L$open_sse_init_rounds:
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+
+	dec	r10
+	jne	NEAR $L$open_sse_init_rounds
+
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,XMMWORD[((160+48))+rbp]
+
+	pand	xmm0,XMMWORD[$L$clamp]
+	movdqa	XMMWORD[(160+0)+rbp],xmm0
+	movdqa	XMMWORD[(160+16)+rbp],xmm4
+
+	mov	r8,r8
+	call	poly_hash_ad_internal
+$L$open_sse_main_loop:
+	cmp	rbx,16*16
+	jb	NEAR $L$open_sse_tail
+
+	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqa	xmm4,XMMWORD[((160+48))+rbp]
+	movdqa	xmm8,XMMWORD[((160+64))+rbp]
+	movdqa	xmm1,xmm0
+	movdqa	xmm5,xmm4
+	movdqa	xmm9,xmm8
+	movdqa	xmm2,xmm0
+	movdqa	xmm6,xmm4
+	movdqa	xmm10,xmm8
+	movdqa	xmm3,xmm0
+	movdqa	xmm7,xmm4
+	movdqa	xmm11,xmm8
+	movdqa	xmm15,XMMWORD[((160+96))+rbp]
+	paddd	xmm15,XMMWORD[$L$sse_inc]
+	movdqa	xmm14,xmm15
+	paddd	xmm14,XMMWORD[$L$sse_inc]
+	movdqa	xmm13,xmm14
+	paddd	xmm13,XMMWORD[$L$sse_inc]
+	movdqa	xmm12,xmm13
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+	movdqa	XMMWORD[(160+96)+rbp],xmm12
+	movdqa	XMMWORD[(160+112)+rbp],xmm13
+	movdqa	XMMWORD[(160+128)+rbp],xmm14
+	movdqa	XMMWORD[(160+144)+rbp],xmm15
+
+
+
+	mov	rcx,4
+	mov	r8,rsi
+$L$open_sse_main_loop_rounds:
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,XMMWORD[$L$rol16]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	add	r10,QWORD[((0+0))+r8]
+	adc	r11,QWORD[((8+0))+r8]
+	adc	r12,1
+
+	lea	r8,[16+r8]
+	pxor	xmm6,xmm10
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,20
+	pslld	xmm7,32-20
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,20
+	pslld	xmm6,32-20
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,20
+	pslld	xmm5,32-20
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,20
+	pslld	xmm4,32-20
+	pxor	xmm4,xmm8
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	movdqa	xmm8,XMMWORD[$L$rol8]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	pxor	xmm6,xmm10
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,25
+	pslld	xmm7,32-25
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,25
+	pslld	xmm6,32-25
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,25
+	pslld	xmm5,32-25
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,25
+	pslld	xmm4,32-25
+	pxor	xmm4,xmm8
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+DB	102,15,58,15,255,4
+DB	102,69,15,58,15,219,8
+DB	102,69,15,58,15,255,12
+DB	102,15,58,15,246,4
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,12
+DB	102,15,58,15,237,4
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,12
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,XMMWORD[$L$rol16]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	pxor	xmm6,xmm10
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,20
+	pslld	xmm7,32-20
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,20
+	pslld	xmm6,32-20
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,20
+	pslld	xmm5,32-20
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,20
+	pslld	xmm4,32-20
+	pxor	xmm4,xmm8
+	movdqa	xmm8,XMMWORD[$L$rol8]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	pxor	xmm6,xmm10
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,25
+	pslld	xmm7,32-25
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,25
+	pslld	xmm6,32-25
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,25
+	pslld	xmm5,32-25
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,25
+	pslld	xmm4,32-25
+	pxor	xmm4,xmm8
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+DB	102,15,58,15,255,12
+DB	102,69,15,58,15,219,8
+DB	102,69,15,58,15,255,4
+DB	102,15,58,15,246,12
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,4
+DB	102,15,58,15,237,12
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,4
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+
+	dec	rcx
+	jge	NEAR $L$open_sse_main_loop_rounds
+	add	r10,QWORD[((0+0))+r8]
+	adc	r11,QWORD[((8+0))+r8]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	r8,[16+r8]
+	cmp	rcx,-6
+	jg	NEAR $L$open_sse_main_loop_rounds
+	paddd	xmm3,XMMWORD[$L$chacha20_consts]
+	paddd	xmm7,XMMWORD[((160+48))+rbp]
+	paddd	xmm11,XMMWORD[((160+64))+rbp]
+	paddd	xmm15,XMMWORD[((160+144))+rbp]
+	paddd	xmm2,XMMWORD[$L$chacha20_consts]
+	paddd	xmm6,XMMWORD[((160+48))+rbp]
+	paddd	xmm10,XMMWORD[((160+64))+rbp]
+	paddd	xmm14,XMMWORD[((160+128))+rbp]
+	paddd	xmm1,XMMWORD[$L$chacha20_consts]
+	paddd	xmm5,XMMWORD[((160+48))+rbp]
+	paddd	xmm9,XMMWORD[((160+64))+rbp]
+	paddd	xmm13,XMMWORD[((160+112))+rbp]
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,XMMWORD[((160+48))+rbp]
+	paddd	xmm8,XMMWORD[((160+64))+rbp]
+	paddd	xmm12,XMMWORD[((160+96))+rbp]
+	movdqa	XMMWORD[(160+80)+rbp],xmm12
+	movdqu	xmm12,XMMWORD[((0 + 0))+rsi]
+	pxor	xmm12,xmm3
+	movdqu	XMMWORD[(0 + 0)+rdi],xmm12
+	movdqu	xmm12,XMMWORD[((16 + 0))+rsi]
+	pxor	xmm12,xmm7
+	movdqu	XMMWORD[(16 + 0)+rdi],xmm12
+	movdqu	xmm12,XMMWORD[((32 + 0))+rsi]
+	pxor	xmm12,xmm11
+	movdqu	XMMWORD[(32 + 0)+rdi],xmm12
+	movdqu	xmm12,XMMWORD[((48 + 0))+rsi]
+	pxor	xmm12,xmm15
+	movdqu	XMMWORD[(48 + 0)+rdi],xmm12
+	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
+	pxor	xmm2,xmm3
+	pxor	xmm6,xmm7
+	pxor	xmm10,xmm11
+	pxor	xmm15,xmm14
+	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
+	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
+	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
+	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
+	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
+	pxor	xmm1,xmm3
+	pxor	xmm5,xmm7
+	pxor	xmm9,xmm11
+	pxor	xmm15,xmm13
+	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
+	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
+	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
+	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
+	movdqu	xmm3,XMMWORD[((0 + 192))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 192))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 192))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 192))+rsi]
+	pxor	xmm0,xmm3
+	pxor	xmm4,xmm7
+	pxor	xmm8,xmm11
+	pxor	xmm15,XMMWORD[((160+80))+rbp]
+	movdqu	XMMWORD[(0 + 192)+rdi],xmm0
+	movdqu	XMMWORD[(16 + 192)+rdi],xmm4
+	movdqu	XMMWORD[(32 + 192)+rdi],xmm8
+	movdqu	XMMWORD[(48 + 192)+rdi],xmm15
+
+	lea	rsi,[256+rsi]
+	lea	rdi,[256+rdi]
+	sub	rbx,16*16
+	jmp	NEAR $L$open_sse_main_loop
+$L$open_sse_tail:
+
+	test	rbx,rbx
+	jz	NEAR $L$open_sse_finalize
+	cmp	rbx,12*16
+	ja	NEAR $L$open_sse_tail_256
+	cmp	rbx,8*16
+	ja	NEAR $L$open_sse_tail_192
+	cmp	rbx,4*16
+	ja	NEAR $L$open_sse_tail_128
+	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqa	xmm4,XMMWORD[((160+48))+rbp]
+	movdqa	xmm8,XMMWORD[((160+64))+rbp]
+	movdqa	xmm12,XMMWORD[((160+96))+rbp]
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+	movdqa	XMMWORD[(160+96)+rbp],xmm12
+
+	xor	r8,r8
+	mov	rcx,rbx
+	cmp	rcx,16
+	jb	NEAR $L$open_sse_tail_64_rounds
+$L$open_sse_tail_64_rounds_and_x1hash:
+	add	r10,QWORD[((0+0))+r8*1+rsi]
+	adc	r11,QWORD[((8+0))+r8*1+rsi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	sub	rcx,16
+$L$open_sse_tail_64_rounds:
+	add	r8,16
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+
+	cmp	rcx,16
+	jae	NEAR $L$open_sse_tail_64_rounds_and_x1hash
+	cmp	r8,10*16
+	jne	NEAR $L$open_sse_tail_64_rounds
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,XMMWORD[((160+48))+rbp]
+	paddd	xmm8,XMMWORD[((160+64))+rbp]
+	paddd	xmm12,XMMWORD[((160+96))+rbp]
+
+	jmp	NEAR $L$open_sse_tail_64_dec_loop
+
+$L$open_sse_tail_128:
+	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqa	xmm4,XMMWORD[((160+48))+rbp]
+	movdqa	xmm8,XMMWORD[((160+64))+rbp]
+	movdqa	xmm1,xmm0
+	movdqa	xmm5,xmm4
+	movdqa	xmm9,xmm8
+	movdqa	xmm13,XMMWORD[((160+96))+rbp]
+	paddd	xmm13,XMMWORD[$L$sse_inc]
+	movdqa	xmm12,xmm13
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+	movdqa	XMMWORD[(160+96)+rbp],xmm12
+	movdqa	XMMWORD[(160+112)+rbp],xmm13
+
+	mov	rcx,rbx
+	and	rcx,-16
+	xor	r8,r8
+$L$open_sse_tail_128_rounds_and_x1hash:
+	add	r10,QWORD[((0+0))+r8*1+rsi]
+	adc	r11,QWORD[((8+0))+r8*1+rsi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+$L$open_sse_tail_128_rounds:
+	add	r8,16
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,4
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,12
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,12
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,4
+
+	cmp	r8,rcx
+	jb	NEAR $L$open_sse_tail_128_rounds_and_x1hash
+	cmp	r8,10*16
+	jne	NEAR $L$open_sse_tail_128_rounds
+	paddd	xmm1,XMMWORD[$L$chacha20_consts]
+	paddd	xmm5,XMMWORD[((160+48))+rbp]
+	paddd	xmm9,XMMWORD[((160+64))+rbp]
+	paddd	xmm13,XMMWORD[((160+112))+rbp]
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,XMMWORD[((160+48))+rbp]
+	paddd	xmm8,XMMWORD[((160+64))+rbp]
+	paddd	xmm12,XMMWORD[((160+96))+rbp]
+	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
+	pxor	xmm1,xmm3
+	pxor	xmm5,xmm7
+	pxor	xmm9,xmm11
+	pxor	xmm15,xmm13
+	movdqu	XMMWORD[(0 + 0)+rdi],xmm1
+	movdqu	XMMWORD[(16 + 0)+rdi],xmm5
+	movdqu	XMMWORD[(32 + 0)+rdi],xmm9
+	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
+
+	sub	rbx,4*16
+	lea	rsi,[64+rsi]
+	lea	rdi,[64+rdi]
+	jmp	NEAR $L$open_sse_tail_64_dec_loop
+
+$L$open_sse_tail_192:
+	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqa	xmm4,XMMWORD[((160+48))+rbp]
+	movdqa	xmm8,XMMWORD[((160+64))+rbp]
+	movdqa	xmm1,xmm0
+	movdqa	xmm5,xmm4
+	movdqa	xmm9,xmm8
+	movdqa	xmm2,xmm0
+	movdqa	xmm6,xmm4
+	movdqa	xmm10,xmm8
+	movdqa	xmm14,XMMWORD[((160+96))+rbp]
+	paddd	xmm14,XMMWORD[$L$sse_inc]
+	movdqa	xmm13,xmm14
+	paddd	xmm13,XMMWORD[$L$sse_inc]
+	movdqa	xmm12,xmm13
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+	movdqa	XMMWORD[(160+96)+rbp],xmm12
+	movdqa	XMMWORD[(160+112)+rbp],xmm13
+	movdqa	XMMWORD[(160+128)+rbp],xmm14
+
+	mov	rcx,rbx
+	mov	r8,10*16
+	cmp	rcx,10*16
+	cmovg	rcx,r8
+	and	rcx,-16
+	xor	r8,r8
+$L$open_sse_tail_192_rounds_and_x1hash:
+	add	r10,QWORD[((0+0))+r8*1+rsi]
+	adc	r11,QWORD[((8+0))+r8*1+rsi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+$L$open_sse_tail_192_rounds:
+	add	r8,16
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,4
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,12
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol16]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,12
+	psrld	xmm6,20
+	pxor	xmm6,xmm3
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol8]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,7
+	psrld	xmm6,25
+	pxor	xmm6,xmm3
+DB	102,15,58,15,246,4
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,12
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,12
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,4
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol16]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,12
+	psrld	xmm6,20
+	pxor	xmm6,xmm3
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol8]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,7
+	psrld	xmm6,25
+	pxor	xmm6,xmm3
+DB	102,15,58,15,246,12
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,4
+
+	cmp	r8,rcx
+	jb	NEAR $L$open_sse_tail_192_rounds_and_x1hash
+	cmp	r8,10*16
+	jne	NEAR $L$open_sse_tail_192_rounds
+	cmp	rbx,11*16
+	jb	NEAR $L$open_sse_tail_192_finish
+	add	r10,QWORD[((0+160))+rsi]
+	adc	r11,QWORD[((8+160))+rsi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	cmp	rbx,12*16
+	jb	NEAR $L$open_sse_tail_192_finish
+	add	r10,QWORD[((0+176))+rsi]
+	adc	r11,QWORD[((8+176))+rsi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+$L$open_sse_tail_192_finish:
+	paddd	xmm2,XMMWORD[$L$chacha20_consts]
+	paddd	xmm6,XMMWORD[((160+48))+rbp]
+	paddd	xmm10,XMMWORD[((160+64))+rbp]
+	paddd	xmm14,XMMWORD[((160+128))+rbp]
+	paddd	xmm1,XMMWORD[$L$chacha20_consts]
+	paddd	xmm5,XMMWORD[((160+48))+rbp]
+	paddd	xmm9,XMMWORD[((160+64))+rbp]
+	paddd	xmm13,XMMWORD[((160+112))+rbp]
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,XMMWORD[((160+48))+rbp]
+	paddd	xmm8,XMMWORD[((160+64))+rbp]
+	paddd	xmm12,XMMWORD[((160+96))+rbp]
+	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
+	pxor	xmm2,xmm3
+	pxor	xmm6,xmm7
+	pxor	xmm10,xmm11
+	pxor	xmm15,xmm14
+	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
+	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
+	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
+	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
+	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
+	pxor	xmm1,xmm3
+	pxor	xmm5,xmm7
+	pxor	xmm9,xmm11
+	pxor	xmm15,xmm13
+	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
+	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
+	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
+	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
+
+	sub	rbx,8*16
+	lea	rsi,[128+rsi]
+	lea	rdi,[128+rdi]
+	jmp	NEAR $L$open_sse_tail_64_dec_loop
+
+$L$open_sse_tail_256:
+	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqa	xmm4,XMMWORD[((160+48))+rbp]
+	movdqa	xmm8,XMMWORD[((160+64))+rbp]
+	movdqa	xmm1,xmm0
+	movdqa	xmm5,xmm4
+	movdqa	xmm9,xmm8
+	movdqa	xmm2,xmm0
+	movdqa	xmm6,xmm4
+	movdqa	xmm10,xmm8
+	movdqa	xmm3,xmm0
+	movdqa	xmm7,xmm4
+	movdqa	xmm11,xmm8
+	movdqa	xmm15,XMMWORD[((160+96))+rbp]
+	paddd	xmm15,XMMWORD[$L$sse_inc]
+	movdqa	xmm14,xmm15
+	paddd	xmm14,XMMWORD[$L$sse_inc]
+	movdqa	xmm13,xmm14
+	paddd	xmm13,XMMWORD[$L$sse_inc]
+	movdqa	xmm12,xmm13
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+	movdqa	XMMWORD[(160+96)+rbp],xmm12
+	movdqa	XMMWORD[(160+112)+rbp],xmm13
+	movdqa	XMMWORD[(160+128)+rbp],xmm14
+	movdqa	XMMWORD[(160+144)+rbp],xmm15
+
+	xor	r8,r8
+$L$open_sse_tail_256_rounds_and_x1hash:
+	add	r10,QWORD[((0+0))+r8*1+rsi]
+	adc	r11,QWORD[((8+0))+r8*1+rsi]
+	adc	r12,1
+	movdqa	XMMWORD[(160+80)+rbp],xmm11
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm11,xmm4
+	pslld	xmm11,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm11
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm11,xmm4
+	pslld	xmm11,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm11
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm11,xmm5
+	pslld	xmm11,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm11
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm11,xmm5
+	pslld	xmm11,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm11
+DB	102,15,58,15,237,4
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,12
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol16]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm11,xmm6
+	pslld	xmm11,12
+	psrld	xmm6,20
+	pxor	xmm6,xmm11
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol8]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm11,xmm6
+	pslld	xmm11,7
+	psrld	xmm6,25
+	pxor	xmm6,xmm11
+DB	102,15,58,15,246,4
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,12
+	movdqa	xmm11,XMMWORD[((160+80))+rbp]
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	movdqa	XMMWORD[(160+80)+rbp],xmm9
+	paddd	xmm3,xmm7
+	pxor	xmm15,xmm3
+	pshufb	xmm15,XMMWORD[$L$rol16]
+	paddd	xmm11,xmm15
+	pxor	xmm7,xmm11
+	movdqa	xmm9,xmm7
+	pslld	xmm9,12
+	psrld	xmm7,20
+	pxor	xmm7,xmm9
+	paddd	xmm3,xmm7
+	pxor	xmm15,xmm3
+	pshufb	xmm15,XMMWORD[$L$rol8]
+	paddd	xmm11,xmm15
+	pxor	xmm7,xmm11
+	movdqa	xmm9,xmm7
+	pslld	xmm9,7
+	psrld	xmm7,25
+	pxor	xmm7,xmm9
+DB	102,15,58,15,255,4
+DB	102,69,15,58,15,219,8
+DB	102,69,15,58,15,255,12
+	movdqa	xmm9,XMMWORD[((160+80))+rbp]
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	movdqa	XMMWORD[(160+80)+rbp],xmm11
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm11,xmm4
+	pslld	xmm11,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm11
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm11,xmm4
+	pslld	xmm11,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm11
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm11,xmm5
+	pslld	xmm11,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm11
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm11,xmm5
+	pslld	xmm11,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm11
+DB	102,15,58,15,237,12
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,4
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol16]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm11,xmm6
+	pslld	xmm11,12
+	psrld	xmm6,20
+	pxor	xmm6,xmm11
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol8]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm11,xmm6
+	pslld	xmm11,7
+	psrld	xmm6,25
+	pxor	xmm6,xmm11
+DB	102,15,58,15,246,12
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,4
+	movdqa	xmm11,XMMWORD[((160+80))+rbp]
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	movdqa	XMMWORD[(160+80)+rbp],xmm9
+	paddd	xmm3,xmm7
+	pxor	xmm15,xmm3
+	pshufb	xmm15,XMMWORD[$L$rol16]
+	paddd	xmm11,xmm15
+	pxor	xmm7,xmm11
+	movdqa	xmm9,xmm7
+	pslld	xmm9,12
+	psrld	xmm7,20
+	pxor	xmm7,xmm9
+	paddd	xmm3,xmm7
+	pxor	xmm15,xmm3
+	pshufb	xmm15,XMMWORD[$L$rol8]
+	paddd	xmm11,xmm15
+	pxor	xmm7,xmm11
+	movdqa	xmm9,xmm7
+	pslld	xmm9,7
+	psrld	xmm7,25
+	pxor	xmm7,xmm9
+DB	102,15,58,15,255,12
+DB	102,69,15,58,15,219,8
+DB	102,69,15,58,15,255,4
+	movdqa	xmm9,XMMWORD[((160+80))+rbp]
+
+	add	r8,16
+	cmp	r8,10*16
+	jb	NEAR $L$open_sse_tail_256_rounds_and_x1hash
+
+	mov	rcx,rbx
+	and	rcx,-16
+$L$open_sse_tail_256_hash:
+	add	r10,QWORD[((0+0))+r8*1+rsi]
+	adc	r11,QWORD[((8+0))+r8*1+rsi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	add	r8,16
+	cmp	r8,rcx
+	jb	NEAR $L$open_sse_tail_256_hash
+	paddd	xmm3,XMMWORD[$L$chacha20_consts]
+	paddd	xmm7,XMMWORD[((160+48))+rbp]
+	paddd	xmm11,XMMWORD[((160+64))+rbp]
+	paddd	xmm15,XMMWORD[((160+144))+rbp]
+	paddd	xmm2,XMMWORD[$L$chacha20_consts]
+	paddd	xmm6,XMMWORD[((160+48))+rbp]
+	paddd	xmm10,XMMWORD[((160+64))+rbp]
+	paddd	xmm14,XMMWORD[((160+128))+rbp]
+	paddd	xmm1,XMMWORD[$L$chacha20_consts]
+	paddd	xmm5,XMMWORD[((160+48))+rbp]
+	paddd	xmm9,XMMWORD[((160+64))+rbp]
+	paddd	xmm13,XMMWORD[((160+112))+rbp]
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,XMMWORD[((160+48))+rbp]
+	paddd	xmm8,XMMWORD[((160+64))+rbp]
+	paddd	xmm12,XMMWORD[((160+96))+rbp]
+	movdqa	XMMWORD[(160+80)+rbp],xmm12
+	movdqu	xmm12,XMMWORD[((0 + 0))+rsi]
+	pxor	xmm12,xmm3
+	movdqu	XMMWORD[(0 + 0)+rdi],xmm12
+	movdqu	xmm12,XMMWORD[((16 + 0))+rsi]
+	pxor	xmm12,xmm7
+	movdqu	XMMWORD[(16 + 0)+rdi],xmm12
+	movdqu	xmm12,XMMWORD[((32 + 0))+rsi]
+	pxor	xmm12,xmm11
+	movdqu	XMMWORD[(32 + 0)+rdi],xmm12
+	movdqu	xmm12,XMMWORD[((48 + 0))+rsi]
+	pxor	xmm12,xmm15
+	movdqu	XMMWORD[(48 + 0)+rdi],xmm12
+	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
+	pxor	xmm2,xmm3
+	pxor	xmm6,xmm7
+	pxor	xmm10,xmm11
+	pxor	xmm15,xmm14
+	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
+	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
+	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
+	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
+	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
+	pxor	xmm1,xmm3
+	pxor	xmm5,xmm7
+	pxor	xmm9,xmm11
+	pxor	xmm15,xmm13
+	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
+	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
+	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
+	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
+
+	movdqa	xmm12,XMMWORD[((160+80))+rbp]
+	sub	rbx,12*16
+	lea	rsi,[192+rsi]
+	lea	rdi,[192+rdi]
+
+
+$L$open_sse_tail_64_dec_loop:
+	cmp	rbx,16
+	jb	NEAR $L$open_sse_tail_16_init
+	sub	rbx,16
+	movdqu	xmm3,XMMWORD[rsi]
+	pxor	xmm0,xmm3
+	movdqu	XMMWORD[rdi],xmm0
+	lea	rsi,[16+rsi]
+	lea	rdi,[16+rdi]
+	movdqa	xmm0,xmm4
+	movdqa	xmm4,xmm8
+	movdqa	xmm8,xmm12
+	jmp	NEAR $L$open_sse_tail_64_dec_loop
+$L$open_sse_tail_16_init:
+	movdqa	xmm1,xmm0
+
+
+$L$open_sse_tail_16:
+	test	rbx,rbx
+	jz	NEAR $L$open_sse_finalize
+
+
+
+	pxor	xmm3,xmm3
+	lea	rsi,[((-1))+rbx*1+rsi]
+	mov	r8,rbx
+$L$open_sse_tail_16_compose:
+	pslldq	xmm3,1
+	pinsrb	xmm3,BYTE[rsi],0
+	sub	rsi,1
+	sub	r8,1
+	jnz	NEAR $L$open_sse_tail_16_compose
+
+DB	102,73,15,126,221
+	pextrq	r14,xmm3,1
+
+	pxor	xmm3,xmm1
+
+
+$L$open_sse_tail_16_extract:
+	pextrb	XMMWORD[rdi],xmm3,0
+	psrldq	xmm3,1
+	add	rdi,1
+	sub	rbx,1
+	jne	NEAR $L$open_sse_tail_16_extract
+
+	add	r10,r13
+	adc	r11,r14
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+
+$L$open_sse_finalize:
+	add	r10,QWORD[((0+160+32))+rbp]
+	adc	r11,QWORD[((8+160+32))+rbp]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+
+	mov	r13,r10
+	mov	r14,r11
+	mov	r15,r12
+	sub	r10,-5
+	sbb	r11,-1
+	sbb	r12,3
+	cmovc	r10,r13
+	cmovc	r11,r14
+	cmovc	r12,r15
+
+	add	r10,QWORD[((0+160+16))+rbp]
+	adc	r11,QWORD[((8+160+16))+rbp]
+
+	movaps	xmm6,XMMWORD[((0+0))+rbp]
+	movaps	xmm7,XMMWORD[((16+0))+rbp]
+	movaps	xmm8,XMMWORD[((32+0))+rbp]
+	movaps	xmm9,XMMWORD[((48+0))+rbp]
+	movaps	xmm10,XMMWORD[((64+0))+rbp]
+	movaps	xmm11,XMMWORD[((80+0))+rbp]
+	movaps	xmm12,XMMWORD[((96+0))+rbp]
+	movaps	xmm13,XMMWORD[((112+0))+rbp]
+	movaps	xmm14,XMMWORD[((128+0))+rbp]
+	movaps	xmm15,XMMWORD[((144+0))+rbp]
+
+
+	add	rsp,288 + 160 + 32
+
+
+	pop	r9
+
+	mov	QWORD[r9],r10
+	mov	QWORD[8+r9],r11
+	pop	r15
+
+	pop	r14
+
+	pop	r13
+
+	pop	r12
+
+	pop	rbx
+
+	pop	rbp
+
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$open_sse_128:
+
+	movdqu	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqa	xmm1,xmm0
+	movdqa	xmm2,xmm0
+	movdqu	xmm4,XMMWORD[r9]
+	movdqa	xmm5,xmm4
+	movdqa	xmm6,xmm4
+	movdqu	xmm8,XMMWORD[16+r9]
+	movdqa	xmm9,xmm8
+	movdqa	xmm10,xmm8
+	movdqu	xmm12,XMMWORD[32+r9]
+	movdqa	xmm13,xmm12
+	paddd	xmm13,XMMWORD[$L$sse_inc]
+	movdqa	xmm14,xmm13
+	paddd	xmm14,XMMWORD[$L$sse_inc]
+	movdqa	xmm7,xmm4
+	movdqa	xmm11,xmm8
+	movdqa	xmm15,xmm13
+	mov	r10,10
+
+$L$open_sse_128_rounds:
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,4
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,12
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol16]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,12
+	psrld	xmm6,20
+	pxor	xmm6,xmm3
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol8]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,7
+	psrld	xmm6,25
+	pxor	xmm6,xmm3
+DB	102,15,58,15,246,4
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,12
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,12
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,4
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol16]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,12
+	psrld	xmm6,20
+	pxor	xmm6,xmm3
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol8]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,7
+	psrld	xmm6,25
+	pxor	xmm6,xmm3
+DB	102,15,58,15,246,12
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,4
+
+	dec	r10
+	jnz	NEAR $L$open_sse_128_rounds
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm1,XMMWORD[$L$chacha20_consts]
+	paddd	xmm2,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,xmm7
+	paddd	xmm5,xmm7
+	paddd	xmm6,xmm7
+	paddd	xmm9,xmm11
+	paddd	xmm10,xmm11
+	paddd	xmm13,xmm15
+	paddd	xmm15,XMMWORD[$L$sse_inc]
+	paddd	xmm14,xmm15
+
+	pand	xmm0,XMMWORD[$L$clamp]
+	movdqa	XMMWORD[(160+0)+rbp],xmm0
+	movdqa	XMMWORD[(160+16)+rbp],xmm4
+
+	mov	r8,r8
+	call	poly_hash_ad_internal
+$L$open_sse_128_xor_hash:
+	cmp	rbx,16
+	jb	NEAR $L$open_sse_tail_16
+	sub	rbx,16
+	add	r10,QWORD[((0+0))+rsi]
+	adc	r11,QWORD[((8+0))+rsi]
+	adc	r12,1
+
+
+	movdqu	xmm3,XMMWORD[rsi]
+	pxor	xmm1,xmm3
+	movdqu	XMMWORD[rdi],xmm1
+	lea	rsi,[16+rsi]
+	lea	rdi,[16+rdi]
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+
+	movdqa	xmm1,xmm5
+	movdqa	xmm5,xmm9
+	movdqa	xmm9,xmm13
+	movdqa	xmm13,xmm2
+	movdqa	xmm2,xmm6
+	movdqa	xmm6,xmm10
+	movdqa	xmm10,xmm14
+	jmp	NEAR $L$open_sse_128_xor_hash
+$L$SEH_end_chacha20_poly1305_open:
+
+
+
+
+
+
+
+
+global	chacha20_poly1305_seal
+
+ALIGN	64
+chacha20_poly1305_seal:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_chacha20_poly1305_seal:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+	push	rbp
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+
+
+	push	r9
+
+	sub	rsp,288 + 160 + 32
+
+	lea	rbp,[32+rsp]
+	and	rbp,-32
+
+	movaps	XMMWORD[(0+0)+rbp],xmm6
+	movaps	XMMWORD[(16+0)+rbp],xmm7
+	movaps	XMMWORD[(32+0)+rbp],xmm8
+	movaps	XMMWORD[(48+0)+rbp],xmm9
+	movaps	XMMWORD[(64+0)+rbp],xmm10
+	movaps	XMMWORD[(80+0)+rbp],xmm11
+	movaps	XMMWORD[(96+0)+rbp],xmm12
+	movaps	XMMWORD[(112+0)+rbp],xmm13
+	movaps	XMMWORD[(128+0)+rbp],xmm14
+	movaps	XMMWORD[(144+0)+rbp],xmm15
+
+	mov	rbx,QWORD[56+r9]
+	add	rbx,rdx
+	mov	QWORD[((0+160+32))+rbp],r8
+	mov	QWORD[((8+160+32))+rbp],rbx
+	mov	rbx,rdx
+
+	mov	eax,DWORD[((OPENSSL_ia32cap_P+8))]
+	and	eax,288
+	xor	eax,288
+	jz	NEAR chacha20_poly1305_seal_avx2
+
+	cmp	rbx,128
+	jbe	NEAR $L$seal_sse_128
+
+	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqu	xmm4,XMMWORD[r9]
+	movdqu	xmm8,XMMWORD[16+r9]
+	movdqu	xmm12,XMMWORD[32+r9]
+
+	movdqa	xmm1,xmm0
+	movdqa	xmm2,xmm0
+	movdqa	xmm3,xmm0
+	movdqa	xmm5,xmm4
+	movdqa	xmm6,xmm4
+	movdqa	xmm7,xmm4
+	movdqa	xmm9,xmm8
+	movdqa	xmm10,xmm8
+	movdqa	xmm11,xmm8
+	movdqa	xmm15,xmm12
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+	movdqa	xmm14,xmm12
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+	movdqa	xmm13,xmm12
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+
+	movdqa	XMMWORD[(160+48)+rbp],xmm4
+	movdqa	XMMWORD[(160+64)+rbp],xmm8
+	movdqa	XMMWORD[(160+96)+rbp],xmm12
+	movdqa	XMMWORD[(160+112)+rbp],xmm13
+	movdqa	XMMWORD[(160+128)+rbp],xmm14
+	movdqa	XMMWORD[(160+144)+rbp],xmm15
+	mov	r10,10
+$L$seal_sse_init_rounds:
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,XMMWORD[$L$rol16]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	pxor	xmm6,xmm10
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,20
+	pslld	xmm7,32-20
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,20
+	pslld	xmm6,32-20
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,20
+	pslld	xmm5,32-20
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,20
+	pslld	xmm4,32-20
+	pxor	xmm4,xmm8
+	movdqa	xmm8,XMMWORD[$L$rol8]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	pxor	xmm6,xmm10
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,25
+	pslld	xmm7,32-25
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,25
+	pslld	xmm6,32-25
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,25
+	pslld	xmm5,32-25
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,25
+	pslld	xmm4,32-25
+	pxor	xmm4,xmm8
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+DB	102,15,58,15,255,4
+DB	102,69,15,58,15,219,8
+DB	102,69,15,58,15,255,12
+DB	102,15,58,15,246,4
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,12
+DB	102,15,58,15,237,4
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,12
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,XMMWORD[$L$rol16]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	pxor	xmm6,xmm10
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,20
+	pslld	xmm7,32-20
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,20
+	pslld	xmm6,32-20
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,20
+	pslld	xmm5,32-20
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,20
+	pslld	xmm4,32-20
+	pxor	xmm4,xmm8
+	movdqa	xmm8,XMMWORD[$L$rol8]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	pxor	xmm6,xmm10
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,25
+	pslld	xmm7,32-25
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,25
+	pslld	xmm6,32-25
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,25
+	pslld	xmm5,32-25
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,25
+	pslld	xmm4,32-25
+	pxor	xmm4,xmm8
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+DB	102,15,58,15,255,12
+DB	102,69,15,58,15,219,8
+DB	102,69,15,58,15,255,4
+DB	102,15,58,15,246,12
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,4
+DB	102,15,58,15,237,12
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,4
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+
+	dec	r10
+	jnz	NEAR $L$seal_sse_init_rounds
+	paddd	xmm3,XMMWORD[$L$chacha20_consts]
+	paddd	xmm7,XMMWORD[((160+48))+rbp]
+	paddd	xmm11,XMMWORD[((160+64))+rbp]
+	paddd	xmm15,XMMWORD[((160+144))+rbp]
+	paddd	xmm2,XMMWORD[$L$chacha20_consts]
+	paddd	xmm6,XMMWORD[((160+48))+rbp]
+	paddd	xmm10,XMMWORD[((160+64))+rbp]
+	paddd	xmm14,XMMWORD[((160+128))+rbp]
+	paddd	xmm1,XMMWORD[$L$chacha20_consts]
+	paddd	xmm5,XMMWORD[((160+48))+rbp]
+	paddd	xmm9,XMMWORD[((160+64))+rbp]
+	paddd	xmm13,XMMWORD[((160+112))+rbp]
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,XMMWORD[((160+48))+rbp]
+	paddd	xmm8,XMMWORD[((160+64))+rbp]
+	paddd	xmm12,XMMWORD[((160+96))+rbp]
+
+
+	pand	xmm3,XMMWORD[$L$clamp]
+	movdqa	XMMWORD[(160+0)+rbp],xmm3
+	movdqa	XMMWORD[(160+16)+rbp],xmm7
+
+	mov	r8,r8
+	call	poly_hash_ad_internal
+	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
+	pxor	xmm2,xmm3
+	pxor	xmm6,xmm7
+	pxor	xmm10,xmm11
+	pxor	xmm15,xmm14
+	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
+	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
+	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
+	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
+	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
+	pxor	xmm1,xmm3
+	pxor	xmm5,xmm7
+	pxor	xmm9,xmm11
+	pxor	xmm15,xmm13
+	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
+	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
+	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
+	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
+
+	cmp	rbx,12*16
+	ja	NEAR $L$seal_sse_main_init
+	mov	rcx,8*16
+	sub	rbx,8*16
+	lea	rsi,[128+rsi]
+	jmp	NEAR $L$seal_sse_128_tail_hash
+$L$seal_sse_main_init:
+	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
+	pxor	xmm0,xmm3
+	pxor	xmm4,xmm7
+	pxor	xmm8,xmm11
+	pxor	xmm15,xmm12
+	movdqu	XMMWORD[(0 + 128)+rdi],xmm0
+	movdqu	XMMWORD[(16 + 128)+rdi],xmm4
+	movdqu	XMMWORD[(32 + 128)+rdi],xmm8
+	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
+
+	mov	rcx,12*16
+	sub	rbx,12*16
+	lea	rsi,[192+rsi]
+	mov	rcx,2
+	mov	r8,8
+	cmp	rbx,4*16
+	jbe	NEAR $L$seal_sse_tail_64
+	cmp	rbx,8*16
+	jbe	NEAR $L$seal_sse_tail_128
+	cmp	rbx,12*16
+	jbe	NEAR $L$seal_sse_tail_192
+
+$L$seal_sse_main_loop:
+	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqa	xmm4,XMMWORD[((160+48))+rbp]
+	movdqa	xmm8,XMMWORD[((160+64))+rbp]
+	movdqa	xmm1,xmm0
+	movdqa	xmm5,xmm4
+	movdqa	xmm9,xmm8
+	movdqa	xmm2,xmm0
+	movdqa	xmm6,xmm4
+	movdqa	xmm10,xmm8
+	movdqa	xmm3,xmm0
+	movdqa	xmm7,xmm4
+	movdqa	xmm11,xmm8
+	movdqa	xmm15,XMMWORD[((160+96))+rbp]
+	paddd	xmm15,XMMWORD[$L$sse_inc]
+	movdqa	xmm14,xmm15
+	paddd	xmm14,XMMWORD[$L$sse_inc]
+	movdqa	xmm13,xmm14
+	paddd	xmm13,XMMWORD[$L$sse_inc]
+	movdqa	xmm12,xmm13
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+	movdqa	XMMWORD[(160+96)+rbp],xmm12
+	movdqa	XMMWORD[(160+112)+rbp],xmm13
+	movdqa	XMMWORD[(160+128)+rbp],xmm14
+	movdqa	XMMWORD[(160+144)+rbp],xmm15
+
+ALIGN	32
+$L$seal_sse_main_rounds:
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,XMMWORD[$L$rol16]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	pxor	xmm6,xmm10
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,20
+	pslld	xmm7,32-20
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,20
+	pslld	xmm6,32-20
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,20
+	pslld	xmm5,32-20
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,20
+	pslld	xmm4,32-20
+	pxor	xmm4,xmm8
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	movdqa	xmm8,XMMWORD[$L$rol8]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	pxor	xmm6,xmm10
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,25
+	pslld	xmm7,32-25
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,25
+	pslld	xmm6,32-25
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,25
+	pslld	xmm5,32-25
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,25
+	pslld	xmm4,32-25
+	pxor	xmm4,xmm8
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+DB	102,15,58,15,255,4
+DB	102,69,15,58,15,219,8
+DB	102,69,15,58,15,255,12
+DB	102,15,58,15,246,4
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,12
+DB	102,15,58,15,237,4
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,12
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,XMMWORD[$L$rol16]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	pxor	xmm6,xmm10
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,20
+	pslld	xmm7,32-20
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,20
+	pslld	xmm6,32-20
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,20
+	pslld	xmm5,32-20
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,20
+	pslld	xmm4,32-20
+	pxor	xmm4,xmm8
+	movdqa	xmm8,XMMWORD[$L$rol8]
+	paddd	xmm3,xmm7
+	paddd	xmm2,xmm6
+	paddd	xmm1,xmm5
+	paddd	xmm0,xmm4
+	pxor	xmm15,xmm3
+	pxor	xmm14,xmm2
+	pxor	xmm13,xmm1
+	pxor	xmm12,xmm0
+DB	102,69,15,56,0,248
+DB	102,69,15,56,0,240
+DB	102,69,15,56,0,232
+DB	102,69,15,56,0,224
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+	paddd	xmm11,xmm15
+	paddd	xmm10,xmm14
+	paddd	xmm9,xmm13
+	paddd	xmm8,xmm12
+	pxor	xmm7,xmm11
+	pxor	xmm6,xmm10
+	pxor	xmm5,xmm9
+	pxor	xmm4,xmm8
+	movdqa	XMMWORD[(160+80)+rbp],xmm8
+	movdqa	xmm8,xmm7
+	psrld	xmm8,25
+	pslld	xmm7,32-25
+	pxor	xmm7,xmm8
+	movdqa	xmm8,xmm6
+	psrld	xmm8,25
+	pslld	xmm6,32-25
+	pxor	xmm6,xmm8
+	movdqa	xmm8,xmm5
+	psrld	xmm8,25
+	pslld	xmm5,32-25
+	pxor	xmm5,xmm8
+	movdqa	xmm8,xmm4
+	psrld	xmm8,25
+	pslld	xmm4,32-25
+	pxor	xmm4,xmm8
+	movdqa	xmm8,XMMWORD[((160+80))+rbp]
+DB	102,15,58,15,255,12
+DB	102,69,15,58,15,219,8
+DB	102,69,15,58,15,255,4
+DB	102,15,58,15,246,12
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,4
+DB	102,15,58,15,237,12
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,4
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+
+	lea	rdi,[16+rdi]
+	dec	r8
+	jge	NEAR $L$seal_sse_main_rounds
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[16+rdi]
+	dec	rcx
+	jg	NEAR $L$seal_sse_main_rounds
+	paddd	xmm3,XMMWORD[$L$chacha20_consts]
+	paddd	xmm7,XMMWORD[((160+48))+rbp]
+	paddd	xmm11,XMMWORD[((160+64))+rbp]
+	paddd	xmm15,XMMWORD[((160+144))+rbp]
+	paddd	xmm2,XMMWORD[$L$chacha20_consts]
+	paddd	xmm6,XMMWORD[((160+48))+rbp]
+	paddd	xmm10,XMMWORD[((160+64))+rbp]
+	paddd	xmm14,XMMWORD[((160+128))+rbp]
+	paddd	xmm1,XMMWORD[$L$chacha20_consts]
+	paddd	xmm5,XMMWORD[((160+48))+rbp]
+	paddd	xmm9,XMMWORD[((160+64))+rbp]
+	paddd	xmm13,XMMWORD[((160+112))+rbp]
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,XMMWORD[((160+48))+rbp]
+	paddd	xmm8,XMMWORD[((160+64))+rbp]
+	paddd	xmm12,XMMWORD[((160+96))+rbp]
+
+	movdqa	XMMWORD[(160+80)+rbp],xmm14
+	movdqa	XMMWORD[(160+80)+rbp],xmm14
+	movdqu	xmm14,XMMWORD[((0 + 0))+rsi]
+	pxor	xmm14,xmm3
+	movdqu	XMMWORD[(0 + 0)+rdi],xmm14
+	movdqu	xmm14,XMMWORD[((16 + 0))+rsi]
+	pxor	xmm14,xmm7
+	movdqu	XMMWORD[(16 + 0)+rdi],xmm14
+	movdqu	xmm14,XMMWORD[((32 + 0))+rsi]
+	pxor	xmm14,xmm11
+	movdqu	XMMWORD[(32 + 0)+rdi],xmm14
+	movdqu	xmm14,XMMWORD[((48 + 0))+rsi]
+	pxor	xmm14,xmm15
+	movdqu	XMMWORD[(48 + 0)+rdi],xmm14
+
+	movdqa	xmm14,XMMWORD[((160+80))+rbp]
+	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
+	pxor	xmm2,xmm3
+	pxor	xmm6,xmm7
+	pxor	xmm10,xmm11
+	pxor	xmm15,xmm14
+	movdqu	XMMWORD[(0 + 64)+rdi],xmm2
+	movdqu	XMMWORD[(16 + 64)+rdi],xmm6
+	movdqu	XMMWORD[(32 + 64)+rdi],xmm10
+	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
+	movdqu	xmm3,XMMWORD[((0 + 128))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 128))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 128))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 128))+rsi]
+	pxor	xmm1,xmm3
+	pxor	xmm5,xmm7
+	pxor	xmm9,xmm11
+	pxor	xmm15,xmm13
+	movdqu	XMMWORD[(0 + 128)+rdi],xmm1
+	movdqu	XMMWORD[(16 + 128)+rdi],xmm5
+	movdqu	XMMWORD[(32 + 128)+rdi],xmm9
+	movdqu	XMMWORD[(48 + 128)+rdi],xmm15
+
+	cmp	rbx,16*16
+	ja	NEAR $L$seal_sse_main_loop_xor
+
+	mov	rcx,12*16
+	sub	rbx,12*16
+	lea	rsi,[192+rsi]
+	jmp	NEAR $L$seal_sse_128_tail_hash
+$L$seal_sse_main_loop_xor:
+	movdqu	xmm3,XMMWORD[((0 + 192))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 192))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 192))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 192))+rsi]
+	pxor	xmm0,xmm3
+	pxor	xmm4,xmm7
+	pxor	xmm8,xmm11
+	pxor	xmm15,xmm12
+	movdqu	XMMWORD[(0 + 192)+rdi],xmm0
+	movdqu	XMMWORD[(16 + 192)+rdi],xmm4
+	movdqu	XMMWORD[(32 + 192)+rdi],xmm8
+	movdqu	XMMWORD[(48 + 192)+rdi],xmm15
+
+	lea	rsi,[256+rsi]
+	sub	rbx,16*16
+	mov	rcx,6
+	mov	r8,4
+	cmp	rbx,12*16
+	jg	NEAR $L$seal_sse_main_loop
+	mov	rcx,rbx
+	test	rbx,rbx
+	je	NEAR $L$seal_sse_128_tail_hash
+	mov	rcx,6
+	cmp	rbx,8*16
+	ja	NEAR $L$seal_sse_tail_192
+	cmp	rbx,4*16
+	ja	NEAR $L$seal_sse_tail_128
+
+$L$seal_sse_tail_64:
+	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqa	xmm4,XMMWORD[((160+48))+rbp]
+	movdqa	xmm8,XMMWORD[((160+64))+rbp]
+	movdqa	xmm12,XMMWORD[((160+96))+rbp]
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+	movdqa	XMMWORD[(160+96)+rbp],xmm12
+
+$L$seal_sse_tail_64_rounds_and_x2hash:
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[16+rdi]
+$L$seal_sse_tail_64_rounds_and_x1hash:
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[16+rdi]
+	dec	rcx
+	jg	NEAR $L$seal_sse_tail_64_rounds_and_x2hash
+	dec	r8
+	jge	NEAR $L$seal_sse_tail_64_rounds_and_x1hash
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,XMMWORD[((160+48))+rbp]
+	paddd	xmm8,XMMWORD[((160+64))+rbp]
+	paddd	xmm12,XMMWORD[((160+96))+rbp]
+
+	jmp	NEAR $L$seal_sse_128_tail_xor
+
+$L$seal_sse_tail_128:
+	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqa	xmm4,XMMWORD[((160+48))+rbp]
+	movdqa	xmm8,XMMWORD[((160+64))+rbp]
+	movdqa	xmm1,xmm0
+	movdqa	xmm5,xmm4
+	movdqa	xmm9,xmm8
+	movdqa	xmm13,XMMWORD[((160+96))+rbp]
+	paddd	xmm13,XMMWORD[$L$sse_inc]
+	movdqa	xmm12,xmm13
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+	movdqa	XMMWORD[(160+96)+rbp],xmm12
+	movdqa	XMMWORD[(160+112)+rbp],xmm13
+
+$L$seal_sse_tail_128_rounds_and_x2hash:
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[16+rdi]
+$L$seal_sse_tail_128_rounds_and_x1hash:
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,4
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,12
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,12
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,4
+
+	lea	rdi,[16+rdi]
+	dec	rcx
+	jg	NEAR $L$seal_sse_tail_128_rounds_and_x2hash
+	dec	r8
+	jge	NEAR $L$seal_sse_tail_128_rounds_and_x1hash
+	paddd	xmm1,XMMWORD[$L$chacha20_consts]
+	paddd	xmm5,XMMWORD[((160+48))+rbp]
+	paddd	xmm9,XMMWORD[((160+64))+rbp]
+	paddd	xmm13,XMMWORD[((160+112))+rbp]
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,XMMWORD[((160+48))+rbp]
+	paddd	xmm8,XMMWORD[((160+64))+rbp]
+	paddd	xmm12,XMMWORD[((160+96))+rbp]
+	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
+	pxor	xmm1,xmm3
+	pxor	xmm5,xmm7
+	pxor	xmm9,xmm11
+	pxor	xmm15,xmm13
+	movdqu	XMMWORD[(0 + 0)+rdi],xmm1
+	movdqu	XMMWORD[(16 + 0)+rdi],xmm5
+	movdqu	XMMWORD[(32 + 0)+rdi],xmm9
+	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
+
+	mov	rcx,4*16
+	sub	rbx,4*16
+	lea	rsi,[64+rsi]
+	jmp	NEAR $L$seal_sse_128_tail_hash
+
+$L$seal_sse_tail_192:
+	movdqa	xmm0,XMMWORD[$L$chacha20_consts]
+	movdqa	xmm4,XMMWORD[((160+48))+rbp]
+	movdqa	xmm8,XMMWORD[((160+64))+rbp]
+	movdqa	xmm1,xmm0
+	movdqa	xmm5,xmm4
+	movdqa	xmm9,xmm8
+	movdqa	xmm2,xmm0
+	movdqa	xmm6,xmm4
+	movdqa	xmm10,xmm8
+	movdqa	xmm14,XMMWORD[((160+96))+rbp]
+	paddd	xmm14,XMMWORD[$L$sse_inc]
+	movdqa	xmm13,xmm14
+	paddd	xmm13,XMMWORD[$L$sse_inc]
+	movdqa	xmm12,xmm13
+	paddd	xmm12,XMMWORD[$L$sse_inc]
+	movdqa	XMMWORD[(160+96)+rbp],xmm12
+	movdqa	XMMWORD[(160+112)+rbp],xmm13
+	movdqa	XMMWORD[(160+128)+rbp],xmm14
+
+$L$seal_sse_tail_192_rounds_and_x2hash:
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[16+rdi]
+$L$seal_sse_tail_192_rounds_and_x1hash:
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,4
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,12
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol16]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,12
+	psrld	xmm6,20
+	pxor	xmm6,xmm3
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol8]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,7
+	psrld	xmm6,25
+	pxor	xmm6,xmm3
+DB	102,15,58,15,246,4
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,12
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,12
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,4
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol16]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,12
+	psrld	xmm6,20
+	pxor	xmm6,xmm3
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol8]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,7
+	psrld	xmm6,25
+	pxor	xmm6,xmm3
+DB	102,15,58,15,246,12
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,4
+
+	lea	rdi,[16+rdi]
+	dec	rcx
+	jg	NEAR $L$seal_sse_tail_192_rounds_and_x2hash
+	dec	r8
+	jge	NEAR $L$seal_sse_tail_192_rounds_and_x1hash
+	paddd	xmm2,XMMWORD[$L$chacha20_consts]
+	paddd	xmm6,XMMWORD[((160+48))+rbp]
+	paddd	xmm10,XMMWORD[((160+64))+rbp]
+	paddd	xmm14,XMMWORD[((160+128))+rbp]
+	paddd	xmm1,XMMWORD[$L$chacha20_consts]
+	paddd	xmm5,XMMWORD[((160+48))+rbp]
+	paddd	xmm9,XMMWORD[((160+64))+rbp]
+	paddd	xmm13,XMMWORD[((160+112))+rbp]
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,XMMWORD[((160+48))+rbp]
+	paddd	xmm8,XMMWORD[((160+64))+rbp]
+	paddd	xmm12,XMMWORD[((160+96))+rbp]
+	movdqu	xmm3,XMMWORD[((0 + 0))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 0))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 0))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 0))+rsi]
+	pxor	xmm2,xmm3
+	pxor	xmm6,xmm7
+	pxor	xmm10,xmm11
+	pxor	xmm15,xmm14
+	movdqu	XMMWORD[(0 + 0)+rdi],xmm2
+	movdqu	XMMWORD[(16 + 0)+rdi],xmm6
+	movdqu	XMMWORD[(32 + 0)+rdi],xmm10
+	movdqu	XMMWORD[(48 + 0)+rdi],xmm15
+	movdqu	xmm3,XMMWORD[((0 + 64))+rsi]
+	movdqu	xmm7,XMMWORD[((16 + 64))+rsi]
+	movdqu	xmm11,XMMWORD[((32 + 64))+rsi]
+	movdqu	xmm15,XMMWORD[((48 + 64))+rsi]
+	pxor	xmm1,xmm3
+	pxor	xmm5,xmm7
+	pxor	xmm9,xmm11
+	pxor	xmm15,xmm13
+	movdqu	XMMWORD[(0 + 64)+rdi],xmm1
+	movdqu	XMMWORD[(16 + 64)+rdi],xmm5
+	movdqu	XMMWORD[(32 + 64)+rdi],xmm9
+	movdqu	XMMWORD[(48 + 64)+rdi],xmm15
+
+	mov	rcx,8*16
+	sub	rbx,8*16
+	lea	rsi,[128+rsi]
+
+$L$seal_sse_128_tail_hash:
+	cmp	rcx,16
+	jb	NEAR $L$seal_sse_128_tail_xor
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	sub	rcx,16
+	lea	rdi,[16+rdi]
+	jmp	NEAR $L$seal_sse_128_tail_hash
+
+$L$seal_sse_128_tail_xor:
+	cmp	rbx,16
+	jb	NEAR $L$seal_sse_tail_16
+	sub	rbx,16
+
+	movdqu	xmm3,XMMWORD[rsi]
+	pxor	xmm0,xmm3
+	movdqu	XMMWORD[rdi],xmm0
+
+	add	r10,QWORD[rdi]
+	adc	r11,QWORD[8+rdi]
+	adc	r12,1
+	lea	rsi,[16+rsi]
+	lea	rdi,[16+rdi]
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+
+	movdqa	xmm0,xmm4
+	movdqa	xmm4,xmm8
+	movdqa	xmm8,xmm12
+	movdqa	xmm12,xmm1
+	movdqa	xmm1,xmm5
+	movdqa	xmm5,xmm9
+	movdqa	xmm9,xmm13
+	jmp	NEAR $L$seal_sse_128_tail_xor
+
+$L$seal_sse_tail_16:
+	test	rbx,rbx
+	jz	NEAR $L$process_blocks_of_extra_in
+
+	mov	r8,rbx
+	mov	rcx,rbx
+	lea	rsi,[((-1))+rbx*1+rsi]
+	pxor	xmm15,xmm15
+$L$seal_sse_tail_16_compose:
+	pslldq	xmm15,1
+	pinsrb	xmm15,BYTE[rsi],0
+	lea	rsi,[((-1))+rsi]
+	dec	rcx
+	jne	NEAR $L$seal_sse_tail_16_compose
+
+
+	pxor	xmm15,xmm0
+
+
+	mov	rcx,rbx
+	movdqu	xmm0,xmm15
+$L$seal_sse_tail_16_extract:
+	pextrb	XMMWORD[rdi],xmm0,0
+	psrldq	xmm0,1
+	add	rdi,1
+	sub	rcx,1
+	jnz	NEAR $L$seal_sse_tail_16_extract
+
+
+
+
+
+
+
+
+	mov	r9,QWORD[((288 + 160 + 32))+rsp]
+	mov	r14,QWORD[56+r9]
+	mov	r13,QWORD[48+r9]
+	test	r14,r14
+	jz	NEAR $L$process_partial_block
+
+	mov	r15,16
+	sub	r15,rbx
+	cmp	r14,r15
+
+	jge	NEAR $L$load_extra_in
+	mov	r15,r14
+
+$L$load_extra_in:
+
+
+	lea	rsi,[((-1))+r15*1+r13]
+
+
+	add	r13,r15
+	sub	r14,r15
+	mov	QWORD[48+r9],r13
+	mov	QWORD[56+r9],r14
+
+
+
+	add	r8,r15
+
+
+	pxor	xmm11,xmm11
+$L$load_extra_load_loop:
+	pslldq	xmm11,1
+	pinsrb	xmm11,BYTE[rsi],0
+	lea	rsi,[((-1))+rsi]
+	sub	r15,1
+	jnz	NEAR $L$load_extra_load_loop
+
+
+
+
+	mov	r15,rbx
+
+$L$load_extra_shift_loop:
+	pslldq	xmm11,1
+	sub	r15,1
+	jnz	NEAR $L$load_extra_shift_loop
+
+
+
+
+	lea	r15,[$L$and_masks]
+	shl	rbx,4
+	pand	xmm15,XMMWORD[((-16))+rbx*1+r15]
+
+
+	por	xmm15,xmm11
+
+
+
+DB	102,77,15,126,253
+	pextrq	r14,xmm15,1
+	add	r10,r13
+	adc	r11,r14
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+
+$L$process_blocks_of_extra_in:	; hash every full 16-byte block of the buffer described at [r9+48]/[r9+56] into r10:r11:r12
+
+	mov	r9,QWORD[((288+32+160 ))+rsp]	; reload a pointer from the stack slot at rsp+480 (presumably the caller-supplied data block - stored outside this view)
+	mov	rsi,QWORD[48+r9]	; rsi = qword at +48, used below as the read pointer
+	mov	r8,QWORD[56+r9]	; r8 = qword at +56, used below as the byte length
+	mov	rcx,r8	; rcx keeps the full length for the trailer code
+	shr	r8,4	; r8 = number of full 16-byte blocks; ZF from this feeds the first jz below
+
+$L$process_extra_hash_loop:
+	jz	NEAR process_extra_in_trailer	; ZF comes from shr on entry, from sub r8,1 on later passes
+	add	r10,QWORD[((0+0))+rsi]	; accumulator += 16 bytes at rsi ...
+	adc	r11,QWORD[((8+0))+rsi]
+	adc	r12,1	; ... plus 1 in the third limb (i.e. + 2^128) and the carry
+	mov	rax,QWORD[((0+160+0))+rbp]	; low qword of the multiplier kept at rbp+160
+	mov	r15,rax
+	mul	r10	; rdx:rax = mult_lo * acc0
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11	; rdx:rax = mult_lo * acc1
+	imul	r15,r12	; r15 = mult_lo * acc2 (low 64 bits)
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]	; high qword of the multiplier kept at rbp+168
+	mov	r9,rax	; note: overwrites the pointer loaded into r9 above
+	mul	r10	; rdx:rax = mult_hi * acc0
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11	; rdx:rax = mult_hi * acc1
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12	; r9 = mult_hi * acc2 (low 64 bits)
+	add	r15,r10
+	adc	r9,rdx	; product now sits in r13:r14:r15:r9 (low to high)
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3	; keep only bits 128..129 in the third limb
+	mov	r13,r15
+	and	r13,-4	; r14:r13 = bits above 2^130, still scaled by 4
+	mov	r14,r9
+	shrd	r15,r9,2	; r9:r15 = the same high bits shifted right by 2
+	shr	r9,2
+	add	r15,r13	; 4*hi + hi = 5*hi, the fold for a modulus of 2^130-5
+	adc	r9,r14
+	add	r10,r15	; add the folded value back into the low limbs
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rsi,[16+rsi]	; next 16-byte block (lea leaves the flags alone)
+	sub	r8,1	; sets ZF for the jz at the loop head
+	jmp	NEAR $L$process_extra_hash_loop
+process_extra_in_trailer:	; gather the (length mod 16) leftover bytes into xmm15, or skip if there are none
+	and	rcx,15	; rcx = leftover byte count; sets ZF when it is zero
+	mov	rbx,rcx	; rbx = leftover count for the mask lookup that follows (mov keeps the flags)
+	jz	NEAR $L$do_length_block	; no leftover bytes: go hash the final block
+	lea	rsi,[((-1))+rcx*1+rsi]	; rsi -> last leftover byte; the gather walks backwards
+
+$L$process_extra_in_trailer_load:
+	pslldq	xmm15,1	; xmm15 is not cleared first; NOTE(review): stale upper bytes look to be removed by the pand with $L$and_masks right after this loop - confirm
+	pinsrb	xmm15,BYTE[rsi],0	; place the current byte in lane 0
+	lea	rsi,[((-1))+rsi]	; step to the previous byte
+	sub	rcx,1
+	jnz	NEAR $L$process_extra_in_trailer_load	; repeat for every leftover byte
+
+$L$process_partial_block:
+
+	lea	r15,[$L$and_masks]
+	shl	rbx,4
+	pand	xmm15,XMMWORD[((-16))+rbx*1+r15]
+DB	102,77,15,126,253
+	pextrq	r14,xmm15,1
+	add	r10,r13
+	adc	r11,r14
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+
+$L$do_length_block:	; absorb the 16 bytes at rbp+192, finish the hash, write the 16-byte result and return
+	add	r10,QWORD[((0+160+32))+rbp]	; accumulator += 16 bytes stored at rbp+192 (presumably the length block, per the label) ...
+	adc	r11,QWORD[((8+160+32))+rbp]
+	adc	r12,1	; ... plus 1 in the third limb (i.e. + 2^128) and the carry
+	mov	rax,QWORD[((0+160+0))+rbp]	; low qword of the multiplier kept at rbp+160
+	mov	r15,rax
+	mul	r10	; rdx:rax = mult_lo * acc0
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11	; rdx:rax = mult_lo * acc1
+	imul	r15,r12	; r15 = mult_lo * acc2 (low 64 bits)
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]	; high qword of the multiplier kept at rbp+168
+	mov	r9,rax
+	mul	r10	; rdx:rax = mult_hi * acc0
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11	; rdx:rax = mult_hi * acc1
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12	; r9 = mult_hi * acc2 (low 64 bits)
+	add	r15,r10
+	adc	r9,rdx	; product now sits in r13:r14:r15:r9 (low to high)
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3	; keep only bits 128..129 in the third limb
+	mov	r13,r15
+	and	r13,-4	; r14:r13 = bits above 2^130, still scaled by 4
+	mov	r14,r9
+	shrd	r15,r9,2	; r9:r15 = the same high bits shifted right by 2
+	shr	r9,2
+	add	r15,r13	; 4*hi + hi = 5*hi, the fold for a modulus of 2^130-5
+	adc	r9,r14
+	add	r10,r15	; add the folded value back into the low limbs
+	adc	r11,r9
+	adc	r12,0
+
+
+	mov	r13,r10	; keep a copy of the accumulator before the trial subtraction
+	mov	r14,r11
+	mov	r15,r12
+	sub	r10,-5	; three-limb subtract of 2^130-5 (limbs 0xFFFFFFFFFFFFFFFB, 0xFFFFFFFFFFFFFFFF, 3)
+	sbb	r11,-1
+	sbb	r12,3
+	cmovc	r10,r13	; borrow set: accumulator was already below 2^130-5, so restore the copy
+	cmovc	r11,r14
+	cmovc	r12,r15
+
+	add	r10,QWORD[((0+160+16))+rbp]	; add the 128-bit value kept at rbp+176; only the low 128 bits of the sum are used
+	adc	r11,QWORD[((8+160+16))+rbp]
+
+	movaps	xmm6,XMMWORD[((0+0))+rbp]	; reload xmm6..xmm15 from the 160 bytes at rbp (presumably saved by the prologue, which is outside this view)
+	movaps	xmm7,XMMWORD[((16+0))+rbp]
+	movaps	xmm8,XMMWORD[((32+0))+rbp]
+	movaps	xmm9,XMMWORD[((48+0))+rbp]
+	movaps	xmm10,XMMWORD[((64+0))+rbp]
+	movaps	xmm11,XMMWORD[((80+0))+rbp]
+	movaps	xmm12,XMMWORD[((96+0))+rbp]
+	movaps	xmm13,XMMWORD[((112+0))+rbp]
+	movaps	xmm14,XMMWORD[((128+0))+rbp]
+	movaps	xmm15,XMMWORD[((144+0))+rbp]
+
+
+	add	rsp,288 + 160 + 32	; release the 480-byte stack frame
+
+
+	pop	r9	; r9 = destination pointer for the result (pushed outside this view)
+
+	mov	QWORD[r9],r10	; store the low 8 bytes of the result
+	mov	QWORD[8+r9],r11	; store the high 8 bytes of the result
+	pop	r15	; restore the general-purpose registers pushed on entry
+
+	pop	r14
+
+	pop	r13
+
+	pop	r12
+
+	pop	rbx
+
+	pop	rbp
+
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$seal_sse_128:	; set up three 4-row states in (xmm0,4,8,12), (xmm1,5,9,13), (xmm2,6,10,14) for the 10-iteration round loop
+
+	movdqu	xmm0,XMMWORD[$L$chacha20_consts]	; row 0 = constants, shared by all three states
+	movdqa	xmm1,xmm0
+	movdqa	xmm2,xmm0
+	movdqu	xmm4,XMMWORD[r9]	; row 1 = 16 bytes at [r9] (presumably the first key half)
+	movdqa	xmm5,xmm4
+	movdqa	xmm6,xmm4
+	movdqu	xmm8,XMMWORD[16+r9]	; row 2 = 16 bytes at [r9+16] (presumably the second key half)
+	movdqa	xmm9,xmm8
+	movdqa	xmm10,xmm8
+	movdqu	xmm14,XMMWORD[32+r9]	; row 3 of the third state = 16 bytes at [r9+32], used unmodified
+	movdqa	xmm12,xmm14
+	paddd	xmm12,XMMWORD[$L$sse_inc]	; row 3 of the first state = base + $L$sse_inc
+	movdqa	xmm13,xmm12
+	paddd	xmm13,XMMWORD[$L$sse_inc]	; row 3 of the second state = base + 2 * $L$sse_inc
+	movdqa	xmm7,xmm4	; keep the initial rows 1..3 so they can be added back after the rounds
+	movdqa	xmm11,xmm8
+	movdqa	xmm15,xmm12
+	mov	r10,10	; r10 = iteration count for $L$seal_sse_128_rounds
+
+$L$seal_sse_128_rounds:
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,4
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,12
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,4
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,12
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol16]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,12
+	psrld	xmm6,20
+	pxor	xmm6,xmm3
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol8]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,7
+	psrld	xmm6,25
+	pxor	xmm6,xmm3
+DB	102,15,58,15,246,4
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,12
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol16]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,12
+	psrld	xmm4,20
+	pxor	xmm4,xmm3
+	paddd	xmm0,xmm4
+	pxor	xmm12,xmm0
+	pshufb	xmm12,XMMWORD[$L$rol8]
+	paddd	xmm8,xmm12
+	pxor	xmm4,xmm8
+	movdqa	xmm3,xmm4
+	pslld	xmm3,7
+	psrld	xmm4,25
+	pxor	xmm4,xmm3
+DB	102,15,58,15,228,12
+DB	102,69,15,58,15,192,8
+DB	102,69,15,58,15,228,4
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol16]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,12
+	psrld	xmm5,20
+	pxor	xmm5,xmm3
+	paddd	xmm1,xmm5
+	pxor	xmm13,xmm1
+	pshufb	xmm13,XMMWORD[$L$rol8]
+	paddd	xmm9,xmm13
+	pxor	xmm5,xmm9
+	movdqa	xmm3,xmm5
+	pslld	xmm3,7
+	psrld	xmm5,25
+	pxor	xmm5,xmm3
+DB	102,15,58,15,237,12
+DB	102,69,15,58,15,201,8
+DB	102,69,15,58,15,237,4
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol16]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,12
+	psrld	xmm6,20
+	pxor	xmm6,xmm3
+	paddd	xmm2,xmm6
+	pxor	xmm14,xmm2
+	pshufb	xmm14,XMMWORD[$L$rol8]
+	paddd	xmm10,xmm14
+	pxor	xmm6,xmm10
+	movdqa	xmm3,xmm6
+	pslld	xmm3,7
+	psrld	xmm6,25
+	pxor	xmm6,xmm3
+DB	102,15,58,15,246,12
+DB	102,69,15,58,15,210,8
+DB	102,69,15,58,15,246,4
+
+	dec	r10
+	jnz	NEAR $L$seal_sse_128_rounds
+	paddd	xmm0,XMMWORD[$L$chacha20_consts]
+	paddd	xmm1,XMMWORD[$L$chacha20_consts]
+	paddd	xmm2,XMMWORD[$L$chacha20_consts]
+	paddd	xmm4,xmm7
+	paddd	xmm5,xmm7
+	paddd	xmm6,xmm7
+	paddd	xmm8,xmm11
+	paddd	xmm9,xmm11
+	paddd	xmm12,xmm15
+	paddd	xmm15,XMMWORD[$L$sse_inc]
+	paddd	xmm13,xmm15
+
+	pand	xmm2,XMMWORD[$L$clamp]
+	movdqa	XMMWORD[(160+0)+rbp],xmm2
+	movdqa	XMMWORD[(160+16)+rbp],xmm6
+
+	mov	r8,r8
+	call	poly_hash_ad_internal
+	jmp	NEAR $L$seal_sse_128_tail_xor
+$L$SEH_end_chacha20_poly1305_seal:
+
+
+
+
+ALIGN	64
+chacha20_poly1305_open_avx2:	; AVX2 entry: build the initial state, then pick a code path from the length in rbx
+
+
+
+
+
+
+
+
+
+
+
+
+	vzeroupper
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]	; row 0 = constants
+	vbroadcasti128	ymm4,XMMWORD[r9]	; row 1 = 16 bytes at [r9], copied into both 128-bit lanes
+	vbroadcasti128	ymm8,XMMWORD[16+r9]	; row 2 = 16 bytes at [r9+16], copied into both lanes
+	vbroadcasti128	ymm12,XMMWORD[32+r9]	; row 3 = 16 bytes at [r9+32], copied into both lanes
+	vpaddd	ymm12,ymm12,YMMWORD[$L$avx2_init]	; add $L$avx2_init (presumably gives each lane its own block counter - table not visible here)
+	cmp	rbx,6*32
+	jbe	NEAR $L$open_avx2_192	; rbx <= 192: use the dedicated short path
+	cmp	rbx,10*32
+	jbe	NEAR $L$open_avx2_320	; rbx <= 320: use the dedicated medium path
+
+	vmovdqa	YMMWORD[(160+64)+rbp],ymm4	; save the initial rows 1..3 at rbp+224 / +256 / +320 for later reuse
+	vmovdqa	YMMWORD[(160+96)+rbp],ymm8
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
+	mov	r10,10	; r10 = iteration count for $L$open_avx2_init_rounds
+$L$open_avx2_init_rounds:
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,12
+
+	dec	r10
+	jne	NEAR $L$open_avx2_init_rounds
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+
+	vperm2i128	ymm3,ymm4,ymm0,0x02
+
+	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
+	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
+
+	vperm2i128	ymm0,ymm4,ymm0,0x13
+	vperm2i128	ymm4,ymm12,ymm8,0x13
+
+	mov	r8,r8
+	call	poly_hash_ad_internal
+
+	xor	rcx,rcx
+$L$open_avx2_init_hash:
+	add	r10,QWORD[((0+0))+rcx*1+rsi]
+	adc	r11,QWORD[((8+0))+rcx*1+rsi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	add	rcx,16
+	cmp	rcx,2*32
+	jne	NEAR $L$open_avx2_init_hash
+
+	vpxor	ymm0,ymm0,YMMWORD[rsi]
+	vpxor	ymm4,ymm4,YMMWORD[32+rsi]
+
+	vmovdqu	YMMWORD[rdi],ymm0
+	vmovdqu	YMMWORD[32+rdi],ymm4
+	lea	rsi,[64+rsi]
+	lea	rdi,[64+rdi]
+	sub	rbx,2*32
+$L$open_avx2_main_loop:
+
+	cmp	rbx,16*32
+	jb	NEAR $L$open_avx2_main_loop_done
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
+	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
+	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
+	vmovdqa	ymm1,ymm0
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm10,ymm8
+	vmovdqa	ymm3,ymm0
+	vmovdqa	ymm7,ymm4
+	vmovdqa	ymm11,ymm8
+	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
+	vpaddd	ymm14,ymm12,ymm15
+	vpaddd	ymm13,ymm12,ymm14
+	vpaddd	ymm12,ymm12,ymm13
+	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
+	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
+
+	xor	rcx,rcx
+$L$open_avx2_main_loop_rounds:
+	add	r10,QWORD[((0+0))+rcx*1+rsi]
+	adc	r11,QWORD[((8+0))+rcx*1+rsi]
+	adc	r12,1
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	add	r15,rax
+	adc	r9,rdx
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	add	r10,QWORD[((0+16))+rcx*1+rsi]
+	adc	r11,QWORD[((8+16))+rcx*1+rsi]
+	adc	r12,1
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
+	vpalignr	ymm7,ymm7,ymm7,4
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,12
+	vpalignr	ymm6,ymm6,ymm6,4
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,12
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	vpalignr	ymm5,ymm5,ymm5,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm12,ymm12,ymm12,12
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	add	r15,rax
+	adc	r9,rdx
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	add	r10,QWORD[((0+32))+rcx*1+rsi]
+	adc	r11,QWORD[((8+32))+rcx*1+rsi]
+	adc	r12,1
+
+	lea	rcx,[48+rcx]
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	add	r15,rax
+	adc	r9,rdx
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
+	vpalignr	ymm7,ymm7,ymm7,12
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,4
+	vpalignr	ymm6,ymm6,ymm6,12
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,4
+	vpalignr	ymm5,ymm5,ymm5,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vpalignr	ymm12,ymm12,ymm12,4
+
+	cmp	rcx,10*6*8
+	jne	NEAR $L$open_avx2_main_loop_rounds
+	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
+	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
+	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
+	add	r10,QWORD[((0+480))+rsi]
+	adc	r11,QWORD[((8+480))+rsi]
+	adc	r12,1
+	vperm2i128	ymm0,ymm7,ymm3,0x02
+	vperm2i128	ymm7,ymm7,ymm3,0x13
+	vperm2i128	ymm3,ymm15,ymm11,0x02
+	vperm2i128	ymm11,ymm15,ymm11,0x13
+	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
+	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
+	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
+	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
+	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
+	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
+	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
+	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
+
+	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vperm2i128	ymm3,ymm6,ymm2,0x02
+	vperm2i128	ymm6,ymm6,ymm2,0x13
+	vperm2i128	ymm2,ymm14,ymm10,0x02
+	vperm2i128	ymm10,ymm14,ymm10,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
+	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
+	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
+	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
+	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
+	add	r10,QWORD[((0+480+16))+rsi]
+	adc	r11,QWORD[((8+480+16))+rsi]
+	adc	r12,1
+	vperm2i128	ymm3,ymm5,ymm1,0x02
+	vperm2i128	ymm5,ymm5,ymm1,0x13
+	vperm2i128	ymm1,ymm13,ymm9,0x02
+	vperm2i128	ymm9,ymm13,ymm9,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
+	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
+	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
+	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
+	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vperm2i128	ymm3,ymm4,ymm0,0x02
+	vperm2i128	ymm4,ymm4,ymm0,0x13
+	vperm2i128	ymm0,ymm12,ymm8,0x02
+	vperm2i128	ymm8,ymm12,ymm8,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+384))+rsi]
+	vpxor	ymm0,ymm0,YMMWORD[((32+384))+rsi]
+	vpxor	ymm4,ymm4,YMMWORD[((64+384))+rsi]
+	vpxor	ymm8,ymm8,YMMWORD[((96+384))+rsi]
+	vmovdqu	YMMWORD[(0+384)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+384)+rdi],ymm0
+	vmovdqu	YMMWORD[(64+384)+rdi],ymm4
+	vmovdqu	YMMWORD[(96+384)+rdi],ymm8
+
+	lea	rsi,[512+rsi]
+	lea	rdi,[512+rdi]
+	sub	rbx,16*32
+	jmp	NEAR $L$open_avx2_main_loop
+$L$open_avx2_main_loop_done:	; fewer than 512 bytes remain in rbx: finish, or dispatch to a tail path by size
+	test	rbx,rbx
+	vzeroupper	; sits between test and je; the je still consumes ZF from the test
+	je	NEAR $L$open_sse_finalize	; nothing left: go to finalization
+
+	cmp	rbx,12*32
+	ja	NEAR $L$open_avx2_tail_512	; more than 384 bytes left
+	cmp	rbx,8*32
+	ja	NEAR $L$open_avx2_tail_384	; more than 256 bytes left
+	cmp	rbx,4*32
+	ja	NEAR $L$open_avx2_tail_256	; more than 128 bytes left
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]	; 1..128 bytes left: rebuild a single state from the saved rows
+	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
+	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
+	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]	; row 3 = saved row 3 + $L$avx2_inc
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12	; write the advanced row 3 back to rbp+320
+
+	xor	r8,r8	; r8 = byte offset / progress counter for the loop below
+	mov	rcx,rbx
+	and	rcx,-16	; rcx = rbx rounded down to a multiple of 16
+	test	rcx,rcx
+	je	NEAR $L$open_avx2_tail_128_rounds	; no full 16-byte block: enter the loop past the hashing step
+$L$open_avx2_tail_128_rounds_and_x1hash:
+	add	r10,QWORD[((0+0))+r8*1+rsi]
+	adc	r11,QWORD[((8+0))+r8*1+rsi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+$L$open_avx2_tail_128_rounds:
+	add	r8,16
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,12
+
+	cmp	r8,rcx
+	jb	NEAR $L$open_avx2_tail_128_rounds_and_x1hash
+	cmp	r8,160
+	jne	NEAR $L$open_avx2_tail_128_rounds
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+	vperm2i128	ymm3,ymm4,ymm0,0x13
+	vperm2i128	ymm0,ymm4,ymm0,0x02
+	vperm2i128	ymm4,ymm12,ymm8,0x02
+	vperm2i128	ymm12,ymm12,ymm8,0x13
+	vmovdqa	ymm8,ymm3
+
+	jmp	NEAR $L$open_avx2_tail_128_xor
+
+$L$open_avx2_tail_256:	; tail path for 129..256 remaining bytes: two states in (ymm0,4,8,12) and (ymm1,5,9,13)
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]	; rebuild rows 0..2 from the constants and the rows saved at rbp+224 / +256
+	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
+	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
+	vmovdqa	ymm1,ymm0	; second state starts as a copy of the first
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm13,ymm12,YMMWORD[((160+160))+rbp]	; second state row 3 = saved row 3 + $L$avx2_inc
+	vpaddd	ymm12,ymm12,ymm13	; first state row 3 = saved row 3 + 2 * $L$avx2_inc
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12	; keep both row-3 values for the add-back after the rounds
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+
+	mov	QWORD[((160+128))+rbp],rbx	; park the remaining length at rbp+288; rbx is reused as a pointer below
+	mov	rcx,rbx
+	sub	rcx,4*32
+	shr	rcx,4	; rcx = (remaining - 128) / 16
+	mov	r8,10
+	cmp	rcx,10
+	cmovg	rcx,r8	; rcx = min(rcx, 10) (signed compare); bounds the hashed iterations of the loop below
+	mov	rbx,rsi	; rbx = read pointer for the hashing in the loop below
+	xor	r8,r8	; r8 = iteration counter
+$L$open_avx2_tail_256_rounds_and_x1hash:
+	add	r10,QWORD[((0+0))+rbx]
+	adc	r11,QWORD[((8+0))+rbx]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rbx,[16+rbx]
+$L$open_avx2_tail_256_rounds:
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,4
+
+	inc	r8
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,12
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpsrld	ymm3,ymm6,20
+	vpslld	ymm6,ymm6,12
+	vpxor	ymm6,ymm6,ymm3
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpslld	ymm3,ymm6,7
+	vpsrld	ymm6,ymm6,25
+	vpxor	ymm6,ymm6,ymm3
+	vpalignr	ymm14,ymm14,ymm14,4
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm6,ymm6,ymm6,12
+
+	cmp	r8,rcx
+	jb	NEAR $L$open_avx2_tail_256_rounds_and_x1hash
+	cmp	r8,10
+	jne	NEAR $L$open_avx2_tail_256_rounds
+	mov	r8,rbx
+	sub	rbx,rsi
+	mov	rcx,rbx
+	mov	rbx,QWORD[((160+128))+rbp]
+$L$open_avx2_tail_256_hash:
+	add	rcx,16
+	cmp	rcx,rbx
+	jg	NEAR $L$open_avx2_tail_256_done
+	add	r10,QWORD[((0+0))+r8]
+	adc	r11,QWORD[((8+0))+r8]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	r8,[16+r8]
+	jmp	NEAR $L$open_avx2_tail_256_hash
+$L$open_avx2_tail_256_done:
+	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+	vperm2i128	ymm3,ymm5,ymm1,0x02
+	vperm2i128	ymm5,ymm5,ymm1,0x13
+	vperm2i128	ymm1,ymm13,ymm9,0x02
+	vperm2i128	ymm9,ymm13,ymm9,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[((32+0))+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[((64+0))+rsi]
+	vpxor	ymm9,ymm9,YMMWORD[((96+0))+rsi]
+	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+0)+rdi],ymm1
+	vmovdqu	YMMWORD[(64+0)+rdi],ymm5
+	vmovdqu	YMMWORD[(96+0)+rdi],ymm9
+	vperm2i128	ymm3,ymm4,ymm0,0x13
+	vperm2i128	ymm0,ymm4,ymm0,0x02
+	vperm2i128	ymm4,ymm12,ymm8,0x02
+	vperm2i128	ymm12,ymm12,ymm8,0x13
+	vmovdqa	ymm8,ymm3
+
+	lea	rsi,[128+rsi]
+	lea	rdi,[128+rdi]
+	sub	rbx,4*32
+	jmp	NEAR $L$open_avx2_tail_128_xor
+
+$L$open_avx2_tail_384:
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
+	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
+	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
+	vmovdqa	ymm1,ymm0
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm10,ymm8
+	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm14,ymm12,YMMWORD[((160+160))+rbp]
+	vpaddd	ymm13,ymm12,ymm14
+	vpaddd	ymm12,ymm12,ymm13
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
+
+	mov	QWORD[((160+128))+rbp],rbx
+	mov	rcx,rbx
+	sub	rcx,8*32
+	shr	rcx,4
+	add	rcx,6
+	mov	r8,10
+	cmp	rcx,10
+	cmovg	rcx,r8
+	mov	rbx,rsi
+	xor	r8,r8
+$L$open_avx2_tail_384_rounds_and_x2hash:
+	add	r10,QWORD[((0+0))+rbx]
+	adc	r11,QWORD[((8+0))+rbx]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rbx,[16+rbx]
+$L$open_avx2_tail_384_rounds_and_x1hash:
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpsrld	ymm3,ymm6,20
+	vpslld	ymm6,ymm6,12
+	vpxor	ymm6,ymm6,ymm3
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpslld	ymm3,ymm6,7
+	vpsrld	ymm6,ymm6,25
+	vpxor	ymm6,ymm6,ymm3
+	vpalignr	ymm14,ymm14,ymm14,12
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm6,ymm6,ymm6,4
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,4
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,4
+	add	r10,QWORD[((0+0))+rbx]
+	adc	r11,QWORD[((8+0))+rbx]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rbx,[16+rbx]
+	inc	r8
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpsrld	ymm3,ymm6,20
+	vpslld	ymm6,ymm6,12
+	vpxor	ymm6,ymm6,ymm3
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpslld	ymm3,ymm6,7
+	vpsrld	ymm6,ymm6,25
+	vpxor	ymm6,ymm6,ymm3
+	vpalignr	ymm14,ymm14,ymm14,4
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm6,ymm6,ymm6,12
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,12
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,12
+
+	cmp	r8,rcx
+	jb	NEAR $L$open_avx2_tail_384_rounds_and_x2hash
+	cmp	r8,10
+	jne	NEAR $L$open_avx2_tail_384_rounds_and_x1hash
+	mov	r8,rbx
+	sub	rbx,rsi
+	mov	rcx,rbx
+	mov	rbx,QWORD[((160+128))+rbp]
+$L$open_avx2_384_tail_hash:
+	add	rcx,16
+	cmp	rcx,rbx
+	jg	NEAR $L$open_avx2_384_tail_done
+	add	r10,QWORD[((0+0))+r8]
+	adc	r11,QWORD[((8+0))+r8]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	r8,[16+r8]
+	jmp	NEAR $L$open_avx2_384_tail_hash
+$L$open_avx2_384_tail_done:
+	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
+	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+	vperm2i128	ymm3,ymm6,ymm2,0x02
+	vperm2i128	ymm6,ymm6,ymm2,0x13
+	vperm2i128	ymm2,ymm14,ymm10,0x02
+	vperm2i128	ymm10,ymm14,ymm10,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[((32+0))+rsi]
+	vpxor	ymm6,ymm6,YMMWORD[((64+0))+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[((96+0))+rsi]
+	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+0)+rdi],ymm2
+	vmovdqu	YMMWORD[(64+0)+rdi],ymm6
+	vmovdqu	YMMWORD[(96+0)+rdi],ymm10
+	vperm2i128	ymm3,ymm5,ymm1,0x02
+	vperm2i128	ymm5,ymm5,ymm1,0x13
+	vperm2i128	ymm1,ymm13,ymm9,0x02
+	vperm2i128	ymm9,ymm13,ymm9,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[((32+128))+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[((64+128))+rsi]
+	vpxor	ymm9,ymm9,YMMWORD[((96+128))+rsi]
+	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+128)+rdi],ymm1
+	vmovdqu	YMMWORD[(64+128)+rdi],ymm5
+	vmovdqu	YMMWORD[(96+128)+rdi],ymm9
+	vperm2i128	ymm3,ymm4,ymm0,0x13
+	vperm2i128	ymm0,ymm4,ymm0,0x02
+	vperm2i128	ymm4,ymm12,ymm8,0x02
+	vperm2i128	ymm12,ymm12,ymm8,0x13
+	vmovdqa	ymm8,ymm3
+
+	lea	rsi,[256+rsi]
+	lea	rdi,[256+rdi]
+	sub	rbx,8*32
+	jmp	NEAR $L$open_avx2_tail_128_xor
+
+$L$open_avx2_tail_512:
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
+	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
+	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
+	vmovdqa	ymm1,ymm0
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm10,ymm8
+	vmovdqa	ymm3,ymm0
+	vmovdqa	ymm7,ymm4
+	vmovdqa	ymm11,ymm8
+	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
+	vpaddd	ymm14,ymm12,ymm15
+	vpaddd	ymm13,ymm12,ymm14
+	vpaddd	ymm12,ymm12,ymm13
+	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
+	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
+
+	xor	rcx,rcx
+	mov	r8,rsi
+$L$open_avx2_tail_512_rounds_and_x2hash:
+	add	r10,QWORD[((0+0))+r8]
+	adc	r11,QWORD[((8+0))+r8]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	r8,[16+r8]
+$L$open_avx2_tail_512_rounds_and_x1hash:
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	add	r10,QWORD[((0+0))+r8]
+	adc	r11,QWORD[((8+0))+r8]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
+	vpalignr	ymm7,ymm7,ymm7,4
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,12
+	vpalignr	ymm6,ymm6,ymm6,4
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,12
+	vpalignr	ymm5,ymm5,ymm5,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm12,ymm12,ymm12,12
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	add	r10,QWORD[((0+16))+r8]
+	adc	r11,QWORD[((8+16))+r8]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	r8,[32+r8]
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
+	vpalignr	ymm7,ymm7,ymm7,12
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,4
+	vpalignr	ymm6,ymm6,ymm6,12
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,4
+	vpalignr	ymm5,ymm5,ymm5,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm12,ymm12,ymm12,4
+
+	inc	rcx
+	cmp	rcx,4
+	jl	NEAR $L$open_avx2_tail_512_rounds_and_x2hash
+	cmp	rcx,10
+	jne	NEAR $L$open_avx2_tail_512_rounds_and_x1hash
+	mov	rcx,rbx
+	sub	rcx,12*32
+	and	rcx,-16
+$L$open_avx2_tail_512_hash:
+	test	rcx,rcx
+	je	NEAR $L$open_avx2_tail_512_done
+	add	r10,QWORD[((0+0))+r8]
+	adc	r11,QWORD[((8+0))+r8]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	r8,[16+r8]
+	sub	rcx,2*8
+	jmp	NEAR $L$open_avx2_tail_512_hash
+$L$open_avx2_tail_512_done:
+	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
+	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
+	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
+	vperm2i128	ymm0,ymm7,ymm3,0x02
+	vperm2i128	ymm7,ymm7,ymm3,0x13
+	vperm2i128	ymm3,ymm15,ymm11,0x02
+	vperm2i128	ymm11,ymm15,ymm11,0x13
+	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
+	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
+	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
+	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
+	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
+	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
+	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
+	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
+
+	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
+	vperm2i128	ymm3,ymm6,ymm2,0x02
+	vperm2i128	ymm6,ymm6,ymm2,0x13
+	vperm2i128	ymm2,ymm14,ymm10,0x02
+	vperm2i128	ymm10,ymm14,ymm10,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
+	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
+	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
+	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
+	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
+	vperm2i128	ymm3,ymm5,ymm1,0x02
+	vperm2i128	ymm5,ymm5,ymm1,0x13
+	vperm2i128	ymm1,ymm13,ymm9,0x02
+	vperm2i128	ymm9,ymm13,ymm9,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
+	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
+	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
+	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
+	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
+	vperm2i128	ymm3,ymm4,ymm0,0x13
+	vperm2i128	ymm0,ymm4,ymm0,0x02
+	vperm2i128	ymm4,ymm12,ymm8,0x02
+	vperm2i128	ymm12,ymm12,ymm8,0x13
+	vmovdqa	ymm8,ymm3
+
+	lea	rsi,[384+rsi]
+	lea	rdi,[384+rdi]
+	sub	rbx,12*32
+$L$open_avx2_tail_128_xor:
+	cmp	rbx,32
+	jb	NEAR $L$open_avx2_tail_32_xor
+	sub	rbx,32
+	vpxor	ymm0,ymm0,YMMWORD[rsi]
+	vmovdqu	YMMWORD[rdi],ymm0
+	lea	rsi,[32+rsi]
+	lea	rdi,[32+rdi]
+	vmovdqa	ymm0,ymm4
+	vmovdqa	ymm4,ymm8
+	vmovdqa	ymm8,ymm12
+	jmp	NEAR $L$open_avx2_tail_128_xor
+$L$open_avx2_tail_32_xor:
+	cmp	rbx,16
+	vmovdqa	xmm1,xmm0
+	jb	NEAR $L$open_avx2_exit
+	sub	rbx,16
+
+	vpxor	xmm1,xmm0,XMMWORD[rsi]
+	vmovdqu	XMMWORD[rdi],xmm1
+	lea	rsi,[16+rsi]
+	lea	rdi,[16+rdi]
+	vperm2i128	ymm0,ymm0,ymm0,0x11
+	vmovdqa	xmm1,xmm0
+$L$open_avx2_exit:
+	vzeroupper
+	jmp	NEAR $L$open_sse_tail_16
+
+$L$open_avx2_192:
+	vmovdqa	ymm1,ymm0
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm10,ymm8
+	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
+	vmovdqa	ymm11,ymm12
+	vmovdqa	ymm15,ymm13
+	mov	r10,10
+$L$open_avx2_192_rounds:
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,4
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,12
+
+	dec	r10
+	jne	NEAR $L$open_avx2_192_rounds
+	vpaddd	ymm0,ymm0,ymm2
+	vpaddd	ymm1,ymm1,ymm2
+	vpaddd	ymm4,ymm4,ymm6
+	vpaddd	ymm5,ymm5,ymm6
+	vpaddd	ymm8,ymm8,ymm10
+	vpaddd	ymm9,ymm9,ymm10
+	vpaddd	ymm12,ymm12,ymm11
+	vpaddd	ymm13,ymm13,ymm15
+	vperm2i128	ymm3,ymm4,ymm0,0x02
+
+	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
+	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
+
+	vperm2i128	ymm0,ymm4,ymm0,0x13
+	vperm2i128	ymm4,ymm12,ymm8,0x13
+	vperm2i128	ymm8,ymm5,ymm1,0x02
+	vperm2i128	ymm12,ymm13,ymm9,0x02
+	vperm2i128	ymm1,ymm5,ymm1,0x13
+	vperm2i128	ymm5,ymm13,ymm9,0x13
+$L$open_avx2_short:
+	mov	r8,r8
+	call	poly_hash_ad_internal
+$L$open_avx2_short_hash_and_xor_loop:
+	cmp	rbx,32
+	jb	NEAR $L$open_avx2_short_tail_32
+	sub	rbx,32
+	add	r10,QWORD[((0+0))+rsi]
+	adc	r11,QWORD[((8+0))+rsi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	add	r10,QWORD[((0+16))+rsi]
+	adc	r11,QWORD[((8+16))+rsi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+
+	vpxor	ymm0,ymm0,YMMWORD[rsi]
+	vmovdqu	YMMWORD[rdi],ymm0
+	lea	rsi,[32+rsi]
+	lea	rdi,[32+rdi]
+
+	vmovdqa	ymm0,ymm4
+	vmovdqa	ymm4,ymm8
+	vmovdqa	ymm8,ymm12
+	vmovdqa	ymm12,ymm1
+	vmovdqa	ymm1,ymm5
+	vmovdqa	ymm5,ymm9
+	vmovdqa	ymm9,ymm13
+	vmovdqa	ymm13,ymm2
+	vmovdqa	ymm2,ymm6
+	jmp	NEAR $L$open_avx2_short_hash_and_xor_loop
+$L$open_avx2_short_tail_32:
+	cmp	rbx,16
+	vmovdqa	xmm1,xmm0
+	jb	NEAR $L$open_avx2_short_tail_32_exit
+	sub	rbx,16
+	add	r10,QWORD[((0+0))+rsi]
+	adc	r11,QWORD[((8+0))+rsi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	vpxor	xmm3,xmm0,XMMWORD[rsi]
+	vmovdqu	XMMWORD[rdi],xmm3
+	lea	rsi,[16+rsi]
+	lea	rdi,[16+rdi]
+	vextracti128	xmm1,ymm0,1
+$L$open_avx2_short_tail_32_exit:
+	vzeroupper
+	jmp	NEAR $L$open_sse_tail_16
+
+$L$open_avx2_320:
+	vmovdqa	ymm1,ymm0
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm10,ymm8
+	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm14,ymm13,YMMWORD[$L$avx2_inc]
+	vmovdqa	ymm7,ymm4
+	vmovdqa	ymm11,ymm8
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
+	mov	r10,10
+$L$open_avx2_320_rounds:
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,4
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpsrld	ymm3,ymm6,20
+	vpslld	ymm6,ymm6,12
+	vpxor	ymm6,ymm6,ymm3
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpslld	ymm3,ymm6,7
+	vpsrld	ymm6,ymm6,25
+	vpxor	ymm6,ymm6,ymm3
+	vpalignr	ymm14,ymm14,ymm14,12
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm6,ymm6,ymm6,4
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,12
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpsrld	ymm3,ymm6,20
+	vpslld	ymm6,ymm6,12
+	vpxor	ymm6,ymm6,ymm3
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpslld	ymm3,ymm6,7
+	vpsrld	ymm6,ymm6,25
+	vpxor	ymm6,ymm6,ymm3
+	vpalignr	ymm14,ymm14,ymm14,4
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm6,ymm6,ymm6,12
+
+	dec	r10
+	jne	NEAR $L$open_avx2_320_rounds
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,ymm7
+	vpaddd	ymm5,ymm5,ymm7
+	vpaddd	ymm6,ymm6,ymm7
+	vpaddd	ymm8,ymm8,ymm11
+	vpaddd	ymm9,ymm9,ymm11
+	vpaddd	ymm10,ymm10,ymm11
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
+	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
+	vperm2i128	ymm3,ymm4,ymm0,0x02
+
+	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
+	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
+
+	vperm2i128	ymm0,ymm4,ymm0,0x13
+	vperm2i128	ymm4,ymm12,ymm8,0x13
+	vperm2i128	ymm8,ymm5,ymm1,0x02
+	vperm2i128	ymm12,ymm13,ymm9,0x02
+	vperm2i128	ymm1,ymm5,ymm1,0x13
+	vperm2i128	ymm5,ymm13,ymm9,0x13
+	vperm2i128	ymm9,ymm6,ymm2,0x02
+	vperm2i128	ymm13,ymm14,ymm10,0x02
+	vperm2i128	ymm2,ymm6,ymm2,0x13
+	vperm2i128	ymm6,ymm14,ymm10,0x13
+	jmp	NEAR $L$open_avx2_short
+
+
+
+
+
+ALIGN	64
+chacha20_poly1305_seal_avx2:
+
+
+
+
+
+
+
+
+
+
+
+
+	vzeroupper
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
+	vbroadcasti128	ymm4,XMMWORD[r9]
+	vbroadcasti128	ymm8,XMMWORD[16+r9]
+	vbroadcasti128	ymm12,XMMWORD[32+r9]
+	vpaddd	ymm12,ymm12,YMMWORD[$L$avx2_init]
+	cmp	rbx,6*32
+	jbe	NEAR $L$seal_avx2_192
+	cmp	rbx,10*32
+	jbe	NEAR $L$seal_avx2_320
+	vmovdqa	ymm1,ymm0
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm3,ymm0
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm7,ymm4
+	vmovdqa	YMMWORD[(160+64)+rbp],ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm10,ymm8
+	vmovdqa	ymm11,ymm8
+	vmovdqa	YMMWORD[(160+96)+rbp],ymm8
+	vmovdqa	ymm15,ymm12
+	vpaddd	ymm14,ymm15,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm13,ymm14,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm12,ymm13,YMMWORD[$L$avx2_inc]
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
+	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
+	mov	r10,10
+$L$seal_avx2_init_rounds:
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
+	vpalignr	ymm7,ymm7,ymm7,4
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,12
+	vpalignr	ymm6,ymm6,ymm6,4
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,12
+	vpalignr	ymm5,ymm5,ymm5,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm12,ymm12,ymm12,12
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
+	vpalignr	ymm7,ymm7,ymm7,12
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,4
+	vpalignr	ymm6,ymm6,ymm6,12
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,4
+	vpalignr	ymm5,ymm5,ymm5,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm12,ymm12,ymm12,4
+
+	dec	r10
+	jnz	NEAR $L$seal_avx2_init_rounds
+	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
+	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
+	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+
+	vperm2i128	ymm11,ymm15,ymm11,0x13
+	vperm2i128	ymm15,ymm7,ymm3,0x02
+	vperm2i128	ymm3,ymm7,ymm3,0x13
+	vpand	ymm15,ymm15,YMMWORD[$L$clamp]
+	vmovdqa	YMMWORD[(160+0)+rbp],ymm15
+	mov	r8,r8
+	call	poly_hash_ad_internal
+
+	vpxor	ymm3,ymm3,YMMWORD[rsi]
+	vpxor	ymm11,ymm11,YMMWORD[32+rsi]
+	vmovdqu	YMMWORD[rdi],ymm3
+	vmovdqu	YMMWORD[32+rdi],ymm11
+	vperm2i128	ymm15,ymm6,ymm2,0x02
+	vperm2i128	ymm6,ymm6,ymm2,0x13
+	vperm2i128	ymm2,ymm14,ymm10,0x02
+	vperm2i128	ymm10,ymm14,ymm10,0x13
+	vpxor	ymm15,ymm15,YMMWORD[((0+64))+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[((32+64))+rsi]
+	vpxor	ymm6,ymm6,YMMWORD[((64+64))+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[((96+64))+rsi]
+	vmovdqu	YMMWORD[(0+64)+rdi],ymm15
+	vmovdqu	YMMWORD[(32+64)+rdi],ymm2
+	vmovdqu	YMMWORD[(64+64)+rdi],ymm6
+	vmovdqu	YMMWORD[(96+64)+rdi],ymm10
+	vperm2i128	ymm15,ymm5,ymm1,0x02
+	vperm2i128	ymm5,ymm5,ymm1,0x13
+	vperm2i128	ymm1,ymm13,ymm9,0x02
+	vperm2i128	ymm9,ymm13,ymm9,0x13
+	vpxor	ymm15,ymm15,YMMWORD[((0+192))+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[((32+192))+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[((64+192))+rsi]
+	vpxor	ymm9,ymm9,YMMWORD[((96+192))+rsi]
+	vmovdqu	YMMWORD[(0+192)+rdi],ymm15
+	vmovdqu	YMMWORD[(32+192)+rdi],ymm1
+	vmovdqu	YMMWORD[(64+192)+rdi],ymm5
+	vmovdqu	YMMWORD[(96+192)+rdi],ymm9
+	vperm2i128	ymm15,ymm4,ymm0,0x13
+	vperm2i128	ymm0,ymm4,ymm0,0x02
+	vperm2i128	ymm4,ymm12,ymm8,0x02
+	vperm2i128	ymm12,ymm12,ymm8,0x13
+	vmovdqa	ymm8,ymm15
+
+	lea	rsi,[320+rsi]
+	sub	rbx,10*32
+	mov	rcx,10*32
+	cmp	rbx,4*32
+	jbe	NEAR $L$seal_avx2_short_hash_remainder
+	vpxor	ymm0,ymm0,YMMWORD[rsi]
+	vpxor	ymm4,ymm4,YMMWORD[32+rsi]
+	vpxor	ymm8,ymm8,YMMWORD[64+rsi]
+	vpxor	ymm12,ymm12,YMMWORD[96+rsi]
+	vmovdqu	YMMWORD[320+rdi],ymm0
+	vmovdqu	YMMWORD[352+rdi],ymm4
+	vmovdqu	YMMWORD[384+rdi],ymm8
+	vmovdqu	YMMWORD[416+rdi],ymm12
+	lea	rsi,[128+rsi]
+	sub	rbx,4*32
+	mov	rcx,8
+	mov	r8,2
+	cmp	rbx,4*32
+	jbe	NEAR $L$seal_avx2_tail_128
+	cmp	rbx,8*32
+	jbe	NEAR $L$seal_avx2_tail_256
+	cmp	rbx,12*32
+	jbe	NEAR $L$seal_avx2_tail_384
+	cmp	rbx,16*32
+	jbe	NEAR $L$seal_avx2_tail_512
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
+	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
+	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
+	vmovdqa	ymm1,ymm0
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm10,ymm8
+	vmovdqa	ymm3,ymm0
+	vmovdqa	ymm7,ymm4
+	vmovdqa	ymm11,ymm8
+	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
+	vpaddd	ymm14,ymm12,ymm15
+	vpaddd	ymm13,ymm12,ymm14
+	vpaddd	ymm12,ymm12,ymm13
+	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
+	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
+	vpalignr	ymm7,ymm7,ymm7,4
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,12
+	vpalignr	ymm6,ymm6,ymm6,4
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,12
+	vpalignr	ymm5,ymm5,ymm5,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm12,ymm12,ymm12,12
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
+	vpalignr	ymm7,ymm7,ymm7,12
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,4
+	vpalignr	ymm6,ymm6,ymm6,12
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,4
+	vpalignr	ymm5,ymm5,ymm5,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm12,ymm12,ymm12,4
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+
+	sub	rdi,16
+	mov	rcx,9
+	jmp	NEAR $L$seal_avx2_main_loop_rounds_entry
+ALIGN	32
+$L$seal_avx2_main_loop:	; outer loop head: build four parallel state sets (16 ymm regs = 512 bytes of state)
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]	; row 0 <- constants table
+	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]	; row 1 <- saved row at rbp+160+64
+	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]	; row 2 <- saved row at rbp+160+96
+	vmovdqa	ymm1,ymm0	; rows 0-2 are identical in all four sets, so just copy them
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm10,ymm8
+	vmovdqa	ymm3,ymm0
+	vmovdqa	ymm7,ymm4
+	vmovdqa	ymm11,ymm8
+	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]	; per-set increment for row 3 (presumably the block-counter row - confirm against $L$avx2_inc)
+	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]	; row 3 of set 3 = saved row 3 + inc
+	vpaddd	ymm14,ymm12,ymm15	; set 2 = set 3 + inc
+	vpaddd	ymm13,ymm12,ymm14	; set 1 = set 2 + inc
+	vpaddd	ymm12,ymm12,ymm13	; set 0 = set 1 + inc (highest value)
+	vmovdqa	YMMWORD[(160+256)+rbp],ymm15	; save each set's initial row 3; they are added back after the rounds
+	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12	; also becomes the base row 3 for the next pass
+
+	mov	rcx,10	; rcx counts iterations of the rounds loop that follows
+ALIGN	32
+$L$seal_avx2_main_loop_rounds:	; per iteration: one column round + one diagonal round on 4 state sets, interleaved with 3 hash-block absorptions
+	add	r10,QWORD[((0+0))+rdi]	; hash block 1: accumulator r10:r11:r12 += 16 bytes at [rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1	; carry plus the 2^128 pad bit
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8	; spill ymm8 (row 2 of set 0) so ymm8 can serve as scratch
+	vmovdqa	ymm8,YMMWORD[$L$rol16]	; byte-shuffle mask (rotate by 16, per the table name)
+	vpaddd	ymm3,ymm3,ymm7	; column round step: a += b
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3	; d ^= a
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	mov	rdx,QWORD[((0+160+0))+rbp]	; multiplier low word (qword at rbp+160)
+	mov	r15,rdx
+	mulx	r14,r13,r10	; r14:r13 = low word * acc0 (mulx leaves flags untouched)
+	mulx	rdx,rax,r11	; rdx:rax = low word * acc1
+	imul	r15,r12	; r15 = low word * acc2
+	add	r14,rax
+	adc	r15,rdx
+	vpshufb	ymm15,ymm15,ymm8	; apply the $L$rol16 shuffle to d
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15	; c += d
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]	; c += d for set 0, reading the spilled row 2
+	vpxor	ymm7,ymm7,ymm11	; b ^= c
+	mov	rdx,QWORD[((8+160+0))+rbp]	; multiplier high word (qword at rbp+168)
+	mulx	rax,r10,r10	; rax:r10 = high word * acc0
+	add	r14,r10
+	mulx	r9,r11,r11	; r9:r11 = high word * acc1; flags from the add above are preserved
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12	; rdx = high word * acc2
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8	; spill updated row 2 of set 0 again
+	vpsrld	ymm8,ymm7,20	; b = rotate-left(b, 12) built from shift-right 20 / shift-left 12 / xor
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	add	r15,rax	; finish the 4-limb product r13,r14,r15,r9
+	adc	r9,rdx
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]	; byte-shuffle mask (rotate by 8, per the table name)
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	mov	r10,r13	; reduce: keep the low 130 bits in r10:r11:r12 ...
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4	; ... and fold the bits above 2^130 back in as 4*c + c = 5*c
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+$L$seal_avx2_main_loop_rounds_entry:	; mid-iteration entry point: a jump earlier in the routine lands here with rdi pre-decremented by 16
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8	; ymm8 must hold the $L$rol8 mask here
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	add	r10,QWORD[((0+16))+rdi]	; hash block 2: accumulator += 16 bytes at [rdi+16]
+	adc	r11,QWORD[((8+16))+rdi]
+	adc	r12,1
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25	; b = rotate-left(b, 7) built from shift-right 25 / shift-left 7 / xor
+	mov	rdx,QWORD[((0+160+0))+rbp]	; multiply accumulator by the two multiplier words (same sequence as above)
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]	; reload row 2 of set 0
+	vpalignr	ymm7,ymm7,ymm7,4	; rotate rows 1/2/3 by 4/8/12 bytes per 128-bit lane: columns become diagonals
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,12
+	vpalignr	ymm6,ymm6,ymm6,4
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,12
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	vpalignr	ymm5,ymm5,ymm5,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm12,ymm12,ymm12,12
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8	; diagonal round starts: spill row 2 of set 0 again
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	add	r15,rax
+	adc	r9,rdx
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	mov	r10,r13	; reduce the product for hash block 2 (same folding as above)
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	add	r10,QWORD[((0+32))+rdi]	; hash block 3: accumulator += 16 bytes at [rdi+32]
+	adc	r11,QWORD[((8+32))+rdi]
+	adc	r12,1
+
+	lea	rdi,[48+rdi]	; three 16-byte blocks consumed this iteration (lea does not disturb flags)
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	add	r15,rax
+	adc	r9,rdx
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
+	vpalignr	ymm7,ymm7,ymm7,12	; rotate rows 1/2/3 by 12/8/4 bytes: undo the diagonal arrangement
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,4
+	vpalignr	ymm6,ymm6,ymm6,12
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,4
+	vpalignr	ymm5,ymm5,ymm5,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	mov	r10,r13	; reduce the product for hash block 3
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vpalignr	ymm12,ymm12,ymm12,4
+
+	dec	rcx	; loop until the iteration counter reaches zero
+	jne	NEAR $L$seal_avx2_main_loop_rounds
+	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]	; rounds done: add each set's input rows back (constants, rbp+160+64, rbp+160+96, saved row 3)
+	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]	; set 3 row 3 was saved at rbp+160+256
+	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]	; set 2 row 3
+	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]	; set 1 row 3
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]	; set 0 row 3
+
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm0	; spill ymm0: it is used as a temporary below and reloaded before set 0 is written
+	add	r10,QWORD[((0+0))+rdi]	; absorb one more 16-byte hash block at [rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1	; carry plus the 2^128 pad bit
+	mov	rdx,QWORD[((0+160+0))+rbp]	; multiply accumulator r10:r11:r12 by the two multiplier words at rbp+160 / rbp+168
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13	; reduce: low 130 bits stay, bits above 2^130 are folded back in times 5
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	add	r10,QWORD[((0+16))+rdi]	; absorb a second 16-byte hash block at [rdi+16]
+	adc	r11,QWORD[((8+16))+rdi]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[32+rdi]	; step past the two blocks just hashed; the stores below write from this rdi
+	vperm2i128	ymm0,ymm7,ymm3,0x02	; 0x02 pairs the low 128-bit lanes of the two sources, 0x13 the high lanes
+	vperm2i128	ymm7,ymm7,ymm3,0x13
+	vperm2i128	ymm3,ymm15,ymm11,0x02
+	vperm2i128	ymm11,ymm15,ymm11,0x13
+	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]	; set 3: XOR keystream with input bytes 0..127 at rsi
+	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
+	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
+	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
+	vmovdqu	YMMWORD[(0+0)+rdi],ymm0	; write output bytes 0..127 at rdi
+	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
+	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
+	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
+
+	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]	; restore the spilled row 0 of set 0
+	vperm2i128	ymm3,ymm6,ymm2,0x02
+	vperm2i128	ymm6,ymm6,ymm2,0x13
+	vperm2i128	ymm2,ymm14,ymm10,0x02
+	vperm2i128	ymm10,ymm14,ymm10,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]	; set 2: bytes 128..255
+	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
+	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
+	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
+	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
+	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
+	vperm2i128	ymm3,ymm5,ymm1,0x02
+	vperm2i128	ymm5,ymm5,ymm1,0x13
+	vperm2i128	ymm1,ymm13,ymm9,0x02
+	vperm2i128	ymm9,ymm13,ymm9,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]	; set 1: bytes 256..383
+	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
+	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
+	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
+	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
+	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
+	vperm2i128	ymm3,ymm4,ymm0,0x02
+	vperm2i128	ymm4,ymm4,ymm0,0x13
+	vperm2i128	ymm0,ymm12,ymm8,0x02
+	vperm2i128	ymm8,ymm12,ymm8,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+384))+rsi]	; set 0: bytes 384..511
+	vpxor	ymm0,ymm0,YMMWORD[((32+384))+rsi]
+	vpxor	ymm4,ymm4,YMMWORD[((64+384))+rsi]
+	vpxor	ymm8,ymm8,YMMWORD[((96+384))+rsi]
+	vmovdqu	YMMWORD[(0+384)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+384)+rdi],ymm0
+	vmovdqu	YMMWORD[(64+384)+rdi],ymm4
+	vmovdqu	YMMWORD[(96+384)+rdi],ymm8
+
+	lea	rsi,[512+rsi]	; input pointer advances by 512; rdi is not advanced here (it moves as bytes are hashed)
+	sub	rbx,16*32	; rbx = remaining length
+	cmp	rbx,16*32
+	jg	NEAR $L$seal_avx2_main_loop	; another full pass only while more than 512 bytes remain (jg is a signed compare)
+
+	add	r10,QWORD[((0+0))+rdi]	; main loop finished: absorb a 16-byte hash block at [rdi] into accumulator r10:r11:r12
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1	; carry plus the 2^128 pad bit
+	mov	rdx,QWORD[((0+160+0))+rbp]	; multiply by the two multiplier words at rbp+160 / rbp+168
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11	; mulx does not touch flags, so the carry from the add above reaches the adc below
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13	; reduce: low 130 bits stay, bits above 2^130 are folded back in as 4*c + c
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	add	r10,QWORD[((0+16))+rdi]	; absorb a second block at [rdi+16]
+	adc	r11,QWORD[((8+16))+rdi]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[32+rdi]	; 32 bytes hashed
+	mov	rcx,10	; tail loop counters: rcx = 10, r8 = 0 (the tail loops test rcx first, then r8)
+	xor	r8,r8
+
+	cmp	rbx,12*32	; pick the smallest tail routine that covers the remaining rbx bytes
+	ja	NEAR $L$seal_avx2_tail_512	; more than 384 bytes left
+	cmp	rbx,8*32
+	ja	NEAR $L$seal_avx2_tail_384	; more than 256 bytes left
+	cmp	rbx,4*32
+	ja	NEAR $L$seal_avx2_tail_256	; more than 128 bytes left; otherwise fall through to the code that follows
+
+$L$seal_avx2_tail_128:	; tail path using one state set (4 ymm regs = 128 bytes of keystream)
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]	; row 0 <- constants table
+	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]	; row 1 <- saved row at rbp+160+64
+	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]	; row 2 <- saved row at rbp+160+96
+	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]	; row 3 <- saved row 3 + $L$avx2_inc
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12	; keep the initial row 3; it is added back after the rounds
+
+$L$seal_avx2_tail_128_rounds_and_3xhash:	; iteration flavour that absorbs one extra 16-byte hash block before the rounds
+	add	r10,QWORD[((0+0))+rdi]	; accumulator r10:r11:r12 += 16 bytes at [rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1	; carry plus the 2^128 pad bit
+	mov	rdx,QWORD[((0+160+0))+rbp]	; multiply by the two multiplier words at rbp+160 / rbp+168
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13	; reduce: low 130 bits stay, bits above 2^130 are folded back in as 4*c + c
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[16+rdi]
+$L$seal_avx2_tail_128_rounds_and_2xhash:	; one column round + one diagonal round, each followed by one hash-block absorption
+	vpaddd	ymm0,ymm0,ymm4	; column round: a += b; d ^= a; shuffle d with the $L$rol16 mask
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20	; b = rotate-left(b, 12), ymm3 is scratch
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7	; b = rotate-left(b, 7)
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,12	; rotate rows 3/2/1 by 12/8/4 bytes per 128-bit lane: columns become diagonals
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,4
+	add	r10,QWORD[((0+0))+rdi]	; absorb 16 bytes at [rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vpaddd	ymm0,ymm0,ymm4	; diagonal round (same operations on the rotated rows)
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,4	; rotate rows 3/2/1 by 4/8/12 bytes: undo the diagonal arrangement
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,12
+	add	r10,QWORD[((0+16))+rdi]	; absorb 16 bytes at [rdi+16]
+	adc	r11,QWORD[((8+16))+rdi]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[32+rdi]	; two blocks consumed by this part of the iteration
+	dec	rcx	; while rcx is still positive after the decrement, run another 3-hash iteration
+	jg	NEAR $L$seal_avx2_tail_128_rounds_and_3xhash
+	dec	r8	; afterwards run 2-hash iterations while r8 is still >= 0 after the decrement
+	jge	NEAR $L$seal_avx2_tail_128_rounds_and_2xhash
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]	; rounds done: add the input rows back
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+	vperm2i128	ymm3,ymm4,ymm0,0x13	; 0x13 pairs the high 128-bit lanes, 0x02 the low lanes
+	vperm2i128	ymm0,ymm4,ymm0,0x02
+	vperm2i128	ymm4,ymm12,ymm8,0x02
+	vperm2i128	ymm12,ymm12,ymm8,0x13
+	vmovdqa	ymm8,ymm3	; keystream is left in ymm0, ymm4, ymm8, ymm12 for the jump target (defined outside this excerpt)
+
+	jmp	NEAR $L$seal_avx2_short_loop
+
+$L$seal_avx2_tail_256:	; tail path using two state sets (8 ymm regs = 256 bytes of keystream)
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]	; row 0 <- constants table
+	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]	; row 1 <- saved row at rbp+160+64
+	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]	; row 2 <- saved row at rbp+160+96
+	vmovdqa	ymm1,ymm0	; second set shares rows 0-2
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm13,ymm12,YMMWORD[((160+160))+rbp]	; set 1 row 3 = saved row 3 + inc
+	vpaddd	ymm12,ymm12,ymm13	; set 0 row 3 = set 1 row 3 + inc
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12	; keep both initial row-3 values; they are added back after the rounds
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+
+$L$seal_avx2_tail_256_rounds_and_3xhash:	; iteration flavour that absorbs one extra 16-byte hash block before the rounds
+	add	r10,QWORD[((0+0))+rdi]	; accumulator r10:r11:r12 += 16 bytes at [rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1	; carry plus the 2^128 pad bit
+	mov	rax,QWORD[((0+160+0))+rbp]	; multiplier low word; this path uses one-operand mul (result in rdx:rax)
+	mov	r15,rax
+	mul	r10	; low word * acc0
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11	; low word * acc1
+	imul	r15,r12	; low word * acc2
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]	; multiplier high word
+	mov	r9,rax
+	mul	r10	; high word * acc0
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11	; high word * acc1
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12	; high word * acc2
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13	; reduce: low 130 bits stay, bits above 2^130 are folded back in as 4*c + c
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[16+rdi]
+$L$seal_avx2_tail_256_rounds_and_2xhash:	; column + diagonal round on both sets, each followed by one hash-block absorption
+	vpaddd	ymm0,ymm0,ymm4	; column round, set 0
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20	; b = rotate-left(b, 12), ymm3 is scratch
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7	; b = rotate-left(b, 7)
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,12	; rotate rows 3/2/1 by 12/8/4 bytes per 128-bit lane: columns become diagonals
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpaddd	ymm1,ymm1,ymm5	; column round, set 1
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,4
+	add	r10,QWORD[((0+0))+rdi]	; absorb 16 bytes at [rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vpaddd	ymm0,ymm0,ymm4	; diagonal round, set 0
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,4	; rotate rows 3/2/1 by 4/8/12 bytes: undo the diagonal arrangement
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpaddd	ymm1,ymm1,ymm5	; diagonal round, set 1
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,12
+	add	r10,QWORD[((0+16))+rdi]	; absorb 16 bytes at [rdi+16]
+	adc	r11,QWORD[((8+16))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[32+rdi]	; two blocks consumed by this part of the iteration
+	dec	rcx	; while rcx is still positive after the decrement, run another 3-hash iteration
+	jg	NEAR $L$seal_avx2_tail_256_rounds_and_3xhash
+	dec	r8	; afterwards run 2-hash iterations while r8 is still >= 0 after the decrement
+	jge	NEAR $L$seal_avx2_tail_256_rounds_and_2xhash
+	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]	; rounds done: add the input rows back to both sets
+	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+	vperm2i128	ymm3,ymm5,ymm1,0x02	; 0x02 pairs the low 128-bit lanes of the two sources, 0x13 the high lanes
+	vperm2i128	ymm5,ymm5,ymm1,0x13
+	vperm2i128	ymm1,ymm13,ymm9,0x02
+	vperm2i128	ymm9,ymm13,ymm9,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]	; set 1: XOR keystream with the first 128 input bytes at rsi
+	vpxor	ymm1,ymm1,YMMWORD[((32+0))+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[((64+0))+rsi]
+	vpxor	ymm9,ymm9,YMMWORD[((96+0))+rsi]
+	vmovdqu	YMMWORD[(0+0)+rdi],ymm3	; and write 128 output bytes at rdi
+	vmovdqu	YMMWORD[(32+0)+rdi],ymm1
+	vmovdqu	YMMWORD[(64+0)+rdi],ymm5
+	vmovdqu	YMMWORD[(96+0)+rdi],ymm9
+	vperm2i128	ymm3,ymm4,ymm0,0x13
+	vperm2i128	ymm0,ymm4,ymm0,0x02
+	vperm2i128	ymm4,ymm12,ymm8,0x02
+	vperm2i128	ymm12,ymm12,ymm8,0x13
+	vmovdqa	ymm8,ymm3	; set 0 keystream is left in ymm0, ymm4, ymm8, ymm12 for the jump target
+
+	mov	rcx,4*32	; rcx = 128; presumably the count of just-written bytes the target still has to hash - confirm at the target label
+	lea	rsi,[128+rsi]	; 128 input bytes consumed
+	sub	rbx,4*32	; 128 fewer bytes remaining
+	jmp	NEAR $L$seal_avx2_short_hash_remainder
+
+$L$seal_avx2_tail_384:
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
+	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
+	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
+	vmovdqa	ymm1,ymm0
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm10,ymm8
+	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm14,ymm12,YMMWORD[((160+160))+rbp]
+	vpaddd	ymm13,ymm12,ymm14
+	vpaddd	ymm12,ymm12,ymm13
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
+
+$L$seal_avx2_tail_384_rounds_and_3xhash:
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[16+rdi]
+$L$seal_avx2_tail_384_rounds_and_2xhash:
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,4
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpsrld	ymm3,ymm6,20
+	vpslld	ymm6,ymm6,12
+	vpxor	ymm6,ymm6,ymm3
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpslld	ymm3,ymm6,7
+	vpsrld	ymm6,ymm6,25
+	vpxor	ymm6,ymm6,ymm3
+	vpalignr	ymm14,ymm14,ymm14,12
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm6,ymm6,ymm6,4
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,12
+	add	r10,QWORD[((0+16))+rdi]
+	adc	r11,QWORD[((8+16))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,12
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpsrld	ymm3,ymm6,20
+	vpslld	ymm6,ymm6,12
+	vpxor	ymm6,ymm6,ymm3
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpslld	ymm3,ymm6,7
+	vpsrld	ymm6,ymm6,25
+	vpxor	ymm6,ymm6,ymm3
+	vpalignr	ymm14,ymm14,ymm14,4
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm6,ymm6,ymm6,12
+
+	lea	rdi,[32+rdi]
+	dec	rcx
+	jg	NEAR $L$seal_avx2_tail_384_rounds_and_3xhash
+	dec	r8
+	jge	NEAR $L$seal_avx2_tail_384_rounds_and_2xhash
+	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
+	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+	vperm2i128	ymm3,ymm6,ymm2,0x02
+	vperm2i128	ymm6,ymm6,ymm2,0x13
+	vperm2i128	ymm2,ymm14,ymm10,0x02
+	vperm2i128	ymm10,ymm14,ymm10,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+0))+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[((32+0))+rsi]
+	vpxor	ymm6,ymm6,YMMWORD[((64+0))+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[((96+0))+rsi]
+	vmovdqu	YMMWORD[(0+0)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+0)+rdi],ymm2
+	vmovdqu	YMMWORD[(64+0)+rdi],ymm6
+	vmovdqu	YMMWORD[(96+0)+rdi],ymm10
+	vperm2i128	ymm3,ymm5,ymm1,0x02
+	vperm2i128	ymm5,ymm5,ymm1,0x13
+	vperm2i128	ymm1,ymm13,ymm9,0x02
+	vperm2i128	ymm9,ymm13,ymm9,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[((32+128))+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[((64+128))+rsi]
+	vpxor	ymm9,ymm9,YMMWORD[((96+128))+rsi]
+	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+128)+rdi],ymm1
+	vmovdqu	YMMWORD[(64+128)+rdi],ymm5
+	vmovdqu	YMMWORD[(96+128)+rdi],ymm9
+	vperm2i128	ymm3,ymm4,ymm0,0x13
+	vperm2i128	ymm0,ymm4,ymm0,0x02
+	vperm2i128	ymm4,ymm12,ymm8,0x02
+	vperm2i128	ymm12,ymm12,ymm8,0x13
+	vmovdqa	ymm8,ymm3
+
+	mov	rcx,8*32
+	lea	rsi,[256+rsi]
+	sub	rbx,8*32
+	jmp	NEAR $L$seal_avx2_short_hash_remainder
+
+$L$seal_avx2_tail_512:
+	vmovdqa	ymm0,YMMWORD[$L$chacha20_consts]
+	vmovdqa	ymm4,YMMWORD[((160+64))+rbp]
+	vmovdqa	ymm8,YMMWORD[((160+96))+rbp]
+	vmovdqa	ymm1,ymm0
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm10,ymm8
+	vmovdqa	ymm3,ymm0
+	vmovdqa	ymm7,ymm4
+	vmovdqa	ymm11,ymm8
+	vmovdqa	ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm15,ymm12,YMMWORD[((160+160))+rbp]
+	vpaddd	ymm14,ymm12,ymm15
+	vpaddd	ymm13,ymm12,ymm14
+	vpaddd	ymm12,ymm12,ymm13
+	vmovdqa	YMMWORD[(160+256)+rbp],ymm15
+	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
+
+$L$seal_avx2_tail_512_rounds_and_3xhash:
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	add	r15,rax
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[16+rdi]
+$L$seal_avx2_tail_512_rounds_and_2xhash:
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
+	vpalignr	ymm7,ymm7,ymm7,4
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,12
+	vpalignr	ymm6,ymm6,ymm6,4
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,12
+	vpalignr	ymm5,ymm5,ymm5,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm4,ymm4,ymm4,4
+	add	r15,rax
+	adc	r9,rdx
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm12,ymm12,ymm12,12
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol16]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,20
+	vpslld	ymm7,ymm7,32-20
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,20
+	vpslld	ymm6,ymm6,32-20
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,20
+	vpslld	ymm5,ymm5,32-20
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,20
+	vpslld	ymm4,ymm4,32-20
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[$L$rol8]
+	vpaddd	ymm3,ymm3,ymm7
+	vpaddd	ymm2,ymm2,ymm6
+	add	r10,QWORD[((0+16))+rdi]
+	adc	r11,QWORD[((8+16))+rdi]
+	adc	r12,1
+	vpaddd	ymm1,ymm1,ymm5
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm15,ymm15,ymm3
+	vpxor	ymm14,ymm14,ymm2
+	vpxor	ymm13,ymm13,ymm1
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm15,ymm15,ymm8
+	vpshufb	ymm14,ymm14,ymm8
+	vpshufb	ymm13,ymm13,ymm8
+	vpshufb	ymm12,ymm12,ymm8
+	vpaddd	ymm11,ymm11,ymm15
+	vpaddd	ymm10,ymm10,ymm14
+	vpaddd	ymm9,ymm9,ymm13
+	vpaddd	ymm8,ymm12,YMMWORD[((160+128))+rbp]
+	vpxor	ymm7,ymm7,ymm11
+	vpxor	ymm6,ymm6,ymm10
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm8
+	vpsrld	ymm8,ymm7,25
+	mov	rdx,QWORD[((0+160+0))+rbp]
+	mov	r15,rdx
+	mulx	r14,r13,r10
+	mulx	rdx,rax,r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	vpslld	ymm7,ymm7,32-25
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm6,25
+	vpslld	ymm6,ymm6,32-25
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm5,25
+	vpslld	ymm5,ymm5,32-25
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm4,25
+	vpslld	ymm4,ymm4,32-25
+	vpxor	ymm4,ymm4,ymm8
+	vmovdqa	ymm8,YMMWORD[((160+128))+rbp]
+	vpalignr	ymm7,ymm7,ymm7,12
+	vpalignr	ymm11,ymm11,ymm11,8
+	vpalignr	ymm15,ymm15,ymm15,4
+	vpalignr	ymm6,ymm6,ymm6,12
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm14,ymm14,ymm14,4
+	vpalignr	ymm5,ymm5,ymm5,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	mov	rdx,QWORD[((8+160+0))+rbp]
+	mulx	rax,r10,r10
+	add	r14,r10
+	mulx	r9,r11,r11
+	adc	r15,r11
+	adc	r9,0
+	imul	rdx,r12
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm12,ymm12,ymm12,4
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+	add	r15,rax
+	adc	r9,rdx
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[32+rdi]
+	dec	rcx
+	jg	NEAR $L$seal_avx2_tail_512_rounds_and_3xhash
+	dec	r8
+	jge	NEAR $L$seal_avx2_tail_512_rounds_and_2xhash
+	vpaddd	ymm3,ymm3,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm7,ymm7,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm11,ymm11,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm15,ymm15,YMMWORD[((160+256))+rbp]
+	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm6,ymm6,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm10,ymm10,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
+	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm5,ymm5,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm9,ymm9,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,YMMWORD[((160+64))+rbp]
+	vpaddd	ymm8,ymm8,YMMWORD[((160+96))+rbp]
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+
+	vmovdqa	YMMWORD[(160+128)+rbp],ymm0
+	vperm2i128	ymm0,ymm7,ymm3,0x02
+	vperm2i128	ymm7,ymm7,ymm3,0x13
+	vperm2i128	ymm3,ymm15,ymm11,0x02
+	vperm2i128	ymm11,ymm15,ymm11,0x13
+	vpxor	ymm0,ymm0,YMMWORD[((0+0))+rsi]
+	vpxor	ymm3,ymm3,YMMWORD[((32+0))+rsi]
+	vpxor	ymm7,ymm7,YMMWORD[((64+0))+rsi]
+	vpxor	ymm11,ymm11,YMMWORD[((96+0))+rsi]
+	vmovdqu	YMMWORD[(0+0)+rdi],ymm0
+	vmovdqu	YMMWORD[(32+0)+rdi],ymm3
+	vmovdqu	YMMWORD[(64+0)+rdi],ymm7
+	vmovdqu	YMMWORD[(96+0)+rdi],ymm11
+
+	vmovdqa	ymm0,YMMWORD[((160+128))+rbp]
+	vperm2i128	ymm3,ymm6,ymm2,0x02
+	vperm2i128	ymm6,ymm6,ymm2,0x13
+	vperm2i128	ymm2,ymm14,ymm10,0x02
+	vperm2i128	ymm10,ymm14,ymm10,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+128))+rsi]
+	vpxor	ymm2,ymm2,YMMWORD[((32+128))+rsi]
+	vpxor	ymm6,ymm6,YMMWORD[((64+128))+rsi]
+	vpxor	ymm10,ymm10,YMMWORD[((96+128))+rsi]
+	vmovdqu	YMMWORD[(0+128)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+128)+rdi],ymm2
+	vmovdqu	YMMWORD[(64+128)+rdi],ymm6
+	vmovdqu	YMMWORD[(96+128)+rdi],ymm10
+	vperm2i128	ymm3,ymm5,ymm1,0x02
+	vperm2i128	ymm5,ymm5,ymm1,0x13
+	vperm2i128	ymm1,ymm13,ymm9,0x02
+	vperm2i128	ymm9,ymm13,ymm9,0x13
+	vpxor	ymm3,ymm3,YMMWORD[((0+256))+rsi]
+	vpxor	ymm1,ymm1,YMMWORD[((32+256))+rsi]
+	vpxor	ymm5,ymm5,YMMWORD[((64+256))+rsi]
+	vpxor	ymm9,ymm9,YMMWORD[((96+256))+rsi]
+	vmovdqu	YMMWORD[(0+256)+rdi],ymm3
+	vmovdqu	YMMWORD[(32+256)+rdi],ymm1
+	vmovdqu	YMMWORD[(64+256)+rdi],ymm5
+	vmovdqu	YMMWORD[(96+256)+rdi],ymm9
+	vperm2i128	ymm3,ymm4,ymm0,0x13
+	vperm2i128	ymm0,ymm4,ymm0,0x02
+	vperm2i128	ymm4,ymm12,ymm8,0x02
+	vperm2i128	ymm12,ymm12,ymm8,0x13
+	vmovdqa	ymm8,ymm3
+
+	mov	rcx,12*32
+	lea	rsi,[384+rsi]
+	sub	rbx,12*32
+	jmp	NEAR $L$seal_avx2_short_hash_remainder
+
+$L$seal_avx2_320:
+	vmovdqa	ymm1,ymm0
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm10,ymm8
+	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
+	vpaddd	ymm14,ymm13,YMMWORD[$L$avx2_inc]
+	vmovdqa	ymm7,ymm4
+	vmovdqa	ymm11,ymm8
+	vmovdqa	YMMWORD[(160+160)+rbp],ymm12
+	vmovdqa	YMMWORD[(160+192)+rbp],ymm13
+	vmovdqa	YMMWORD[(160+224)+rbp],ymm14
+	mov	r10,10
+$L$seal_avx2_320_rounds:
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,4
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpsrld	ymm3,ymm6,20
+	vpslld	ymm6,ymm6,12
+	vpxor	ymm6,ymm6,ymm3
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpslld	ymm3,ymm6,7
+	vpsrld	ymm6,ymm6,25
+	vpxor	ymm6,ymm6,ymm3
+	vpalignr	ymm14,ymm14,ymm14,12
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm6,ymm6,ymm6,4
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,12
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol16]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpsrld	ymm3,ymm6,20
+	vpslld	ymm6,ymm6,12
+	vpxor	ymm6,ymm6,ymm3
+	vpaddd	ymm2,ymm2,ymm6
+	vpxor	ymm14,ymm14,ymm2
+	vpshufb	ymm14,ymm14,YMMWORD[$L$rol8]
+	vpaddd	ymm10,ymm10,ymm14
+	vpxor	ymm6,ymm6,ymm10
+	vpslld	ymm3,ymm6,7
+	vpsrld	ymm6,ymm6,25
+	vpxor	ymm6,ymm6,ymm3
+	vpalignr	ymm14,ymm14,ymm14,4
+	vpalignr	ymm10,ymm10,ymm10,8
+	vpalignr	ymm6,ymm6,ymm6,12
+
+	dec	r10
+	jne	NEAR $L$seal_avx2_320_rounds
+	vpaddd	ymm0,ymm0,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm1,ymm1,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm2,ymm2,YMMWORD[$L$chacha20_consts]
+	vpaddd	ymm4,ymm4,ymm7
+	vpaddd	ymm5,ymm5,ymm7
+	vpaddd	ymm6,ymm6,ymm7
+	vpaddd	ymm8,ymm8,ymm11
+	vpaddd	ymm9,ymm9,ymm11
+	vpaddd	ymm10,ymm10,ymm11
+	vpaddd	ymm12,ymm12,YMMWORD[((160+160))+rbp]
+	vpaddd	ymm13,ymm13,YMMWORD[((160+192))+rbp]
+	vpaddd	ymm14,ymm14,YMMWORD[((160+224))+rbp]
+	vperm2i128	ymm3,ymm4,ymm0,0x02
+
+	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
+	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
+
+	vperm2i128	ymm0,ymm4,ymm0,0x13
+	vperm2i128	ymm4,ymm12,ymm8,0x13
+	vperm2i128	ymm8,ymm5,ymm1,0x02
+	vperm2i128	ymm12,ymm13,ymm9,0x02
+	vperm2i128	ymm1,ymm5,ymm1,0x13
+	vperm2i128	ymm5,ymm13,ymm9,0x13
+	vperm2i128	ymm9,ymm6,ymm2,0x02
+	vperm2i128	ymm13,ymm14,ymm10,0x02
+	vperm2i128	ymm2,ymm6,ymm2,0x13
+	vperm2i128	ymm6,ymm14,ymm10,0x13
+	jmp	NEAR $L$seal_avx2_short
+
+$L$seal_avx2_192:
+	vmovdqa	ymm1,ymm0
+	vmovdqa	ymm2,ymm0
+	vmovdqa	ymm5,ymm4
+	vmovdqa	ymm6,ymm4
+	vmovdqa	ymm9,ymm8
+	vmovdqa	ymm10,ymm8
+	vpaddd	ymm13,ymm12,YMMWORD[$L$avx2_inc]
+	vmovdqa	ymm11,ymm12
+	vmovdqa	ymm15,ymm13
+	mov	r10,10
+$L$seal_avx2_192_rounds:
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,12
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,4
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,12
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,4
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol16]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm3,ymm4,20
+	vpslld	ymm4,ymm4,12
+	vpxor	ymm4,ymm4,ymm3
+	vpaddd	ymm0,ymm0,ymm4
+	vpxor	ymm12,ymm12,ymm0
+	vpshufb	ymm12,ymm12,YMMWORD[$L$rol8]
+	vpaddd	ymm8,ymm8,ymm12
+	vpxor	ymm4,ymm4,ymm8
+	vpslld	ymm3,ymm4,7
+	vpsrld	ymm4,ymm4,25
+	vpxor	ymm4,ymm4,ymm3
+	vpalignr	ymm12,ymm12,ymm12,4
+	vpalignr	ymm8,ymm8,ymm8,8
+	vpalignr	ymm4,ymm4,ymm4,12
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol16]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpsrld	ymm3,ymm5,20
+	vpslld	ymm5,ymm5,12
+	vpxor	ymm5,ymm5,ymm3
+	vpaddd	ymm1,ymm1,ymm5
+	vpxor	ymm13,ymm13,ymm1
+	vpshufb	ymm13,ymm13,YMMWORD[$L$rol8]
+	vpaddd	ymm9,ymm9,ymm13
+	vpxor	ymm5,ymm5,ymm9
+	vpslld	ymm3,ymm5,7
+	vpsrld	ymm5,ymm5,25
+	vpxor	ymm5,ymm5,ymm3
+	vpalignr	ymm13,ymm13,ymm13,4
+	vpalignr	ymm9,ymm9,ymm9,8
+	vpalignr	ymm5,ymm5,ymm5,12
+
+	dec	r10
+	jne	NEAR $L$seal_avx2_192_rounds
+	vpaddd	ymm0,ymm0,ymm2
+	vpaddd	ymm1,ymm1,ymm2
+	vpaddd	ymm4,ymm4,ymm6
+	vpaddd	ymm5,ymm5,ymm6
+	vpaddd	ymm8,ymm8,ymm10
+	vpaddd	ymm9,ymm9,ymm10
+	vpaddd	ymm12,ymm12,ymm11
+	vpaddd	ymm13,ymm13,ymm15
+	vperm2i128	ymm3,ymm4,ymm0,0x02
+
+	vpand	ymm3,ymm3,YMMWORD[$L$clamp]
+	vmovdqa	YMMWORD[(160+0)+rbp],ymm3
+
+	vperm2i128	ymm0,ymm4,ymm0,0x13
+	vperm2i128	ymm4,ymm12,ymm8,0x13
+	vperm2i128	ymm8,ymm5,ymm1,0x02
+	vperm2i128	ymm12,ymm13,ymm9,0x02
+	vperm2i128	ymm1,ymm5,ymm1,0x13
+	vperm2i128	ymm5,ymm13,ymm9,0x13
+$L$seal_avx2_short:
+	mov	r8,r8
+	call	poly_hash_ad_internal
+	xor	rcx,rcx
+$L$seal_avx2_short_hash_remainder:
+	cmp	rcx,16
+	jb	NEAR $L$seal_avx2_short_loop
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	sub	rcx,16
+	add	rdi,16
+	jmp	NEAR $L$seal_avx2_short_hash_remainder
+$L$seal_avx2_short_loop:
+	cmp	rbx,32
+	jb	NEAR $L$seal_avx2_short_tail
+	sub	rbx,32
+
+	vpxor	ymm0,ymm0,YMMWORD[rsi]
+	vmovdqu	YMMWORD[rdi],ymm0
+	lea	rsi,[32+rsi]
+
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+	add	r10,QWORD[((0+16))+rdi]
+	adc	r11,QWORD[((8+16))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[32+rdi]
+
+	vmovdqa	ymm0,ymm4
+	vmovdqa	ymm4,ymm8
+	vmovdqa	ymm8,ymm12
+	vmovdqa	ymm12,ymm1
+	vmovdqa	ymm1,ymm5
+	vmovdqa	ymm5,ymm9
+	vmovdqa	ymm9,ymm13
+	vmovdqa	ymm13,ymm2
+	vmovdqa	ymm2,ymm6
+	jmp	NEAR $L$seal_avx2_short_loop
+$L$seal_avx2_short_tail:
+	cmp	rbx,16
+	jb	NEAR $L$seal_avx2_exit
+	sub	rbx,16
+	vpxor	xmm3,xmm0,XMMWORD[rsi]
+	vmovdqu	XMMWORD[rdi],xmm3
+	lea	rsi,[16+rsi]
+	add	r10,QWORD[((0+0))+rdi]
+	adc	r11,QWORD[((8+0))+rdi]
+	adc	r12,1
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mov	r15,rax
+	mul	r10
+	mov	r13,rax
+	mov	r14,rdx
+	mov	rax,QWORD[((0+160+0))+rbp]
+	mul	r11
+	imul	r15,r12
+	add	r14,rax
+	adc	r15,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mov	r9,rax
+	mul	r10
+	add	r14,rax
+	adc	rdx,0
+	mov	r10,rdx
+	mov	rax,QWORD[((8+160+0))+rbp]
+	mul	r11
+	add	r15,rax
+	adc	rdx,0
+	imul	r9,r12
+	add	r15,r10
+	adc	r9,rdx
+	mov	r10,r13
+	mov	r11,r14
+	mov	r12,r15
+	and	r12,3
+	mov	r13,r15
+	and	r13,-4
+	mov	r14,r9
+	shrd	r15,r9,2
+	shr	r9,2
+	add	r15,r13
+	adc	r9,r14
+	add	r10,r15
+	adc	r11,r9
+	adc	r12,0
+
+	lea	rdi,[16+rdi]
+	vextracti128	xmm0,ymm0,1
+$L$seal_avx2_exit:
+	vzeroupper
+	jmp	NEAR $L$seal_sse_tail_16
+
+
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
new file mode 100644
index 0000000..2b51a26
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/aesni-gcm-x86_64.asm
@@ -0,0 +1,1033 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+
+ALIGN	32
+_aesni_ctr32_ghash_6x:	; helper with no `global` directive in view: prepares six counter blocks, then enters the 6-block loop
+; NOTE(review): register roles are inferred from the setup code in aesni_gcm_decrypt (r11 = constant table, rcx = key schedule + 128, xmm1 = block loaded from [r8]) -- confirm at the call sites.
+	vmovdqu	xmm2,XMMWORD[32+r11]	; xmm2 = 16-byte counter increment taken from the table at r11 (table contents not visible here)
+	sub	rdx,6	; rdx -= 6 (aesni_gcm_decrypt has already converted rdx to 16-byte units with shr rdx,4)
+	vpxor	xmm4,xmm4,xmm4	; xmm4 = 0
+	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]	; xmm15 = 16 bytes at rcx-128, i.e. the first slot of the biased key-schedule pointer
+	vpaddb	xmm10,xmm1,xmm2	; xmm10..xmm14 = xmm1 plus 1..5 increments, added bytewise
+	vpaddb	xmm11,xmm10,xmm2
+	vpaddb	xmm12,xmm11,xmm2
+	vpaddb	xmm13,xmm12,xmm2
+	vpaddb	xmm14,xmm13,xmm2
+	vpxor	xmm9,xmm1,xmm15	; xmm9 = xmm1 ^ xmm15 (first block, initial key material already mixed in)
+	vmovdqu	XMMWORD[(16+8)+rsp],xmm4	; zero the 16-byte stack slot at rsp+24
+	jmp	NEAR $L$oop6x
+
+ALIGN	32
+$L$oop6x:	; top of the loop; each pass works on six 16-byte blocks (xmm9..xmm14)
+	add	ebx,100663296	; ebx += 0x06000000, i.e. +6 in the top byte of ebx (aesni_gcm_decrypt loads ebx from [12+r8])
+	jc	NEAR $L$handle_ctr32	; carry out of the 32-bit add: take the slow path, presumably because bytewise vpaddb cannot propagate the carry
+	vmovdqu	xmm3,XMMWORD[((0-32))+r9]	; multiplier operand at r9-32 for the first vpclmulqdq group
+	vpaddb	xmm1,xmm14,xmm2	; xmm1 = xmm14 + increment: the counter block that follows the six in flight
+	vpxor	xmm10,xmm10,xmm15	; mix xmm15 into the second and third blocks (the remaining ones get it after $L$resume_ctr32)
+	vpxor	xmm11,xmm11,xmm15
+
+$L$resume_ctr32:	; common continuation for the fast path and for $L$handle_ctr32
+	vmovdqu	XMMWORD[r8],xmm1	; park the following counter block at [r8]; xmm1 is reused as scratch below
+	vpclmulqdq	xmm5,xmm7,xmm3,0x10	; carry-less partial product: low qword of xmm7 times high qword of xmm3
+	vpxor	xmm12,xmm12,xmm15
+	vmovups	xmm2,XMMWORD[((16-128))+rcx]	; key-schedule slot at offset 16 (rcx is used with a -128 bias throughout)
+	vpclmulqdq	xmm6,xmm7,xmm3,0x01	; high qword of xmm7 times low qword of xmm3
+; --- input look-ahead guard (interleaved with the instructions below) ---
+; xor r12,r12 / cmp r15,r14 / setnc r12b / neg r12 / and r12,0x60 builds
+; r12 = 0x60 when r15 is not below r14 (unsigned), else 0, without a branch;
+; the later "lea r14,[r12*1+r14]" advances r14 by that amount. None of the
+; vector instructions in between write EFLAGS, so setnc still sees the cmp.
+; NOTE(review): aesni_gcm_decrypt sets r14 = rdi and r15 = rdi+rdx-192, so
+; this looks like a bound that keeps the movbe loads from [r14+0..95] inside
+; the input buffer on the final pass -- confirm against the Perl source.
+
+
+
+
+
+
+
+
+
+	xor	r12,r12
+	cmp	r15,r14
+
+	vaesenc	xmm9,xmm9,xmm2
+	vmovdqu	xmm0,XMMWORD[((48+8))+rsp]	; next 16-byte operand from the stack staging area
+	vpxor	xmm13,xmm13,xmm15
+	vpclmulqdq	xmm1,xmm7,xmm3,0x00	; low qword times low qword
+	vaesenc	xmm10,xmm10,xmm2
+	vpxor	xmm14,xmm14,xmm15
+	setnc	r12b	; r12b = 1 when the cmp above produced no borrow
+	vpclmulqdq	xmm7,xmm7,xmm3,0x11	; high qword times high qword
+	vaesenc	xmm11,xmm11,xmm2
+	vmovdqu	xmm3,XMMWORD[((16-32))+r9]
+	neg	r12	; r12 = 0 or all-ones
+	vaesenc	xmm12,xmm12,xmm2
+	vpxor	xmm6,xmm6,xmm5	; xmm6 accumulates the cross (lo*hi, hi*lo) products
+	vpclmulqdq	xmm5,xmm0,xmm3,0x00
+	vpxor	xmm8,xmm8,xmm4
+	vaesenc	xmm13,xmm13,xmm2
+	vpxor	xmm4,xmm1,xmm5	; xmm4 accumulates the lo*lo products
+	and	r12,0x60	; r12 = 0 or 0x60
+	vmovups	xmm15,XMMWORD[((32-128))+rcx]
+	vpclmulqdq	xmm1,xmm0,xmm3,0x10
+	vaesenc	xmm14,xmm14,xmm2
+
+	vpclmulqdq	xmm2,xmm0,xmm3,0x01
+	lea	r14,[r12*1+r14]	; advance r14 by 0 or 96 bytes
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]	; fold in the stack slot at rsp+24 (zeroed at entry, rewritten at $L$enc_tail)
+	vpclmulqdq	xmm3,xmm0,xmm3,0x11
+	vmovdqu	xmm0,XMMWORD[((64+8))+rsp]
+	vaesenc	xmm10,xmm10,xmm15
+	movbe	r13,QWORD[88+r14]	; byte-reversed 8-byte load from the input at r14+88
+	vaesenc	xmm11,xmm11,xmm15
+	movbe	r12,QWORD[80+r14]	; byte-reversed 8-byte load from the input at r14+80
+	vaesenc	xmm12,xmm12,xmm15
+	mov	QWORD[((32+8))+rsp],r13	; the two swapped halves land at rsp+40 and rsp+48, forming one fully byte-reversed block
+	vaesenc	xmm13,xmm13,xmm15
+	mov	QWORD[((40+8))+rsp],r12
+	vmovdqu	xmm5,XMMWORD[((48-32))+r9]
+	vaesenc	xmm14,xmm14,xmm15
+
+	vmovups	xmm15,XMMWORD[((48-128))+rcx]
+	vpxor	xmm6,xmm6,xmm1
+	vpclmulqdq	xmm1,xmm0,xmm5,0x00
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm6,xmm6,xmm2
+	vpclmulqdq	xmm2,xmm0,xmm5,0x10
+	vaesenc	xmm10,xmm10,xmm15
+	vpxor	xmm7,xmm7,xmm3	; xmm7 accumulates the hi*hi products
+	vpclmulqdq	xmm3,xmm0,xmm5,0x01
+	vaesenc	xmm11,xmm11,xmm15
+	vpclmulqdq	xmm5,xmm0,xmm5,0x11
+	vmovdqu	xmm0,XMMWORD[((80+8))+rsp]
+	vaesenc	xmm12,xmm12,xmm15
+	vaesenc	xmm13,xmm13,xmm15
+	vpxor	xmm4,xmm4,xmm1
+	vmovdqu	xmm1,XMMWORD[((64-32))+r9]
+	vaesenc	xmm14,xmm14,xmm15
+
+	vmovups	xmm15,XMMWORD[((64-128))+rcx]
+	vpxor	xmm6,xmm6,xmm2
+	vpclmulqdq	xmm2,xmm0,xmm1,0x00
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm6,xmm6,xmm3
+	vpclmulqdq	xmm3,xmm0,xmm1,0x10
+	vaesenc	xmm10,xmm10,xmm15
+	movbe	r13,QWORD[72+r14]
+	vpxor	xmm7,xmm7,xmm5
+	vpclmulqdq	xmm5,xmm0,xmm1,0x01
+	vaesenc	xmm11,xmm11,xmm15
+	movbe	r12,QWORD[64+r14]
+	vpclmulqdq	xmm1,xmm0,xmm1,0x11
+	vmovdqu	xmm0,XMMWORD[((96+8))+rsp]
+	vaesenc	xmm12,xmm12,xmm15
+	mov	QWORD[((48+8))+rsp],r13
+	vaesenc	xmm13,xmm13,xmm15
+	mov	QWORD[((56+8))+rsp],r12
+	vpxor	xmm4,xmm4,xmm2
+	vmovdqu	xmm2,XMMWORD[((96-32))+r9]
+	vaesenc	xmm14,xmm14,xmm15
+
+	vmovups	xmm15,XMMWORD[((80-128))+rcx]
+	vpxor	xmm6,xmm6,xmm3
+	vpclmulqdq	xmm3,xmm0,xmm2,0x00
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm6,xmm6,xmm5
+	vpclmulqdq	xmm5,xmm0,xmm2,0x10
+	vaesenc	xmm10,xmm10,xmm15
+	movbe	r13,QWORD[56+r14]
+	vpxor	xmm7,xmm7,xmm1
+	vpclmulqdq	xmm1,xmm0,xmm2,0x01
+	vpxor	xmm8,xmm8,XMMWORD[((112+8))+rsp]	; xmm8 ^= last staged block (stack slot at rsp+120) before it is multiplied below
+	vaesenc	xmm11,xmm11,xmm15
+	movbe	r12,QWORD[48+r14]
+	vpclmulqdq	xmm2,xmm0,xmm2,0x11
+	vaesenc	xmm12,xmm12,xmm15
+	mov	QWORD[((64+8))+rsp],r13
+	vaesenc	xmm13,xmm13,xmm15
+	mov	QWORD[((72+8))+rsp],r12
+	vpxor	xmm4,xmm4,xmm3
+	vmovdqu	xmm3,XMMWORD[((112-32))+r9]
+	vaesenc	xmm14,xmm14,xmm15
+
+	vmovups	xmm15,XMMWORD[((96-128))+rcx]
+	vpxor	xmm6,xmm6,xmm5
+	vpclmulqdq	xmm5,xmm8,xmm3,0x10	; sixth multiply of the pass uses xmm8 as the data operand
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm6,xmm6,xmm1
+	vpclmulqdq	xmm1,xmm8,xmm3,0x01
+	vaesenc	xmm10,xmm10,xmm15
+	movbe	r13,QWORD[40+r14]
+	vpxor	xmm7,xmm7,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm3,0x00
+	vaesenc	xmm11,xmm11,xmm15
+	movbe	r12,QWORD[32+r14]
+	vpclmulqdq	xmm8,xmm8,xmm3,0x11
+	vaesenc	xmm12,xmm12,xmm15
+	mov	QWORD[((80+8))+rsp],r13
+	vaesenc	xmm13,xmm13,xmm15
+	mov	QWORD[((88+8))+rsp],r12
+	vpxor	xmm6,xmm6,xmm5
+	vaesenc	xmm14,xmm14,xmm15
+	vpxor	xmm6,xmm6,xmm1
+
+	vmovups	xmm15,XMMWORD[((112-128))+rcx]
+	vpslldq	xmm5,xmm6,8	; xmm5 = cross-term sum shifted left 8 bytes (its low half moves into the upper qword)
+	vpxor	xmm4,xmm4,xmm2
+	vmovdqu	xmm3,XMMWORD[16+r11]	; constant at r11+16 used by the two folding multiplies (presumably the reduction polynomial; table not visible)
+
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm7,xmm7,xmm8
+	vaesenc	xmm10,xmm10,xmm15
+	vpxor	xmm4,xmm4,xmm5
+	movbe	r13,QWORD[24+r14]
+	vaesenc	xmm11,xmm11,xmm15
+	movbe	r12,QWORD[16+r14]
+	vpalignr	xmm0,xmm4,xmm4,8	; xmm0 = xmm4 with its two qwords swapped
+	vpclmulqdq	xmm4,xmm4,xmm3,0x10	; first folding multiply: low qword of xmm4 times high qword of the constant
+	mov	QWORD[((96+8))+rsp],r13
+	vaesenc	xmm12,xmm12,xmm15
+	mov	QWORD[((104+8))+rsp],r12
+	vaesenc	xmm13,xmm13,xmm15
+	vmovups	xmm1,XMMWORD[((128-128))+rcx]
+	vaesenc	xmm14,xmm14,xmm15
+
+	vaesenc	xmm9,xmm9,xmm1
+	vmovups	xmm15,XMMWORD[((144-128))+rcx]
+	vaesenc	xmm10,xmm10,xmm1
+	vpsrldq	xmm6,xmm6,8	; upper half of the cross-term sum, moved down to be added into xmm7
+	vaesenc	xmm11,xmm11,xmm1
+	vpxor	xmm7,xmm7,xmm6
+	vaesenc	xmm12,xmm12,xmm1
+	vpxor	xmm4,xmm4,xmm0
+	movbe	r13,QWORD[8+r14]	; r13/r12 from [r14+8] and [r14] are not stored here; $L$enc_tail writes them to the stack
+	vaesenc	xmm13,xmm13,xmm1
+	movbe	r12,QWORD[r14]
+	vaesenc	xmm14,xmm14,xmm1
+	vmovups	xmm1,XMMWORD[((160-128))+rcx]
+	cmp	ebp,11	; ebp = dword the caller read from offset 240 of the key schedule (presumably the round count)
+	jb	NEAR $L$enc_tail	; below 11: the last two keys are already in xmm15 (slot 144) and xmm1 (slot 160)
+
+	vaesenc	xmm9,xmm9,xmm15
+	vaesenc	xmm10,xmm10,xmm15
+	vaesenc	xmm11,xmm11,xmm15
+	vaesenc	xmm12,xmm12,xmm15
+	vaesenc	xmm13,xmm13,xmm15
+	vaesenc	xmm14,xmm14,xmm15
+
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+	vmovups	xmm15,XMMWORD[((176-128))+rcx]
+	vaesenc	xmm14,xmm14,xmm1
+	vmovups	xmm1,XMMWORD[((192-128))+rcx]
+	je	NEAR $L$enc_tail	; still testing "cmp ebp,11": the vaesenc/vmovups in between leave EFLAGS untouched
+
+	vaesenc	xmm9,xmm9,xmm15
+	vaesenc	xmm10,xmm10,xmm15
+	vaesenc	xmm11,xmm11,xmm15
+	vaesenc	xmm12,xmm12,xmm15
+	vaesenc	xmm13,xmm13,xmm15
+	vaesenc	xmm14,xmm14,xmm15
+
+	vaesenc	xmm9,xmm9,xmm1
+	vaesenc	xmm10,xmm10,xmm1
+	vaesenc	xmm11,xmm11,xmm1
+	vaesenc	xmm12,xmm12,xmm1
+	vaesenc	xmm13,xmm13,xmm1
+	vmovups	xmm15,XMMWORD[((208-128))+rcx]
+	vaesenc	xmm14,xmm14,xmm1
+	vmovups	xmm1,XMMWORD[((224-128))+rcx]
+	jmp	NEAR $L$enc_tail	; longest schedule: final keys come from slots 208 and 224
+
+ALIGN	32
+$L$handle_ctr32:	; slow path taken when "add ebx,..." in $L$oop6x carried: rebuild the counter blocks with 32-bit adds
+	vmovdqu	xmm0,XMMWORD[r11]	; xmm0 = byte-shuffle mask at the start of the r11 table (aesni_gcm_decrypt points r11 at $L$bswap_mask)
+	vpshufb	xmm6,xmm1,xmm0	; xmm6 = xmm1 with its bytes permuted by the mask
+	vmovdqu	xmm5,XMMWORD[48+r11]	; xmm5 = dword-add constant B (presumably +2; table not visible here)
+	vpaddd	xmm10,xmm6,XMMWORD[64+r11]	; xmm10 = xmm6 + constant A at r11+64 (presumably +1)
+	vpaddd	xmm11,xmm6,xmm5	; xmm11 = xmm6 + B
+	vmovdqu	xmm3,XMMWORD[((0-32))+r9]	; same multiplier load the fast path performs before $L$resume_ctr32
+	vpaddd	xmm12,xmm10,xmm5	; xmm12 = xmm6 + A + B
+	vpshufb	xmm10,xmm10,xmm0	; permute each result back with the same mask
+	vpaddd	xmm13,xmm11,xmm5	; xmm13 = xmm6 + 2B
+	vpshufb	xmm11,xmm11,xmm0
+	vpxor	xmm10,xmm10,xmm15	; mix xmm15 into xmm10/xmm11, matching what the fast path does
+	vpaddd	xmm14,xmm12,xmm5	; xmm14 = xmm6 + A + 2B
+	vpshufb	xmm12,xmm12,xmm0
+	vpxor	xmm11,xmm11,xmm15
+	vpaddd	xmm1,xmm13,xmm5	; xmm1 = xmm6 + 3B: the counter block after the six in flight
+	vpshufb	xmm13,xmm13,xmm0
+	vpshufb	xmm14,xmm14,xmm0
+	vpshufb	xmm1,xmm1,xmm0
+	jmp	NEAR $L$resume_ctr32
+
+ALIGN	32
+$L$enc_tail:	; on entry xmm15 holds the key for the last vaesenc round and xmm1 the key for vaesenclast
+	vaesenc	xmm9,xmm9,xmm15
+	vmovdqu	XMMWORD[(16+8)+rsp],xmm7	; save xmm7 in the stack slot at rsp+24; it is xored into xmm8 later
+	vpalignr	xmm8,xmm4,xmm4,8	; xmm8 = xmm4 with its two qwords swapped
+	vaesenc	xmm10,xmm10,xmm15
+	vpclmulqdq	xmm4,xmm4,xmm3,0x10	; second folding multiply: low qword of xmm4 times high qword of xmm3
+	vpxor	xmm2,xmm1,XMMWORD[rdi]	; pre-xor the final key with the six 16-byte blocks at rdi+0..95, so that
+	vaesenc	xmm11,xmm11,xmm15
+	vpxor	xmm0,xmm1,XMMWORD[16+rdi]	; vaesenclast below yields final-round output ^ data in one step
+	vaesenc	xmm12,xmm12,xmm15
+	vpxor	xmm5,xmm1,XMMWORD[32+rdi]
+	vaesenc	xmm13,xmm13,xmm15
+	vpxor	xmm6,xmm1,XMMWORD[48+rdi]
+	vaesenc	xmm14,xmm14,xmm15
+	vpxor	xmm7,xmm1,XMMWORD[64+rdi]
+	vpxor	xmm3,xmm1,XMMWORD[80+rdi]
+	vmovdqu	xmm1,XMMWORD[r8]	; reload the counter block that $L$resume_ctr32 parked at [r8]
+
+	vaesenclast	xmm9,xmm9,xmm2
+	vmovdqu	xmm2,XMMWORD[32+r11]	; reload the bytewise counter increment from the r11 table
+	vaesenclast	xmm10,xmm10,xmm0
+	vpaddb	xmm0,xmm1,xmm2	; xmm0, xmm5, xmm6, xmm7, xmm3 = xmm1 plus 1..5 increments (counters for the next pass)
+	mov	QWORD[((112+8))+rsp],r13	; store the byte-reversed words loaded from [r14+8] and [r14] into the last staging slot
+	lea	rdi,[96+rdi]	; advance the source pointer by six blocks
+	vaesenclast	xmm11,xmm11,xmm5
+	vpaddb	xmm5,xmm0,xmm2
+	mov	QWORD[((120+8))+rsp],r12
+	lea	rsi,[96+rsi]	; advance the destination pointer by six blocks
+	vmovdqu	xmm15,XMMWORD[((0-128))+rcx]	; reload the first key-schedule slot for the next pass
+	vaesenclast	xmm12,xmm12,xmm6
+	vpaddb	xmm6,xmm5,xmm2
+	vaesenclast	xmm13,xmm13,xmm7
+	vpaddb	xmm7,xmm6,xmm2
+	vaesenclast	xmm14,xmm14,xmm3
+	vpaddb	xmm3,xmm7,xmm2
+
+	add	r10,0x60	; r10 += 96 (running byte total; aesni_gcm_decrypt zeroes r10 beforehand)
+	sub	rdx,0x6	; six more blocks accounted for
+	jc	NEAR $L$6x_done	; borrow: leave the loop with the six results still in xmm9..xmm14 (not stored here)
+
+	vmovups	XMMWORD[(-96)+rsi],xmm9	; write the six finished blocks behind the already-advanced rsi,
+	vpxor	xmm9,xmm1,xmm15	; while moving the next pass's counters into xmm9..xmm14
+	vmovups	XMMWORD[(-80)+rsi],xmm10
+	vmovdqa	xmm10,xmm0
+	vmovups	XMMWORD[(-64)+rsi],xmm11
+	vmovdqa	xmm11,xmm5
+	vmovups	XMMWORD[(-48)+rsi],xmm12
+	vmovdqa	xmm12,xmm6
+	vmovups	XMMWORD[(-32)+rsi],xmm13
+	vmovdqa	xmm13,xmm7
+	vmovups	XMMWORD[(-16)+rsi],xmm14
+	vmovdqa	xmm14,xmm3
+	vmovdqu	xmm7,XMMWORD[((32+8))+rsp]	; xmm7 = first staged block (rsp+40), the data operand of the next pass's first multiply
+	jmp	NEAR $L$oop6x
+
+$L$6x_done:
+	vpxor	xmm8,xmm8,XMMWORD[((16+8))+rsp]
+	vpxor	xmm8,xmm8,xmm4
+
+	DB	0F3h,0C3h		;repret
+
+
+global	aesni_gcm_decrypt
+; aesni_gcm_decrypt: Win64 entry point. Runs the buffer through _aesni_ctr32_ghash_6x and returns r10 in rax (0 when len < 0x60).
+ALIGN	32
+aesni_gcm_decrypt:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_gcm_decrypt:
+	mov	rdi,rcx	; arg1: input pointer (read via [rdi] below)
+	mov	rsi,rdx	; arg2: output pointer (written via [rsi] below)
+	mov	rdx,r8	; arg3: length in bytes
+	mov	rcx,r9	; arg4: key schedule (round keys are addressed relative to rcx)
+	mov	r8,QWORD[40+rsp]	; arg5 (stack): 16-byte counter block
+	mov	r9,QWORD[48+rsp]	; arg6 (stack): hash state, loaded into xmm8 and written back at exit
+; rdi/rsi were saved above, so the Win64 arguments can be moved into the registers used by the rest of the body.
+
+
+	xor	r10,r10	; r10 is what gets returned in rax
+
+
+
+	cmp	rdx,0x60
+	jb	NEAR $L$gcm_dec_abort	; fewer than 96 bytes (six 16-byte blocks): return 0
+
+	lea	rax,[rsp]	; rax = frame base used by the epilogue (and read back by gcm_se_handler)
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	lea	rsp,[((-168))+rsp]
+	movaps	XMMWORD[(-216)+rax],xmm6	; save xmm6-xmm15; they are reloaded in the epilogue
+	movaps	XMMWORD[(-200)+rax],xmm7
+	movaps	XMMWORD[(-184)+rax],xmm8
+	movaps	XMMWORD[(-168)+rax],xmm9
+	movaps	XMMWORD[(-152)+rax],xmm10
+	movaps	XMMWORD[(-136)+rax],xmm11
+	movaps	XMMWORD[(-120)+rax],xmm12
+	movaps	XMMWORD[(-104)+rax],xmm13
+	movaps	XMMWORD[(-88)+rax],xmm14
+	movaps	XMMWORD[(-72)+rax],xmm15
+$L$gcm_dec_body:
+	vzeroupper
+
+	vmovdqu	xmm1,XMMWORD[r8]	; xmm1 = counter block
+	add	rsp,-128
+	mov	ebx,DWORD[12+r8]	; ebx = last 4 bytes of the counter block, as stored in memory
+	lea	r11,[$L$bswap_mask]	; r11 = base of the constant table that starts at $L$bswap_mask
+	lea	r14,[((-128))+rcx]
+	mov	r15,0xf80
+	vmovdqu	xmm8,XMMWORD[r9]	; xmm8 = hash state
+	and	rsp,-128	; align the scratch area to 128 bytes
+	vmovdqu	xmm0,XMMWORD[r11]	; xmm0 = byte-reversal shuffle mask
+	lea	rcx,[128+rcx]	; bias the key pointer by +128; round keys are then addressed as [(n-128)+rcx]
+	lea	r9,[((32+32))+r9]	; advance r9 by 64; the final store to [(-64)+r9] hits the original address
+	mov	ebp,DWORD[((240-128))+rcx]	; ebp = dword at offset 240 of the key schedule (presumably the round counter)
+	vpshufb	xmm8,xmm8,xmm0
+; r15 = (rsp & 0xf80) - ((key-128) & 0xf80); when 0 <= r15 < 768, rsp is lowered by r15 (per the label: avoids key/stack aliasing).
+	and	r14,r15
+	and	r15,rsp
+	sub	r15,r14
+	jc	NEAR $L$dec_no_key_aliasing
+	cmp	r15,768
+	jnc	NEAR $L$dec_no_key_aliasing
+	sub	rsp,r15
+$L$dec_no_key_aliasing:
+
+	vmovdqu	xmm7,XMMWORD[80+rdi]	; load the first six input blocks (offsets 80 down to 0)
+	lea	r14,[rdi]
+	vmovdqu	xmm4,XMMWORD[64+rdi]
+
+
+
+
+
+
+
+	lea	r15,[((-192))+rdx*1+rdi]	; r15 = input + len - 192
+
+	vmovdqu	xmm5,XMMWORD[48+rdi]
+	shr	rdx,4	; byte length -> number of 16-byte blocks
+	xor	r10,r10
+	vmovdqu	xmm6,XMMWORD[32+rdi]
+	vpshufb	xmm7,xmm7,xmm0	; byte-reverse each loaded block with the mask in xmm0
+	vmovdqu	xmm2,XMMWORD[16+rdi]
+	vpshufb	xmm4,xmm4,xmm0
+	vmovdqu	xmm3,XMMWORD[rdi]
+	vpshufb	xmm5,xmm5,xmm0
+	vmovdqu	XMMWORD[48+rsp],xmm4	; park the byte-reversed blocks in the stack scratch area (xmm7 stays in its register)
+	vpshufb	xmm6,xmm6,xmm0
+	vmovdqu	XMMWORD[64+rsp],xmm5
+	vpshufb	xmm2,xmm2,xmm0
+	vmovdqu	XMMWORD[80+rsp],xmm6
+	vpshufb	xmm3,xmm3,xmm0
+	vmovdqu	XMMWORD[96+rsp],xmm2
+	vmovdqu	XMMWORD[112+rsp],xmm3
+
+	call	_aesni_ctr32_ghash_6x
+; The helper returns with rsi already advanced; store the last six result blocks left in xmm9-xmm14.
+	vmovups	XMMWORD[(-96)+rsi],xmm9
+	vmovups	XMMWORD[(-80)+rsi],xmm10
+	vmovups	XMMWORD[(-64)+rsi],xmm11
+	vmovups	XMMWORD[(-48)+rsi],xmm12
+	vmovups	XMMWORD[(-32)+rsi],xmm13
+	vmovups	XMMWORD[(-16)+rsi],xmm14
+
+	vpshufb	xmm8,xmm8,XMMWORD[r11]	; byte-reverse the hash state back
+	vmovdqu	XMMWORD[(-64)+r9],xmm8	; and store it at the address originally passed as arg6
+
+	vzeroupper
+	movaps	xmm6,XMMWORD[((-216))+rax]	; restore xmm6-xmm15 and the pushed general registers from the frame
+	movaps	xmm7,XMMWORD[((-200))+rax]
+	movaps	xmm8,XMMWORD[((-184))+rax]
+	movaps	xmm9,XMMWORD[((-168))+rax]
+	movaps	xmm10,XMMWORD[((-152))+rax]
+	movaps	xmm11,XMMWORD[((-136))+rax]
+	movaps	xmm12,XMMWORD[((-120))+rax]
+	movaps	xmm13,XMMWORD[((-104))+rax]
+	movaps	xmm14,XMMWORD[((-88))+rax]
+	movaps	xmm15,XMMWORD[((-72))+rax]
+	mov	r15,QWORD[((-48))+rax]
+
+	mov	r14,QWORD[((-40))+rax]
+
+	mov	r13,QWORD[((-32))+rax]
+
+	mov	r12,QWORD[((-24))+rax]
+
+	mov	rbp,QWORD[((-16))+rax]
+
+	mov	rbx,QWORD[((-8))+rax]
+
+	lea	rsp,[rax]	; back to the entry stack pointer
+
+$L$gcm_dec_abort:
+	mov	rax,r10	; return value
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesni_gcm_decrypt:
+
+ALIGN	32
+_aesni_ctr32_6x:
+; Encrypts six counter blocks, XORs them with 96 bytes at [rdi], stores to [rsi] and leaves the results in xmm9-xmm14; rdi/rsi advance by 96.
+	vmovdqu	xmm4,XMMWORD[((0-128))+rcx]	; xmm4 = round key 0 (rcx is the key pointer biased by +128)
+	vmovdqu	xmm2,XMMWORD[32+r11]	; xmm2 = $L$one_msb: vpaddb with it adds 1 to byte 15
+	lea	r13,[((-1))+rbp]	; r13 = number of passes through $L$oop_ctr32
+	vmovups	xmm15,XMMWORD[((16-128))+rcx]	; xmm15 = round key 1
+	lea	r12,[((32-128))+rcx]	; r12 -> round key 2
+	vpxor	xmm9,xmm1,xmm4	; block 0 = counter (xmm1) ^ round key 0
+	add	ebx,100663296	; 100663296 = 6<<24: add 6 to the top byte of ebx
+	jc	NEAR $L$handle_ctr32_2	; carry: byte-wise increments would wrap, so use the 32-bit add path
+	vpaddb	xmm10,xmm1,xmm2	; counters +1 .. +5 for blocks 1-5, each XORed with round key 0
+	vpaddb	xmm11,xmm10,xmm2
+	vpxor	xmm10,xmm10,xmm4
+	vpaddb	xmm12,xmm11,xmm2
+	vpxor	xmm11,xmm11,xmm4
+	vpaddb	xmm13,xmm12,xmm2
+	vpxor	xmm12,xmm12,xmm4
+	vpaddb	xmm14,xmm13,xmm2
+	vpxor	xmm13,xmm13,xmm4
+	vpaddb	xmm1,xmm14,xmm2	; xmm1 = counter for the next call (+6)
+	vpxor	xmm14,xmm14,xmm4
+	jmp	NEAR $L$oop_ctr32
+
+ALIGN	16
+$L$oop_ctr32:
+	vaesenc	xmm9,xmm9,xmm15	; one AES round on all six blocks
+	vaesenc	xmm10,xmm10,xmm15
+	vaesenc	xmm11,xmm11,xmm15
+	vaesenc	xmm12,xmm12,xmm15
+	vaesenc	xmm13,xmm13,xmm15
+	vaesenc	xmm14,xmm14,xmm15
+	vmovups	xmm15,XMMWORD[r12]	; fetch the next round key
+	lea	r12,[16+r12]
+	dec	r13d
+	jnz	NEAR $L$oop_ctr32
+; r12 now points at the final round key; XOR it into the input so vaesenclast yields keystream ^ input.
+	vmovdqu	xmm3,XMMWORD[r12]
+	vaesenc	xmm9,xmm9,xmm15
+	vpxor	xmm4,xmm3,XMMWORD[rdi]
+	vaesenc	xmm10,xmm10,xmm15
+	vpxor	xmm5,xmm3,XMMWORD[16+rdi]
+	vaesenc	xmm11,xmm11,xmm15
+	vpxor	xmm6,xmm3,XMMWORD[32+rdi]
+	vaesenc	xmm12,xmm12,xmm15
+	vpxor	xmm8,xmm3,XMMWORD[48+rdi]
+	vaesenc	xmm13,xmm13,xmm15
+	vpxor	xmm2,xmm3,XMMWORD[64+rdi]
+	vaesenc	xmm14,xmm14,xmm15
+	vpxor	xmm3,xmm3,XMMWORD[80+rdi]
+	lea	rdi,[96+rdi]	; advance the input pointer by six blocks
+
+	vaesenclast	xmm9,xmm9,xmm4
+	vaesenclast	xmm10,xmm10,xmm5
+	vaesenclast	xmm11,xmm11,xmm6
+	vaesenclast	xmm12,xmm12,xmm8
+	vaesenclast	xmm13,xmm13,xmm2
+	vaesenclast	xmm14,xmm14,xmm3
+	vmovups	XMMWORD[rsi],xmm9	; write the six output blocks
+	vmovups	XMMWORD[16+rsi],xmm10
+	vmovups	XMMWORD[32+rsi],xmm11
+	vmovups	XMMWORD[48+rsi],xmm12
+	vmovups	XMMWORD[64+rsi],xmm13
+	vmovups	XMMWORD[80+rsi],xmm14
+	lea	rsi,[96+rsi]	; advance the output pointer by six blocks
+
+	DB	0F3h,0C3h		;repret
+ALIGN	32
+$L$handle_ctr32_2:
+	vpshufb	xmm6,xmm1,xmm0	; byte-reverse the counter block (mask in xmm0) so vpaddd can increment it
+	vmovdqu	xmm5,XMMWORD[48+r11]	; xmm5 = $L$two_lsb
+	vpaddd	xmm10,xmm6,XMMWORD[64+r11]	; +1 ($L$one_lsb)
+	vpaddd	xmm11,xmm6,xmm5	; +2
+	vpaddd	xmm12,xmm10,xmm5	; +3
+	vpshufb	xmm10,xmm10,xmm0	; reverse each counter back, then XOR with round key 0 (xmm4)
+	vpaddd	xmm13,xmm11,xmm5	; +4
+	vpshufb	xmm11,xmm11,xmm0
+	vpxor	xmm10,xmm10,xmm4
+	vpaddd	xmm14,xmm12,xmm5	; +5
+	vpshufb	xmm12,xmm12,xmm0
+	vpxor	xmm11,xmm11,xmm4
+	vpaddd	xmm1,xmm13,xmm5	; +6: counter for the next call
+	vpshufb	xmm13,xmm13,xmm0
+	vpxor	xmm12,xmm12,xmm4
+	vpshufb	xmm14,xmm14,xmm0
+	vpxor	xmm13,xmm13,xmm4
+	vpshufb	xmm1,xmm1,xmm0
+	vpxor	xmm14,xmm14,xmm4
+	jmp	NEAR $L$oop_ctr32
+
+
+
+global	aesni_gcm_encrypt
+; aesni_gcm_encrypt: Win64 entry point. Encrypts with _aesni_ctr32_6x/_aesni_ctr32_ghash_6x and returns r10 in rax (0 when len < 0x60*3).
+ALIGN	32
+aesni_gcm_encrypt:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aesni_gcm_encrypt:
+	mov	rdi,rcx	; arg1: input pointer
+	mov	rsi,rdx	; arg2: output pointer
+	mov	rdx,r8	; arg3: length in bytes
+	mov	rcx,r9	; arg4: key schedule
+	mov	r8,QWORD[40+rsp]	; arg5 (stack): 16-byte counter block
+	mov	r9,QWORD[48+rsp]	; arg6 (stack): hash state, loaded into xmm8 below and written back at exit
+
+
+
+%ifdef BORINGSSL_DISPATCH_TEST
+EXTERN	BORINGSSL_function_hit
+	mov	BYTE[((BORINGSSL_function_hit+2))],1	; dispatch-test builds only: flag byte 2 of BORINGSSL_function_hit
+%endif
+	xor	r10,r10	; r10 is what gets returned in rax
+
+
+
+
+	cmp	rdx,0x60*3
+	jb	NEAR $L$gcm_enc_abort	; fewer than 288 bytes (18 blocks): return 0
+
+	lea	rax,[rsp]	; rax = frame base used by the epilogue (and read back by gcm_se_handler)
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	lea	rsp,[((-168))+rsp]
+	movaps	XMMWORD[(-216)+rax],xmm6	; save xmm6-xmm15; they are reloaded in the epilogue
+	movaps	XMMWORD[(-200)+rax],xmm7
+	movaps	XMMWORD[(-184)+rax],xmm8
+	movaps	XMMWORD[(-168)+rax],xmm9
+	movaps	XMMWORD[(-152)+rax],xmm10
+	movaps	XMMWORD[(-136)+rax],xmm11
+	movaps	XMMWORD[(-120)+rax],xmm12
+	movaps	XMMWORD[(-104)+rax],xmm13
+	movaps	XMMWORD[(-88)+rax],xmm14
+	movaps	XMMWORD[(-72)+rax],xmm15
+$L$gcm_enc_body:
+	vzeroupper
+
+	vmovdqu	xmm1,XMMWORD[r8]	; xmm1 = counter block
+	add	rsp,-128
+	mov	ebx,DWORD[12+r8]	; ebx = last 4 bytes of the counter block, as stored in memory
+	lea	r11,[$L$bswap_mask]	; r11 = base of the constant table that starts at $L$bswap_mask
+	lea	r14,[((-128))+rcx]
+	mov	r15,0xf80
+	lea	rcx,[128+rcx]	; bias the key pointer by +128; round keys are then addressed as [(n-128)+rcx]
+	vmovdqu	xmm0,XMMWORD[r11]	; xmm0 = byte-reversal shuffle mask
+	and	rsp,-128	; align the scratch area to 128 bytes
+	mov	ebp,DWORD[((240-128))+rcx]	; ebp = dword at offset 240 of the key schedule (sets the loop count in _aesni_ctr32_6x)
+; r15 = (rsp & 0xf80) - ((key-128) & 0xf80); when 0 <= r15 < 768, rsp is lowered by r15 (per the label: avoids key/stack aliasing).
+	and	r14,r15
+	and	r15,rsp
+	sub	r15,r14
+	jc	NEAR $L$enc_no_key_aliasing
+	cmp	r15,768
+	jnc	NEAR $L$enc_no_key_aliasing
+	sub	rsp,r15
+$L$enc_no_key_aliasing:
+
+	lea	r14,[rsi]
+
+
+
+
+
+
+
+
+	lea	r15,[((-192))+rdx*1+rsi]	; r15 = output + len - 192
+
+	shr	rdx,4	; byte length -> number of 16-byte blocks
+
+	call	_aesni_ctr32_6x
+	vpshufb	xmm8,xmm9,xmm0	; byte-reverse the six blocks just produced; five are parked on the stack, the sixth stays in xmm7
+	vpshufb	xmm2,xmm10,xmm0
+	vmovdqu	XMMWORD[112+rsp],xmm8
+	vpshufb	xmm4,xmm11,xmm0
+	vmovdqu	XMMWORD[96+rsp],xmm2
+	vpshufb	xmm5,xmm12,xmm0
+	vmovdqu	XMMWORD[80+rsp],xmm4
+	vpshufb	xmm6,xmm13,xmm0
+	vmovdqu	XMMWORD[64+rsp],xmm5
+	vpshufb	xmm7,xmm14,xmm0
+	vmovdqu	XMMWORD[48+rsp],xmm6
+
+	call	_aesni_ctr32_6x
+
+	vmovdqu	xmm8,XMMWORD[r9]	; xmm8 = hash state
+	lea	r9,[((32+32))+r9]	; advance r9 by 64; the final store to [(-64)+r9] hits the original address
+	sub	rdx,12	; twelve blocks were already handled by the two calls above
+	mov	r10,0x60*2	; so the byte counter starts at 192
+	vpshufb	xmm8,xmm8,xmm0
+
+	call	_aesni_ctr32_ghash_6x
+	vmovdqu	xmm7,XMMWORD[32+rsp]	; from here on: fold the blocks left in the stack scratch area and in xmm9-xmm14 into the hash
+	vmovdqu	xmm0,XMMWORD[r11]	; reload the byte-reversal mask
+	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
+	vpunpckhqdq	xmm1,xmm7,xmm7
+	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
+	vmovups	XMMWORD[(-96)+rsi],xmm9	; store the last six output blocks while byte-reversing copies for hashing
+	vpshufb	xmm9,xmm9,xmm0
+	vpxor	xmm1,xmm1,xmm7
+	vmovups	XMMWORD[(-80)+rsi],xmm10
+	vpshufb	xmm10,xmm10,xmm0
+	vmovups	XMMWORD[(-64)+rsi],xmm11
+	vpshufb	xmm11,xmm11,xmm0
+	vmovups	XMMWORD[(-48)+rsi],xmm12
+	vpshufb	xmm12,xmm12,xmm0
+	vmovups	XMMWORD[(-32)+rsi],xmm13
+	vpshufb	xmm13,xmm13,xmm0
+	vmovups	XMMWORD[(-16)+rsi],xmm14
+	vpshufb	xmm14,xmm14,xmm0
+	vmovdqu	XMMWORD[16+rsp],xmm9
+	vmovdqu	xmm6,XMMWORD[48+rsp]
+	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
+	vpunpckhqdq	xmm2,xmm6,xmm6
+	vpclmulqdq	xmm5,xmm7,xmm3,0x00
+	vpxor	xmm2,xmm2,xmm6
+	vpclmulqdq	xmm7,xmm7,xmm3,0x11
+	vpclmulqdq	xmm1,xmm1,xmm15,0x00
+
+	vmovdqu	xmm9,XMMWORD[64+rsp]
+	vpclmulqdq	xmm4,xmm6,xmm0,0x00
+	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
+	vpxor	xmm4,xmm4,xmm5
+	vpunpckhqdq	xmm5,xmm9,xmm9
+	vpclmulqdq	xmm6,xmm6,xmm0,0x11
+	vpxor	xmm5,xmm5,xmm9
+	vpxor	xmm6,xmm6,xmm7
+	vpclmulqdq	xmm2,xmm2,xmm15,0x10
+	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
+	vpxor	xmm2,xmm2,xmm1
+
+	vmovdqu	xmm1,XMMWORD[80+rsp]
+	vpclmulqdq	xmm7,xmm9,xmm3,0x00
+	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
+	vpxor	xmm7,xmm7,xmm4
+	vpunpckhqdq	xmm4,xmm1,xmm1
+	vpclmulqdq	xmm9,xmm9,xmm3,0x11
+	vpxor	xmm4,xmm4,xmm1
+	vpxor	xmm9,xmm9,xmm6
+	vpclmulqdq	xmm5,xmm5,xmm15,0x00
+	vpxor	xmm5,xmm5,xmm2
+
+	vmovdqu	xmm2,XMMWORD[96+rsp]
+	vpclmulqdq	xmm6,xmm1,xmm0,0x00
+	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
+	vpxor	xmm6,xmm6,xmm7
+	vpunpckhqdq	xmm7,xmm2,xmm2
+	vpclmulqdq	xmm1,xmm1,xmm0,0x11
+	vpxor	xmm7,xmm7,xmm2
+	vpxor	xmm1,xmm1,xmm9
+	vpclmulqdq	xmm4,xmm4,xmm15,0x10
+	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
+	vpxor	xmm4,xmm4,xmm5
+
+	vpxor	xmm8,xmm8,XMMWORD[112+rsp]	; XOR the running hash state into the block at 112+rsp before multiplying
+	vpclmulqdq	xmm5,xmm2,xmm3,0x00
+	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
+	vpunpckhqdq	xmm9,xmm8,xmm8
+	vpxor	xmm5,xmm5,xmm6
+	vpclmulqdq	xmm2,xmm2,xmm3,0x11
+	vpxor	xmm9,xmm9,xmm8
+	vpxor	xmm2,xmm2,xmm1
+	vpclmulqdq	xmm7,xmm7,xmm15,0x00
+	vpxor	xmm4,xmm7,xmm4
+
+	vpclmulqdq	xmm6,xmm8,xmm0,0x00
+	vmovdqu	xmm3,XMMWORD[((0-32))+r9]
+	vpunpckhqdq	xmm1,xmm14,xmm14
+	vpclmulqdq	xmm8,xmm8,xmm0,0x11
+	vpxor	xmm1,xmm1,xmm14
+	vpxor	xmm5,xmm6,xmm5
+	vpclmulqdq	xmm9,xmm9,xmm15,0x10
+	vmovdqu	xmm15,XMMWORD[((32-32))+r9]
+	vpxor	xmm7,xmm8,xmm2
+	vpxor	xmm6,xmm9,xmm4
+
+	vmovdqu	xmm0,XMMWORD[((16-32))+r9]
+	vpxor	xmm9,xmm7,xmm5
+	vpclmulqdq	xmm4,xmm14,xmm3,0x00
+	vpxor	xmm6,xmm6,xmm9
+	vpunpckhqdq	xmm2,xmm13,xmm13
+	vpclmulqdq	xmm14,xmm14,xmm3,0x11
+	vpxor	xmm2,xmm2,xmm13
+	vpslldq	xmm9,xmm6,8
+	vpclmulqdq	xmm1,xmm1,xmm15,0x00
+	vpxor	xmm8,xmm5,xmm9
+	vpsrldq	xmm6,xmm6,8
+	vpxor	xmm7,xmm7,xmm6
+
+	vpclmulqdq	xmm5,xmm13,xmm0,0x00
+	vmovdqu	xmm3,XMMWORD[((48-32))+r9]
+	vpxor	xmm5,xmm5,xmm4
+	vpunpckhqdq	xmm9,xmm12,xmm12
+	vpclmulqdq	xmm13,xmm13,xmm0,0x11
+	vpxor	xmm9,xmm9,xmm12
+	vpxor	xmm13,xmm13,xmm14
+	vpalignr	xmm14,xmm8,xmm8,8
+	vpclmulqdq	xmm2,xmm2,xmm15,0x10
+	vmovdqu	xmm15,XMMWORD[((80-32))+r9]
+	vpxor	xmm2,xmm2,xmm1
+
+	vpclmulqdq	xmm4,xmm12,xmm3,0x00
+	vmovdqu	xmm0,XMMWORD[((64-32))+r9]
+	vpxor	xmm4,xmm4,xmm5
+	vpunpckhqdq	xmm1,xmm11,xmm11
+	vpclmulqdq	xmm12,xmm12,xmm3,0x11
+	vpxor	xmm1,xmm1,xmm11
+	vpxor	xmm12,xmm12,xmm13
+	vxorps	xmm7,xmm7,XMMWORD[16+rsp]
+	vpclmulqdq	xmm9,xmm9,xmm15,0x00
+	vpxor	xmm9,xmm9,xmm2
+
+	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10	; multiply by $L$poly (r11+16)
+	vxorps	xmm8,xmm8,xmm14
+
+	vpclmulqdq	xmm5,xmm11,xmm0,0x00
+	vmovdqu	xmm3,XMMWORD[((96-32))+r9]
+	vpxor	xmm5,xmm5,xmm4
+	vpunpckhqdq	xmm2,xmm10,xmm10
+	vpclmulqdq	xmm11,xmm11,xmm0,0x11
+	vpxor	xmm2,xmm2,xmm10
+	vpalignr	xmm14,xmm8,xmm8,8
+	vpxor	xmm11,xmm11,xmm12
+	vpclmulqdq	xmm1,xmm1,xmm15,0x10
+	vmovdqu	xmm15,XMMWORD[((128-32))+r9]
+	vpxor	xmm1,xmm1,xmm9
+
+	vxorps	xmm14,xmm14,xmm7
+	vpclmulqdq	xmm8,xmm8,XMMWORD[16+r11],0x10	; second multiply by $L$poly
+	vxorps	xmm8,xmm8,xmm14
+
+	vpclmulqdq	xmm4,xmm10,xmm3,0x00
+	vmovdqu	xmm0,XMMWORD[((112-32))+r9]
+	vpxor	xmm4,xmm4,xmm5
+	vpunpckhqdq	xmm9,xmm8,xmm8
+	vpclmulqdq	xmm10,xmm10,xmm3,0x11
+	vpxor	xmm9,xmm9,xmm8
+	vpxor	xmm10,xmm10,xmm11
+	vpclmulqdq	xmm2,xmm2,xmm15,0x00
+	vpxor	xmm2,xmm2,xmm1
+
+	vpclmulqdq	xmm5,xmm8,xmm0,0x00
+	vpclmulqdq	xmm7,xmm8,xmm0,0x11
+	vpxor	xmm5,xmm5,xmm4
+	vpclmulqdq	xmm6,xmm9,xmm15,0x10
+	vpxor	xmm7,xmm7,xmm10
+	vpxor	xmm6,xmm6,xmm2
+
+	vpxor	xmm4,xmm7,xmm5
+	vpxor	xmm6,xmm6,xmm4
+	vpslldq	xmm1,xmm6,8
+	vmovdqu	xmm3,XMMWORD[16+r11]	; xmm3 = $L$poly for the final two multiply/XOR steps
+	vpsrldq	xmm6,xmm6,8
+	vpxor	xmm8,xmm5,xmm1
+	vpxor	xmm7,xmm7,xmm6
+
+	vpalignr	xmm2,xmm8,xmm8,8
+	vpclmulqdq	xmm8,xmm8,xmm3,0x10
+	vpxor	xmm8,xmm8,xmm2
+
+	vpalignr	xmm2,xmm8,xmm8,8
+	vpclmulqdq	xmm8,xmm8,xmm3,0x10
+	vpxor	xmm2,xmm2,xmm7
+	vpxor	xmm8,xmm8,xmm2
+	vpshufb	xmm8,xmm8,XMMWORD[r11]	; byte-reverse the hash state back
+	vmovdqu	XMMWORD[(-64)+r9],xmm8	; and store it at the address originally passed as arg6
+
+	vzeroupper
+	movaps	xmm6,XMMWORD[((-216))+rax]	; restore xmm6-xmm15 and the pushed general registers from the frame
+	movaps	xmm7,XMMWORD[((-200))+rax]
+	movaps	xmm8,XMMWORD[((-184))+rax]
+	movaps	xmm9,XMMWORD[((-168))+rax]
+	movaps	xmm10,XMMWORD[((-152))+rax]
+	movaps	xmm11,XMMWORD[((-136))+rax]
+	movaps	xmm12,XMMWORD[((-120))+rax]
+	movaps	xmm13,XMMWORD[((-104))+rax]
+	movaps	xmm14,XMMWORD[((-88))+rax]
+	movaps	xmm15,XMMWORD[((-72))+rax]
+	mov	r15,QWORD[((-48))+rax]
+
+	mov	r14,QWORD[((-40))+rax]
+
+	mov	r13,QWORD[((-32))+rax]
+
+	mov	r12,QWORD[((-24))+rax]
+
+	mov	rbp,QWORD[((-16))+rax]
+
+	mov	rbx,QWORD[((-8))+rax]
+
+	lea	rsp,[rax]	; back to the entry stack pointer
+
+$L$gcm_enc_abort:
+	mov	rax,r10	; return value
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aesni_gcm_encrypt:
+ALIGN	64
+$L$bswap_mask:
+DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	; vpshufb mask that reverses the 16 bytes of a register (addressed as [r11])
+$L$poly:
+DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2	; constant fed to vpclmulqdq in the hash reduction steps (addressed as [16+r11])
+$L$one_msb:
+DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1	; 1 in byte 15; added with vpaddb (addressed as [32+r11])
+$L$two_lsb:
+DB	2,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0	; 2 in the low dword; added with vpaddd (addressed as [48+r11])
+$L$one_lsb:
+DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0	; 1 in the low dword; added with vpaddd (addressed as [64+r11])
+DB	65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108	; ASCII banner, continues on the next three lines: AES-NI GCM module for x86_64, CRYPTOGAMS by (author e-mail)
+DB	101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82
+DB	89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112
+DB	114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	; trailing 0 terminates the string
+ALIGN	64
+EXTERN	__imp_RtlVirtualUnwind
+; gcm_se_handler: exception handler named in the .xdata entries of aesni_gcm_decrypt/encrypt; rebuilds the caller's register state for unwinding.
+ALIGN	16
+gcm_se_handler:
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64
+; NOTE(review): r8 is taken to be CONTEXT* and r9 DISPATCHER_CONTEXT*; field names below follow the Win64 layouts - verify against winnt.h.
+	mov	rax,QWORD[120+r8]	; context->Rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	mov	rsi,QWORD[8+r9]	; disp->ImageBase
+	mov	r11,QWORD[56+r9]	; disp->HandlerData: the two label RVAs stored after the handler in .xdata
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: RVA of the ..._body label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault before the body label: rax still holds the entry rsp ("mov rax,rsp")
+
+	mov	rax,QWORD[152+r8]	; context->Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: RVA of the ..._abort label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; fault at or after the abort label: rsp was already restored
+
+	mov	rax,QWORD[120+r8]	; inside the body: context->Rax is the frame base set by "lea rax,[rsp]"
+
+	mov	r15,QWORD[((-48))+rax]	; fetch the general registers the function pushed below the frame base
+	mov	r14,QWORD[((-40))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	rbp,QWORD[((-16))+rax]
+	mov	rbx,QWORD[((-8))+rax]
+	mov	QWORD[240+r8],r15	; context->R15
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[144+r8],rbx	; context->Rbx
+
+	lea	rsi,[((-216))+rax]	; source: the xmm6-xmm15 save area written by the function prologue
+	lea	rdi,[512+r8]	; destination: context->Xmm6
+	mov	ecx,20	; 20 qwords = ten 16-byte registers
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+
+$L$common_seh_tail:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi as saved by the ";WIN64 prologue" stores at [8+rsp]/[16+rsp]
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; disp->ContextRecord
+	mov	rsi,r8
+	mov	ecx,154	; 154 qwords = 1232 bytes copied from the context at r8
+	DD	0xa548f3fc	; cld; rep movsq
+; Set up the RtlVirtualUnwind call: four register arguments plus four stack slots at [32..56+rsp].
+	mov	rsi,r9
+	xor	rcx,rcx	; arg1 = 0
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5
+	mov	QWORD[40+rsp],r11	; arg6
+	mov	QWORD[48+rsp],r12	; arg7
+	mov	QWORD[56+rsp],rcx	; arg8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; handler return value 1 (presumably ExceptionContinueSearch)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_aesni_gcm_decrypt wrt ..imagebase	; function-table entry: begin RVA, end RVA, unwind-info RVA
+	DD	$L$SEH_end_aesni_gcm_decrypt wrt ..imagebase
+	DD	$L$SEH_gcm_dec_info wrt ..imagebase
+
+	DD	$L$SEH_begin_aesni_gcm_encrypt wrt ..imagebase	; same triple for aesni_gcm_encrypt
+	DD	$L$SEH_end_aesni_gcm_encrypt wrt ..imagebase
+	DD	$L$SEH_gcm_enc_info wrt ..imagebase
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_gcm_dec_info:
+DB	9,0,0,0	; unwind-info header; 9 = version 1 with the exception-handler flag (1<<3); no prolog size, no unwind codes
+	DD	gcm_se_handler wrt ..imagebase	; handler RVA
+	DD	$L$gcm_dec_body wrt ..imagebase,$L$gcm_dec_abort wrt ..imagebase	; handler data: the two label RVAs gcm_se_handler compares Rip against
+$L$SEH_gcm_enc_info:
+DB	9,0,0,0	; same header layout as above
+	DD	gcm_se_handler wrt ..imagebase	; handler RVA
+	DD	$L$gcm_enc_body wrt ..imagebase,$L$gcm_enc_abort wrt ..imagebase	; handler data: body/abort label RVAs
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/aesni-x86_64.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
new file mode 100644
index 0000000..342c152
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/aesni-x86_64.asm
@@ -0,0 +1,2806 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+EXTERN	OPENSSL_ia32cap_P
+global	aes_hw_encrypt
+; aes_hw_encrypt: encrypts one 16-byte block. rcx = input, rdx = output, r8 = key schedule.
+ALIGN	16
+aes_hw_encrypt:
+
+%ifdef BORINGSSL_DISPATCH_TEST
+EXTERN	BORINGSSL_function_hit
+	mov	BYTE[((BORINGSSL_function_hit+1))],1	; dispatch-test builds only: flag byte 1 of BORINGSSL_function_hit
+%endif
+	movups	xmm2,XMMWORD[rcx]	; xmm2 = input block
+	mov	eax,DWORD[240+r8]	; eax = loop count stored at offset 240 of the key schedule
+	movups	xmm0,XMMWORD[r8]	; round key 0
+	movups	xmm1,XMMWORD[16+r8]	; round key 1
+	lea	r8,[32+r8]	; r8 -> round key 2
+	xorps	xmm2,xmm0	; initial key XOR
+$L$oop_enc1_1:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[r8]	; next round key (movups/lea leave the ZF from dec intact)
+	lea	r8,[16+r8]
+	jnz	NEAR $L$oop_enc1_1
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1
+	pxor	xmm0,xmm0	; wipe round-key material from xmm0/xmm1
+	pxor	xmm1,xmm1
+	movups	XMMWORD[rdx],xmm2	; store the encrypted block
+	pxor	xmm2,xmm2	; wipe the result register
+	DB	0F3h,0C3h		;repret
+
+
+
+global	aes_hw_decrypt
+; aes_hw_decrypt: decrypts one 16-byte block. rcx = input, rdx = output, r8 = key schedule.
+ALIGN	16
+aes_hw_decrypt:
+
+	movups	xmm2,XMMWORD[rcx]	; xmm2 = input block
+	mov	eax,DWORD[240+r8]	; eax = loop count stored at offset 240 of the key schedule
+	movups	xmm0,XMMWORD[r8]	; round key 0
+	movups	xmm1,XMMWORD[16+r8]	; round key 1
+	lea	r8,[32+r8]	; r8 -> round key 2
+	xorps	xmm2,xmm0	; initial key XOR
+$L$oop_dec1_2:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[r8]	; next round key (movups/lea leave the ZF from dec intact)
+	lea	r8,[16+r8]
+	jnz	NEAR $L$oop_dec1_2
+DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	pxor	xmm0,xmm0	; wipe round-key material from xmm0/xmm1
+	pxor	xmm1,xmm1
+	movups	XMMWORD[rdx],xmm2	; store the decrypted block
+	pxor	xmm2,xmm2	; wipe the result register
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+_aesni_encrypt2:
+; In: xmm2,xmm3 = blocks, rcx = key schedule, eax = loop counter value. Out: xmm2,xmm3 encrypted. Modifies rax, rcx, xmm0, xmm1.
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; counter*16 (the 32-bit write also zero-extends into rax)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0
+	xorps	xmm3,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + counter*16
+	neg	rax
+	add	rax,16	; rax = negative byte index that steps up to 0
+; Two rounds per pass; "add rax,32" sets ZF once the index reaches 0 (the movups after it does not touch flags).
+$L$enc_loop2:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$enc_loop2
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+_aesni_decrypt2:
+; In: xmm2,xmm3 = blocks, rcx = key schedule, eax = loop counter value. Out: xmm2,xmm3 decrypted. Modifies rax, rcx, xmm0, xmm1.
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; counter*16 (the 32-bit write also zero-extends into rax)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0
+	xorps	xmm3,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + counter*16
+	neg	rax
+	add	rax,16	; rax = negative byte index that steps up to 0
+; Two rounds per pass; "add rax,32" sets ZF once the index reaches 0 (the movups after it does not touch flags).
+$L$dec_loop2:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$dec_loop2
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
+DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+_aesni_encrypt3:
+; In: xmm2-xmm4 = blocks, rcx = key schedule, eax = loop counter value. Out: xmm2-xmm4 encrypted. Modifies rax, rcx, xmm0, xmm1.
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; counter*16 (the 32-bit write also zero-extends into rax)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0
+	xorps	xmm3,xmm0
+	xorps	xmm4,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + counter*16
+	neg	rax
+	add	rax,16	; rax = negative byte index that steps up to 0
+; Two rounds per pass; "add rax,32" sets ZF once the index reaches 0 (the movups after it does not touch flags).
+$L$enc_loop3:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$enc_loop3
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+DB	102,15,56,221,224	; aesenclast xmm4,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+_aesni_decrypt3:
+; In: xmm2-xmm4 = blocks, rcx = key schedule, eax = loop counter value. Out: xmm2-xmm4 decrypted. Modifies rax, rcx, xmm0, xmm1.
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; counter*16 (the 32-bit write also zero-extends into rax)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0
+	xorps	xmm3,xmm0
+	xorps	xmm4,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + counter*16
+	neg	rax
+	add	rax,16	; rax = negative byte index that steps up to 0
+; Two rounds per pass; "add rax,32" sets ZF once the index reaches 0 (the movups after it does not touch flags).
+$L$dec_loop3:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$dec_loop3
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
+DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
+DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+_aesni_encrypt4:
+; In: xmm2-xmm5 = blocks, rcx = key schedule, eax = loop counter value. Out: xmm2-xmm5 encrypted. Modifies rax, rcx, xmm0, xmm1.
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; counter*16 (the 32-bit write also zero-extends into rax)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0
+	xorps	xmm3,xmm0
+	xorps	xmm4,xmm0
+	xorps	xmm5,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + counter*16
+	neg	rax
+DB	0x0f,0x1f,0x00	; 3-byte nop (nop DWORD[rax]), padding only
+	add	rax,16	; rax = negative byte index that steps up to 0
+; Two rounds per pass; "add rax,32" sets ZF once the index reaches 0 (the movups after it does not touch flags).
+$L$enc_loop4:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$enc_loop4
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+DB	102,15,56,221,224	; aesenclast xmm4,xmm0
+DB	102,15,56,221,232	; aesenclast xmm5,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+_aesni_decrypt4:
+; In: xmm2-xmm5 = blocks, rcx = key schedule, eax = loop counter value. Out: xmm2-xmm5 decrypted. Modifies rax, rcx, xmm0, xmm1.
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; counter*16 (the 32-bit write also zero-extends into rax)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0
+	xorps	xmm3,xmm0
+	xorps	xmm4,xmm0
+	xorps	xmm5,xmm0
+	movups	xmm0,XMMWORD[32+rcx]	; round key 2
+	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + counter*16
+	neg	rax
+DB	0x0f,0x1f,0x00	; 3-byte nop (nop DWORD[rax]), padding only
+	add	rax,16	; rax = negative byte index that steps up to 0
+; Two rounds per pass; "add rax,32" sets ZF once the index reaches 0 (the movups after it does not touch flags).
+$L$dec_loop4:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$dec_loop4
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
+DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
+DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
+DB	102,15,56,223,232	; aesdeclast xmm5,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+_aesni_encrypt6:
+; In: xmm2-xmm7 = blocks, rcx = key schedule, eax = loop counter value. Out: xmm2-xmm7 encrypted. Modifies rax, rcx, xmm0, xmm1.
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; counter*16 (the 32-bit write also zero-extends into rax)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm0
+DB	102,15,56,220,209	; aesenc xmm2,xmm1 (first round starts while the remaining blocks are still being XORed with key 0)
+	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + counter*16
+	neg	rax
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+	pxor	xmm7,xmm0
+	movups	xmm0,XMMWORD[rax*1+rcx]	; round key 2 (rax+rcx = key+32 here)
+	add	rax,16
+	jmp	NEAR $L$enc_loop6_enter	; enter mid-loop: xmm2-xmm4 already had their first round
+ALIGN	16
+$L$enc_loop6:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+$L$enc_loop6_enter:
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; sets ZF when the index reaches 0; the movups below leaves flags alone
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$enc_loop6
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+DB	102,15,56,221,224	; aesenclast xmm4,xmm0
+DB	102,15,56,221,232	; aesenclast xmm5,xmm0
+DB	102,15,56,221,240	; aesenclast xmm6,xmm0
+DB	102,15,56,221,248	; aesenclast xmm7,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+_aesni_decrypt6:
+; In: xmm2-xmm7 = blocks, rcx = key schedule, eax = loop counter value. Out: xmm2-xmm7 decrypted. Modifies rax, rcx, xmm0, xmm1.
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; counter*16 (the 32-bit write also zero-extends into rax)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0
+	pxor	xmm3,xmm0
+	pxor	xmm4,xmm0
+DB	102,15,56,222,209	; aesdec xmm2,xmm1 (first round starts while the remaining blocks are still being XORed with key 0)
+	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + counter*16
+	neg	rax
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+	pxor	xmm7,xmm0
+	movups	xmm0,XMMWORD[rax*1+rcx]	; round key 2 (rax+rcx = key+32 here)
+	add	rax,16
+	jmp	NEAR $L$dec_loop6_enter	; enter mid-loop: xmm2-xmm4 already had their first round
+ALIGN	16
+$L$dec_loop6:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+$L$dec_loop6_enter:
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; sets ZF when the index reaches 0; the movups below leaves flags alone
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$dec_loop6
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
+DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
+DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
+DB	102,15,56,223,232	; aesdeclast xmm5,xmm0
+DB	102,15,56,223,240	; aesdeclast xmm6,xmm0
+DB	102,15,56,223,248	; aesdeclast xmm7,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+_aesni_encrypt8:
+; In: xmm2-xmm9 = blocks, rcx = key schedule, eax = loop counter value. Out: xmm2-xmm9 encrypted. Modifies rax, rcx, xmm0, xmm1.
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; counter*16 (the 32-bit write also zero-extends into rax)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0
+	xorps	xmm3,xmm0
+	pxor	xmm4,xmm0
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + counter*16
+	neg	rax
+DB	102,15,56,220,209	; aesenc xmm2,xmm1 (first round starts while the remaining blocks are still being XORed with key 0)
+	pxor	xmm7,xmm0
+	pxor	xmm8,xmm0
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	pxor	xmm9,xmm0
+	movups	xmm0,XMMWORD[rax*1+rcx]	; round key 2 (rax+rcx = key+32 here)
+	add	rax,16
+	jmp	NEAR $L$enc_loop8_inner	; enter mid-loop: xmm2/xmm3 already had their first round
+ALIGN	16
+$L$enc_loop8:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+$L$enc_loop8_inner:
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1 (REX prefix 68 selects xmm8-xmm15 as destination)
+DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
+$L$enc_loop8_enter:
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; sets ZF when the index reaches 0; the movups below leaves flags alone
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
+DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$enc_loop8
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
+DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
+DB	102,15,56,221,208	; aesenclast xmm2,xmm0
+DB	102,15,56,221,216	; aesenclast xmm3,xmm0
+DB	102,15,56,221,224	; aesenclast xmm4,xmm0
+DB	102,15,56,221,232	; aesenclast xmm5,xmm0
+DB	102,15,56,221,240	; aesenclast xmm6,xmm0
+DB	102,15,56,221,248	; aesenclast xmm7,xmm0
+DB	102,68,15,56,221,192	; aesenclast xmm8,xmm0
+DB	102,68,15,56,221,200	; aesenclast xmm9,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	16
+_aesni_decrypt8:
+; In: xmm2-xmm9 = blocks, rcx = key schedule, eax = loop counter value. Out: xmm2-xmm9 decrypted. Modifies rax, rcx, xmm0, xmm1.
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	shl	eax,4	; counter*16 (the 32-bit write also zero-extends into rax)
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm2,xmm0
+	xorps	xmm3,xmm0
+	pxor	xmm4,xmm0
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+	lea	rcx,[32+rax*1+rcx]	; rcx = key + 32 + counter*16
+	neg	rax
+DB	102,15,56,222,209	; aesdec xmm2,xmm1 (first round starts while the remaining blocks are still being XORed with key 0)
+	pxor	xmm7,xmm0
+	pxor	xmm8,xmm0
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+	pxor	xmm9,xmm0
+	movups	xmm0,XMMWORD[rax*1+rcx]	; round key 2 (rax+rcx = key+32 here)
+	add	rax,16
+	jmp	NEAR $L$dec_loop8_inner	; enter mid-loop: xmm2/xmm3 already had their first round
+ALIGN	16
+$L$dec_loop8:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+$L$dec_loop8_inner:
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1 (REX prefix 68 selects xmm8-xmm15 as destination)
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+$L$dec_loop8_enter:
+	movups	xmm1,XMMWORD[rax*1+rcx]
+	add	rax,32	; sets ZF when the index reaches 0; the movups below leaves flags alone
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+DB	102,68,15,56,222,192	; aesdec xmm8,xmm0
+DB	102,68,15,56,222,200	; aesdec xmm9,xmm0
+	movups	xmm0,XMMWORD[((-16))+rax*1+rcx]
+	jnz	NEAR $L$dec_loop8
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+DB	102,15,56,223,208	; aesdeclast xmm2,xmm0
+DB	102,15,56,223,216	; aesdeclast xmm3,xmm0
+DB	102,15,56,223,224	; aesdeclast xmm4,xmm0
+DB	102,15,56,223,232	; aesdeclast xmm5,xmm0
+DB	102,15,56,223,240	; aesdeclast xmm6,xmm0
+DB	102,15,56,223,248	; aesdeclast xmm7,xmm0
+DB	102,68,15,56,223,192	; aesdeclast xmm8,xmm0
+DB	102,68,15,56,223,200	; aesdeclast xmm9,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+global	aes_hw_ecb_encrypt
+
+ALIGN	16
+aes_hw_ecb_encrypt:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes_hw_ecb_encrypt:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+
+
+
+	lea	rsp,[((-88))+rsp]
+	movaps	XMMWORD[rsp],xmm6
+	movaps	XMMWORD[16+rsp],xmm7
+	movaps	XMMWORD[32+rsp],xmm8
+	movaps	XMMWORD[48+rsp],xmm9
+$L$ecb_enc_body:
+	and	rdx,-16
+	jz	NEAR $L$ecb_ret
+
+	mov	eax,DWORD[240+rcx]
+	movups	xmm0,XMMWORD[rcx]
+	mov	r11,rcx
+	mov	r10d,eax
+	test	r8d,r8d
+	jz	NEAR $L$ecb_decrypt
+
+	cmp	rdx,0x80
+	jb	NEAR $L$ecb_enc_tail
+
+	movdqu	xmm2,XMMWORD[rdi]
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movdqu	xmm8,XMMWORD[96+rdi]
+	movdqu	xmm9,XMMWORD[112+rdi]
+	lea	rdi,[128+rdi]
+	sub	rdx,0x80
+	jmp	NEAR $L$ecb_enc_loop8_enter
+ALIGN	16
+$L$ecb_enc_loop8:
+	movups	XMMWORD[rsi],xmm2
+	mov	rcx,r11
+	movdqu	xmm2,XMMWORD[rdi]
+	mov	eax,r10d
+	movups	XMMWORD[16+rsi],xmm3
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movups	XMMWORD[32+rsi],xmm4
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movups	XMMWORD[48+rsi],xmm5
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movups	XMMWORD[64+rsi],xmm6
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movups	XMMWORD[80+rsi],xmm7
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movups	XMMWORD[96+rsi],xmm8
+	movdqu	xmm8,XMMWORD[96+rdi]
+	movups	XMMWORD[112+rsi],xmm9
+	lea	rsi,[128+rsi]
+	movdqu	xmm9,XMMWORD[112+rdi]
+	lea	rdi,[128+rdi]
+$L$ecb_enc_loop8_enter:
+
+	call	_aesni_encrypt8
+
+	sub	rdx,0x80
+	jnc	NEAR $L$ecb_enc_loop8
+
+	movups	XMMWORD[rsi],xmm2
+	mov	rcx,r11
+	movups	XMMWORD[16+rsi],xmm3
+	mov	eax,r10d
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	movups	XMMWORD[96+rsi],xmm8
+	movups	XMMWORD[112+rsi],xmm9
+	lea	rsi,[128+rsi]
+	add	rdx,0x80
+	jz	NEAR $L$ecb_ret
+
+$L$ecb_enc_tail:
+	movups	xmm2,XMMWORD[rdi]
+	cmp	rdx,0x20
+	jb	NEAR $L$ecb_enc_one
+	movups	xmm3,XMMWORD[16+rdi]
+	je	NEAR $L$ecb_enc_two
+	movups	xmm4,XMMWORD[32+rdi]
+	cmp	rdx,0x40
+	jb	NEAR $L$ecb_enc_three
+	movups	xmm5,XMMWORD[48+rdi]
+	je	NEAR $L$ecb_enc_four
+	movups	xmm6,XMMWORD[64+rdi]
+	cmp	rdx,0x60
+	jb	NEAR $L$ecb_enc_five
+	movups	xmm7,XMMWORD[80+rdi]
+	je	NEAR $L$ecb_enc_six
+	movdqu	xmm8,XMMWORD[96+rdi]
+	xorps	xmm9,xmm9
+	call	_aesni_encrypt8
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	movups	XMMWORD[96+rsi],xmm8
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_one:
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_enc1_3:
+DB	102,15,56,220,209
+	dec	eax
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_enc1_3
+DB	102,15,56,221,209
+	movups	XMMWORD[rsi],xmm2
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_two:
+	call	_aesni_encrypt2
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_three:
+	call	_aesni_encrypt3
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_four:
+	call	_aesni_encrypt4
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_five:
+	xorps	xmm7,xmm7
+	call	_aesni_encrypt6
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_enc_six:
+	call	_aesni_encrypt6
+	movups	XMMWORD[rsi],xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	jmp	NEAR $L$ecb_ret
+
+ALIGN	16
+$L$ecb_decrypt:
+	cmp	rdx,0x80
+	jb	NEAR $L$ecb_dec_tail
+
+	movdqu	xmm2,XMMWORD[rdi]
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movdqu	xmm8,XMMWORD[96+rdi]
+	movdqu	xmm9,XMMWORD[112+rdi]
+	lea	rdi,[128+rdi]
+	sub	rdx,0x80
+	jmp	NEAR $L$ecb_dec_loop8_enter
+ALIGN	16
+$L$ecb_dec_loop8:
+	movups	XMMWORD[rsi],xmm2
+	mov	rcx,r11
+	movdqu	xmm2,XMMWORD[rdi]
+	mov	eax,r10d
+	movups	XMMWORD[16+rsi],xmm3
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movups	XMMWORD[32+rsi],xmm4
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movups	XMMWORD[48+rsi],xmm5
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movups	XMMWORD[64+rsi],xmm6
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movups	XMMWORD[80+rsi],xmm7
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movups	XMMWORD[96+rsi],xmm8
+	movdqu	xmm8,XMMWORD[96+rdi]
+	movups	XMMWORD[112+rsi],xmm9
+	lea	rsi,[128+rsi]
+	movdqu	xmm9,XMMWORD[112+rdi]
+	lea	rdi,[128+rdi]
+$L$ecb_dec_loop8_enter:
+
+	call	_aesni_decrypt8
+
+	movups	xmm0,XMMWORD[r11]
+	sub	rdx,0x80
+	jnc	NEAR $L$ecb_dec_loop8
+
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	mov	rcx,r11
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	mov	eax,r10d
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	pxor	xmm7,xmm7
+	movups	XMMWORD[96+rsi],xmm8
+	pxor	xmm8,xmm8
+	movups	XMMWORD[112+rsi],xmm9
+	pxor	xmm9,xmm9
+	lea	rsi,[128+rsi]
+	add	rdx,0x80
+	jz	NEAR $L$ecb_ret
+
+$L$ecb_dec_tail:
+	movups	xmm2,XMMWORD[rdi]
+	cmp	rdx,0x20
+	jb	NEAR $L$ecb_dec_one
+	movups	xmm3,XMMWORD[16+rdi]
+	je	NEAR $L$ecb_dec_two
+	movups	xmm4,XMMWORD[32+rdi]
+	cmp	rdx,0x40
+	jb	NEAR $L$ecb_dec_three
+	movups	xmm5,XMMWORD[48+rdi]
+	je	NEAR $L$ecb_dec_four
+	movups	xmm6,XMMWORD[64+rdi]
+	cmp	rdx,0x60
+	jb	NEAR $L$ecb_dec_five
+	movups	xmm7,XMMWORD[80+rdi]
+	je	NEAR $L$ecb_dec_six
+	movups	xmm8,XMMWORD[96+rdi]
+	movups	xmm0,XMMWORD[rcx]
+	xorps	xmm9,xmm9
+	call	_aesni_decrypt8
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	pxor	xmm7,xmm7
+	movups	XMMWORD[96+rsi],xmm8
+	pxor	xmm8,xmm8
+	pxor	xmm9,xmm9
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_one:
+	movups	xmm0,XMMWORD[rcx]
+	movups	xmm1,XMMWORD[16+rcx]
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_dec1_4:
+DB	102,15,56,222,209
+	dec	eax
+	movups	xmm1,XMMWORD[rcx]
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_dec1_4
+DB	102,15,56,223,209
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_two:
+	call	_aesni_decrypt2
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_three:
+	call	_aesni_decrypt3
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_four:
+	call	_aesni_decrypt4
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_five:
+	xorps	xmm7,xmm7
+	call	_aesni_decrypt6
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7
+	jmp	NEAR $L$ecb_ret
+ALIGN	16
+$L$ecb_dec_six:
+	call	_aesni_decrypt6
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movups	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movups	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	movups	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	movups	XMMWORD[80+rsi],xmm7
+	pxor	xmm7,xmm7
+
+$L$ecb_ret:
+	xorps	xmm0,xmm0
+	pxor	xmm1,xmm1
+	movaps	xmm6,XMMWORD[rsp]
+	movaps	XMMWORD[rsp],xmm0
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	XMMWORD[48+rsp],xmm0
+	lea	rsp,[88+rsp]
+$L$ecb_enc_ret:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes_hw_ecb_encrypt:
+global	aes_hw_ctr32_encrypt_blocks
+; aes_hw_ctr32_encrypt_blocks: CTR-mode encryption; output = AES(counter block) xor input, 16 bytes at a time.
+ALIGN	16
+aes_hw_ctr32_encrypt_blocks:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes_hw_ctr32_encrypt_blocks:
+	mov	rdi,rcx	; rdi = arg 1: input pointer (blocks are loaded from [rdi])
+	mov	rsi,rdx	; rsi = arg 2: output pointer (blocks are stored to [rsi])
+	mov	rdx,r8	; rdx = arg 3: number of 16-byte blocks
+	mov	rcx,r9	; rcx = arg 4: key schedule (round keys at [rcx], round-loop count at [240+rcx])
+	mov	r8,QWORD[40+rsp]	; r8 = arg 5 (on the stack): pointer to the 16-byte counter block
+
+
+
+%ifdef BORINGSSL_DISPATCH_TEST
+	mov	BYTE[BORINGSSL_function_hit],1
+%endif
+	cmp	rdx,1
+	jne	NEAR $L$ctr32_bulk	; anything other than exactly one block takes the bulk path
+
+
+; Single-block path: no stack frame, no counter increment.
+	movups	xmm2,XMMWORD[r8]	; xmm2 = counter block
+	movups	xmm3,XMMWORD[rdi]	; xmm3 = input block
+	mov	edx,DWORD[240+rcx]	; edx = round-loop count from the key schedule
+	movups	xmm0,XMMWORD[rcx]	; xmm0 = round key 0
+	movups	xmm1,XMMWORD[16+rcx]	; xmm1 = round key 1
+	lea	rcx,[32+rcx]	; rcx -> round key 2
+	xorps	xmm2,xmm0	; counter ^ round key 0
+$L$oop_enc1_5:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	edx
+	movups	xmm1,XMMWORD[rcx]	; next round key (movups/lea do not touch the flags set by dec)
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_enc1_5
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1
+	pxor	xmm0,xmm0	; zero xmm0/xmm1, which held round keys
+	pxor	xmm1,xmm1
+	xorps	xmm2,xmm3	; output = encrypted counter ^ input
+	pxor	xmm3,xmm3
+	movups	XMMWORD[rsi],xmm2
+	xorps	xmm2,xmm2
+	jmp	NEAR $L$ctr32_epilogue
+
+ALIGN	16
+$L$ctr32_bulk:	; multi-block path: build eight pre-whitened counter blocks on the stack, then dispatch to the 6x loop, the 8x loop or the tail
+	lea	r11,[rsp]	; r11 = rsp before the frame is carved out; restore base used at $L$ctr32_done
+
+	push	rbp
+
+	sub	rsp,288
+	and	rsp,-16	; align rsp to 16 bytes, as the movdqa/movaps accesses to [rsp+...] require
+	movaps	XMMWORD[(-168)+r11],xmm6	; save xmm6-xmm15 below r11 (reloaded at $L$ctr32_done)
+	movaps	XMMWORD[(-152)+r11],xmm7
+	movaps	XMMWORD[(-136)+r11],xmm8
+	movaps	XMMWORD[(-120)+r11],xmm9
+	movaps	XMMWORD[(-104)+r11],xmm10
+	movaps	XMMWORD[(-88)+r11],xmm11
+	movaps	XMMWORD[(-72)+r11],xmm12
+	movaps	XMMWORD[(-56)+r11],xmm13
+	movaps	XMMWORD[(-40)+r11],xmm14
+	movaps	XMMWORD[(-24)+r11],xmm15
+$L$ctr32_body:
+
+
+
+; Stack slots [rsp+16*i], i = 0..7, end up holding (counter block with counter+i) ^ round key 0.
+	movdqu	xmm2,XMMWORD[r8]	; xmm2 = counter block
+	movdqu	xmm0,XMMWORD[rcx]	; xmm0 = round key 0
+	mov	r8d,DWORD[12+r8]	; r8d = last dword of the counter block (r8 stops being a pointer here)
+	pxor	xmm2,xmm0	; xmm2 = counter block ^ round key 0
+	mov	ebp,DWORD[12+rcx]	; ebp = last dword of round key 0, xored into every generated counter dword
+	movdqa	XMMWORD[rsp],xmm2	; slot 0
+	bswap	r8d	; counter as a native integer (it is stored byte-swapped in the block)
+	movdqa	xmm3,xmm2
+	movdqa	xmm4,xmm2
+	movdqa	xmm5,xmm2
+	movdqa	XMMWORD[64+rsp],xmm2	; slots 4..7 start as copies; their last dword is patched below
+	movdqa	XMMWORD[80+rsp],xmm2
+	movdqa	XMMWORD[96+rsp],xmm2
+	mov	r10,rdx	; park the block count while rdx is used as scratch
+	movdqa	XMMWORD[112+rsp],xmm2
+
+	lea	rax,[1+r8]
+	lea	rdx,[2+r8]
+	bswap	eax
+	bswap	edx
+	xor	eax,ebp	; eax = bswap(counter+1) ^ key-0 dword
+	xor	edx,ebp	; edx = bswap(counter+2) ^ key-0 dword
+DB	102,15,58,34,216,3	; pinsrd xmm3,eax,3
+	lea	rax,[3+r8]
+	movdqa	XMMWORD[16+rsp],xmm3	; slot 1
+DB	102,15,58,34,226,3	; pinsrd xmm4,edx,3
+	bswap	eax
+	mov	rdx,r10	; restore the block count
+	lea	r10,[4+r8]
+	movdqa	XMMWORD[32+rsp],xmm4	; slot 2
+	xor	eax,ebp
+	bswap	r10d
+DB	102,15,58,34,232,3	; pinsrd xmm5,eax,3
+	xor	r10d,ebp
+	movdqa	XMMWORD[48+rsp],xmm5	; slot 3
+	lea	r9,[5+r8]
+	mov	DWORD[((64+12))+rsp],r10d	; slot 4, last dword
+	bswap	r9d
+	lea	r10,[6+r8]
+	mov	eax,DWORD[240+rcx]	; eax = round-loop count from the key schedule
+	xor	r9d,ebp
+	bswap	r10d
+	mov	DWORD[((80+12))+rsp],r9d	; slot 5, last dword
+	xor	r10d,ebp
+	lea	r9,[7+r8]
+	mov	DWORD[((96+12))+rsp],r10d	; slot 6, last dword
+	bswap	r9d
+	lea	r10,[OPENSSL_ia32cap_P]
+	mov	r10d,DWORD[4+r10]	; second dword of the capability vector
+	xor	r9d,ebp
+	and	r10d,71303168	; keep bits 22 and 26 (mask 0x4400000)
+	mov	DWORD[((112+12))+rsp],r9d	; slot 7, last dword
+
+	movups	xmm1,XMMWORD[16+rcx]	; xmm1 = round key 1
+
+	movdqa	xmm6,XMMWORD[64+rsp]	; xmm2..xmm7 now hold counter blocks 0..5
+	movdqa	xmm7,XMMWORD[80+rsp]
+
+	cmp	rdx,8
+	jb	NEAR $L$ctr32_tail	; fewer than 8 blocks: tail only
+
+	sub	rdx,6
+	cmp	r10d,4194304	; 0x400000: bit 22 set, bit 26 clear. NOTE(review): presumably MOVBE without XSAVE (the 6x loop uses movbe) - confirm
+	je	NEAR $L$ctr32_6x
+
+	lea	rcx,[128+rcx]	; bias the key pointer by +128; $L$ctr32_loop8 addresses round keys as ((N-128))+rcx
+	sub	rdx,2	; together with the sub 6 above: rdx -= 8 for the first 8-block pass
+	jmp	NEAR $L$ctr32_loop8
+
+ALIGN	16
+$L$ctr32_6x:	; set up key addressing for the 6-blocks-per-iteration loop
+	shl	eax,4	; eax = 16 * round-loop count
+	mov	r10d,48
+	bswap	ebp	; byte-swap the key-0 dword so it can be xored in before movbe's byte-swapping store
+	lea	rcx,[32+rax*1+rcx]	; rcx = key schedule + 32 + 16*count
+	sub	r10,rax	; r10 = 48 - 16*count, so ((-48))+r10+rcx is key-schedule offset 32
+	jmp	NEAR $L$ctr32_loop6
+
+ALIGN	16
+$L$ctr32_loop6:	; encrypt the six counter blocks in xmm2..xmm7 while writing the next six counters to stack slots 0..5
+	add	r8d,6	; advance the counter by six blocks
+	movups	xmm0,XMMWORD[((-48))+r10*1+rcx]	; key-schedule offset 32
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	mov	eax,r8d
+	xor	eax,ebp
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	0x0f,0x38,0xf1,0x44,0x24,12	; movbe DWORD[12+rsp],eax  (slot 0 counter dword)
+	lea	eax,[1+r8]
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+	xor	eax,ebp
+DB	0x0f,0x38,0xf1,0x44,0x24,28	; movbe DWORD[28+rsp],eax  (slot 1)
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+	lea	eax,[2+r8]
+	xor	eax,ebp
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	0x0f,0x38,0xf1,0x44,0x24,44	; movbe DWORD[44+rsp],eax  (slot 2)
+	lea	eax,[3+r8]
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+	movups	xmm1,XMMWORD[((-32))+r10*1+rcx]	; key-schedule offset 48
+	xor	eax,ebp
+
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	0x0f,0x38,0xf1,0x44,0x24,60	; movbe DWORD[60+rsp],eax  (slot 3)
+	lea	eax,[4+r8]
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+	xor	eax,ebp
+DB	0x0f,0x38,0xf1,0x44,0x24,76	; movbe DWORD[76+rsp],eax  (slot 4)
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+	lea	eax,[5+r8]
+	xor	eax,ebp
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+DB	0x0f,0x38,0xf1,0x44,0x24,92	; movbe DWORD[92+rsp],eax  (slot 5)
+	mov	rax,r10	; NOTE(review): rax presumably serves as the key-offset cursor for $L$enc_loop6 - confirm at its definition
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+	movups	xmm0,XMMWORD[((-16))+r10*1+rcx]	; key-schedule offset 64
+
+	call	$L$enc_loop6	; defined outside this section; presumably runs the remaining rounds on xmm2..xmm7
+
+	movdqu	xmm8,XMMWORD[rdi]	; load six input blocks
+	movdqu	xmm9,XMMWORD[16+rdi]
+	movdqu	xmm10,XMMWORD[32+rdi]
+	movdqu	xmm11,XMMWORD[48+rdi]
+	movdqu	xmm12,XMMWORD[64+rdi]
+	movdqu	xmm13,XMMWORD[80+rdi]
+	lea	rdi,[96+rdi]
+	movups	xmm1,XMMWORD[((-64))+r10*1+rcx]	; reload key-schedule offset 16 (round key 1) for the next pass
+	pxor	xmm8,xmm2	; input ^ encrypted counter
+	movaps	xmm2,XMMWORD[rsp]	; xmm2..xmm7 = next six counter blocks from the stack
+	pxor	xmm9,xmm3
+	movaps	xmm3,XMMWORD[16+rsp]
+	pxor	xmm10,xmm4
+	movaps	xmm4,XMMWORD[32+rsp]
+	pxor	xmm11,xmm5
+	movaps	xmm5,XMMWORD[48+rsp]
+	pxor	xmm12,xmm6
+	movaps	xmm6,XMMWORD[64+rsp]
+	pxor	xmm13,xmm7
+	movaps	xmm7,XMMWORD[80+rsp]
+	movdqu	XMMWORD[rsi],xmm8	; store six output blocks
+	movdqu	XMMWORD[16+rsi],xmm9
+	movdqu	XMMWORD[32+rsi],xmm10
+	movdqu	XMMWORD[48+rsi],xmm11
+	movdqu	XMMWORD[64+rsi],xmm12
+	movdqu	XMMWORD[80+rsi],xmm13
+	lea	rsi,[96+rsi]
+
+	sub	rdx,6
+	jnc	NEAR $L$ctr32_loop6	; loop while the block count did not borrow
+
+	add	rdx,6	; undo the last subtraction: rdx = blocks still to do
+	jz	NEAR $L$ctr32_done
+
+	lea	eax,[((-48))+r10]	; eax = -16 * count
+	lea	rcx,[((-80))+r10*1+rcx]	; rcx back to the start of the key schedule
+	neg	eax
+	shr	eax,4	; eax = round-loop count again
+	jmp	NEAR $L$ctr32_tail
+
+ALIGN	32
+$L$ctr32_loop8:	; encrypt eight counter blocks (xmm2..xmm9) per pass while writing the next eight counters to stack slots 0..7; rcx is biased by +128 here
+	add	r8d,8	; advance the counter by eight blocks
+	movdqa	xmm8,XMMWORD[96+rsp]	; lanes 6 and 7 of the current batch come from the stack
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	mov	r9d,r8d
+	movdqa	xmm9,XMMWORD[112+rsp]
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	bswap	r9d
+	movups	xmm0,XMMWORD[((32-128))+rcx]	; key-schedule offset 32
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+	xor	r9d,ebp
+	nop
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+	mov	DWORD[((0+12))+rsp],r9d	; slot 0: next counter dword
+	lea	r9,[1+r8]
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
+DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
+	movups	xmm1,XMMWORD[((48-128))+rcx]	; key-schedule offset 48
+	bswap	r9d
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+	xor	r9d,ebp
+DB	0x66,0x90	; two-byte nop
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+	mov	DWORD[((16+12))+rsp],r9d	; slot 1
+	lea	r9,[2+r8]
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
+DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
+	movups	xmm0,XMMWORD[((64-128))+rcx]	; key-schedule offset 64
+	bswap	r9d
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	xor	r9d,ebp
+DB	0x66,0x90	; two-byte nop
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+	mov	DWORD[((32+12))+rsp],r9d	; slot 2
+	lea	r9,[3+r8]
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
+DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
+	movups	xmm1,XMMWORD[((80-128))+rcx]	; key-schedule offset 80
+	bswap	r9d
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+	xor	r9d,ebp
+DB	0x66,0x90	; two-byte nop
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+	mov	DWORD[((48+12))+rsp],r9d	; slot 3
+	lea	r9,[4+r8]
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
+DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
+	movups	xmm0,XMMWORD[((96-128))+rcx]	; key-schedule offset 96
+	bswap	r9d
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	xor	r9d,ebp
+DB	0x66,0x90	; two-byte nop
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+	mov	DWORD[((64+12))+rsp],r9d	; slot 4
+	lea	r9,[5+r8]
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
+DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
+	movups	xmm1,XMMWORD[((112-128))+rcx]	; key-schedule offset 112
+	bswap	r9d
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+	xor	r9d,ebp
+DB	0x66,0x90	; two-byte nop
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+	mov	DWORD[((80+12))+rsp],r9d	; slot 5
+	lea	r9,[6+r8]
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
+DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
+	movups	xmm0,XMMWORD[((128-128))+rcx]	; key-schedule offset 128
+	bswap	r9d
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	xor	r9d,ebp
+DB	0x66,0x90	; two-byte nop
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+	mov	DWORD[((96+12))+rsp],r9d	; slot 6
+	lea	r9,[7+r8]
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
+DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
+	movups	xmm1,XMMWORD[((144-128))+rcx]	; key-schedule offset 144
+	bswap	r9d
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+	xor	r9d,ebp
+	movdqu	xmm10,XMMWORD[rdi]	; start loading input: xmm10 = input block 0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+	mov	DWORD[((112+12))+rsp],r9d	; slot 7
+	cmp	eax,11	; flags on the round-loop count; consumed by the jb/je below (the SSE/AES ops in between leave flags alone)
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
+DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
+	movups	xmm0,XMMWORD[((160-128))+rcx]	; key-schedule offset 160
+
+	jb	NEAR $L$ctr32_enc_done	; count < 11: no extra rounds
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
+DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
+	movups	xmm1,XMMWORD[((176-128))+rcx]	; key-schedule offset 176
+
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
+DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
+	movups	xmm0,XMMWORD[((192-128))+rcx]	; key-schedule offset 192
+	je	NEAR $L$ctr32_enc_done	; count == 11: two extra rounds were enough
+
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
+DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
+	movups	xmm1,XMMWORD[((208-128))+rcx]	; key-schedule offset 208
+
+DB	102,15,56,220,208	; aesenc xmm2,xmm0
+DB	102,15,56,220,216	; aesenc xmm3,xmm0
+DB	102,15,56,220,224	; aesenc xmm4,xmm0
+DB	102,15,56,220,232	; aesenc xmm5,xmm0
+DB	102,15,56,220,240	; aesenc xmm6,xmm0
+DB	102,15,56,220,248	; aesenc xmm7,xmm0
+DB	102,68,15,56,220,192	; aesenc xmm8,xmm0
+DB	102,68,15,56,220,200	; aesenc xmm9,xmm0
+	movups	xmm0,XMMWORD[((224-128))+rcx]	; key-schedule offset 224
+	jmp	NEAR $L$ctr32_enc_done
+
+ALIGN	16
+$L$ctr32_enc_done:	; here xmm1 = second-to-last round key, xmm0 = last round key
+	movdqu	xmm11,XMMWORD[16+rdi]	; input blocks 1..5; each is xored with the last round key so that
+	pxor	xmm10,xmm0	; aesenclast with (input ^ last key) as its key operand yields the output block directly
+	movdqu	xmm12,XMMWORD[32+rdi]
+	pxor	xmm11,xmm0
+	movdqu	xmm13,XMMWORD[48+rdi]
+	pxor	xmm12,xmm0
+	movdqu	xmm14,XMMWORD[64+rdi]
+	pxor	xmm13,xmm0
+	movdqu	xmm15,XMMWORD[80+rdi]
+	pxor	xmm14,xmm0
+	pxor	xmm15,xmm0
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
+DB	102,68,15,56,220,201	; aesenc xmm9,xmm1
+	movdqu	xmm1,XMMWORD[96+rdi]	; xmm1 is free now: reuse it for input block 6
+	lea	rdi,[128+rdi]
+
+DB	102,65,15,56,221,210	; aesenclast xmm2,xmm10
+	pxor	xmm1,xmm0
+	movdqu	xmm10,XMMWORD[((112-128))+rdi]	; input block 7 (rdi was already advanced by 128)
+DB	102,65,15,56,221,219	; aesenclast xmm3,xmm11
+	pxor	xmm10,xmm0
+	movdqa	xmm11,XMMWORD[rsp]	; xmm11..xmm15, xmm0 = next counter blocks 0..5 from the stack
+DB	102,65,15,56,221,228	; aesenclast xmm4,xmm12
+DB	102,65,15,56,221,237	; aesenclast xmm5,xmm13
+	movdqa	xmm12,XMMWORD[16+rsp]
+	movdqa	xmm13,XMMWORD[32+rsp]
+DB	102,65,15,56,221,246	; aesenclast xmm6,xmm14
+DB	102,65,15,56,221,255	; aesenclast xmm7,xmm15
+	movdqa	xmm14,XMMWORD[48+rsp]
+	movdqa	xmm15,XMMWORD[64+rsp]
+DB	102,68,15,56,221,193	; aesenclast xmm8,xmm1
+	movdqa	xmm0,XMMWORD[80+rsp]
+	movups	xmm1,XMMWORD[((16-128))+rcx]	; reload key-schedule offset 16 (round key 1) for the next pass
+DB	102,69,15,56,221,202	; aesenclast xmm9,xmm10
+
+	movups	XMMWORD[rsi],xmm2	; store eight output blocks, refilling xmm2..xmm7 with the next counters
+	movdqa	xmm2,xmm11
+	movups	XMMWORD[16+rsi],xmm3
+	movdqa	xmm3,xmm12
+	movups	XMMWORD[32+rsi],xmm4
+	movdqa	xmm4,xmm13
+	movups	XMMWORD[48+rsi],xmm5
+	movdqa	xmm5,xmm14
+	movups	XMMWORD[64+rsi],xmm6
+	movdqa	xmm6,xmm15
+	movups	XMMWORD[80+rsi],xmm7
+	movdqa	xmm7,xmm0
+	movups	XMMWORD[96+rsi],xmm8
+	movups	XMMWORD[112+rsi],xmm9
+	lea	rsi,[128+rsi]
+
+	sub	rdx,8
+	jnc	NEAR $L$ctr32_loop8	; loop while the block count did not borrow
+
+	add	rdx,8	; undo the last subtraction: rdx = blocks still to do
+	jz	NEAR $L$ctr32_done
+	lea	rcx,[((-128))+rcx]	; remove the +128 bias before falling into the tail
+
+$L$ctr32_tail:	; fewer than 8 blocks left; on entry rcx = key schedule base, xmm1 = round key 1, eax = round-loop count
+
+
+	lea	rcx,[16+rcx]	; rcx -> key-schedule offset 16
+	cmp	rdx,4
+	jb	NEAR $L$ctr32_loop3	; fewer than 4 blocks
+	je	NEAR $L$ctr32_loop4	; exactly 4 blocks
+
+; 5..7 blocks: run seven lanes (xmm2..xmm8) through the shared 8-lane round loop.
+	shl	eax,4	; eax = 16 * round-loop count
+	movdqa	xmm8,XMMWORD[96+rsp]	; seventh counter block from stack slot 6
+	pxor	xmm9,xmm9	; eighth lane is unused: zero it
+
+	movups	xmm0,XMMWORD[16+rcx]	; key-schedule offset 32
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+	lea	rcx,[((32-16))+rax*1+rcx]	; rcx = key schedule + 32 + 16*count
+	neg	rax
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+	add	rax,16	; rax = 16 - 16*count
+	movups	xmm10,XMMWORD[rdi]	; begin loading input blocks
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+DB	102,15,56,220,241	; aesenc xmm6,xmm1
+	movups	xmm11,XMMWORD[16+rdi]
+	movups	xmm12,XMMWORD[32+rdi]
+DB	102,15,56,220,249	; aesenc xmm7,xmm1
+DB	102,68,15,56,220,193	; aesenc xmm8,xmm1
+
+	call	$L$enc_loop8_enter	; defined outside this section; presumably finishes the remaining rounds
+
+	movdqu	xmm13,XMMWORD[48+rdi]
+	pxor	xmm2,xmm10	; output = encrypted counter ^ input
+	movdqu	xmm10,XMMWORD[64+rdi]
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm6,xmm10
+	movdqu	XMMWORD[48+rsi],xmm5
+	movdqu	XMMWORD[64+rsi],xmm6
+	cmp	rdx,6
+	jb	NEAR $L$ctr32_done	; exactly 5 blocks
+
+	movups	xmm11,XMMWORD[80+rdi]
+	xorps	xmm7,xmm11
+	movups	XMMWORD[80+rsi],xmm7
+	je	NEAR $L$ctr32_done	; exactly 6 blocks (flags still from cmp rdx,6)
+
+	movups	xmm12,XMMWORD[96+rdi]
+	xorps	xmm8,xmm12
+	movups	XMMWORD[96+rsi],xmm8
+	jmp	NEAR $L$ctr32_done
+
+ALIGN	32
+$L$ctr32_loop4:	; exactly 4 blocks: four lanes, one round per iteration, eax iterations
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	lea	rcx,[16+rcx]
+	dec	eax
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+DB	102,15,56,220,233	; aesenc xmm5,xmm1
+	movups	xmm1,XMMWORD[rcx]	; next round key
+	jnz	NEAR $L$ctr32_loop4	; flags still from dec eax
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1
+DB	102,15,56,221,217	; aesenclast xmm3,xmm1
+	movups	xmm10,XMMWORD[rdi]
+	movups	xmm11,XMMWORD[16+rdi]
+DB	102,15,56,221,225	; aesenclast xmm4,xmm1
+DB	102,15,56,221,233	; aesenclast xmm5,xmm1
+	movups	xmm12,XMMWORD[32+rdi]
+	movups	xmm13,XMMWORD[48+rdi]
+
+	xorps	xmm2,xmm10
+	movups	XMMWORD[rsi],xmm2
+	xorps	xmm3,xmm11
+	movups	XMMWORD[16+rsi],xmm3
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[48+rsi],xmm5
+	jmp	NEAR $L$ctr32_done
+
+ALIGN	32
+$L$ctr32_loop3:	; fewer than 4 blocks: always encrypts three lanes, then stores only as many as rdx asks for
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	lea	rcx,[16+rcx]
+	dec	eax
+DB	102,15,56,220,217	; aesenc xmm3,xmm1
+DB	102,15,56,220,225	; aesenc xmm4,xmm1
+	movups	xmm1,XMMWORD[rcx]	; next round key
+	jnz	NEAR $L$ctr32_loop3	; flags still from dec eax
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1
+DB	102,15,56,221,217	; aesenclast xmm3,xmm1
+DB	102,15,56,221,225	; aesenclast xmm4,xmm1
+
+	movups	xmm10,XMMWORD[rdi]
+	xorps	xmm2,xmm10
+	movups	XMMWORD[rsi],xmm2	; NOTE(review): the first block is stored unconditionally, so a block count of 0 would still write 16 bytes - presumably callers never pass 0; confirm
+	cmp	rdx,2
+	jb	NEAR $L$ctr32_done
+
+	movups	xmm11,XMMWORD[16+rdi]
+	xorps	xmm3,xmm11
+	movups	XMMWORD[16+rsi],xmm3
+	je	NEAR $L$ctr32_done	; exactly 2 blocks (flags still from cmp rdx,2)
+
+	movups	xmm12,XMMWORD[32+rdi]
+	xorps	xmm4,xmm12
+	movups	XMMWORD[32+rsi],xmm4
+
+$L$ctr32_done:	; bulk-path exit: zero working registers and stack scratch, restore xmm6-xmm15/rbp, unwind the frame
+	xorps	xmm0,xmm0
+	xor	ebp,ebp	; ebp held a dword of round key 0
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	movaps	xmm6,XMMWORD[((-168))+r11]	; reload each saved xmm register, then overwrite its save slot with zero
+	movaps	XMMWORD[(-168)+r11],xmm0
+	movaps	xmm7,XMMWORD[((-152))+r11]
+	movaps	XMMWORD[(-152)+r11],xmm0
+	movaps	xmm8,XMMWORD[((-136))+r11]
+	movaps	XMMWORD[(-136)+r11],xmm0
+	movaps	xmm9,XMMWORD[((-120))+r11]
+	movaps	XMMWORD[(-120)+r11],xmm0
+	movaps	xmm10,XMMWORD[((-104))+r11]
+	movaps	XMMWORD[(-104)+r11],xmm0
+	movaps	xmm11,XMMWORD[((-88))+r11]
+	movaps	XMMWORD[(-88)+r11],xmm0
+	movaps	xmm12,XMMWORD[((-72))+r11]
+	movaps	XMMWORD[(-72)+r11],xmm0
+	movaps	xmm13,XMMWORD[((-56))+r11]
+	movaps	XMMWORD[(-56)+r11],xmm0
+	movaps	xmm14,XMMWORD[((-40))+r11]
+	movaps	XMMWORD[(-40)+r11],xmm0
+	movaps	xmm15,XMMWORD[((-24))+r11]
+	movaps	XMMWORD[(-24)+r11],xmm0
+	movaps	XMMWORD[rsp],xmm0	; zero the eight 16-byte counter-block slots at [rsp..rsp+127]
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	XMMWORD[48+rsp],xmm0
+	movaps	XMMWORD[64+rsp],xmm0
+	movaps	XMMWORD[80+rsp],xmm0
+	movaps	XMMWORD[96+rsp],xmm0
+	movaps	XMMWORD[112+rsp],xmm0
+	mov	rbp,QWORD[((-8))+r11]	; rbp was pushed immediately below r11 in $L$ctr32_bulk
+
+	lea	rsp,[r11]	; drop the whole frame in one step
+
+$L$ctr32_epilogue:	; shared with the single-block path, which never built a frame
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes_hw_ctr32_encrypt_blocks:
+global	aes_hw_cbc_encrypt
+; aes_hw_cbc_encrypt: CBC-mode encryption or decryption of a byte buffer; the chaining value is read from and written back to the IV buffer.
+ALIGN	16
+aes_hw_cbc_encrypt:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_aes_hw_cbc_encrypt:
+	mov	rdi,rcx	; rdi = arg 1: input pointer
+	mov	rsi,rdx	; rsi = arg 2: output pointer
+	mov	rdx,r8	; rdx = arg 3: length in bytes
+	mov	rcx,r9	; rcx = arg 4: key schedule (round-loop count at [240+rcx])
+	mov	r8,QWORD[40+rsp]	; r8 = arg 5 (stack): pointer to the 16-byte IV
+	mov	r9,QWORD[48+rsp]	; r9 = arg 6 (stack): non-zero selects encryption, zero decryption
+
+
+
+	test	rdx,rdx
+	jz	NEAR $L$cbc_ret	; zero length: nothing to do
+
+	mov	r10d,DWORD[240+rcx]	; r10d = round-loop count, kept for reloading eax
+	mov	r11,rcx	; r11 = key schedule base, kept for reloading rcx
+	test	r9d,r9d
+	jz	NEAR $L$cbc_decrypt
+
+	movups	xmm2,XMMWORD[r8]	; xmm2 = chaining value, initially the IV
+	mov	eax,r10d
+	cmp	rdx,16
+	jb	NEAR $L$cbc_enc_tail	; less than one full block
+	sub	rdx,16
+	jmp	NEAR $L$cbc_enc_loop
+ALIGN	16
+$L$cbc_enc_loop:	; one block per iteration: xmm2 = AES(xmm2 ^ input)
+	movups	xmm3,XMMWORD[rdi]	; xmm3 = input block
+	lea	rdi,[16+rdi]
+
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	xorps	xmm3,xmm0
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm3	; chaining value ^ input ^ round key 0
+$L$oop_enc1_6:
+DB	102,15,56,220,209	; aesenc xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[rcx]	; next round key (flags from dec survive movups/lea)
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_enc1_6
+DB	102,15,56,221,209	; aesenclast xmm2,xmm1
+	mov	eax,r10d	; rewind round count and key pointer for the next block
+	mov	rcx,r11
+	movups	XMMWORD[rsi],xmm2	; store the output block; xmm2 stays as the next chaining value
+	lea	rsi,[16+rsi]
+	sub	rdx,16
+	jnc	NEAR $L$cbc_enc_loop	; loop while the length did not borrow
+	add	rdx,16	; rdx = leftover bytes (0..15)
+	jnz	NEAR $L$cbc_enc_tail
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	movups	XMMWORD[r8],xmm2	; write the final chaining value back to the IV buffer
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	jmp	NEAR $L$cbc_ret
+
+$L$cbc_enc_tail:	; partial final block: copy the rdx leftover bytes into the output buffer, zero-fill to 16 bytes, then encrypt that block in place
+	mov	rcx,rdx	; rcx = byte count for the copy
+	xchg	rsi,rdi	; rsi = input, rdi = output, as the string instructions expect
+	DD	0x9066A4F3	; bytes F3 A4 66 90: rep movsb, then a two-byte nop
+	mov	ecx,16
+	sub	rcx,rdx	; rcx = 16 - leftover
+	xor	eax,eax	; al = 0 is the fill byte
+	DD	0x9066AAF3	; bytes F3 AA 66 90: rep stosb, then a two-byte nop
+	lea	rdi,[((-16))+rdi]	; rdi back to the start of the padded block in the output buffer
+	mov	eax,r10d
+	mov	rsi,rdi	; input and output both point at the padded block
+	mov	rcx,r11
+	xor	rdx,rdx	; rdx = 0 so the loop runs exactly once more
+	jmp	NEAR $L$cbc_enc_loop
+
+ALIGN	16
+$L$cbc_decrypt:	; decryption entry: exactly 16 bytes is handled inline here, everything else by $L$cbc_decrypt_bulk
+	cmp	rdx,16
+	jne	NEAR $L$cbc_decrypt_bulk
+
+
+
+	movdqu	xmm2,XMMWORD[rdi]	; xmm2 = input (ciphertext) block
+	movdqu	xmm3,XMMWORD[r8]	; xmm3 = IV
+	movdqa	xmm4,xmm2	; keep a copy of the ciphertext: it becomes the new IV
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_dec1_7:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	r10d
+	movups	xmm1,XMMWORD[rcx]	; next round key (flags from dec survive movups/lea)
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_dec1_7
+DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	movdqu	XMMWORD[r8],xmm4	; new IV = this ciphertext block
+	xorps	xmm2,xmm3	; output = decrypted block ^ old IV
+	pxor	xmm3,xmm3
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	jmp	NEAR $L$cbc_ret
+ALIGN	16
+$L$cbc_decrypt_bulk:	; multi-block decryption: build a frame, save xmm6-xmm15, then dispatch on the remaining length
+	lea	r11,[rsp]	; r11 = rsp before the frame; restore base used at $L$cbc_dec_ret
+
+	push	rbp
+
+	sub	rsp,176
+	and	rsp,-16	; align rsp to 16 bytes, as the movaps accesses to [rsp+...] require
+	movaps	XMMWORD[16+rsp],xmm6	; save xmm6-xmm15 at [rsp+16..rsp+175]; [rsp] is scratch used by $L$cbc_dec_tail_partial
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$cbc_decrypt_body:
+	mov	rbp,rcx	; rbp = key schedule base (the 6x loop reloads rcx from it)
+	movups	xmm10,XMMWORD[r8]	; xmm10 = chaining value, initially the IV
+	mov	eax,r10d	; eax = round-loop count
+	cmp	rdx,0x50
+	jbe	NEAR $L$cbc_dec_tail	; at most 0x50 bytes (5 blocks): tail code only
+
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	movdqu	xmm2,XMMWORD[rdi]	; load six ciphertext blocks into xmm2..xmm7,
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqa	xmm11,xmm2	; keeping copies of the first five in xmm11..xmm15 for the CBC xor
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movdqa	xmm12,xmm3
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movdqa	xmm13,xmm4
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movdqa	xmm14,xmm5
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movdqa	xmm15,xmm6
+	lea	r9,[OPENSSL_ia32cap_P]
+	mov	r9d,DWORD[4+r9]	; second dword of the capability vector
+	cmp	rdx,0x70
+	jbe	NEAR $L$cbc_dec_six_or_seven	; at most 0x70 bytes (7 blocks)
+
+	and	r9d,71303168	; keep bits 22 and 26 (mask 0x4400000)
+	sub	rdx,0x50
+	cmp	r9d,4194304	; 0x400000: bit 22 set, bit 26 clear. NOTE(review): presumably the same CPU class that gets the 6x path in the CTR code - confirm
+	je	NEAR $L$cbc_dec_loop6_enter
+	sub	rdx,0x20	; together with the sub 0x50 above: rdx = length - 0x70
+	lea	rcx,[112+rcx]	; bias the key pointer by +112; the 8x loop addresses round keys as ((N-112))+rcx
+	jmp	NEAR $L$cbc_dec_loop8_enter
+ALIGN	16
+$L$cbc_dec_loop8:	; 8 blocks per pass; rcx is biased by +112, xmm0 = round key 0, xmm10 = chaining value, xmm2..xmm7 preloaded
+	movups	XMMWORD[rsi],xmm9	; store the eighth output block of the previous pass
+	lea	rsi,[16+rsi]
+$L$cbc_dec_loop8_enter:
+	movdqu	xmm8,XMMWORD[96+rdi]	; ciphertext blocks 6 and 7
+	pxor	xmm2,xmm0	; whitening with round key 0
+	movdqu	xmm9,XMMWORD[112+rdi]
+	pxor	xmm3,xmm0
+	movups	xmm1,XMMWORD[((16-112))+rcx]	; key-schedule offset 16
+	pxor	xmm4,xmm0
+	mov	rbp,-1
+	cmp	rdx,0x70	; CF = (rdx < 0x70); consumed by the adc below (pxor/aesdec/movups leave flags alone)
+	pxor	xmm5,xmm0
+	pxor	xmm6,xmm0
+	pxor	xmm7,xmm0
+	pxor	xmm8,xmm0
+
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	pxor	xmm9,xmm0
+	movups	xmm0,XMMWORD[((32-112))+rcx]	; key-schedule offset 32
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
+	adc	rbp,0	; rbp = 0 if rdx < 0x70, else -1
+	and	rbp,128	; rbp = 0 or 128
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+	add	rbp,rdi	; rbp = rdi or rdi+128: where $L$cbc_dec_done preloads from. NOTE(review): presumably avoids reading past the input on the last pass - confirm
+	movups	xmm1,XMMWORD[((48-112))+rcx]	; key-schedule offset 48
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+DB	102,68,15,56,222,192	; aesdec xmm8,xmm0
+DB	102,68,15,56,222,200	; aesdec xmm9,xmm0
+	movups	xmm0,XMMWORD[((64-112))+rcx]	; key-schedule offset 64
+	nop
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+	movups	xmm1,XMMWORD[((80-112))+rcx]	; key-schedule offset 80
+	nop
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+DB	102,68,15,56,222,192	; aesdec xmm8,xmm0
+DB	102,68,15,56,222,200	; aesdec xmm9,xmm0
+	movups	xmm0,XMMWORD[((96-112))+rcx]	; key-schedule offset 96
+	nop
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+	movups	xmm1,XMMWORD[((112-112))+rcx]	; key-schedule offset 112
+	nop
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+DB	102,68,15,56,222,192	; aesdec xmm8,xmm0
+DB	102,68,15,56,222,200	; aesdec xmm9,xmm0
+	movups	xmm0,XMMWORD[((128-112))+rcx]	; key-schedule offset 128
+	nop
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+	movups	xmm1,XMMWORD[((144-112))+rcx]	; key-schedule offset 144
+	cmp	eax,11	; flags on the round-loop count; consumed by the jb/je below
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+DB	102,68,15,56,222,192	; aesdec xmm8,xmm0
+DB	102,68,15,56,222,200	; aesdec xmm9,xmm0
+	movups	xmm0,XMMWORD[((160-112))+rcx]	; key-schedule offset 160
+	jb	NEAR $L$cbc_dec_done	; count < 11: no extra rounds
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+	movups	xmm1,XMMWORD[((176-112))+rcx]	; key-schedule offset 176
+	nop
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+DB	102,68,15,56,222,192	; aesdec xmm8,xmm0
+DB	102,68,15,56,222,200	; aesdec xmm9,xmm0
+	movups	xmm0,XMMWORD[((192-112))+rcx]	; key-schedule offset 192
+	je	NEAR $L$cbc_dec_done	; count == 11: two extra rounds were enough
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+	movups	xmm1,XMMWORD[((208-112))+rcx]	; key-schedule offset 208
+	nop
+DB	102,15,56,222,208	; aesdec xmm2,xmm0
+DB	102,15,56,222,216	; aesdec xmm3,xmm0
+DB	102,15,56,222,224	; aesdec xmm4,xmm0
+DB	102,15,56,222,232	; aesdec xmm5,xmm0
+DB	102,15,56,222,240	; aesdec xmm6,xmm0
+DB	102,15,56,222,248	; aesdec xmm7,xmm0
+DB	102,68,15,56,222,192	; aesdec xmm8,xmm0
+DB	102,68,15,56,222,200	; aesdec xmm9,xmm0
+	movups	xmm0,XMMWORD[((224-112))+rcx]	; key-schedule offset 224
+	jmp	NEAR $L$cbc_dec_done
+ALIGN	16
+$L$cbc_dec_done:	; here xmm1 = second-to-last round key, xmm0 = last round key
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+DB	102,15,56,222,217	; aesdec xmm3,xmm1
+	pxor	xmm10,xmm0	; fold the last round key into the chaining value and saved ciphertexts, so that
+	pxor	xmm11,xmm0	; aesdeclast with that as its key operand yields the plaintext block directly
+DB	102,15,56,222,225	; aesdec xmm4,xmm1
+DB	102,15,56,222,233	; aesdec xmm5,xmm1
+	pxor	xmm12,xmm0
+	pxor	xmm13,xmm0
+DB	102,15,56,222,241	; aesdec xmm6,xmm1
+DB	102,15,56,222,249	; aesdec xmm7,xmm1
+	pxor	xmm14,xmm0
+	pxor	xmm15,xmm0
+DB	102,68,15,56,222,193	; aesdec xmm8,xmm1
+DB	102,68,15,56,222,201	; aesdec xmm9,xmm1
+	movdqu	xmm1,XMMWORD[80+rdi]	; xmm1 is free now: reuse it for ciphertext block 5
+
+DB	102,65,15,56,223,210	; aesdeclast xmm2,xmm10
+	movdqu	xmm10,XMMWORD[96+rdi]	; ciphertext block 6
+	pxor	xmm1,xmm0
+DB	102,65,15,56,223,219	; aesdeclast xmm3,xmm11
+	pxor	xmm10,xmm0
+	movdqu	xmm0,XMMWORD[112+rdi]	; ciphertext block 7: becomes the next chaining value
+DB	102,65,15,56,223,228	; aesdeclast xmm4,xmm12
+	lea	rdi,[128+rdi]
+	movdqu	xmm11,XMMWORD[rbp]	; preload the next pass's blocks 0..5 from rbp (computed in the loop body)
+DB	102,65,15,56,223,237	; aesdeclast xmm5,xmm13
+DB	102,65,15,56,223,246	; aesdeclast xmm6,xmm14
+	movdqu	xmm12,XMMWORD[16+rbp]
+	movdqu	xmm13,XMMWORD[32+rbp]
+DB	102,65,15,56,223,255	; aesdeclast xmm7,xmm15
+DB	102,68,15,56,223,193	; aesdeclast xmm8,xmm1
+	movdqu	xmm14,XMMWORD[48+rbp]
+	movdqu	xmm15,XMMWORD[64+rbp]
+DB	102,69,15,56,223,202	; aesdeclast xmm9,xmm10
+	movdqa	xmm10,xmm0	; chaining value for the next pass
+	movdqu	xmm1,XMMWORD[80+rbp]
+	movups	xmm0,XMMWORD[((-112))+rcx]	; reload round key 0 (key-schedule offset 0)
+
+	movups	XMMWORD[rsi],xmm2	; store seven output blocks, refilling xmm2..xmm7 with the preloaded ciphertext
+	movdqa	xmm2,xmm11
+	movups	XMMWORD[16+rsi],xmm3
+	movdqa	xmm3,xmm12
+	movups	XMMWORD[32+rsi],xmm4
+	movdqa	xmm4,xmm13
+	movups	XMMWORD[48+rsi],xmm5
+	movdqa	xmm5,xmm14
+	movups	XMMWORD[64+rsi],xmm6
+	movdqa	xmm6,xmm15
+	movups	XMMWORD[80+rsi],xmm7
+	movdqa	xmm7,xmm1
+	movups	XMMWORD[96+rsi],xmm8
+	lea	rsi,[112+rsi]	; the eighth block (xmm9) is stored at the top of the next pass or just below
+
+	sub	rdx,0x80
+	ja	NEAR $L$cbc_dec_loop8
+
+	movaps	xmm2,xmm9	; xmm2 = last decrypted block, as $L$cbc_dec_tail_collected expects
+	lea	rcx,[((-112))+rcx]	; remove the +112 bias
+	add	rdx,0x70
+	jle	NEAR $L$cbc_dec_clear_tail_collected	; nothing further to decrypt
+	movups	XMMWORD[rsi],xmm9
+	lea	rsi,[16+rsi]
+	cmp	rdx,0x50
+	jbe	NEAR $L$cbc_dec_tail
+
+	movaps	xmm2,xmm11	; more than 0x50 bytes left: put preloaded block 0 back in xmm2 and fall into the 6/7-block code
+$L$cbc_dec_six_or_seven:	; six ciphertext blocks are in xmm2..xmm7 (copies of the first five in xmm11..xmm15), xmm10 = chaining value
+	cmp	rdx,0x60
+	ja	NEAR $L$cbc_dec_seven	; more than 0x60 bytes: seven-block variant
+
+	movaps	xmm8,xmm7	; keep ciphertext block 5: it becomes the next chaining value
+	call	_aesni_decrypt6	; defined outside this section; presumably decrypts xmm2..xmm7 in place
+	pxor	xmm2,xmm10	; CBC xor: block i ^ previous ciphertext (IV for block 0)
+	movaps	xmm10,xmm8
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3	; registers are zeroed as soon as their block has been stored
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	pxor	xmm6,xmm14
+	movdqu	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	pxor	xmm7,xmm15
+	movdqu	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	lea	rsi,[80+rsi]
+	movdqa	xmm2,xmm7	; the last block is handed to $L$cbc_dec_tail_collected in xmm2
+	pxor	xmm7,xmm7
+	jmp	NEAR $L$cbc_dec_tail_collected
+
+ALIGN	16
+$L$cbc_dec_seven:	; seven blocks via the 8-lane helper, with the eighth lane zeroed
+	movups	xmm8,XMMWORD[96+rdi]	; ciphertext block 6
+	xorps	xmm9,xmm9
+	call	_aesni_decrypt8	; defined outside this section; presumably decrypts xmm2..xmm9 in place
+	movups	xmm9,XMMWORD[80+rdi]	; re-read ciphertext block 5 for the CBC xor of block 6
+	pxor	xmm2,xmm10
+	movups	xmm10,XMMWORD[96+rdi]	; ciphertext block 6 becomes the next chaining value
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	pxor	xmm6,xmm14
+	movdqu	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	pxor	xmm7,xmm15
+	movdqu	XMMWORD[64+rsi],xmm6
+	pxor	xmm6,xmm6
+	pxor	xmm8,xmm9
+	movdqu	XMMWORD[80+rsi],xmm7
+	pxor	xmm7,xmm7
+	lea	rsi,[96+rsi]
+	movdqa	xmm2,xmm8	; the last block is handed to $L$cbc_dec_tail_collected in xmm2
+	pxor	xmm8,xmm8
+	pxor	xmm9,xmm9
+	jmp	NEAR $L$cbc_dec_tail_collected
+
+ALIGN	16
+$L$cbc_dec_loop6:	; 6 blocks per pass; rbp = key schedule base, r10d = round-loop count, xmm10 = chaining value
+	movups	XMMWORD[rsi],xmm7	; store the sixth output block of the previous pass
+	lea	rsi,[16+rsi]
+	movdqu	xmm2,XMMWORD[rdi]	; load six ciphertext blocks, keeping copies of the first five in xmm11..xmm15
+	movdqu	xmm3,XMMWORD[16+rdi]
+	movdqa	xmm11,xmm2
+	movdqu	xmm4,XMMWORD[32+rdi]
+	movdqa	xmm12,xmm3
+	movdqu	xmm5,XMMWORD[48+rdi]
+	movdqa	xmm13,xmm4
+	movdqu	xmm6,XMMWORD[64+rdi]
+	movdqa	xmm14,xmm5
+	movdqu	xmm7,XMMWORD[80+rdi]
+	movdqa	xmm15,xmm6
+$L$cbc_dec_loop6_enter:	; first entry arrives here with the six blocks already loaded by $L$cbc_decrypt_body
+	lea	rdi,[96+rdi]
+	movdqa	xmm8,xmm7	; keep ciphertext block 5: it becomes the next chaining value
+
+	call	_aesni_decrypt6	; defined outside this section; presumably decrypts xmm2..xmm7 in place
+
+	pxor	xmm2,xmm10	; CBC xor: block i ^ previous ciphertext (chaining value for block 0)
+	movdqa	xmm10,xmm8
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm6,xmm14
+	mov	rcx,rbp	; reset the key pointer. NOTE(review): presumably _aesni_decrypt6 advances rcx - confirm at its definition
+	movdqu	XMMWORD[48+rsi],xmm5
+	pxor	xmm7,xmm15
+	mov	eax,r10d	; reset the round-loop count
+	movdqu	XMMWORD[64+rsi],xmm6
+	lea	rsi,[80+rsi]
+	sub	rdx,0x60
+	ja	NEAR $L$cbc_dec_loop6
+
+	movdqa	xmm2,xmm7	; xmm2 = last decrypted block, as $L$cbc_dec_tail_collected expects
+	add	rdx,0x50
+	jle	NEAR $L$cbc_dec_clear_tail_collected	; nothing further to decrypt
+	movups	XMMWORD[rsi],xmm7
+	lea	rsi,[16+rsi]
+
+$L$cbc_dec_tail:	; at most 0x50 bytes left: peel off 16 bytes at a time to pick the 1/2/3/4/5-block variant; xmm10 = chaining value
+	movups	xmm2,XMMWORD[rdi]
+	sub	rdx,0x10
+	jbe	NEAR $L$cbc_dec_one
+
+	movups	xmm3,XMMWORD[16+rdi]
+	movaps	xmm11,xmm2	; xmm11.. keep ciphertext copies for the CBC xor
+	sub	rdx,0x10
+	jbe	NEAR $L$cbc_dec_two
+
+	movups	xmm4,XMMWORD[32+rdi]
+	movaps	xmm12,xmm3
+	sub	rdx,0x10
+	jbe	NEAR $L$cbc_dec_three
+
+	movups	xmm5,XMMWORD[48+rdi]
+	movaps	xmm13,xmm4
+	sub	rdx,0x10
+	jbe	NEAR $L$cbc_dec_four
+
+	movups	xmm6,XMMWORD[64+rdi]	; five blocks: use the 6-lane helper with the sixth lane zeroed
+	movaps	xmm14,xmm5
+	movaps	xmm15,xmm6
+	xorps	xmm7,xmm7
+	call	_aesni_decrypt6	; defined outside this section; presumably decrypts xmm2..xmm7 in place
+	pxor	xmm2,xmm10
+	movaps	xmm10,xmm15	; last ciphertext block becomes the next chaining value
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	pxor	xmm6,xmm14
+	movdqu	XMMWORD[48+rsi],xmm5
+	pxor	xmm5,xmm5
+	lea	rsi,[64+rsi]
+	movdqa	xmm2,xmm6	; the last block is handed to $L$cbc_dec_tail_collected in xmm2
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7
+	sub	rdx,0x10
+	jmp	NEAR $L$cbc_dec_tail_collected
+
+ALIGN	16
+$L$cbc_dec_one:	; single block, decrypted inline
+	movaps	xmm11,xmm2	; keep the ciphertext: it becomes the next chaining value
+	movups	xmm0,XMMWORD[rcx]	; round key 0
+	movups	xmm1,XMMWORD[16+rcx]	; round key 1
+	lea	rcx,[32+rcx]
+	xorps	xmm2,xmm0
+$L$oop_dec1_8:
+DB	102,15,56,222,209	; aesdec xmm2,xmm1
+	dec	eax
+	movups	xmm1,XMMWORD[rcx]	; next round key (flags from dec survive movups/lea)
+	lea	rcx,[16+rcx]
+	jnz	NEAR $L$oop_dec1_8
+DB	102,15,56,223,209	; aesdeclast xmm2,xmm1
+	xorps	xmm2,xmm10	; plaintext = decrypted block ^ chaining value
+	movaps	xmm10,xmm11
+	jmp	NEAR $L$cbc_dec_tail_collected
+ALIGN	16
+$L$cbc_dec_two:
+	movaps	xmm12,xmm3	; keep the last ciphertext block for the next chaining value
+	call	_aesni_decrypt2	; defined outside this section; presumably decrypts xmm2..xmm3 in place
+	pxor	xmm2,xmm10
+	movaps	xmm10,xmm12
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	movdqa	xmm2,xmm3	; last block goes to $L$cbc_dec_tail_collected in xmm2
+	pxor	xmm3,xmm3
+	lea	rsi,[16+rsi]
+	jmp	NEAR $L$cbc_dec_tail_collected
+ALIGN	16
+$L$cbc_dec_three:
+	movaps	xmm13,xmm4	; keep the last ciphertext block for the next chaining value
+	call	_aesni_decrypt3	; defined outside this section; presumably decrypts xmm2..xmm4 in place
+	pxor	xmm2,xmm10
+	movaps	xmm10,xmm13
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	movdqa	xmm2,xmm4	; last block goes to $L$cbc_dec_tail_collected in xmm2
+	pxor	xmm4,xmm4
+	lea	rsi,[32+rsi]
+	jmp	NEAR $L$cbc_dec_tail_collected
+ALIGN	16
+$L$cbc_dec_four:
+	movaps	xmm14,xmm5	; keep the last ciphertext block for the next chaining value
+	call	_aesni_decrypt4	; defined outside this section; presumably decrypts xmm2..xmm5 in place
+	pxor	xmm2,xmm10
+	movaps	xmm10,xmm14
+	pxor	xmm3,xmm11
+	movdqu	XMMWORD[rsi],xmm2
+	pxor	xmm4,xmm12
+	movdqu	XMMWORD[16+rsi],xmm3
+	pxor	xmm3,xmm3
+	pxor	xmm5,xmm13
+	movdqu	XMMWORD[32+rsi],xmm4
+	pxor	xmm4,xmm4
+	movdqa	xmm2,xmm5	; last block goes to $L$cbc_dec_tail_collected in xmm2
+	pxor	xmm5,xmm5
+	lea	rsi,[48+rsi]
+	jmp	NEAR $L$cbc_dec_tail_collected
+
+ALIGN	16
+$L$cbc_dec_clear_tail_collected:	; entry used by the 6x/8x loops, which leave data in xmm3..xmm5
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+$L$cbc_dec_tail_collected:	; on entry xmm2 = last output block (not yet stored), xmm10 = chaining value to hand back
+	movups	XMMWORD[r8],xmm10	; write the chaining value back to the IV buffer
+	and	rdx,15
+	jnz	NEAR $L$cbc_dec_tail_partial	; low four bits of rdx non-zero: partial final block
+	movups	XMMWORD[rsi],xmm2
+	pxor	xmm2,xmm2
+	jmp	NEAR $L$cbc_dec_ret
+ALIGN	16
+$L$cbc_dec_tail_partial:	; stage the last block in the [rsp] scratch slot and copy only part of it to the output
+	movaps	XMMWORD[rsp],xmm2
+	pxor	xmm2,xmm2
+	mov	rcx,16
+	mov	rdi,rsi	; rdi = destination (output pointer)
+	sub	rcx,rdx	; rcx = 16 - (rdx & 15) bytes to copy
+	lea	rsi,[rsp]	; rsi = source (stack scratch)
+	DD	0x9066A4F3	; bytes F3 A4 66 90: rep movsb, then a two-byte nop
+	movdqa	XMMWORD[rsp],xmm2	; xmm2 is zero here: wipe the scratch slot
+
+$L$cbc_dec_ret:	; bulk-decrypt exit: restore xmm6-xmm15 and rbp, zero the save slots, unwind the frame
+	xorps	xmm0,xmm0
+	pxor	xmm1,xmm1
+	movaps	xmm6,XMMWORD[16+rsp]	; reload each saved xmm register, then overwrite its save slot with zero
+	movaps	XMMWORD[16+rsp],xmm0
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	XMMWORD[32+rsp],xmm0
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	XMMWORD[48+rsp],xmm0
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	XMMWORD[64+rsp],xmm0
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	XMMWORD[80+rsp],xmm0
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	XMMWORD[96+rsp],xmm0
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	XMMWORD[112+rsp],xmm0
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	XMMWORD[128+rsp],xmm0
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	XMMWORD[144+rsp],xmm0
+	movaps	xmm15,XMMWORD[160+rsp]
+	movaps	XMMWORD[160+rsp],xmm0
+	mov	rbp,QWORD[((-8))+r11]	; rbp was pushed immediately below r11 in $L$cbc_decrypt_bulk
+
+	lea	rsp,[r11]	; drop the whole frame in one step
+
+$L$cbc_ret:	; shared exit; the encrypt and single-block decrypt paths arrive here without a frame
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_aes_hw_cbc_encrypt:
+global	aes_hw_set_decrypt_key
+; aes_hw_set_decrypt_key: build the encryption key schedule, then convert it in place for decryption by reversing the round-key order and applying aesimc to the inner round keys.
+ALIGN	16
+aes_hw_set_decrypt_key:
+
+DB	0x48,0x83,0xEC,0x08	; sub rsp,8
+
+	call	__aesni_set_encrypt_key	; arguments stay in rcx/edx/r8 as received; eax = status on return
+	shl	edx,4	; edx = 16 * round count (the callee leaves the count in edx on its success paths)
+	test	eax,eax
+	jnz	NEAR $L$dec_key_ret	; non-zero status: return it unchanged
+	lea	rcx,[16+rdx*1+r8]	; rcx -> last round key; r8 -> first round key. NOTE(review): assumes the callee preserves r8 - confirm
+
+	movups	xmm0,XMMWORD[r8]	; swap the first and last round keys (no aesimc on these two)
+	movups	xmm1,XMMWORD[rcx]
+	movups	XMMWORD[rcx],xmm0
+	movups	XMMWORD[r8],xmm1
+	lea	r8,[16+r8]	; move both cursors one key towards the middle
+	lea	rcx,[((-16))+rcx]
+
+$L$dec_key_inverse:	; swap the pair at r8/rcx, applying aesimc to both, until the cursors meet
+	movups	xmm0,XMMWORD[r8]
+	movups	xmm1,XMMWORD[rcx]
+DB	102,15,56,219,192	; aesimc xmm0,xmm0
+DB	102,15,56,219,201	; aesimc xmm1,xmm1
+	lea	r8,[16+r8]
+	lea	rcx,[((-16))+rcx]
+	movups	XMMWORD[16+rcx],xmm0	; 16+rcx is the slot rcx pointed at before the decrement
+	movups	XMMWORD[(-16)+r8],xmm1	; -16+r8 is the slot r8 pointed at before the increment
+	cmp	rcx,r8
+	ja	NEAR $L$dec_key_inverse
+
+	movups	xmm0,XMMWORD[r8]	; middle round key: transform in place
+DB	102,15,56,219,192	; aesimc xmm0,xmm0
+	pxor	xmm1,xmm1	; zero xmm1/xmm0, which held round keys
+	movups	XMMWORD[rcx],xmm0
+	pxor	xmm0,xmm0
+$L$dec_key_ret:
+	add	rsp,8	; undo the sub rsp,8 at entry
+
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_set_decrypt_key:
+
+global	aes_hw_set_encrypt_key
+
+ALIGN	16
+aes_hw_set_encrypt_key:
+__aesni_set_encrypt_key:
+
+%ifdef BORINGSSL_DISPATCH_TEST
+	mov	BYTE[((BORINGSSL_function_hit+3))],1
+%endif
+DB	0x48,0x83,0xEC,0x08
+
+	mov	rax,-1
+	test	rcx,rcx
+	jz	NEAR $L$enc_key_ret
+	test	r8,r8
+	jz	NEAR $L$enc_key_ret
+
+	movups	xmm0,XMMWORD[rcx]
+	xorps	xmm4,xmm4
+	lea	r10,[OPENSSL_ia32cap_P]
+	mov	r10d,DWORD[4+r10]
+	and	r10d,268437504
+	lea	rax,[16+r8]
+	cmp	edx,256
+	je	NEAR $L$14rounds
+	cmp	edx,192
+	je	NEAR $L$12rounds
+	cmp	edx,128
+	jne	NEAR $L$bad_keybits
+
+$L$10rounds:
+	mov	edx,9
+	cmp	r10d,268435456
+	je	NEAR $L$10rounds_alt
+
+	movups	XMMWORD[r8],xmm0
+DB	102,15,58,223,200,1
+	call	$L$key_expansion_128_cold
+DB	102,15,58,223,200,2
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,4
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,8
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,16
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,32
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,64
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,128
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,27
+	call	$L$key_expansion_128
+DB	102,15,58,223,200,54
+	call	$L$key_expansion_128
+	movups	XMMWORD[rax],xmm0
+	mov	DWORD[80+rax],edx
+	xor	eax,eax
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$10rounds_alt:
+	movdqa	xmm5,XMMWORD[$L$key_rotate]
+	mov	r10d,8
+	movdqa	xmm4,XMMWORD[$L$key_rcon1]
+	movdqa	xmm2,xmm0
+	movdqu	XMMWORD[r8],xmm0
+	jmp	NEAR $L$oop_key128
+
+ALIGN	16
+$L$oop_key128:
+DB	102,15,56,0,197
+DB	102,15,56,221,196
+	pslld	xmm4,1
+	lea	rax,[16+rax]
+
+	movdqa	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm2,xmm3
+
+	pxor	xmm0,xmm2
+	movdqu	XMMWORD[(-16)+rax],xmm0
+	movdqa	xmm2,xmm0
+
+	dec	r10d
+	jnz	NEAR $L$oop_key128
+
+	movdqa	xmm4,XMMWORD[$L$key_rcon1b]
+
+DB	102,15,56,0,197
+DB	102,15,56,221,196
+	pslld	xmm4,1
+
+	movdqa	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm2,xmm3
+
+	pxor	xmm0,xmm2
+	movdqu	XMMWORD[rax],xmm0
+
+	movdqa	xmm2,xmm0
+DB	102,15,56,0,197
+DB	102,15,56,221,196
+
+	movdqa	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm3,xmm2
+	pslldq	xmm2,4
+	pxor	xmm2,xmm3
+
+	pxor	xmm0,xmm2
+	movdqu	XMMWORD[16+rax],xmm0
+
+	mov	DWORD[96+rax],edx
+	xor	eax,eax
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$12rounds:
+	movq	xmm2,QWORD[16+rcx]
+	mov	edx,11
+	cmp	r10d,268435456
+	je	NEAR $L$12rounds_alt
+
+	movups	XMMWORD[r8],xmm0
+DB	102,15,58,223,202,1
+	call	$L$key_expansion_192a_cold
+DB	102,15,58,223,202,2
+	call	$L$key_expansion_192b
+DB	102,15,58,223,202,4
+	call	$L$key_expansion_192a
+DB	102,15,58,223,202,8
+	call	$L$key_expansion_192b
+DB	102,15,58,223,202,16
+	call	$L$key_expansion_192a
+DB	102,15,58,223,202,32
+	call	$L$key_expansion_192b
+DB	102,15,58,223,202,64
+	call	$L$key_expansion_192a
+DB	102,15,58,223,202,128
+	call	$L$key_expansion_192b
+	movups	XMMWORD[rax],xmm0
+	mov	DWORD[48+rax],edx
+	xor	rax,rax
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$12rounds_alt:
+	movdqa	xmm5,XMMWORD[$L$key_rotate192]
+	movdqa	xmm4,XMMWORD[$L$key_rcon1]
+	mov	r10d,8
+	movdqu	XMMWORD[r8],xmm0
+	jmp	NEAR $L$oop_key192
+
+ALIGN	16
+$L$oop_key192:
+	movq	QWORD[rax],xmm2
+	movdqa	xmm1,xmm2
+DB	102,15,56,0,213
+DB	102,15,56,221,212
+	pslld	xmm4,1
+	lea	rax,[24+rax]
+
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm0,xmm3
+
+	pshufd	xmm3,xmm0,0xff
+	pxor	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm3,xmm1
+
+	pxor	xmm0,xmm2
+	pxor	xmm2,xmm3
+	movdqu	XMMWORD[(-16)+rax],xmm0
+
+	dec	r10d
+	jnz	NEAR $L$oop_key192
+
+	mov	DWORD[32+rax],edx
+	xor	eax,eax
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$14rounds:
+	movups	xmm2,XMMWORD[16+rcx]
+	mov	edx,13
+	lea	rax,[16+rax]
+	cmp	r10d,268435456
+	je	NEAR $L$14rounds_alt
+
+	movups	XMMWORD[r8],xmm0
+	movups	XMMWORD[16+r8],xmm2
+DB	102,15,58,223,202,1
+	call	$L$key_expansion_256a_cold
+DB	102,15,58,223,200,1
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,2
+	call	$L$key_expansion_256a
+DB	102,15,58,223,200,2
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,4
+	call	$L$key_expansion_256a
+DB	102,15,58,223,200,4
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,8
+	call	$L$key_expansion_256a
+DB	102,15,58,223,200,8
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,16
+	call	$L$key_expansion_256a
+DB	102,15,58,223,200,16
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,32
+	call	$L$key_expansion_256a
+DB	102,15,58,223,200,32
+	call	$L$key_expansion_256b
+DB	102,15,58,223,202,64
+	call	$L$key_expansion_256a
+	movups	XMMWORD[rax],xmm0
+	mov	DWORD[16+rax],edx
+	xor	rax,rax
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$14rounds_alt:
+	movdqa	xmm5,XMMWORD[$L$key_rotate]
+	movdqa	xmm4,XMMWORD[$L$key_rcon1]
+	mov	r10d,7
+	movdqu	XMMWORD[r8],xmm0
+	movdqa	xmm1,xmm2
+	movdqu	XMMWORD[16+r8],xmm2
+	jmp	NEAR $L$oop_key256
+
+ALIGN	16
+$L$oop_key256:
+DB	102,15,56,0,213
+DB	102,15,56,221,212
+
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm3,xmm0
+	pslldq	xmm0,4
+	pxor	xmm0,xmm3
+	pslld	xmm4,1
+
+	pxor	xmm0,xmm2
+	movdqu	XMMWORD[rax],xmm0
+
+	dec	r10d
+	jz	NEAR $L$done_key256
+
+	pshufd	xmm2,xmm0,0xff
+	pxor	xmm3,xmm3
+DB	102,15,56,221,211
+
+	movdqa	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm3,xmm1
+	pslldq	xmm1,4
+	pxor	xmm1,xmm3
+
+	pxor	xmm2,xmm1
+	movdqu	XMMWORD[16+rax],xmm2
+	lea	rax,[32+rax]
+	movdqa	xmm1,xmm2
+
+	jmp	NEAR $L$oop_key256
+
+$L$done_key256:
+	mov	DWORD[16+rax],edx
+	xor	eax,eax
+	jmp	NEAR $L$enc_key_ret
+
+ALIGN	16
+$L$bad_keybits:
+	mov	rax,-2
+$L$enc_key_ret:
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	add	rsp,8
+
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_set_encrypt_key:
+
+ALIGN	16
+$L$key_expansion_128:	; helper: store the key block in xmm0, then derive the next block in xmm0
+	movups	XMMWORD[rax],xmm0	; write xmm0 to the slot rax points at
+	lea	rax,[16+rax]	; advance the output pointer by 16 bytes
+$L$key_expansion_128_cold:	; alternate entry: derive only, no store and no pointer advance
+	shufps	xmm4,xmm0,16
+	xorps	xmm0,xmm4
+	shufps	xmm4,xmm0,140
+	xorps	xmm0,xmm4
+	shufps	xmm1,xmm1,255	; replicate dword 3 of xmm1 into all four lanes
+	xorps	xmm0,xmm1	; fold the replicated word into xmm0; result is returned in xmm0
+	DB	0F3h,0C3h		;repret
+
+ALIGN	16
+$L$key_expansion_192a:	; helper: store xmm0, then update the xmm0/xmm2 key state
+	movups	XMMWORD[rax],xmm0	; write xmm0 to the slot rax points at
+	lea	rax,[16+rax]	; advance the output pointer by 16 bytes
+$L$key_expansion_192a_cold:	; alternate entry: no store and no pointer advance
+	movaps	xmm5,xmm2	; keep a copy of xmm2; $L$key_expansion_192b reads xmm5
+$L$key_expansion_192b_warm:	; shared body, also reached by the jmp at the end of $L$key_expansion_192b
+	shufps	xmm4,xmm0,16
+	movdqa	xmm3,xmm2
+	xorps	xmm0,xmm4
+	shufps	xmm4,xmm0,140
+	pslldq	xmm3,4
+	xorps	xmm0,xmm4
+	pshufd	xmm1,xmm1,85	; replicate dword 1 of xmm1 into all four lanes
+	pxor	xmm2,xmm3	; xmm2 ^= (old xmm2 shifted left by 4 bytes)
+	pxor	xmm0,xmm1
+	pshufd	xmm3,xmm0,255	; replicate dword 3 of the updated xmm0
+	pxor	xmm2,xmm3
+	DB	0F3h,0C3h		;repret
+
+ALIGN	16
+$L$key_expansion_192b:	; helper: emit 32 bytes built from xmm5/xmm0/xmm2, then run the shared body
+	movaps	xmm3,xmm0
+	shufps	xmm5,xmm0,68	; xmm5 = { xmm5 dwords 0-1, xmm0 dwords 0-1 }
+	movups	XMMWORD[rax],xmm5
+	shufps	xmm3,xmm2,78	; xmm3 = { xmm0 dwords 2-3, xmm2 dwords 0-1 }
+	movups	XMMWORD[16+rax],xmm3
+	lea	rax,[32+rax]	; advance the output pointer by 32 bytes
+	jmp	NEAR $L$key_expansion_192b_warm
+
+ALIGN	16
+$L$key_expansion_256a:	; helper: store xmm2, then derive the next block in xmm0
+	movups	XMMWORD[rax],xmm2	; write xmm2 to the slot rax points at
+	lea	rax,[16+rax]	; advance the output pointer by 16 bytes
+$L$key_expansion_256a_cold:	; alternate entry: derive only, no store and no pointer advance
+	shufps	xmm4,xmm0,16
+	xorps	xmm0,xmm4
+	shufps	xmm4,xmm0,140
+	xorps	xmm0,xmm4
+	shufps	xmm1,xmm1,255	; replicate dword 3 of xmm1 into all four lanes
+	xorps	xmm0,xmm1	; result is returned in xmm0
+	DB	0F3h,0C3h		;repret
+
+ALIGN	16
+$L$key_expansion_256b:	; helper: store xmm0, then derive the next block in xmm2
+	movups	XMMWORD[rax],xmm0	; write xmm0 to the slot rax points at
+	lea	rax,[16+rax]	; advance the output pointer by 16 bytes
+; Same shuffle/xor sequence as $L$key_expansion_256a, applied to xmm2 instead of xmm0.
+	shufps	xmm4,xmm2,16
+	xorps	xmm2,xmm4
+	shufps	xmm4,xmm2,140
+	xorps	xmm2,xmm4
+	shufps	xmm1,xmm1,170	; replicate dword 2 of xmm1 (256a uses dword 3)
+	xorps	xmm2,xmm1	; result is returned in xmm2
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	64
+$L$bswap_mask:	; byte indices 15..0: a shuffle mask that reverses the 16 bytes of a vector
+DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+$L$increment32:	; four dwords: 6,6,6,0
+	DD	6,6,6,0
+$L$increment64:	; four dwords: 1,0,0,0
+	DD	1,0,0,0
+$L$xts_magic:	; four dwords: 0x87,0,1,0
+	DD	0x87,0,1,0
+$L$increment1:	; 16 bytes, only the last byte set to 1
+DB	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
+$L$key_rotate:	; shuffle mask loaded into xmm5 by $L$10rounds_alt / $L$14rounds_alt
+	DD	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
+$L$key_rotate192:	; shuffle mask loaded into xmm5 by $L$12rounds_alt
+	DD	0x04070605,0x04070605,0x04070605,0x04070605
+$L$key_rcon1:	; initial per-lane constant 1; the *_alt loops double it with pslld xmm4,1
+	DD	1,1,1,1
+$L$key_rcon1b:	; constant 0x1b, loaded into xmm4 after $L$oop_key128 finishes
+	DD	0x1b,0x1b,0x1b,0x1b
+; The bytes below are the NUL-terminated ASCII string "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>".
+DB	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
+DB	83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
+DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
+DB	115,108,46,111,114,103,62,0
+ALIGN	64
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+ecb_ccm64_se_handler:	; exception handler referenced from $L$SEH_info_ecb; r8/r9 are the 3rd/4th handler arguments
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; scratch space, later used for the stack arguments of RtlVirtualUnwind
+; NOTE(review): field names below assume the Win64 CONTEXT (r8) and DISPATCHER_CONTEXT (r9) layouts - confirm against winnt.h.
+	mov	rax,QWORD[120+r8]	; presumably context->Rax
+	mov	rbx,QWORD[248+r8]	; presumably context->Rip (faulting address)
+
+	mov	rsi,QWORD[8+r9]	; presumably disp->ImageBase
+	mov	r11,QWORD[56+r9]	; presumably disp->HandlerData (the two RVAs after the handler in .xdata)
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: first RVA ($L$ecb_enc_body for $L$SEH_info_ecb)
+	lea	r10,[r10*1+rsi]	; RVA + image base = absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault address below the first label: keep rax as loaded from offset 120
+
+	mov	rax,QWORD[152+r8]	; presumably context->Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: second RVA ($L$ecb_enc_ret for $L$SEH_info_ecb)
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; fault address at or past the second label: nothing to restore
+; Fault lies between the two labels: copy 8 qwords (64 bytes) from the stack into the context at offset 512.
+	lea	rsi,[rax]
+	lea	rdi,[512+r8]	; presumably context->Xmm6
+	mov	ecx,8
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+	lea	rax,[88+rax]	; step rax past 88 bytes of frame before the shared tail reads [8+rax]/[16+rax]
+
+	jmp	NEAR $L$common_seh_tail
+
+
+
+ALIGN	16
+ctr_xts_se_handler:	; exception handler referenced from $L$SEH_info_ctr32; r8/r9 are the 3rd/4th handler arguments
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; scratch space, later used for the stack arguments of RtlVirtualUnwind
+; NOTE(review): field names below assume the Win64 CONTEXT (r8) and DISPATCHER_CONTEXT (r9) layouts - confirm against winnt.h.
+	mov	rax,QWORD[120+r8]	; presumably context->Rax
+	mov	rbx,QWORD[248+r8]	; presumably context->Rip (faulting address)
+
+	mov	rsi,QWORD[8+r9]	; presumably disp->ImageBase
+	mov	r11,QWORD[56+r9]	; presumably disp->HandlerData (the two RVAs after the handler in .xdata)
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: first RVA ($L$ctr32_body for $L$SEH_info_ctr32)
+	lea	r10,[r10*1+rsi]	; RVA + image base = absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault address below the first label
+
+	mov	rax,QWORD[152+r8]	; presumably context->Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: second RVA ($L$ctr32_epilogue for $L$SEH_info_ctr32)
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; fault address at or past the second label
+
+	mov	rax,QWORD[208+r8]	; presumably context->R11, used here as the frame base
+; Copy 20 qwords (160 bytes) starting at [rax-168] into the context at offset 512.
+	lea	rsi,[((-168))+rax]
+	lea	rdi,[512+r8]	; presumably context->Xmm6
+	mov	ecx,20
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rbp,QWORD[((-8))+rax]	; qword just below the frame base
+	mov	QWORD[160+r8],rbp	; stored at context offset 160 (presumably context->Rbp)
+	jmp	NEAR $L$common_seh_tail
+
+
+
+ALIGN	16
+cbc_se_handler:	; exception handler referenced from $L$SEH_info_cbc; falls through into $L$common_seh_tail
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; scratch space, later used for the stack arguments of RtlVirtualUnwind
+; NOTE(review): field names below assume the Win64 CONTEXT (r8) and DISPATCHER_CONTEXT (r9) layouts - confirm against winnt.h.
+	mov	rax,QWORD[152+r8]	; presumably context->Rsp
+	mov	rbx,QWORD[248+r8]	; presumably context->Rip (faulting address)
+
+	lea	r10,[$L$cbc_decrypt_bulk]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault before $L$cbc_decrypt_bulk: use the value from offset 152
+
+	mov	rax,QWORD[120+r8]	; presumably context->Rax
+
+	lea	r10,[$L$cbc_decrypt_body]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault before $L$cbc_decrypt_body: use the value from offset 120
+
+	mov	rax,QWORD[152+r8]	; back to the value from offset 152
+
+	lea	r10,[$L$cbc_ret]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; fault at or past $L$cbc_ret: nothing to restore
+; Fault lies inside the body: copy 20 qwords (160 bytes) from [rax+16] into the context at offset 512.
+	lea	rsi,[16+rax]
+	lea	rdi,[512+r8]	; presumably context->Xmm6
+	mov	ecx,20
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rax,QWORD[208+r8]	; presumably context->R11, used here as the frame base
+
+	mov	rbp,QWORD[((-8))+rax]	; qword just below the frame base
+	mov	QWORD[160+r8],rbp	; stored at context offset 160 (presumably context->Rbp)
+
+$L$common_seh_tail:	; shared tail for all three handlers; expects rax = recovered stack pointer of the faulting frame
+	mov	rdi,QWORD[8+rax]
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context offset 152 <- rax
+	mov	QWORD[168+r8],rsi	; context offset 168 <- qword at [rax+16] (presumably Rsi)
+	mov	QWORD[176+r8],rdi	; context offset 176 <- qword at [rax+8] (presumably Rdi)
+; Copy 154 qwords (1232 bytes) from the context at r8 to the buffer at [r9+40] (presumably disp->ContextRecord).
+	mov	rdi,QWORD[40+r9]
+	mov	rsi,r8
+	mov	ecx,154
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+; Build the RtlVirtualUnwind argument list from the dispatcher record at r9 (argument meanings per the Win32 API - verify).
+	mov	rsi,r9
+	xor	rcx,rcx	; arg1 = 0
+	mov	rdx,QWORD[8+rsi]	; arg2 = qword at disp+8
+	mov	r8,QWORD[rsi]	; arg3 = qword at disp+0
+	mov	r9,QWORD[16+rsi]	; arg4 = qword at disp+16
+	mov	r10,QWORD[40+rsi]
+	lea	r11,[56+rsi]
+	lea	r12,[24+rsi]
+	mov	QWORD[32+rsp],r10	; arg5 = qword at disp+40
+	mov	QWORD[40+rsp],r11	; arg6 = address of disp+56
+	mov	QWORD[48+rsp],r12	; arg7 = address of disp+24
+	mov	QWORD[56+rsp],rcx	; arg8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; handler returns 1 (presumably ExceptionContinueSearch - verify)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_aes_hw_ecb_encrypt wrt ..imagebase	; each .pdata entry is three image-relative dwords: begin, end, unwind info
+	DD	$L$SEH_end_aes_hw_ecb_encrypt wrt ..imagebase
+	DD	$L$SEH_info_ecb wrt ..imagebase
+
+	DD	$L$SEH_begin_aes_hw_ctr32_encrypt_blocks wrt ..imagebase
+	DD	$L$SEH_end_aes_hw_ctr32_encrypt_blocks wrt ..imagebase
+	DD	$L$SEH_info_ctr32 wrt ..imagebase
+	DD	$L$SEH_begin_aes_hw_cbc_encrypt wrt ..imagebase
+	DD	$L$SEH_end_aes_hw_cbc_encrypt wrt ..imagebase
+	DD	$L$SEH_info_cbc wrt ..imagebase
+; The two key-setup routines below share one unwind record, $L$SEH_info_key.
+	DD	aes_hw_set_decrypt_key wrt ..imagebase
+	DD	$L$SEH_end_set_decrypt_key wrt ..imagebase
+	DD	$L$SEH_info_key wrt ..imagebase
+
+	DD	aes_hw_set_encrypt_key wrt ..imagebase
+	DD	$L$SEH_end_set_encrypt_key wrt ..imagebase
+	DD	$L$SEH_info_key wrt ..imagebase
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_ecb:
+DB	9,0,0,0	; header byte 9, then zero prolog size / code count / frame register (9 = version 1 + handler flag per the x64 unwind format - verify)
+	DD	ecb_ccm64_se_handler wrt ..imagebase
+	DD	$L$ecb_enc_body wrt ..imagebase,$L$ecb_enc_ret wrt ..imagebase	; handler data: the two RVAs ecb_ccm64_se_handler compares the fault address against
+$L$SEH_info_ctr32:
+DB	9,0,0,0
+	DD	ctr_xts_se_handler wrt ..imagebase
+	DD	$L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase	; handler data: the two RVAs ctr_xts_se_handler compares the fault address against
+$L$SEH_info_cbc:
+DB	9,0,0,0
+	DD	cbc_se_handler wrt ..imagebase	; no handler data: cbc_se_handler compares against labels directly
+$L$SEH_info_key:
+DB	0x01,0x04,0x01,0x00	; header byte 1, prolog size 4, one unwind code, no frame register
+DB	0x04,0x02,0x00,0x00	; code at prolog offset 4, op byte 0x02 (presumably UWOP_ALLOC_SMALL of 8 bytes, matching "add rsp,8" in the epilogue)
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
new file mode 100644
index 0000000..434ba10
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/ghash-ssse3-x86_64.asm
@@ -0,0 +1,495 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+
+
+
+
+
+global	gcm_gmult_ssse3
+ALIGN	16
+gcm_gmult_ssse3:
+; In: rcx -> 16-byte block, updated in place; rdx -> table read as 16 rows of 16 bytes (5+5+6 loop iterations).
+$L$gmult_seh_begin:
+	sub	rsp,40
+$L$gmult_seh_allocstack:
+	movdqa	XMMWORD[rsp],xmm6	; xmm6 and xmm10 are saved here and restored before returning
+$L$gmult_seh_save_xmm6:
+	movdqa	XMMWORD[16+rsp],xmm10
+$L$gmult_seh_save_xmm10:
+$L$gmult_seh_prolog_end:
+	movdqu	xmm0,XMMWORD[rcx]
+	movdqa	xmm10,XMMWORD[$L$reverse_bytes]
+	movdqa	xmm2,XMMWORD[$L$low4_mask]
+
+; Reverse the byte order of the input block.
+DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
+
+; Split every byte into nibbles: xmm1 = high nibbles, xmm0 = low nibbles.
+	movdqa	xmm1,xmm2
+	pandn	xmm1,xmm0
+	psrld	xmm1,4
+	pand	xmm0,xmm2
+
+
+
+; xmm2:xmm3 form the accumulator; rax counts the rows handled before each reduction step.
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	mov	rax,5
+$L$oop_row_1:
+	movdqa	xmm4,XMMWORD[rdx]	; load the next 16-byte table row
+	lea	rdx,[16+rdx]
+
+; Shift the xmm2:xmm3 accumulator right by one byte.
+	movdqa	xmm6,xmm2
+DB	102,15,58,15,243,1	; palignr xmm6,xmm3,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+
+
+
+; Look the row up twice: xmm4 indexed by the low nibbles, xmm5 by the high nibbles.
+	movdqa	xmm5,xmm4
+DB	102,15,56,0,224	; pshufb xmm4,xmm0
+DB	102,15,56,0,233	; pshufb xmm5,xmm1
+
+
+	pxor	xmm2,xmm5
+
+
+; Add xmm4 shifted right by 4 bits as a 128-bit value; the 4 bits shifted out go to the top of xmm3.
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+
+
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+
+	sub	rax,1
+	jnz	NEAR $L$oop_row_1
+
+
+; Fold xmm3 into xmm2: xmm2 ^= xmm3 ^ (xmm3>>1) ^ (xmm3>>2) ^ (xmm3>>7), per 64-bit lane; then clear xmm3.
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+	mov	rax,5
+$L$oop_row_2:
+	movdqa	xmm4,XMMWORD[rdx]
+	lea	rdx,[16+rdx]
+
+; Same row step as $L$oop_row_1.
+	movdqa	xmm6,xmm2
+DB	102,15,58,15,243,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+
+
+
+
+	movdqa	xmm5,xmm4
+DB	102,15,56,0,224
+DB	102,15,56,0,233
+
+
+	pxor	xmm2,xmm5
+
+
+
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+
+
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+
+	sub	rax,1
+	jnz	NEAR $L$oop_row_2
+
+
+; Same fold of xmm3 into xmm2 as after the first group of rows.
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+	mov	rax,6
+$L$oop_row_3:
+	movdqa	xmm4,XMMWORD[rdx]
+	lea	rdx,[16+rdx]
+
+; Same row step as $L$oop_row_1; this last group runs 6 rows.
+	movdqa	xmm6,xmm2
+DB	102,15,58,15,243,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+
+
+
+
+	movdqa	xmm5,xmm4
+DB	102,15,56,0,224
+DB	102,15,56,0,233
+
+
+	pxor	xmm2,xmm5
+
+
+
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+
+
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+
+	sub	rax,1
+	jnz	NEAR $L$oop_row_3
+
+
+; Final fold of xmm3 into xmm2.
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+; Reverse the bytes back and write the result over the input block.
+DB	102,65,15,56,0,210	; pshufb xmm2,xmm10
+	movdqu	XMMWORD[rcx],xmm2
+
+; Zero the working registers xmm0-xmm6, then restore the saved xmm6/xmm10 and the stack.
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	movdqa	xmm6,XMMWORD[rsp]
+	movdqa	xmm10,XMMWORD[16+rsp]
+	add	rsp,40
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+global	gcm_ghash_ssse3
+ALIGN	16
+gcm_ghash_ssse3:
+$L$ghash_seh_begin:
+; In: rcx -> 16-byte state (updated in place); rdx -> 256-byte table; r8 -> input bytes; r9 = input length in bytes.
+	sub	rsp,56
+$L$ghash_seh_allocstack:
+	movdqa	XMMWORD[rsp],xmm6	; xmm6, xmm10 and xmm11 are saved here and restored before returning
+$L$ghash_seh_save_xmm6:
+	movdqa	XMMWORD[16+rsp],xmm10
+$L$ghash_seh_save_xmm10:
+	movdqa	XMMWORD[32+rsp],xmm11
+$L$ghash_seh_save_xmm11:
+$L$ghash_seh_prolog_end:
+	movdqu	xmm0,XMMWORD[rcx]
+	movdqa	xmm10,XMMWORD[$L$reverse_bytes]
+	movdqa	xmm11,XMMWORD[$L$low4_mask]
+
+; Round the length down to a multiple of 16.
+	and	r9,-16
+; NOTE(review): if r9 is 0 here the "sub r9,16 / jnz" loop below would wrap; callers presumably pass a non-zero multiple of 16 - confirm.
+
+
+DB	102,65,15,56,0,194	; pshufb xmm0,xmm10: reverse the bytes of the state
+
+
+	pxor	xmm3,xmm3
+$L$oop_ghash:
+; Per 16-byte block: load it, reverse its bytes, and xor it into the state in xmm0.
+	movdqu	xmm1,XMMWORD[r8]
+DB	102,65,15,56,0,202	; pshufb xmm1,xmm10
+	pxor	xmm0,xmm1
+
+; Split every byte into nibbles: xmm1 = high nibbles, xmm0 = low nibbles.
+	movdqa	xmm1,xmm11
+	pandn	xmm1,xmm0
+	psrld	xmm1,4
+	pand	xmm0,xmm11
+
+
+
+; xmm2:xmm3 form the accumulator; the 16 table rows are consumed in groups of 5, 5 and 6.
+	pxor	xmm2,xmm2
+
+	mov	rax,5
+$L$oop_row_4:
+	movdqa	xmm4,XMMWORD[rdx]	; load the next 16-byte table row
+	lea	rdx,[16+rdx]
+
+; Shift the xmm2:xmm3 accumulator right by one byte.
+	movdqa	xmm6,xmm2
+DB	102,15,58,15,243,1	; palignr xmm6,xmm3,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+
+
+
+; Look the row up twice: xmm4 indexed by the low nibbles, xmm5 by the high nibbles.
+	movdqa	xmm5,xmm4
+DB	102,15,56,0,224	; pshufb xmm4,xmm0
+DB	102,15,56,0,233	; pshufb xmm5,xmm1
+
+
+	pxor	xmm2,xmm5
+
+
+; Add xmm4 shifted right by 4 bits as a 128-bit value; the 4 bits shifted out go to the top of xmm3.
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+
+
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+
+	sub	rax,1
+	jnz	NEAR $L$oop_row_4
+
+
+; Fold xmm3 into xmm2: xmm2 ^= xmm3 ^ (xmm3>>1) ^ (xmm3>>2) ^ (xmm3>>7), per 64-bit lane; then clear xmm3.
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+	mov	rax,5
+$L$oop_row_5:
+	movdqa	xmm4,XMMWORD[rdx]
+	lea	rdx,[16+rdx]
+
+; Same row step as $L$oop_row_4.
+	movdqa	xmm6,xmm2
+DB	102,15,58,15,243,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+
+
+
+
+	movdqa	xmm5,xmm4
+DB	102,15,56,0,224
+DB	102,15,56,0,233
+
+
+	pxor	xmm2,xmm5
+
+
+
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+
+
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+
+	sub	rax,1
+	jnz	NEAR $L$oop_row_5
+
+
+; Same fold of xmm3 into xmm2 as after the first group of rows.
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+	mov	rax,6
+$L$oop_row_6:
+	movdqa	xmm4,XMMWORD[rdx]
+	lea	rdx,[16+rdx]
+
+; Same row step as $L$oop_row_4; this last group runs 6 rows.
+	movdqa	xmm6,xmm2
+DB	102,15,58,15,243,1
+	movdqa	xmm3,xmm6
+	psrldq	xmm2,1
+
+
+
+
+	movdqa	xmm5,xmm4
+DB	102,15,56,0,224
+DB	102,15,56,0,233
+
+
+	pxor	xmm2,xmm5
+
+
+
+	movdqa	xmm5,xmm4
+	psllq	xmm5,60
+	movdqa	xmm6,xmm5
+	pslldq	xmm6,8
+	pxor	xmm3,xmm6
+
+
+	psrldq	xmm5,8
+	pxor	xmm2,xmm5
+	psrlq	xmm4,4
+	pxor	xmm2,xmm4
+
+	sub	rax,1
+	jnz	NEAR $L$oop_row_6
+
+
+; Final fold for this block; the result becomes the new state in xmm0.
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,1
+	pxor	xmm2,xmm3
+	psrlq	xmm3,5
+	pxor	xmm2,xmm3
+	pxor	xmm3,xmm3
+	movdqa	xmm0,xmm2
+
+; Rewind the table pointer by the 256 bytes consumed above.
+	lea	rdx,[((-256))+rdx]
+
+; Advance to the next input block and loop until the byte counter reaches zero.
+	lea	r8,[16+r8]
+	sub	r9,16
+	jnz	NEAR $L$oop_ghash
+
+; Reverse the bytes back and write the state out.
+DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
+	movdqu	XMMWORD[rcx],xmm0
+
+; Zero the working registers xmm0-xmm6, then restore the saved xmm6/xmm10/xmm11 and the stack.
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	movdqa	xmm6,XMMWORD[rsp]
+	movdqa	xmm10,XMMWORD[16+rsp]
+	movdqa	xmm11,XMMWORD[32+rsp]
+	add	rsp,56
+	DB	0F3h,0C3h		;repret
+$L$ghash_seh_end:
+
+
+
+ALIGN	16
+
+; Shuffle mask with byte indices 15..0: reverses the 16 bytes of a vector when used with pshufb.
+$L$reverse_bytes:
+DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+; Mask with 0x0f in every byte: selects the low nibble of each byte.
+$L$low4_mask:
+	DQ	0x0f0f0f0f0f0f0f0f,0x0f0f0f0f0f0f0f0f
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$gmult_seh_begin wrt ..imagebase	; each .pdata entry is three image-relative dwords: begin, end, unwind info
+	DD	$L$gmult_seh_end wrt ..imagebase
+	DD	$L$gmult_seh_info wrt ..imagebase
+
+	DD	$L$ghash_seh_begin wrt ..imagebase
+	DD	$L$ghash_seh_end wrt ..imagebase
+	DD	$L$ghash_seh_info wrt ..imagebase
+; NOTE(review): op-byte decodings below follow the documented x64 UNWIND_CODE format (low nibble = operation, high nibble = info) - verify.
+section	.xdata rdata align=8
+ALIGN	8
+$L$gmult_seh_info:
+DB	1	; header byte: 1
+DB	$L$gmult_seh_prolog_end-$L$gmult_seh_begin	; prolog size in bytes
+DB	5	; number of 16-bit unwind code slots that follow
+DB	0	; no frame register
+; Codes are listed in reverse prolog order.
+DB	$L$gmult_seh_save_xmm10-$L$gmult_seh_begin
+DB	168	; 0xA8: op 8 with register 10 (xmm10 save)
+	DW	1	; scaled offset 1 (the prolog stores xmm10 at [16+rsp])
+
+DB	$L$gmult_seh_save_xmm6-$L$gmult_seh_begin
+DB	104	; 0x68: op 8 with register 6 (xmm6 save)
+	DW	0	; scaled offset 0 (the prolog stores xmm6 at [rsp])
+
+DB	$L$gmult_seh_allocstack-$L$gmult_seh_begin
+DB	66	; 0x42: op 2 with info 4 (presumably 4*8+8 = 40 bytes, matching "sub rsp,40")
+
+ALIGN	8
+$L$ghash_seh_info:
+DB	1	; header byte: 1
+DB	$L$ghash_seh_prolog_end-$L$ghash_seh_begin	; prolog size in bytes
+DB	7	; number of 16-bit unwind code slots that follow
+DB	0	; no frame register
+
+DB	$L$ghash_seh_save_xmm11-$L$ghash_seh_begin
+DB	184	; 0xB8: op 8 with register 11 (xmm11 save)
+	DW	2	; scaled offset 2 (the prolog stores xmm11 at [32+rsp])
+
+DB	$L$ghash_seh_save_xmm10-$L$ghash_seh_begin
+DB	168	; 0xA8: op 8 with register 10 (xmm10 save)
+	DW	1	; scaled offset 1 (the prolog stores xmm10 at [16+rsp])
+
+DB	$L$ghash_seh_save_xmm6-$L$ghash_seh_begin
+DB	104	; 0x68: op 8 with register 6 (xmm6 save)
+	DW	0	; scaled offset 0 (the prolog stores xmm6 at [rsp])
+
+DB	$L$ghash_seh_allocstack-$L$ghash_seh_begin
+DB	98	; 0x62: op 2 with info 6 (presumably 6*8+8 = 56 bytes, matching "sub rsp,56")
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
new file mode 100644
index 0000000..194ea8d
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/ghash-x86_64.asm
@@ -0,0 +1,1221 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+EXTERN	OPENSSL_ia32cap_P
+global	gcm_init_clmul
+
+ALIGN	16
+gcm_init_clmul:
+; In: rdx -> 16-byte input value; rcx -> output table, 96 bytes written at offsets 0..80.
+$L$_init_clmul:
+$L$SEH_begin_gcm_init_clmul:
+
+DB	0x48,0x83,0xec,0x18	; sub rsp,0x18
+DB	0x0f,0x29,0x34,0x24	; movaps [rsp],xmm6 (restored before returning)
+	movdqu	xmm2,XMMWORD[rdx]
+	pshufd	xmm2,xmm2,78	; swap the two 64-bit halves
+
+; Shift xmm2 left by one bit across 128 bits; xmm5 becomes an all-ones mask if the top bit was set.
+	pshufd	xmm4,xmm2,255
+	movdqa	xmm3,xmm2
+	psllq	xmm2,1
+	pxor	xmm5,xmm5
+	psrlq	xmm3,63
+	pcmpgtd	xmm5,xmm4
+	pslldq	xmm3,8
+	por	xmm2,xmm3
+
+; Conditionally xor in the constant at $L$0x1c2_polynomial.
+	pand	xmm5,XMMWORD[$L$0x1c2_polynomial]
+	pxor	xmm2,xmm5
+
+; xmm6 = (high half ^ low half) of xmm2, reused as the middle-term operand of each multiply below.
+	pshufd	xmm6,xmm2,78
+	movdqa	xmm0,xmm2
+	pxor	xmm6,xmm2
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pxor	xmm3,xmm0
+DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00 (low halves)
+DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11 (high halves)
+DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00 (middle term)
+	pxor	xmm3,xmm0
+	pxor	xmm3,xmm1
+; Split the middle term across the low (xmm0) and high (xmm1) halves of the 256-bit product.
+	movdqa	xmm4,xmm3
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+; Reduce the 256-bit product in xmm1:xmm0 to 128 bits in xmm0 (two shift-and-xor phases).
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	pshufd	xmm3,xmm2,78
+	pshufd	xmm4,xmm0,78
+	pxor	xmm3,xmm2
+	movdqu	XMMWORD[rcx],xmm2	; table[0] = adjusted input value
+	pxor	xmm4,xmm0
+	movdqu	XMMWORD[16+rcx],xmm0	; table[16] = first product (value times itself)
+DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8
+	movdqu	XMMWORD[32+rcx],xmm4	; table[32] = packed (high ^ low) halves of the two entries above
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pxor	xmm3,xmm0
+DB	102,15,58,68,194,0
+DB	102,15,58,68,202,17
+DB	102,15,58,68,222,0
+	pxor	xmm3,xmm0
+	pxor	xmm3,xmm1
+; Second multiply by xmm2, followed by the same split and reduction as above.
+	movdqa	xmm4,xmm3
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	movdqa	xmm5,xmm0	; keep the second product for the stores below
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pxor	xmm3,xmm0
+DB	102,15,58,68,194,0
+DB	102,15,58,68,202,17
+DB	102,15,58,68,222,0
+	pxor	xmm3,xmm0
+	pxor	xmm3,xmm1
+; Third multiply by xmm2, followed by the same split and reduction as above.
+	movdqa	xmm4,xmm3
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	pshufd	xmm3,xmm5,78
+	pshufd	xmm4,xmm0,78
+	pxor	xmm3,xmm5
+	movdqu	XMMWORD[48+rcx],xmm5	; table[48] = second product
+	pxor	xmm4,xmm0
+	movdqu	XMMWORD[64+rcx],xmm0	; table[64] = third product
+DB	102,15,58,15,227,8	; palignr xmm4,xmm3,8
+	movdqu	XMMWORD[80+rcx],xmm4	; table[80] = packed (high ^ low) halves of the two entries above
+	movaps	xmm6,XMMWORD[rsp]
+	lea	rsp,[24+rsp]
+$L$SEH_end_gcm_init_clmul:
+	DB	0F3h,0C3h		;repret
+
+
+global	gcm_gmult_clmul
+
+ALIGN	16
+gcm_gmult_clmul:
+; In: rcx -> 16-byte block, updated in place; rdx -> table (entries at offsets 0 and 32 are read).
+$L$_gmult_clmul:	; also the jump target of gcm_gmult_avx
+	movdqu	xmm0,XMMWORD[rcx]
+	movdqa	xmm5,XMMWORD[$L$bswap_mask]
+	movdqu	xmm2,XMMWORD[rdx]
+	movdqu	xmm4,XMMWORD[32+rdx]
+DB	102,15,56,0,197	; pshufb xmm0,xmm5
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pxor	xmm3,xmm0
+DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00 (low halves)
+DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11 (high halves)
+DB	102,15,58,68,220,0	; pclmulqdq xmm3,xmm4,0x00 (middle term)
+	pxor	xmm3,xmm0
+	pxor	xmm3,xmm1
+; Split the middle term across the low (xmm0) and high (xmm1) halves of the 256-bit product.
+	movdqa	xmm4,xmm3
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+; Reduce the 256-bit product in xmm1:xmm0 to 128 bits in xmm0 (two shift-and-xor phases).
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+DB	102,15,56,0,197	; pshufb xmm0,xmm5
+	movdqu	XMMWORD[rcx],xmm0	; write the result back over the input block
+	DB	0F3h,0C3h		;repret
+
+
+global	gcm_ghash_clmul
+
+ALIGN	32
+gcm_ghash_clmul:
+; In: rcx -> 16-byte state (updated in place); rdx -> table; r8 -> input bytes; r9 = input length in bytes.
+$L$_ghash_clmul:
+	lea	rax,[((-136))+rsp]
+$L$SEH_begin_gcm_ghash_clmul:
+; Hand-encoded prologue: allocate 168 bytes and save xmm6-xmm15 (restored at $L$done).
+DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]
+DB	0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
+DB	0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
+DB	0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
+DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
+DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
+DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
+DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
+DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
+DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
+DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
+	movdqa	xmm10,XMMWORD[$L$bswap_mask]
+
+	movdqu	xmm0,XMMWORD[rcx]
+	movdqu	xmm2,XMMWORD[rdx]
+	movdqu	xmm7,XMMWORD[32+rdx]
+DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
+
+	sub	r9,0x10
+	jz	NEAR $L$odd_tail	; exactly one 16-byte block
+
+	movdqu	xmm6,XMMWORD[16+rdx]
+	lea	rax,[OPENSSL_ia32cap_P]
+	mov	eax,DWORD[4+rax]	; second dword of the capability vector
+	cmp	r9,0x30
+	jb	NEAR $L$skip4x	; fewer than 0x40 bytes in total: use the 2-blocks-per-iteration path
+; Also skip the 4x path when bit 22 is set and bit 26 is clear in that capability word.
+	and	eax,71303168
+	cmp	eax,4194304
+	je	NEAR $L$skip4x
+
+	sub	r9,0x30
+	mov	rax,0xA040608020C0E000	; byte table, moved into xmm9 inside $L$mod4_loop
+	movdqu	xmm14,XMMWORD[48+rdx]
+	movdqu	xmm15,XMMWORD[64+rdx]
+
+
+; 4x path setup: start the products for the first four input blocks (the first block is xored with the state).
+
+	movdqu	xmm3,XMMWORD[48+r8]
+	movdqu	xmm11,XMMWORD[32+r8]
+DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
+DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
+	movdqa	xmm5,xmm3
+	pshufd	xmm4,xmm3,78
+	pxor	xmm4,xmm3
+DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
+DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
+DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
+
+	movdqa	xmm13,xmm11
+	pshufd	xmm12,xmm11,78
+	pxor	xmm12,xmm11
+DB	102,68,15,58,68,222,0	; pclmulqdq xmm11,xmm6,0x00
+DB	102,68,15,58,68,238,17	; pclmulqdq xmm13,xmm6,0x11
+DB	102,68,15,58,68,231,16	; pclmulqdq xmm12,xmm7,0x10
+	xorps	xmm3,xmm11
+	xorps	xmm5,xmm13
+	movups	xmm7,XMMWORD[80+rdx]
+	xorps	xmm4,xmm12
+
+	movdqu	xmm11,XMMWORD[16+r8]
+	movdqu	xmm8,XMMWORD[r8]
+DB	102,69,15,56,0,218	; pshufb xmm11,xmm10
+DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
+	movdqa	xmm13,xmm11
+	pshufd	xmm12,xmm11,78
+	pxor	xmm0,xmm8	; fold the first input block into the state
+	pxor	xmm12,xmm11
+DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
+	movdqa	xmm1,xmm0
+	pshufd	xmm8,xmm0,78
+	pxor	xmm8,xmm0
+DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
+DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
+	xorps	xmm3,xmm11
+	xorps	xmm5,xmm13
+
+	lea	r8,[64+r8]
+	sub	r9,0x40
+	jc	NEAR $L$tail4x	; fewer than 64 bytes remain: finish the pending products
+
+	jmp	NEAR $L$mod4_loop
+ALIGN	32
+$L$mod4_loop:	; each iteration consumes 64 input bytes, interleaving reduction with the next products
+DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
+	xorps	xmm4,xmm12
+	movdqu	xmm11,XMMWORD[48+r8]
+DB	102,69,15,56,0,218
+DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
+	xorps	xmm0,xmm3
+	movdqu	xmm3,XMMWORD[32+r8]
+	movdqa	xmm13,xmm11
+DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
+	pshufd	xmm12,xmm11,78
+	xorps	xmm1,xmm5
+	pxor	xmm12,xmm11
+DB	102,65,15,56,0,218
+	movups	xmm7,XMMWORD[32+rdx]
+	xorps	xmm8,xmm4
+DB	102,68,15,58,68,218,0	; pclmulqdq xmm11,xmm2,0x00
+	pshufd	xmm4,xmm3,78
+
+	pxor	xmm8,xmm0
+	movdqa	xmm5,xmm3
+	pxor	xmm8,xmm1
+	pxor	xmm4,xmm3
+	movdqa	xmm9,xmm8
+DB	102,68,15,58,68,234,17	; pclmulqdq xmm13,xmm2,0x11
+	pslldq	xmm8,8
+	psrldq	xmm9,8
+	pxor	xmm0,xmm8
+	movdqa	xmm8,XMMWORD[$L$7_mask]
+	pxor	xmm1,xmm9
+DB	102,76,15,110,200	; movq xmm9,rax (the byte table loaded before the loop)
+
+	pand	xmm8,xmm0
+DB	102,69,15,56,0,200	; pshufb xmm9,xmm8: table lookup indexed by the masked bits of xmm0
+	pxor	xmm9,xmm0
+DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
+	psllq	xmm9,57
+	movdqa	xmm8,xmm9
+	pslldq	xmm9,8
+DB	102,15,58,68,222,0	; pclmulqdq xmm3,xmm6,0x00
+	psrldq	xmm8,8
+	pxor	xmm0,xmm9
+	pxor	xmm1,xmm8
+	movdqu	xmm8,XMMWORD[r8]
+
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,1
+DB	102,15,58,68,238,17	; pclmulqdq xmm5,xmm6,0x11
+	xorps	xmm3,xmm11
+	movdqu	xmm11,XMMWORD[16+r8]
+DB	102,69,15,56,0,218
+DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
+	xorps	xmm5,xmm13
+	movups	xmm7,XMMWORD[80+rdx]
+DB	102,69,15,56,0,194
+	pxor	xmm1,xmm9
+	pxor	xmm9,xmm0
+	psrlq	xmm0,5
+
+	movdqa	xmm13,xmm11
+	pxor	xmm4,xmm12
+	pshufd	xmm12,xmm11,78
+	pxor	xmm0,xmm9
+	pxor	xmm1,xmm8
+	pxor	xmm12,xmm11
+DB	102,69,15,58,68,222,0	; pclmulqdq xmm11,xmm14,0x00
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	movdqa	xmm1,xmm0
+DB	102,69,15,58,68,238,17	; pclmulqdq xmm13,xmm14,0x11
+	xorps	xmm3,xmm11
+	pshufd	xmm8,xmm0,78
+	pxor	xmm8,xmm0
+
+DB	102,68,15,58,68,231,0	; pclmulqdq xmm12,xmm7,0x00
+	xorps	xmm5,xmm13
+
+	lea	r8,[64+r8]
+	sub	r9,0x40
+	jnc	NEAR $L$mod4_loop
+
+$L$tail4x:	; combine the pending partial products and reduce them into xmm0
+DB	102,65,15,58,68,199,0	; pclmulqdq xmm0,xmm15,0x00
+DB	102,65,15,58,68,207,17	; pclmulqdq xmm1,xmm15,0x11
+DB	102,68,15,58,68,199,16	; pclmulqdq xmm8,xmm7,0x10
+	xorps	xmm4,xmm12
+	xorps	xmm0,xmm3
+	xorps	xmm1,xmm5
+	pxor	xmm1,xmm0
+	pxor	xmm8,xmm4
+
+	pxor	xmm8,xmm1
+	pxor	xmm1,xmm0
+
+	movdqa	xmm9,xmm8
+	psrldq	xmm8,8
+	pslldq	xmm9,8
+	pxor	xmm1,xmm8
+	pxor	xmm0,xmm9
+
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	add	r9,0x40	; undo the last loop-counter subtraction
+	jz	NEAR $L$done
+	movdqu	xmm7,XMMWORD[32+rdx]
+	sub	r9,0x10
+	jz	NEAR $L$odd_tail
+$L$skip4x:
+
+
+; 2x path: process two input blocks per iteration.
+
+
+	movdqu	xmm8,XMMWORD[r8]
+	movdqu	xmm3,XMMWORD[16+r8]
+DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
+DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
+	pxor	xmm0,xmm8
+
+	movdqa	xmm5,xmm3
+	pshufd	xmm4,xmm3,78
+	pxor	xmm4,xmm3
+DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
+DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
+DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
+
+	lea	r8,[32+r8]
+	nop
+	sub	r9,0x20
+	jbe	NEAR $L$even_tail
+	nop
+	jmp	NEAR $L$mod_loop
+
+ALIGN	32
+$L$mod_loop:	; each iteration consumes 32 input bytes
+	movdqa	xmm1,xmm0
+	movdqa	xmm8,xmm4
+	pshufd	xmm4,xmm0,78
+	pxor	xmm4,xmm0
+
+DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
+DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
+DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
+
+	pxor	xmm0,xmm3
+	pxor	xmm1,xmm5
+	movdqu	xmm9,XMMWORD[r8]
+	pxor	xmm8,xmm0
+DB	102,69,15,56,0,202	; pshufb xmm9,xmm10
+	movdqu	xmm3,XMMWORD[16+r8]
+
+	pxor	xmm8,xmm1
+	pxor	xmm1,xmm9
+	pxor	xmm4,xmm8
+DB	102,65,15,56,0,218	; pshufb xmm3,xmm10
+	movdqa	xmm8,xmm4
+	psrldq	xmm8,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm8
+	pxor	xmm0,xmm4
+
+	movdqa	xmm5,xmm3
+
+	movdqa	xmm9,xmm0
+	movdqa	xmm8,xmm0
+	psllq	xmm0,5
+	pxor	xmm8,xmm0
+DB	102,15,58,68,218,0	; pclmulqdq xmm3,xmm2,0x00
+	psllq	xmm0,1
+	pxor	xmm0,xmm8
+	psllq	xmm0,57
+	movdqa	xmm8,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm8,8
+	pxor	xmm0,xmm9
+	pshufd	xmm4,xmm5,78
+	pxor	xmm1,xmm8
+	pxor	xmm4,xmm5
+
+	movdqa	xmm9,xmm0
+	psrlq	xmm0,1
+DB	102,15,58,68,234,17	; pclmulqdq xmm5,xmm2,0x11
+	pxor	xmm1,xmm9
+	pxor	xmm9,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm9
+	lea	r8,[32+r8]
+	psrlq	xmm0,1
+DB	102,15,58,68,231,0	; pclmulqdq xmm4,xmm7,0x00
+	pxor	xmm0,xmm1
+
+	sub	r9,0x20
+	ja	NEAR $L$mod_loop
+
+$L$even_tail:	; finish the pending pair of blocks and reduce into xmm0
+	movdqa	xmm1,xmm0
+	movdqa	xmm8,xmm4
+	pshufd	xmm4,xmm0,78
+	pxor	xmm4,xmm0
+
+DB	102,15,58,68,198,0	; pclmulqdq xmm0,xmm6,0x00
+DB	102,15,58,68,206,17	; pclmulqdq xmm1,xmm6,0x11
+DB	102,15,58,68,231,16	; pclmulqdq xmm4,xmm7,0x10
+
+	pxor	xmm0,xmm3
+	pxor	xmm1,xmm5
+	pxor	xmm8,xmm0
+	pxor	xmm8,xmm1
+	pxor	xmm4,xmm8
+	movdqa	xmm8,xmm4
+	psrldq	xmm8,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm8
+	pxor	xmm0,xmm4
+
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+	test	r9,r9
+	jnz	NEAR $L$done	; counter non-zero here means no trailing block is left
+
+$L$odd_tail:	; one last 16-byte block: xor it into the state, then multiply and reduce
+	movdqu	xmm8,XMMWORD[r8]
+DB	102,69,15,56,0,194	; pshufb xmm8,xmm10
+	pxor	xmm0,xmm8
+	movdqa	xmm1,xmm0
+	pshufd	xmm3,xmm0,78
+	pxor	xmm3,xmm0
+DB	102,15,58,68,194,0	; pclmulqdq xmm0,xmm2,0x00
+DB	102,15,58,68,202,17	; pclmulqdq xmm1,xmm2,0x11
+DB	102,15,58,68,223,0	; pclmulqdq xmm3,xmm7,0x00
+	pxor	xmm3,xmm0
+	pxor	xmm3,xmm1
+
+	movdqa	xmm4,xmm3
+	psrldq	xmm3,8
+	pslldq	xmm4,8
+	pxor	xmm1,xmm3
+	pxor	xmm0,xmm4
+
+	movdqa	xmm4,xmm0
+	movdqa	xmm3,xmm0
+	psllq	xmm0,5
+	pxor	xmm3,xmm0
+	psllq	xmm0,1
+	pxor	xmm0,xmm3
+	psllq	xmm0,57
+	movdqa	xmm3,xmm0
+	pslldq	xmm0,8
+	psrldq	xmm3,8
+	pxor	xmm0,xmm4
+	pxor	xmm1,xmm3
+
+
+	movdqa	xmm4,xmm0
+	psrlq	xmm0,1
+	pxor	xmm1,xmm4
+	pxor	xmm4,xmm0
+	psrlq	xmm0,5
+	pxor	xmm0,xmm4
+	psrlq	xmm0,1
+	pxor	xmm0,xmm1
+$L$done:	; reverse the bytes back, store the state, restore xmm6-xmm15 and release the 168-byte frame
+DB	102,65,15,56,0,194	; pshufb xmm0,xmm10
+	movdqu	XMMWORD[rcx],xmm0
+	movaps	xmm6,XMMWORD[rsp]
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[168+rsp]
+$L$SEH_end_gcm_ghash_clmul:
+	DB	0F3h,0C3h		;repret
+
+
+global	gcm_init_avx
+
+ALIGN	32
+gcm_init_avx:
+; In: rdx -> 16-byte input value; rcx -> output table, filled 48 bytes per pass over 4 passes (192 bytes).
+$L$SEH_begin_gcm_init_avx:
+
+DB	0x48,0x83,0xec,0x18	; sub rsp,0x18
+DB	0x0f,0x29,0x34,0x24	; movaps [rsp],xmm6 (restored before returning)
+	vzeroupper
+
+	vmovdqu	xmm2,XMMWORD[rdx]
+	vpshufd	xmm2,xmm2,78	; swap the two 64-bit halves
+
+; Shift xmm2 left by one bit across 128 bits; xmm5 becomes an all-ones mask if the top bit was set.
+	vpshufd	xmm4,xmm2,255
+	vpsrlq	xmm3,xmm2,63
+	vpsllq	xmm2,xmm2,1
+	vpxor	xmm5,xmm5,xmm5
+	vpcmpgtd	xmm5,xmm5,xmm4
+	vpslldq	xmm3,xmm3,8
+	vpor	xmm2,xmm2,xmm3
+
+; Conditionally xor in the constant at $L$0x1c2_polynomial.
+	vpand	xmm5,xmm5,XMMWORD[$L$0x1c2_polynomial]
+	vpxor	xmm2,xmm2,xmm5
+; xmm6 = (high half ^ low half) of xmm2, the middle-term operand of every multiply below.
+	vpunpckhqdq	xmm6,xmm2,xmm2
+	vmovdqa	xmm0,xmm2
+	vpxor	xmm6,xmm6,xmm2
+	mov	r10,4	; number of passes
+	jmp	NEAR $L$init_start_avx
+ALIGN	32
+$L$init_loop_avx:
+	vpalignr	xmm5,xmm4,xmm3,8
+	vmovdqu	XMMWORD[(-16)+rcx],xmm5	; third entry of the previous pass (rcx was already advanced by 48)
+	vpunpckhqdq	xmm3,xmm0,xmm0
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm1,xmm0,xmm2,0x11
+	vpclmulqdq	xmm0,xmm0,xmm2,0x00
+	vpclmulqdq	xmm3,xmm3,xmm6,0x00
+	vpxor	xmm4,xmm1,xmm0
+	vpxor	xmm3,xmm3,xmm4
+; Split the middle term across xmm0/xmm1, then reduce the product into xmm0.
+	vpslldq	xmm4,xmm3,8
+	vpsrldq	xmm3,xmm3,8
+	vpxor	xmm0,xmm0,xmm4
+	vpxor	xmm1,xmm1,xmm3
+	vpsllq	xmm3,xmm0,57
+	vpsllq	xmm4,xmm0,62
+	vpxor	xmm4,xmm4,xmm3
+	vpsllq	xmm3,xmm0,63
+	vpxor	xmm4,xmm4,xmm3
+	vpslldq	xmm3,xmm4,8
+	vpsrldq	xmm4,xmm4,8
+	vpxor	xmm0,xmm0,xmm3
+	vpxor	xmm1,xmm1,xmm4
+
+	vpsrlq	xmm4,xmm0,1
+	vpxor	xmm1,xmm1,xmm0
+	vpxor	xmm0,xmm0,xmm4
+	vpsrlq	xmm4,xmm4,5
+	vpxor	xmm0,xmm0,xmm4
+	vpsrlq	xmm0,xmm0,1
+	vpxor	xmm0,xmm0,xmm1
+$L$init_start_avx:	; first-pass entry point: skips the extra multiply at the top of the loop
+	vmovdqa	xmm5,xmm0	; keep the current value; it is stored at [rcx] below
+	vpunpckhqdq	xmm3,xmm0,xmm0
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm1,xmm0,xmm2,0x11
+	vpclmulqdq	xmm0,xmm0,xmm2,0x00
+	vpclmulqdq	xmm3,xmm3,xmm6,0x00
+	vpxor	xmm4,xmm1,xmm0
+	vpxor	xmm3,xmm3,xmm4
+; Same split and reduction as in the loop head above.
+	vpslldq	xmm4,xmm3,8
+	vpsrldq	xmm3,xmm3,8
+	vpxor	xmm0,xmm0,xmm4
+	vpxor	xmm1,xmm1,xmm3
+	vpsllq	xmm3,xmm0,57
+	vpsllq	xmm4,xmm0,62
+	vpxor	xmm4,xmm4,xmm3
+	vpsllq	xmm3,xmm0,63
+	vpxor	xmm4,xmm4,xmm3
+	vpslldq	xmm3,xmm4,8
+	vpsrldq	xmm4,xmm4,8
+	vpxor	xmm0,xmm0,xmm3
+	vpxor	xmm1,xmm1,xmm4
+
+	vpsrlq	xmm4,xmm0,1
+	vpxor	xmm1,xmm1,xmm0
+	vpxor	xmm0,xmm0,xmm4
+	vpsrlq	xmm4,xmm4,5
+	vpxor	xmm0,xmm0,xmm4
+	vpsrlq	xmm0,xmm0,1
+	vpxor	xmm0,xmm0,xmm1
+	vpshufd	xmm3,xmm5,78
+	vpshufd	xmm4,xmm0,78
+	vpxor	xmm3,xmm3,xmm5
+	vmovdqu	XMMWORD[rcx],xmm5	; first entry of this pass
+	vpxor	xmm4,xmm4,xmm0
+	vmovdqu	XMMWORD[16+rcx],xmm0	; second entry: the first entry multiplied by xmm2
+	lea	rcx,[48+rcx]	; advance to the next 48-byte group
+	sub	r10,1
+	jnz	NEAR $L$init_loop_avx
+; After the final pass, store the third entry of the last group.
+	vpalignr	xmm5,xmm3,xmm4,8
+	vmovdqu	XMMWORD[(-16)+rcx],xmm5
+
+	vzeroupper
+	movaps	xmm6,XMMWORD[rsp]
+	lea	rsp,[24+rsp]
+$L$SEH_end_gcm_init_avx:
+	DB	0F3h,0C3h		;repret
+
+
+global	gcm_gmult_avx
+
+ALIGN	32
+gcm_gmult_avx:
+; No AVX-specific body: this entry point tail-jumps to the $L$_gmult_clmul label inside gcm_gmult_clmul.
+	jmp	NEAR $L$_gmult_clmul
+
+
+global	gcm_ghash_avx
+
+ALIGN	32
+gcm_ghash_avx:
+
+	lea	rax,[((-136))+rsp]
+$L$SEH_begin_gcm_ghash_avx:
+
+DB	0x48,0x8d,0x60,0xe0
+DB	0x0f,0x29,0x70,0xe0
+DB	0x0f,0x29,0x78,0xf0
+DB	0x44,0x0f,0x29,0x00
+DB	0x44,0x0f,0x29,0x48,0x10
+DB	0x44,0x0f,0x29,0x50,0x20
+DB	0x44,0x0f,0x29,0x58,0x30
+DB	0x44,0x0f,0x29,0x60,0x40
+DB	0x44,0x0f,0x29,0x68,0x50
+DB	0x44,0x0f,0x29,0x70,0x60
+DB	0x44,0x0f,0x29,0x78,0x70
+	vzeroupper
+
+	vmovdqu	xmm10,XMMWORD[rcx]
+	lea	r10,[$L$0x1c2_polynomial]
+	lea	rdx,[64+rdx]
+	vmovdqu	xmm13,XMMWORD[$L$bswap_mask]
+	vpshufb	xmm10,xmm10,xmm13
+	cmp	r9,0x80
+	jb	NEAR $L$short_avx
+	sub	r9,0x80
+
+	vmovdqu	xmm14,XMMWORD[112+r8]
+	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
+	vpshufb	xmm14,xmm14,xmm13
+	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
+
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vmovdqu	xmm15,XMMWORD[96+r8]
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpxor	xmm9,xmm9,xmm14
+	vpshufb	xmm15,xmm15,xmm13
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vmovdqu	xmm14,XMMWORD[80+r8]
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vpxor	xmm8,xmm8,xmm15
+
+	vpshufb	xmm14,xmm14,xmm13
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+	vmovdqu	xmm15,XMMWORD[64+r8]
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
+
+	vpshufb	xmm15,xmm15,xmm13
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpxor	xmm4,xmm4,xmm1
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vpxor	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm14,XMMWORD[48+r8]
+	vpxor	xmm0,xmm0,xmm3
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpxor	xmm1,xmm1,xmm4
+	vpshufb	xmm14,xmm14,xmm13
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
+	vpxor	xmm2,xmm2,xmm5
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+
+	vmovdqu	xmm15,XMMWORD[32+r8]
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpxor	xmm4,xmm4,xmm1
+	vpshufb	xmm15,xmm15,xmm13
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
+	vpxor	xmm5,xmm5,xmm2
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vpxor	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm14,XMMWORD[16+r8]
+	vpxor	xmm0,xmm0,xmm3
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpxor	xmm1,xmm1,xmm4
+	vpshufb	xmm14,xmm14,xmm13
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
+	vpxor	xmm2,xmm2,xmm5
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+
+	vmovdqu	xmm15,XMMWORD[r8]
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpxor	xmm4,xmm4,xmm1
+	vpshufb	xmm15,xmm15,xmm13
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm9,xmm7,0x10
+
+	lea	r8,[128+r8]
+	cmp	r9,0x80
+	jb	NEAR $L$tail_avx
+
+	vpxor	xmm15,xmm15,xmm10
+	sub	r9,0x80
+	jmp	NEAR $L$oop8x_avx
+
+ALIGN	32
+$L$oop8x_avx:
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vmovdqu	xmm14,XMMWORD[112+r8]
+	vpxor	xmm3,xmm3,xmm0
+	vpxor	xmm8,xmm8,xmm15
+	vpclmulqdq	xmm10,xmm15,xmm6,0x00
+	vpshufb	xmm14,xmm14,xmm13
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm11,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm12,xmm8,xmm7,0x00
+	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+
+	vmovdqu	xmm15,XMMWORD[96+r8]
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpxor	xmm10,xmm10,xmm3
+	vpshufb	xmm15,xmm15,xmm13
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vxorps	xmm11,xmm11,xmm4
+	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vpxor	xmm12,xmm12,xmm5
+	vxorps	xmm8,xmm8,xmm15
+
+	vmovdqu	xmm14,XMMWORD[80+r8]
+	vpxor	xmm12,xmm12,xmm10
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpxor	xmm12,xmm12,xmm11
+	vpslldq	xmm9,xmm12,8
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vpsrldq	xmm12,xmm12,8
+	vpxor	xmm10,xmm10,xmm9
+	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
+	vpshufb	xmm14,xmm14,xmm13
+	vxorps	xmm11,xmm11,xmm12
+	vpxor	xmm4,xmm4,xmm1
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+	vpxor	xmm5,xmm5,xmm2
+
+	vmovdqu	xmm15,XMMWORD[64+r8]
+	vpalignr	xmm12,xmm10,xmm10,8
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpshufb	xmm15,xmm15,xmm13
+	vpxor	xmm0,xmm0,xmm3
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vxorps	xmm8,xmm8,xmm15
+	vpxor	xmm2,xmm2,xmm5
+
+	vmovdqu	xmm14,XMMWORD[48+r8]
+	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpshufb	xmm14,xmm14,xmm13
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+	vpxor	xmm5,xmm5,xmm2
+
+	vmovdqu	xmm15,XMMWORD[32+r8]
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpshufb	xmm15,xmm15,xmm13
+	vpxor	xmm0,xmm0,xmm3
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm1,xmm1,xmm4
+	vpclmulqdq	xmm2,xmm9,xmm7,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vpxor	xmm2,xmm2,xmm5
+	vxorps	xmm10,xmm10,xmm12
+
+	vmovdqu	xmm14,XMMWORD[16+r8]
+	vpalignr	xmm12,xmm10,xmm10,8
+	vpclmulqdq	xmm3,xmm15,xmm6,0x00
+	vpshufb	xmm14,xmm14,xmm13
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm4,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
+	vpclmulqdq	xmm10,xmm10,XMMWORD[r10],0x10
+	vxorps	xmm12,xmm12,xmm11
+	vpunpckhqdq	xmm9,xmm14,xmm14
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm5,xmm8,xmm7,0x10
+	vmovdqu	xmm7,XMMWORD[((176-64))+rdx]
+	vpxor	xmm9,xmm9,xmm14
+	vpxor	xmm5,xmm5,xmm2
+
+	vmovdqu	xmm15,XMMWORD[r8]
+	vpclmulqdq	xmm0,xmm14,xmm6,0x00
+	vpshufb	xmm15,xmm15,xmm13
+	vpclmulqdq	xmm1,xmm14,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((160-64))+rdx]
+	vpxor	xmm15,xmm15,xmm12
+	vpclmulqdq	xmm2,xmm9,xmm7,0x10
+	vpxor	xmm15,xmm15,xmm10
+
+	lea	r8,[128+r8]
+	sub	r9,0x80
+	jnc	NEAR $L$oop8x_avx
+
+	add	r9,0x80
+	jmp	NEAR $L$tail_no_xor_avx
+
+ALIGN	32
+$L$short_avx:
+	vmovdqu	xmm14,XMMWORD[((-16))+r9*1+r8]
+	lea	r8,[r9*1+r8]
+	vmovdqu	xmm6,XMMWORD[((0-64))+rdx]
+	vmovdqu	xmm7,XMMWORD[((32-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+
+	vmovdqa	xmm3,xmm0
+	vmovdqa	xmm4,xmm1
+	vmovdqa	xmm5,xmm2
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-32))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((16-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vpsrldq	xmm7,xmm7,8
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-48))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((48-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vmovdqu	xmm7,XMMWORD[((80-64))+rdx]
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-64))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((64-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vpsrldq	xmm7,xmm7,8
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-80))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((96-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vmovdqu	xmm7,XMMWORD[((128-64))+rdx]
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-96))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((112-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vpsrldq	xmm7,xmm7,8
+	sub	r9,0x10
+	jz	NEAR $L$tail_avx
+
+	vpunpckhqdq	xmm8,xmm15,xmm15
+	vpxor	xmm3,xmm3,xmm0
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00
+	vpxor	xmm8,xmm8,xmm15
+	vmovdqu	xmm14,XMMWORD[((-112))+r8]
+	vpxor	xmm4,xmm4,xmm1
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11
+	vmovdqu	xmm6,XMMWORD[((144-64))+rdx]
+	vpshufb	xmm15,xmm14,xmm13
+	vpxor	xmm5,xmm5,xmm2
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00
+	vmovq	xmm7,QWORD[((184-64))+rdx]
+	sub	r9,0x10
+	jmp	NEAR $L$tail_avx
+
+ALIGN	32
+$L$tail_avx:
+	vpxor	xmm15,xmm15,xmm10	;fold the running state (xmm10) into the pending block
+$L$tail_no_xor_avx:	;entry for $L$oop8x_avx, which has already XORed xmm10 into xmm15
+	vpunpckhqdq	xmm8,xmm15,xmm15	;both qwords of xmm8 = high qword of xmm15
+	vpxor	xmm3,xmm3,xmm0	;xmm3 accumulates the 0x00 (low*low) products
+	vpclmulqdq	xmm0,xmm15,xmm6,0x00	;low(xmm15) * low(xmm6)
+	vpxor	xmm8,xmm8,xmm15	;low qword of xmm8 = high ^ low of xmm15
+	vpxor	xmm4,xmm4,xmm1	;xmm4 accumulates the 0x11 (high*high) products
+	vpclmulqdq	xmm1,xmm15,xmm6,0x11	;high(xmm15) * high(xmm6)
+	vpxor	xmm5,xmm5,xmm2	;xmm5 accumulates the xmm8*xmm7 products
+	vpclmulqdq	xmm2,xmm8,xmm7,0x00	;(high ^ low)(xmm15) * low(xmm7)
+
+	vmovdqu	xmm12,XMMWORD[r10]	;xmm12 = 16 bytes at [r10] (r10 was set to $L$0x1c2_polynomial at entry)
+
+	vpxor	xmm10,xmm3,xmm0	;xmm10 = total of the low*low products
+	vpxor	xmm11,xmm4,xmm1	;xmm11 = total of the high*high products
+	vpxor	xmm5,xmm5,xmm2	;xmm5  = total of the mixed products
+
+	vpxor	xmm5,xmm5,xmm10
+	vpxor	xmm5,xmm5,xmm11	;xmm5 ^= xmm10 ^ xmm11
+	vpslldq	xmm9,xmm5,8	;xmm9 = low qword of xmm5 moved to the high qword
+	vpsrldq	xmm5,xmm5,8	;xmm5 = high qword of xmm5 moved to the low qword
+	vpxor	xmm10,xmm10,xmm9	;fold the two halves of xmm5 into xmm10 ...
+	vpxor	xmm11,xmm11,xmm5	;... and xmm11
+; NOTE(review): the two multiply/swap steps below look like the GHASH polynomial reduction - confirm.
+	vpclmulqdq	xmm9,xmm10,xmm12,0x10	;low(xmm10) * high(xmm12)
+	vpalignr	xmm10,xmm10,xmm10,8	;swap the two qwords of xmm10
+	vpxor	xmm10,xmm10,xmm9
+
+	vpclmulqdq	xmm9,xmm10,xmm12,0x10	;same step a second time
+	vpalignr	xmm10,xmm10,xmm10,8
+	vpxor	xmm10,xmm10,xmm11
+	vpxor	xmm10,xmm10,xmm9	;xmm10 = new running state
+
+	cmp	r9,0
+	jne	NEAR $L$short_avx	;r9 != 0: bytes remain, handle them in $L$short_avx
+
+	vpshufb	xmm10,xmm10,xmm13	;byte-reverse back (xmm13 = $L$bswap_mask)
+	vmovdqu	XMMWORD[rcx],xmm10	;write the state back to [rcx]
+	vzeroupper
+	movaps	xmm6,XMMWORD[rsp]	;reload xmm6-xmm15 from the 168-byte area filled at entry
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[168+rsp]	;release the save area
+$L$SEH_end_gcm_ghash_avx:
+	DB	0F3h,0C3h		;repret
+
+
+ALIGN	64
+$L$bswap_mask:	;vpshufb control: output byte i takes input byte 15-i
+DB	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
+$L$0x1c2_polynomial:	;16 bytes: first byte 1, last byte 0xc2, zero in between
+DB	1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2
+$L$7_mask:
+	DD	7,0,7,0
+ALIGN	64
+; NUL-terminated ASCII banner: "GHASH for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+DB	71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52
+DB	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
+DB	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
+DB	114,103,62,0
+ALIGN	64
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_gcm_init_clmul wrt ..imagebase
+	DD	$L$SEH_end_gcm_init_clmul wrt ..imagebase
+	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase
+; Each group of three DDs is one function entry: begin RVA, end RVA, unwind-info RVA.
+	DD	$L$SEH_begin_gcm_ghash_clmul wrt ..imagebase
+	DD	$L$SEH_end_gcm_ghash_clmul wrt ..imagebase
+	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase
+	DD	$L$SEH_begin_gcm_init_avx wrt ..imagebase
+	DD	$L$SEH_end_gcm_init_avx wrt ..imagebase
+	DD	$L$SEH_info_gcm_init_clmul wrt ..imagebase	;gcm_init_avx shares the gcm_init_clmul unwind info
+; gcm_ghash_avx shares the gcm_ghash_clmul unwind info (168-byte frame, xmm6-xmm15 saved).
+	DD	$L$SEH_begin_gcm_ghash_avx wrt ..imagebase
+	DD	$L$SEH_end_gcm_ghash_avx wrt ..imagebase
+	DD	$L$SEH_info_gcm_ghash_clmul wrt ..imagebase
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_gcm_init_clmul:	;Win64 UNWIND_INFO; codes are listed from the last prologue instruction to the first
+DB	0x01,0x08,0x03,0x00	;version 1, no flags; prologue 8 bytes; 3 code slots; no frame register
+DB	0x08,0x68,0x00,0x00	;offset 0x08: save xmm6 at [rsp+0]
+DB	0x04,0x22,0x00,0x00	;offset 0x04: small stack allocation of 24 bytes; last two bytes pad
+$L$SEH_info_gcm_ghash_clmul:
+DB	0x01,0x33,0x16,0x00	;version 1, no flags; prologue 0x33 bytes; 22 code slots; no frame register
+DB	0x33,0xf8,0x09,0x00	;offset 0x33: save xmm15 at [rsp+0x90]
+DB	0x2e,0xe8,0x08,0x00	;offset 0x2e: save xmm14 at [rsp+0x80]
+DB	0x29,0xd8,0x07,0x00	;offset 0x29: save xmm13 at [rsp+0x70]
+DB	0x24,0xc8,0x06,0x00	;offset 0x24: save xmm12 at [rsp+0x60]
+DB	0x1f,0xb8,0x05,0x00	;offset 0x1f: save xmm11 at [rsp+0x50]
+DB	0x1a,0xa8,0x04,0x00	;offset 0x1a: save xmm10 at [rsp+0x40]
+DB	0x15,0x98,0x03,0x00	;offset 0x15: save xmm9 at [rsp+0x30]
+DB	0x10,0x88,0x02,0x00	;offset 0x10: save xmm8 at [rsp+0x20]
+DB	0x0c,0x78,0x01,0x00	;offset 0x0c: save xmm7 at [rsp+0x10]
+DB	0x08,0x68,0x00,0x00	;offset 0x08: save xmm6 at [rsp+0]
+DB	0x04,0x01,0x15,0x00	;offset 0x04: large stack allocation of 0x15*8 = 168 bytes
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/md5-x86_64.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/md5-x86_64.asm
new file mode 100644
index 0000000..646201b
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/md5-x86_64.asm
@@ -0,0 +1,796 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+ALIGN	16
+
+global	md5_block_asm_data_order
+; Win64 entry: rcx/rdx/r8 are moved into rdi/rsi/rdx, the registers the body below uses.
+md5_block_asm_data_order:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_md5_block_asm_data_order:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+; Five registers are pushed (40 bytes); the exit path reloads them from [rsp]..[32+rsp].
+	push	rbp
+
+	push	rbx
+
+	push	r12
+
+	push	r14
+
+	push	r15
+
+$L$prologue:
+
+
+
+; rbp = first argument (four state dwords), rsi = input cursor, rdi = end of input.
+	mov	rbp,rdi
+	shl	rdx,6	;third argument * 64 = input length in bytes
+	lea	rdi,[rdx*1+rsi]	;rdi = rsi + length
+	mov	eax,DWORD[rbp]	;eax, ebx, ecx, edx = state dwords 0..3
+	mov	ebx,DWORD[4+rbp]
+	mov	ecx,DWORD[8+rbp]
+	mov	edx,DWORD[12+rbp]
+
+
+
+
+
+
+; Zero-length input: skip the loop and go straight to $L$end.
+	cmp	rsi,rdi
+	je	NEAR $L$end
+
+$L$loop:
+	mov	r8d,eax
+	mov	r9d,ebx
+	mov	r14d,ecx
+	mov	r15d,edx
+	mov	r10d,DWORD[rsi]
+	mov	r11d,edx
+	xor	r11d,ecx
+	lea	eax,[((-680876936))+r10*1+rax]
+	and	r11d,ebx
+	xor	r11d,edx
+	mov	r10d,DWORD[4+rsi]
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx
+	add	eax,ebx
+	xor	r11d,ebx
+	lea	edx,[((-389564586))+r10*1+rdx]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[8+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[606105819+r10*1+rcx]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[12+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[((-1044525330))+r10*1+rbx]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[16+rsi]
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	xor	r11d,ecx
+	lea	eax,[((-176418897))+r10*1+rax]
+	and	r11d,ebx
+	xor	r11d,edx
+	mov	r10d,DWORD[20+rsi]
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx
+	add	eax,ebx
+	xor	r11d,ebx
+	lea	edx,[1200080426+r10*1+rdx]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[24+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[((-1473231341))+r10*1+rcx]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[28+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[((-45705983))+r10*1+rbx]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[32+rsi]
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	xor	r11d,ecx
+	lea	eax,[1770035416+r10*1+rax]
+	and	r11d,ebx
+	xor	r11d,edx
+	mov	r10d,DWORD[36+rsi]
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx
+	add	eax,ebx
+	xor	r11d,ebx
+	lea	edx,[((-1958414417))+r10*1+rdx]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[40+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[((-42063))+r10*1+rcx]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[44+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[((-1990404162))+r10*1+rbx]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[48+rsi]
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	xor	r11d,ecx
+	lea	eax,[1804603682+r10*1+rax]
+	and	r11d,ebx
+	xor	r11d,edx
+	mov	r10d,DWORD[52+rsi]
+	add	eax,r11d
+	rol	eax,7
+	mov	r11d,ecx
+	add	eax,ebx
+	xor	r11d,ebx
+	lea	edx,[((-40341101))+r10*1+rdx]
+	and	r11d,eax
+	xor	r11d,ecx
+	mov	r10d,DWORD[56+rsi]
+	add	edx,r11d
+	rol	edx,12
+	mov	r11d,ebx
+	add	edx,eax
+	xor	r11d,eax
+	lea	ecx,[((-1502002290))+r10*1+rcx]
+	and	r11d,edx
+	xor	r11d,ebx
+	mov	r10d,DWORD[60+rsi]
+	add	ecx,r11d
+	rol	ecx,17
+	mov	r11d,eax
+	add	ecx,edx
+	xor	r11d,edx
+	lea	ebx,[1236535329+r10*1+rbx]
+	and	r11d,ecx
+	xor	r11d,eax
+	mov	r10d,DWORD[rsi]
+	add	ebx,r11d
+	rol	ebx,22
+	mov	r11d,edx
+	add	ebx,ecx
+	mov	r10d,DWORD[4+rsi]
+	mov	r11d,edx
+	mov	r12d,edx
+	not	r11d
+	lea	eax,[((-165796510))+r10*1+rax]
+	and	r12d,ebx
+	and	r11d,ecx
+	mov	r10d,DWORD[24+rsi]
+	or	r12d,r11d
+	mov	r11d,ecx
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx
+	not	r11d
+	lea	edx,[((-1069501632))+r10*1+rdx]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[44+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[643717713+r10*1+rcx]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[((-373897302))+r10*1+rbx]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[20+rsi]
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	not	r11d
+	lea	eax,[((-701558691))+r10*1+rax]
+	and	r12d,ebx
+	and	r11d,ecx
+	mov	r10d,DWORD[40+rsi]
+	or	r12d,r11d
+	mov	r11d,ecx
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx
+	not	r11d
+	lea	edx,[38016083+r10*1+rdx]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[60+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[((-660478335))+r10*1+rcx]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[16+rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[((-405537848))+r10*1+rbx]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[36+rsi]
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	not	r11d
+	lea	eax,[568446438+r10*1+rax]
+	and	r12d,ebx
+	and	r11d,ecx
+	mov	r10d,DWORD[56+rsi]
+	or	r12d,r11d
+	mov	r11d,ecx
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx
+	not	r11d
+	lea	edx,[((-1019803690))+r10*1+rdx]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[12+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[((-187363961))+r10*1+rcx]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[32+rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[1163531501+r10*1+rbx]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[52+rsi]
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	not	r11d
+	lea	eax,[((-1444681467))+r10*1+rax]
+	and	r12d,ebx
+	and	r11d,ecx
+	mov	r10d,DWORD[8+rsi]
+	or	r12d,r11d
+	mov	r11d,ecx
+	add	eax,r12d
+	mov	r12d,ecx
+	rol	eax,5
+	add	eax,ebx
+	not	r11d
+	lea	edx,[((-51403784))+r10*1+rdx]
+	and	r12d,eax
+	and	r11d,ebx
+	mov	r10d,DWORD[28+rsi]
+	or	r12d,r11d
+	mov	r11d,ebx
+	add	edx,r12d
+	mov	r12d,ebx
+	rol	edx,9
+	add	edx,eax
+	not	r11d
+	lea	ecx,[1735328473+r10*1+rcx]
+	and	r12d,edx
+	and	r11d,eax
+	mov	r10d,DWORD[48+rsi]
+	or	r12d,r11d
+	mov	r11d,eax
+	add	ecx,r12d
+	mov	r12d,eax
+	rol	ecx,14
+	add	ecx,edx
+	not	r11d
+	lea	ebx,[((-1926607734))+r10*1+rbx]
+	and	r12d,ecx
+	and	r11d,edx
+	mov	r10d,DWORD[rsi]
+	or	r12d,r11d
+	mov	r11d,edx
+	add	ebx,r12d
+	mov	r12d,edx
+	rol	ebx,20
+	add	ebx,ecx
+	mov	r10d,DWORD[20+rsi]
+	mov	r11d,ecx
+	lea	eax,[((-378558))+r10*1+rax]
+	mov	r10d,DWORD[32+rsi]
+	xor	r11d,edx
+	xor	r11d,ebx
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx
+	add	eax,ebx
+	lea	edx,[((-2022574463))+r10*1+rdx]
+	mov	r10d,DWORD[44+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[1839030562+r10*1+rcx]
+	mov	r10d,DWORD[56+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[((-35309556))+r10*1+rbx]
+	mov	r10d,DWORD[4+rsi]
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	lea	eax,[((-1530992060))+r10*1+rax]
+	mov	r10d,DWORD[16+rsi]
+	xor	r11d,edx
+	xor	r11d,ebx
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx
+	add	eax,ebx
+	lea	edx,[1272893353+r10*1+rdx]
+	mov	r10d,DWORD[28+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[((-155497632))+r10*1+rcx]
+	mov	r10d,DWORD[40+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[((-1094730640))+r10*1+rbx]
+	mov	r10d,DWORD[52+rsi]
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	lea	eax,[681279174+r10*1+rax]
+	mov	r10d,DWORD[rsi]
+	xor	r11d,edx
+	xor	r11d,ebx
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx
+	add	eax,ebx
+	lea	edx,[((-358537222))+r10*1+rdx]
+	mov	r10d,DWORD[12+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[((-722521979))+r10*1+rcx]
+	mov	r10d,DWORD[24+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[76029189+r10*1+rbx]
+	mov	r10d,DWORD[36+rsi]
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	lea	eax,[((-640364487))+r10*1+rax]
+	mov	r10d,DWORD[48+rsi]
+	xor	r11d,edx
+	xor	r11d,ebx
+	add	eax,r11d
+	rol	eax,4
+	mov	r11d,ebx
+	add	eax,ebx
+	lea	edx,[((-421815835))+r10*1+rdx]
+	mov	r10d,DWORD[60+rsi]
+	xor	r11d,ecx
+	xor	r11d,eax
+	add	edx,r11d
+	rol	edx,11
+	mov	r11d,eax
+	add	edx,eax
+	lea	ecx,[530742520+r10*1+rcx]
+	mov	r10d,DWORD[8+rsi]
+	xor	r11d,ebx
+	xor	r11d,edx
+	add	ecx,r11d
+	rol	ecx,16
+	mov	r11d,edx
+	add	ecx,edx
+	lea	ebx,[((-995338651))+r10*1+rbx]
+	mov	r10d,DWORD[rsi]
+	xor	r11d,eax
+	xor	r11d,ecx
+	add	ebx,r11d
+	rol	ebx,23
+	mov	r11d,ecx
+	add	ebx,ecx
+	mov	r10d,DWORD[rsi]
+	mov	r11d,0xffffffff
+	xor	r11d,edx
+	lea	eax,[((-198630844))+r10*1+rax]
+	or	r11d,ebx
+	xor	r11d,ecx
+	add	eax,r11d
+	mov	r10d,DWORD[28+rsi]
+	mov	r11d,0xffffffff
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx
+	lea	edx,[1126891415+r10*1+rdx]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[56+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[((-1416354905))+r10*1+rcx]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[20+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[((-57434055))+r10*1+rbx]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[48+rsi]
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+	lea	eax,[1700485571+r10*1+rax]
+	or	r11d,ebx
+	xor	r11d,ecx
+	add	eax,r11d
+	mov	r10d,DWORD[12+rsi]
+	mov	r11d,0xffffffff
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx
+	lea	edx,[((-1894986606))+r10*1+rdx]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[40+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[((-1051523))+r10*1+rcx]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[4+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[((-2054922799))+r10*1+rbx]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[32+rsi]
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+	lea	eax,[1873313359+r10*1+rax]
+	or	r11d,ebx
+	xor	r11d,ecx
+	add	eax,r11d
+	mov	r10d,DWORD[60+rsi]
+	mov	r11d,0xffffffff
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx
+	lea	edx,[((-30611744))+r10*1+rdx]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[24+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[((-1560198380))+r10*1+rcx]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[52+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[1309151649+r10*1+rbx]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[16+rsi]
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+	lea	eax,[((-145523070))+r10*1+rax]
+	or	r11d,ebx
+	xor	r11d,ecx
+	add	eax,r11d
+	mov	r10d,DWORD[44+rsi]
+	mov	r11d,0xffffffff
+	rol	eax,6
+	xor	r11d,ecx
+	add	eax,ebx
+	lea	edx,[((-1120210379))+r10*1+rdx]
+	or	r11d,eax
+	xor	r11d,ebx
+	add	edx,r11d
+	mov	r10d,DWORD[8+rsi]
+	mov	r11d,0xffffffff
+	rol	edx,10
+	xor	r11d,ebx
+	add	edx,eax
+	lea	ecx,[718787259+r10*1+rcx]
+	or	r11d,edx
+	xor	r11d,eax
+	add	ecx,r11d
+	mov	r10d,DWORD[36+rsi]
+	mov	r11d,0xffffffff
+	rol	ecx,15
+	xor	r11d,eax
+	add	ecx,edx
+	lea	ebx,[((-343485551))+r10*1+rbx]
+	or	r11d,ecx
+	xor	r11d,edx
+	add	ebx,r11d
+	mov	r10d,DWORD[rsi]
+	mov	r11d,0xffffffff
+	rol	ebx,21
+	xor	r11d,edx
+	add	ebx,ecx
+
+	add	eax,r8d
+	add	ebx,r9d
+	add	ecx,r14d
+	add	edx,r15d
+
+
+	add	rsi,64
+	cmp	rsi,rdi
+	jb	NEAR $L$loop
+
+
+$L$end:
+	mov	DWORD[rbp],eax	;write the four state dwords back through rbp
+	mov	DWORD[4+rbp],ebx
+	mov	DWORD[8+rbp],ecx
+	mov	DWORD[12+rbp],edx
+; Reload the five pushed registers, last pushed (r15) first.
+	mov	r15,QWORD[rsp]
+
+	mov	r14,QWORD[8+rsp]
+
+	mov	r12,QWORD[16+rsp]
+
+	mov	rbx,QWORD[24+rsp]
+
+	mov	rbp,QWORD[32+rsp]
+
+	add	rsp,40	;drop the five 8-byte slots
+
+$L$epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_md5_block_asm_data_order:
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	;exception handler referenced from $L$SEH_info_md5_block_asm_data_order
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64
+; NOTE(review): field names below assume r8 = CONTEXT* and r9 = DISPATCHER_CONTEXT* (Win64 layouts) - confirm against winnt.h.
+	mov	rax,QWORD[120+r8]	;CONTEXT.Rax (the guarded function copies rsp into rax on entry)
+	mov	rbx,QWORD[248+r8]	;CONTEXT.Rip
+
+	lea	r10,[$L$prologue]
+	cmp	rbx,r10
+	jb	NEAR $L$in_prologue	;Rip before $L$prologue: keep rax = CONTEXT.Rax
+
+	mov	rax,QWORD[152+r8]	;CONTEXT.Rsp
+
+	lea	r10,[$L$epilogue]
+	cmp	rbx,r10
+	jae	NEAR $L$in_prologue	;Rip at or past $L$epilogue: the pushed registers are already reloaded
+
+	lea	rax,[40+rax]	;step over the five pushed registers
+
+	mov	rbp,QWORD[((-8))+rax]	;read them back in push order: rbp, rbx, r12, r14, r15
+	mov	rbx,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r14,QWORD[((-32))+rax]
+	mov	r15,QWORD[((-40))+rax]
+	mov	QWORD[144+r8],rbx	;CONTEXT.Rbx
+	mov	QWORD[160+r8],rbp	;CONTEXT.Rbp
+	mov	QWORD[216+r8],r12	;CONTEXT.R12
+	mov	QWORD[232+r8],r14	;CONTEXT.R14
+	mov	QWORD[240+r8],r15	;CONTEXT.R15
+
+$L$in_prologue:
+	mov	rdi,QWORD[8+rax]	;rdi and rsi as stored by the WIN64 prologue at [8+rsp] and [16+rsp]
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	;CONTEXT.Rsp
+	mov	QWORD[168+r8],rsi	;CONTEXT.Rsi
+	mov	QWORD[176+r8],rdi	;CONTEXT.Rdi
+
+	mov	rdi,QWORD[40+r9]	;destination: DISPATCHER_CONTEXT.ContextRecord
+	mov	rsi,r8	;source: the context passed in r8
+	mov	ecx,154	;154 qwords = 1232 bytes
+	DD	0xa548f3fc	;encoded "cld" + "rep movsq": copy rcx qwords from [rsi] to [rdi]
+; Build the RtlVirtualUnwind call: four register arguments plus four stack arguments.
+	mov	rsi,r9
+	xor	rcx,rcx	;argument 1 = 0
+	mov	rdx,QWORD[8+rsi]	;argument 2 = DISPATCHER_CONTEXT.ImageBase
+	mov	r8,QWORD[rsi]	;argument 3 = DISPATCHER_CONTEXT.ControlPc
+	mov	r9,QWORD[16+rsi]	;argument 4 = DISPATCHER_CONTEXT.FunctionEntry
+	mov	r10,QWORD[40+rsi]	;DISPATCHER_CONTEXT.ContextRecord
+	lea	r11,[56+rsi]	;&DISPATCHER_CONTEXT.HandlerData
+	lea	r12,[24+rsi]	;&DISPATCHER_CONTEXT.EstablisherFrame
+	mov	QWORD[32+rsp],r10	;argument 5
+	mov	QWORD[40+rsp],r11	;argument 6
+	mov	QWORD[48+rsp],r12	;argument 7
+	mov	QWORD[56+rsp],rcx	;argument 8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	;return value 1 (ExceptionContinueSearch in the Win64 headers)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_md5_block_asm_data_order wrt ..imagebase	;function begin RVA
+	DD	$L$SEH_end_md5_block_asm_data_order wrt ..imagebase	;function end RVA
+	DD	$L$SEH_info_md5_block_asm_data_order wrt ..imagebase	;unwind-info RVA
+
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_md5_block_asm_data_order:
+DB	9,0,0,0	;byte 9 = version 1 with the exception-handler flag (1<<3); no prologue bytes, no unwind codes
+	DD	se_handler wrt ..imagebase	;RVA of the handler routine
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm
new file mode 100644
index 0000000..215f5d2
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/p256-x86_64-asm.asm
@@ -0,0 +1,4984 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+EXTERN	OPENSSL_ia32cap_P
+
+
+ALIGN	64
+$L$poly:	;four 64-bit limbs, least-significant first: 2^256 - 2^224 + 2^192 + 2^96 - 1
+	DQ	0xffffffffffffffff,0x00000000ffffffff,0x0000000000000000,0xffffffff00000001
+
+$L$One:
+	DD	1,1,1,1,1,1,1,1
+$L$Two:
+	DD	2,2,2,2,2,2,2,2
+$L$Three:
+	DD	3,3,3,3,3,3,3,3
+$L$ONE_mont:	;equals 2^256 minus the $L$poly value (limbs least-significant first)
+	DQ	0x0000000000000001,0xffffffff00000000,0xffffffffffffffff,0x00000000fffffffe
+
+; $L$ordK must stay directly after $L$ord: ecp_nistz256_ord_sqr_mont reads it as QWORD[32+rsi] with rsi = $L$ord.
+$L$ord:	;four 64-bit limbs, least-significant first
+	DQ	0xf3b9cac2fc632551,0xbce6faada7179e84,0xffffffffffffffff,0xffffffff00000000
+$L$ordK:	;NOTE(review): presumably -1/ord mod 2^64 (Montgomery constant) - confirm
+	DQ	0xccd1c8aaee00bc4f
+
+
+
+global	ecp_nistz256_neg
+; Writes to [rcx] the four-limb value 0 - [rdx]; when that subtraction borrows, $L$poly is added back.
+ALIGN	32
+ecp_nistz256_neg:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_neg:
+	mov	rdi,rcx
+	mov	rsi,rdx
+
+
+
+	push	r12
+
+	push	r13
+
+$L$neg_body:
+; r8:r9:r10:r11 start at zero; r13 will capture the borrow.
+	xor	r8,r8
+	xor	r9,r9
+	xor	r10,r10
+	xor	r11,r11
+	xor	r13,r13
+
+	sub	r8,QWORD[rsi]	;0 - input, limb by limb with borrow
+	sbb	r9,QWORD[8+rsi]
+	sbb	r10,QWORD[16+rsi]
+	mov	rax,r8	;rax, rdx, rcx, r12 keep the plain difference
+	sbb	r11,QWORD[24+rsi]
+	lea	rsi,[$L$poly]	;rsi is reused: it now points at $L$poly
+	mov	rdx,r9
+	sbb	r13,0	;r13 = 0 if no borrow, all ones if the subtraction borrowed
+
+	add	r8,QWORD[rsi]	;difference + $L$poly, limb by limb with carry
+	mov	rcx,r10
+	adc	r9,QWORD[8+rsi]
+	adc	r10,QWORD[16+rsi]
+	mov	r12,r11
+	adc	r11,QWORD[24+rsi]
+	test	r13,r13	;ZF = 1 when there was no borrow
+
+	cmovz	r8,rax	;no borrow: keep the plain difference instead of the sum
+	cmovz	r9,rdx
+	mov	QWORD[rdi],r8
+	cmovz	r10,rcx
+	mov	QWORD[8+rdi],r9
+	cmovz	r11,r12
+	mov	QWORD[16+rdi],r10
+	mov	QWORD[24+rdi],r11
+
+	mov	r13,QWORD[rsp]	;reload the two pushed registers
+
+	mov	r12,QWORD[8+rsp]
+
+	lea	rsp,[16+rsp]
+
+$L$neg_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_neg:
+
+
+
+
+
+
+global	ecp_nistz256_ord_mul_mont
+; Win64 entry: rcx/rdx/r8 become rdi (result), rsi (first operand), rdx (second operand).
+ALIGN	32
+ecp_nistz256_ord_mul_mont:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_ord_mul_mont:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+; NOTE(review): mask 0x80100 selects bits 8 and 19 of the capability word - presumably BMI2 and ADX; confirm.
+	lea	rcx,[OPENSSL_ia32cap_P]
+	mov	rcx,QWORD[8+rcx]
+	and	ecx,0x80100
+	cmp	ecx,0x80100
+	je	NEAR $L$ecp_nistz256_ord_mul_montx	;both bits set: use the montx variant
+	push	rbp
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$ord_mul_body:
+
+	mov	rax,QWORD[rdx]	;rax = limb 0 of the second operand
+	mov	rbx,rdx	;rbx keeps the second-operand pointer (rdx is clobbered by mul)
+	lea	r14,[$L$ord]	;r14 = address of the $L$ord limbs
+	mov	r15,QWORD[$L$ordK]	;r15 = $L$ordK, the imul factor used in each round
+
+
+	mov	rcx,rax
+	mul	QWORD[rsi]
+	mov	r8,rax
+	mov	rax,rcx
+	mov	r9,rdx
+
+	mul	QWORD[8+rsi]
+	add	r9,rax
+	mov	rax,rcx
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	QWORD[16+rsi]
+	add	r10,rax
+	mov	rax,rcx
+	adc	rdx,0
+
+	mov	r13,r8
+	imul	r8,r15
+
+	mov	r11,rdx
+	mul	QWORD[24+rsi]
+	add	r11,rax
+	mov	rax,r8
+	adc	rdx,0
+	mov	r12,rdx
+
+
+	mul	QWORD[r14]
+	mov	rbp,r8
+	add	r13,rax
+	mov	rax,r8
+	adc	rdx,0
+	mov	rcx,rdx
+
+	sub	r10,r8
+	sbb	r8,0
+
+	mul	QWORD[8+r14]
+	add	r9,rcx
+	adc	rdx,0
+	add	r9,rax
+	mov	rax,rbp
+	adc	r10,rdx
+	mov	rdx,rbp
+	adc	r8,0
+
+	shl	rax,32
+	shr	rdx,32
+	sub	r11,rax
+	mov	rax,QWORD[8+rbx]
+	sbb	rbp,rdx
+
+	add	r11,r8
+	adc	r12,rbp
+	adc	r13,0
+
+
+	mov	rcx,rax
+	mul	QWORD[rsi]
+	add	r9,rax
+	mov	rax,rcx
+	adc	rdx,0
+	mov	rbp,rdx
+
+	mul	QWORD[8+rsi]
+	add	r10,rbp
+	adc	rdx,0
+	add	r10,rax
+	mov	rax,rcx
+	adc	rdx,0
+	mov	rbp,rdx
+
+	mul	QWORD[16+rsi]
+	add	r11,rbp
+	adc	rdx,0
+	add	r11,rax
+	mov	rax,rcx
+	adc	rdx,0
+
+	mov	rcx,r9
+	imul	r9,r15
+
+	mov	rbp,rdx
+	mul	QWORD[24+rsi]
+	add	r12,rbp
+	adc	rdx,0
+	xor	r8,r8
+	add	r12,rax
+	mov	rax,r9
+	adc	r13,rdx
+	adc	r8,0
+
+
+	mul	QWORD[r14]
+	mov	rbp,r9
+	add	rcx,rax
+	mov	rax,r9
+	adc	rcx,rdx
+
+	sub	r11,r9
+	sbb	r9,0
+
+	mul	QWORD[8+r14]
+	add	r10,rcx
+	adc	rdx,0
+	add	r10,rax
+	mov	rax,rbp
+	adc	r11,rdx
+	mov	rdx,rbp
+	adc	r9,0
+
+	shl	rax,32
+	shr	rdx,32
+	sub	r12,rax
+	mov	rax,QWORD[16+rbx]
+	sbb	rbp,rdx
+
+	add	r12,r9
+	adc	r13,rbp
+	adc	r8,0
+
+
+	mov	rcx,rax
+	mul	QWORD[rsi]
+	add	r10,rax
+	mov	rax,rcx
+	adc	rdx,0
+	mov	rbp,rdx
+
+	mul	QWORD[8+rsi]
+	add	r11,rbp
+	adc	rdx,0
+	add	r11,rax
+	mov	rax,rcx
+	adc	rdx,0
+	mov	rbp,rdx
+
+	mul	QWORD[16+rsi]
+	add	r12,rbp
+	adc	rdx,0
+	add	r12,rax
+	mov	rax,rcx
+	adc	rdx,0
+
+	mov	rcx,r10
+	imul	r10,r15
+
+	mov	rbp,rdx
+	mul	QWORD[24+rsi]
+	add	r13,rbp
+	adc	rdx,0
+	xor	r9,r9
+	add	r13,rax
+	mov	rax,r10
+	adc	r8,rdx
+	adc	r9,0
+
+
+	mul	QWORD[r14]
+	mov	rbp,r10
+	add	rcx,rax
+	mov	rax,r10
+	adc	rcx,rdx
+
+	sub	r12,r10
+	sbb	r10,0
+
+	mul	QWORD[8+r14]
+	add	r11,rcx
+	adc	rdx,0
+	add	r11,rax
+	mov	rax,rbp
+	adc	r12,rdx
+	mov	rdx,rbp
+	adc	r10,0
+
+	shl	rax,32
+	shr	rdx,32
+	sub	r13,rax
+	mov	rax,QWORD[24+rbx]
+	sbb	rbp,rdx
+
+	add	r13,r10
+	adc	r8,rbp
+	adc	r9,0
+
+
+	mov	rcx,rax
+	mul	QWORD[rsi]
+	add	r11,rax
+	mov	rax,rcx
+	adc	rdx,0
+	mov	rbp,rdx
+
+	mul	QWORD[8+rsi]
+	add	r12,rbp
+	adc	rdx,0
+	add	r12,rax
+	mov	rax,rcx
+	adc	rdx,0
+	mov	rbp,rdx
+
+	mul	QWORD[16+rsi]
+	add	r13,rbp
+	adc	rdx,0
+	add	r13,rax
+	mov	rax,rcx
+	adc	rdx,0
+
+	mov	rcx,r11
+	imul	r11,r15
+
+	mov	rbp,rdx
+	mul	QWORD[24+rsi]
+	add	r8,rbp
+	adc	rdx,0
+	xor	r10,r10
+	add	r8,rax
+	mov	rax,r11
+	adc	r9,rdx
+	adc	r10,0
+
+
+	mul	QWORD[r14]
+	mov	rbp,r11
+	add	rcx,rax
+	mov	rax,r11
+	adc	rcx,rdx
+
+	sub	r13,r11
+	sbb	r11,0
+
+	mul	QWORD[8+r14]
+	add	r12,rcx
+	adc	rdx,0
+	add	r12,rax
+	mov	rax,rbp
+	adc	r13,rdx
+	mov	rdx,rbp
+	adc	r11,0
+
+	shl	rax,32
+	shr	rdx,32
+	sub	r8,rax
+	sbb	rbp,rdx
+
+	add	r8,r11
+	adc	r9,rbp
+	adc	r10,0
+
+
+	mov	rsi,r12	;rsi, r11, rcx, rbp keep the limbs as they were before the subtraction
+	sub	r12,QWORD[r14]	;subtract the four $L$ord limbs (r14 = $L$ord)
+	mov	r11,r13
+	sbb	r13,QWORD[8+r14]
+	mov	rcx,r8
+	sbb	r8,QWORD[16+r14]
+	mov	rbp,r9
+	sbb	r9,QWORD[24+r14]
+	sbb	r10,0	;propagate the borrow through the extra top word
+
+	cmovc	r12,rsi	;borrow: the subtraction went negative, keep the saved limbs
+	cmovc	r13,r11
+	cmovc	r8,rcx
+	cmovc	r9,rbp
+
+	mov	QWORD[rdi],r12	;store the four result limbs through rdi
+	mov	QWORD[8+rdi],r13
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	mov	r15,QWORD[rsp]	;reload the six pushed registers, last pushed first
+
+	mov	r14,QWORD[8+rsp]
+
+	mov	r13,QWORD[16+rsp]
+
+	mov	r12,QWORD[24+rsp]
+
+	mov	rbx,QWORD[32+rsp]
+
+	mov	rbp,QWORD[40+rsp]
+
+	lea	rsp,[48+rsp]
+
+$L$ord_mul_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_ord_mul_mont:
+
+
+
+
+
+
+
+global	ecp_nistz256_ord_sqr_mont
+; Win64 entry: rcx/rdx/r8 become rdi (result), rsi (operand), rdx (loop count, later held in rbx).
+ALIGN	32
+ecp_nistz256_ord_sqr_mont:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_ord_sqr_mont:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+; NOTE(review): mask 0x80100 selects bits 8 and 19 of the capability word - presumably BMI2 and ADX; confirm.
+	lea	rcx,[OPENSSL_ia32cap_P]
+	mov	rcx,QWORD[8+rcx]
+	and	ecx,0x80100
+	cmp	ecx,0x80100
+	je	NEAR $L$ecp_nistz256_ord_sqr_montx	;both bits set: use the montx variant
+	push	rbp
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$ord_sqr_body:
+; The four operand limbs live in r8, rax, r14, r15 at the top of every loop pass.
+	mov	r8,QWORD[rsi]
+	mov	rax,QWORD[8+rsi]
+	mov	r14,QWORD[16+rsi]
+	mov	r15,QWORD[24+rsi]
+	lea	rsi,[$L$ord]	;rsi now points at $L$ord; QWORD[32+rsi] is $L$ordK
+	mov	rbx,rdx	;rbx = loop count (decremented once per pass)
+	jmp	NEAR $L$oop_ord_sqr
+
+ALIGN	32
+$L$oop_ord_sqr:
+
+	mov	rbp,rax
+	mul	r8
+	mov	r9,rax
+DB	102,72,15,110,205
+	mov	rax,r14
+	mov	r10,rdx
+
+	mul	r8
+	add	r10,rax
+	mov	rax,r15
+DB	102,73,15,110,214
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	r8
+	add	r11,rax
+	mov	rax,r15
+DB	102,73,15,110,223
+	adc	rdx,0
+	mov	r12,rdx
+
+
+	mul	r14
+	mov	r13,rax
+	mov	rax,r14
+	mov	r14,rdx
+
+
+	mul	rbp
+	add	r11,rax
+	mov	rax,r15
+	adc	rdx,0
+	mov	r15,rdx
+
+	mul	rbp
+	add	r12,rax
+	adc	rdx,0
+
+	add	r12,r15
+	adc	r13,rdx
+	adc	r14,0
+
+
+	xor	r15,r15
+	mov	rax,r8
+	add	r9,r9
+	adc	r10,r10
+	adc	r11,r11
+	adc	r12,r12
+	adc	r13,r13
+	adc	r14,r14
+	adc	r15,0
+
+
+	mul	rax
+	mov	r8,rax
+DB	102,72,15,126,200
+	mov	rbp,rdx
+
+	mul	rax
+	add	r9,rbp
+	adc	r10,rax
+DB	102,72,15,126,208
+	adc	rdx,0
+	mov	rbp,rdx
+
+	mul	rax
+	add	r11,rbp
+	adc	r12,rax
+DB	102,72,15,126,216
+	adc	rdx,0
+	mov	rbp,rdx
+
+	mov	rcx,r8
+	imul	r8,QWORD[32+rsi]
+
+	mul	rax
+	add	r13,rbp
+	adc	r14,rax
+	mov	rax,QWORD[rsi]
+	adc	r15,rdx
+
+
+	mul	r8
+	mov	rbp,r8
+	add	rcx,rax
+	mov	rax,QWORD[8+rsi]
+	adc	rcx,rdx
+
+	sub	r10,r8
+	sbb	rbp,0
+
+	mul	r8
+	add	r9,rcx
+	adc	rdx,0
+	add	r9,rax
+	mov	rax,r8
+	adc	r10,rdx
+	mov	rdx,r8
+	adc	rbp,0
+
+	mov	rcx,r9
+	imul	r9,QWORD[32+rsi]
+
+	shl	rax,32
+	shr	rdx,32
+	sub	r11,rax
+	mov	rax,QWORD[rsi]
+	sbb	r8,rdx
+
+	add	r11,rbp
+	adc	r8,0
+
+
+	mul	r9
+	mov	rbp,r9
+	add	rcx,rax
+	mov	rax,QWORD[8+rsi]
+	adc	rcx,rdx
+
+	sub	r11,r9
+	sbb	rbp,0
+
+	mul	r9
+	add	r10,rcx
+	adc	rdx,0
+	add	r10,rax
+	mov	rax,r9
+	adc	r11,rdx
+	mov	rdx,r9
+	adc	rbp,0
+
+	mov	rcx,r10
+	imul	r10,QWORD[32+rsi]
+
+	shl	rax,32
+	shr	rdx,32
+	sub	r8,rax
+	mov	rax,QWORD[rsi]
+	sbb	r9,rdx
+
+	add	r8,rbp
+	adc	r9,0
+
+
+	mul	r10
+	mov	rbp,r10
+	add	rcx,rax
+	mov	rax,QWORD[8+rsi]
+	adc	rcx,rdx
+
+	sub	r8,r10
+	sbb	rbp,0
+
+	mul	r10
+	add	r11,rcx
+	adc	rdx,0
+	add	r11,rax
+	mov	rax,r10
+	adc	r8,rdx
+	mov	rdx,r10
+	adc	rbp,0
+
+	mov	rcx,r11
+	imul	r11,QWORD[32+rsi]
+
+	shl	rax,32
+	shr	rdx,32
+	sub	r9,rax
+	mov	rax,QWORD[rsi]
+	sbb	r10,rdx
+
+	add	r9,rbp
+	adc	r10,0
+
+
+	mul	r11
+	mov	rbp,r11
+	add	rcx,rax
+	mov	rax,QWORD[8+rsi]
+	adc	rcx,rdx
+
+	sub	r9,r11
+	sbb	rbp,0
+
+	mul	r11
+	add	r8,rcx
+	adc	rdx,0
+	add	r8,rax
+	mov	rax,r11
+	adc	r9,rdx
+	mov	rdx,r11
+	adc	rbp,0
+
+	shl	rax,32
+	shr	rdx,32
+	sub	r10,rax
+	sbb	r11,rdx
+
+	add	r10,rbp
+	adc	r11,0
+
+
+	xor	rdx,rdx
+	add	r8,r12
+	adc	r9,r13
+	mov	r12,r8
+	adc	r10,r14
+	adc	r11,r15
+	mov	rax,r9
+	adc	rdx,0
+
+
+	sub	r8,QWORD[rsi]
+	mov	r14,r10
+	sbb	r9,QWORD[8+rsi]
+	sbb	r10,QWORD[16+rsi]
+	mov	r15,r11
+	sbb	r11,QWORD[24+rsi]
+	sbb	rdx,0
+
+	cmovc	r8,r12
+	cmovnc	rax,r9
+	cmovnc	r14,r10
+	cmovnc	r15,r11
+
+	dec	rbx
+	jnz	NEAR $L$oop_ord_sqr
+
+	mov	QWORD[rdi],r8
+	mov	QWORD[8+rdi],rax
+	pxor	xmm1,xmm1
+	mov	QWORD[16+rdi],r14
+	pxor	xmm2,xmm2
+	mov	QWORD[24+rdi],r15
+	pxor	xmm3,xmm3
+
+	mov	r15,QWORD[rsp]
+
+	mov	r14,QWORD[8+rsp]
+
+	mov	r13,QWORD[16+rsp]
+
+	mov	r12,QWORD[24+rsp]
+
+	mov	rbx,QWORD[32+rsp]
+
+	mov	rbp,QWORD[40+rsp]
+
+	lea	rsp,[48+rsp]
+
+$L$ord_sqr_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_ord_sqr_mont:
+
+
+ALIGN	32
+ecp_nistz256_ord_mul_montx:	; mulx/adcx/adox variant of the 4-limb multiply reduced against $L$ord; not declared global
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_ord_mul_montx:
+	mov	rdi,rcx	; remap Win64 args rcx,rdx,r8 -> rdi (result), rsi (operand a), rdx (operand b)
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+
+$L$ecp_nistz256_ord_mul_montx:	; inner entry that skips the prologue above; presumably the jump target of ecp_nistz256_ord_mul_mont (dispatch not visible in this chunk)
+	push	rbp	; save the six callee-saved GPRs
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$ord_mulx_body:
+
+	mov	rbx,rdx	; rbx = pointer to b
+	mov	rdx,QWORD[rdx]	; rdx = b[0] (implicit mulx operand)
+	mov	r9,QWORD[rsi]	; r9..r12 = a[0..3]
+	mov	r10,QWORD[8+rsi]
+	mov	r11,QWORD[16+rsi]
+	mov	r12,QWORD[24+rsi]
+	lea	rsi,[((-128))+rsi]	; bias the a pointer by -128; later loads add 128 back
+	lea	r14,[(($L$ord-128))]	; r14 = $L$ord biased the same way
+	mov	r15,QWORD[$L$ordK]	; r15 = constant used to derive each reduction multiplier
+
+
+	mulx	r9,r8,r9	; a[0..3] * b[0]
+	mulx	r10,rcx,r10
+	mulx	r11,rbp,r11
+	add	r9,rcx
+	mulx	r12,rcx,r12
+	mov	rdx,r8
+	mulx	rax,rdx,r15	; rdx = low 64 bits of (low limb * r15); the high half in rax is never read
+	adc	r10,rbp
+	adc	r11,rcx
+	adc	r12,0
+
+
+	xor	r13,r13	; zero r13 and clear CF/OF for the adcx/adox chains
+	mulx	rbp,rcx,QWORD[((0+128))+r14]	; reduction round 1: multiplier * $L$ord[0..3]
+	adcx	r8,rcx
+	adox	r9,rbp
+
+	mulx	rbp,rcx,QWORD[((8+128))+r14]
+	adcx	r9,rcx
+	adox	r10,rbp
+
+	mulx	rbp,rcx,QWORD[((16+128))+r14]
+	adcx	r10,rcx
+	adox	r11,rbp
+
+	mulx	rbp,rcx,QWORD[((24+128))+r14]
+	mov	rdx,QWORD[8+rbx]	; rdx = b[1]
+	adcx	r11,rcx
+	adox	r12,rbp
+	adcx	r12,r8
+	adox	r13,r8
+	adc	r13,0
+
+
+	mulx	rbp,rcx,QWORD[((0+128))+rsi]	; accumulate a[0..3] * b[1]
+	adcx	r9,rcx
+	adox	r10,rbp
+
+	mulx	rbp,rcx,QWORD[((8+128))+rsi]
+	adcx	r10,rcx
+	adox	r11,rbp
+
+	mulx	rbp,rcx,QWORD[((16+128))+rsi]
+	adcx	r11,rcx
+	adox	r12,rbp
+
+	mulx	rbp,rcx,QWORD[((24+128))+rsi]
+	mov	rdx,r9
+	mulx	rax,rdx,r15	; next reduction multiplier
+	adcx	r12,rcx
+	adox	r13,rbp
+
+	adcx	r13,r8
+	adox	r8,r8
+	adc	r8,0
+
+
+	mulx	rbp,rcx,QWORD[((0+128))+r14]	; reduction round 2
+	adcx	r9,rcx
+	adox	r10,rbp
+
+	mulx	rbp,rcx,QWORD[((8+128))+r14]
+	adcx	r10,rcx
+	adox	r11,rbp
+
+	mulx	rbp,rcx,QWORD[((16+128))+r14]
+	adcx	r11,rcx
+	adox	r12,rbp
+
+	mulx	rbp,rcx,QWORD[((24+128))+r14]
+	mov	rdx,QWORD[16+rbx]	; rdx = b[2]
+	adcx	r12,rcx
+	adox	r13,rbp
+	adcx	r13,r9
+	adox	r8,r9
+	adc	r8,0
+
+
+	mulx	rbp,rcx,QWORD[((0+128))+rsi]	; accumulate a[0..3] * b[2]
+	adcx	r10,rcx
+	adox	r11,rbp
+
+	mulx	rbp,rcx,QWORD[((8+128))+rsi]
+	adcx	r11,rcx
+	adox	r12,rbp
+
+	mulx	rbp,rcx,QWORD[((16+128))+rsi]
+	adcx	r12,rcx
+	adox	r13,rbp
+
+	mulx	rbp,rcx,QWORD[((24+128))+rsi]
+	mov	rdx,r10
+	mulx	rax,rdx,r15	; next reduction multiplier
+	adcx	r13,rcx
+	adox	r8,rbp
+
+	adcx	r8,r9
+	adox	r9,r9
+	adc	r9,0
+
+
+	mulx	rbp,rcx,QWORD[((0+128))+r14]	; reduction round 3
+	adcx	r10,rcx
+	adox	r11,rbp
+
+	mulx	rbp,rcx,QWORD[((8+128))+r14]
+	adcx	r11,rcx
+	adox	r12,rbp
+
+	mulx	rbp,rcx,QWORD[((16+128))+r14]
+	adcx	r12,rcx
+	adox	r13,rbp
+
+	mulx	rbp,rcx,QWORD[((24+128))+r14]
+	mov	rdx,QWORD[24+rbx]	; rdx = b[3]
+	adcx	r13,rcx
+	adox	r8,rbp
+	adcx	r8,r10
+	adox	r9,r10
+	adc	r9,0
+
+
+	mulx	rbp,rcx,QWORD[((0+128))+rsi]	; accumulate a[0..3] * b[3]
+	adcx	r11,rcx
+	adox	r12,rbp
+
+	mulx	rbp,rcx,QWORD[((8+128))+rsi]
+	adcx	r12,rcx
+	adox	r13,rbp
+
+	mulx	rbp,rcx,QWORD[((16+128))+rsi]
+	adcx	r13,rcx
+	adox	r8,rbp
+
+	mulx	rbp,rcx,QWORD[((24+128))+rsi]
+	mov	rdx,r11
+	mulx	rax,rdx,r15	; last reduction multiplier
+	adcx	r8,rcx
+	adox	r9,rbp
+
+	adcx	r9,r10
+	adox	r10,r10
+	adc	r10,0
+
+
+	mulx	rbp,rcx,QWORD[((0+128))+r14]	; reduction round 4
+	adcx	r11,rcx
+	adox	r12,rbp
+
+	mulx	rbp,rcx,QWORD[((8+128))+r14]
+	adcx	r12,rcx
+	adox	r13,rbp
+
+	mulx	rbp,rcx,QWORD[((16+128))+r14]
+	adcx	r13,rcx
+	adox	r8,rbp
+
+	mulx	rbp,rcx,QWORD[((24+128))+r14]
+	lea	r14,[128+r14]	; undo the bias: r14 = $L$ord
+	mov	rbx,r12	; rbx, rdx, rcx, rbp keep pre-subtraction copies of the result limbs
+	adcx	r8,rcx
+	adox	r9,rbp
+	mov	rdx,r13
+	adcx	r9,r11
+	adox	r10,r11
+	adc	r10,0
+
+
+
+	mov	rcx,r8
+	sub	r12,QWORD[r14]	; trial subtraction of the four $L$ord limbs from r12,r13,r8,r9 (r10 = top carry)
+	sbb	r13,QWORD[8+r14]
+	sbb	r8,QWORD[16+r14]
+	mov	rbp,r9
+	sbb	r9,QWORD[24+r14]
+	sbb	r10,0
+
+	cmovc	r12,rbx	; on borrow, fall back to the unsubtracted copies
+	cmovc	r13,rdx
+	cmovc	r8,rcx
+	cmovc	r9,rbp
+
+	mov	QWORD[rdi],r12	; store the four result limbs
+	mov	QWORD[8+rdi],r13
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	mov	r15,QWORD[rsp]	; restore the registers pushed at entry
+
+	mov	r14,QWORD[8+rsp]
+
+	mov	r13,QWORD[16+rsp]
+
+	mov	r12,QWORD[24+rsp]
+
+	mov	rbx,QWORD[32+rsp]
+
+	mov	rbp,QWORD[40+rsp]
+
+	lea	rsp,[48+rsp]
+
+$L$ord_mulx_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_ord_mul_montx:
+
+
+ALIGN	32
+ecp_nistz256_ord_sqr_montx:	; mulx/adcx/adox variant of the repeated squaring against $L$ord; not declared global
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_ord_sqr_montx:
+	mov	rdi,rcx	; remap Win64 args rcx,rdx,r8 -> rdi (result), rsi (input), rdx (repeat count)
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+
+$L$ecp_nistz256_ord_sqr_montx:	; inner entry jumped to by ecp_nistz256_ord_sqr_mont after its own prologue
+	push	rbp	; save the six callee-saved GPRs
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$ord_sqrx_body:
+
+	mov	rbx,rdx	; rbx = remaining iteration count
+	mov	rdx,QWORD[rsi]	; input limbs a0..a3 -> rdx, r14, r15, r8
+	mov	r14,QWORD[8+rsi]
+	mov	r15,QWORD[16+rsi]
+	mov	r8,QWORD[24+rsi]
+	lea	rsi,[$L$ord]	; rsi now addresses the $L$ord table
+	jmp	NEAR $L$oop_ord_sqrx
+
+ALIGN	32
+$L$oop_ord_sqrx:	; one squaring + reduction per pass; value is carried in rdx,r14,r15,r8
+	mulx	r10,r9,r14	; a0*a1
+	mulx	r11,rcx,r15	; a0*a2
+	mov	rax,rdx	; rax = a0
+DB	102,73,15,110,206	; movq xmm1,r14  (stash a1)
+	mulx	r12,rbp,r8	; a0*a3
+	mov	rdx,r14
+	add	r10,rcx
+DB	102,73,15,110,215	; movq xmm2,r15  (stash a2)
+	adc	r11,rbp
+	adc	r12,0
+	xor	r13,r13	; zero r13 and clear CF/OF for the adcx/adox chains
+
+	mulx	rbp,rcx,r15	; a1*a2
+	adcx	r11,rcx
+	adox	r12,rbp
+
+	mulx	rbp,rcx,r8	; a1*a3
+	mov	rdx,r15
+	adcx	r12,rcx
+	adox	r13,rbp
+	adc	r13,0
+
+	mulx	r14,rcx,r8	; a2*a3
+	mov	rdx,rax	; rdx = a0 again
+DB	102,73,15,110,216	; movq xmm3,r8  (stash a3)
+	xor	r15,r15	; zero r15 and clear CF/OF
+	adcx	r9,r9	; adcx chain doubles the cross products; adox chain adds the squares
+	adox	r13,rcx
+	adcx	r10,r10
+	adox	r14,r15
+
+
+	mulx	rbp,r8,rdx	; a0*a0
+DB	102,72,15,126,202	; movq rdx,xmm1  (reload a1)
+	adcx	r11,r11
+	adox	r9,rbp
+	adcx	r12,r12
+	mulx	rax,rcx,rdx	; a1*a1
+DB	102,72,15,126,210	; movq rdx,xmm2  (reload a2)
+	adcx	r13,r13
+	adox	r10,rcx
+	adcx	r14,r14
+	mulx	rbp,rcx,rdx	; a2*a2
+DB	0x67	; lone 0x67 (address-size) prefix byte emitted ahead of the next instruction
+DB	102,72,15,126,218	; movq rdx,xmm3  (reload a3)
+	adox	r11,rax
+	adcx	r15,r15
+	adox	r12,rcx
+	adox	r13,rbp
+	mulx	rax,rcx,rdx	; a3*a3
+	adox	r14,rcx
+	adox	r15,rax
+
+
+	mov	rdx,r8
+	mulx	rcx,rdx,QWORD[32+rsi]	; rdx = low 64 bits of (low limb * qword at $L$ord+32); rcx is overwritten by the next mulx
+
+	xor	rax,rax	; rax = 0 and CF/OF cleared
+	mulx	rbp,rcx,QWORD[rsi]	; reduction round 1: multiplier * $L$ord[0..3]
+	adcx	r8,rcx
+	adox	r9,rbp
+	mulx	rbp,rcx,QWORD[8+rsi]
+	adcx	r9,rcx
+	adox	r10,rbp
+	mulx	rbp,rcx,QWORD[16+rsi]
+	adcx	r10,rcx
+	adox	r11,rbp
+	mulx	rbp,rcx,QWORD[24+rsi]
+	adcx	r11,rcx
+	adox	r8,rbp
+	adcx	r8,rax
+
+
+	mov	rdx,r9
+	mulx	rcx,rdx,QWORD[32+rsi]	; next reduction multiplier
+
+	mulx	rbp,rcx,QWORD[rsi]	; reduction round 2 (adcx/adox roles swapped relative to round 1)
+	adox	r9,rcx
+	adcx	r10,rbp
+	mulx	rbp,rcx,QWORD[8+rsi]
+	adox	r10,rcx
+	adcx	r11,rbp
+	mulx	rbp,rcx,QWORD[16+rsi]
+	adox	r11,rcx
+	adcx	r8,rbp
+	mulx	rbp,rcx,QWORD[24+rsi]
+	adox	r8,rcx
+	adcx	r9,rbp
+	adox	r9,rax
+
+
+	mov	rdx,r10
+	mulx	rcx,rdx,QWORD[32+rsi]	; next reduction multiplier
+
+	mulx	rbp,rcx,QWORD[rsi]	; reduction round 3
+	adcx	r10,rcx
+	adox	r11,rbp
+	mulx	rbp,rcx,QWORD[8+rsi]
+	adcx	r11,rcx
+	adox	r8,rbp
+	mulx	rbp,rcx,QWORD[16+rsi]
+	adcx	r8,rcx
+	adox	r9,rbp
+	mulx	rbp,rcx,QWORD[24+rsi]
+	adcx	r9,rcx
+	adox	r10,rbp
+	adcx	r10,rax
+
+
+	mov	rdx,r11
+	mulx	rcx,rdx,QWORD[32+rsi]	; last reduction multiplier
+
+	mulx	rbp,rcx,QWORD[rsi]	; reduction round 4
+	adox	r11,rcx
+	adcx	r8,rbp
+	mulx	rbp,rcx,QWORD[8+rsi]
+	adox	r8,rcx
+	adcx	r9,rbp
+	mulx	rbp,rcx,QWORD[16+rsi]
+	adox	r9,rcx
+	adcx	r10,rbp
+	mulx	rbp,rcx,QWORD[24+rsi]
+	adox	r10,rcx
+	adcx	r11,rbp
+	adox	r11,rax
+
+
+	add	r12,r8	; combine reduced lower half with the upper half; sum lands in r12,r9,r10,r11, carry in rax
+	adc	r9,r13
+	mov	rdx,r12	; rdx, r14, r15, r8 keep pre-subtraction copies
+	adc	r10,r14
+	adc	r11,r15
+	mov	r14,r9
+	adc	rax,0
+
+
+	sub	r12,QWORD[rsi]	; trial subtraction of the four $L$ord limbs
+	mov	r15,r10
+	sbb	r9,QWORD[8+rsi]
+	sbb	r10,QWORD[16+rsi]
+	mov	r8,r11
+	sbb	r11,QWORD[24+rsi]
+	sbb	rax,0
+
+	cmovnc	rdx,r12	; no borrow: take the subtracted limbs
+	cmovnc	r14,r9
+	cmovnc	r15,r10
+	cmovnc	r8,r11	; result is back in rdx,r14,r15,r8, the registers the loop reads
+
+	dec	rbx
+	jnz	NEAR $L$oop_ord_sqrx
+
+	mov	QWORD[rdi],rdx	; store the four result limbs
+	mov	QWORD[8+rdi],r14
+	pxor	xmm1,xmm1	; clear xmm1..xmm3, which held copies of operand limbs
+	mov	QWORD[16+rdi],r15
+	pxor	xmm2,xmm2
+	mov	QWORD[24+rdi],r8
+	pxor	xmm3,xmm3
+
+	mov	r15,QWORD[rsp]	; restore the registers pushed at entry
+
+	mov	r14,QWORD[8+rsp]
+
+	mov	r13,QWORD[16+rsp]
+
+	mov	r12,QWORD[24+rsp]
+
+	mov	rbx,QWORD[32+rsp]
+
+	mov	rbp,QWORD[40+rsp]
+
+	lea	rsp,[48+rsp]
+
+$L$ord_sqrx_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_ord_sqr_montx:
+
+
+
+
+
+
+global	ecp_nistz256_mul_mont	; exported wrapper: Win64 args rcx=result, rdx=operand a, r8=operand b; dispatches to the mul or mulx helper
+
+ALIGN	32
+ecp_nistz256_mul_mont:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_mul_mont:
+	mov	rdi,rcx	; remap Win64 args rcx,rdx,r8 -> rdi,rsi,rdx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+
+	lea	rcx,[OPENSSL_ia32cap_P]
+	mov	rcx,QWORD[8+rcx]	; capability qword at offset 8
+	and	ecx,0x80100	; keep bits 8 and 19 (presumably BMI2 and ADX); compared after the pushes below
+$L$mul_mont:
+	push	rbp	; save the six callee-saved GPRs
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$mul_body:
+	cmp	ecx,0x80100
+	je	NEAR $L$mul_montx	; both bits set: use the mulx helper
+	mov	rbx,rdx	; rbx = pointer to b
+	mov	rax,QWORD[rdx]	; rax = b[0], as the mul-based helper expects
+	mov	r9,QWORD[rsi]	; r9..r12 = a[0..3]
+	mov	r10,QWORD[8+rsi]
+	mov	r11,QWORD[16+rsi]
+	mov	r12,QWORD[24+rsi]
+
+	call	__ecp_nistz256_mul_montq	; helper stores the result through rdi
+	jmp	NEAR $L$mul_mont_done
+
+ALIGN	32
+$L$mul_montx:
+	mov	rbx,rdx	; rbx = pointer to b
+	mov	rdx,QWORD[rdx]	; rdx = b[0] (implicit mulx operand)
+	mov	r9,QWORD[rsi]	; r9..r12 = a[0..3]
+	mov	r10,QWORD[8+rsi]
+	mov	r11,QWORD[16+rsi]
+	mov	r12,QWORD[24+rsi]
+	lea	rsi,[((-128))+rsi]	; the mulx helper addresses a[] as [128+rsi], so bias the pointer by -128
+
+	call	__ecp_nistz256_mul_montx
+$L$mul_mont_done:
+	mov	r15,QWORD[rsp]	; restore the registers pushed at entry
+
+	mov	r14,QWORD[8+rsp]
+
+	mov	r13,QWORD[16+rsp]
+
+	mov	r12,QWORD[24+rsp]
+
+	mov	rbx,QWORD[32+rsp]
+
+	mov	rbp,QWORD[40+rsp]
+
+	lea	rsp,[48+rsp]
+
+$L$mul_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_mul_mont:
+
+
+ALIGN	32
+__ecp_nistz256_mul_montq:	; internal helper. In: rax=b[0], rbx=&b, rsi=&a, r9..r12=a[0..3], rdi=&result. Out: result in r12,r13,r8,r9 and at [rdi]; r14/r15 = qwords at $L$poly+8/+24
+
+
+
+	mov	rbp,rax	; rbp = b[0]
+	mul	r9	; a[0]*b[0]
+	mov	r14,QWORD[(($L$poly+8))]	; r14 = $L$poly limb 1 (used in the final subtraction)
+	mov	r8,rax
+	mov	rax,rbp
+	mov	r9,rdx
+
+	mul	r10	; a[1]*b[0]
+	mov	r15,QWORD[(($L$poly+24))]	; r15 = $L$poly limb 3
+	add	r9,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	r11	; a[2]*b[0]
+	add	r10,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	r12	; a[3]*b[0]
+	add	r11,rax
+	mov	rax,r8
+	adc	rdx,0
+	xor	r13,r13
+	mov	r12,rdx
+
+
+
+
+
+
+
+
+
+
+	mov	rbp,r8	; reduction step 1: fold the low limb (r8) into the limbs above it
+	shl	r8,32	; low limb << 32 is added at limb 1 ...
+	mul	r15	; ... low limb * $L$poly limb 3 is added at limbs 3..4
+	shr	rbp,32	; ... and low limb >> 32 at limb 2
+	add	r9,r8
+	adc	r10,rbp
+	adc	r11,rax
+	mov	rax,QWORD[8+rbx]	; rax = b[1]
+	adc	r12,rdx
+	adc	r13,0
+	xor	r8,r8
+
+
+
+	mov	rbp,rax
+	mul	QWORD[rsi]	; accumulate a[0..3] * b[1]
+	add	r9,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[8+rsi]
+	add	r10,rcx
+	adc	rdx,0
+	add	r10,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[16+rsi]
+	add	r11,rcx
+	adc	rdx,0
+	add	r11,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[24+rsi]
+	add	r12,rcx
+	adc	rdx,0
+	add	r12,rax
+	mov	rax,r9
+	adc	r13,rdx
+	adc	r8,0
+
+
+
+	mov	rbp,r9	; reduction step 2 (same pattern, low limb now r9)
+	shl	r9,32
+	mul	r15
+	shr	rbp,32
+	add	r10,r9
+	adc	r11,rbp
+	adc	r12,rax
+	mov	rax,QWORD[16+rbx]	; rax = b[2]
+	adc	r13,rdx
+	adc	r8,0
+	xor	r9,r9
+
+
+
+	mov	rbp,rax
+	mul	QWORD[rsi]	; accumulate a[0..3] * b[2]
+	add	r10,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[8+rsi]
+	add	r11,rcx
+	adc	rdx,0
+	add	r11,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[16+rsi]
+	add	r12,rcx
+	adc	rdx,0
+	add	r12,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[24+rsi]
+	add	r13,rcx
+	adc	rdx,0
+	add	r13,rax
+	mov	rax,r10
+	adc	r8,rdx
+	adc	r9,0
+
+
+
+	mov	rbp,r10	; reduction step 3 (low limb now r10)
+	shl	r10,32
+	mul	r15
+	shr	rbp,32
+	add	r11,r10
+	adc	r12,rbp
+	adc	r13,rax
+	mov	rax,QWORD[24+rbx]	; rax = b[3]
+	adc	r8,rdx
+	adc	r9,0
+	xor	r10,r10
+
+
+
+	mov	rbp,rax
+	mul	QWORD[rsi]	; accumulate a[0..3] * b[3]
+	add	r11,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[8+rsi]
+	add	r12,rcx
+	adc	rdx,0
+	add	r12,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[16+rsi]
+	add	r13,rcx
+	adc	rdx,0
+	add	r13,rax
+	mov	rax,rbp
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	QWORD[24+rsi]
+	add	r8,rcx
+	adc	rdx,0
+	add	r8,rax
+	mov	rax,r11
+	adc	r9,rdx
+	adc	r10,0
+
+
+
+	mov	rbp,r11	; reduction step 4 (low limb now r11)
+	shl	r11,32
+	mul	r15
+	shr	rbp,32
+	add	r12,r11
+	adc	r13,rbp
+	mov	rcx,r12	; rcx, rbp, rbx, rdx keep pre-subtraction copies of the result limbs
+	adc	r8,rax
+	adc	r9,rdx
+	mov	rbp,r13
+	adc	r10,0
+
+
+
+	sub	r12,-1	; trial subtraction of the modulus limbs (-1, r14, 0, r15); r10 holds the top carry
+	mov	rbx,r8
+	sbb	r13,r14
+	sbb	r8,0
+	mov	rdx,r9
+	sbb	r9,r15
+	sbb	r10,0
+
+	cmovc	r12,rcx	; on borrow, fall back to the unsubtracted copies
+	cmovc	r13,rbp
+	mov	QWORD[rdi],r12	; store the four result limbs through rdi
+	cmovc	r8,rbx
+	mov	QWORD[8+rdi],r13
+	cmovc	r9,rdx
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+
+global	ecp_nistz256_sqr_mont	; exported wrapper: Win64 args rcx=result, rdx=input; dispatches to the mul or mulx squaring helper
+
+ALIGN	32
+ecp_nistz256_sqr_mont:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_sqr_mont:
+	mov	rdi,rcx	; remap Win64 args rcx,rdx -> rdi,rsi
+	mov	rsi,rdx
+
+
+
+	lea	rcx,[OPENSSL_ia32cap_P]
+	mov	rcx,QWORD[8+rcx]	; capability qword at offset 8
+	and	ecx,0x80100	; keep bits 8 and 19 (presumably BMI2 and ADX); compared after the pushes below
+	push	rbp	; save the six callee-saved GPRs
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$sqr_body:
+	cmp	ecx,0x80100
+	je	NEAR $L$sqr_montx	; both bits set: use the mulx helper
+	mov	rax,QWORD[rsi]	; a0..a3 -> rax, r14, r15, r8 as the mul-based helper expects
+	mov	r14,QWORD[8+rsi]
+	mov	r15,QWORD[16+rsi]
+	mov	r8,QWORD[24+rsi]
+
+	call	__ecp_nistz256_sqr_montq	; helper stores the result through rdi
+	jmp	NEAR $L$sqr_mont_done
+
+ALIGN	32
+$L$sqr_montx:
+	mov	rdx,QWORD[rsi]	; a0..a3 -> rdx, r14, r15, r8 as the mulx helper expects
+	mov	r14,QWORD[8+rsi]
+	mov	r15,QWORD[16+rsi]
+	mov	r8,QWORD[24+rsi]
+	lea	rsi,[((-128))+rsi]	; the mulx helper addresses a[] as [128+rsi], so bias the pointer by -128
+
+	call	__ecp_nistz256_sqr_montx
+$L$sqr_mont_done:
+	mov	r15,QWORD[rsp]	; restore the registers pushed at entry
+
+	mov	r14,QWORD[8+rsp]
+
+	mov	r13,QWORD[16+rsp]
+
+	mov	r12,QWORD[24+rsp]
+
+	mov	rbx,QWORD[32+rsp]
+
+	mov	rbp,QWORD[40+rsp]
+
+	lea	rsp,[48+rsp]
+
+$L$sqr_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_sqr_mont:
+
+
+ALIGN	32
+__ecp_nistz256_sqr_montq:	; internal helper. In: rax,r14,r15,r8 = a0..a3, rsi=&a, rdi=&result. Out: result in r12..r15 and at [rdi]; leaves rsi/rbp = qwords at $L$poly+8/+24
+
+	mov	r13,rax	; r13 = a0
+	mul	r14	; a0*a1
+	mov	r9,rax
+	mov	rax,r15
+	mov	r10,rdx
+
+	mul	r13	; a0*a2
+	add	r10,rax
+	mov	rax,r8
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	r13	; a0*a3
+	add	r11,rax
+	mov	rax,r15
+	adc	rdx,0
+	mov	r12,rdx
+
+
+	mul	r14	; a1*a2
+	add	r11,rax
+	mov	rax,r8
+	adc	rdx,0
+	mov	rbp,rdx
+
+	mul	r14	; a1*a3
+	add	r12,rax
+	mov	rax,r8
+	adc	rdx,0
+	add	r12,rbp
+	mov	r13,rdx
+	adc	r13,0
+
+
+	mul	r15	; a2*a3
+	xor	r15,r15
+	add	r13,rax
+	mov	rax,QWORD[rsi]	; reload a0 from memory for the square terms
+	mov	r14,rdx
+	adc	r14,0
+
+	add	r9,r9	; double the cross products held in r9..r14; carry-out collects in r15
+	adc	r10,r10
+	adc	r11,r11
+	adc	r12,r12
+	adc	r13,r13
+	adc	r14,r14
+	adc	r15,0
+
+	mul	rax	; a0*a0
+	mov	r8,rax
+	mov	rax,QWORD[8+rsi]
+	mov	rcx,rdx
+
+	mul	rax	; a1*a1
+	add	r9,rcx
+	adc	r10,rax
+	mov	rax,QWORD[16+rsi]
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	rax	; a2*a2
+	add	r11,rcx
+	adc	r12,rax
+	mov	rax,QWORD[24+rsi]
+	adc	rdx,0
+	mov	rcx,rdx
+
+	mul	rax	; a3*a3
+	add	r13,rcx
+	adc	r14,rax
+	mov	rax,r8
+	adc	r15,rdx
+
+	mov	rsi,QWORD[(($L$poly+8))]	; rsi = $L$poly limb 1 (input pointer no longer needed)
+	mov	rbp,QWORD[(($L$poly+24))]	; rbp = $L$poly limb 3
+
+
+
+
+	mov	rcx,r8	; reduction step 1: fold the low limb (r8) into the limbs above it
+	shl	r8,32	; low limb << 32 at the next limb ...
+	mul	rbp	; ... low limb * $L$poly limb 3 two limbs further up
+	shr	rcx,32
+	add	r9,r8
+	adc	r10,rcx
+	adc	r11,rax
+	mov	rax,r9
+	adc	rdx,0
+
+
+
+	mov	rcx,r9	; reduction step 2 (low limb now r9)
+	shl	r9,32
+	mov	r8,rdx
+	mul	rbp
+	shr	rcx,32
+	add	r10,r9
+	adc	r11,rcx
+	adc	r8,rax
+	mov	rax,r10
+	adc	rdx,0
+
+
+
+	mov	rcx,r10	; reduction step 3 (low limb now r10)
+	shl	r10,32
+	mov	r9,rdx
+	mul	rbp
+	shr	rcx,32
+	add	r11,r10
+	adc	r8,rcx
+	adc	r9,rax
+	mov	rax,r11
+	adc	rdx,0
+
+
+
+	mov	rcx,r11	; reduction step 4 (low limb now r11)
+	shl	r11,32
+	mov	r10,rdx
+	mul	rbp
+	shr	rcx,32
+	add	r8,r11
+	adc	r9,rcx
+	adc	r10,rax
+	adc	rdx,0
+	xor	r11,r11
+
+
+
+	add	r12,r8	; add the reduced lower half to the upper half r12..r15; r11 takes the carry
+	adc	r13,r9
+	mov	r8,r12	; r8, r9, r10, rcx keep pre-subtraction copies
+	adc	r14,r10
+	adc	r15,rdx
+	mov	r9,r13
+	adc	r11,0
+
+	sub	r12,-1	; trial subtraction of the modulus limbs (-1, rsi, 0, rbp)
+	mov	r10,r14
+	sbb	r13,rsi
+	sbb	r14,0
+	mov	rcx,r15
+	sbb	r15,rbp
+	sbb	r11,0
+
+	cmovc	r12,r8	; on borrow, fall back to the unsubtracted copies
+	cmovc	r13,r9
+	mov	QWORD[rdi],r12	; store the four result limbs through rdi
+	cmovc	r14,r10
+	mov	QWORD[8+rdi],r13
+	cmovc	r15,rcx
+	mov	QWORD[16+rdi],r14
+	mov	QWORD[24+rdi],r15
+
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	32
+__ecp_nistz256_mul_montx:	; internal mulx helper. In: rdx=b[0], rbx=&b, rsi=&a-128, r9..r12=a[0..3], rdi=&result. Out: result in r12,r13,r8,r9 and at [rdi]
+
+
+
+	mulx	r9,r8,r9	; a[0..3] * b[0]
+	mulx	r10,rcx,r10
+	mov	r14,32	; r14 = shift count for the shlx/shrx pairs below
+	xor	r13,r13	; zero r13; also clears CF, so the adc that follows starts without carry
+	mulx	r11,rbp,r11
+	mov	r15,QWORD[(($L$poly+24))]	; r15 = $L$poly limb 3
+	adc	r9,rcx
+	mulx	r12,rcx,r12
+	mov	rdx,r8
+	adc	r10,rbp
+	shlx	rbp,r8,r14	; rbp = low limb << 32
+	adc	r11,rcx
+	shrx	rcx,r8,r14	; rcx = low limb >> 32
+	adc	r12,0
+
+
+
+	add	r9,rbp	; reduction step 1: fold the low limb into the limbs above it
+	adc	r10,rcx
+
+	mulx	rbp,rcx,r15	; low limb * $L$poly limb 3
+	mov	rdx,QWORD[8+rbx]	; rdx = b[1]
+	adc	r11,rcx
+	adc	r12,rbp
+	adc	r13,0
+	xor	r8,r8	; zero r8 and clear CF/OF for the adcx/adox chains
+
+
+
+	mulx	rbp,rcx,QWORD[((0+128))+rsi]	; accumulate a[0..3] * b[1]
+	adcx	r9,rcx
+	adox	r10,rbp
+
+	mulx	rbp,rcx,QWORD[((8+128))+rsi]
+	adcx	r10,rcx
+	adox	r11,rbp
+
+	mulx	rbp,rcx,QWORD[((16+128))+rsi]
+	adcx	r11,rcx
+	adox	r12,rbp
+
+	mulx	rbp,rcx,QWORD[((24+128))+rsi]
+	mov	rdx,r9
+	adcx	r12,rcx
+	shlx	rcx,r9,r14
+	adox	r13,rbp
+	shrx	rbp,r9,r14
+
+	adcx	r13,r8
+	adox	r8,r8
+	adc	r8,0
+
+
+
+	add	r10,rcx	; reduction step 2 (low limb now r9)
+	adc	r11,rbp
+
+	mulx	rbp,rcx,r15
+	mov	rdx,QWORD[16+rbx]	; rdx = b[2]
+	adc	r12,rcx
+	adc	r13,rbp
+	adc	r8,0
+	xor	r9,r9
+
+
+
+	mulx	rbp,rcx,QWORD[((0+128))+rsi]	; accumulate a[0..3] * b[2]
+	adcx	r10,rcx
+	adox	r11,rbp
+
+	mulx	rbp,rcx,QWORD[((8+128))+rsi]
+	adcx	r11,rcx
+	adox	r12,rbp
+
+	mulx	rbp,rcx,QWORD[((16+128))+rsi]
+	adcx	r12,rcx
+	adox	r13,rbp
+
+	mulx	rbp,rcx,QWORD[((24+128))+rsi]
+	mov	rdx,r10
+	adcx	r13,rcx
+	shlx	rcx,r10,r14
+	adox	r8,rbp
+	shrx	rbp,r10,r14
+
+	adcx	r8,r9
+	adox	r9,r9
+	adc	r9,0
+
+
+
+	add	r11,rcx	; reduction step 3 (low limb now r10)
+	adc	r12,rbp
+
+	mulx	rbp,rcx,r15
+	mov	rdx,QWORD[24+rbx]	; rdx = b[3]
+	adc	r13,rcx
+	adc	r8,rbp
+	adc	r9,0
+	xor	r10,r10
+
+
+
+	mulx	rbp,rcx,QWORD[((0+128))+rsi]	; accumulate a[0..3] * b[3]
+	adcx	r11,rcx
+	adox	r12,rbp
+
+	mulx	rbp,rcx,QWORD[((8+128))+rsi]
+	adcx	r12,rcx
+	adox	r13,rbp
+
+	mulx	rbp,rcx,QWORD[((16+128))+rsi]
+	adcx	r13,rcx
+	adox	r8,rbp
+
+	mulx	rbp,rcx,QWORD[((24+128))+rsi]
+	mov	rdx,r11
+	adcx	r8,rcx
+	shlx	rcx,r11,r14
+	adox	r9,rbp
+	shrx	rbp,r11,r14
+
+	adcx	r9,r10
+	adox	r10,r10
+	adc	r10,0
+
+
+
+	add	r12,rcx	; reduction step 4 (low limb now r11)
+	adc	r13,rbp
+
+	mulx	rbp,rcx,r15
+	mov	rbx,r12	; rbx, rdx, rcx, rbp keep pre-subtraction copies of the result limbs
+	mov	r14,QWORD[(($L$poly+8))]	; r14 = $L$poly limb 1 (shift count no longer needed)
+	adc	r8,rcx
+	mov	rdx,r13
+	adc	r9,rbp
+	adc	r10,0
+
+
+
+	xor	eax,eax	; clears CF so the first sbb below acts as a plain subtract
+	mov	rcx,r8
+	sbb	r12,-1	; trial subtraction of the modulus limbs (-1, r14, 0, r15)
+	sbb	r13,r14
+	sbb	r8,0
+	mov	rbp,r9
+	sbb	r9,r15
+	sbb	r10,0
+
+	cmovc	r12,rbx	; on borrow, fall back to the unsubtracted copies
+	cmovc	r13,rdx
+	mov	QWORD[rdi],r12	; store the four result limbs through rdi
+	cmovc	r8,rcx
+	mov	QWORD[8+rdi],r13
+	cmovc	r9,rbp
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+
+
+
+ALIGN	32
+__ecp_nistz256_sqr_montx:	; internal mulx helper. In: rdx,r14,r15,r8 = a0..a3, rsi=&a-128, rdi=&result. Out: result in r12..r15 and at [rdi]; leaves rsi/rbp = qwords at $L$poly+8/+24
+
+	mulx	r10,r9,r14	; a0*a1
+	mulx	r11,rcx,r15	; a0*a2
+	xor	eax,eax	; clears CF, so the adc that follows starts without carry
+	adc	r10,rcx
+	mulx	r12,rbp,r8	; a0*a3
+	mov	rdx,r14
+	adc	r11,rbp
+	adc	r12,0
+	xor	r13,r13	; zero r13 and clear CF/OF for the adcx/adox chains
+
+
+	mulx	rbp,rcx,r15	; a1*a2
+	adcx	r11,rcx
+	adox	r12,rbp
+
+	mulx	rbp,rcx,r8	; a1*a3
+	mov	rdx,r15
+	adcx	r12,rcx
+	adox	r13,rbp
+	adc	r13,0
+
+
+	mulx	r14,rcx,r8	; a2*a3
+	mov	rdx,QWORD[((0+128))+rsi]	; reload a0 from memory (pointer is biased by -128)
+	xor	r15,r15	; zero r15 and clear CF/OF
+	adcx	r9,r9	; adcx chain doubles the cross products; adox chain adds the squares
+	adox	r13,rcx
+	adcx	r10,r10
+	adox	r14,r15
+
+	mulx	rbp,r8,rdx	; a0*a0
+	mov	rdx,QWORD[((8+128))+rsi]
+	adcx	r11,r11
+	adox	r9,rbp
+	adcx	r12,r12
+	mulx	rax,rcx,rdx	; a1*a1
+	mov	rdx,QWORD[((16+128))+rsi]
+	adcx	r13,r13
+	adox	r10,rcx
+	adcx	r14,r14
+DB	0x67	; lone 0x67 (address-size) prefix byte emitted ahead of the next instruction
+	mulx	rbp,rcx,rdx	; a2*a2
+	mov	rdx,QWORD[((24+128))+rsi]
+	adox	r11,rax
+	adcx	r15,r15
+	adox	r12,rcx
+	mov	rsi,32	; rsi = shift count for the shlx/shrx pairs below (input pointer no longer needed)
+	adox	r13,rbp
+DB	0x67,0x67	; two 0x67 prefix bytes emitted ahead of the next instruction
+	mulx	rax,rcx,rdx	; a3*a3
+	mov	rdx,QWORD[(($L$poly+24))]	; rdx = $L$poly limb 3 (implicit mulx operand for the reduction)
+	adox	r14,rcx
+	shlx	rcx,r8,rsi
+	adox	r15,rax
+	shrx	rax,r8,rsi
+	mov	rbp,rdx	; keep $L$poly limb 3 in rbp for the final subtraction
+
+
+	add	r9,rcx	; reduction step 1: fold the low limb (r8) into the limbs above it
+	adc	r10,rax
+
+	mulx	r8,rcx,r8	; low limb * $L$poly limb 3
+	adc	r11,rcx
+	shlx	rcx,r9,rsi
+	adc	r8,0
+	shrx	rax,r9,rsi
+
+
+	add	r10,rcx	; reduction step 2 (low limb now r9)
+	adc	r11,rax
+
+	mulx	r9,rcx,r9
+	adc	r8,rcx
+	shlx	rcx,r10,rsi
+	adc	r9,0
+	shrx	rax,r10,rsi
+
+
+	add	r11,rcx	; reduction step 3 (low limb now r10)
+	adc	r8,rax
+
+	mulx	r10,rcx,r10
+	adc	r9,rcx
+	shlx	rcx,r11,rsi
+	adc	r10,0
+	shrx	rax,r11,rsi
+
+
+	add	r8,rcx	; reduction step 4 (low limb now r11)
+	adc	r9,rax
+
+	mulx	r11,rcx,r11
+	adc	r10,rcx
+	adc	r11,0
+
+	xor	rdx,rdx
+	add	r12,r8	; add the reduced lower half to the upper half r12..r15; rdx takes the carry
+	mov	rsi,QWORD[(($L$poly+8))]	; rsi = $L$poly limb 1
+	adc	r13,r9
+	mov	r8,r12	; r8, r9, r10, r11 keep pre-subtraction copies
+	adc	r14,r10
+	adc	r15,r11
+	mov	r9,r13
+	adc	rdx,0
+
+	sub	r12,-1	; trial subtraction of the modulus limbs (-1, rsi, 0, rbp)
+	mov	r10,r14
+	sbb	r13,rsi
+	sbb	r14,0
+	mov	r11,r15
+	sbb	r15,rbp
+	sbb	rdx,0
+
+	cmovc	r12,r8	; on borrow, fall back to the unsubtracted copies
+	cmovc	r13,r9
+	mov	QWORD[rdi],r12	; store the four result limbs through rdi
+	cmovc	r14,r10
+	mov	QWORD[8+rdi],r13
+	cmovc	r15,r11
+	mov	QWORD[16+rdi],r14
+	mov	QWORD[24+rdi],r15
+
+	DB	0F3h,0C3h		;repret
+
+
+
+
+global	ecp_nistz256_select_w5	; exported: masked scan of a 16-entry table of 96-byte entries; uses Win64 registers directly: rcx=output, rdx=table, r8d=index
+
+ALIGN	32
+ecp_nistz256_select_w5:
+
+	lea	rax,[OPENSSL_ia32cap_P]
+	mov	rax,QWORD[8+rax]	; capability qword at offset 8
+	test	eax,32	; bit 5 (presumably AVX2: the target path uses ymm integer instructions)
+	jnz	NEAR $L$avx2_select_w5
+	lea	rax,[((-136))+rsp]	; rax = base for the hand-encoded xmm saves below
+$L$SEH_begin_ecp_nistz256_select_w5:
+DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]  (rsp drops by 168 in total)
+DB	0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
+DB	0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
+DB	0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
+DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
+DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
+DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
+DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
+DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
+DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
+DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
+	movdqa	xmm0,XMMWORD[$L$One]	; xmm0 = per-iteration counter step (contents of $L$One are defined outside this chunk)
+	movd	xmm1,r8d	; xmm1 = requested index
+
+	pxor	xmm2,xmm2	; xmm2..xmm7 accumulate the selected 96-byte entry
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7
+
+	movdqa	xmm8,xmm0	; xmm8 = running counter, starting at $L$One
+	pshufd	xmm1,xmm1,0	; broadcast the index to all four dword lanes
+
+	mov	rax,16	; 16 table entries
+$L$select_loop_sse_w5:	; every entry is loaded on every pass; only the masked one survives the pand
+
+	movdqa	xmm15,xmm8
+	paddd	xmm8,xmm0
+	pcmpeqd	xmm15,xmm1	; xmm15 = all-ones where counter == index, else zero
+
+	movdqa	xmm9,XMMWORD[rdx]	; load the current 96-byte entry
+	movdqa	xmm10,XMMWORD[16+rdx]
+	movdqa	xmm11,XMMWORD[32+rdx]
+	movdqa	xmm12,XMMWORD[48+rdx]
+	movdqa	xmm13,XMMWORD[64+rdx]
+	movdqa	xmm14,XMMWORD[80+rdx]
+	lea	rdx,[96+rdx]	; advance to the next entry
+
+	pand	xmm9,xmm15	; mask the entry and OR it into the accumulators
+	pand	xmm10,xmm15
+	por	xmm2,xmm9
+	pand	xmm11,xmm15
+	por	xmm3,xmm10
+	pand	xmm12,xmm15
+	por	xmm4,xmm11
+	pand	xmm13,xmm15
+	por	xmm5,xmm12
+	pand	xmm14,xmm15
+	por	xmm6,xmm13
+	por	xmm7,xmm14
+
+	dec	rax
+	jnz	NEAR $L$select_loop_sse_w5
+
+	movdqu	XMMWORD[rcx],xmm2	; write the 96-byte result (unaligned stores)
+	movdqu	XMMWORD[16+rcx],xmm3
+	movdqu	XMMWORD[32+rcx],xmm4
+	movdqu	XMMWORD[48+rcx],xmm5
+	movdqu	XMMWORD[64+rcx],xmm6
+	movdqu	XMMWORD[80+rcx],xmm7
+	movaps	xmm6,XMMWORD[rsp]	; restore xmm6..xmm15 from the slots written by the DB block above
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[168+rsp]	; release the 168-byte save area
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_select_w5:
+
+
+
+
+global	ecp_nistz256_select_w7	; exported: masked scan of a 64-entry table of 64-byte entries; uses Win64 registers directly: rcx=output, rdx=table, r8d=index
+
+ALIGN	32
+ecp_nistz256_select_w7:
+
+	lea	rax,[OPENSSL_ia32cap_P]
+	mov	rax,QWORD[8+rax]	; capability qword at offset 8
+	test	eax,32	; bit 5 (presumably AVX2: the target path uses ymm integer instructions)
+	jnz	NEAR $L$avx2_select_w7
+	lea	rax,[((-136))+rsp]	; rax = base for the hand-encoded xmm saves below
+$L$SEH_begin_ecp_nistz256_select_w7:
+DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]  (rsp drops by 168 in total)
+DB	0x0f,0x29,0x70,0xe0	; movaps [rax-0x20],xmm6
+DB	0x0f,0x29,0x78,0xf0	; movaps [rax-0x10],xmm7
+DB	0x44,0x0f,0x29,0x00	; movaps [rax],xmm8
+DB	0x44,0x0f,0x29,0x48,0x10	; movaps [rax+0x10],xmm9
+DB	0x44,0x0f,0x29,0x50,0x20	; movaps [rax+0x20],xmm10
+DB	0x44,0x0f,0x29,0x58,0x30	; movaps [rax+0x30],xmm11
+DB	0x44,0x0f,0x29,0x60,0x40	; movaps [rax+0x40],xmm12
+DB	0x44,0x0f,0x29,0x68,0x50	; movaps [rax+0x50],xmm13
+DB	0x44,0x0f,0x29,0x70,0x60	; movaps [rax+0x60],xmm14
+DB	0x44,0x0f,0x29,0x78,0x70	; movaps [rax+0x70],xmm15
+	movdqa	xmm8,XMMWORD[$L$One]	; xmm8 = running counter, starting at $L$One (contents defined outside this chunk)
+	movd	xmm1,r8d	; xmm1 = requested index
+
+	pxor	xmm2,xmm2	; xmm2..xmm5 accumulate the selected 64-byte entry
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+
+	movdqa	xmm0,xmm8	; xmm0 = per-iteration counter step
+	pshufd	xmm1,xmm1,0	; broadcast the index to all four dword lanes
+	mov	rax,64	; 64 table entries
+
+$L$select_loop_sse_w7:	; every entry is loaded on every pass; only the masked one survives the pand
+	movdqa	xmm15,xmm8
+	paddd	xmm8,xmm0
+	movdqa	xmm9,XMMWORD[rdx]	; load the current 64-byte entry
+	movdqa	xmm10,XMMWORD[16+rdx]
+	pcmpeqd	xmm15,xmm1	; xmm15 = all-ones where counter == index, else zero
+	movdqa	xmm11,XMMWORD[32+rdx]
+	movdqa	xmm12,XMMWORD[48+rdx]
+	lea	rdx,[64+rdx]	; advance to the next entry
+
+	pand	xmm9,xmm15	; mask the entry and OR it into the accumulators
+	pand	xmm10,xmm15
+	por	xmm2,xmm9
+	pand	xmm11,xmm15
+	por	xmm3,xmm10
+	pand	xmm12,xmm15
+	por	xmm4,xmm11
+	prefetcht0	[255+rdx]	; prefetch hint for table data ahead of the current position
+	por	xmm5,xmm12
+
+	dec	rax
+	jnz	NEAR $L$select_loop_sse_w7
+
+	movdqu	XMMWORD[rcx],xmm2	; write the 64-byte result (unaligned stores)
+	movdqu	XMMWORD[16+rcx],xmm3
+	movdqu	XMMWORD[32+rcx],xmm4
+	movdqu	XMMWORD[48+rcx],xmm5
+	movaps	xmm6,XMMWORD[rsp]	; restore xmm6..xmm15 from the slots written by the DB block above
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[168+rsp]	; release the 168-byte save area
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_select_w7:
+
+
+
+
+ALIGN	32
+ecp_nistz256_avx2_select_w5:	; AVX2 variant of the 16-entry / 96-byte masked table scan; not declared global
+
+$L$avx2_select_w5:	; jump target used by ecp_nistz256_select_w5; rcx=output, rdx=table, r8d=index
+	vzeroupper
+	lea	rax,[((-136))+rsp]	; rax = base for the hand-encoded xmm saves below
+	mov	r11,rsp	; r11 = entry rsp, restored by the lea at the end
+$L$SEH_begin_ecp_nistz256_avx2_select_w5:
+DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]
+DB	0xc5,0xf8,0x29,0x70,0xe0	; vmovaps [rax-0x20],xmm6
+DB	0xc5,0xf8,0x29,0x78,0xf0	; vmovaps [rax-0x10],xmm7
+DB	0xc5,0x78,0x29,0x40,0x00	; vmovaps [rax+0x00],xmm8
+DB	0xc5,0x78,0x29,0x48,0x10	; vmovaps [rax+0x10],xmm9
+DB	0xc5,0x78,0x29,0x50,0x20	; vmovaps [rax+0x20],xmm10
+DB	0xc5,0x78,0x29,0x58,0x30	; vmovaps [rax+0x30],xmm11
+DB	0xc5,0x78,0x29,0x60,0x40	; vmovaps [rax+0x40],xmm12
+DB	0xc5,0x78,0x29,0x68,0x50	; vmovaps [rax+0x50],xmm13
+DB	0xc5,0x78,0x29,0x70,0x60	; vmovaps [rax+0x60],xmm14
+DB	0xc5,0x78,0x29,0x78,0x70	; vmovaps [rax+0x70],xmm15
+	vmovdqa	ymm0,YMMWORD[$L$Two]	; ymm0 = counter step (two entries are handled per pass)
+
+	vpxor	ymm2,ymm2,ymm2	; ymm2..ymm4 accumulate the selected 96-byte entry
+	vpxor	ymm3,ymm3,ymm3
+	vpxor	ymm4,ymm4,ymm4
+
+	vmovdqa	ymm5,YMMWORD[$L$One]	; counter for the first entry of each pair
+	vmovdqa	ymm10,YMMWORD[$L$Two]	; counter for the second entry of each pair
+
+	vmovd	xmm1,r8d	; xmm1 = requested index
+	vpermd	ymm1,ymm2,ymm1	; ymm2 is all-zero here, so this broadcasts dword 0 (the index) to every lane
+
+	mov	rax,8	; 8 passes x 2 entries = 16 entries
+$L$select_loop_avx2_w5:
+
+	vmovdqa	ymm6,YMMWORD[rdx]	; first entry of the pair (96 bytes)
+	vmovdqa	ymm7,YMMWORD[32+rdx]
+	vmovdqa	ymm8,YMMWORD[64+rdx]
+
+	vmovdqa	ymm11,YMMWORD[96+rdx]	; second entry of the pair
+	vmovdqa	ymm12,YMMWORD[128+rdx]
+	vmovdqa	ymm13,YMMWORD[160+rdx]
+
+	vpcmpeqd	ymm9,ymm5,ymm1	; mask for the first entry: counter == index
+	vpcmpeqd	ymm14,ymm10,ymm1	; mask for the second entry
+
+	vpaddd	ymm5,ymm5,ymm0
+	vpaddd	ymm10,ymm10,ymm0
+	lea	rdx,[192+rdx]	; advance past both entries
+
+	vpand	ymm6,ymm6,ymm9
+	vpand	ymm7,ymm7,ymm9
+	vpand	ymm8,ymm8,ymm9
+	vpand	ymm11,ymm11,ymm14
+	vpand	ymm12,ymm12,ymm14
+	vpand	ymm13,ymm13,ymm14
+
+	vpxor	ymm2,ymm2,ymm6	; fold the masked entries into the accumulators with XOR
+	vpxor	ymm3,ymm3,ymm7
+	vpxor	ymm4,ymm4,ymm8
+	vpxor	ymm2,ymm2,ymm11
+	vpxor	ymm3,ymm3,ymm12
+	vpxor	ymm4,ymm4,ymm13
+
+	dec	rax
+	jnz	NEAR $L$select_loop_avx2_w5
+
+	vmovdqu	YMMWORD[rcx],ymm2	; write the 96-byte result (unaligned stores)
+	vmovdqu	YMMWORD[32+rcx],ymm3
+	vmovdqu	YMMWORD[64+rcx],ymm4
+	vzeroupper
+	movaps	xmm6,XMMWORD[rsp]	; restore xmm6..xmm15 from the slots written by the DB block above
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[r11]	; restore the entry rsp saved in r11
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_avx2_select_w5:
+
+
+
+
+global	ecp_nistz256_avx2_select_w7	; exported: AVX2 variant of the 64-entry / 64-byte masked table scan; rcx=output, rdx=table, r8d=index
+
+ALIGN	32
+ecp_nistz256_avx2_select_w7:
+
+$L$avx2_select_w7:	; also the jump target used by ecp_nistz256_select_w7
+	vzeroupper
+	mov	r11,rsp	; r11 = entry rsp, restored by the lea at the end
+	lea	rax,[((-136))+rsp]	; rax = base for the hand-encoded xmm saves below
+$L$SEH_begin_ecp_nistz256_avx2_select_w7:
+DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-0x20]
+DB	0xc5,0xf8,0x29,0x70,0xe0	; vmovaps [rax-0x20],xmm6
+DB	0xc5,0xf8,0x29,0x78,0xf0	; vmovaps [rax-0x10],xmm7
+DB	0xc5,0x78,0x29,0x40,0x00	; vmovaps [rax+0x00],xmm8
+DB	0xc5,0x78,0x29,0x48,0x10	; vmovaps [rax+0x10],xmm9
+DB	0xc5,0x78,0x29,0x50,0x20	; vmovaps [rax+0x20],xmm10
+DB	0xc5,0x78,0x29,0x58,0x30	; vmovaps [rax+0x30],xmm11
+DB	0xc5,0x78,0x29,0x60,0x40	; vmovaps [rax+0x40],xmm12
+DB	0xc5,0x78,0x29,0x68,0x50	; vmovaps [rax+0x50],xmm13
+DB	0xc5,0x78,0x29,0x70,0x60	; vmovaps [rax+0x60],xmm14
+DB	0xc5,0x78,0x29,0x78,0x70	; vmovaps [rax+0x70],xmm15
+	vmovdqa	ymm0,YMMWORD[$L$Three]	; ymm0 = counter step (three entries are handled per pass)
+
+	vpxor	ymm2,ymm2,ymm2	; ymm2..ymm3 accumulate the selected 64-byte entry
+	vpxor	ymm3,ymm3,ymm3
+
+	vmovdqa	ymm4,YMMWORD[$L$One]	; counters for the first, second and third entry of each triple
+	vmovdqa	ymm8,YMMWORD[$L$Two]
+	vmovdqa	ymm12,YMMWORD[$L$Three]
+
+	vmovd	xmm1,r8d	; xmm1 = requested index
+	vpermd	ymm1,ymm2,ymm1	; ymm2 is all-zero here, so this broadcasts dword 0 (the index) to every lane
+
+
+	mov	rax,21	; 21 passes x 3 entries = 63 entries; the 64th is handled after the loop
+$L$select_loop_avx2_w7:
+
+	vmovdqa	ymm5,YMMWORD[rdx]	; first entry of the triple (64 bytes)
+	vmovdqa	ymm6,YMMWORD[32+rdx]
+
+	vmovdqa	ymm9,YMMWORD[64+rdx]	; second entry
+	vmovdqa	ymm10,YMMWORD[96+rdx]
+
+	vmovdqa	ymm13,YMMWORD[128+rdx]	; third entry
+	vmovdqa	ymm14,YMMWORD[160+rdx]
+
+	vpcmpeqd	ymm7,ymm4,ymm1	; one mask per entry: counter == index
+	vpcmpeqd	ymm11,ymm8,ymm1
+	vpcmpeqd	ymm15,ymm12,ymm1
+
+	vpaddd	ymm4,ymm4,ymm0
+	vpaddd	ymm8,ymm8,ymm0
+	vpaddd	ymm12,ymm12,ymm0
+	lea	rdx,[192+rdx]	; advance past all three entries
+
+	vpand	ymm5,ymm5,ymm7
+	vpand	ymm6,ymm6,ymm7
+	vpand	ymm9,ymm9,ymm11
+	vpand	ymm10,ymm10,ymm11
+	vpand	ymm13,ymm13,ymm15
+	vpand	ymm14,ymm14,ymm15
+
+	vpxor	ymm2,ymm2,ymm5	; fold the masked entries into the accumulators with XOR
+	vpxor	ymm3,ymm3,ymm6
+	vpxor	ymm2,ymm2,ymm9
+	vpxor	ymm3,ymm3,ymm10
+	vpxor	ymm2,ymm2,ymm13
+	vpxor	ymm3,ymm3,ymm14
+
+	dec	rax
+	jnz	NEAR $L$select_loop_avx2_w7
+
+
+	vmovdqa	ymm5,YMMWORD[rdx]	; trailing 64th entry, compared against the first counter (ymm4)
+	vmovdqa	ymm6,YMMWORD[32+rdx]
+
+	vpcmpeqd	ymm7,ymm4,ymm1
+
+	vpand	ymm5,ymm5,ymm7
+	vpand	ymm6,ymm6,ymm7
+
+	vpxor	ymm2,ymm2,ymm5
+	vpxor	ymm3,ymm3,ymm6
+
+	vmovdqu	YMMWORD[rcx],ymm2	; write the 64-byte result (unaligned stores)
+	vmovdqu	YMMWORD[32+rcx],ymm3
+	vzeroupper
+	movaps	xmm6,XMMWORD[rsp]	; restore xmm6..xmm15 from the slots written by the DB block above
+	movaps	xmm7,XMMWORD[16+rsp]
+	movaps	xmm8,XMMWORD[32+rsp]
+	movaps	xmm9,XMMWORD[48+rsp]
+	movaps	xmm10,XMMWORD[64+rsp]
+	movaps	xmm11,XMMWORD[80+rsp]
+	movaps	xmm12,XMMWORD[96+rsp]
+	movaps	xmm13,XMMWORD[112+rsp]
+	movaps	xmm14,XMMWORD[128+rsp]
+	movaps	xmm15,XMMWORD[144+rsp]
+	lea	rsp,[r11]	; restore the entry rsp saved in r11
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_avx2_select_w7:
+
+
+ALIGN	32
+__ecp_nistz256_add_toq:	; internal helper: (r12,r13,r8,r9) += [rbx], conditionally reduced; result left in r12,r13,r8,r9 and stored at [rdi]. Expects r14/r15 = modulus limbs 1/3
+
+	xor	r11,r11
+	add	r12,QWORD[rbx]	; 4-limb add of the memory operand; r11 takes the carry-out
+	adc	r13,QWORD[8+rbx]
+	mov	rax,r12	; rax, rbp, rcx, r10 keep pre-subtraction copies
+	adc	r8,QWORD[16+rbx]
+	adc	r9,QWORD[24+rbx]
+	mov	rbp,r13
+	adc	r11,0
+
+	sub	r12,-1	; trial subtraction of the modulus limbs (-1, r14, 0, r15)
+	mov	rcx,r8
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9
+	sbb	r9,r15
+	sbb	r11,0
+
+	cmovc	r12,rax	; on borrow, fall back to the unsubtracted sum
+	cmovc	r13,rbp
+	mov	QWORD[rdi],r12
+	cmovc	r8,rcx
+	mov	QWORD[8+rdi],r13
+	cmovc	r9,r10
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+
+
+
+ALIGN	32
+__ecp_nistz256_sub_fromq:	; internal helper: (r12,r13,r8,r9) -= [rbx], modulus added back on borrow; result left in r12,r13,r8,r9 and stored at [rdi]. Expects r14/r15 = modulus limbs 1/3
+
+	sub	r12,QWORD[rbx]	; 4-limb subtract of the memory operand
+	sbb	r13,QWORD[8+rbx]
+	mov	rax,r12	; rax, rbp, rcx, r10 keep the raw difference
+	sbb	r8,QWORD[16+rbx]
+	sbb	r9,QWORD[24+rbx]
+	mov	rbp,r13
+	sbb	r11,r11	; r11 = all-ones if the subtraction borrowed, else 0
+
+	add	r12,-1	; raw difference + modulus limbs (-1, r14, 0, r15)
+	mov	rcx,r8
+	adc	r13,r14
+	adc	r8,0
+	mov	r10,r9
+	adc	r9,r15
+	test	r11,r11	; ZF set when there was no borrow
+
+	cmovz	r12,rax	; no borrow: keep the raw difference instead of the corrected one
+	cmovz	r13,rbp
+	mov	QWORD[rdi],r12
+	cmovz	r8,rcx
+	mov	QWORD[8+rdi],r13
+	cmovz	r9,r10
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+
+
+
+ALIGN	32
+__ecp_nistz256_subq:	; internal helper: (r12,r13,r8,r9) = (rax,rbp,rcx,r10) - (r12,r13,r8,r9), modulus added back on borrow; registers only, nothing is stored. Expects r14/r15 = modulus limbs 1/3
+
+	sub	rax,r12	; 4-limb register subtract
+	sbb	rbp,r13
+	mov	r12,rax	; r12, r13, r8, r9 receive the raw difference
+	sbb	rcx,r8
+	sbb	r10,r9
+	mov	r13,rbp
+	sbb	r11,r11	; r11 = all-ones if the subtraction borrowed, else 0
+
+	add	rax,-1	; raw difference + modulus limbs (-1, r14, 0, r15)
+	mov	r8,rcx
+	adc	rbp,r14
+	adc	rcx,0
+	mov	r9,r10
+	adc	r10,r15
+	test	r11,r11	; ZF clear when there was a borrow
+
+	cmovnz	r12,rax	; borrow: take the corrected value
+	cmovnz	r13,rbp
+	cmovnz	r8,rcx
+	cmovnz	r9,r10
+
+	DB	0F3h,0C3h		;repret
+
+
+
+
+ALIGN	32
+__ecp_nistz256_mul_by_2q:	; internal helper: doubles (r12,r13,r8,r9) with conditional reduction; result left in r12,r13,r8,r9 and stored at [rdi]. Expects r14/r15 = modulus limbs 1/3
+
+	xor	r11,r11
+	add	r12,r12	; 4-limb doubling; r11 takes the carry-out
+	adc	r13,r13
+	mov	rax,r12	; rax, rbp, rcx, r10 keep pre-subtraction copies
+	adc	r8,r8
+	adc	r9,r9
+	mov	rbp,r13
+	adc	r11,0
+
+	sub	r12,-1	; trial subtraction of the modulus limbs (-1, r14, 0, r15)
+	mov	rcx,r8
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9
+	sbb	r9,r15
+	sbb	r11,0
+
+	cmovc	r12,rax	; on borrow, fall back to the unsubtracted value
+	cmovc	r13,rbp
+	mov	QWORD[rdi],r12
+	cmovc	r8,rcx
+	mov	QWORD[8+rdi],r13
+	cmovc	r9,r10
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+
+global	ecp_nistz256_point_double	; exported: Win64 args rcx=output (3 x 32 bytes), rdx=input (3 x 32 bytes, presumably X/Y/Z coordinates; confirm against the C prototype)
+
+ALIGN	32
+ecp_nistz256_point_double:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_point_double:
+	mov	rdi,rcx	; remap Win64 args rcx,rdx -> rdi,rsi
+	mov	rsi,rdx
+
+
+
+	lea	rcx,[OPENSSL_ia32cap_P]
+	mov	rcx,QWORD[8+rcx]	; capability qword at offset 8
+	and	ecx,0x80100	; keep bits 8 and 19 (presumably BMI2 and ADX)
+	cmp	ecx,0x80100
+	je	NEAR $L$point_doublex	; both bits set: take the variant at $L$point_doublex (outside this chunk)
+	push	rbp	; save the six callee-saved GPRs
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	sub	rsp,32*5+8	; five 32-byte temporaries at rsp+0/32/64/96/128, plus 8 bytes
+
+$L$point_doubleq_body:
+
+$L$point_double_shortcutq:
+	movdqu	xmm0,XMMWORD[rsi]	; xmm0:xmm1 = input field at offset 0
+	mov	rbx,rsi	; rbx = input pointer (rsi is clobbered by the helpers)
+	movdqu	xmm1,XMMWORD[16+rsi]
+	mov	r12,QWORD[((32+0))+rsi]	; r12,r13,r8,r9 = input field at offset 32
+	mov	r13,QWORD[((32+8))+rsi]
+	mov	r8,QWORD[((32+16))+rsi]
+	mov	r9,QWORD[((32+24))+rsi]
+	mov	r14,QWORD[(($L$poly+8))]	; r14/r15 = modulus limbs 1/3 for the add/sub/double helpers
+	mov	r15,QWORD[(($L$poly+24))]
+	movdqa	XMMWORD[96+rsp],xmm0	; [rsp+96] = copy of input field 0
+	movdqa	XMMWORD[(96+16)+rsp],xmm1
+	lea	r10,[32+rdi]
+	lea	r11,[64+rdi]
+DB	102,72,15,110,199	; movq xmm0,rdi  (stash output+0)
+DB	102,73,15,110,202	; movq xmm1,r10  (stash output+32)
+DB	102,73,15,110,211	; movq xmm2,r11  (stash output+64)
+
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_by_2q	; [rsp+0] = 2 * input field 32
+
+	mov	rax,QWORD[((64+0))+rsi]
+	mov	r14,QWORD[((64+8))+rsi]
+	mov	r15,QWORD[((64+16))+rsi]
+	mov	r8,QWORD[((64+24))+rsi]
+	lea	rsi,[((64-0))+rsi]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sqr_montq	; [rsp+64] = (input field 64)^2
+
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	r14,QWORD[((8+0))+rsp]
+	lea	rsi,[((0+0))+rsp]
+	mov	r15,QWORD[((16+0))+rsp]
+	mov	r8,QWORD[((24+0))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_sqr_montq	; [rsp+0] = [rsp+0]^2
+
+	mov	rax,QWORD[32+rbx]
+	mov	r9,QWORD[((64+0))+rbx]
+	mov	r10,QWORD[((64+8))+rbx]
+	mov	r11,QWORD[((64+16))+rbx]
+	mov	r12,QWORD[((64+24))+rbx]
+	lea	rsi,[((64-0))+rbx]
+	lea	rbx,[32+rbx]
+DB	102,72,15,126,215	; movq rdi,xmm2  (rdi = output+64)
+	call	__ecp_nistz256_mul_montq	; output+64 = input field 64 * input field 32
+	call	__ecp_nistz256_mul_by_2q	; doubled in place (operates on the registers mul_montq returned, same rdi)
+
+	mov	r12,QWORD[((96+0))+rsp]
+	mov	r13,QWORD[((96+8))+rsp]
+	lea	rbx,[64+rsp]
+	mov	r8,QWORD[((96+16))+rsp]
+	mov	r9,QWORD[((96+24))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_add_toq	; [rsp+32] = [rsp+96] + [rsp+64]
+
+	mov	r12,QWORD[((96+0))+rsp]
+	mov	r13,QWORD[((96+8))+rsp]
+	lea	rbx,[64+rsp]
+	mov	r8,QWORD[((96+16))+rsp]
+	mov	r9,QWORD[((96+24))+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sub_fromq	; [rsp+64] = [rsp+96] - [rsp+64]
+
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	r14,QWORD[((8+0))+rsp]
+	lea	rsi,[((0+0))+rsp]
+	mov	r15,QWORD[((16+0))+rsp]
+	mov	r8,QWORD[((24+0))+rsp]
+DB	102,72,15,126,207	; movq rdi,xmm1  (rdi = output+32)
+	call	__ecp_nistz256_sqr_montq	; output+32 = [rsp+0]^2; result also in r12..r15, rsi/rbp = modulus limbs 1/3
+	xor	r9,r9	; halve the value in r12..r15 modulo the modulus:
+	mov	rax,r12	; keep the original limbs in rax, r10, rcx, r8
+	add	r12,-1	; value + modulus limbs (-1, rsi, 0, rbp), carry into r9
+	mov	r10,r13
+	adc	r13,rsi
+	mov	rcx,r14
+	adc	r14,0
+	mov	r8,r15
+	adc	r15,rbp
+	adc	r9,0
+	xor	rsi,rsi
+	test	rax,1	; ZF set when the original value is even
+
+	cmovz	r12,rax	; even: use the original value (and zero carry) instead of value + modulus
+	cmovz	r13,r10
+	cmovz	r14,rcx
+	cmovz	r15,r8
+	cmovz	r9,rsi
+
+	mov	rax,r13	; shift the 4-limb value (plus carry bit in r9) right by one
+	shr	r12,1
+	shl	rax,63
+	mov	r10,r14
+	shr	r13,1
+	or	r12,rax
+	shl	r10,63
+	mov	rcx,r15
+	shr	r14,1
+	or	r13,r10
+	shl	rcx,63
+	mov	QWORD[rdi],r12	; overwrite output+32 with the halved value
+	shr	r15,1
+	mov	QWORD[8+rdi],r13
+	shl	r9,63
+	or	r14,rcx
+	or	r15,r9
+	mov	QWORD[16+rdi],r14
+	mov	QWORD[24+rdi],r15
+	mov	rax,QWORD[64+rsp]
+	lea	rbx,[64+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montq	; [rsp+32] = [rsp+32] * [rsp+64]
+
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_mul_by_2q	; [rsp+128] = 2 * product (taken from the result registers)
+
+	lea	rbx,[32+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_add_toq	; [rsp+32] = doubled value + [rsp+32], i.e. three times the product
+
+	mov	rax,QWORD[96+rsp]
+	lea	rbx,[96+rsp]
+	mov	r9,QWORD[((0+0))+rsp]
+	mov	r10,QWORD[((8+0))+rsp]
+	lea	rsi,[((0+0))+rsp]
+	mov	r11,QWORD[((16+0))+rsp]
+	mov	r12,QWORD[((24+0))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montq	; [rsp+0] = [rsp+0] * [rsp+96]
+
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_mul_by_2q	; [rsp+128] = 2 * [rsp+0]
+
+	mov	rax,QWORD[((0+32))+rsp]
+	mov	r14,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r15,QWORD[((16+32))+rsp]
+	mov	r8,QWORD[((24+32))+rsp]
+DB	102,72,15,126,199	; movq rdi,xmm0  (rdi = output+0)
+	call	__ecp_nistz256_sqr_montq	; output+0 = [rsp+32]^2; result in r12..r15
+
+	lea	rbx,[128+rsp]
+	mov	r8,r14	; move result limbs 2,3 into r8,r9 as sub_fromq expects ...
+	mov	r9,r15
+	mov	r14,rsi	; ... and reload r14/r15 with the modulus limbs sqr_montq left in rsi/rbp
+	mov	r15,rbp
+	call	__ecp_nistz256_sub_fromq	; output+0 = square - [rsp+128]
+
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	rbp,QWORD[((0+8))+rsp]
+	mov	rcx,QWORD[((0+16))+rsp]
+	mov	r10,QWORD[((0+24))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_subq	; registers = [rsp+0] - (value just written to output+0); subq itself stores nothing
+
+	mov	rax,QWORD[32+rsp]
+	lea	rbx,[32+rsp]
+	mov	r14,r12
+	xor	ecx,ecx	; sets ZF, so both cmovz below always move (flags are untouched by the mov/lea in between)
+	mov	QWORD[((0+0))+rsp],r12	; spill the difference to [rsp+0] while arranging it in r9..r12 for mul_montq
+	mov	r10,r13
+	mov	QWORD[((0+8))+rsp],r13
+	cmovz	r11,r8
+	mov	QWORD[((0+16))+rsp],r8
+	lea	rsi,[((0-0))+rsp]
+	cmovz	r12,r9
+	mov	QWORD[((0+24))+rsp],r9
+	mov	r9,r14
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montq	; [rsp+0] = [rsp+0] * [rsp+32]
+
+DB	102,72,15,126,203	; movq rbx,xmm1  (rbx = output+32)
+DB	102,72,15,126,207	; movq rdi,xmm1  (rdi = output+32)
+	call	__ecp_nistz256_sub_fromq	; output+32 = product - (previous contents of output+32)
+
+	lea	rsi,[((160+56))+rsp]	; rsi = rsp + 168 (frame) + 48 (six pushes) = rsp at function entry
+
+	mov	r15,QWORD[((-48))+rsi]	; restore the registers pushed at entry
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbx,QWORD[((-16))+rsi]
+
+	mov	rbp,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]
+
+$L$point_doubleq_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_point_double:
+global	ecp_nistz256_point_add
+; Win64 entry: rcx = 96-byte output, rdx = 96-byte input 1, r8 = 96-byte input 2.
+ALIGN	32
+ecp_nistz256_point_add:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_point_add:
+	mov	rdi,rcx	; rdi = output pointer
+	mov	rsi,rdx	; rsi = input 1 pointer
+	mov	rdx,r8	; rdx = input 2 pointer
+; Helper-call notes below read "dst <- a, b"; they are inferred from the rdi/rsi/rbx
+; setup before each call (the *q helper bodies are defined elsewhere) - TODO confirm.
+
+	lea	rcx,[OPENSSL_ia32cap_P]
+	mov	rcx,QWORD[8+rcx]	; capability qword at offset 8
+	and	ecx,0x80100	; keep bits 8 and 19 of its low dword
+	cmp	ecx,0x80100
+	je	NEAR $L$point_addx	; both set: use the *x code path (NOTE(review): presumably BMI2+ADX - confirm)
+	push	rbp	; save the six registers restored in the epilogue
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	sub	rsp,32*18+8	; 584 bytes of stack scratch
+
+$L$point_addq_body:
+
+	movdqu	xmm0,XMMWORD[rsi]	; xmm0..xmm5 = the 96 bytes of input 1
+	movdqu	xmm1,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm3,XMMWORD[48+rsi]
+	movdqu	xmm4,XMMWORD[64+rsi]
+	movdqu	xmm5,XMMWORD[80+rsi]
+	mov	rbx,rsi	; rbx = input 1
+	mov	rsi,rdx	; rsi = input 2
+	movdqa	XMMWORD[384+rsp],xmm0	; stack copy of input 1 at [384..479+rsp]
+	movdqa	XMMWORD[(384+16)+rsp],xmm1
+	movdqa	XMMWORD[416+rsp],xmm2
+	movdqa	XMMWORD[(416+16)+rsp],xmm3
+	movdqa	XMMWORD[448+rsp],xmm4
+	movdqa	XMMWORD[(448+16)+rsp],xmm5
+	por	xmm5,xmm4	; start OR-folding input 1 bytes 64..95 into one dword
+
+	movdqu	xmm0,XMMWORD[rsi]	; input 2 bytes 0..63 -> xmm0..xmm3
+	pshufd	xmm3,xmm5,0xb1
+	movdqu	xmm1,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	por	xmm5,xmm3
+	movdqu	xmm3,XMMWORD[48+rsi]
+	mov	rax,QWORD[((64+0))+rsi]	; rax,r14,r15,r8 = the four qwords of input 2 at offset 64
+	mov	r14,QWORD[((64+8))+rsi]
+	mov	r15,QWORD[((64+16))+rsi]
+	mov	r8,QWORD[((64+24))+rsi]
+	movdqa	XMMWORD[480+rsp],xmm0	; stack copy of input 2 bytes 0..63 at [480..543+rsp]
+	pshufd	xmm4,xmm5,0x1e
+	movdqa	XMMWORD[(480+16)+rsp],xmm1
+	movdqu	xmm0,XMMWORD[64+rsi]	; input 2 bytes 64..95 (for the zero test)
+	movdqu	xmm1,XMMWORD[80+rsi]
+	movdqa	XMMWORD[512+rsp],xmm2
+	movdqa	XMMWORD[(512+16)+rsp],xmm3
+	por	xmm5,xmm4	; dword 0 of xmm5 = OR of all dwords of input 1 bytes 64..95
+	pxor	xmm4,xmm4
+	por	xmm1,xmm0	; start OR-folding input 2 bytes 64..95
+DB	102,72,15,110,199	; movq xmm0,rdi (stash the output pointer)
+
+	lea	rsi,[((64-0))+rsi]
+	mov	QWORD[((544+0))+rsp],rax	; stack copy of input 2 bytes 64..95 at [544..575+rsp]
+	mov	QWORD[((544+8))+rsp],r14
+	mov	QWORD[((544+16))+rsp],r15
+	mov	QWORD[((544+24))+rsp],r8
+	lea	rdi,[96+rsp]
+	call	__ecp_nistz256_sqr_montq	; [96+rsp] <- square of input 2 field at offset 64
+
+	pcmpeqd	xmm5,xmm4
+	pshufd	xmm4,xmm1,0xb1
+	por	xmm4,xmm1
+	pshufd	xmm5,xmm5,0	; xmm5 = all-ones iff input 1 bytes 64..95 are all zero (assumes the helper leaves xmm regs intact)
+	pshufd	xmm3,xmm4,0x1e
+	por	xmm4,xmm3
+	pxor	xmm3,xmm3
+	pcmpeqd	xmm4,xmm3
+	pshufd	xmm4,xmm4,0	; xmm4 = all-ones iff input 2 bytes 64..95 are all zero
+	mov	rax,QWORD[((64+0))+rbx]
+	mov	r14,QWORD[((64+8))+rbx]
+	mov	r15,QWORD[((64+16))+rbx]
+	mov	r8,QWORD[((64+24))+rbx]
+DB	102,72,15,110,203	; movq xmm1,rbx (stash the input 1 pointer)
+
+	lea	rsi,[((64-0))+rbx]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_sqr_montq	; [32+rsp] <- square of input 1 field at offset 64
+
+	mov	rax,QWORD[544+rsp]
+	lea	rbx,[544+rsp]
+	mov	r9,QWORD[((0+96))+rsp]
+	mov	r10,QWORD[((8+96))+rsp]
+	lea	rsi,[((0+96))+rsp]
+	mov	r11,QWORD[((16+96))+rsp]
+	mov	r12,QWORD[((24+96))+rsp]
+	lea	rdi,[224+rsp]
+	call	__ecp_nistz256_mul_montq	; [224+rsp] <- [96+rsp], [544+rsp]
+
+	mov	rax,QWORD[448+rsp]
+	lea	rbx,[448+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_mul_montq	; [256+rsp] <- [32+rsp], [448+rsp]
+
+	mov	rax,QWORD[416+rsp]
+	lea	rbx,[416+rsp]
+	mov	r9,QWORD[((0+224))+rsp]
+	mov	r10,QWORD[((8+224))+rsp]
+	lea	rsi,[((0+224))+rsp]
+	mov	r11,QWORD[((16+224))+rsp]
+	mov	r12,QWORD[((24+224))+rsp]
+	lea	rdi,[224+rsp]
+	call	__ecp_nistz256_mul_montq	; [224+rsp] <- [224+rsp], [416+rsp]
+
+	mov	rax,QWORD[512+rsp]
+	lea	rbx,[512+rsp]
+	mov	r9,QWORD[((0+256))+rsp]
+	mov	r10,QWORD[((8+256))+rsp]
+	lea	rsi,[((0+256))+rsp]
+	mov	r11,QWORD[((16+256))+rsp]
+	mov	r12,QWORD[((24+256))+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_mul_montq	; [256+rsp] <- [256+rsp], [512+rsp]
+
+	lea	rbx,[224+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sub_fromq	; [64+rsp] <- (register result of the call above) - [224+rsp]
+
+	or	r12,r13	; r12 = OR of r12,r13,r8,r9 (presumably the limbs of that difference)
+	movdqa	xmm2,xmm4
+	or	r12,r8
+	or	r12,r9
+	por	xmm2,xmm5	; xmm2 = xmm4 | xmm5 (either zero-test mask set)
+DB	102,73,15,110,220	; movq xmm3,r12 (stash the OR-reduction)
+
+	mov	rax,QWORD[384+rsp]
+	lea	rbx,[384+rsp]
+	mov	r9,QWORD[((0+96))+rsp]
+	mov	r10,QWORD[((8+96))+rsp]
+	lea	rsi,[((0+96))+rsp]
+	mov	r11,QWORD[((16+96))+rsp]
+	mov	r12,QWORD[((24+96))+rsp]
+	lea	rdi,[160+rsp]
+	call	__ecp_nistz256_mul_montq	; [160+rsp] <- [96+rsp], [384+rsp]
+
+	mov	rax,QWORD[480+rsp]
+	lea	rbx,[480+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[192+rsp]
+	call	__ecp_nistz256_mul_montq	; [192+rsp] <- [32+rsp], [480+rsp]
+
+	lea	rbx,[160+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_sub_fromq	; [rsp] <- (register result of the call above) - [160+rsp]
+
+	or	r12,r13	; r12 = OR of r12,r13,r8,r9 after the second subtraction
+	or	r12,r8
+	or	r12,r9
+
+DB	102,73,15,126,208	; movq r8,xmm2 (low qword of the combined mask)
+DB	102,73,15,126,217	; movq r9,xmm3 (OR-reduction saved after the first subtraction)
+	or	r12,r8	; nonzero if the second OR-reduction is nonzero or either mask is set
+DB	0x3e	; prefix byte emitted ahead of the jnz (NOTE(review): presumably a branch hint - confirm)
+	jnz	NEAR $L$add_proceedq
+
+; Reached only when r12 is zero: second OR-reduction zero and neither mask set.
+
+	test	r9,r9	; was the first OR-reduction zero as well?
+	jz	NEAR $L$add_doubleq	; yes: hand over to the point-double code
+
+
+
+
+
+
+DB	102,72,15,126,199	; movq rdi,xmm0 (output pointer)
+	pxor	xmm0,xmm0
+	movdqu	XMMWORD[rdi],xmm0	; write 96 zero bytes to the output
+	movdqu	XMMWORD[16+rdi],xmm0
+	movdqu	XMMWORD[32+rdi],xmm0
+	movdqu	XMMWORD[48+rdi],xmm0
+	movdqu	XMMWORD[64+rdi],xmm0
+	movdqu	XMMWORD[80+rdi],xmm0
+	jmp	NEAR $L$add_doneq
+
+ALIGN	32
+$L$add_doubleq:
+DB	102,72,15,126,206	; movq rsi,xmm1 (input 1 pointer)
+DB	102,72,15,126,199	; movq rdi,xmm0 (output pointer)
+	add	rsp,416	; shrink scratch from 584 to 168 bytes, matching the 160+56 epilogue offset of the point-double path
+
+	jmp	NEAR $L$point_double_shortcutq
+
+
+ALIGN	32
+$L$add_proceedq:
+	mov	rax,QWORD[((0+64))+rsp]
+	mov	r14,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r15,QWORD[((16+64))+rsp]
+	mov	r8,QWORD[((24+64))+rsp]
+	lea	rdi,[96+rsp]
+	call	__ecp_nistz256_sqr_montq	; [96+rsp] <- square of [64+rsp]
+
+	mov	rax,QWORD[448+rsp]
+	lea	rbx,[448+rsp]
+	mov	r9,QWORD[((0+0))+rsp]
+	mov	r10,QWORD[((8+0))+rsp]
+	lea	rsi,[((0+0))+rsp]
+	mov	r11,QWORD[((16+0))+rsp]
+	mov	r12,QWORD[((24+0))+rsp]
+	lea	rdi,[352+rsp]
+	call	__ecp_nistz256_mul_montq	; [352+rsp] <- [rsp], [448+rsp]
+
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	r14,QWORD[((8+0))+rsp]
+	lea	rsi,[((0+0))+rsp]
+	mov	r15,QWORD[((16+0))+rsp]
+	mov	r8,QWORD[((24+0))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_sqr_montq	; [32+rsp] <- square of [rsp]
+
+	mov	rax,QWORD[544+rsp]
+	lea	rbx,[544+rsp]
+	mov	r9,QWORD[((0+352))+rsp]
+	mov	r10,QWORD[((8+352))+rsp]
+	lea	rsi,[((0+352))+rsp]
+	mov	r11,QWORD[((16+352))+rsp]
+	mov	r12,QWORD[((24+352))+rsp]
+	lea	rdi,[352+rsp]
+	call	__ecp_nistz256_mul_montq	; [352+rsp] <- [352+rsp], [544+rsp]
+
+	mov	rax,QWORD[rsp]
+	lea	rbx,[rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_mul_montq	; [128+rsp] <- [32+rsp], [rsp]
+
+	mov	rax,QWORD[160+rsp]
+	lea	rbx,[160+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[192+rsp]
+	call	__ecp_nistz256_mul_montq	; [192+rsp] <- [32+rsp], [160+rsp]
+
+; Inline doubling of r12:r13:r8:r9 followed by a conditional subtraction of
+; the limbs (-1, r14, 0, r15); loads of [96+rsp] are interleaved for the next call.
+
+	xor	r11,r11
+	add	r12,r12
+	lea	rsi,[96+rsp]
+	adc	r13,r13
+	mov	rax,r12
+	adc	r8,r8
+	adc	r9,r9
+	mov	rbp,r13
+	adc	r11,0	; r11 = carry out of the doubling
+
+	sub	r12,-1
+	mov	rcx,r8
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9
+	sbb	r9,r15
+	sbb	r11,0	; CF set if the trial subtraction borrowed overall
+
+	cmovc	r12,rax	; on borrow keep the unsubtracted limbs
+	mov	rax,QWORD[rsi]	; rax,rbp,rcx,r10 = the four qwords at [96+rsp]
+	cmovc	r13,rbp
+	mov	rbp,QWORD[8+rsi]
+	cmovc	r8,rcx
+	mov	rcx,QWORD[16+rsi]
+	cmovc	r9,r10
+	mov	r10,QWORD[24+rsi]
+
+	call	__ecp_nistz256_subq	; NOTE(review): presumably r12:r13:r8:r9 <- [96+rsp] - doubled value, as in the *x variant - confirm
+
+	lea	rbx,[128+rsp]
+	lea	rdi,[288+rsp]
+	call	__ecp_nistz256_sub_fromq	; [288+rsp] <- registers - [128+rsp]
+
+	mov	rax,QWORD[((192+0))+rsp]
+	mov	rbp,QWORD[((192+8))+rsp]
+	mov	rcx,QWORD[((192+16))+rsp]
+	mov	r10,QWORD[((192+24))+rsp]
+	lea	rdi,[320+rsp]
+
+	call	__ecp_nistz256_subq	; registers <- [192+rsp] - registers (presumably)
+
+	mov	QWORD[rdi],r12	; store r12:r13:r8:r9 at [320+rsp]
+	mov	QWORD[8+rdi],r13
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+	mov	rax,QWORD[128+rsp]
+	lea	rbx,[128+rsp]
+	mov	r9,QWORD[((0+224))+rsp]
+	mov	r10,QWORD[((8+224))+rsp]
+	lea	rsi,[((0+224))+rsp]
+	mov	r11,QWORD[((16+224))+rsp]
+	mov	r12,QWORD[((24+224))+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_mul_montq	; [256+rsp] <- [224+rsp], [128+rsp]
+
+	mov	rax,QWORD[320+rsp]
+	lea	rbx,[320+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[320+rsp]
+	call	__ecp_nistz256_mul_montq	; [320+rsp] <- [64+rsp], [320+rsp]
+
+	lea	rbx,[256+rsp]
+	lea	rdi,[320+rsp]
+	call	__ecp_nistz256_sub_fromq	; [320+rsp] <- registers - [256+rsp]
+
+DB	102,72,15,126,199	; movq rdi,xmm0 (output pointer)
+
+	movdqa	xmm0,xmm5	; branch-free select: xmm2:xmm3 = xmm5 ? [544+rsp] : [352+rsp]
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[352+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((352+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[544+rsp]
+	pand	xmm3,XMMWORD[((544+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4	; then xmm2:xmm3 = xmm4 ? [448+rsp] : previous selection
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[448+rsp]
+	pand	xmm3,XMMWORD[((448+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[64+rdi],xmm2	; output bytes 64..95
+	movdqu	XMMWORD[80+rdi],xmm3
+
+	movdqa	xmm0,xmm5	; xmm2:xmm3 = xmm5 ? [480+rsp] : [288+rsp]
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[288+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((288+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[480+rsp]
+	pand	xmm3,XMMWORD[((480+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4	; then xmm4 ? [384+rsp] : previous selection
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[384+rsp]
+	pand	xmm3,XMMWORD[((384+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[rdi],xmm2	; output bytes 0..31
+	movdqu	XMMWORD[16+rdi],xmm3
+
+	movdqa	xmm0,xmm5	; xmm2:xmm3 = xmm5 ? [512+rsp] : [320+rsp]
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[320+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((320+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[512+rsp]
+	pand	xmm3,XMMWORD[((512+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4	; then xmm4 ? [416+rsp] : previous selection
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[416+rsp]
+	pand	xmm3,XMMWORD[((416+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[32+rdi],xmm2	; output bytes 32..63
+	movdqu	XMMWORD[48+rdi],xmm3
+
+$L$add_doneq:
+	lea	rsi,[((576+56))+rsp]	; rsi = rsp as it was before the six pushes (584 scratch + 48)
+
+	mov	r15,QWORD[((-48))+rsi]	; reload the saved registers
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbx,QWORD[((-16))+rsi]
+
+	mov	rbp,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; release the frame
+
+$L$point_addq_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_point_add:
+global	ecp_nistz256_point_add_affine
+; Win64 entry: rcx = 96-byte output, rdx = 96-byte input 1, r8 = 64-byte input 2.
+ALIGN	32
+ecp_nistz256_point_add_affine:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_point_add_affine:
+	mov	rdi,rcx	; rdi = output pointer
+	mov	rsi,rdx	; rsi = input 1 pointer
+	mov	rdx,r8	; rdx = input 2 pointer
+; Helper-call notes below read "dst <- a, b"; they are inferred from the rdi/rsi/rbx
+; setup before each call (the *q helper bodies are defined elsewhere) - TODO confirm.
+
+	lea	rcx,[OPENSSL_ia32cap_P]
+	mov	rcx,QWORD[8+rcx]	; capability qword at offset 8
+	and	ecx,0x80100	; keep bits 8 and 19 of its low dword
+	cmp	ecx,0x80100
+	je	NEAR $L$point_add_affinex	; both set: use the *x code path (NOTE(review): presumably BMI2+ADX - confirm)
+	push	rbp	; save the six registers restored in the epilogue
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	sub	rsp,32*15+8	; 488 bytes of stack scratch
+
+$L$add_affineq_body:
+
+	movdqu	xmm0,XMMWORD[rsi]	; xmm0..xmm5 = the 96 bytes of input 1
+	mov	rbx,rdx	; rbx = input 2
+	movdqu	xmm1,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm3,XMMWORD[48+rsi]
+	movdqu	xmm4,XMMWORD[64+rsi]
+	movdqu	xmm5,XMMWORD[80+rsi]
+	mov	rax,QWORD[((64+0))+rsi]	; rax,r14,r15,r8 = the four qwords of input 1 at offset 64
+	mov	r14,QWORD[((64+8))+rsi]
+	mov	r15,QWORD[((64+16))+rsi]
+	mov	r8,QWORD[((64+24))+rsi]
+	movdqa	XMMWORD[320+rsp],xmm0	; stack copy of input 1 at [320..415+rsp]
+	movdqa	XMMWORD[(320+16)+rsp],xmm1
+	movdqa	XMMWORD[352+rsp],xmm2
+	movdqa	XMMWORD[(352+16)+rsp],xmm3
+	movdqa	XMMWORD[384+rsp],xmm4
+	movdqa	XMMWORD[(384+16)+rsp],xmm5
+	por	xmm5,xmm4	; start OR-folding input 1 bytes 64..95 into one dword
+
+	movdqu	xmm0,XMMWORD[rbx]	; input 2 bytes 0..63 -> xmm0..xmm3
+	pshufd	xmm3,xmm5,0xb1
+	movdqu	xmm1,XMMWORD[16+rbx]
+	movdqu	xmm2,XMMWORD[32+rbx]
+	por	xmm5,xmm3
+	movdqu	xmm3,XMMWORD[48+rbx]
+	movdqa	XMMWORD[416+rsp],xmm0	; stack copy of input 2 at [416..479+rsp]
+	pshufd	xmm4,xmm5,0x1e
+	movdqa	XMMWORD[(416+16)+rsp],xmm1
+	por	xmm1,xmm0
+DB	102,72,15,110,199	; movq xmm0,rdi (stash the output pointer)
+	movdqa	XMMWORD[448+rsp],xmm2
+	movdqa	XMMWORD[(448+16)+rsp],xmm3
+	por	xmm3,xmm2
+	por	xmm5,xmm4	; dword 0 of xmm5 = OR of all dwords of input 1 bytes 64..95
+	pxor	xmm4,xmm4
+	por	xmm3,xmm1	; xmm3 = OR of all four 16-byte lanes of input 2
+
+	lea	rsi,[((64-0))+rsi]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_sqr_montq	; [32+rsp] <- square of input 1 field at offset 64
+
+	pcmpeqd	xmm5,xmm4
+	pshufd	xmm4,xmm3,0xb1
+	mov	rax,QWORD[rbx]	; first qword of input 2
+
+	mov	r9,r12	; r9:r10:r11:r12 = r12:r13:r14:r15 as left by the call above
+	por	xmm4,xmm3
+	pshufd	xmm5,xmm5,0	; xmm5 = all-ones iff input 1 bytes 64..95 are all zero (assumes the helper leaves xmm regs intact)
+	pshufd	xmm3,xmm4,0x1e
+	mov	r10,r13
+	por	xmm4,xmm3
+	pxor	xmm3,xmm3
+	mov	r11,r14
+	pcmpeqd	xmm4,xmm3
+	pshufd	xmm4,xmm4,0	; xmm4 = all-ones iff all 64 bytes of input 2 are zero
+
+	lea	rsi,[((32-0))+rsp]
+	mov	r12,r15
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montq	; [rsp] <- [32+rsp], input 2 bytes 0..31
+
+	lea	rbx,[320+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sub_fromq	; [64+rsp] <- registers - [320+rsp]
+
+	mov	rax,QWORD[384+rsp]
+	lea	rbx,[384+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montq	; [32+rsp] <- [32+rsp], [384+rsp]
+
+	mov	rax,QWORD[384+rsp]
+	lea	rbx,[384+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[288+rsp]
+	call	__ecp_nistz256_mul_montq	; [288+rsp] <- [64+rsp], [384+rsp]
+
+	mov	rax,QWORD[448+rsp]
+	lea	rbx,[448+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((0+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montq	; [32+rsp] <- [32+rsp], [448+rsp]
+
+	lea	rbx,[352+rsp]
+	lea	rdi,[96+rsp]
+	call	__ecp_nistz256_sub_fromq	; [96+rsp] <- registers - [352+rsp]
+
+	mov	rax,QWORD[((0+64))+rsp]
+	mov	r14,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r15,QWORD[((16+64))+rsp]
+	mov	r8,QWORD[((24+64))+rsp]
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_sqr_montq	; [128+rsp] <- square of [64+rsp]
+
+	mov	rax,QWORD[((0+96))+rsp]
+	mov	r14,QWORD[((8+96))+rsp]
+	lea	rsi,[((0+96))+rsp]
+	mov	r15,QWORD[((16+96))+rsp]
+	mov	r8,QWORD[((24+96))+rsp]
+	lea	rdi,[192+rsp]
+	call	__ecp_nistz256_sqr_montq	; [192+rsp] <- square of [96+rsp]
+
+	mov	rax,QWORD[128+rsp]
+	lea	rbx,[128+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[160+rsp]
+	call	__ecp_nistz256_mul_montq	; [160+rsp] <- [64+rsp], [128+rsp]
+
+	mov	rax,QWORD[320+rsp]
+	lea	rbx,[320+rsp]
+	mov	r9,QWORD[((0+128))+rsp]
+	mov	r10,QWORD[((8+128))+rsp]
+	lea	rsi,[((0+128))+rsp]
+	mov	r11,QWORD[((16+128))+rsp]
+	mov	r12,QWORD[((24+128))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montq	; [rsp] <- [128+rsp], [320+rsp]
+
+; Inline doubling of r12:r13:r8:r9 followed by a conditional subtraction of
+; the limbs (-1, r14, 0, r15); loads of [192+rsp] are interleaved for the next call.
+
+	xor	r11,r11
+	add	r12,r12
+	lea	rsi,[192+rsp]
+	adc	r13,r13
+	mov	rax,r12
+	adc	r8,r8
+	adc	r9,r9
+	mov	rbp,r13
+	adc	r11,0	; r11 = carry out of the doubling
+
+	sub	r12,-1
+	mov	rcx,r8
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9
+	sbb	r9,r15
+	sbb	r11,0	; CF set if the trial subtraction borrowed overall
+
+	cmovc	r12,rax	; on borrow keep the unsubtracted limbs
+	mov	rax,QWORD[rsi]	; rax,rbp,rcx,r10 = the four qwords at [192+rsp]
+	cmovc	r13,rbp
+	mov	rbp,QWORD[8+rsi]
+	cmovc	r8,rcx
+	mov	rcx,QWORD[16+rsi]
+	cmovc	r9,r10
+	mov	r10,QWORD[24+rsi]
+
+	call	__ecp_nistz256_subq	; NOTE(review): presumably r12:r13:r8:r9 <- [192+rsp] - doubled value, as in the *x variant - confirm
+
+	lea	rbx,[160+rsp]
+	lea	rdi,[224+rsp]
+	call	__ecp_nistz256_sub_fromq	; [224+rsp] <- registers - [160+rsp]
+
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	rbp,QWORD[((0+8))+rsp]
+	mov	rcx,QWORD[((0+16))+rsp]
+	mov	r10,QWORD[((0+24))+rsp]
+	lea	rdi,[64+rsp]
+
+	call	__ecp_nistz256_subq	; registers <- [rsp] - registers (presumably)
+
+	mov	QWORD[rdi],r12	; store r12:r13:r8:r9 at [64+rsp]
+	mov	QWORD[8+rdi],r13
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+	mov	rax,QWORD[352+rsp]
+	lea	rbx,[352+rsp]
+	mov	r9,QWORD[((0+160))+rsp]
+	mov	r10,QWORD[((8+160))+rsp]
+	lea	rsi,[((0+160))+rsp]
+	mov	r11,QWORD[((16+160))+rsp]
+	mov	r12,QWORD[((24+160))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montq	; [32+rsp] <- [160+rsp], [352+rsp]
+
+	mov	rax,QWORD[96+rsp]
+	lea	rbx,[96+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((0+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_mul_montq	; [64+rsp] <- [64+rsp], [96+rsp]
+
+	lea	rbx,[32+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_sub_fromq	; [256+rsp] <- registers - [32+rsp]
+
+DB	102,72,15,126,199	; movq rdi,xmm0 (output pointer)
+
+	movdqa	xmm0,xmm5	; branch-free select: xmm2:xmm3 = xmm5 ? $L$ONE_mont : [288+rsp]
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[288+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((288+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[$L$ONE_mont]
+	pand	xmm3,XMMWORD[(($L$ONE_mont+16))]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4	; then xmm2:xmm3 = xmm4 ? [384+rsp] : previous selection
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[384+rsp]
+	pand	xmm3,XMMWORD[((384+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[64+rdi],xmm2	; output bytes 64..95
+	movdqu	XMMWORD[80+rdi],xmm3
+
+	movdqa	xmm0,xmm5	; xmm2:xmm3 = xmm5 ? [416+rsp] : [224+rsp]
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[224+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((224+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[416+rsp]
+	pand	xmm3,XMMWORD[((416+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4	; then xmm4 ? [320+rsp] : previous selection
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[320+rsp]
+	pand	xmm3,XMMWORD[((320+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[rdi],xmm2	; output bytes 0..31
+	movdqu	XMMWORD[16+rdi],xmm3
+
+	movdqa	xmm0,xmm5	; xmm2:xmm3 = xmm5 ? [448+rsp] : [256+rsp]
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[256+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((256+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[448+rsp]
+	pand	xmm3,XMMWORD[((448+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4	; then xmm4 ? [352+rsp] : previous selection
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[352+rsp]
+	pand	xmm3,XMMWORD[((352+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[32+rdi],xmm2	; output bytes 32..63
+	movdqu	XMMWORD[48+rdi],xmm3
+
+	lea	rsi,[((480+56))+rsp]	; rsi = rsp as it was before the six pushes (488 scratch + 48)
+
+	mov	r15,QWORD[((-48))+rsi]	; reload the saved registers
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbx,QWORD[((-16))+rsi]
+
+	mov	rbp,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; release the frame
+
+$L$add_affineq_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_point_add_affine:
+
+ALIGN	32
+__ecp_nistz256_add_tox:
+; In: r12:r13:r8:r9 = a (limb 0 first), rbx -> b, rdi -> result. Stores a+b, less (-1,r14,0,r15) unless that borrows; result stays in r12:r13:r8:r9.
+	xor	r11,r11	; r11 = 0 and CF = 0 for the adc chain
+	adc	r12,QWORD[rbx]	; a += b, limb by limb
+	adc	r13,QWORD[8+rbx]
+	mov	rax,r12	; keep raw sum limb 0 (mov leaves flags alone)
+	adc	r8,QWORD[16+rbx]
+	adc	r9,QWORD[24+rbx]
+	mov	rbp,r13	; keep raw sum limb 1
+	adc	r11,0	; r11 = carry out of the 256-bit add
+
+	xor	r10,r10	; CF = 0 for the sbb chain
+	sbb	r12,-1	; trial subtraction of the limbs (-1, r14, 0, r15); callers in this file load r14/r15 from $L$poly+8/+24
+	mov	rcx,r8	; keep raw sum limb 2
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9	; keep raw sum limb 3
+	sbb	r9,r15
+	sbb	r11,0	; CF = 1 iff the 5-limb trial subtraction borrowed
+
+	cmovc	r12,rax	; on borrow, fall back to the raw sum
+	cmovc	r13,rbp
+	mov	QWORD[rdi],r12	; store the selected limbs at [rdi..rdi+31]
+	cmovc	r8,rcx
+	mov	QWORD[8+rdi],r13
+	cmovc	r9,r10
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+
+
+
+ALIGN	32
+__ecp_nistz256_sub_fromx:
+; In: r12:r13:r8:r9 = a (limb 0 first), rbx -> b, rdi -> result. Stores a-b, plus (-1,r14,0,r15) if the subtraction borrowed; result stays in r12:r13:r8:r9.
+	xor	r11,r11	; r11 = 0 and CF = 0 for the sbb chain
+	sbb	r12,QWORD[rbx]	; a -= b, limb by limb
+	sbb	r13,QWORD[8+rbx]
+	mov	rax,r12	; keep raw difference limb 0 (mov leaves flags alone)
+	sbb	r8,QWORD[16+rbx]
+	sbb	r9,QWORD[24+rbx]
+	mov	rbp,r13	; keep raw difference limb 1
+	sbb	r11,0	; r11 = all-ones if a < b, else 0
+
+	xor	r10,r10	; CF = 0 for the adc chain
+	adc	r12,-1	; trial addition of the limbs (-1, r14, 0, r15)
+	mov	rcx,r8	; keep raw difference limb 2
+	adc	r13,r14
+	adc	r8,0
+	mov	r10,r9	; keep raw difference limb 3
+	adc	r9,r15
+
+	bt	r11,0	; CF = borrow indicator from the subtraction
+	cmovnc	r12,rax	; no borrow: keep the raw difference
+	cmovnc	r13,rbp
+	mov	QWORD[rdi],r12	; store the selected limbs at [rdi..rdi+31]
+	cmovnc	r8,rcx
+	mov	QWORD[8+rdi],r13
+	cmovnc	r9,r10
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+
+
+
+ALIGN	32
+__ecp_nistz256_subx:
+; In: rax:rbp:rcx:r10 = a, r12:r13:r8:r9 = b (limb 0 first). Out: r12:r13:r8:r9 = a-b, plus (-1,r14,0,r15) if it borrowed. Writes no memory.
+	xor	r11,r11	; r11 = 0 and CF = 0 for the sbb chain
+	sbb	rax,r12	; a -= b, limb by limb
+	sbb	rbp,r13
+	mov	r12,rax	; r12 = raw difference limb 0 (mov leaves flags alone)
+	sbb	rcx,r8
+	sbb	r10,r9
+	mov	r13,rbp	; r13 = raw difference limb 1
+	sbb	r11,0	; r11 = all-ones if a < b, else 0
+
+	xor	r9,r9	; CF = 0 for the adc chain
+	adc	rax,-1	; trial addition of the limbs (-1, r14, 0, r15)
+	mov	r8,rcx	; r8 = raw difference limb 2
+	adc	rbp,r14
+	adc	rcx,0
+	mov	r9,r10	; r9 = raw difference limb 3
+	adc	r10,r15
+
+	bt	r11,0	; CF = borrow indicator from the subtraction
+	cmovc	r12,rax	; borrow: take the corrected limbs instead
+	cmovc	r13,rbp
+	cmovc	r8,rcx
+	cmovc	r9,r10
+
+	DB	0F3h,0C3h		;repret
+
+
+
+
+ALIGN	32
+__ecp_nistz256_mul_by_2x:
+; In: r12:r13:r8:r9 = a (limb 0 first), rdi -> result. Stores 2*a, less (-1,r14,0,r15) unless that borrows; result stays in r12:r13:r8:r9.
+	xor	r11,r11	; r11 = 0 and CF = 0 for the adc chain
+	adc	r12,r12	; a += a, limb by limb
+	adc	r13,r13
+	mov	rax,r12	; keep raw doubled limb 0 (mov leaves flags alone)
+	adc	r8,r8
+	adc	r9,r9
+	mov	rbp,r13	; keep raw doubled limb 1
+	adc	r11,0	; r11 = carry out of the doubling
+
+	xor	r10,r10	; CF = 0 for the sbb chain
+	sbb	r12,-1	; trial subtraction of the limbs (-1, r14, 0, r15)
+	mov	rcx,r8	; keep raw doubled limb 2
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9	; keep raw doubled limb 3
+	sbb	r9,r15
+	sbb	r11,0	; CF = 1 iff the 5-limb trial subtraction borrowed
+
+	cmovc	r12,rax	; on borrow, fall back to the raw doubled value
+	cmovc	r13,rbp
+	mov	QWORD[rdi],r12	; store the selected limbs at [rdi..rdi+31]
+	cmovc	r8,rcx
+	mov	QWORD[8+rdi],r13
+	cmovc	r9,r10
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	32
+ecp_nistz256_point_doublex:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_point_doublex:
+	mov	rdi,rcx	; rdi = output pointer (Win64 arg 1)
+	mov	rsi,rdx	; rsi = input pointer (Win64 arg 2)
+; Helper-call notes below read "dst <- a, b"; mul/sqr operands are inferred from the
+; rdi/rsi/rbx setup (those helper bodies are defined elsewhere) - TODO confirm.
+
+$L$point_doublex:
+	push	rbp	; save the six registers restored in the epilogue
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	sub	rsp,32*5+8	; 168 bytes of stack scratch
+
+$L$point_doublex_body:
+
+$L$point_double_shortcutx:
+	movdqu	xmm0,XMMWORD[rsi]	; input bytes 0..31
+	mov	rbx,rsi	; rbx = input pointer
+	movdqu	xmm1,XMMWORD[16+rsi]
+	mov	r12,QWORD[((32+0))+rsi]	; r12:r13:r8:r9 = input qwords at offset 32
+	mov	r13,QWORD[((32+8))+rsi]
+	mov	r8,QWORD[((32+16))+rsi]
+	mov	r9,QWORD[((32+24))+rsi]
+	mov	r14,QWORD[(($L$poly+8))]	; r14, r15 = qwords 1 and 3 of $L$poly, used by the add/sub helpers
+	mov	r15,QWORD[(($L$poly+24))]
+	movdqa	XMMWORD[96+rsp],xmm0	; stack copy of input bytes 0..31 at [96+rsp]
+	movdqa	XMMWORD[(96+16)+rsp],xmm1
+	lea	r10,[32+rdi]
+	lea	r11,[64+rdi]
+DB	102,72,15,110,199	; movq xmm0,rdi (output pointer)
+DB	102,73,15,110,202	; movq xmm1,r10 (output+32)
+DB	102,73,15,110,211	; movq xmm2,r11 (output+64)
+
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_by_2x	; [rsp] <- 2 * input field at offset 32
+
+	mov	rdx,QWORD[((64+0))+rsi]
+	mov	r14,QWORD[((64+8))+rsi]
+	mov	r15,QWORD[((64+16))+rsi]
+	mov	r8,QWORD[((64+24))+rsi]
+	lea	rsi,[((64-128))+rsi]	; rsi is passed biased by -128 to the *_montx helpers
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sqr_montx	; [64+rsp] <- square of input field at offset 64
+
+	mov	rdx,QWORD[((0+0))+rsp]
+	mov	r14,QWORD[((8+0))+rsp]
+	lea	rsi,[((-128+0))+rsp]
+	mov	r15,QWORD[((16+0))+rsp]
+	mov	r8,QWORD[((24+0))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_sqr_montx	; [rsp] <- square of [rsp]
+
+	mov	rdx,QWORD[32+rbx]
+	mov	r9,QWORD[((64+0))+rbx]
+	mov	r10,QWORD[((64+8))+rbx]
+	mov	r11,QWORD[((64+16))+rbx]
+	mov	r12,QWORD[((64+24))+rbx]
+	lea	rsi,[((64-128))+rbx]
+	lea	rbx,[32+rbx]
+DB	102,72,15,126,215	; movq rdi,xmm2 (output+64)
+	call	__ecp_nistz256_mul_montx	; output+64 <- input field 64, input field 32
+	call	__ecp_nistz256_mul_by_2x	; output+64 <- 2 * that product
+
+	mov	r12,QWORD[((96+0))+rsp]
+	mov	r13,QWORD[((96+8))+rsp]
+	lea	rbx,[64+rsp]
+	mov	r8,QWORD[((96+16))+rsp]
+	mov	r9,QWORD[((96+24))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_add_tox	; [32+rsp] <- [96+rsp] + [64+rsp]
+
+	mov	r12,QWORD[((96+0))+rsp]
+	mov	r13,QWORD[((96+8))+rsp]
+	lea	rbx,[64+rsp]
+	mov	r8,QWORD[((96+16))+rsp]
+	mov	r9,QWORD[((96+24))+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sub_fromx	; [64+rsp] <- [96+rsp] - [64+rsp]
+
+	mov	rdx,QWORD[((0+0))+rsp]
+	mov	r14,QWORD[((8+0))+rsp]
+	lea	rsi,[((-128+0))+rsp]
+	mov	r15,QWORD[((16+0))+rsp]
+	mov	r8,QWORD[((24+0))+rsp]
+DB	102,72,15,126,207	; movq rdi,xmm1 (output+32)
+	call	__ecp_nistz256_sqr_montx	; square of [rsp]; the code below consumes r12:r13:r14:r15
+	xor	r9,r9	; inline halving: r9 collects the carry
+	mov	rax,r12	; keep the original limbs in rax,r10,rcx,r8
+	add	r12,-1	; add the limbs (-1, rsi, 0, rbp) (NOTE(review): presumably left in rsi/rbp by the helper - confirm)
+	mov	r10,r13
+	adc	r13,rsi
+	mov	rcx,r14
+	adc	r14,0
+	mov	r8,r15
+	adc	r15,rbp
+	adc	r9,0
+	xor	rsi,rsi
+	test	rax,1	; ZF = 1 if the original low limb is even
+
+	cmovz	r12,rax	; even: use the original limbs and a zero carry
+	cmovz	r13,r10
+	cmovz	r14,rcx
+	cmovz	r15,r8
+	cmovz	r9,rsi
+
+	mov	rax,r13	; shift r9:r15:r14:r13:r12 right by one bit
+	shr	r12,1
+	shl	rax,63
+	mov	r10,r14
+	shr	r13,1
+	or	r12,rax
+	shl	r10,63
+	mov	rcx,r15
+	shr	r14,1
+	or	r13,r10
+	shl	rcx,63
+	mov	QWORD[rdi],r12	; store the halved value at [rdi] (output+32)
+	shr	r15,1
+	mov	QWORD[8+rdi],r13
+	shl	r9,63
+	or	r14,rcx
+	or	r15,r9
+	mov	QWORD[16+rdi],r14
+	mov	QWORD[24+rdi],r15
+	mov	rdx,QWORD[64+rsp]
+	lea	rbx,[64+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((-128+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montx	; [32+rsp] <- [32+rsp], [64+rsp]
+
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_mul_by_2x	; [128+rsp] <- 2 * r12:r13:r8:r9
+
+	lea	rbx,[32+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_add_tox	; [32+rsp] <- r12:r13:r8:r9 + [32+rsp]
+
+	mov	rdx,QWORD[96+rsp]
+	lea	rbx,[96+rsp]
+	mov	r9,QWORD[((0+0))+rsp]
+	mov	r10,QWORD[((8+0))+rsp]
+	lea	rsi,[((-128+0))+rsp]
+	mov	r11,QWORD[((16+0))+rsp]
+	mov	r12,QWORD[((24+0))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montx	; [rsp] <- [rsp], [96+rsp]
+
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_mul_by_2x	; [128+rsp] <- 2 * r12:r13:r8:r9
+
+	mov	rdx,QWORD[((0+32))+rsp]
+	mov	r14,QWORD[((8+32))+rsp]
+	lea	rsi,[((-128+32))+rsp]
+	mov	r15,QWORD[((16+32))+rsp]
+	mov	r8,QWORD[((24+32))+rsp]
+DB	102,72,15,126,199	; movq rdi,xmm0 (output pointer)
+	call	__ecp_nistz256_sqr_montx	; square of [32+rsp]
+
+	lea	rbx,[128+rsp]
+	mov	r8,r14	; move limbs 2,3 from r14,r15 into r8,r9 for sub_fromx
+	mov	r9,r15
+	mov	r14,rsi	; r14, r15 <- rsi, rbp (the limbs sub_fromx adds back on borrow)
+	mov	r15,rbp
+	call	__ecp_nistz256_sub_fromx	; output bytes 0..31 <- r12:r13:r8:r9 - [128+rsp]
+
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	rbp,QWORD[((0+8))+rsp]
+	mov	rcx,QWORD[((0+16))+rsp]
+	mov	r10,QWORD[((0+24))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_subx	; r12:r13:r8:r9 <- [rsp] - r12:r13:r8:r9
+
+	mov	rdx,QWORD[32+rsp]
+	lea	rbx,[32+rsp]
+	mov	r14,r12
+	xor	ecx,ecx	; also sets ZF, so both cmovz below always move
+	mov	QWORD[((0+0))+rsp],r12	; store the subx result at [rsp]
+	mov	r10,r13
+	mov	QWORD[((0+8))+rsp],r13
+	cmovz	r11,r8
+	mov	QWORD[((0+16))+rsp],r8
+	lea	rsi,[((0-128))+rsp]
+	cmovz	r12,r9
+	mov	QWORD[((0+24))+rsp],r9
+	mov	r9,r14	; r9:r10:r11:r12 now hold the same four limbs
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montx	; [rsp] <- [rsp], [32+rsp]
+
+DB	102,72,15,126,203	; movq rbx,xmm1 (output+32)
+DB	102,72,15,126,207	; movq rdi,xmm1 (output+32)
+	call	__ecp_nistz256_sub_fromx	; output bytes 32..63 <- r12:r13:r8:r9 - output bytes 32..63
+
+	lea	rsi,[((160+56))+rsp]	; rsi = rsp as it was before the six pushes (168 scratch + 48)
+
+	mov	r15,QWORD[((-48))+rsi]	; reload the saved registers
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbx,QWORD[((-16))+rsi]
+
+	mov	rbp,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; release the frame
+
+$L$point_doublex_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_point_doublex:
+
+ALIGN	32
+ecp_nistz256_point_addx:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_point_addx:
+	mov	rdi,rcx	; rdi = output pointer (Win64 arg 1)
+	mov	rsi,rdx	; rsi = input 1 pointer (Win64 arg 2)
+	mov	rdx,r8	; rdx = input 2 pointer (Win64 arg 3)
+; Helper-call notes below read "dst <- a, b"; mul/sqr operands are inferred from the
+; rdi/rsi/rbx setup (those helper bodies are defined elsewhere) - TODO confirm.
+
+$L$point_addx:
+	push	rbp	; save the six registers restored in the epilogue
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	sub	rsp,32*18+8	; 584 bytes of stack scratch
+
+$L$point_addx_body:
+
+	movdqu	xmm0,XMMWORD[rsi]	; xmm0..xmm5 = the 96 bytes of input 1
+	movdqu	xmm1,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm3,XMMWORD[48+rsi]
+	movdqu	xmm4,XMMWORD[64+rsi]
+	movdqu	xmm5,XMMWORD[80+rsi]
+	mov	rbx,rsi	; rbx = input 1
+	mov	rsi,rdx	; rsi = input 2
+	movdqa	XMMWORD[384+rsp],xmm0	; stack copy of input 1 at [384..479+rsp]
+	movdqa	XMMWORD[(384+16)+rsp],xmm1
+	movdqa	XMMWORD[416+rsp],xmm2
+	movdqa	XMMWORD[(416+16)+rsp],xmm3
+	movdqa	XMMWORD[448+rsp],xmm4
+	movdqa	XMMWORD[(448+16)+rsp],xmm5
+	por	xmm5,xmm4	; start OR-folding input 1 bytes 64..95 into one dword
+
+	movdqu	xmm0,XMMWORD[rsi]	; input 2 bytes 0..63 -> xmm0..xmm3
+	pshufd	xmm3,xmm5,0xb1
+	movdqu	xmm1,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	por	xmm5,xmm3
+	movdqu	xmm3,XMMWORD[48+rsi]
+	mov	rdx,QWORD[((64+0))+rsi]	; rdx,r14,r15,r8 = the four qwords of input 2 at offset 64
+	mov	r14,QWORD[((64+8))+rsi]
+	mov	r15,QWORD[((64+16))+rsi]
+	mov	r8,QWORD[((64+24))+rsi]
+	movdqa	XMMWORD[480+rsp],xmm0	; stack copy of input 2 bytes 0..63 at [480..543+rsp]
+	pshufd	xmm4,xmm5,0x1e
+	movdqa	XMMWORD[(480+16)+rsp],xmm1
+	movdqu	xmm0,XMMWORD[64+rsi]	; input 2 bytes 64..95 (for the zero test)
+	movdqu	xmm1,XMMWORD[80+rsi]
+	movdqa	XMMWORD[512+rsp],xmm2
+	movdqa	XMMWORD[(512+16)+rsp],xmm3
+	por	xmm5,xmm4	; dword 0 of xmm5 = OR of all dwords of input 1 bytes 64..95
+	pxor	xmm4,xmm4
+	por	xmm1,xmm0	; start OR-folding input 2 bytes 64..95
+DB	102,72,15,110,199	; movq xmm0,rdi (stash the output pointer)
+
+	lea	rsi,[((64-128))+rsi]	; rsi is passed biased by -128 to the *_montx helpers
+	mov	QWORD[((544+0))+rsp],rdx	; stack copy of input 2 bytes 64..95 at [544..575+rsp]
+	mov	QWORD[((544+8))+rsp],r14
+	mov	QWORD[((544+16))+rsp],r15
+	mov	QWORD[((544+24))+rsp],r8
+	lea	rdi,[96+rsp]
+	call	__ecp_nistz256_sqr_montx	; [96+rsp] <- square of input 2 field at offset 64
+
+	pcmpeqd	xmm5,xmm4
+	pshufd	xmm4,xmm1,0xb1
+	por	xmm4,xmm1
+	pshufd	xmm5,xmm5,0	; xmm5 = all-ones iff input 1 bytes 64..95 are all zero (assumes the helper leaves xmm regs intact)
+	pshufd	xmm3,xmm4,0x1e
+	por	xmm4,xmm3
+	pxor	xmm3,xmm3
+	pcmpeqd	xmm4,xmm3
+	pshufd	xmm4,xmm4,0	; xmm4 = all-ones iff input 2 bytes 64..95 are all zero
+	mov	rdx,QWORD[((64+0))+rbx]
+	mov	r14,QWORD[((64+8))+rbx]
+	mov	r15,QWORD[((64+16))+rbx]
+	mov	r8,QWORD[((64+24))+rbx]
+DB	102,72,15,110,203	; movq xmm1,rbx (stash the input 1 pointer)
+
+	lea	rsi,[((64-128))+rbx]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_sqr_montx	; [32+rsp] <- square of input 1 field at offset 64
+
+	mov	rdx,QWORD[544+rsp]
+	lea	rbx,[544+rsp]
+	mov	r9,QWORD[((0+96))+rsp]
+	mov	r10,QWORD[((8+96))+rsp]
+	lea	rsi,[((-128+96))+rsp]
+	mov	r11,QWORD[((16+96))+rsp]
+	mov	r12,QWORD[((24+96))+rsp]
+	lea	rdi,[224+rsp]
+	call	__ecp_nistz256_mul_montx	; [224+rsp] <- [96+rsp], [544+rsp]
+
+	mov	rdx,QWORD[448+rsp]
+	lea	rbx,[448+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((-128+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_mul_montx	; [256+rsp] <- [32+rsp], [448+rsp]
+
+	mov	rdx,QWORD[416+rsp]
+	lea	rbx,[416+rsp]
+	mov	r9,QWORD[((0+224))+rsp]
+	mov	r10,QWORD[((8+224))+rsp]
+	lea	rsi,[((-128+224))+rsp]
+	mov	r11,QWORD[((16+224))+rsp]
+	mov	r12,QWORD[((24+224))+rsp]
+	lea	rdi,[224+rsp]
+	call	__ecp_nistz256_mul_montx	; [224+rsp] <- [224+rsp], [416+rsp]
+
+	mov	rdx,QWORD[512+rsp]
+	lea	rbx,[512+rsp]
+	mov	r9,QWORD[((0+256))+rsp]
+	mov	r10,QWORD[((8+256))+rsp]
+	lea	rsi,[((-128+256))+rsp]
+	mov	r11,QWORD[((16+256))+rsp]
+	mov	r12,QWORD[((24+256))+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_mul_montx	; [256+rsp] <- [256+rsp], [512+rsp]
+
+	lea	rbx,[224+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sub_fromx	; [64+rsp] <- r12:r13:r8:r9 - [224+rsp]
+
+	or	r12,r13	; r12 = OR of the four limbs of that difference
+	movdqa	xmm2,xmm4
+	or	r12,r8
+	or	r12,r9
+	por	xmm2,xmm5	; xmm2 = xmm4 | xmm5 (either zero-test mask set)
+DB	102,73,15,110,220	; movq xmm3,r12 (stash the OR-reduction)
+
+	mov	rdx,QWORD[384+rsp]
+	lea	rbx,[384+rsp]
+	mov	r9,QWORD[((0+96))+rsp]
+	mov	r10,QWORD[((8+96))+rsp]
+	lea	rsi,[((-128+96))+rsp]
+	mov	r11,QWORD[((16+96))+rsp]
+	mov	r12,QWORD[((24+96))+rsp]
+	lea	rdi,[160+rsp]
+	call	__ecp_nistz256_mul_montx	; [160+rsp] <- [96+rsp], [384+rsp]
+
+	mov	rdx,QWORD[480+rsp]
+	lea	rbx,[480+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((-128+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[192+rsp]
+	call	__ecp_nistz256_mul_montx	; [192+rsp] <- [32+rsp], [480+rsp]
+
+	lea	rbx,[160+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_sub_fromx	; [rsp] <- r12:r13:r8:r9 - [160+rsp]
+
+	or	r12,r13	; r12 = OR of the four limbs of the second difference
+	or	r12,r8
+	or	r12,r9
+
+DB	102,73,15,126,208	; movq r8,xmm2 (low qword of the combined mask)
+DB	102,73,15,126,217	; movq r9,xmm3 (OR-reduction of the first difference)
+	or	r12,r8	; nonzero if the second difference is nonzero or either mask is set
+DB	0x3e	; prefix byte emitted ahead of the jnz (NOTE(review): presumably a branch hint - confirm)
+	jnz	NEAR $L$add_proceedx
+
+; Reached only when r12 is zero: second difference zero and neither mask set.
+
+	test	r9,r9	; was the first difference zero as well?
+	jz	NEAR $L$add_doublex	; yes: hand over to the point-double code
+
+
+
+
+
+
+DB	102,72,15,126,199	; movq rdi,xmm0 (output pointer)
+	pxor	xmm0,xmm0
+	movdqu	XMMWORD[rdi],xmm0	; write 96 zero bytes to the output
+	movdqu	XMMWORD[16+rdi],xmm0
+	movdqu	XMMWORD[32+rdi],xmm0
+	movdqu	XMMWORD[48+rdi],xmm0
+	movdqu	XMMWORD[64+rdi],xmm0
+	movdqu	XMMWORD[80+rdi],xmm0
+	jmp	NEAR $L$add_donex
+
+ALIGN	32
+$L$add_doublex:
+DB	102,72,15,126,206	; movq rsi,xmm1 (input 1 pointer)
+DB	102,72,15,126,199	; movq rdi,xmm0 (output pointer)
+	add	rsp,416	; shrink scratch from 584 to 168 bytes, the 32*5+8 frame of the point-double code
+
+	jmp	NEAR $L$point_double_shortcutx
+
+
+ALIGN	32
+$L$add_proceedx:
+	mov	rdx,QWORD[((0+64))+rsp]
+	mov	r14,QWORD[((8+64))+rsp]
+	lea	rsi,[((-128+64))+rsp]
+	mov	r15,QWORD[((16+64))+rsp]
+	mov	r8,QWORD[((24+64))+rsp]
+	lea	rdi,[96+rsp]
+	call	__ecp_nistz256_sqr_montx	; [96+rsp] <- square of [64+rsp]
+
+	mov	rdx,QWORD[448+rsp]
+	lea	rbx,[448+rsp]
+	mov	r9,QWORD[((0+0))+rsp]
+	mov	r10,QWORD[((8+0))+rsp]
+	lea	rsi,[((-128+0))+rsp]
+	mov	r11,QWORD[((16+0))+rsp]
+	mov	r12,QWORD[((24+0))+rsp]
+	lea	rdi,[352+rsp]
+	call	__ecp_nistz256_mul_montx	; [352+rsp] <- [rsp], [448+rsp]
+
+	mov	rdx,QWORD[((0+0))+rsp]
+	mov	r14,QWORD[((8+0))+rsp]
+	lea	rsi,[((-128+0))+rsp]
+	mov	r15,QWORD[((16+0))+rsp]
+	mov	r8,QWORD[((24+0))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_sqr_montx	; [32+rsp] <- square of [rsp]
+
+	mov	rdx,QWORD[544+rsp]
+	lea	rbx,[544+rsp]
+	mov	r9,QWORD[((0+352))+rsp]
+	mov	r10,QWORD[((8+352))+rsp]
+	lea	rsi,[((-128+352))+rsp]
+	mov	r11,QWORD[((16+352))+rsp]
+	mov	r12,QWORD[((24+352))+rsp]
+	lea	rdi,[352+rsp]
+	call	__ecp_nistz256_mul_montx	; [352+rsp] <- [352+rsp], [544+rsp]
+
+	mov	rdx,QWORD[rsp]
+	lea	rbx,[rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((-128+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_mul_montx	; [128+rsp] <- [32+rsp], [rsp]
+
+	mov	rdx,QWORD[160+rsp]
+	lea	rbx,[160+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((-128+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[192+rsp]
+	call	__ecp_nistz256_mul_montx	; [192+rsp] <- [32+rsp], [160+rsp]
+
+; Inline doubling of r12:r13:r8:r9 followed by a conditional subtraction of
+; the limbs (-1, r14, 0, r15); loads of [96+rsp] are interleaved for the next call.
+
+	xor	r11,r11
+	add	r12,r12
+	lea	rsi,[96+rsp]
+	adc	r13,r13
+	mov	rax,r12
+	adc	r8,r8
+	adc	r9,r9
+	mov	rbp,r13
+	adc	r11,0	; r11 = carry out of the doubling
+
+	sub	r12,-1
+	mov	rcx,r8
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9
+	sbb	r9,r15
+	sbb	r11,0	; CF set if the trial subtraction borrowed overall
+
+	cmovc	r12,rax	; on borrow keep the unsubtracted limbs
+	mov	rax,QWORD[rsi]	; rax,rbp,rcx,r10 = the four qwords at [96+rsp]
+	cmovc	r13,rbp
+	mov	rbp,QWORD[8+rsi]
+	cmovc	r8,rcx
+	mov	rcx,QWORD[16+rsi]
+	cmovc	r9,r10
+	mov	r10,QWORD[24+rsi]
+
+	call	__ecp_nistz256_subx	; r12:r13:r8:r9 <- [96+rsp] - doubled value
+
+	lea	rbx,[128+rsp]
+	lea	rdi,[288+rsp]
+	call	__ecp_nistz256_sub_fromx	; [288+rsp] <- r12:r13:r8:r9 - [128+rsp]
+
+	mov	rax,QWORD[((192+0))+rsp]
+	mov	rbp,QWORD[((192+8))+rsp]
+	mov	rcx,QWORD[((192+16))+rsp]
+	mov	r10,QWORD[((192+24))+rsp]
+	lea	rdi,[320+rsp]
+
+	call	__ecp_nistz256_subx	; r12:r13:r8:r9 <- [192+rsp] - r12:r13:r8:r9
+
+	mov	QWORD[rdi],r12	; subx writes no memory, so store the result at [320+rsp] here
+	mov	QWORD[8+rdi],r13
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+	mov	rdx,QWORD[128+rsp]
+	lea	rbx,[128+rsp]
+	mov	r9,QWORD[((0+224))+rsp]
+	mov	r10,QWORD[((8+224))+rsp]
+	lea	rsi,[((-128+224))+rsp]
+	mov	r11,QWORD[((16+224))+rsp]
+	mov	r12,QWORD[((24+224))+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_mul_montx	; [256+rsp] <- [224+rsp], [128+rsp]
+
+	mov	rdx,QWORD[320+rsp]
+	lea	rbx,[320+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((-128+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[320+rsp]
+	call	__ecp_nistz256_mul_montx	; [320+rsp] <- [64+rsp], [320+rsp]
+
+	lea	rbx,[256+rsp]
+	lea	rdi,[320+rsp]
+	call	__ecp_nistz256_sub_fromx	; [320+rsp] <- r12:r13:r8:r9 - [256+rsp]
+
+DB	102,72,15,126,199	; movq rdi,xmm0 (output pointer)
+
+	movdqa	xmm0,xmm5	; branch-free select: xmm2:xmm3 = xmm5 ? [544+rsp] : [352+rsp]
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[352+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((352+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[544+rsp]
+	pand	xmm3,XMMWORD[((544+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4	; then xmm2:xmm3 = xmm4 ? [448+rsp] : previous selection
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[448+rsp]
+	pand	xmm3,XMMWORD[((448+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[64+rdi],xmm2	; output bytes 64..95
+	movdqu	XMMWORD[80+rdi],xmm3
+
+	movdqa	xmm0,xmm5	; xmm2:xmm3 = xmm5 ? [480+rsp] : [288+rsp]
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[288+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((288+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[480+rsp]
+	pand	xmm3,XMMWORD[((480+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4	; then xmm4 ? [384+rsp] : previous selection
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[384+rsp]
+	pand	xmm3,XMMWORD[((384+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[rdi],xmm2	; output bytes 0..31
+	movdqu	XMMWORD[16+rdi],xmm3
+
+	movdqa	xmm0,xmm5	; xmm2:xmm3 = xmm5 ? [512+rsp] : [320+rsp]
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[320+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((320+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[512+rsp]
+	pand	xmm3,XMMWORD[((512+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4	; then xmm4 ? [416+rsp] : previous selection
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[416+rsp]
+	pand	xmm3,XMMWORD[((416+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[32+rdi],xmm2	; output bytes 32..63
+	movdqu	XMMWORD[48+rdi],xmm3
+
+$L$add_donex:
+	lea	rsi,[((576+56))+rsp]	; rsi = rsp as it was before the six pushes (584 scratch + 48)
+
+	mov	r15,QWORD[((-48))+rsi]	; reload the saved registers
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbx,QWORD[((-16))+rsi]
+
+	mov	rbp,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; release the frame
+
+$L$point_addx_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_point_addx:
+
+ALIGN	32
+ecp_nistz256_point_add_affinex:	; point + affine-point addition; this variant calls the *_montx helpers
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_ecp_nistz256_point_add_affinex:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+; Arguments arrive in rcx/rdx/r8 and are moved to rdi/rsi/rdx above: rdi = result (three 32-byte values are
+; stored at +0/+32/+64), rsi = first point (96 bytes read), rdx = second point (64 bytes read).
+; NOTE(review): the *_montx/sub_fromx/subx helpers are defined outside this block; operand roles noted below are inferred from slot reuse.
+$L$point_add_affinex:
+	push	rbp
+
+	push	rbx
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	sub	rsp,32*15+8	; 488-byte scratch frame; the epilogue undoes it together with the six pushes
+
+$L$add_affinex_body:
+; Copy the first point to [320..415+rsp] and start OR-folding its third coordinate (bytes 64..95) into xmm5.
+	movdqu	xmm0,XMMWORD[rsi]
+	mov	rbx,rdx
+	movdqu	xmm1,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+	movdqu	xmm3,XMMWORD[48+rsi]
+	movdqu	xmm4,XMMWORD[64+rsi]
+	movdqu	xmm5,XMMWORD[80+rsi]
+	mov	rdx,QWORD[((64+0))+rsi]
+	mov	r14,QWORD[((64+8))+rsi]
+	mov	r15,QWORD[((64+16))+rsi]
+	mov	r8,QWORD[((64+24))+rsi]
+	movdqa	XMMWORD[320+rsp],xmm0
+	movdqa	XMMWORD[(320+16)+rsp],xmm1
+	movdqa	XMMWORD[352+rsp],xmm2
+	movdqa	XMMWORD[(352+16)+rsp],xmm3
+	movdqa	XMMWORD[384+rsp],xmm4
+	movdqa	XMMWORD[(384+16)+rsp],xmm5
+	por	xmm5,xmm4
+; Copy the second point (64 bytes at rbx) to [416..479+rsp] and OR-fold all of it into xmm3.
+	movdqu	xmm0,XMMWORD[rbx]
+	pshufd	xmm3,xmm5,0xb1
+	movdqu	xmm1,XMMWORD[16+rbx]
+	movdqu	xmm2,XMMWORD[32+rbx]
+	por	xmm5,xmm3
+	movdqu	xmm3,XMMWORD[48+rbx]
+	movdqa	XMMWORD[416+rsp],xmm0
+	pshufd	xmm4,xmm5,0x1e
+	movdqa	XMMWORD[(416+16)+rsp],xmm1
+	por	xmm1,xmm0
+DB	102,72,15,110,199	; bytes 66 48 0F 6E C7 = movq xmm0,rdi (parks the result pointer in xmm0)
+	movdqa	XMMWORD[448+rsp],xmm2
+	movdqa	XMMWORD[(448+16)+rsp],xmm3
+	por	xmm3,xmm2
+	por	xmm5,xmm4
+	pxor	xmm4,xmm4
+	por	xmm3,xmm1
+; sqr_montx on the first point's third coordinate (words preloaded in rdx,r14,r15,r8); rdi -> [32+rsp].
+	lea	rsi,[((64-128))+rsi]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_sqr_montx
+; Finish the masks: xmm5 = all-ones iff bytes 64..95 of the first point are all zero.
+	pcmpeqd	xmm5,xmm4
+	pshufd	xmm4,xmm3,0xb1
+	mov	rdx,QWORD[rbx]
+
+	mov	r9,r12
+	por	xmm4,xmm3
+	pshufd	xmm5,xmm5,0
+	pshufd	xmm3,xmm4,0x1e
+	mov	r10,r13
+	por	xmm4,xmm3
+	pxor	xmm3,xmm3
+	mov	r11,r14
+	pcmpeqd	xmm4,xmm3
+	pshufd	xmm4,xmm4,0	; xmm4 = all-ones iff all 64 bytes of the second point are zero
+; mul_montx: register words carried over from the square, times the value at rbx (second point, first coordinate); rdi -> [rsp].
+	lea	rsi,[((32-128))+rsp]
+	mov	r12,r15
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montx
+; sub_fromx with rbx -> [320+rsp] (first point, first coordinate); rdi -> [64+rsp].
+	lea	rbx,[320+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_sub_fromx
+; mul_montx: [32+rsp] and [384+rsp]; rdi -> [32+rsp].
+	mov	rdx,QWORD[384+rsp]
+	lea	rbx,[384+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((-128+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montx
+; mul_montx: [64+rsp] and [384+rsp]; rdi -> [288+rsp] (the slot later selected as the third output coordinate).
+	mov	rdx,QWORD[384+rsp]
+	lea	rbx,[384+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((-128+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[288+rsp]
+	call	__ecp_nistz256_mul_montx
+; mul_montx: [32+rsp] and [448+rsp] (second point, second coordinate); rdi -> [32+rsp].
+	mov	rdx,QWORD[448+rsp]
+	lea	rbx,[448+rsp]
+	mov	r9,QWORD[((0+32))+rsp]
+	mov	r10,QWORD[((8+32))+rsp]
+	lea	rsi,[((-128+32))+rsp]
+	mov	r11,QWORD[((16+32))+rsp]
+	mov	r12,QWORD[((24+32))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montx
+; sub_fromx with rbx -> [352+rsp] (first point, second coordinate); rdi -> [96+rsp].
+	lea	rbx,[352+rsp]
+	lea	rdi,[96+rsp]
+	call	__ecp_nistz256_sub_fromx
+; sqr_montx on [64+rsp]; rdi -> [128+rsp].
+	mov	rdx,QWORD[((0+64))+rsp]
+	mov	r14,QWORD[((8+64))+rsp]
+	lea	rsi,[((-128+64))+rsp]
+	mov	r15,QWORD[((16+64))+rsp]
+	mov	r8,QWORD[((24+64))+rsp]
+	lea	rdi,[128+rsp]
+	call	__ecp_nistz256_sqr_montx
+; sqr_montx on [96+rsp]; rdi -> [192+rsp].
+	mov	rdx,QWORD[((0+96))+rsp]
+	mov	r14,QWORD[((8+96))+rsp]
+	lea	rsi,[((-128+96))+rsp]
+	mov	r15,QWORD[((16+96))+rsp]
+	mov	r8,QWORD[((24+96))+rsp]
+	lea	rdi,[192+rsp]
+	call	__ecp_nistz256_sqr_montx
+; mul_montx: [64+rsp] and [128+rsp]; rdi -> [160+rsp].
+	mov	rdx,QWORD[128+rsp]
+	lea	rbx,[128+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((-128+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[160+rsp]
+	call	__ecp_nistz256_mul_montx
+; mul_montx: [128+rsp] and [320+rsp]; rdi -> [rsp].
+	mov	rdx,QWORD[320+rsp]
+	lea	rbx,[320+rsp]
+	mov	r9,QWORD[((0+128))+rsp]
+	mov	r10,QWORD[((8+128))+rsp]
+	lea	rsi,[((-128+128))+rsp]
+	mov	r11,QWORD[((16+128))+rsp]
+	mov	r12,QWORD[((24+128))+rsp]
+	lea	rdi,[rsp]
+	call	__ecp_nistz256_mul_montx
+; Inline step: the four words in r12,r13,r8,r9 are doubled (carry kept in r11), then (-1, r14, 0, r15) is
+; subtracted; if that borrows, cmovc puts the un-subtracted doubled words back.
+; NOTE(review): r14/r15 are presumably modulus words left behind by the helper, making this a doubling mod p -- confirm.
+; Interleaved with it, the four words at [192+rsp] are loaded into rax,rbp,rcx,r10 for the subx call.
+	xor	r11,r11
+	add	r12,r12
+	lea	rsi,[192+rsp]
+	adc	r13,r13
+	mov	rax,r12
+	adc	r8,r8
+	adc	r9,r9
+	mov	rbp,r13
+	adc	r11,0
+
+	sub	r12,-1
+	mov	rcx,r8
+	sbb	r13,r14
+	sbb	r8,0
+	mov	r10,r9
+	sbb	r9,r15
+	sbb	r11,0
+
+	cmovc	r12,rax
+	mov	rax,QWORD[rsi]
+	cmovc	r13,rbp
+	mov	rbp,QWORD[8+rsi]
+	cmovc	r8,rcx
+	mov	rcx,QWORD[16+rsi]
+	cmovc	r9,r10
+	mov	r10,QWORD[24+rsi]
+
+	call	__ecp_nistz256_subx
+; sub_fromx with rbx -> [160+rsp]; rdi -> [224+rsp] (the slot later selected as the first output coordinate).
+	lea	rbx,[160+rsp]
+	lea	rdi,[224+rsp]
+	call	__ecp_nistz256_sub_fromx
+; subx with the four words of [rsp] in rax,rbp,rcx,r10; the register result is stored to [64+rsp] just below.
+	mov	rax,QWORD[((0+0))+rsp]
+	mov	rbp,QWORD[((0+8))+rsp]
+	mov	rcx,QWORD[((0+16))+rsp]
+	mov	r10,QWORD[((0+24))+rsp]
+	lea	rdi,[64+rsp]
+
+	call	__ecp_nistz256_subx
+
+	mov	QWORD[rdi],r12
+	mov	QWORD[8+rdi],r13
+	mov	QWORD[16+rdi],r8
+	mov	QWORD[24+rdi],r9
+	mov	rdx,QWORD[352+rsp]
+	lea	rbx,[352+rsp]
+	mov	r9,QWORD[((0+160))+rsp]
+	mov	r10,QWORD[((8+160))+rsp]
+	lea	rsi,[((-128+160))+rsp]
+	mov	r11,QWORD[((16+160))+rsp]
+	mov	r12,QWORD[((24+160))+rsp]
+	lea	rdi,[32+rsp]
+	call	__ecp_nistz256_mul_montx
+; mul_montx: [64+rsp] and [96+rsp]; rdi -> [64+rsp].
+	mov	rdx,QWORD[96+rsp]
+	lea	rbx,[96+rsp]
+	mov	r9,QWORD[((0+64))+rsp]
+	mov	r10,QWORD[((8+64))+rsp]
+	lea	rsi,[((-128+64))+rsp]
+	mov	r11,QWORD[((16+64))+rsp]
+	mov	r12,QWORD[((24+64))+rsp]
+	lea	rdi,[64+rsp]
+	call	__ecp_nistz256_mul_montx
+; sub_fromx with rbx -> [32+rsp]; rdi -> [256+rsp] (the slot later selected as the second output coordinate).
+	lea	rbx,[32+rsp]
+	lea	rdi,[256+rsp]
+	call	__ecp_nistz256_sub_fromx
+
+DB	102,72,15,126,199	; bytes 66 48 0F 7E C7 = movq rdi,xmm0 (recovers the result pointer)
+; Branch-free output selection, third coordinate: xmm5 ? $L$ONE_mont : [288+rsp], then xmm4 ? [384+rsp] : that.
+	movdqa	xmm0,xmm5
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[288+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((288+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[$L$ONE_mont]
+	pand	xmm3,XMMWORD[(($L$ONE_mont+16))]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[384+rsp]
+	pand	xmm3,XMMWORD[((384+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[64+rdi],xmm2
+	movdqu	XMMWORD[80+rdi],xmm3
+; First coordinate: xmm5 ? [416+rsp] (second point) : [224+rsp], then xmm4 ? [320+rsp] (first point) : that.
+	movdqa	xmm0,xmm5
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[224+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((224+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[416+rsp]
+	pand	xmm3,XMMWORD[((416+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[320+rsp]
+	pand	xmm3,XMMWORD[((320+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[rdi],xmm2
+	movdqu	XMMWORD[16+rdi],xmm3
+; Second coordinate: xmm5 ? [448+rsp] (second point) : [256+rsp], then xmm4 ? [352+rsp] (first point) : that.
+	movdqa	xmm0,xmm5
+	movdqa	xmm1,xmm5
+	pandn	xmm0,XMMWORD[256+rsp]
+	movdqa	xmm2,xmm5
+	pandn	xmm1,XMMWORD[((256+16))+rsp]
+	movdqa	xmm3,xmm5
+	pand	xmm2,XMMWORD[448+rsp]
+	pand	xmm3,XMMWORD[((448+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+
+	movdqa	xmm0,xmm4
+	movdqa	xmm1,xmm4
+	pandn	xmm0,xmm2
+	movdqa	xmm2,xmm4
+	pandn	xmm1,xmm3
+	movdqa	xmm3,xmm4
+	pand	xmm2,XMMWORD[352+rsp]
+	pand	xmm3,XMMWORD[((352+16))+rsp]
+	por	xmm2,xmm0
+	por	xmm3,xmm1
+	movdqu	XMMWORD[32+rdi],xmm2
+	movdqu	XMMWORD[48+rdi],xmm3
+; Unwind: rsi = rsp + 488 (frame) + 48 (six pushes); the saved registers sit just below that address.
+	lea	rsi,[((480+56))+rsp]
+
+	mov	r15,QWORD[((-48))+rsi]
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbx,QWORD[((-16))+rsi]
+
+	mov	rbp,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]
+
+$L$add_affinex_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_ecp_nistz256_point_add_affinex:
+EXTERN	__imp_RtlVirtualUnwind
+
+
+ALIGN	16
+short_handler:	; SEH handler named by .xdata entries that carry two DWORDs (body RVA, epilogue RVA) as handler data
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64
+; NOTE(review): r8/r9 are presumably the CONTEXT* and DISPATCHER_CONTEXT* arguments; field names below assume the winnt.h layouts -- confirm.
+	mov	rax,QWORD[120+r8]	; presumably context->Rax (the visible WIN64 prologues copy rsp into rax)
+	mov	rbx,QWORD[248+r8]	; presumably context->Rip
+
+	mov	rsi,QWORD[8+r9]	; presumably disp->ImageBase
+	mov	r11,QWORD[56+r9]	; presumably disp->HandlerData (the DWORD pair from .xdata)
+
+	mov	r10d,DWORD[r11]
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault address below the first handler-data label: use rax as loaded above
+
+	mov	rax,QWORD[152+r8]	; presumably context->Rsp
+
+	mov	r10d,DWORD[4+r11]
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; fault address at or past the second handler-data label: use the loaded value as is
+; Otherwise step rax up by 16 and copy the two qwords just below it into the context.
+	lea	rax,[16+rax]
+
+	mov	r12,QWORD[((-8))+rax]
+	mov	r13,QWORD[((-16))+rax]
+	mov	QWORD[216+r8],r12	; presumably context->R12
+	mov	QWORD[224+r8],r13	; presumably context->R13
+
+	jmp	NEAR $L$common_seh_tail
+
+
+
+ALIGN	16
+full_handler:	; SEH handler named by .xdata entries that carry body RVA, epilogue RVA and a frame-size DWORD as handler data
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64
+; NOTE(review): r8/r9 are presumably the CONTEXT* and DISPATCHER_CONTEXT* arguments; field names below assume the winnt.h layouts -- confirm.
+	mov	rax,QWORD[120+r8]	; presumably context->Rax (the visible WIN64 prologues copy rsp into rax)
+	mov	rbx,QWORD[248+r8]	; presumably context->Rip
+
+	mov	rsi,QWORD[8+r9]	; presumably disp->ImageBase
+	mov	r11,QWORD[56+r9]	; presumably disp->HandlerData
+
+	mov	r10d,DWORD[r11]
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; fault address below the body label: use rax as loaded above
+
+	mov	rax,QWORD[152+r8]	; presumably context->Rsp
+
+	mov	r10d,DWORD[4+r11]
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; fault address at or past the epilogue label: use the loaded value as is
+; Inside the body: add the third handler-data DWORD (e.g. 32*15+56 in the tables) to reach the top of the push area.
+	mov	r10d,DWORD[8+r11]
+	lea	rax,[r10*1+rax]
+; Reload the six qwords below that address (same layout the visible point_add_affinex epilogue reads) into the context.
+	mov	rbp,QWORD[((-8))+rax]
+	mov	rbx,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx
+	mov	QWORD[160+r8],rbp
+	mov	QWORD[216+r8],r12
+	mov	QWORD[224+r8],r13
+	mov	QWORD[232+r8],r14
+	mov	QWORD[240+r8],r15
+
+$L$common_seh_tail:	; shared with short_handler; expects rax = stack pointer value at function entry
+	mov	rdi,QWORD[8+rax]	; rdi/rsi values that the WIN64 prologues stored at [8+rsp]/[16+rsp]
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax
+	mov	QWORD[168+r8],rsi
+	mov	QWORD[176+r8],rdi
+; Copy 154 qwords from the block at r8 to the block at [40+r9] (presumably disp->ContextRecord).
+	mov	rdi,QWORD[40+r9]
+	mov	rsi,r8
+	mov	ecx,154
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+; Marshal arguments for RtlVirtualUnwind: rcx = 0, rdx/r8/r9 and three stack slots taken from fields of the block at r9, last slot = 0.
+	mov	rsi,r9
+	xor	rcx,rcx
+	mov	rdx,QWORD[8+rsi]
+	mov	r8,QWORD[rsi]
+	mov	r9,QWORD[16+rsi]
+	mov	r10,QWORD[40+rsi]
+	lea	r11,[56+rsi]
+	lea	r12,[24+rsi]
+	mov	QWORD[32+rsp],r10
+	mov	QWORD[40+rsp],r11
+	mov	QWORD[48+rsp],r12
+	mov	QWORD[56+rsp],rcx
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; handler return value; presumably ExceptionContinueSearch -- confirm
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_ecp_nistz256_neg wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_neg wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_neg wrt ..imagebase
+
+	DD	$L$SEH_begin_ecp_nistz256_ord_mul_mont wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_ord_mul_mont wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_ord_mul_mont wrt ..imagebase
+
+	DD	$L$SEH_begin_ecp_nistz256_ord_sqr_mont wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_ord_sqr_mont wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_ord_sqr_mont wrt ..imagebase
+	DD	$L$SEH_begin_ecp_nistz256_ord_mul_montx wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_ord_mul_montx wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_ord_mul_montx wrt ..imagebase
+
+	DD	$L$SEH_begin_ecp_nistz256_ord_sqr_montx wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_ord_sqr_montx wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_ord_sqr_montx wrt ..imagebase
+	DD	$L$SEH_begin_ecp_nistz256_mul_mont wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_mul_mont wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_mul_mont wrt ..imagebase
+
+	DD	$L$SEH_begin_ecp_nistz256_sqr_mont wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_sqr_mont wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_sqr_mont wrt ..imagebase
+
+	DD	$L$SEH_begin_ecp_nistz256_select_w5 wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_select_w5 wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_select_wX wrt ..imagebase
+
+	DD	$L$SEH_begin_ecp_nistz256_select_w7 wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_select_w7 wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_select_wX wrt ..imagebase
+	DD	$L$SEH_begin_ecp_nistz256_avx2_select_w5 wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_avx2_select_w5 wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_avx2_select_wX wrt ..imagebase
+
+	DD	$L$SEH_begin_ecp_nistz256_avx2_select_w7 wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_avx2_select_w7 wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_avx2_select_wX wrt ..imagebase
+	DD	$L$SEH_begin_ecp_nistz256_point_double wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_point_double wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_point_double wrt ..imagebase
+
+	DD	$L$SEH_begin_ecp_nistz256_point_add wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_point_add wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_point_add wrt ..imagebase
+
+	DD	$L$SEH_begin_ecp_nistz256_point_add_affine wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_point_add_affine wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_point_add_affine wrt ..imagebase
+	DD	$L$SEH_begin_ecp_nistz256_point_doublex wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_point_doublex wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_point_doublex wrt ..imagebase
+
+	DD	$L$SEH_begin_ecp_nistz256_point_addx wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_point_addx wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_point_addx wrt ..imagebase
+
+	DD	$L$SEH_begin_ecp_nistz256_point_add_affinex wrt ..imagebase
+	DD	$L$SEH_end_ecp_nistz256_point_add_affinex wrt ..imagebase
+	DD	$L$SEH_info_ecp_nistz256_point_add_affinex wrt ..imagebase
+
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_ecp_nistz256_neg:
+DB	9,0,0,0
+	DD	short_handler wrt ..imagebase
+	DD	$L$neg_body wrt ..imagebase,$L$neg_epilogue wrt ..imagebase
+$L$SEH_info_ecp_nistz256_ord_mul_mont:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$ord_mul_body wrt ..imagebase,$L$ord_mul_epilogue wrt ..imagebase
+	DD	48,0
+$L$SEH_info_ecp_nistz256_ord_sqr_mont:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$ord_sqr_body wrt ..imagebase,$L$ord_sqr_epilogue wrt ..imagebase
+	DD	48,0
+$L$SEH_info_ecp_nistz256_ord_mul_montx:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$ord_mulx_body wrt ..imagebase,$L$ord_mulx_epilogue wrt ..imagebase
+	DD	48,0
+$L$SEH_info_ecp_nistz256_ord_sqr_montx:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$ord_sqrx_body wrt ..imagebase,$L$ord_sqrx_epilogue wrt ..imagebase
+	DD	48,0
+$L$SEH_info_ecp_nistz256_mul_mont:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
+	DD	48,0
+$L$SEH_info_ecp_nistz256_sqr_mont:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
+	DD	48,0
+$L$SEH_info_ecp_nistz256_select_wX:
+DB	0x01,0x33,0x16,0x00
+DB	0x33,0xf8,0x09,0x00
+DB	0x2e,0xe8,0x08,0x00
+DB	0x29,0xd8,0x07,0x00
+DB	0x24,0xc8,0x06,0x00
+DB	0x1f,0xb8,0x05,0x00
+DB	0x1a,0xa8,0x04,0x00
+DB	0x15,0x98,0x03,0x00
+DB	0x10,0x88,0x02,0x00
+DB	0x0c,0x78,0x01,0x00
+DB	0x08,0x68,0x00,0x00
+DB	0x04,0x01,0x15,0x00
+ALIGN	8
+$L$SEH_info_ecp_nistz256_avx2_select_wX:
+DB	0x01,0x36,0x17,0x0b
+DB	0x36,0xf8,0x09,0x00
+DB	0x31,0xe8,0x08,0x00
+DB	0x2c,0xd8,0x07,0x00
+DB	0x27,0xc8,0x06,0x00
+DB	0x22,0xb8,0x05,0x00
+DB	0x1d,0xa8,0x04,0x00
+DB	0x18,0x98,0x03,0x00
+DB	0x13,0x88,0x02,0x00
+DB	0x0e,0x78,0x01,0x00
+DB	0x09,0x68,0x00,0x00
+DB	0x04,0x01,0x15,0x00
+DB	0x00,0xb3,0x00,0x00
+ALIGN	8
+$L$SEH_info_ecp_nistz256_point_double:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$point_doubleq_body wrt ..imagebase,$L$point_doubleq_epilogue wrt ..imagebase
+	DD	32*5+56,0
+$L$SEH_info_ecp_nistz256_point_add:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$point_addq_body wrt ..imagebase,$L$point_addq_epilogue wrt ..imagebase
+	DD	32*18+56,0
+$L$SEH_info_ecp_nistz256_point_add_affine:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$add_affineq_body wrt ..imagebase,$L$add_affineq_epilogue wrt ..imagebase
+	DD	32*15+56,0
+ALIGN	8
+$L$SEH_info_ecp_nistz256_point_doublex:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$point_doublex_body wrt ..imagebase,$L$point_doublex_epilogue wrt ..imagebase
+	DD	32*5+56,0
+$L$SEH_info_ecp_nistz256_point_addx:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$point_addx_body wrt ..imagebase,$L$point_addx_epilogue wrt ..imagebase
+	DD	32*18+56,0
+$L$SEH_info_ecp_nistz256_point_add_affinex:
+DB	9,0,0,0
+	DD	full_handler wrt ..imagebase
+	DD	$L$add_affinex_body wrt ..imagebase,$L$add_affinex_epilogue wrt ..imagebase
+	DD	32*15+56,0
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm
new file mode 100644
index 0000000..563699d
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/p256_beeu-x86_64-asm.asm
@@ -0,0 +1,339 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+
+
+global	beeu_mod_inverse_vartime
+ALIGN	32
+beeu_mod_inverse_vartime:	; returns rax = 1 and writes four qwords to the output on success, rax = 0 on failure
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_beeu_mod_inverse_vartime:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+; After the moves above: rdi = output pointer, rsi = first 256-bit input, rdx = second 256-bit input (added as the modulus below).
+; The rcx/r8/r9 values loaded above are overwritten (mov rcx,1 / mov r8,1 / xor r9,r9) before anything reads them.
+; Working state: X = rdi:r11:r10:r9:r8, Y = rbp:r15:r14:r13:r12, B = [48..79+rsp], A = [16..47+rsp].
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	push	rbx
+
+	push	rsi
+
+
+	sub	rsp,80
+
+	mov	QWORD[rsp],rdi	; keep the output pointer; rdi is reused as the top word of X
+
+; X = 1, Y = 0
+	mov	r8,1
+	xor	r9,r9
+	xor	r10,r10
+	xor	r11,r11
+	xor	rdi,rdi
+
+	xor	r12,r12
+	xor	r13,r13
+	xor	r14,r14
+	xor	r15,r15
+	xor	rbp,rbp
+
+; B = the 32 bytes at rsi; A = the 32 bytes at rdx
+	vmovdqu	xmm0,XMMWORD[rsi]
+	vmovdqu	xmm1,XMMWORD[16+rsi]
+	vmovdqu	XMMWORD[48+rsp],xmm0
+	vmovdqu	XMMWORD[64+rsp],xmm1
+
+	vmovdqu	xmm0,XMMWORD[rdx]
+	vmovdqu	xmm1,XMMWORD[16+rdx]
+	vmovdqu	XMMWORD[16+rsp],xmm0
+	vmovdqu	XMMWORD[32+rsp],xmm1
+
+$L$beeu_loop:	; main loop: runs until B == 0
+	xor	rbx,rbx
+	or	rbx,QWORD[48+rsp]
+	or	rbx,QWORD[56+rsp]
+	or	rbx,QWORD[64+rsp]
+	or	rbx,QWORD[72+rsp]
+	jz	NEAR $L$beeu_loop_end
+
+
+
+
+
+
+
+
+; Phase 1: rcx walks a one-bit mask up from bit 0 over the low word of B. For every zero bit passed,
+; X is halved (the 256-bit value at rdx is added first when X is odd). The walk stops at the first set bit or at mask 0x8000000.
+	mov	rcx,1
+
+
+$L$beeu_shift_loop_XB:
+	mov	rbx,rcx
+	and	rbx,QWORD[48+rsp]
+	jnz	NEAR $L$beeu_shift_loop_end_XB
+
+
+	mov	rbx,1
+	and	rbx,r8
+	jz	NEAR $L$shift1_0
+	add	r8,QWORD[rdx]
+	adc	r9,QWORD[8+rdx]
+	adc	r10,QWORD[16+rdx]
+	adc	r11,QWORD[24+rdx]
+	adc	rdi,0
+
+$L$shift1_0:
+	shrd	r8,r9,1
+	shrd	r9,r10,1
+	shrd	r10,r11,1
+	shrd	r11,rdi,1
+	shr	rdi,1
+
+	shl	rcx,1
+
+
+
+
+
+	cmp	rcx,0x8000000
+	jne	NEAR $L$beeu_shift_loop_XB
+
+$L$beeu_shift_loop_end_XB:
+	bsf	rcx,rcx	; mask -> bit index = number of halvings performed above
+	test	rcx,rcx
+	jz	NEAR $L$beeu_no_shift_XB
+
+
+; B >>= cl, done as a four-word shift in place on the stack
+	mov	rax,QWORD[((8+48))+rsp]
+	mov	rbx,QWORD[((16+48))+rsp]
+	mov	rsi,QWORD[((24+48))+rsp]
+
+	shrd	QWORD[((0+48))+rsp],rax,cl
+	shrd	QWORD[((8+48))+rsp],rbx,cl
+	shrd	QWORD[((16+48))+rsp],rsi,cl
+
+	shr	rsi,cl
+	mov	QWORD[((24+48))+rsp],rsi
+
+
+$L$beeu_no_shift_XB:
+; Phase 2: the same procedure applied to the pair (A, Y).
+	mov	rcx,1
+
+
+$L$beeu_shift_loop_YA:
+	mov	rbx,rcx
+	and	rbx,QWORD[16+rsp]
+	jnz	NEAR $L$beeu_shift_loop_end_YA
+
+
+	mov	rbx,1
+	and	rbx,r12
+	jz	NEAR $L$shift1_1
+	add	r12,QWORD[rdx]
+	adc	r13,QWORD[8+rdx]
+	adc	r14,QWORD[16+rdx]
+	adc	r15,QWORD[24+rdx]
+	adc	rbp,0
+
+$L$shift1_1:
+	shrd	r12,r13,1
+	shrd	r13,r14,1
+	shrd	r14,r15,1
+	shrd	r15,rbp,1
+	shr	rbp,1
+
+	shl	rcx,1
+
+
+
+
+
+	cmp	rcx,0x8000000
+	jne	NEAR $L$beeu_shift_loop_YA
+
+$L$beeu_shift_loop_end_YA:
+	bsf	rcx,rcx
+	test	rcx,rcx
+	jz	NEAR $L$beeu_no_shift_YA
+
+
+; A >>= cl
+	mov	rax,QWORD[((8+16))+rsp]
+	mov	rbx,QWORD[((16+16))+rsp]
+	mov	rsi,QWORD[((24+16))+rsp]
+
+	shrd	QWORD[((0+16))+rsp],rax,cl
+	shrd	QWORD[((8+16))+rsp],rbx,cl
+	shrd	QWORD[((16+16))+rsp],rsi,cl
+
+	shr	rsi,cl
+	mov	QWORD[((24+16))+rsp],rsi
+
+
+$L$beeu_no_shift_YA:
+; Phase 3: compute B - A; no borrow means B >= A and the difference is kept as the new B.
+	mov	rax,QWORD[48+rsp]
+	mov	rbx,QWORD[56+rsp]
+	mov	rsi,QWORD[64+rsp]
+	mov	rcx,QWORD[72+rsp]
+	sub	rax,QWORD[16+rsp]
+	sbb	rbx,QWORD[24+rsp]
+	sbb	rsi,QWORD[32+rsp]
+	sbb	rcx,QWORD[40+rsp]
+	jnc	NEAR $L$beeu_B_bigger_than_A
+
+; Borrow (B < A): A = A - B
+	mov	rax,QWORD[16+rsp]
+	mov	rbx,QWORD[24+rsp]
+	mov	rsi,QWORD[32+rsp]
+	mov	rcx,QWORD[40+rsp]
+	sub	rax,QWORD[48+rsp]
+	sbb	rbx,QWORD[56+rsp]
+	sbb	rsi,QWORD[64+rsp]
+	sbb	rcx,QWORD[72+rsp]
+	mov	QWORD[16+rsp],rax
+	mov	QWORD[24+rsp],rbx
+	mov	QWORD[32+rsp],rsi
+	mov	QWORD[40+rsp],rcx
+
+; ... and Y = Y + X
+	add	r12,r8
+	adc	r13,r9
+	adc	r14,r10
+	adc	r15,r11
+	adc	rbp,rdi
+	jmp	NEAR $L$beeu_loop
+
+$L$beeu_B_bigger_than_A:
+; B = B - A (difference already in rax,rbx,rsi,rcx)
+	mov	QWORD[48+rsp],rax
+	mov	QWORD[56+rsp],rbx
+	mov	QWORD[64+rsp],rsi
+	mov	QWORD[72+rsp],rcx
+
+; ... and X = X + Y
+	add	r8,r12
+	adc	r9,r13
+	adc	r10,r14
+	adc	r11,r15
+	adc	rdi,rbp
+
+	jmp	NEAR $L$beeu_loop
+
+$L$beeu_loop_end:
+
+
+
+; B is zero here; fail unless A == 1 (low word minus one, OR-ed with the upper three words, must be zero).
+	mov	rbx,QWORD[16+rsp]
+	sub	rbx,1
+	or	rbx,QWORD[24+rsp]
+	or	rbx,QWORD[32+rsp]
+	or	rbx,QWORD[40+rsp]
+
+	jnz	NEAR $L$beeu_err
+
+
+
+; Load the 256-bit value at rdx into r8..r11 (X is no longer needed).
+	mov	r8,QWORD[rdx]
+	mov	r9,QWORD[8+rdx]
+	mov	r10,QWORD[16+rdx]
+	mov	r11,QWORD[24+rdx]
+	xor	rdi,rdi
+
+$L$beeu_reduction_loop:	; repeatedly subtract r11:r8 from the five-word Y until the subtraction borrows
+	mov	QWORD[16+rsp],r12
+	mov	QWORD[24+rsp],r13
+	mov	QWORD[32+rsp],r14
+	mov	QWORD[40+rsp],r15
+	mov	QWORD[48+rsp],rbp
+
+
+	sub	r12,r8
+	sbb	r13,r9
+	sbb	r14,r10
+	sbb	r15,r11
+	sbb	rbp,0
+
+; On borrow, put back the low four words saved above (cmovc leaves the flags intact for the jnc).
+	cmovc	r12,QWORD[16+rsp]
+	cmovc	r13,QWORD[24+rsp]
+	cmovc	r14,QWORD[32+rsp]
+	cmovc	r15,QWORD[40+rsp]
+	jnc	NEAR $L$beeu_reduction_loop
+
+; result = (value loaded from rdx) - Y
+	sub	r8,r12
+	sbb	r9,r13
+	sbb	r10,r14
+	sbb	r11,r15
+
+$L$beeu_save:
+
+	mov	rdi,QWORD[rsp]	; output pointer saved at entry
+
+	mov	QWORD[rdi],r8
+	mov	QWORD[8+rdi],r9
+	mov	QWORD[16+rdi],r10
+	mov	QWORD[24+rdi],r11
+
+
+	mov	rax,1
+	jmp	NEAR $L$beeu_finish
+
+$L$beeu_err:
+
+	xor	rax,rax
+
+$L$beeu_finish:
+	add	rsp,80
+
+	pop	rsi
+
+	pop	rbx
+
+	pop	r15
+
+	pop	r14
+
+	pop	r13
+
+	pop	r12
+
+	pop	rbp
+
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+
+$L$SEH_end_beeu_mod_inverse_vartime:
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm
new file mode 100644
index 0000000..89b91de
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/rdrand-x86_64.asm
@@ -0,0 +1,58 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+
+
+
+global	CRYPTO_rdrand
+; In: rcx = address of a qword to fill. Out: rax = carry flag left by RDRAND (1 when a value was delivered, else 0).
+ALIGN	16
+CRYPTO_rdrand:
+
+	xor	rax,rax
+DB	73,15,199,240	; bytes 49 0F C7 F0 = rdrand r8
+
+	adc	rax,rax	; rax was 0, so rax = CF
+	mov	QWORD[rcx],r8	; stored whether or not RDRAND reported success
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+global	CRYPTO_rdrand_multiple8_buf
+; In: rcx = buffer, rdx = byte count. Out: rax = 1 when every qword was filled (or the count was 0), 0 on the first RDRAND failure.
+ALIGN	16
+CRYPTO_rdrand_multiple8_buf:
+
+	test	rdx,rdx
+	jz	NEAR $L$out
+	mov	r8,8	; step size; the loop only terminates via jnz when rdx is a multiple of 8
+$L$loop:
+DB	73,15,199,241	; bytes 49 0F C7 F1 = rdrand r9
+	jnc	NEAR $L$err	; CF clear: no random value was delivered
+	mov	QWORD[rcx],r9
+	add	rcx,r8
+	sub	rdx,r8
+	jnz	NEAR $L$loop
+$L$out:
+	mov	rax,1
+	DB	0F3h,0C3h		;repret
+$L$err:
+	xor	rax,rax
+	DB	0F3h,0C3h		;repret
+
+
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/rsaz-avx2.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/rsaz-avx2.asm
new file mode 100644
index 0000000..74e2705
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/rsaz-avx2.asm
@@ -0,0 +1,1972 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+global	rsaz_1024_sqr_avx2
+
+ALIGN	64
+rsaz_1024_sqr_avx2:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_rsaz_1024_sqr_avx2:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+
+
+
+	lea	rax,[rsp]
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	vzeroupper
+	lea	rsp,[((-168))+rsp]
+	vmovaps	XMMWORD[(-216)+rax],xmm6
+	vmovaps	XMMWORD[(-200)+rax],xmm7
+	vmovaps	XMMWORD[(-184)+rax],xmm8
+	vmovaps	XMMWORD[(-168)+rax],xmm9
+	vmovaps	XMMWORD[(-152)+rax],xmm10
+	vmovaps	XMMWORD[(-136)+rax],xmm11
+	vmovaps	XMMWORD[(-120)+rax],xmm12
+	vmovaps	XMMWORD[(-104)+rax],xmm13
+	vmovaps	XMMWORD[(-88)+rax],xmm14
+	vmovaps	XMMWORD[(-72)+rax],xmm15
+$L$sqr_1024_body:
+	mov	rbp,rax
+
+	mov	r13,rdx
+	sub	rsp,832
+	mov	r15,r13
+	sub	rdi,-128
+	sub	rsi,-128
+	sub	r13,-128
+
+	and	r15,4095
+	add	r15,32*10
+	shr	r15,12
+	vpxor	ymm9,ymm9,ymm9
+	jz	NEAR $L$sqr_1024_no_n_copy
+
+
+
+
+
+	sub	rsp,32*10
+	vmovdqu	ymm0,YMMWORD[((0-128))+r13]
+	and	rsp,-2048
+	vmovdqu	ymm1,YMMWORD[((32-128))+r13]
+	vmovdqu	ymm2,YMMWORD[((64-128))+r13]
+	vmovdqu	ymm3,YMMWORD[((96-128))+r13]
+	vmovdqu	ymm4,YMMWORD[((128-128))+r13]
+	vmovdqu	ymm5,YMMWORD[((160-128))+r13]
+	vmovdqu	ymm6,YMMWORD[((192-128))+r13]
+	vmovdqu	ymm7,YMMWORD[((224-128))+r13]
+	vmovdqu	ymm8,YMMWORD[((256-128))+r13]
+	lea	r13,[((832+128))+rsp]
+	vmovdqu	YMMWORD[(0-128)+r13],ymm0
+	vmovdqu	YMMWORD[(32-128)+r13],ymm1
+	vmovdqu	YMMWORD[(64-128)+r13],ymm2
+	vmovdqu	YMMWORD[(96-128)+r13],ymm3
+	vmovdqu	YMMWORD[(128-128)+r13],ymm4
+	vmovdqu	YMMWORD[(160-128)+r13],ymm5
+	vmovdqu	YMMWORD[(192-128)+r13],ymm6
+	vmovdqu	YMMWORD[(224-128)+r13],ymm7
+	vmovdqu	YMMWORD[(256-128)+r13],ymm8
+	vmovdqu	YMMWORD[(288-128)+r13],ymm9
+
+$L$sqr_1024_no_n_copy:
+	and	rsp,-1024
+
+	vmovdqu	ymm1,YMMWORD[((32-128))+rsi]
+	vmovdqu	ymm2,YMMWORD[((64-128))+rsi]
+	vmovdqu	ymm3,YMMWORD[((96-128))+rsi]
+	vmovdqu	ymm4,YMMWORD[((128-128))+rsi]
+	vmovdqu	ymm5,YMMWORD[((160-128))+rsi]
+	vmovdqu	ymm6,YMMWORD[((192-128))+rsi]
+	vmovdqu	ymm7,YMMWORD[((224-128))+rsi]
+	vmovdqu	ymm8,YMMWORD[((256-128))+rsi]
+
+	lea	rbx,[192+rsp]
+	vmovdqu	ymm15,YMMWORD[$L$and_mask]
+	jmp	NEAR $L$OOP_GRANDE_SQR_1024
+
+ALIGN	32
+$L$OOP_GRANDE_SQR_1024:
+	lea	r9,[((576+128))+rsp]
+	lea	r12,[448+rsp]
+
+
+
+
+	vpaddq	ymm1,ymm1,ymm1
+	vpbroadcastq	ymm10,QWORD[((0-128))+rsi]
+	vpaddq	ymm2,ymm2,ymm2
+	vmovdqa	YMMWORD[(0-128)+r9],ymm1
+	vpaddq	ymm3,ymm3,ymm3
+	vmovdqa	YMMWORD[(32-128)+r9],ymm2
+	vpaddq	ymm4,ymm4,ymm4
+	vmovdqa	YMMWORD[(64-128)+r9],ymm3
+	vpaddq	ymm5,ymm5,ymm5
+	vmovdqa	YMMWORD[(96-128)+r9],ymm4
+	vpaddq	ymm6,ymm6,ymm6
+	vmovdqa	YMMWORD[(128-128)+r9],ymm5
+	vpaddq	ymm7,ymm7,ymm7
+	vmovdqa	YMMWORD[(160-128)+r9],ymm6
+	vpaddq	ymm8,ymm8,ymm8
+	vmovdqa	YMMWORD[(192-128)+r9],ymm7
+	vpxor	ymm9,ymm9,ymm9
+	vmovdqa	YMMWORD[(224-128)+r9],ymm8
+
+	vpmuludq	ymm0,ymm10,YMMWORD[((0-128))+rsi]
+	vpbroadcastq	ymm11,QWORD[((32-128))+rsi]
+	vmovdqu	YMMWORD[(288-192)+rbx],ymm9
+	vpmuludq	ymm1,ymm1,ymm10
+	vmovdqu	YMMWORD[(320-448)+r12],ymm9
+	vpmuludq	ymm2,ymm2,ymm10
+	vmovdqu	YMMWORD[(352-448)+r12],ymm9
+	vpmuludq	ymm3,ymm3,ymm10
+	vmovdqu	YMMWORD[(384-448)+r12],ymm9
+	vpmuludq	ymm4,ymm4,ymm10
+	vmovdqu	YMMWORD[(416-448)+r12],ymm9
+	vpmuludq	ymm5,ymm5,ymm10
+	vmovdqu	YMMWORD[(448-448)+r12],ymm9
+	vpmuludq	ymm6,ymm6,ymm10
+	vmovdqu	YMMWORD[(480-448)+r12],ymm9
+	vpmuludq	ymm7,ymm7,ymm10
+	vmovdqu	YMMWORD[(512-448)+r12],ymm9
+	vpmuludq	ymm8,ymm8,ymm10
+	vpbroadcastq	ymm10,QWORD[((64-128))+rsi]
+	vmovdqu	YMMWORD[(544-448)+r12],ymm9
+
+	mov	r15,rsi
+	mov	r14d,4
+	jmp	NEAR $L$sqr_entry_1024
+ALIGN	32
+$L$OOP_SQR_1024:
+	vpbroadcastq	ymm11,QWORD[((32-128))+r15]
+	vpmuludq	ymm0,ymm10,YMMWORD[((0-128))+rsi]
+	vpaddq	ymm0,ymm0,YMMWORD[((0-192))+rbx]
+	vpmuludq	ymm1,ymm10,YMMWORD[((0-128))+r9]
+	vpaddq	ymm1,ymm1,YMMWORD[((32-192))+rbx]
+	vpmuludq	ymm2,ymm10,YMMWORD[((32-128))+r9]
+	vpaddq	ymm2,ymm2,YMMWORD[((64-192))+rbx]
+	vpmuludq	ymm3,ymm10,YMMWORD[((64-128))+r9]
+	vpaddq	ymm3,ymm3,YMMWORD[((96-192))+rbx]
+	vpmuludq	ymm4,ymm10,YMMWORD[((96-128))+r9]
+	vpaddq	ymm4,ymm4,YMMWORD[((128-192))+rbx]
+	vpmuludq	ymm5,ymm10,YMMWORD[((128-128))+r9]
+	vpaddq	ymm5,ymm5,YMMWORD[((160-192))+rbx]
+	vpmuludq	ymm6,ymm10,YMMWORD[((160-128))+r9]
+	vpaddq	ymm6,ymm6,YMMWORD[((192-192))+rbx]
+	vpmuludq	ymm7,ymm10,YMMWORD[((192-128))+r9]
+	vpaddq	ymm7,ymm7,YMMWORD[((224-192))+rbx]
+	vpmuludq	ymm8,ymm10,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm10,QWORD[((64-128))+r15]
+	vpaddq	ymm8,ymm8,YMMWORD[((256-192))+rbx]
+$L$sqr_entry_1024:
+	vmovdqu	YMMWORD[(0-192)+rbx],ymm0
+	vmovdqu	YMMWORD[(32-192)+rbx],ymm1
+
+	vpmuludq	ymm12,ymm11,YMMWORD[((32-128))+rsi]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm14,ymm11,YMMWORD[((32-128))+r9]
+	vpaddq	ymm3,ymm3,ymm14
+	vpmuludq	ymm13,ymm11,YMMWORD[((64-128))+r9]
+	vpaddq	ymm4,ymm4,ymm13
+	vpmuludq	ymm12,ymm11,YMMWORD[((96-128))+r9]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm14,ymm11,YMMWORD[((128-128))+r9]
+	vpaddq	ymm6,ymm6,ymm14
+	vpmuludq	ymm13,ymm11,YMMWORD[((160-128))+r9]
+	vpaddq	ymm7,ymm7,ymm13
+	vpmuludq	ymm12,ymm11,YMMWORD[((192-128))+r9]
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm0,ymm11,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm11,QWORD[((96-128))+r15]
+	vpaddq	ymm0,ymm0,YMMWORD[((288-192))+rbx]
+
+	vmovdqu	YMMWORD[(64-192)+rbx],ymm2
+	vmovdqu	YMMWORD[(96-192)+rbx],ymm3
+
+	vpmuludq	ymm13,ymm10,YMMWORD[((64-128))+rsi]
+	vpaddq	ymm4,ymm4,ymm13
+	vpmuludq	ymm12,ymm10,YMMWORD[((64-128))+r9]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm14,ymm10,YMMWORD[((96-128))+r9]
+	vpaddq	ymm6,ymm6,ymm14
+	vpmuludq	ymm13,ymm10,YMMWORD[((128-128))+r9]
+	vpaddq	ymm7,ymm7,ymm13
+	vpmuludq	ymm12,ymm10,YMMWORD[((160-128))+r9]
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm14,ymm10,YMMWORD[((192-128))+r9]
+	vpaddq	ymm0,ymm0,ymm14
+	vpmuludq	ymm1,ymm10,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm10,QWORD[((128-128))+r15]
+	vpaddq	ymm1,ymm1,YMMWORD[((320-448))+r12]
+
+	vmovdqu	YMMWORD[(128-192)+rbx],ymm4
+	vmovdqu	YMMWORD[(160-192)+rbx],ymm5
+
+	vpmuludq	ymm12,ymm11,YMMWORD[((96-128))+rsi]
+	vpaddq	ymm6,ymm6,ymm12
+	vpmuludq	ymm14,ymm11,YMMWORD[((96-128))+r9]
+	vpaddq	ymm7,ymm7,ymm14
+	vpmuludq	ymm13,ymm11,YMMWORD[((128-128))+r9]
+	vpaddq	ymm8,ymm8,ymm13
+	vpmuludq	ymm12,ymm11,YMMWORD[((160-128))+r9]
+	vpaddq	ymm0,ymm0,ymm12
+	vpmuludq	ymm14,ymm11,YMMWORD[((192-128))+r9]
+	vpaddq	ymm1,ymm1,ymm14
+	vpmuludq	ymm2,ymm11,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm11,QWORD[((160-128))+r15]
+	vpaddq	ymm2,ymm2,YMMWORD[((352-448))+r12]
+
+	vmovdqu	YMMWORD[(192-192)+rbx],ymm6
+	vmovdqu	YMMWORD[(224-192)+rbx],ymm7
+
+	vpmuludq	ymm12,ymm10,YMMWORD[((128-128))+rsi]
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm14,ymm10,YMMWORD[((128-128))+r9]
+	vpaddq	ymm0,ymm0,ymm14
+	vpmuludq	ymm13,ymm10,YMMWORD[((160-128))+r9]
+	vpaddq	ymm1,ymm1,ymm13
+	vpmuludq	ymm12,ymm10,YMMWORD[((192-128))+r9]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm3,ymm10,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm10,QWORD[((192-128))+r15]
+	vpaddq	ymm3,ymm3,YMMWORD[((384-448))+r12]
+
+	vmovdqu	YMMWORD[(256-192)+rbx],ymm8
+	vmovdqu	YMMWORD[(288-192)+rbx],ymm0
+	lea	rbx,[8+rbx]
+
+	vpmuludq	ymm13,ymm11,YMMWORD[((160-128))+rsi]
+	vpaddq	ymm1,ymm1,ymm13
+	vpmuludq	ymm12,ymm11,YMMWORD[((160-128))+r9]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm14,ymm11,YMMWORD[((192-128))+r9]
+	vpaddq	ymm3,ymm3,ymm14
+	vpmuludq	ymm4,ymm11,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm11,QWORD[((224-128))+r15]
+	vpaddq	ymm4,ymm4,YMMWORD[((416-448))+r12]
+
+	vmovdqu	YMMWORD[(320-448)+r12],ymm1
+	vmovdqu	YMMWORD[(352-448)+r12],ymm2
+
+	vpmuludq	ymm12,ymm10,YMMWORD[((192-128))+rsi]
+	vpaddq	ymm3,ymm3,ymm12
+	vpmuludq	ymm14,ymm10,YMMWORD[((192-128))+r9]
+	vpbroadcastq	ymm0,QWORD[((256-128))+r15]
+	vpaddq	ymm4,ymm4,ymm14
+	vpmuludq	ymm5,ymm10,YMMWORD[((224-128))+r9]
+	vpbroadcastq	ymm10,QWORD[((0+8-128))+r15]
+	vpaddq	ymm5,ymm5,YMMWORD[((448-448))+r12]
+
+	vmovdqu	YMMWORD[(384-448)+r12],ymm3
+	vmovdqu	YMMWORD[(416-448)+r12],ymm4
+	lea	r15,[8+r15]
+
+	vpmuludq	ymm12,ymm11,YMMWORD[((224-128))+rsi]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm6,ymm11,YMMWORD[((224-128))+r9]
+	vpaddq	ymm6,ymm6,YMMWORD[((480-448))+r12]
+
+	vpmuludq	ymm7,ymm0,YMMWORD[((256-128))+rsi]
+	vmovdqu	YMMWORD[(448-448)+r12],ymm5
+	vpaddq	ymm7,ymm7,YMMWORD[((512-448))+r12]
+	vmovdqu	YMMWORD[(480-448)+r12],ymm6
+	vmovdqu	YMMWORD[(512-448)+r12],ymm7
+	lea	r12,[8+r12]
+
+	dec	r14d
+	jnz	NEAR $L$OOP_SQR_1024
+
+	vmovdqu	ymm8,YMMWORD[256+rsp]
+	vmovdqu	ymm1,YMMWORD[288+rsp]
+	vmovdqu	ymm2,YMMWORD[320+rsp]
+	lea	rbx,[192+rsp]
+
+	vpsrlq	ymm14,ymm8,29
+	vpand	ymm8,ymm8,ymm15
+	vpsrlq	ymm11,ymm1,29
+	vpand	ymm1,ymm1,ymm15
+
+	vpermq	ymm14,ymm14,0x93
+	vpxor	ymm9,ymm9,ymm9
+	vpermq	ymm11,ymm11,0x93
+
+	vpblendd	ymm10,ymm14,ymm9,3
+	vpblendd	ymm14,ymm11,ymm14,3
+	vpaddq	ymm8,ymm8,ymm10
+	vpblendd	ymm11,ymm9,ymm11,3
+	vpaddq	ymm1,ymm1,ymm14
+	vpaddq	ymm2,ymm2,ymm11
+	vmovdqu	YMMWORD[(288-192)+rbx],ymm1
+	vmovdqu	YMMWORD[(320-192)+rbx],ymm2
+
+	mov	rax,QWORD[rsp]
+	mov	r10,QWORD[8+rsp]
+	mov	r11,QWORD[16+rsp]
+	mov	r12,QWORD[24+rsp]
+	vmovdqu	ymm1,YMMWORD[32+rsp]
+	vmovdqu	ymm2,YMMWORD[((64-192))+rbx]
+	vmovdqu	ymm3,YMMWORD[((96-192))+rbx]
+	vmovdqu	ymm4,YMMWORD[((128-192))+rbx]
+	vmovdqu	ymm5,YMMWORD[((160-192))+rbx]
+	vmovdqu	ymm6,YMMWORD[((192-192))+rbx]
+	vmovdqu	ymm7,YMMWORD[((224-192))+rbx]
+
+	mov	r9,rax
+	imul	eax,ecx
+	and	eax,0x1fffffff
+	vmovd	xmm12,eax
+
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+r13]
+	vpbroadcastq	ymm12,xmm12
+	add	r9,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+r13]
+	shr	r9,29
+	add	r10,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((16-128))+r13]
+	add	r10,r9
+	add	r11,rax
+	imul	rdx,QWORD[((24-128))+r13]
+	add	r12,rdx
+
+	mov	rax,r10
+	imul	eax,ecx
+	and	eax,0x1fffffff
+
+	mov	r14d,9
+	jmp	NEAR $L$OOP_REDUCE_1024
+
+ALIGN	32
+$L$OOP_REDUCE_1024:
+	vmovd	xmm13,eax
+	vpbroadcastq	ymm13,xmm13
+
+	vpmuludq	ymm10,ymm12,YMMWORD[((32-128))+r13]
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+r13]
+	vpaddq	ymm1,ymm1,ymm10
+	add	r10,rax
+	vpmuludq	ymm14,ymm12,YMMWORD[((64-128))+r13]
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+r13]
+	vpaddq	ymm2,ymm2,ymm14
+	vpmuludq	ymm11,ymm12,YMMWORD[((96-128))+r13]
+DB	0x67
+	add	r11,rax
+DB	0x67
+	mov	rax,rdx
+	imul	rax,QWORD[((16-128))+r13]
+	shr	r10,29
+	vpaddq	ymm3,ymm3,ymm11
+	vpmuludq	ymm10,ymm12,YMMWORD[((128-128))+r13]
+	add	r12,rax
+	add	r11,r10
+	vpaddq	ymm4,ymm4,ymm10
+	vpmuludq	ymm14,ymm12,YMMWORD[((160-128))+r13]
+	mov	rax,r11
+	imul	eax,ecx
+	vpaddq	ymm5,ymm5,ymm14
+	vpmuludq	ymm11,ymm12,YMMWORD[((192-128))+r13]
+	and	eax,0x1fffffff
+	vpaddq	ymm6,ymm6,ymm11
+	vpmuludq	ymm10,ymm12,YMMWORD[((224-128))+r13]
+	vpaddq	ymm7,ymm7,ymm10
+	vpmuludq	ymm14,ymm12,YMMWORD[((256-128))+r13]
+	vmovd	xmm12,eax
+
+	vpaddq	ymm8,ymm8,ymm14
+
+	vpbroadcastq	ymm12,xmm12
+
+	vpmuludq	ymm11,ymm13,YMMWORD[((32-8-128))+r13]
+	vmovdqu	ymm14,YMMWORD[((96-8-128))+r13]
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+r13]
+	vpaddq	ymm1,ymm1,ymm11
+	vpmuludq	ymm10,ymm13,YMMWORD[((64-8-128))+r13]
+	vmovdqu	ymm11,YMMWORD[((128-8-128))+r13]
+	add	r11,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+r13]
+	vpaddq	ymm2,ymm2,ymm10
+	add	rax,r12
+	shr	r11,29
+	vpmuludq	ymm14,ymm14,ymm13
+	vmovdqu	ymm10,YMMWORD[((160-8-128))+r13]
+	add	rax,r11
+	vpaddq	ymm3,ymm3,ymm14
+	vpmuludq	ymm11,ymm11,ymm13
+	vmovdqu	ymm14,YMMWORD[((192-8-128))+r13]
+DB	0x67
+	mov	r12,rax
+	imul	eax,ecx
+	vpaddq	ymm4,ymm4,ymm11
+	vpmuludq	ymm10,ymm10,ymm13
+DB	0xc4,0x41,0x7e,0x6f,0x9d,0x58,0x00,0x00,0x00
+	and	eax,0x1fffffff
+	vpaddq	ymm5,ymm5,ymm10
+	vpmuludq	ymm14,ymm14,ymm13
+	vmovdqu	ymm10,YMMWORD[((256-8-128))+r13]
+	vpaddq	ymm6,ymm6,ymm14
+	vpmuludq	ymm11,ymm11,ymm13
+	vmovdqu	ymm9,YMMWORD[((288-8-128))+r13]
+	vmovd	xmm0,eax
+	imul	rax,QWORD[((-128))+r13]
+	vpaddq	ymm7,ymm7,ymm11
+	vpmuludq	ymm10,ymm10,ymm13
+	vmovdqu	ymm14,YMMWORD[((32-16-128))+r13]
+	vpbroadcastq	ymm0,xmm0
+	vpaddq	ymm8,ymm8,ymm10
+	vpmuludq	ymm9,ymm9,ymm13
+	vmovdqu	ymm11,YMMWORD[((64-16-128))+r13]
+	add	r12,rax
+
+	vmovdqu	ymm13,YMMWORD[((32-24-128))+r13]
+	vpmuludq	ymm14,ymm14,ymm12
+	vmovdqu	ymm10,YMMWORD[((96-16-128))+r13]
+	vpaddq	ymm1,ymm1,ymm14
+	vpmuludq	ymm13,ymm13,ymm0
+	vpmuludq	ymm11,ymm11,ymm12
+DB	0xc4,0x41,0x7e,0x6f,0xb5,0xf0,0xff,0xff,0xff
+	vpaddq	ymm13,ymm13,ymm1
+	vpaddq	ymm2,ymm2,ymm11
+	vpmuludq	ymm10,ymm10,ymm12
+	vmovdqu	ymm11,YMMWORD[((160-16-128))+r13]
+DB	0x67
+	vmovq	rax,xmm13
+	vmovdqu	YMMWORD[rsp],ymm13
+	vpaddq	ymm3,ymm3,ymm10
+	vpmuludq	ymm14,ymm14,ymm12
+	vmovdqu	ymm10,YMMWORD[((192-16-128))+r13]
+	vpaddq	ymm4,ymm4,ymm14
+	vpmuludq	ymm11,ymm11,ymm12
+	vmovdqu	ymm14,YMMWORD[((224-16-128))+r13]
+	vpaddq	ymm5,ymm5,ymm11
+	vpmuludq	ymm10,ymm10,ymm12
+	vmovdqu	ymm11,YMMWORD[((256-16-128))+r13]
+	vpaddq	ymm6,ymm6,ymm10
+	vpmuludq	ymm14,ymm14,ymm12
+	shr	r12,29
+	vmovdqu	ymm10,YMMWORD[((288-16-128))+r13]
+	add	rax,r12
+	vpaddq	ymm7,ymm7,ymm14
+	vpmuludq	ymm11,ymm11,ymm12
+
+	mov	r9,rax
+	imul	eax,ecx
+	vpaddq	ymm8,ymm8,ymm11
+	vpmuludq	ymm10,ymm10,ymm12
+	and	eax,0x1fffffff
+	vmovd	xmm12,eax
+	vmovdqu	ymm11,YMMWORD[((96-24-128))+r13]
+DB	0x67
+	vpaddq	ymm9,ymm9,ymm10
+	vpbroadcastq	ymm12,xmm12
+
+	vpmuludq	ymm14,ymm0,YMMWORD[((64-24-128))+r13]
+	vmovdqu	ymm10,YMMWORD[((128-24-128))+r13]
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+r13]
+	mov	r10,QWORD[8+rsp]
+	vpaddq	ymm1,ymm2,ymm14
+	vpmuludq	ymm11,ymm11,ymm0
+	vmovdqu	ymm14,YMMWORD[((160-24-128))+r13]
+	add	r9,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+r13]
+DB	0x67
+	shr	r9,29
+	mov	r11,QWORD[16+rsp]
+	vpaddq	ymm2,ymm3,ymm11
+	vpmuludq	ymm10,ymm10,ymm0
+	vmovdqu	ymm11,YMMWORD[((192-24-128))+r13]
+	add	r10,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((16-128))+r13]
+	vpaddq	ymm3,ymm4,ymm10
+	vpmuludq	ymm14,ymm14,ymm0
+	vmovdqu	ymm10,YMMWORD[((224-24-128))+r13]
+	imul	rdx,QWORD[((24-128))+r13]
+	add	r11,rax
+	lea	rax,[r10*1+r9]
+	vpaddq	ymm4,ymm5,ymm14
+	vpmuludq	ymm11,ymm11,ymm0
+	vmovdqu	ymm14,YMMWORD[((256-24-128))+r13]
+	mov	r10,rax
+	imul	eax,ecx
+	vpmuludq	ymm10,ymm10,ymm0
+	vpaddq	ymm5,ymm6,ymm11
+	vmovdqu	ymm11,YMMWORD[((288-24-128))+r13]
+	and	eax,0x1fffffff
+	vpaddq	ymm6,ymm7,ymm10
+	vpmuludq	ymm14,ymm14,ymm0
+	add	rdx,QWORD[24+rsp]
+	vpaddq	ymm7,ymm8,ymm14
+	vpmuludq	ymm11,ymm11,ymm0
+	vpaddq	ymm8,ymm9,ymm11
+	vmovq	xmm9,r12
+	mov	r12,rdx
+
+	dec	r14d
+	jnz	NEAR $L$OOP_REDUCE_1024
+	lea	r12,[448+rsp]
+	vpaddq	ymm0,ymm13,ymm9
+	vpxor	ymm9,ymm9,ymm9
+
+	vpaddq	ymm0,ymm0,YMMWORD[((288-192))+rbx]
+	vpaddq	ymm1,ymm1,YMMWORD[((320-448))+r12]
+	vpaddq	ymm2,ymm2,YMMWORD[((352-448))+r12]
+	vpaddq	ymm3,ymm3,YMMWORD[((384-448))+r12]
+	vpaddq	ymm4,ymm4,YMMWORD[((416-448))+r12]
+	vpaddq	ymm5,ymm5,YMMWORD[((448-448))+r12]
+	vpaddq	ymm6,ymm6,YMMWORD[((480-448))+r12]
+	vpaddq	ymm7,ymm7,YMMWORD[((512-448))+r12]
+	vpaddq	ymm8,ymm8,YMMWORD[((544-448))+r12]
+
+	vpsrlq	ymm14,ymm0,29
+	vpand	ymm0,ymm0,ymm15
+	vpsrlq	ymm11,ymm1,29
+	vpand	ymm1,ymm1,ymm15
+	vpsrlq	ymm12,ymm2,29
+	vpermq	ymm14,ymm14,0x93
+	vpand	ymm2,ymm2,ymm15
+	vpsrlq	ymm13,ymm3,29
+	vpermq	ymm11,ymm11,0x93
+	vpand	ymm3,ymm3,ymm15
+	vpermq	ymm12,ymm12,0x93
+
+	vpblendd	ymm10,ymm14,ymm9,3
+	vpermq	ymm13,ymm13,0x93
+	vpblendd	ymm14,ymm11,ymm14,3
+	vpaddq	ymm0,ymm0,ymm10
+	vpblendd	ymm11,ymm12,ymm11,3
+	vpaddq	ymm1,ymm1,ymm14
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm2,ymm2,ymm11
+	vpblendd	ymm13,ymm9,ymm13,3
+	vpaddq	ymm3,ymm3,ymm12
+	vpaddq	ymm4,ymm4,ymm13
+
+	vpsrlq	ymm14,ymm0,29
+	vpand	ymm0,ymm0,ymm15
+	vpsrlq	ymm11,ymm1,29
+	vpand	ymm1,ymm1,ymm15
+	vpsrlq	ymm12,ymm2,29
+	vpermq	ymm14,ymm14,0x93
+	vpand	ymm2,ymm2,ymm15
+	vpsrlq	ymm13,ymm3,29
+	vpermq	ymm11,ymm11,0x93
+	vpand	ymm3,ymm3,ymm15
+	vpermq	ymm12,ymm12,0x93
+
+	vpblendd	ymm10,ymm14,ymm9,3
+	vpermq	ymm13,ymm13,0x93
+	vpblendd	ymm14,ymm11,ymm14,3
+	vpaddq	ymm0,ymm0,ymm10
+	vpblendd	ymm11,ymm12,ymm11,3
+	vpaddq	ymm1,ymm1,ymm14
+	vmovdqu	YMMWORD[(0-128)+rdi],ymm0
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm2,ymm2,ymm11
+	vmovdqu	YMMWORD[(32-128)+rdi],ymm1
+	vpblendd	ymm13,ymm9,ymm13,3
+	vpaddq	ymm3,ymm3,ymm12
+	vmovdqu	YMMWORD[(64-128)+rdi],ymm2
+	vpaddq	ymm4,ymm4,ymm13
+	vmovdqu	YMMWORD[(96-128)+rdi],ymm3
+	vpsrlq	ymm14,ymm4,29
+	vpand	ymm4,ymm4,ymm15
+	vpsrlq	ymm11,ymm5,29
+	vpand	ymm5,ymm5,ymm15
+	vpsrlq	ymm12,ymm6,29
+	vpermq	ymm14,ymm14,0x93
+	vpand	ymm6,ymm6,ymm15
+	vpsrlq	ymm13,ymm7,29
+	vpermq	ymm11,ymm11,0x93
+	vpand	ymm7,ymm7,ymm15
+	vpsrlq	ymm0,ymm8,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm8,ymm8,ymm15
+	vpermq	ymm13,ymm13,0x93
+
+	vpblendd	ymm10,ymm14,ymm9,3
+	vpermq	ymm0,ymm0,0x93
+	vpblendd	ymm14,ymm11,ymm14,3
+	vpaddq	ymm4,ymm4,ymm10
+	vpblendd	ymm11,ymm12,ymm11,3
+	vpaddq	ymm5,ymm5,ymm14
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm6,ymm6,ymm11
+	vpblendd	ymm13,ymm0,ymm13,3
+	vpaddq	ymm7,ymm7,ymm12
+	vpaddq	ymm8,ymm8,ymm13
+
+	vpsrlq	ymm14,ymm4,29
+	vpand	ymm4,ymm4,ymm15
+	vpsrlq	ymm11,ymm5,29
+	vpand	ymm5,ymm5,ymm15
+	vpsrlq	ymm12,ymm6,29
+	vpermq	ymm14,ymm14,0x93
+	vpand	ymm6,ymm6,ymm15
+	vpsrlq	ymm13,ymm7,29
+	vpermq	ymm11,ymm11,0x93
+	vpand	ymm7,ymm7,ymm15
+	vpsrlq	ymm0,ymm8,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm8,ymm8,ymm15
+	vpermq	ymm13,ymm13,0x93
+
+	vpblendd	ymm10,ymm14,ymm9,3
+	vpermq	ymm0,ymm0,0x93
+	vpblendd	ymm14,ymm11,ymm14,3
+	vpaddq	ymm4,ymm4,ymm10
+	vpblendd	ymm11,ymm12,ymm11,3
+	vpaddq	ymm5,ymm5,ymm14
+	vmovdqu	YMMWORD[(128-128)+rdi],ymm4
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm6,ymm6,ymm11
+	vmovdqu	YMMWORD[(160-128)+rdi],ymm5
+	vpblendd	ymm13,ymm0,ymm13,3
+	vpaddq	ymm7,ymm7,ymm12
+	vmovdqu	YMMWORD[(192-128)+rdi],ymm6
+	vpaddq	ymm8,ymm8,ymm13
+	vmovdqu	YMMWORD[(224-128)+rdi],ymm7
+	vmovdqu	YMMWORD[(256-128)+rdi],ymm8
+
+	mov	rsi,rdi
+	dec	r8d
+	jne	NEAR $L$OOP_GRANDE_SQR_1024
+
+	vzeroall
+	mov	rax,rbp
+
+$L$sqr_1024_in_tail:
+	movaps	xmm6,XMMWORD[((-216))+rax]
+	movaps	xmm7,XMMWORD[((-200))+rax]
+	movaps	xmm8,XMMWORD[((-184))+rax]
+	movaps	xmm9,XMMWORD[((-168))+rax]
+	movaps	xmm10,XMMWORD[((-152))+rax]
+	movaps	xmm11,XMMWORD[((-136))+rax]
+	movaps	xmm12,XMMWORD[((-120))+rax]
+	movaps	xmm13,XMMWORD[((-104))+rax]
+	movaps	xmm14,XMMWORD[((-88))+rax]
+	movaps	xmm15,XMMWORD[((-72))+rax]
+	mov	r15,QWORD[((-48))+rax]
+
+	mov	r14,QWORD[((-40))+rax]
+
+	mov	r13,QWORD[((-32))+rax]
+
+	mov	r12,QWORD[((-24))+rax]
+
+	mov	rbp,QWORD[((-16))+rax]
+
+	mov	rbx,QWORD[((-8))+rax]
+
+	lea	rsp,[rax]
+
+$L$sqr_1024_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_rsaz_1024_sqr_avx2:
+global	rsaz_1024_mul_avx2
+; rsaz_1024_mul_avx2: AVX2 multiply interleaved with reduction on operands held as 36 qwords of 29-bit limbs (presumably Montgomery multiplication - confirm against the C prototype).
+ALIGN	64
+rsaz_1024_mul_avx2:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_rsaz_1024_mul_avx2:
+	mov	rdi,rcx	; 1st Win64 argument -> rdi (result pointer, stored through at the end)
+	mov	rsi,rdx	; 2nd Win64 argument -> rsi (first multiplicand)
+	mov	rdx,r8	; 3rd Win64 argument -> rdx (second multiplicand, copied to r13 below)
+	mov	rcx,r9	; 4th Win64 argument -> rcx (operand used in the reduction steps)
+	mov	r8,QWORD[40+rsp]	; 5th argument is read from the stack (32-bit multiplier used via r8d)
+
+
+
+	lea	rax,[rsp]	; rax = stack pointer at entry; all saves below are addressed relative to it
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	vzeroupper
+	lea	rsp,[((-168))+rsp]	; room for the ten xmm registers saved below
+	vmovaps	XMMWORD[(-216)+rax],xmm6
+	vmovaps	XMMWORD[(-200)+rax],xmm7
+	vmovaps	XMMWORD[(-184)+rax],xmm8
+	vmovaps	XMMWORD[(-168)+rax],xmm9
+	vmovaps	XMMWORD[(-152)+rax],xmm10
+	vmovaps	XMMWORD[(-136)+rax],xmm11
+	vmovaps	XMMWORD[(-120)+rax],xmm12
+	vmovaps	XMMWORD[(-104)+rax],xmm13
+	vmovaps	XMMWORD[(-88)+rax],xmm14
+	vmovaps	XMMWORD[(-72)+rax],xmm15
+$L$mul_1024_body:
+	mov	rbp,rax	; keep the entry stack pointer for the epilogue
+
+	vzeroall	; every ymm register starts at zero; ymm14 is never written again and serves as the zero source for the blends
+	mov	r13,rdx
+	sub	rsp,64
+
+
+
+; Test whether (rsi mod 4096) + 320 reaches 4096; if it does, swap rsi and r13.
+; NOTE(review): presumably avoids 32-byte loads from rsi that straddle a page boundary - confirm.
+
+DB	0x67,0x67
+	mov	r15,rsi
+	and	r15,4095
+	add	r15,32*10
+	shr	r15,12	; ZF reflects whether the sum stayed below 4096
+	mov	r15,rsi	; mov leaves the flags from shr intact
+	cmovnz	rsi,r13
+	cmovnz	r13,r15
+
+	mov	r15,rcx
+	sub	rsi,-128	; bias rsi, rcx and rdi by +128; later displacements are written as (k-128)
+	sub	rcx,-128
+	sub	rdi,-128
+
+	and	r15,4095	; same 4096-byte test, now on the original rcx
+	add	r15,32*10
+DB	0x67,0x67
+	shr	r15,12
+	jz	NEAR $L$mul_1024_no_n_copy
+
+
+
+; The rcx operand failed the test above: copy its nine 32-byte rows to a 512-byte aligned stack buffer and use that copy.
+
+	sub	rsp,32*10
+	vmovdqu	ymm0,YMMWORD[((0-128))+rcx]
+	and	rsp,-512
+	vmovdqu	ymm1,YMMWORD[((32-128))+rcx]
+	vmovdqu	ymm2,YMMWORD[((64-128))+rcx]
+	vmovdqu	ymm3,YMMWORD[((96-128))+rcx]
+	vmovdqu	ymm4,YMMWORD[((128-128))+rcx]
+	vmovdqu	ymm5,YMMWORD[((160-128))+rcx]
+	vmovdqu	ymm6,YMMWORD[((192-128))+rcx]
+	vmovdqu	ymm7,YMMWORD[((224-128))+rcx]
+	vmovdqu	ymm8,YMMWORD[((256-128))+rcx]
+	lea	rcx,[((64+128))+rsp]	; rcx now points (with the same +128 bias) at the stack copy
+	vmovdqu	YMMWORD[(0-128)+rcx],ymm0
+	vpxor	ymm0,ymm0,ymm0	; each register is cleared again once its row has been stored
+	vmovdqu	YMMWORD[(32-128)+rcx],ymm1
+	vpxor	ymm1,ymm1,ymm1
+	vmovdqu	YMMWORD[(64-128)+rcx],ymm2
+	vpxor	ymm2,ymm2,ymm2
+	vmovdqu	YMMWORD[(96-128)+rcx],ymm3
+	vpxor	ymm3,ymm3,ymm3
+	vmovdqu	YMMWORD[(128-128)+rcx],ymm4
+	vpxor	ymm4,ymm4,ymm4
+	vmovdqu	YMMWORD[(160-128)+rcx],ymm5
+	vpxor	ymm5,ymm5,ymm5
+	vmovdqu	YMMWORD[(192-128)+rcx],ymm6
+	vpxor	ymm6,ymm6,ymm6
+	vmovdqu	YMMWORD[(224-128)+rcx],ymm7
+	vpxor	ymm7,ymm7,ymm7
+	vmovdqu	YMMWORD[(256-128)+rcx],ymm8
+	vmovdqa	ymm8,ymm0
+	vmovdqu	YMMWORD[(288-128)+rcx],ymm9	; ymm9 is still zero here: a tenth, all-zero row follows the copy
+$L$mul_1024_no_n_copy:
+	and	rsp,-64
+
+	mov	rbx,QWORD[r13]	; first multiplier word, scalar copy
+	vpbroadcastq	ymm10,QWORD[r13]	; same word broadcast to all four lanes
+	vmovdqu	YMMWORD[rsp],ymm0	; zero the scratch row that holds the four lowest accumulator limbs
+	xor	r9,r9
+DB	0x67
+	xor	r10,r10
+	xor	r11,r11
+	xor	r12,r12
+
+	vmovdqu	ymm15,YMMWORD[$L$and_mask]	; 0x1fffffff in every qword
+	mov	r14d,9	; nine iterations, each consuming four multiplier words (32 bytes of r13)
+	vmovdqu	YMMWORD[(288-128)+rdi],ymm9	; zero row written past the nine result rows at rdi
+	jmp	NEAR $L$oop_mul_1024
+
+ALIGN	32
+$L$oop_mul_1024:
+	vpsrlq	ymm9,ymm3,29	; bits of ymm3 above the low 29, folded back in further down
+	mov	rax,rbx
+	imul	rax,QWORD[((-128))+rsi]
+	add	rax,r9
+	mov	r10,rbx
+	imul	r10,QWORD[((8-128))+rsi]
+	add	r10,QWORD[8+rsp]
+
+	mov	r9,rax
+	imul	eax,r8d
+	and	eax,0x1fffffff	; eax = 29-bit reduction multiplier for this step
+
+	mov	r11,rbx
+	imul	r11,QWORD[((16-128))+rsi]
+	add	r11,QWORD[16+rsp]
+
+	mov	r12,rbx
+	imul	r12,QWORD[((24-128))+rsi]
+	add	r12,QWORD[24+rsp]
+	vpmuludq	ymm0,ymm10,YMMWORD[((32-128))+rsi]
+	vmovd	xmm11,eax
+	vpaddq	ymm1,ymm1,ymm0
+	vpmuludq	ymm12,ymm10,YMMWORD[((64-128))+rsi]
+	vpbroadcastq	ymm11,xmm11
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm13,ymm10,YMMWORD[((96-128))+rsi]
+	vpand	ymm3,ymm3,ymm15
+	vpaddq	ymm3,ymm3,ymm13
+	vpmuludq	ymm0,ymm10,YMMWORD[((128-128))+rsi]
+	vpaddq	ymm4,ymm4,ymm0
+	vpmuludq	ymm12,ymm10,YMMWORD[((160-128))+rsi]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm13,ymm10,YMMWORD[((192-128))+rsi]
+	vpaddq	ymm6,ymm6,ymm13
+	vpmuludq	ymm0,ymm10,YMMWORD[((224-128))+rsi]
+	vpermq	ymm9,ymm9,0x93	; rotate the carries up one lane (lane 3 wraps to lane 0)
+	vpaddq	ymm7,ymm7,ymm0
+	vpmuludq	ymm12,ymm10,YMMWORD[((256-128))+rsi]
+	vpbroadcastq	ymm10,QWORD[8+r13]	; next multiplier word
+	vpaddq	ymm8,ymm8,ymm12
+; Reduction for step 0: add eax * (rcx operand) to the scalar limbs r9..r12, then to the vector rows ymm1..ymm8.
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+rcx]
+	add	r9,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+rcx]
+	add	r10,rax
+	mov	rax,rdx
+	imul	rax,QWORD[((16-128))+rcx]
+	add	r11,rax
+	shr	r9,29	; only the carry out of the lowest limb is kept
+	imul	rdx,QWORD[((24-128))+rcx]
+	add	r12,rdx
+	add	r10,r9
+
+	vpmuludq	ymm13,ymm11,YMMWORD[((32-128))+rcx]
+	vmovq	rbx,xmm10
+	vpaddq	ymm1,ymm1,ymm13
+	vpmuludq	ymm0,ymm11,YMMWORD[((64-128))+rcx]
+	vpaddq	ymm2,ymm2,ymm0
+	vpmuludq	ymm12,ymm11,YMMWORD[((96-128))+rcx]
+	vpaddq	ymm3,ymm3,ymm12
+	vpmuludq	ymm13,ymm11,YMMWORD[((128-128))+rcx]
+	vpaddq	ymm4,ymm4,ymm13
+	vpmuludq	ymm0,ymm11,YMMWORD[((160-128))+rcx]
+	vpaddq	ymm5,ymm5,ymm0
+	vpmuludq	ymm12,ymm11,YMMWORD[((192-128))+rcx]
+	vpaddq	ymm6,ymm6,ymm12
+	vpmuludq	ymm13,ymm11,YMMWORD[((224-128))+rcx]
+	vpblendd	ymm12,ymm9,ymm14,3	; rotated carries with lane 0 cleared (ymm14 is zero)
+	vpaddq	ymm7,ymm7,ymm13
+	vpmuludq	ymm0,ymm11,YMMWORD[((256-128))+rcx]
+	vpaddq	ymm3,ymm3,ymm12
+	vpaddq	ymm8,ymm8,ymm0
+; Step 1: multiplier word from [8+r13]; operand rows are loaded at a -8 byte offset, i.e. shifted by one limb.
+	mov	rax,rbx
+	imul	rax,QWORD[((-128))+rsi]
+	add	r10,rax
+	vmovdqu	ymm12,YMMWORD[((-8+32-128))+rsi]
+	mov	rax,rbx
+	imul	rax,QWORD[((8-128))+rsi]
+	add	r11,rax
+	vmovdqu	ymm13,YMMWORD[((-8+64-128))+rsi]
+
+	mov	rax,r10
+	vpblendd	ymm9,ymm9,ymm14,0xfc	; keep only lane 0 of the rotated carries (the carry out of ymm3's top lane)
+	imul	eax,r8d
+	vpaddq	ymm4,ymm4,ymm9
+	and	eax,0x1fffffff
+
+	imul	rbx,QWORD[((16-128))+rsi]
+	add	r12,rbx
+	vpmuludq	ymm12,ymm12,ymm10
+	vmovd	xmm11,eax
+	vmovdqu	ymm0,YMMWORD[((-8+96-128))+rsi]
+	vpaddq	ymm1,ymm1,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vpbroadcastq	ymm11,xmm11
+	vmovdqu	ymm12,YMMWORD[((-8+128-128))+rsi]
+	vpaddq	ymm2,ymm2,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-8+160-128))+rsi]
+	vpaddq	ymm3,ymm3,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vmovdqu	ymm0,YMMWORD[((-8+192-128))+rsi]
+	vpaddq	ymm4,ymm4,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vmovdqu	ymm12,YMMWORD[((-8+224-128))+rsi]
+	vpaddq	ymm5,ymm5,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-8+256-128))+rsi]
+	vpaddq	ymm6,ymm6,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vmovdqu	ymm9,YMMWORD[((-8+288-128))+rsi]
+	vpaddq	ymm7,ymm7,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vpaddq	ymm8,ymm8,ymm13
+	vpmuludq	ymm9,ymm9,ymm10	; ymm9 becomes a ninth accumulator row from here on
+	vpbroadcastq	ymm10,QWORD[16+r13]
+; Reduction for step 1 (rcx rows also loaded at a -8 byte offset).
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+rcx]
+	add	r10,rax
+	vmovdqu	ymm0,YMMWORD[((-8+32-128))+rcx]
+	mov	rax,rdx
+	imul	rax,QWORD[((8-128))+rcx]
+	add	r11,rax
+	vmovdqu	ymm12,YMMWORD[((-8+64-128))+rcx]
+	shr	r10,29
+	imul	rdx,QWORD[((16-128))+rcx]
+	add	r12,rdx
+	add	r11,r10
+
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovq	rbx,xmm10
+	vmovdqu	ymm13,YMMWORD[((-8+96-128))+rcx]
+	vpaddq	ymm1,ymm1,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-8+128-128))+rcx]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-8+160-128))+rcx]
+	vpaddq	ymm3,ymm3,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-8+192-128))+rcx]
+	vpaddq	ymm4,ymm4,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-8+224-128))+rcx]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-8+256-128))+rcx]
+	vpaddq	ymm6,ymm6,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-8+288-128))+rcx]
+	vpaddq	ymm7,ymm7,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vpaddq	ymm9,ymm9,ymm13
+; Step 2: multiplier word from [16+r13]; operand rows are loaded at a -16 byte offset (two limbs).
+	vmovdqu	ymm0,YMMWORD[((-16+32-128))+rsi]
+	mov	rax,rbx
+	imul	rax,QWORD[((-128))+rsi]
+	add	rax,r11
+
+	vmovdqu	ymm12,YMMWORD[((-16+64-128))+rsi]
+	mov	r11,rax
+	imul	eax,r8d
+	and	eax,0x1fffffff
+
+	imul	rbx,QWORD[((8-128))+rsi]
+	add	r12,rbx
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovd	xmm11,eax
+	vmovdqu	ymm13,YMMWORD[((-16+96-128))+rsi]
+	vpaddq	ymm1,ymm1,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vpbroadcastq	ymm11,xmm11
+	vmovdqu	ymm0,YMMWORD[((-16+128-128))+rsi]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vmovdqu	ymm12,YMMWORD[((-16+160-128))+rsi]
+	vpaddq	ymm3,ymm3,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-16+192-128))+rsi]
+	vpaddq	ymm4,ymm4,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vmovdqu	ymm0,YMMWORD[((-16+224-128))+rsi]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vmovdqu	ymm12,YMMWORD[((-16+256-128))+rsi]
+	vpaddq	ymm6,ymm6,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-16+288-128))+rsi]
+	vpaddq	ymm7,ymm7,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vpbroadcastq	ymm10,QWORD[24+r13]
+	vpaddq	ymm9,ymm9,ymm13
+; Reduction for step 2 (rcx rows loaded at a -16 byte offset).
+	vmovdqu	ymm0,YMMWORD[((-16+32-128))+rcx]
+	mov	rdx,rax
+	imul	rax,QWORD[((-128))+rcx]
+	add	r11,rax
+	vmovdqu	ymm12,YMMWORD[((-16+64-128))+rcx]
+	imul	rdx,QWORD[((8-128))+rcx]
+	add	r12,rdx
+	shr	r11,29
+
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovq	rbx,xmm10
+	vmovdqu	ymm13,YMMWORD[((-16+96-128))+rcx]
+	vpaddq	ymm1,ymm1,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-16+128-128))+rcx]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-16+160-128))+rcx]
+	vpaddq	ymm3,ymm3,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-16+192-128))+rcx]
+	vpaddq	ymm4,ymm4,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-16+224-128))+rcx]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-16+256-128))+rcx]
+	vpaddq	ymm6,ymm6,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-16+288-128))+rcx]
+	vpaddq	ymm7,ymm7,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-24+32-128))+rsi]
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-24+64-128))+rsi]
+	vpaddq	ymm9,ymm9,ymm13
+; Step 3: multiplier word from [24+r13]; operand rows are loaded at a -24 byte offset (three limbs).
+	add	r12,r11
+	imul	rbx,QWORD[((-128))+rsi]
+	add	r12,rbx
+
+	mov	rax,r12
+	imul	eax,r8d
+	and	eax,0x1fffffff
+
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovd	xmm11,eax
+	vmovdqu	ymm13,YMMWORD[((-24+96-128))+rsi]
+	vpaddq	ymm1,ymm1,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vpbroadcastq	ymm11,xmm11
+	vmovdqu	ymm0,YMMWORD[((-24+128-128))+rsi]
+	vpaddq	ymm2,ymm2,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vmovdqu	ymm12,YMMWORD[((-24+160-128))+rsi]
+	vpaddq	ymm3,ymm3,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-24+192-128))+rsi]
+	vpaddq	ymm4,ymm4,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vmovdqu	ymm0,YMMWORD[((-24+224-128))+rsi]
+	vpaddq	ymm5,ymm5,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vmovdqu	ymm12,YMMWORD[((-24+256-128))+rsi]
+	vpaddq	ymm6,ymm6,ymm13
+	vpmuludq	ymm0,ymm0,ymm10
+	vmovdqu	ymm13,YMMWORD[((-24+288-128))+rsi]
+	vpaddq	ymm7,ymm7,ymm0
+	vpmuludq	ymm12,ymm12,ymm10
+	vpaddq	ymm8,ymm8,ymm12
+	vpmuludq	ymm13,ymm13,ymm10
+	vpbroadcastq	ymm10,QWORD[32+r13]	; first multiplier word of the next iteration
+	vpaddq	ymm9,ymm9,ymm13
+	add	r13,32	; advance to the next four multiplier words
+; Reduction for step 3; the sums are written one register lower (ymm1->ymm0 ... ymm9->ymm8), so the accumulator moves down by four limbs per iteration.
+	vmovdqu	ymm0,YMMWORD[((-24+32-128))+rcx]
+	imul	rax,QWORD[((-128))+rcx]
+	add	r12,rax
+	shr	r12,29
+
+	vmovdqu	ymm12,YMMWORD[((-24+64-128))+rcx]
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovq	rbx,xmm10
+	vmovdqu	ymm13,YMMWORD[((-24+96-128))+rcx]
+	vpaddq	ymm0,ymm1,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	YMMWORD[rsp],ymm0	; lowest row is spilled so its limbs can be read back as scalars
+	vpaddq	ymm1,ymm2,ymm12
+	vmovdqu	ymm0,YMMWORD[((-24+128-128))+rcx]
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-24+160-128))+rcx]
+	vpaddq	ymm2,ymm3,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-24+192-128))+rcx]
+	vpaddq	ymm3,ymm4,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	vmovdqu	ymm0,YMMWORD[((-24+224-128))+rcx]
+	vpaddq	ymm4,ymm5,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovdqu	ymm12,YMMWORD[((-24+256-128))+rcx]
+	vpaddq	ymm5,ymm6,ymm13
+	vpmuludq	ymm0,ymm0,ymm11
+	vmovdqu	ymm13,YMMWORD[((-24+288-128))+rcx]
+	mov	r9,r12
+	vpaddq	ymm6,ymm7,ymm0
+	vpmuludq	ymm12,ymm12,ymm11
+	add	r9,QWORD[rsp]	; r9 = carry from this iteration + lowest limb of the spilled row
+	vpaddq	ymm7,ymm8,ymm12
+	vpmuludq	ymm13,ymm13,ymm11
+	vmovq	xmm12,r12	; carry kept in lane 0 of ymm12 for use after the loop
+	vpaddq	ymm8,ymm9,ymm13
+
+	dec	r14d
+	jnz	NEAR $L$oop_mul_1024
+	vpaddq	ymm0,ymm12,YMMWORD[rsp]	; lowest row = spilled row + final scalar carry in lane 0
+; Carry propagation: split every qword into its low 29 bits and the bits above, rotate those carries up one lane (vpermq 0x93) and blend so each lands on the next limb; done twice per group of rows.
+	vpsrlq	ymm12,ymm0,29
+	vpand	ymm0,ymm0,ymm15
+	vpsrlq	ymm13,ymm1,29
+	vpand	ymm1,ymm1,ymm15
+	vpsrlq	ymm10,ymm2,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm2,ymm2,ymm15
+	vpsrlq	ymm11,ymm3,29
+	vpermq	ymm13,ymm13,0x93
+	vpand	ymm3,ymm3,ymm15
+
+	vpblendd	ymm9,ymm12,ymm14,3
+	vpermq	ymm10,ymm10,0x93
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpermq	ymm11,ymm11,0x93
+	vpaddq	ymm0,ymm0,ymm9
+	vpblendd	ymm13,ymm10,ymm13,3
+	vpaddq	ymm1,ymm1,ymm12
+	vpblendd	ymm10,ymm11,ymm10,3
+	vpaddq	ymm2,ymm2,ymm13
+	vpblendd	ymm11,ymm14,ymm11,3
+	vpaddq	ymm3,ymm3,ymm10
+	vpaddq	ymm4,ymm4,ymm11
+
+	vpsrlq	ymm12,ymm0,29
+	vpand	ymm0,ymm0,ymm15
+	vpsrlq	ymm13,ymm1,29
+	vpand	ymm1,ymm1,ymm15
+	vpsrlq	ymm10,ymm2,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm2,ymm2,ymm15
+	vpsrlq	ymm11,ymm3,29
+	vpermq	ymm13,ymm13,0x93
+	vpand	ymm3,ymm3,ymm15
+	vpermq	ymm10,ymm10,0x93
+
+	vpblendd	ymm9,ymm12,ymm14,3
+	vpermq	ymm11,ymm11,0x93
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm0,ymm0,ymm9
+	vpblendd	ymm13,ymm10,ymm13,3
+	vpaddq	ymm1,ymm1,ymm12
+	vpblendd	ymm10,ymm11,ymm10,3
+	vpaddq	ymm2,ymm2,ymm13
+	vpblendd	ymm11,ymm14,ymm11,3
+	vpaddq	ymm3,ymm3,ymm10
+	vpaddq	ymm4,ymm4,ymm11
+
+	vmovdqu	YMMWORD[(0-128)+rdi],ymm0	; result rows 0..3
+	vmovdqu	YMMWORD[(32-128)+rdi],ymm1
+	vmovdqu	YMMWORD[(64-128)+rdi],ymm2
+	vmovdqu	YMMWORD[(96-128)+rdi],ymm3
+	vpsrlq	ymm12,ymm4,29
+	vpand	ymm4,ymm4,ymm15
+	vpsrlq	ymm13,ymm5,29
+	vpand	ymm5,ymm5,ymm15
+	vpsrlq	ymm10,ymm6,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm6,ymm6,ymm15
+	vpsrlq	ymm11,ymm7,29
+	vpermq	ymm13,ymm13,0x93
+	vpand	ymm7,ymm7,ymm15
+	vpsrlq	ymm0,ymm8,29
+	vpermq	ymm10,ymm10,0x93
+	vpand	ymm8,ymm8,ymm15
+	vpermq	ymm11,ymm11,0x93
+
+	vpblendd	ymm9,ymm12,ymm14,3
+	vpermq	ymm0,ymm0,0x93
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm4,ymm4,ymm9
+	vpblendd	ymm13,ymm10,ymm13,3
+	vpaddq	ymm5,ymm5,ymm12
+	vpblendd	ymm10,ymm11,ymm10,3
+	vpaddq	ymm6,ymm6,ymm13
+	vpblendd	ymm11,ymm0,ymm11,3
+	vpaddq	ymm7,ymm7,ymm10
+	vpaddq	ymm8,ymm8,ymm11
+
+	vpsrlq	ymm12,ymm4,29
+	vpand	ymm4,ymm4,ymm15
+	vpsrlq	ymm13,ymm5,29
+	vpand	ymm5,ymm5,ymm15
+	vpsrlq	ymm10,ymm6,29
+	vpermq	ymm12,ymm12,0x93
+	vpand	ymm6,ymm6,ymm15
+	vpsrlq	ymm11,ymm7,29
+	vpermq	ymm13,ymm13,0x93
+	vpand	ymm7,ymm7,ymm15
+	vpsrlq	ymm0,ymm8,29
+	vpermq	ymm10,ymm10,0x93
+	vpand	ymm8,ymm8,ymm15
+	vpermq	ymm11,ymm11,0x93
+
+	vpblendd	ymm9,ymm12,ymm14,3
+	vpermq	ymm0,ymm0,0x93
+	vpblendd	ymm12,ymm13,ymm12,3
+	vpaddq	ymm4,ymm4,ymm9
+	vpblendd	ymm13,ymm10,ymm13,3
+	vpaddq	ymm5,ymm5,ymm12
+	vpblendd	ymm10,ymm11,ymm10,3
+	vpaddq	ymm6,ymm6,ymm13
+	vpblendd	ymm11,ymm0,ymm11,3
+	vpaddq	ymm7,ymm7,ymm10
+	vpaddq	ymm8,ymm8,ymm11
+
+	vmovdqu	YMMWORD[(128-128)+rdi],ymm4	; result rows 4..8
+	vmovdqu	YMMWORD[(160-128)+rdi],ymm5
+	vmovdqu	YMMWORD[(192-128)+rdi],ymm6
+	vmovdqu	YMMWORD[(224-128)+rdi],ymm7
+	vmovdqu	YMMWORD[(256-128)+rdi],ymm8
+	vzeroupper
+
+	mov	rax,rbp	; rax = stack pointer at entry, base for all restores below
+
+$L$mul_1024_in_tail:
+	movaps	xmm6,XMMWORD[((-216))+rax]
+	movaps	xmm7,XMMWORD[((-200))+rax]
+	movaps	xmm8,XMMWORD[((-184))+rax]
+	movaps	xmm9,XMMWORD[((-168))+rax]
+	movaps	xmm10,XMMWORD[((-152))+rax]
+	movaps	xmm11,XMMWORD[((-136))+rax]
+	movaps	xmm12,XMMWORD[((-120))+rax]
+	movaps	xmm13,XMMWORD[((-104))+rax]
+	movaps	xmm14,XMMWORD[((-88))+rax]
+	movaps	xmm15,XMMWORD[((-72))+rax]
+	mov	r15,QWORD[((-48))+rax]
+
+	mov	r14,QWORD[((-40))+rax]
+
+	mov	r13,QWORD[((-32))+rax]
+
+	mov	r12,QWORD[((-24))+rax]
+
+	mov	rbp,QWORD[((-16))+rax]
+
+	mov	rbx,QWORD[((-8))+rax]
+
+	lea	rsp,[rax]
+
+$L$mul_1024_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_rsaz_1024_mul_avx2:
+global	rsaz_1024_red2norm_avx2
+
+ALIGN	32
+rsaz_1024_red2norm_avx2:
+; Packs 36 qwords (each contributing at bit offset 29*i) read from rdx into 16 contiguous 64-bit words written at rcx; the bits left over after word 15 are returned in rax.
+	sub	rdx,-128	; bias the source pointer by +128; limb i is then at (8*i-128)+rdx
+	xor	rax,rax
+	mov	r8,QWORD[((-128))+rdx]
+	mov	r9,QWORD[((-120))+rdx]
+	mov	r10,QWORD[((-112))+rdx]
+	shl	r8,0
+	shl	r9,29
+	mov	r11,r10
+	shl	r10,58	; limb 2 straddles words 0 and 1: low part goes here ...
+	shr	r11,6	; ... and the remaining high part seeds word 1
+	add	rax,r8
+	add	rax,r9
+	add	rax,r10
+	adc	r11,0	; carry out of the last add moves into the next word
+	mov	QWORD[rcx],rax	; output word 0
+	mov	rax,r11
+	mov	r8,QWORD[((-104))+rdx]
+	mov	r9,QWORD[((-96))+rdx]
+	shl	r8,23
+	mov	r10,r9
+	shl	r9,52
+	shr	r10,12
+	add	rax,r8
+	add	rax,r9
+	adc	r10,0
+	mov	QWORD[8+rcx],rax	; output word 1
+	mov	rax,r10
+	mov	r11,QWORD[((-88))+rdx]
+	mov	r8,QWORD[((-80))+rdx]
+	shl	r11,17
+	mov	r9,r8
+	shl	r8,46
+	shr	r9,18
+	add	rax,r11
+	add	rax,r8
+	adc	r9,0
+	mov	QWORD[16+rcx],rax	; output word 2
+	mov	rax,r9
+	mov	r10,QWORD[((-72))+rdx]
+	mov	r11,QWORD[((-64))+rdx]
+	shl	r10,11
+	mov	r8,r11
+	shl	r11,40
+	shr	r8,24
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[24+rcx],rax	; output word 3
+	mov	rax,r8
+	mov	r9,QWORD[((-56))+rdx]
+	mov	r10,QWORD[((-48))+rdx]
+	mov	r11,QWORD[((-40))+rdx]
+	shl	r9,5
+	shl	r10,34
+	mov	r8,r11
+	shl	r11,63
+	shr	r8,1
+	add	rax,r9
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[32+rcx],rax	; output word 4
+	mov	rax,r8
+	mov	r9,QWORD[((-32))+rdx]
+	mov	r10,QWORD[((-24))+rdx]
+	shl	r9,28
+	mov	r11,r10
+	shl	r10,57
+	shr	r11,7
+	add	rax,r9
+	add	rax,r10
+	adc	r11,0
+	mov	QWORD[40+rcx],rax	; output word 5
+	mov	rax,r11
+	mov	r8,QWORD[((-16))+rdx]
+	mov	r9,QWORD[((-8))+rdx]
+	shl	r8,22
+	mov	r10,r9
+	shl	r9,51
+	shr	r10,13
+	add	rax,r8
+	add	rax,r9
+	adc	r10,0
+	mov	QWORD[48+rcx],rax	; output word 6
+	mov	rax,r10
+	mov	r11,QWORD[rdx]
+	mov	r8,QWORD[8+rdx]
+	shl	r11,16
+	mov	r9,r8
+	shl	r8,45
+	shr	r9,19
+	add	rax,r11
+	add	rax,r8
+	adc	r9,0
+	mov	QWORD[56+rcx],rax	; output word 7
+	mov	rax,r9
+	mov	r10,QWORD[16+rdx]
+	mov	r11,QWORD[24+rdx]
+	shl	r10,10
+	mov	r8,r11
+	shl	r11,39
+	shr	r8,25
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[64+rcx],rax	; output word 8
+	mov	rax,r8
+	mov	r9,QWORD[32+rdx]
+	mov	r10,QWORD[40+rdx]
+	mov	r11,QWORD[48+rdx]
+	shl	r9,4
+	shl	r10,33
+	mov	r8,r11
+	shl	r11,62
+	shr	r8,2
+	add	rax,r9
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[72+rcx],rax	; output word 9
+	mov	rax,r8
+	mov	r9,QWORD[56+rdx]
+	mov	r10,QWORD[64+rdx]
+	shl	r9,27
+	mov	r11,r10
+	shl	r10,56
+	shr	r11,8
+	add	rax,r9
+	add	rax,r10
+	adc	r11,0
+	mov	QWORD[80+rcx],rax	; output word 10
+	mov	rax,r11
+	mov	r8,QWORD[72+rdx]
+	mov	r9,QWORD[80+rdx]
+	shl	r8,21
+	mov	r10,r9
+	shl	r9,50
+	shr	r10,14
+	add	rax,r8
+	add	rax,r9
+	adc	r10,0
+	mov	QWORD[88+rcx],rax	; output word 11
+	mov	rax,r10
+	mov	r11,QWORD[88+rdx]
+	mov	r8,QWORD[96+rdx]
+	shl	r11,15
+	mov	r9,r8
+	shl	r8,44
+	shr	r9,20
+	add	rax,r11
+	add	rax,r8
+	adc	r9,0
+	mov	QWORD[96+rcx],rax	; output word 12
+	mov	rax,r9
+	mov	r10,QWORD[104+rdx]
+	mov	r11,QWORD[112+rdx]
+	shl	r10,9
+	mov	r8,r11
+	shl	r11,38
+	shr	r8,26
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[104+rcx],rax	; output word 13
+	mov	rax,r8
+	mov	r9,QWORD[120+rdx]
+	mov	r10,QWORD[128+rdx]
+	mov	r11,QWORD[136+rdx]
+	shl	r9,3
+	shl	r10,32
+	mov	r8,r11
+	shl	r11,61
+	shr	r8,3
+	add	rax,r9
+	add	rax,r10
+	add	rax,r11
+	adc	r8,0
+	mov	QWORD[112+rcx],rax	; output word 14
+	mov	rax,r8
+	mov	r9,QWORD[144+rdx]
+	mov	r10,QWORD[152+rdx]
+	shl	r9,26
+	mov	r11,r10
+	shl	r10,55
+	shr	r11,9
+	add	rax,r9
+	add	rax,r10
+	adc	r11,0
+	mov	QWORD[120+rcx],rax	; output word 15
+	mov	rax,r11	; leftover high bits end up in rax at return
+	DB	0F3h,0C3h		;repret
+
+
+
+global	rsaz_1024_norm2red_avx2
+
+ALIGN	32
+rsaz_1024_norm2red_avx2:
+; Splits 16 qwords read from rdx into 36 limbs of 29 bits, one limb per qword, written at rcx and followed by 4 zero qwords (40 qwords written in total).
+	sub	rcx,-128	; bias the destination pointer by +128; limb i is then at (8*i-128)+rcx
+	mov	r8,QWORD[rdx]
+	mov	eax,0x1fffffff	; 29-bit mask (the 32-bit write also clears the upper half of rax)
+	mov	r9,QWORD[8+rdx]
+	mov	r11,r8
+	shr	r11,0
+	and	r11,rax
+	mov	QWORD[((-128))+rcx],r11
+	mov	r10,r8
+	shr	r10,29
+	and	r10,rax
+	mov	QWORD[((-120))+rcx],r10
+	shrd	r8,r9,58	; limb straddling two input words: shrd shifts the next word's low bits in from the top
+	and	r8,rax
+	mov	QWORD[((-112))+rcx],r8
+	mov	r10,QWORD[16+rdx]
+	mov	r8,r9
+	shr	r8,23
+	and	r8,rax
+	mov	QWORD[((-104))+rcx],r8
+	shrd	r9,r10,52
+	and	r9,rax
+	mov	QWORD[((-96))+rcx],r9
+	mov	r11,QWORD[24+rdx]
+	mov	r9,r10
+	shr	r9,17
+	and	r9,rax
+	mov	QWORD[((-88))+rcx],r9
+	shrd	r10,r11,46
+	and	r10,rax
+	mov	QWORD[((-80))+rcx],r10
+	mov	r8,QWORD[32+rdx]
+	mov	r10,r11
+	shr	r10,11
+	and	r10,rax
+	mov	QWORD[((-72))+rcx],r10
+	shrd	r11,r8,40
+	and	r11,rax
+	mov	QWORD[((-64))+rcx],r11
+	mov	r9,QWORD[40+rdx]
+	mov	r11,r8
+	shr	r11,5
+	and	r11,rax
+	mov	QWORD[((-56))+rcx],r11
+	mov	r10,r8
+	shr	r10,34
+	and	r10,rax
+	mov	QWORD[((-48))+rcx],r10
+	shrd	r8,r9,63
+	and	r8,rax
+	mov	QWORD[((-40))+rcx],r8
+	mov	r10,QWORD[48+rdx]
+	mov	r8,r9
+	shr	r8,28
+	and	r8,rax
+	mov	QWORD[((-32))+rcx],r8
+	shrd	r9,r10,57
+	and	r9,rax
+	mov	QWORD[((-24))+rcx],r9
+	mov	r11,QWORD[56+rdx]
+	mov	r9,r10
+	shr	r9,22
+	and	r9,rax
+	mov	QWORD[((-16))+rcx],r9
+	shrd	r10,r11,51
+	and	r10,rax
+	mov	QWORD[((-8))+rcx],r10
+	mov	r8,QWORD[64+rdx]
+	mov	r10,r11
+	shr	r10,16
+	and	r10,rax
+	mov	QWORD[rcx],r10
+	shrd	r11,r8,45
+	and	r11,rax
+	mov	QWORD[8+rcx],r11
+	mov	r9,QWORD[72+rdx]
+	mov	r11,r8
+	shr	r11,10
+	and	r11,rax
+	mov	QWORD[16+rcx],r11
+	shrd	r8,r9,39
+	and	r8,rax
+	mov	QWORD[24+rcx],r8
+	mov	r10,QWORD[80+rdx]
+	mov	r8,r9
+	shr	r8,4
+	and	r8,rax
+	mov	QWORD[32+rcx],r8
+	mov	r11,r9
+	shr	r11,33
+	and	r11,rax
+	mov	QWORD[40+rcx],r11
+	shrd	r9,r10,62
+	and	r9,rax
+	mov	QWORD[48+rcx],r9
+	mov	r11,QWORD[88+rdx]
+	mov	r9,r10
+	shr	r9,27
+	and	r9,rax
+	mov	QWORD[56+rcx],r9
+	shrd	r10,r11,56
+	and	r10,rax
+	mov	QWORD[64+rcx],r10
+	mov	r8,QWORD[96+rdx]
+	mov	r10,r11
+	shr	r10,21
+	and	r10,rax
+	mov	QWORD[72+rcx],r10
+	shrd	r11,r8,50
+	and	r11,rax
+	mov	QWORD[80+rcx],r11
+	mov	r9,QWORD[104+rdx]
+	mov	r11,r8
+	shr	r11,15
+	and	r11,rax
+	mov	QWORD[88+rcx],r11
+	shrd	r8,r9,44
+	and	r8,rax
+	mov	QWORD[96+rcx],r8
+	mov	r10,QWORD[112+rdx]
+	mov	r8,r9
+	shr	r8,9
+	and	r8,rax
+	mov	QWORD[104+rcx],r8
+	shrd	r9,r10,38
+	and	r9,rax
+	mov	QWORD[112+rcx],r9
+	mov	r11,QWORD[120+rdx]
+	mov	r9,r10
+	shr	r9,3
+	and	r9,rax
+	mov	QWORD[120+rcx],r9
+	mov	r8,r10
+	shr	r8,32
+	and	r8,rax
+	mov	QWORD[128+rcx],r8
+	shrd	r10,r11,61
+	and	r10,rax
+	mov	QWORD[136+rcx],r10
+	xor	r8,r8	; no further input word: zeros are shifted in for the last limb
+	mov	r10,r11
+	shr	r10,26
+	and	r10,rax
+	mov	QWORD[144+rcx],r10
+	shrd	r11,r8,55
+	and	r11,rax
+	mov	QWORD[152+rcx],r11
+	mov	QWORD[160+rcx],r8	; four trailing zero qwords pad the output to 40 qwords
+	mov	QWORD[168+rcx],r8
+	mov	QWORD[176+rcx],r8
+	mov	QWORD[184+rcx],r8
+	DB	0F3h,0C3h		;repret
+
+
+global	rsaz_1024_scatter5_avx2
+
+ALIGN	32
+rsaz_1024_scatter5_avx2:
+; In: rcx = table base, rdx = source (9 x 32 bytes read), r8d = slot index. Writes nine 16-byte pieces at rcx + 16*index, 512 bytes apart.
+	vzeroupper
+	vmovdqu	ymm5,YMMWORD[$L$scatter_permd]
+	shl	r8d,4	; slot index -> byte offset (16 bytes per slot)
+	lea	rcx,[r8*1+rcx]
+	mov	eax,9	; nine source rows
+	jmp	NEAR $L$oop_scatter_1024
+
+ALIGN	32
+$L$oop_scatter_1024:
+	vmovdqu	ymm0,YMMWORD[rdx]
+	lea	rdx,[32+rdx]
+	vpermd	ymm0,ymm5,ymm0	; gather dwords 0,2,4,6 (low half of each qword) into the low 128 bits
+	vmovdqu	XMMWORD[rcx],xmm0	; only those four dwords are stored
+	lea	rcx,[512+rcx]	; same slot in the next 512-byte table row
+	dec	eax
+	jnz	NEAR $L$oop_scatter_1024
+
+	vzeroupper
+	DB	0F3h,0C3h		;repret
+
+
+
+global	rsaz_1024_gather5_avx2
+
+ALIGN	32
+rsaz_1024_gather5_avx2:
+; In: rcx = destination (9 x 32 bytes written, then 32 zero bytes), rdx = table (read with vmovdqa, so 32-byte alignment is required), r8d = slot index.
+	vzeroupper
+	mov	r11,rsp	; r11 = stack pointer at entry, used to restore xmm6..xmm15 and rsp
+
+	lea	rax,[((-136))+rsp]
+$L$SEH_begin_rsaz_1024_gather5:
+; The DB rows below are hand-encoded instructions; the decoding is given beside each row.
+DB	0x48,0x8d,0x60,0xe0	; lea rsp,[rax-32]
+DB	0xc5,0xf8,0x29,0x70,0xe0	; vmovaps [rax-32],xmm6
+DB	0xc5,0xf8,0x29,0x78,0xf0	; vmovaps [rax-16],xmm7
+DB	0xc5,0x78,0x29,0x40,0x00	; vmovaps [rax+0],xmm8
+DB	0xc5,0x78,0x29,0x48,0x10	; vmovaps [rax+16],xmm9
+DB	0xc5,0x78,0x29,0x50,0x20	; vmovaps [rax+32],xmm10
+DB	0xc5,0x78,0x29,0x58,0x30	; vmovaps [rax+48],xmm11
+DB	0xc5,0x78,0x29,0x60,0x40	; vmovaps [rax+64],xmm12
+DB	0xc5,0x78,0x29,0x68,0x50	; vmovaps [rax+80],xmm13
+DB	0xc5,0x78,0x29,0x70,0x60	; vmovaps [rax+96],xmm14
+DB	0xc5,0x78,0x29,0x78,0x70	; vmovaps [rax+112],xmm15
+	lea	rsp,[((-256))+rsp]	; 256 bytes of scratch for the first eight masks
+	and	rsp,-32
+	lea	r10,[$L$inc]
+	lea	rax,[((-128))+rsp]	; (k+128)+rax below addresses scratch byte k
+; Build 16 compare masks; each 32-byte mask covers two adjacent slots and is all-ones only in the half whose slot number equals r8d.
+	vmovd	xmm4,r8d
+	vmovdqa	ymm0,YMMWORD[r10]	; slot numbers 0,0,0,0,1,1,1,1
+	vmovdqa	ymm1,YMMWORD[32+r10]	; slot numbers 2,2,2,2,3,3,3,3
+	vmovdqa	ymm5,YMMWORD[64+r10]	; increment of 4 per step
+	vpbroadcastd	ymm4,xmm4
+
+	vpaddd	ymm2,ymm0,ymm5
+	vpcmpeqd	ymm0,ymm0,ymm4
+	vpaddd	ymm3,ymm1,ymm5
+	vpcmpeqd	ymm1,ymm1,ymm4
+	vmovdqa	YMMWORD[(0+128)+rax],ymm0
+	vpaddd	ymm0,ymm2,ymm5
+	vpcmpeqd	ymm2,ymm2,ymm4
+	vmovdqa	YMMWORD[(32+128)+rax],ymm1
+	vpaddd	ymm1,ymm3,ymm5
+	vpcmpeqd	ymm3,ymm3,ymm4
+	vmovdqa	YMMWORD[(64+128)+rax],ymm2
+	vpaddd	ymm2,ymm0,ymm5
+	vpcmpeqd	ymm0,ymm0,ymm4
+	vmovdqa	YMMWORD[(96+128)+rax],ymm3
+	vpaddd	ymm3,ymm1,ymm5
+	vpcmpeqd	ymm1,ymm1,ymm4
+	vmovdqa	YMMWORD[(128+128)+rax],ymm0
+	vpaddd	ymm8,ymm2,ymm5
+	vpcmpeqd	ymm2,ymm2,ymm4
+	vmovdqa	YMMWORD[(160+128)+rax],ymm1
+	vpaddd	ymm9,ymm3,ymm5
+	vpcmpeqd	ymm3,ymm3,ymm4
+	vmovdqa	YMMWORD[(192+128)+rax],ymm2
+	vpaddd	ymm10,ymm8,ymm5
+	vpcmpeqd	ymm8,ymm8,ymm4
+	vmovdqa	YMMWORD[(224+128)+rax],ymm3
+	vpaddd	ymm11,ymm9,ymm5
+	vpcmpeqd	ymm9,ymm9,ymm4
+	vpaddd	ymm12,ymm10,ymm5
+	vpcmpeqd	ymm10,ymm10,ymm4
+	vpaddd	ymm13,ymm11,ymm5
+	vpcmpeqd	ymm11,ymm11,ymm4
+	vpaddd	ymm14,ymm12,ymm5
+	vpcmpeqd	ymm12,ymm12,ymm4
+	vpaddd	ymm15,ymm13,ymm5
+	vpcmpeqd	ymm13,ymm13,ymm4
+	vpcmpeqd	ymm14,ymm14,ymm4
+	vpcmpeqd	ymm15,ymm15,ymm4
+; The first eight masks now live in the stack scratch area; the last eight stay in ymm8..ymm15.
+	vmovdqa	ymm7,YMMWORD[((-32))+r10]	; the 32 bytes just before $L$inc, i.e. $L$gather_permd
+	lea	rdx,[128+rdx]
+	mov	r8d,9	; nine 512-byte table rows
+; Each iteration reads all 512 bytes of a row, ANDs them with the masks and ORs the results, so the addresses touched do not depend on the index.
+$L$oop_gather_1024:
+	vmovdqa	ymm0,YMMWORD[((0-128))+rdx]
+	vmovdqa	ymm1,YMMWORD[((32-128))+rdx]
+	vmovdqa	ymm2,YMMWORD[((64-128))+rdx]
+	vmovdqa	ymm3,YMMWORD[((96-128))+rdx]
+	vpand	ymm0,ymm0,YMMWORD[((0+128))+rax]
+	vpand	ymm1,ymm1,YMMWORD[((32+128))+rax]
+	vpand	ymm2,ymm2,YMMWORD[((64+128))+rax]
+	vpor	ymm4,ymm1,ymm0
+	vpand	ymm3,ymm3,YMMWORD[((96+128))+rax]
+	vmovdqa	ymm0,YMMWORD[((128-128))+rdx]
+	vmovdqa	ymm1,YMMWORD[((160-128))+rdx]
+	vpor	ymm5,ymm3,ymm2
+	vmovdqa	ymm2,YMMWORD[((192-128))+rdx]
+	vmovdqa	ymm3,YMMWORD[((224-128))+rdx]
+	vpand	ymm0,ymm0,YMMWORD[((128+128))+rax]
+	vpand	ymm1,ymm1,YMMWORD[((160+128))+rax]
+	vpand	ymm2,ymm2,YMMWORD[((192+128))+rax]
+	vpor	ymm4,ymm4,ymm0
+	vpand	ymm3,ymm3,YMMWORD[((224+128))+rax]
+	vpand	ymm0,ymm8,YMMWORD[((256-128))+rdx]
+	vpor	ymm5,ymm5,ymm1
+	vpand	ymm1,ymm9,YMMWORD[((288-128))+rdx]
+	vpor	ymm4,ymm4,ymm2
+	vpand	ymm2,ymm10,YMMWORD[((320-128))+rdx]
+	vpor	ymm5,ymm5,ymm3
+	vpand	ymm3,ymm11,YMMWORD[((352-128))+rdx]
+	vpor	ymm4,ymm4,ymm0
+	vpand	ymm0,ymm12,YMMWORD[((384-128))+rdx]
+	vpor	ymm5,ymm5,ymm1
+	vpand	ymm1,ymm13,YMMWORD[((416-128))+rdx]
+	vpor	ymm4,ymm4,ymm2
+	vpand	ymm2,ymm14,YMMWORD[((448-128))+rdx]
+	vpor	ymm5,ymm5,ymm3
+	vpand	ymm3,ymm15,YMMWORD[((480-128))+rdx]
+	lea	rdx,[512+rdx]
+	vpor	ymm4,ymm4,ymm0
+	vpor	ymm5,ymm5,ymm1
+	vpor	ymm4,ymm4,ymm2
+	vpor	ymm5,ymm5,ymm3
+; Fold the two 128-bit halves together, then spread the four selected dwords into the low half of four qwords.
+	vpor	ymm4,ymm4,ymm5
+	vextracti128	xmm5,ymm4,1
+	vpor	xmm5,xmm5,xmm4	; 128-bit VEX op: the upper half of ymm5 becomes zero
+	vpermd	ymm5,ymm7,ymm5	; indices 0,7,1,7,2,7,3,7: dword 7 is zero, so each high half is cleared
+	vmovdqu	YMMWORD[rcx],ymm5
+	lea	rcx,[32+rcx]
+	dec	r8d
+	jnz	NEAR $L$oop_gather_1024
+
+	vpxor	ymm0,ymm0,ymm0
+	vmovdqu	YMMWORD[rcx],ymm0	; one all-zero 32-byte row after the nine gathered rows
+	vzeroupper
+	movaps	xmm6,XMMWORD[((-168))+r11]
+	movaps	xmm7,XMMWORD[((-152))+r11]
+	movaps	xmm8,XMMWORD[((-136))+r11]
+	movaps	xmm9,XMMWORD[((-120))+r11]
+	movaps	xmm10,XMMWORD[((-104))+r11]
+	movaps	xmm11,XMMWORD[((-88))+r11]
+	movaps	xmm12,XMMWORD[((-72))+r11]
+	movaps	xmm13,XMMWORD[((-56))+r11]
+	movaps	xmm14,XMMWORD[((-40))+r11]
+	movaps	xmm15,XMMWORD[((-24))+r11]
+	lea	rsp,[r11]
+
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_rsaz_1024_gather5:
+
+ALIGN	64
+$L$and_mask:	; four qwords, each with the low 29 bits set
+	DQ	0x1fffffff,0x1fffffff,0x1fffffff,0x1fffffff
+$L$scatter_permd:	; eight dword indices; presumably a vpermd control for the scatter routine (not visible here) -- TODO confirm
+	DD	0,2,4,6,7,7,7,7
+$L$gather_permd:	; eight dword indices; presumably the vpermd control used by the gather routine -- TODO confirm
+	DD	0,7,1,7,2,7,3,7
+$L$inc:	; three rows of eight dwords: 0/1, 2/3, then a constant 4
+	DD	0,0,0,0,1,1,1,1
+	DD	2,2,2,2,3,3,3,3
+	DD	4,4,4,4,4,4,4,4
+ALIGN	64
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+rsaz_se_handler:	; SEH handler named in .xdata for rsaz_1024_sqr_avx2 / rsaz_1024_mul_avx2; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT* (Win64 handler ABI)
+	push	rsi	; preserve the non-volatile registers and flags this handler uses
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; shadow space plus four stack arguments for the RtlVirtualUnwind call below
+
+	mov	rax,QWORD[120+r8]	; context->Rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	mov	rsi,QWORD[8+r9]	; disp->ImageBase
+	mov	r11,QWORD[56+r9]	; disp->HandlerData (the three label RVAs listed in .xdata)
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: body label RVA
+	lea	r10,[r10*1+rsi]	; RVA + image base = absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip before the body label: skip register recovery
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: epilogue label RVA
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; Rip at or past the epilogue label: skip register recovery
+
+	mov	rbp,QWORD[160+r8]	; context->Rbp
+
+	mov	r10d,DWORD[8+r11]	; HandlerData[2]: "in tail" label RVA
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	cmovc	rax,rbp	; Rip below the in-tail label: use context->Rbp as the frame base instead of context->Rax
+
+	mov	r15,QWORD[((-48))+rax]	; reload r15..rbx from the six qwords just below the frame base
+	mov	r14,QWORD[((-40))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	rbp,QWORD[((-16))+rax]
+	mov	rbx,QWORD[((-8))+rax]
+	mov	QWORD[240+r8],r15	; context->R15
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[144+r8],rbx	; context->Rbx
+
+	lea	rsi,[((-216))+rax]	; source: 160-byte area starting at frame base - 216
+	lea	rdi,[512+r8]	; destination: context->Xmm6
+	mov	ecx,20	; 20 qwords = ten 16-byte XMM registers
+	DD	0xa548f3fc	; bytes fc f3 48 a5 = cld; rep movsq
+
+$L$common_seh_tail:
+	mov	rdi,QWORD[8+rax]	; presumably the rdi/rsi stored by the function's WIN64 prologue -- TODO confirm against the prologues
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; disp->ContextRecord
+	mov	rsi,r8	; context
+	mov	ecx,154	; copy 154 qwords from context into disp->ContextRecord
+	DD	0xa548f3fc	; cld; rep movsq
+
+	mov	rsi,r9	; keep disp in rsi while r8/r9 are reloaded as call arguments
+	xor	rcx,rcx	; arg1 = 0
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5
+	mov	QWORD[40+rsp],r11	; arg6
+	mov	QWORD[48+rsp],r12	; arg7
+	mov	QWORD[56+rsp],rcx	; arg8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return 1 (ExceptionContinueSearch)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_rsaz_1024_sqr_avx2 wrt ..imagebase	; each entry: begin RVA, end RVA, unwind-info RVA
+	DD	$L$SEH_end_rsaz_1024_sqr_avx2 wrt ..imagebase
+	DD	$L$SEH_info_rsaz_1024_sqr_avx2 wrt ..imagebase
+
+	DD	$L$SEH_begin_rsaz_1024_mul_avx2 wrt ..imagebase
+	DD	$L$SEH_end_rsaz_1024_mul_avx2 wrt ..imagebase
+	DD	$L$SEH_info_rsaz_1024_mul_avx2 wrt ..imagebase
+
+	DD	$L$SEH_begin_rsaz_1024_gather5 wrt ..imagebase
+	DD	$L$SEH_end_rsaz_1024_gather5 wrt ..imagebase
+	DD	$L$SEH_info_rsaz_1024_gather5 wrt ..imagebase
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_rsaz_1024_sqr_avx2:
+DB	9,0,0,0	; UNWIND_INFO: version 1, flags 1 (exception handler present), no prolog bytes, no unwind codes
+	DD	rsaz_se_handler wrt ..imagebase	; handler RVA
+	DD	$L$sqr_1024_body wrt ..imagebase,$L$sqr_1024_epilogue wrt ..imagebase,$L$sqr_1024_in_tail wrt ..imagebase	; HandlerData[0..2] read by rsaz_se_handler
+	DD	0
+$L$SEH_info_rsaz_1024_mul_avx2:
+DB	9,0,0,0	; same header as above
+	DD	rsaz_se_handler wrt ..imagebase
+	DD	$L$mul_1024_body wrt ..imagebase,$L$mul_1024_epilogue wrt ..imagebase,$L$mul_1024_in_tail wrt ..imagebase	; HandlerData[0..2]
+	DD	0
+$L$SEH_info_rsaz_1024_gather5:
+DB	0x01,0x36,0x17,0x0b	; version 1 / no handler, prolog size 0x36, 0x17 unwind-code slots, frame register r11
+DB	0x36,0xf8,0x09,0x00	; prolog offset 0x36: save xmm15 at frame offset 0x90
+DB	0x31,0xe8,0x08,0x00	; prolog offset 0x31: save xmm14 at 0x80
+DB	0x2c,0xd8,0x07,0x00	; prolog offset 0x2c: save xmm13 at 0x70
+DB	0x27,0xc8,0x06,0x00	; prolog offset 0x27: save xmm12 at 0x60
+DB	0x22,0xb8,0x05,0x00	; prolog offset 0x22: save xmm11 at 0x50
+DB	0x1d,0xa8,0x04,0x00	; prolog offset 0x1d: save xmm10 at 0x40
+DB	0x18,0x98,0x03,0x00	; prolog offset 0x18: save xmm9 at 0x30
+DB	0x13,0x88,0x02,0x00	; prolog offset 0x13: save xmm8 at 0x20
+DB	0x0e,0x78,0x01,0x00	; prolog offset 0x0e: save xmm7 at 0x10
+DB	0x09,0x68,0x00,0x00	; prolog offset 0x09: save xmm6 at 0x00
+DB	0x04,0x01,0x15,0x00	; prolog offset 0x04: large stack allocation of 0x15*8 = 0xa8 bytes
+DB	0x00,0xb3,0x00,0x00	; prolog offset 0x00: set frame pointer (r11); last two bytes pad to an even slot count
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
new file mode 100644
index 0000000..1654df1
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/sha1-x86_64.asm
@@ -0,0 +1,5774 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+EXTERN	OPENSSL_ia32cap_P
+
+global	sha1_block_data_order
+
+ALIGN	16
+sha1_block_data_order:	; entry point: selects an implementation from the OPENSSL_ia32cap_P capability words
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp	; rax = entry rsp; _shaext_shortcut addresses its XMM save area relative to rax
+$L$SEH_begin_sha1_block_data_order:
+	mov	rdi,rcx	; Win64 arguments rcx,rdx,r8 are moved into rdi,rsi,rdx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+
+	lea	r10,[OPENSSL_ia32cap_P]
+	mov	r9d,DWORD[r10]	; capability word 0
+	mov	r8d,DWORD[4+r10]	; capability word 1
+	mov	r10d,DWORD[8+r10]	; capability word 2
+	test	r8d,512	; word 1 bit 9 (SSSE3 per the OPENSSL_ia32cap layout)
+	jz	NEAR $L$ialu	; bit clear: integer-only code
+	test	r10d,536870912	; word 2 bit 29 (SHA extensions)
+	jnz	NEAR _shaext_shortcut
+	and	r10d,296	; word 2 bits 3, 5 and 8 (BMI1, AVX2, BMI2)
+	cmp	r10d,296
+	je	NEAR _avx2_shortcut	; taken only when all three bits are set
+	and	r8d,268435456	; word 1 bit 28 (AVX)
+	and	r9d,1073741824	; word 0 bit 30 (presumably the "Intel CPU" marker -- TODO confirm)
+	or	r8d,r9d
+	cmp	r8d,1342177280	; both of the two bits above set
+	je	NEAR _avx_shortcut
+	jmp	NEAR _ssse3_shortcut
+
+ALIGN	16
+$L$ialu:	; integer-only path: set up the frame and load the state before entering $L$loop
+	mov	rax,rsp	; remember the entry rsp for the epilogue
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	mov	r8,rdi	; r8 = state pointer (argument 1)
+	sub	rsp,72	; 64-byte schedule buffer plus 8 bytes for the saved rsp
+	mov	r9,rsi	; r9 = input pointer (argument 2)
+	and	rsp,-64	; align the buffer to 64 bytes
+	mov	r10,rdx	; r10 = number of 64-byte blocks (argument 3)
+	mov	QWORD[64+rsp],rax	; keep the entry rsp just above the buffer
+
+$L$prologue:
+
+	mov	esi,DWORD[r8]	; load the five 32-bit state words
+	mov	edi,DWORD[4+r8]
+	mov	r11d,DWORD[8+r8]
+	mov	r12d,DWORD[12+r8]
+	mov	r13d,DWORD[16+r8]
+	jmp	NEAR $L$loop
+
+ALIGN	16
+$L$loop:	; one iteration per 64-byte block; working variables a..e start in esi,edi,r11d,r12d,r13d and rotate roles each round
+	mov	edx,DWORD[r9]	; rounds 0-14 read the input dwords and byte-swap them
+	bswap	edx
+	mov	ebp,DWORD[4+r9]
+	mov	eax,r12d
+	mov	DWORD[rsp],edx	; each word is also stored in the 16-dword buffer at rsp
+	mov	ecx,esi
+	bswap	ebp
+	xor	eax,r11d
+	rol	ecx,5
+	and	eax,edi
+	lea	r13d,[1518500249+r13*1+rdx]	; e += K + W, K = 0x5A827999 (rounds 0-19)
+	add	r13d,ecx
+	xor	eax,r12d	; eax = ((c ^ d) & b) ^ d
+	rol	edi,30
+	add	r13d,eax
+	mov	r14d,DWORD[8+r9]
+	mov	eax,r11d
+	mov	DWORD[4+rsp],ebp
+	mov	ecx,r13d
+	bswap	r14d
+	xor	eax,edi
+	rol	ecx,5
+	and	eax,esi
+	lea	r12d,[1518500249+r12*1+rbp]
+	add	r12d,ecx
+	xor	eax,r11d
+	rol	esi,30
+	add	r12d,eax
+	mov	edx,DWORD[12+r9]
+	mov	eax,edi
+	mov	DWORD[8+rsp],r14d
+	mov	ecx,r12d
+	bswap	edx
+	xor	eax,esi
+	rol	ecx,5
+	and	eax,r13d
+	lea	r11d,[1518500249+r11*1+r14]
+	add	r11d,ecx
+	xor	eax,edi
+	rol	r13d,30
+	add	r11d,eax
+	mov	ebp,DWORD[16+r9]
+	mov	eax,esi
+	mov	DWORD[12+rsp],edx
+	mov	ecx,r11d
+	bswap	ebp
+	xor	eax,r13d
+	rol	ecx,5
+	and	eax,r12d
+	lea	edi,[1518500249+rdi*1+rdx]
+	add	edi,ecx
+	xor	eax,esi
+	rol	r12d,30
+	add	edi,eax
+	mov	r14d,DWORD[20+r9]
+	mov	eax,r13d
+	mov	DWORD[16+rsp],ebp
+	mov	ecx,edi
+	bswap	r14d
+	xor	eax,r12d
+	rol	ecx,5
+	and	eax,r11d
+	lea	esi,[1518500249+rsi*1+rbp]
+	add	esi,ecx
+	xor	eax,r13d
+	rol	r11d,30
+	add	esi,eax
+	mov	edx,DWORD[24+r9]
+	mov	eax,r12d
+	mov	DWORD[20+rsp],r14d
+	mov	ecx,esi
+	bswap	edx
+	xor	eax,r11d
+	rol	ecx,5
+	and	eax,edi
+	lea	r13d,[1518500249+r13*1+r14]
+	add	r13d,ecx
+	xor	eax,r12d
+	rol	edi,30
+	add	r13d,eax
+	mov	ebp,DWORD[28+r9]
+	mov	eax,r11d
+	mov	DWORD[24+rsp],edx
+	mov	ecx,r13d
+	bswap	ebp
+	xor	eax,edi
+	rol	ecx,5
+	and	eax,esi
+	lea	r12d,[1518500249+r12*1+rdx]
+	add	r12d,ecx
+	xor	eax,r11d
+	rol	esi,30
+	add	r12d,eax
+	mov	r14d,DWORD[32+r9]
+	mov	eax,edi
+	mov	DWORD[28+rsp],ebp
+	mov	ecx,r12d
+	bswap	r14d
+	xor	eax,esi
+	rol	ecx,5
+	and	eax,r13d
+	lea	r11d,[1518500249+r11*1+rbp]
+	add	r11d,ecx
+	xor	eax,edi
+	rol	r13d,30
+	add	r11d,eax
+	mov	edx,DWORD[36+r9]
+	mov	eax,esi
+	mov	DWORD[32+rsp],r14d
+	mov	ecx,r11d
+	bswap	edx
+	xor	eax,r13d
+	rol	ecx,5
+	and	eax,r12d
+	lea	edi,[1518500249+rdi*1+r14]
+	add	edi,ecx
+	xor	eax,esi
+	rol	r12d,30
+	add	edi,eax
+	mov	ebp,DWORD[40+r9]
+	mov	eax,r13d
+	mov	DWORD[36+rsp],edx
+	mov	ecx,edi
+	bswap	ebp
+	xor	eax,r12d
+	rol	ecx,5
+	and	eax,r11d
+	lea	esi,[1518500249+rsi*1+rdx]
+	add	esi,ecx
+	xor	eax,r13d
+	rol	r11d,30
+	add	esi,eax
+	mov	r14d,DWORD[44+r9]
+	mov	eax,r12d
+	mov	DWORD[40+rsp],ebp
+	mov	ecx,esi
+	bswap	r14d
+	xor	eax,r11d
+	rol	ecx,5
+	and	eax,edi
+	lea	r13d,[1518500249+r13*1+rbp]
+	add	r13d,ecx
+	xor	eax,r12d
+	rol	edi,30
+	add	r13d,eax
+	mov	edx,DWORD[48+r9]
+	mov	eax,r11d
+	mov	DWORD[44+rsp],r14d
+	mov	ecx,r13d
+	bswap	edx
+	xor	eax,edi
+	rol	ecx,5
+	and	eax,esi
+	lea	r12d,[1518500249+r12*1+r14]
+	add	r12d,ecx
+	xor	eax,r11d
+	rol	esi,30
+	add	r12d,eax
+	mov	ebp,DWORD[52+r9]
+	mov	eax,edi
+	mov	DWORD[48+rsp],edx
+	mov	ecx,r12d
+	bswap	ebp
+	xor	eax,esi
+	rol	ecx,5
+	and	eax,r13d
+	lea	r11d,[1518500249+r11*1+rdx]
+	add	r11d,ecx
+	xor	eax,edi
+	rol	r13d,30
+	add	r11d,eax
+	mov	r14d,DWORD[56+r9]
+	mov	eax,esi
+	mov	DWORD[52+rsp],ebp
+	mov	ecx,r11d
+	bswap	r14d
+	xor	eax,r13d
+	rol	ecx,5
+	and	eax,r12d
+	lea	edi,[1518500249+rdi*1+rbp]
+	add	edi,ecx
+	xor	eax,esi
+	rol	r12d,30
+	add	edi,eax
+	mov	edx,DWORD[60+r9]
+	mov	eax,r13d
+	mov	DWORD[56+rsp],r14d
+	mov	ecx,edi
+	bswap	edx
+	xor	eax,r12d
+	rol	ecx,5
+	and	eax,r11d
+	lea	esi,[1518500249+rsi*1+r14]
+	add	esi,ecx
+	xor	eax,r13d
+	rol	r11d,30
+	add	esi,eax
+	xor	ebp,DWORD[rsp]	; from round 15 on the next word is W[t] = rol(W[t-3]^W[t-8]^W[t-14]^W[t-16],1), with the rsp buffer used as a 16-entry ring
+	mov	eax,r12d
+	mov	DWORD[60+rsp],edx
+	mov	ecx,esi
+	xor	ebp,DWORD[8+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	ebp,DWORD[32+rsp]
+	and	eax,edi
+	lea	r13d,[1518500249+r13*1+rdx]
+	rol	edi,30
+	xor	eax,r12d
+	add	r13d,ecx
+	rol	ebp,1
+	add	r13d,eax
+	xor	r14d,DWORD[4+rsp]
+	mov	eax,r11d
+	mov	DWORD[rsp],ebp
+	mov	ecx,r13d
+	xor	r14d,DWORD[12+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	r14d,DWORD[36+rsp]
+	and	eax,esi
+	lea	r12d,[1518500249+r12*1+rbp]
+	rol	esi,30
+	xor	eax,r11d
+	add	r12d,ecx
+	rol	r14d,1
+	add	r12d,eax
+	xor	edx,DWORD[8+rsp]
+	mov	eax,edi
+	mov	DWORD[4+rsp],r14d
+	mov	ecx,r12d
+	xor	edx,DWORD[16+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	edx,DWORD[40+rsp]
+	and	eax,r13d
+	lea	r11d,[1518500249+r11*1+r14]
+	rol	r13d,30
+	xor	eax,edi
+	add	r11d,ecx
+	rol	edx,1
+	add	r11d,eax
+	xor	ebp,DWORD[12+rsp]
+	mov	eax,esi
+	mov	DWORD[8+rsp],edx
+	mov	ecx,r11d
+	xor	ebp,DWORD[20+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	ebp,DWORD[44+rsp]
+	and	eax,r12d
+	lea	edi,[1518500249+rdi*1+rdx]
+	rol	r12d,30
+	xor	eax,esi
+	add	edi,ecx
+	rol	ebp,1
+	add	edi,eax
+	xor	r14d,DWORD[16+rsp]
+	mov	eax,r13d
+	mov	DWORD[12+rsp],ebp
+	mov	ecx,edi
+	xor	r14d,DWORD[24+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	r14d,DWORD[48+rsp]
+	and	eax,r11d
+	lea	esi,[1518500249+rsi*1+rbp]
+	rol	r11d,30
+	xor	eax,r13d
+	add	esi,ecx
+	rol	r14d,1
+	add	esi,eax
+	xor	edx,DWORD[20+rsp]
+	mov	eax,edi
+	mov	DWORD[16+rsp],r14d
+	mov	ecx,esi
+	xor	edx,DWORD[28+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	edx,DWORD[52+rsp]
+	lea	r13d,[1859775393+r13*1+r14]	; K = 0x6ED9EBA1 (rounds 20-39); round function is b ^ c ^ d
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	edx,1
+	xor	ebp,DWORD[24+rsp]
+	mov	eax,esi
+	mov	DWORD[20+rsp],edx
+	mov	ecx,r13d
+	xor	ebp,DWORD[32+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	ebp,DWORD[56+rsp]
+	lea	r12d,[1859775393+r12*1+rdx]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[28+rsp]
+	mov	eax,r13d
+	mov	DWORD[24+rsp],ebp
+	mov	ecx,r12d
+	xor	r14d,DWORD[36+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	r14d,DWORD[60+rsp]
+	lea	r11d,[1859775393+r11*1+rbp]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	r14d,1
+	xor	edx,DWORD[32+rsp]
+	mov	eax,r12d
+	mov	DWORD[28+rsp],r14d
+	mov	ecx,r11d
+	xor	edx,DWORD[40+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	edx,DWORD[rsp]
+	lea	edi,[1859775393+rdi*1+r14]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	edx,1
+	xor	ebp,DWORD[36+rsp]
+	mov	eax,r11d
+	mov	DWORD[32+rsp],edx
+	mov	ecx,edi
+	xor	ebp,DWORD[44+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	ebp,DWORD[4+rsp]
+	lea	esi,[1859775393+rsi*1+rdx]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	ebp,1
+	xor	r14d,DWORD[40+rsp]
+	mov	eax,edi
+	mov	DWORD[36+rsp],ebp
+	mov	ecx,esi
+	xor	r14d,DWORD[48+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	r14d,DWORD[8+rsp]
+	lea	r13d,[1859775393+r13*1+rbp]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	r14d,1
+	xor	edx,DWORD[44+rsp]
+	mov	eax,esi
+	mov	DWORD[40+rsp],r14d
+	mov	ecx,r13d
+	xor	edx,DWORD[52+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	edx,DWORD[12+rsp]
+	lea	r12d,[1859775393+r12*1+r14]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	edx,1
+	xor	ebp,DWORD[48+rsp]
+	mov	eax,r13d
+	mov	DWORD[44+rsp],edx
+	mov	ecx,r12d
+	xor	ebp,DWORD[56+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	ebp,DWORD[16+rsp]
+	lea	r11d,[1859775393+r11*1+rdx]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[52+rsp]
+	mov	eax,r12d
+	mov	DWORD[48+rsp],ebp
+	mov	ecx,r11d
+	xor	r14d,DWORD[60+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	r14d,DWORD[20+rsp]
+	lea	edi,[1859775393+rdi*1+rbp]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	r14d,1
+	xor	edx,DWORD[56+rsp]
+	mov	eax,r11d
+	mov	DWORD[52+rsp],r14d
+	mov	ecx,edi
+	xor	edx,DWORD[rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	edx,DWORD[24+rsp]
+	lea	esi,[1859775393+rsi*1+r14]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	edx,1
+	xor	ebp,DWORD[60+rsp]
+	mov	eax,edi
+	mov	DWORD[56+rsp],edx
+	mov	ecx,esi
+	xor	ebp,DWORD[4+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	ebp,DWORD[28+rsp]
+	lea	r13d,[1859775393+r13*1+rdx]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[rsp]
+	mov	eax,esi
+	mov	DWORD[60+rsp],ebp
+	mov	ecx,r13d
+	xor	r14d,DWORD[8+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	r14d,DWORD[32+rsp]
+	lea	r12d,[1859775393+r12*1+rbp]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	r14d,1
+	xor	edx,DWORD[4+rsp]
+	mov	eax,r13d
+	mov	DWORD[rsp],r14d
+	mov	ecx,r12d
+	xor	edx,DWORD[12+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	edx,DWORD[36+rsp]
+	lea	r11d,[1859775393+r11*1+r14]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	edx,1
+	xor	ebp,DWORD[8+rsp]
+	mov	eax,r12d
+	mov	DWORD[4+rsp],edx
+	mov	ecx,r11d
+	xor	ebp,DWORD[16+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	ebp,DWORD[40+rsp]
+	lea	edi,[1859775393+rdi*1+rdx]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	ebp,1
+	xor	r14d,DWORD[12+rsp]
+	mov	eax,r11d
+	mov	DWORD[8+rsp],ebp
+	mov	ecx,edi
+	xor	r14d,DWORD[20+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	r14d,DWORD[44+rsp]
+	lea	esi,[1859775393+rsi*1+rbp]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	r14d,1
+	xor	edx,DWORD[16+rsp]
+	mov	eax,edi
+	mov	DWORD[12+rsp],r14d
+	mov	ecx,esi
+	xor	edx,DWORD[24+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	edx,DWORD[48+rsp]
+	lea	r13d,[1859775393+r13*1+r14]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	edx,1
+	xor	ebp,DWORD[20+rsp]
+	mov	eax,esi
+	mov	DWORD[16+rsp],edx
+	mov	ecx,r13d
+	xor	ebp,DWORD[28+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	ebp,DWORD[52+rsp]
+	lea	r12d,[1859775393+r12*1+rdx]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[24+rsp]
+	mov	eax,r13d
+	mov	DWORD[20+rsp],ebp
+	mov	ecx,r12d
+	xor	r14d,DWORD[32+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	r14d,DWORD[56+rsp]
+	lea	r11d,[1859775393+r11*1+rbp]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	r14d,1
+	xor	edx,DWORD[28+rsp]
+	mov	eax,r12d
+	mov	DWORD[24+rsp],r14d
+	mov	ecx,r11d
+	xor	edx,DWORD[36+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	edx,DWORD[60+rsp]
+	lea	edi,[1859775393+rdi*1+r14]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	edx,1
+	xor	ebp,DWORD[32+rsp]
+	mov	eax,r11d
+	mov	DWORD[28+rsp],edx
+	mov	ecx,edi
+	xor	ebp,DWORD[40+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	ebp,DWORD[rsp]
+	lea	esi,[1859775393+rsi*1+rdx]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	ebp,1
+	xor	r14d,DWORD[36+rsp]
+	mov	eax,r12d
+	mov	DWORD[32+rsp],ebp
+	mov	ebx,r12d
+	xor	r14d,DWORD[44+rsp]
+	and	eax,r11d
+	mov	ecx,esi
+	xor	r14d,DWORD[4+rsp]
+	lea	r13d,[((-1894007588))+r13*1+rbp]	; K = 0x8F1BBCDC (rounds 40-59); round function is (c & d) + ((c ^ d) & b), built in eax and ebx
+	xor	ebx,r11d
+	rol	ecx,5
+	add	r13d,eax
+	rol	r14d,1
+	and	ebx,edi
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,ebx
+	xor	edx,DWORD[40+rsp]
+	mov	eax,r11d
+	mov	DWORD[36+rsp],r14d
+	mov	ebx,r11d
+	xor	edx,DWORD[48+rsp]
+	and	eax,edi
+	mov	ecx,r13d
+	xor	edx,DWORD[8+rsp]
+	lea	r12d,[((-1894007588))+r12*1+r14]
+	xor	ebx,edi
+	rol	ecx,5
+	add	r12d,eax
+	rol	edx,1
+	and	ebx,esi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,ebx
+	xor	ebp,DWORD[44+rsp]
+	mov	eax,edi
+	mov	DWORD[40+rsp],edx
+	mov	ebx,edi
+	xor	ebp,DWORD[52+rsp]
+	and	eax,esi
+	mov	ecx,r12d
+	xor	ebp,DWORD[12+rsp]
+	lea	r11d,[((-1894007588))+r11*1+rdx]
+	xor	ebx,esi
+	rol	ecx,5
+	add	r11d,eax
+	rol	ebp,1
+	and	ebx,r13d
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,ebx
+	xor	r14d,DWORD[48+rsp]
+	mov	eax,esi
+	mov	DWORD[44+rsp],ebp
+	mov	ebx,esi
+	xor	r14d,DWORD[56+rsp]
+	and	eax,r13d
+	mov	ecx,r11d
+	xor	r14d,DWORD[16+rsp]
+	lea	edi,[((-1894007588))+rdi*1+rbp]
+	xor	ebx,r13d
+	rol	ecx,5
+	add	edi,eax
+	rol	r14d,1
+	and	ebx,r12d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,ebx
+	xor	edx,DWORD[52+rsp]
+	mov	eax,r13d
+	mov	DWORD[48+rsp],r14d
+	mov	ebx,r13d
+	xor	edx,DWORD[60+rsp]
+	and	eax,r12d
+	mov	ecx,edi
+	xor	edx,DWORD[20+rsp]
+	lea	esi,[((-1894007588))+rsi*1+r14]
+	xor	ebx,r12d
+	rol	ecx,5
+	add	esi,eax
+	rol	edx,1
+	and	ebx,r11d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,ebx
+	xor	ebp,DWORD[56+rsp]
+	mov	eax,r12d
+	mov	DWORD[52+rsp],edx
+	mov	ebx,r12d
+	xor	ebp,DWORD[rsp]
+	and	eax,r11d
+	mov	ecx,esi
+	xor	ebp,DWORD[24+rsp]
+	lea	r13d,[((-1894007588))+r13*1+rdx]
+	xor	ebx,r11d
+	rol	ecx,5
+	add	r13d,eax
+	rol	ebp,1
+	and	ebx,edi
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,ebx
+	xor	r14d,DWORD[60+rsp]
+	mov	eax,r11d
+	mov	DWORD[56+rsp],ebp
+	mov	ebx,r11d
+	xor	r14d,DWORD[4+rsp]
+	and	eax,edi
+	mov	ecx,r13d
+	xor	r14d,DWORD[28+rsp]
+	lea	r12d,[((-1894007588))+r12*1+rbp]
+	xor	ebx,edi
+	rol	ecx,5
+	add	r12d,eax
+	rol	r14d,1
+	and	ebx,esi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,ebx
+	xor	edx,DWORD[rsp]
+	mov	eax,edi
+	mov	DWORD[60+rsp],r14d
+	mov	ebx,edi
+	xor	edx,DWORD[8+rsp]
+	and	eax,esi
+	mov	ecx,r12d
+	xor	edx,DWORD[32+rsp]
+	lea	r11d,[((-1894007588))+r11*1+r14]
+	xor	ebx,esi
+	rol	ecx,5
+	add	r11d,eax
+	rol	edx,1
+	and	ebx,r13d
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,ebx
+	xor	ebp,DWORD[4+rsp]
+	mov	eax,esi
+	mov	DWORD[rsp],edx
+	mov	ebx,esi
+	xor	ebp,DWORD[12+rsp]
+	and	eax,r13d
+	mov	ecx,r11d
+	xor	ebp,DWORD[36+rsp]
+	lea	edi,[((-1894007588))+rdi*1+rdx]
+	xor	ebx,r13d
+	rol	ecx,5
+	add	edi,eax
+	rol	ebp,1
+	and	ebx,r12d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,ebx
+	xor	r14d,DWORD[8+rsp]
+	mov	eax,r13d
+	mov	DWORD[4+rsp],ebp
+	mov	ebx,r13d
+	xor	r14d,DWORD[16+rsp]
+	and	eax,r12d
+	mov	ecx,edi
+	xor	r14d,DWORD[40+rsp]
+	lea	esi,[((-1894007588))+rsi*1+rbp]
+	xor	ebx,r12d
+	rol	ecx,5
+	add	esi,eax
+	rol	r14d,1
+	and	ebx,r11d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,ebx
+	xor	edx,DWORD[12+rsp]
+	mov	eax,r12d
+	mov	DWORD[8+rsp],r14d
+	mov	ebx,r12d
+	xor	edx,DWORD[20+rsp]
+	and	eax,r11d
+	mov	ecx,esi
+	xor	edx,DWORD[44+rsp]
+	lea	r13d,[((-1894007588))+r13*1+r14]
+	xor	ebx,r11d
+	rol	ecx,5
+	add	r13d,eax
+	rol	edx,1
+	and	ebx,edi
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,ebx
+	xor	ebp,DWORD[16+rsp]
+	mov	eax,r11d
+	mov	DWORD[12+rsp],edx
+	mov	ebx,r11d
+	xor	ebp,DWORD[24+rsp]
+	and	eax,edi
+	mov	ecx,r13d
+	xor	ebp,DWORD[48+rsp]
+	lea	r12d,[((-1894007588))+r12*1+rdx]
+	xor	ebx,edi
+	rol	ecx,5
+	add	r12d,eax
+	rol	ebp,1
+	and	ebx,esi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,ebx
+	xor	r14d,DWORD[20+rsp]
+	mov	eax,edi
+	mov	DWORD[16+rsp],ebp
+	mov	ebx,edi
+	xor	r14d,DWORD[28+rsp]
+	and	eax,esi
+	mov	ecx,r12d
+	xor	r14d,DWORD[52+rsp]
+	lea	r11d,[((-1894007588))+r11*1+rbp]
+	xor	ebx,esi
+	rol	ecx,5
+	add	r11d,eax
+	rol	r14d,1
+	and	ebx,r13d
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,ebx
+	xor	edx,DWORD[24+rsp]
+	mov	eax,esi
+	mov	DWORD[20+rsp],r14d
+	mov	ebx,esi
+	xor	edx,DWORD[32+rsp]
+	and	eax,r13d
+	mov	ecx,r11d
+	xor	edx,DWORD[56+rsp]
+	lea	edi,[((-1894007588))+rdi*1+r14]
+	xor	ebx,r13d
+	rol	ecx,5
+	add	edi,eax
+	rol	edx,1
+	and	ebx,r12d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,ebx
+	xor	ebp,DWORD[28+rsp]
+	mov	eax,r13d
+	mov	DWORD[24+rsp],edx
+	mov	ebx,r13d
+	xor	ebp,DWORD[36+rsp]
+	and	eax,r12d
+	mov	ecx,edi
+	xor	ebp,DWORD[60+rsp]
+	lea	esi,[((-1894007588))+rsi*1+rdx]
+	xor	ebx,r12d
+	rol	ecx,5
+	add	esi,eax
+	rol	ebp,1
+	and	ebx,r11d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,ebx
+	xor	r14d,DWORD[32+rsp]
+	mov	eax,r12d
+	mov	DWORD[28+rsp],ebp
+	mov	ebx,r12d
+	xor	r14d,DWORD[40+rsp]
+	and	eax,r11d
+	mov	ecx,esi
+	xor	r14d,DWORD[rsp]
+	lea	r13d,[((-1894007588))+r13*1+rbp]
+	xor	ebx,r11d
+	rol	ecx,5
+	add	r13d,eax
+	rol	r14d,1
+	and	ebx,edi
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,ebx
+	xor	edx,DWORD[36+rsp]
+	mov	eax,r11d
+	mov	DWORD[32+rsp],r14d
+	mov	ebx,r11d
+	xor	edx,DWORD[44+rsp]
+	and	eax,edi
+	mov	ecx,r13d
+	xor	edx,DWORD[4+rsp]
+	lea	r12d,[((-1894007588))+r12*1+r14]
+	xor	ebx,edi
+	rol	ecx,5
+	add	r12d,eax
+	rol	edx,1
+	and	ebx,esi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,ebx
+	xor	ebp,DWORD[40+rsp]
+	mov	eax,edi
+	mov	DWORD[36+rsp],edx
+	mov	ebx,edi
+	xor	ebp,DWORD[48+rsp]
+	and	eax,esi
+	mov	ecx,r12d
+	xor	ebp,DWORD[8+rsp]
+	lea	r11d,[((-1894007588))+r11*1+rdx]
+	xor	ebx,esi
+	rol	ecx,5
+	add	r11d,eax
+	rol	ebp,1
+	and	ebx,r13d
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,ebx
+	xor	r14d,DWORD[44+rsp]
+	mov	eax,esi
+	mov	DWORD[40+rsp],ebp
+	mov	ebx,esi
+	xor	r14d,DWORD[52+rsp]
+	and	eax,r13d
+	mov	ecx,r11d
+	xor	r14d,DWORD[12+rsp]
+	lea	edi,[((-1894007588))+rdi*1+rbp]
+	xor	ebx,r13d
+	rol	ecx,5
+	add	edi,eax
+	rol	r14d,1
+	and	ebx,r12d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,ebx
+	xor	edx,DWORD[48+rsp]
+	mov	eax,r13d
+	mov	DWORD[44+rsp],r14d
+	mov	ebx,r13d
+	xor	edx,DWORD[56+rsp]
+	and	eax,r12d
+	mov	ecx,edi
+	xor	edx,DWORD[16+rsp]
+	lea	esi,[((-1894007588))+rsi*1+r14]
+	xor	ebx,r12d
+	rol	ecx,5
+	add	esi,eax
+	rol	edx,1
+	and	ebx,r11d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,ebx
+	xor	ebp,DWORD[52+rsp]
+	mov	eax,edi
+	mov	DWORD[48+rsp],edx
+	mov	ecx,esi
+	xor	ebp,DWORD[60+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	ebp,DWORD[20+rsp]
+	lea	r13d,[((-899497514))+r13*1+rdx]	; K = 0xCA62C1D6 (rounds 60-79); round function is b ^ c ^ d again
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[56+rsp]
+	mov	eax,esi
+	mov	DWORD[52+rsp],ebp
+	mov	ecx,r13d
+	xor	r14d,DWORD[rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	r14d,DWORD[24+rsp]
+	lea	r12d,[((-899497514))+r12*1+rbp]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	r14d,1
+	xor	edx,DWORD[60+rsp]
+	mov	eax,r13d
+	mov	DWORD[56+rsp],r14d
+	mov	ecx,r12d
+	xor	edx,DWORD[4+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	edx,DWORD[28+rsp]
+	lea	r11d,[((-899497514))+r11*1+r14]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	edx,1
+	xor	ebp,DWORD[rsp]
+	mov	eax,r12d
+	mov	DWORD[60+rsp],edx
+	mov	ecx,r11d
+	xor	ebp,DWORD[8+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	ebp,DWORD[32+rsp]
+	lea	edi,[((-899497514))+rdi*1+rdx]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	ebp,1
+	xor	r14d,DWORD[4+rsp]
+	mov	eax,r11d
+	mov	DWORD[rsp],ebp
+	mov	ecx,edi
+	xor	r14d,DWORD[12+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	r14d,DWORD[36+rsp]
+	lea	esi,[((-899497514))+rsi*1+rbp]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	r14d,1
+	xor	edx,DWORD[8+rsp]
+	mov	eax,edi
+	mov	DWORD[4+rsp],r14d
+	mov	ecx,esi
+	xor	edx,DWORD[16+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	edx,DWORD[40+rsp]
+	lea	r13d,[((-899497514))+r13*1+r14]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	edx,1
+	xor	ebp,DWORD[12+rsp]
+	mov	eax,esi
+	mov	DWORD[8+rsp],edx
+	mov	ecx,r13d
+	xor	ebp,DWORD[20+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	ebp,DWORD[44+rsp]
+	lea	r12d,[((-899497514))+r12*1+rdx]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[16+rsp]
+	mov	eax,r13d
+	mov	DWORD[12+rsp],ebp
+	mov	ecx,r12d
+	xor	r14d,DWORD[24+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	r14d,DWORD[48+rsp]
+	lea	r11d,[((-899497514))+r11*1+rbp]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	r14d,1
+	xor	edx,DWORD[20+rsp]
+	mov	eax,r12d
+	mov	DWORD[16+rsp],r14d
+	mov	ecx,r11d
+	xor	edx,DWORD[28+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	edx,DWORD[52+rsp]
+	lea	edi,[((-899497514))+rdi*1+r14]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	edx,1
+	xor	ebp,DWORD[24+rsp]
+	mov	eax,r11d
+	mov	DWORD[20+rsp],edx
+	mov	ecx,edi
+	xor	ebp,DWORD[32+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	ebp,DWORD[56+rsp]
+	lea	esi,[((-899497514))+rsi*1+rdx]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	ebp,1
+	xor	r14d,DWORD[28+rsp]
+	mov	eax,edi
+	mov	DWORD[24+rsp],ebp
+	mov	ecx,esi
+	xor	r14d,DWORD[36+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	r14d,DWORD[60+rsp]
+	lea	r13d,[((-899497514))+r13*1+rbp]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	r14d,1
+	xor	edx,DWORD[32+rsp]
+	mov	eax,esi
+	mov	DWORD[28+rsp],r14d
+	mov	ecx,r13d
+	xor	edx,DWORD[40+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	edx,DWORD[rsp]
+	lea	r12d,[((-899497514))+r12*1+r14]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	edx,1
+	xor	ebp,DWORD[36+rsp]
+	mov	eax,r13d
+
+	mov	ecx,r12d
+	xor	ebp,DWORD[44+rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	ebp,DWORD[4+rsp]
+	lea	r11d,[((-899497514))+r11*1+rdx]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[40+rsp]
+	mov	eax,r12d
+
+	mov	ecx,r11d
+	xor	r14d,DWORD[48+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	r14d,DWORD[8+rsp]
+	lea	edi,[((-899497514))+rdi*1+rbp]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	r14d,1
+	xor	edx,DWORD[44+rsp]
+	mov	eax,r11d
+
+	mov	ecx,edi
+	xor	edx,DWORD[52+rsp]
+	xor	eax,r13d
+	rol	ecx,5
+	xor	edx,DWORD[12+rsp]
+	lea	esi,[((-899497514))+rsi*1+r14]
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	rol	edx,1
+	xor	ebp,DWORD[48+rsp]
+	mov	eax,edi
+
+	mov	ecx,esi
+	xor	ebp,DWORD[56+rsp]
+	xor	eax,r12d
+	rol	ecx,5
+	xor	ebp,DWORD[16+rsp]
+	lea	r13d,[((-899497514))+r13*1+rdx]
+	xor	eax,r11d
+	add	r13d,ecx
+	rol	edi,30
+	add	r13d,eax
+	rol	ebp,1
+	xor	r14d,DWORD[52+rsp]
+	mov	eax,esi
+
+	mov	ecx,r13d
+	xor	r14d,DWORD[60+rsp]
+	xor	eax,r11d
+	rol	ecx,5
+	xor	r14d,DWORD[20+rsp]
+	lea	r12d,[((-899497514))+r12*1+rbp]
+	xor	eax,edi
+	add	r12d,ecx
+	rol	esi,30
+	add	r12d,eax
+	rol	r14d,1
+	xor	edx,DWORD[56+rsp]
+	mov	eax,r13d
+
+	mov	ecx,r12d
+	xor	edx,DWORD[rsp]
+	xor	eax,edi
+	rol	ecx,5
+	xor	edx,DWORD[24+rsp]
+	lea	r11d,[((-899497514))+r11*1+r14]
+	xor	eax,esi
+	add	r11d,ecx
+	rol	r13d,30
+	add	r11d,eax
+	rol	edx,1
+	xor	ebp,DWORD[60+rsp]
+	mov	eax,r12d
+
+	mov	ecx,r11d
+	xor	ebp,DWORD[4+rsp]
+	xor	eax,esi
+	rol	ecx,5
+	xor	ebp,DWORD[28+rsp]
+	lea	edi,[((-899497514))+rdi*1+rdx]
+	xor	eax,r13d
+	add	edi,ecx
+	rol	r12d,30
+	add	edi,eax
+	rol	ebp,1
+	mov	eax,r11d
+	mov	ecx,edi
+	xor	eax,r13d
+	lea	esi,[((-899497514))+rsi*1+rbp]
+	rol	ecx,5
+	xor	eax,r12d
+	add	esi,ecx
+	rol	r11d,30
+	add	esi,eax
+	add	esi,DWORD[r8]	; add the working variables to the previous state at r8
+	add	edi,DWORD[4+r8]
+	add	r11d,DWORD[8+r8]
+	add	r12d,DWORD[12+r8]
+	add	r13d,DWORD[16+r8]
+	mov	DWORD[r8],esi	; write the updated state back
+	mov	DWORD[4+r8],edi
+	mov	DWORD[8+r8],r11d
+	mov	DWORD[12+r8],r12d
+	mov	DWORD[16+r8],r13d
+
+	sub	r10,1	; one block consumed
+	lea	r9,[64+r9]	; advance the input pointer by 64 bytes; lea leaves the flags from sub intact
+	jnz	NEAR $L$loop	; repeat while the block count is non-zero
+
+	mov	rsi,QWORD[64+rsp]	; reload the entry rsp stored just above the schedule buffer
+
+	mov	r14,QWORD[((-40))+rsi]	; restore the five registers pushed on entry to the integer path
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; drop the frame
+
+$L$epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_sha1_block_data_order:
+
+ALIGN	32
+sha1_block_data_order_shaext:	; SHA-extension variant; SHA and PSHUFB instructions are emitted as raw DB bytes
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha1_block_data_order_shaext:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+_shaext_shortcut:	; also entered by jump from sha1_block_data_order, which has already set rdi, rsi, rdx and rax
+
+	lea	rsp,[((-72))+rsp]	; room for four XMM registers
+	movaps	XMMWORD[(-8-64)+rax],xmm6	; save xmm6-xmm9 below the entry rsp held in rax
+	movaps	XMMWORD[(-8-48)+rax],xmm7
+	movaps	XMMWORD[(-8-32)+rax],xmm8
+	movaps	XMMWORD[(-8-16)+rax],xmm9
+$L$prologue_shaext:
+	movdqu	xmm0,XMMWORD[rdi]	; first four state dwords
+	movd	xmm1,DWORD[16+rdi]	; fifth state dword
+	movdqa	xmm3,XMMWORD[((K_XX_XX+160))]	; pshufb control; presumably a byte-swap mask (K_XX_XX is defined outside this view)
+
+	movdqu	xmm4,XMMWORD[rsi]	; 64 input bytes go into xmm4..xmm7
+	pshufd	xmm0,xmm0,27	; 27 = 0x1b reverses the dword order
+	movdqu	xmm5,XMMWORD[16+rsi]
+	pshufd	xmm1,xmm1,27
+	movdqu	xmm6,XMMWORD[32+rsi]
+DB	102,15,56,0,227	; pshufb xmm4,xmm3
+	movdqu	xmm7,XMMWORD[48+rsi]
+DB	102,15,56,0,235	; pshufb xmm5,xmm3
+DB	102,15,56,0,243	; pshufb xmm6,xmm3
+	movdqa	xmm9,xmm1
+DB	102,15,56,0,251	; pshufb xmm7,xmm3
+	jmp	NEAR $L$oop_shaext
+
+ALIGN	16
+$L$oop_shaext:
+	dec	rdx	; block counter; the flags set here feed both cmovne and the jnz at the bottom
+	lea	r8,[64+rsi]	; address of the next block
+	paddd	xmm1,xmm4
+	cmovne	rsi,r8	; advance rsi unless this is the last block
+	movdqa	xmm8,xmm0	; keep a copy of xmm0 for the paddd at the end of the block
+DB	15,56,201,229	; sha1msg1 xmm4,xmm5
+	movdqa	xmm2,xmm0
+DB	15,58,204,193,0	; sha1rnds4 xmm0,xmm1,0
+DB	15,56,200,213	; sha1nexte xmm2,xmm5
+	pxor	xmm4,xmm6
+DB	15,56,201,238	; sha1msg1 xmm5,xmm6
+DB	15,56,202,231	; sha1msg2 xmm4,xmm7
+
+	movdqa	xmm1,xmm0
+DB	15,58,204,194,0
+DB	15,56,200,206
+	pxor	xmm5,xmm7
+DB	15,56,202,236
+DB	15,56,201,247
+	movdqa	xmm2,xmm0
+DB	15,58,204,193,0
+DB	15,56,200,215
+	pxor	xmm6,xmm4
+DB	15,56,201,252
+DB	15,56,202,245
+
+	movdqa	xmm1,xmm0
+DB	15,58,204,194,0
+DB	15,56,200,204
+	pxor	xmm7,xmm5
+DB	15,56,202,254
+DB	15,56,201,229
+	movdqa	xmm2,xmm0
+DB	15,58,204,193,0
+DB	15,56,200,213
+	pxor	xmm4,xmm6
+DB	15,56,201,238
+DB	15,56,202,231
+
+	movdqa	xmm1,xmm0
+DB	15,58,204,194,1	; sha1rnds4 immediate switches to 1
+DB	15,56,200,206
+	pxor	xmm5,xmm7
+DB	15,56,202,236
+DB	15,56,201,247
+	movdqa	xmm2,xmm0
+DB	15,58,204,193,1
+DB	15,56,200,215
+	pxor	xmm6,xmm4
+DB	15,56,201,252
+DB	15,56,202,245
+
+	movdqa	xmm1,xmm0
+DB	15,58,204,194,1
+DB	15,56,200,204
+	pxor	xmm7,xmm5
+DB	15,56,202,254
+DB	15,56,201,229
+	movdqa	xmm2,xmm0
+DB	15,58,204,193,1
+DB	15,56,200,213
+	pxor	xmm4,xmm6
+DB	15,56,201,238
+DB	15,56,202,231
+
+	movdqa	xmm1,xmm0
+DB	15,58,204,194,1
+DB	15,56,200,206
+	pxor	xmm5,xmm7
+DB	15,56,202,236
+DB	15,56,201,247
+	movdqa	xmm2,xmm0
+DB	15,58,204,193,2	; sha1rnds4 immediate switches to 2
+DB	15,56,200,215
+	pxor	xmm6,xmm4
+DB	15,56,201,252
+DB	15,56,202,245
+
+	movdqa	xmm1,xmm0
+DB	15,58,204,194,2
+DB	15,56,200,204
+	pxor	xmm7,xmm5
+DB	15,56,202,254
+DB	15,56,201,229
+	movdqa	xmm2,xmm0
+DB	15,58,204,193,2
+DB	15,56,200,213
+	pxor	xmm4,xmm6
+DB	15,56,201,238
+DB	15,56,202,231
+
+	movdqa	xmm1,xmm0
+DB	15,58,204,194,2
+DB	15,56,200,206
+	pxor	xmm5,xmm7
+DB	15,56,202,236
+DB	15,56,201,247
+	movdqa	xmm2,xmm0
+DB	15,58,204,193,2
+DB	15,56,200,215
+	pxor	xmm6,xmm4
+DB	15,56,201,252
+DB	15,56,202,245
+
+	movdqa	xmm1,xmm0
+DB	15,58,204,194,3	; sha1rnds4 immediate switches to 3
+DB	15,56,200,204
+	pxor	xmm7,xmm5
+DB	15,56,202,254
+	movdqu	xmm4,XMMWORD[rsi]	; start loading the next block (rsi is unchanged on the last iteration)
+	movdqa	xmm2,xmm0
+DB	15,58,204,193,3
+DB	15,56,200,213
+	movdqu	xmm5,XMMWORD[16+rsi]
+DB	102,15,56,0,227	; pshufb xmm4,xmm3
+
+	movdqa	xmm1,xmm0
+DB	15,58,204,194,3
+DB	15,56,200,206
+	movdqu	xmm6,XMMWORD[32+rsi]
+DB	102,15,56,0,235	; pshufb xmm5,xmm3
+
+	movdqa	xmm2,xmm0
+DB	15,58,204,193,3
+DB	15,56,200,215
+	movdqu	xmm7,XMMWORD[48+rsi]
+DB	102,15,56,0,243	; pshufb xmm6,xmm3
+
+	movdqa	xmm1,xmm0
+DB	15,58,204,194,3
+DB	65,15,56,200,201	; sha1nexte xmm1,xmm9 (REX prefix selects xmm9)
+DB	102,15,56,0,251	; pshufb xmm7,xmm3
+
+	paddd	xmm0,xmm8	; add the copy of xmm0 taken at the top of the block
+	movdqa	xmm9,xmm1
+
+	jnz	NEAR $L$oop_shaext	; still testing the flags from dec rdx at the top of the loop
+
+	pshufd	xmm0,xmm0,27	; undo the dword reversal before storing
+	pshufd	xmm1,xmm1,27
+	movdqu	XMMWORD[rdi],xmm0
+	movd	DWORD[16+rdi],xmm1
+	movaps	xmm6,XMMWORD[((-8-64))+rax]	; restore xmm6-xmm9
+	movaps	xmm7,XMMWORD[((-8-48))+rax]
+	movaps	xmm8,XMMWORD[((-8-32))+rax]
+	movaps	xmm9,XMMWORD[((-8-16))+rax]
+	mov	rsp,rax
+$L$epilogue_shaext:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_sha1_block_data_order_shaext:
+
+ALIGN	16
+sha1_block_data_order_ssse3:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha1_block_data_order_ssse3:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+_ssse3_shortcut:
+
+	mov	r11,rsp
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	lea	rsp,[((-160))+rsp]
+	movaps	XMMWORD[(-40-96)+r11],xmm6
+	movaps	XMMWORD[(-40-80)+r11],xmm7
+	movaps	XMMWORD[(-40-64)+r11],xmm8
+	movaps	XMMWORD[(-40-48)+r11],xmm9
+	movaps	XMMWORD[(-40-32)+r11],xmm10
+	movaps	XMMWORD[(-40-16)+r11],xmm11
+$L$prologue_ssse3:	; set up pointers, load the five state dwords and pre-process the first 64 input bytes
+	and	rsp,-64	; round rsp down to a 64-byte boundary; [rsp .. rsp+63] is the scratch area used below
+	mov	r8,rdi	; r8 = first argument (the five 32-bit state words are read from here)
+	mov	r9,rsi	; r9 = second argument (current input pointer)
+	mov	r10,rdx	; r10 = third argument
+
+	shl	r10,6	; r10 *= 64
+	add	r10,r9	; r10 = input pointer + 64*count, i.e. the end pointer compared against r9 in the loop
+	lea	r14,[((K_XX_XX+64))]	; r14 = K_XX_XX+64; the table itself is defined outside this block
+
+	mov	eax,DWORD[r8]	; working registers eax,ebx,ecx,edx,ebp = state dwords at r8+0,4,8,12,16
+	mov	ebx,DWORD[4+r8]
+	mov	ecx,DWORD[8+r8]
+	mov	edx,DWORD[12+r8]
+	mov	esi,ebx
+	mov	ebp,DWORD[16+r8]
+	mov	edi,ecx
+	xor	edi,edx
+	and	esi,edi	; esi = ebx & (ecx ^ edx), consumed by the first instructions of the loop
+
+	movdqa	xmm6,XMMWORD[64+r14]	; 16 bytes at K_XX_XX+128, used as the pshufb control mask below
+	movdqa	xmm9,XMMWORD[((-64))+r14]	; 16 bytes at K_XX_XX+0
+	movdqu	xmm0,XMMWORD[r9]	; unaligned load of the first 64 input bytes into xmm0..xmm3
+	movdqu	xmm1,XMMWORD[16+r9]
+	movdqu	xmm2,XMMWORD[32+r9]
+	movdqu	xmm3,XMMWORD[48+r9]
+DB	102,15,56,0,198	; bytes 66 0F 38 00 C6 = pshufb xmm0,xmm6
+DB	102,15,56,0,206	; bytes 66 0F 38 00 CE = pshufb xmm1,xmm6
+DB	102,15,56,0,214	; bytes 66 0F 38 00 D6 = pshufb xmm2,xmm6
+	add	r9,64	; advance the input pointer past the block just loaded
+	paddd	xmm0,xmm9	; add the constant vector to each shuffled block before storing it
+DB	102,15,56,0,222	; bytes 66 0F 38 00 DE = pshufb xmm3,xmm6
+	paddd	xmm1,xmm9
+	paddd	xmm2,xmm9
+	movdqa	XMMWORD[rsp],xmm0	; scratch[0..15] = shuffled input + constant
+	psubd	xmm0,xmm9	; subtract the constant again so xmm0 holds the plain shuffled input
+	movdqa	XMMWORD[16+rsp],xmm1
+	psubd	xmm1,xmm9
+	movdqa	XMMWORD[32+rsp],xmm2
+	psubd	xmm2,xmm9
+	jmp	NEAR $L$oop_ssse3	; scratch[48..63] is filled inside the loop from xmm3
+ALIGN	16
+$L$oop_ssse3:	; one pass per 64-byte block: adds 80 scratch dwords (offsets 0..60 of rsp, five times over) into the rotating registers eax,ebx,ecx,edx,ebp. NOTE(review): presumably the 80 SHA-1 rounds interleaved with the SIMD message schedule -- confirm against the generator
+	ror	ebx,2
+	pshufd	xmm4,xmm0,238
+	xor	esi,edx
+	movdqa	xmm8,xmm3
+	paddd	xmm9,xmm3	; xmm9 = constant vector + xmm3; stored to scratch[48..63] below
+	mov	edi,eax
+	add	ebp,DWORD[rsp]	; scratch dword 0, written before loop entry or near the end of the previous pass
+	punpcklqdq	xmm4,xmm1
+	xor	ebx,ecx
+	rol	eax,5
+	add	ebp,esi
+	psrldq	xmm8,4
+	and	edi,ebx
+	xor	ebx,ecx
+	pxor	xmm4,xmm0
+	add	ebp,eax
+	ror	eax,7	; undoes the rol 5 above and leaves eax rotated right by 2 overall
+	pxor	xmm8,xmm2
+	xor	edi,ecx
+	mov	esi,ebp
+	add	edx,DWORD[4+rsp]
+	pxor	xmm4,xmm8
+	xor	eax,ebx
+	rol	ebp,5
+	movdqa	XMMWORD[48+rsp],xmm9	; scratch[48..63] = xmm3 + constant
+	add	edx,edi
+	and	esi,eax
+	movdqa	xmm10,xmm4
+	xor	eax,ebx
+	add	edx,ebp
+	ror	ebp,7
+	movdqa	xmm8,xmm4
+	xor	esi,ebx
+	pslldq	xmm10,12
+	paddd	xmm4,xmm4
+	mov	edi,edx
+	add	ecx,DWORD[8+rsp]
+	psrld	xmm8,31
+	xor	ebp,eax
+	rol	edx,5
+	add	ecx,esi
+	movdqa	xmm9,xmm10
+	and	edi,ebp
+	xor	ebp,eax
+	psrld	xmm10,30
+	add	ecx,edx
+	ror	edx,7
+	por	xmm4,xmm8
+	xor	edi,eax
+	mov	esi,ecx
+	add	ebx,DWORD[12+rsp]
+	pslld	xmm9,2
+	pxor	xmm4,xmm10
+	xor	edx,ebp
+	movdqa	xmm10,XMMWORD[((-64))+r14]	; reload the 16-byte constant at K_XX_XX+0
+	rol	ecx,5
+	add	ebx,edi
+	and	esi,edx
+	pxor	xmm4,xmm9
+	xor	edx,ebp
+	add	ebx,ecx
+	ror	ecx,7
+	pshufd	xmm5,xmm1,238
+	xor	esi,ebp
+	movdqa	xmm9,xmm4
+	paddd	xmm10,xmm4
+	mov	edi,ebx
+	add	eax,DWORD[16+rsp]
+	punpcklqdq	xmm5,xmm2
+	xor	ecx,edx
+	rol	ebx,5
+	add	eax,esi
+	psrldq	xmm9,4
+	and	edi,ecx
+	xor	ecx,edx
+	pxor	xmm5,xmm1
+	add	eax,ebx
+	ror	ebx,7
+	pxor	xmm9,xmm3
+	xor	edi,edx
+	mov	esi,eax
+	add	ebp,DWORD[20+rsp]
+	pxor	xmm5,xmm9
+	xor	ebx,ecx
+	rol	eax,5
+	movdqa	XMMWORD[rsp],xmm10	; scratch[0..15] = xmm4 + constant (old contents were consumed above)
+	add	ebp,edi
+	and	esi,ebx
+	movdqa	xmm8,xmm5
+	xor	ebx,ecx
+	add	ebp,eax
+	ror	eax,7
+	movdqa	xmm9,xmm5
+	xor	esi,ecx
+	pslldq	xmm8,12
+	paddd	xmm5,xmm5
+	mov	edi,ebp
+	add	edx,DWORD[24+rsp]
+	psrld	xmm9,31
+	xor	eax,ebx
+	rol	ebp,5
+	add	edx,esi
+	movdqa	xmm10,xmm8
+	and	edi,eax
+	xor	eax,ebx
+	psrld	xmm8,30
+	add	edx,ebp
+	ror	ebp,7
+	por	xmm5,xmm9
+	xor	edi,ebx
+	mov	esi,edx
+	add	ecx,DWORD[28+rsp]
+	pslld	xmm10,2
+	pxor	xmm5,xmm8
+	xor	ebp,eax
+	movdqa	xmm8,XMMWORD[((-32))+r14]	; switch to the 16-byte constant at K_XX_XX+32
+	rol	edx,5
+	add	ecx,edi
+	and	esi,ebp
+	pxor	xmm5,xmm10
+	xor	ebp,eax
+	add	ecx,edx
+	ror	edx,7
+	pshufd	xmm6,xmm2,238
+	xor	esi,eax
+	movdqa	xmm10,xmm5
+	paddd	xmm8,xmm5
+	mov	edi,ecx
+	add	ebx,DWORD[32+rsp]
+	punpcklqdq	xmm6,xmm3
+	xor	edx,ebp
+	rol	ecx,5
+	add	ebx,esi
+	psrldq	xmm10,4
+	and	edi,edx
+	xor	edx,ebp
+	pxor	xmm6,xmm2
+	add	ebx,ecx
+	ror	ecx,7
+	pxor	xmm10,xmm4
+	xor	edi,ebp
+	mov	esi,ebx
+	add	eax,DWORD[36+rsp]
+	pxor	xmm6,xmm10
+	xor	ecx,edx
+	rol	ebx,5
+	movdqa	XMMWORD[16+rsp],xmm8
+	add	eax,edi
+	and	esi,ecx
+	movdqa	xmm9,xmm6
+	xor	ecx,edx
+	add	eax,ebx
+	ror	ebx,7
+	movdqa	xmm10,xmm6
+	xor	esi,edx
+	pslldq	xmm9,12
+	paddd	xmm6,xmm6
+	mov	edi,eax
+	add	ebp,DWORD[40+rsp]
+	psrld	xmm10,31
+	xor	ebx,ecx
+	rol	eax,5
+	add	ebp,esi
+	movdqa	xmm8,xmm9
+	and	edi,ebx
+	xor	ebx,ecx
+	psrld	xmm9,30
+	add	ebp,eax
+	ror	eax,7
+	por	xmm6,xmm10
+	xor	edi,ecx
+	mov	esi,ebp
+	add	edx,DWORD[44+rsp]
+	pslld	xmm8,2
+	pxor	xmm6,xmm9
+	xor	eax,ebx
+	movdqa	xmm9,XMMWORD[((-32))+r14]
+	rol	ebp,5
+	add	edx,edi
+	and	esi,eax
+	pxor	xmm6,xmm8
+	xor	eax,ebx
+	add	edx,ebp
+	ror	ebp,7
+	pshufd	xmm7,xmm3,238
+	xor	esi,ebx
+	movdqa	xmm8,xmm6
+	paddd	xmm9,xmm6
+	mov	edi,edx
+	add	ecx,DWORD[48+rsp]
+	punpcklqdq	xmm7,xmm4
+	xor	ebp,eax
+	rol	edx,5
+	add	ecx,esi
+	psrldq	xmm8,4
+	and	edi,ebp
+	xor	ebp,eax
+	pxor	xmm7,xmm3
+	add	ecx,edx
+	ror	edx,7
+	pxor	xmm8,xmm5
+	xor	edi,eax
+	mov	esi,ecx
+	add	ebx,DWORD[52+rsp]
+	pxor	xmm7,xmm8
+	xor	edx,ebp
+	rol	ecx,5
+	movdqa	XMMWORD[32+rsp],xmm9
+	add	ebx,edi
+	and	esi,edx
+	movdqa	xmm10,xmm7
+	xor	edx,ebp
+	add	ebx,ecx
+	ror	ecx,7
+	movdqa	xmm8,xmm7
+	xor	esi,ebp
+	pslldq	xmm10,12
+	paddd	xmm7,xmm7
+	mov	edi,ebx
+	add	eax,DWORD[56+rsp]
+	psrld	xmm8,31
+	xor	ecx,edx
+	rol	ebx,5
+	add	eax,esi
+	movdqa	xmm9,xmm10
+	and	edi,ecx
+	xor	ecx,edx
+	psrld	xmm10,30
+	add	eax,ebx
+	ror	ebx,7
+	por	xmm7,xmm8
+	xor	edi,edx
+	mov	esi,eax
+	add	ebp,DWORD[60+rsp]
+	pslld	xmm9,2
+	pxor	xmm7,xmm10
+	xor	ebx,ecx
+	movdqa	xmm10,XMMWORD[((-32))+r14]
+	rol	eax,5
+	add	ebp,edi
+	and	esi,ebx
+	pxor	xmm7,xmm9
+	pshufd	xmm9,xmm6,238
+	xor	ebx,ecx
+	add	ebp,eax
+	ror	eax,7
+	pxor	xmm0,xmm4
+	xor	esi,ecx
+	mov	edi,ebp
+	add	edx,DWORD[rsp]	; second pass over scratch offsets 0..60 starts here
+	punpcklqdq	xmm9,xmm7
+	xor	eax,ebx
+	rol	ebp,5
+	pxor	xmm0,xmm1
+	add	edx,esi
+	and	edi,eax
+	movdqa	xmm8,xmm10
+	xor	eax,ebx
+	paddd	xmm10,xmm7
+	add	edx,ebp
+	pxor	xmm0,xmm9
+	ror	ebp,7
+	xor	edi,ebx
+	mov	esi,edx
+	add	ecx,DWORD[4+rsp]
+	movdqa	xmm9,xmm0
+	xor	ebp,eax
+	rol	edx,5
+	movdqa	XMMWORD[48+rsp],xmm10
+	add	ecx,edi
+	and	esi,ebp
+	xor	ebp,eax
+	pslld	xmm0,2
+	add	ecx,edx
+	ror	edx,7
+	psrld	xmm9,30
+	xor	esi,eax
+	mov	edi,ecx
+	add	ebx,DWORD[8+rsp]
+	por	xmm0,xmm9
+	xor	edx,ebp
+	rol	ecx,5
+	pshufd	xmm10,xmm7,238
+	add	ebx,esi
+	and	edi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[12+rsp]
+	xor	edi,ebp
+	mov	esi,ebx
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,edx
+	ror	ecx,7
+	add	eax,ebx
+	pxor	xmm1,xmm5
+	add	ebp,DWORD[16+rsp]
+	xor	esi,ecx
+	punpcklqdq	xmm10,xmm0
+	mov	edi,eax
+	rol	eax,5
+	pxor	xmm1,xmm2
+	add	ebp,esi
+	xor	edi,ecx
+	movdqa	xmm9,xmm8
+	ror	ebx,7
+	paddd	xmm8,xmm0
+	add	ebp,eax
+	pxor	xmm1,xmm10
+	add	edx,DWORD[20+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	movdqa	xmm10,xmm1
+	add	edx,edi
+	xor	esi,ebx
+	movdqa	XMMWORD[rsp],xmm8
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[24+rsp]
+	pslld	xmm1,2
+	xor	esi,eax
+	mov	edi,edx
+	psrld	xmm10,30
+	rol	edx,5
+	add	ecx,esi
+	xor	edi,eax
+	ror	ebp,7
+	por	xmm1,xmm10
+	add	ecx,edx
+	add	ebx,DWORD[28+rsp]
+	pshufd	xmm8,xmm0,238
+	xor	edi,ebp
+	mov	esi,ecx
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	pxor	xmm2,xmm6
+	add	eax,DWORD[32+rsp]
+	xor	esi,edx
+	punpcklqdq	xmm8,xmm1
+	mov	edi,ebx
+	rol	ebx,5
+	pxor	xmm2,xmm3
+	add	eax,esi
+	xor	edi,edx
+	movdqa	xmm10,XMMWORD[r14]	; switch to the 16-byte constant at K_XX_XX+64
+	ror	ecx,7
+	paddd	xmm9,xmm1
+	add	eax,ebx
+	pxor	xmm2,xmm8
+	add	ebp,DWORD[36+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	rol	eax,5
+	movdqa	xmm8,xmm2
+	add	ebp,edi
+	xor	esi,ecx
+	movdqa	XMMWORD[16+rsp],xmm9
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[40+rsp]
+	pslld	xmm2,2
+	xor	esi,ebx
+	mov	edi,ebp
+	psrld	xmm8,30
+	rol	ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	ror	eax,7
+	por	xmm2,xmm8
+	add	edx,ebp
+	add	ecx,DWORD[44+rsp]
+	pshufd	xmm9,xmm1,238
+	xor	edi,eax
+	mov	esi,edx
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,eax
+	ror	ebp,7
+	add	ecx,edx
+	pxor	xmm3,xmm7
+	add	ebx,DWORD[48+rsp]
+	xor	esi,ebp
+	punpcklqdq	xmm9,xmm2
+	mov	edi,ecx
+	rol	ecx,5
+	pxor	xmm3,xmm4
+	add	ebx,esi
+	xor	edi,ebp
+	movdqa	xmm8,xmm10
+	ror	edx,7
+	paddd	xmm10,xmm2
+	add	ebx,ecx
+	pxor	xmm3,xmm9
+	add	eax,DWORD[52+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	rol	ebx,5
+	movdqa	xmm9,xmm3
+	add	eax,edi
+	xor	esi,edx
+	movdqa	XMMWORD[32+rsp],xmm10
+	ror	ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[56+rsp]
+	pslld	xmm3,2
+	xor	esi,ecx
+	mov	edi,eax
+	psrld	xmm9,30
+	rol	eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	ror	ebx,7
+	por	xmm3,xmm9
+	add	ebp,eax
+	add	edx,DWORD[60+rsp]
+	pshufd	xmm10,xmm2,238
+	xor	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	ror	eax,7
+	add	edx,ebp
+	pxor	xmm4,xmm0
+	add	ecx,DWORD[rsp]	; third pass over scratch offsets 0..60 starts here
+	xor	esi,eax
+	punpcklqdq	xmm10,xmm3
+	mov	edi,edx
+	rol	edx,5
+	pxor	xmm4,xmm5
+	add	ecx,esi
+	xor	edi,eax
+	movdqa	xmm9,xmm8
+	ror	ebp,7
+	paddd	xmm8,xmm3
+	add	ecx,edx
+	pxor	xmm4,xmm10
+	add	ebx,DWORD[4+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	rol	ecx,5
+	movdqa	xmm10,xmm4
+	add	ebx,edi
+	xor	esi,ebp
+	movdqa	XMMWORD[48+rsp],xmm8
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[8+rsp]
+	pslld	xmm4,2
+	xor	esi,edx
+	mov	edi,ebx
+	psrld	xmm10,30
+	rol	ebx,5
+	add	eax,esi
+	xor	edi,edx
+	ror	ecx,7
+	por	xmm4,xmm10
+	add	eax,ebx
+	add	ebp,DWORD[12+rsp]
+	pshufd	xmm8,xmm3,238
+	xor	edi,ecx
+	mov	esi,eax
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	pxor	xmm5,xmm1
+	add	edx,DWORD[16+rsp]
+	xor	esi,ebx
+	punpcklqdq	xmm8,xmm4
+	mov	edi,ebp
+	rol	ebp,5
+	pxor	xmm5,xmm6
+	add	edx,esi
+	xor	edi,ebx
+	movdqa	xmm10,xmm9
+	ror	eax,7
+	paddd	xmm9,xmm4
+	add	edx,ebp
+	pxor	xmm5,xmm8
+	add	ecx,DWORD[20+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	rol	edx,5
+	movdqa	xmm8,xmm5
+	add	ecx,edi
+	xor	esi,eax
+	movdqa	XMMWORD[rsp],xmm9
+	ror	ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[24+rsp]
+	pslld	xmm5,2
+	xor	esi,ebp
+	mov	edi,ecx
+	psrld	xmm8,30
+	rol	ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	ror	edx,7
+	por	xmm5,xmm8
+	add	ebx,ecx
+	add	eax,DWORD[28+rsp]
+	pshufd	xmm9,xmm4,238
+	ror	ecx,7
+	mov	esi,ebx
+	xor	edi,edx
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	pxor	xmm6,xmm2
+	add	ebp,DWORD[32+rsp]
+	and	esi,ecx
+	xor	ecx,edx
+	ror	ebx,7
+	punpcklqdq	xmm9,xmm5
+	mov	edi,eax
+	xor	esi,ecx
+	pxor	xmm6,xmm7
+	rol	eax,5
+	add	ebp,esi
+	movdqa	xmm8,xmm10
+	xor	edi,ebx
+	paddd	xmm10,xmm5
+	xor	ebx,ecx
+	pxor	xmm6,xmm9
+	add	ebp,eax
+	add	edx,DWORD[36+rsp]
+	and	edi,ebx
+	xor	ebx,ecx
+	ror	eax,7
+	movdqa	xmm9,xmm6
+	mov	esi,ebp
+	xor	edi,ebx
+	movdqa	XMMWORD[16+rsp],xmm10
+	rol	ebp,5
+	add	edx,edi
+	xor	esi,eax
+	pslld	xmm6,2
+	xor	eax,ebx
+	add	edx,ebp
+	psrld	xmm9,30
+	add	ecx,DWORD[40+rsp]
+	and	esi,eax
+	xor	eax,ebx
+	por	xmm6,xmm9
+	ror	ebp,7
+	mov	edi,edx
+	xor	esi,eax
+	rol	edx,5
+	pshufd	xmm10,xmm5,238
+	add	ecx,esi
+	xor	edi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	add	ebx,DWORD[44+rsp]
+	and	edi,ebp
+	xor	ebp,eax
+	ror	edx,7
+	mov	esi,ecx
+	xor	edi,ebp
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	pxor	xmm7,xmm3
+	add	eax,DWORD[48+rsp]
+	and	esi,edx
+	xor	edx,ebp
+	ror	ecx,7
+	punpcklqdq	xmm10,xmm6
+	mov	edi,ebx
+	xor	esi,edx
+	pxor	xmm7,xmm0
+	rol	ebx,5
+	add	eax,esi
+	movdqa	xmm9,XMMWORD[32+r14]	; switch to the 16-byte constant at K_XX_XX+96
+	xor	edi,ecx
+	paddd	xmm8,xmm6
+	xor	ecx,edx
+	pxor	xmm7,xmm10
+	add	eax,ebx
+	add	ebp,DWORD[52+rsp]
+	and	edi,ecx
+	xor	ecx,edx
+	ror	ebx,7
+	movdqa	xmm10,xmm7
+	mov	esi,eax
+	xor	edi,ecx
+	movdqa	XMMWORD[32+rsp],xmm8
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ebx
+	pslld	xmm7,2
+	xor	ebx,ecx
+	add	ebp,eax
+	psrld	xmm10,30
+	add	edx,DWORD[56+rsp]
+	and	esi,ebx
+	xor	ebx,ecx
+	por	xmm7,xmm10
+	ror	eax,7
+	mov	edi,ebp
+	xor	esi,ebx
+	rol	ebp,5
+	pshufd	xmm8,xmm6,238
+	add	edx,esi
+	xor	edi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	add	ecx,DWORD[60+rsp]
+	and	edi,eax
+	xor	eax,ebx
+	ror	ebp,7
+	mov	esi,edx
+	xor	edi,eax
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	pxor	xmm0,xmm4
+	add	ebx,DWORD[rsp]	; fourth pass over scratch offsets 0..60 starts here
+	and	esi,ebp
+	xor	ebp,eax
+	ror	edx,7
+	punpcklqdq	xmm8,xmm7
+	mov	edi,ecx
+	xor	esi,ebp
+	pxor	xmm0,xmm1
+	rol	ecx,5
+	add	ebx,esi
+	movdqa	xmm10,xmm9
+	xor	edi,edx
+	paddd	xmm9,xmm7
+	xor	edx,ebp
+	pxor	xmm0,xmm8
+	add	ebx,ecx
+	add	eax,DWORD[4+rsp]
+	and	edi,edx
+	xor	edx,ebp
+	ror	ecx,7
+	movdqa	xmm8,xmm0
+	mov	esi,ebx
+	xor	edi,edx
+	movdqa	XMMWORD[48+rsp],xmm9
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,ecx
+	pslld	xmm0,2
+	xor	ecx,edx
+	add	eax,ebx
+	psrld	xmm8,30
+	add	ebp,DWORD[8+rsp]
+	and	esi,ecx
+	xor	ecx,edx
+	por	xmm0,xmm8
+	ror	ebx,7
+	mov	edi,eax
+	xor	esi,ecx
+	rol	eax,5
+	pshufd	xmm9,xmm7,238
+	add	ebp,esi
+	xor	edi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	add	edx,DWORD[12+rsp]
+	and	edi,ebx
+	xor	ebx,ecx
+	ror	eax,7
+	mov	esi,ebp
+	xor	edi,ebx
+	rol	ebp,5
+	add	edx,edi
+	xor	esi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	pxor	xmm1,xmm5
+	add	ecx,DWORD[16+rsp]
+	and	esi,eax
+	xor	eax,ebx
+	ror	ebp,7
+	punpcklqdq	xmm9,xmm0
+	mov	edi,edx
+	xor	esi,eax
+	pxor	xmm1,xmm2
+	rol	edx,5
+	add	ecx,esi
+	movdqa	xmm8,xmm10
+	xor	edi,ebp
+	paddd	xmm10,xmm0
+	xor	ebp,eax
+	pxor	xmm1,xmm9
+	add	ecx,edx
+	add	ebx,DWORD[20+rsp]
+	and	edi,ebp
+	xor	ebp,eax
+	ror	edx,7
+	movdqa	xmm9,xmm1
+	mov	esi,ecx
+	xor	edi,ebp
+	movdqa	XMMWORD[rsp],xmm10
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,edx
+	pslld	xmm1,2
+	xor	edx,ebp
+	add	ebx,ecx
+	psrld	xmm9,30
+	add	eax,DWORD[24+rsp]
+	and	esi,edx
+	xor	edx,ebp
+	por	xmm1,xmm9
+	ror	ecx,7
+	mov	edi,ebx
+	xor	esi,edx
+	rol	ebx,5
+	pshufd	xmm10,xmm0,238
+	add	eax,esi
+	xor	edi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	ebp,DWORD[28+rsp]
+	and	edi,ecx
+	xor	ecx,edx
+	ror	ebx,7
+	mov	esi,eax
+	xor	edi,ecx
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	pxor	xmm2,xmm6
+	add	edx,DWORD[32+rsp]
+	and	esi,ebx
+	xor	ebx,ecx
+	ror	eax,7
+	punpcklqdq	xmm10,xmm1
+	mov	edi,ebp
+	xor	esi,ebx
+	pxor	xmm2,xmm3
+	rol	ebp,5
+	add	edx,esi
+	movdqa	xmm9,xmm8
+	xor	edi,eax
+	paddd	xmm8,xmm1
+	xor	eax,ebx
+	pxor	xmm2,xmm10
+	add	edx,ebp
+	add	ecx,DWORD[36+rsp]
+	and	edi,eax
+	xor	eax,ebx
+	ror	ebp,7
+	movdqa	xmm10,xmm2
+	mov	esi,edx
+	xor	edi,eax
+	movdqa	XMMWORD[16+rsp],xmm8
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,ebp
+	pslld	xmm2,2
+	xor	ebp,eax
+	add	ecx,edx
+	psrld	xmm10,30
+	add	ebx,DWORD[40+rsp]
+	and	esi,ebp
+	xor	ebp,eax
+	por	xmm2,xmm10
+	ror	edx,7
+	mov	edi,ecx
+	xor	esi,ebp
+	rol	ecx,5
+	pshufd	xmm8,xmm1,238
+	add	ebx,esi
+	xor	edi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[44+rsp]
+	and	edi,edx
+	xor	edx,ebp
+	ror	ecx,7
+	mov	esi,ebx
+	xor	edi,edx
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,edx
+	add	eax,ebx
+	pxor	xmm3,xmm7
+	add	ebp,DWORD[48+rsp]
+	xor	esi,ecx
+	punpcklqdq	xmm8,xmm2
+	mov	edi,eax
+	rol	eax,5
+	pxor	xmm3,xmm4
+	add	ebp,esi
+	xor	edi,ecx
+	movdqa	xmm10,xmm9
+	ror	ebx,7
+	paddd	xmm9,xmm2
+	add	ebp,eax
+	pxor	xmm3,xmm8
+	add	edx,DWORD[52+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	movdqa	xmm8,xmm3
+	add	edx,edi
+	xor	esi,ebx
+	movdqa	XMMWORD[32+rsp],xmm9
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[56+rsp]
+	pslld	xmm3,2
+	xor	esi,eax
+	mov	edi,edx
+	psrld	xmm8,30
+	rol	edx,5
+	add	ecx,esi
+	xor	edi,eax
+	ror	ebp,7
+	por	xmm3,xmm8
+	add	ecx,edx
+	add	ebx,DWORD[60+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[rsp]	; fifth and last pass over scratch offsets 0..60 starts here
+	xor	esi,edx
+	mov	edi,ebx
+	rol	ebx,5
+	paddd	xmm10,xmm3
+	add	eax,esi
+	xor	edi,edx
+	movdqa	XMMWORD[48+rsp],xmm10
+	ror	ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[4+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[8+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	rol	ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[12+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,eax
+	ror	ebp,7
+	add	ecx,edx
+	cmp	r9,r10	; r9 already points past the current block; equal to the end pointer means no more input
+	je	NEAR $L$done_ssse3	; last block: finish the remaining 12 scratch dwords without loading more input
+	movdqa	xmm6,XMMWORD[64+r14]	; reload the pshufb control mask from K_XX_XX+128 (xmm6 was reused above)
+	movdqa	xmm9,XMMWORD[((-64))+r14]	; back to the constant at K_XX_XX+0 for the next block
+	movdqu	xmm0,XMMWORD[r9]	; unaligned load of the next 64 input bytes into xmm0..xmm3
+	movdqu	xmm1,XMMWORD[16+r9]
+	movdqu	xmm2,XMMWORD[32+r9]
+	movdqu	xmm3,XMMWORD[48+r9]
+DB	102,15,56,0,198	; bytes 66 0F 38 00 C6 = pshufb xmm0,xmm6
+	add	r9,64	; advance the input pointer past the block just loaded
+	add	ebx,DWORD[16+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+DB	102,15,56,0,206	; bytes 66 0F 38 00 CE = pshufb xmm1,xmm6
+	rol	ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	ror	edx,7
+	paddd	xmm0,xmm9
+	add	ebx,ecx
+	add	eax,DWORD[20+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	movdqa	XMMWORD[rsp],xmm0	; scratch[0..15] for the next pass = shuffled input + constant
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,edx
+	ror	ecx,7
+	psubd	xmm0,xmm9	; subtract the constant again so xmm0 holds the plain shuffled input
+	add	eax,ebx
+	add	ebp,DWORD[24+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	rol	eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[28+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[32+rsp]
+	xor	esi,eax
+	mov	edi,edx
+DB	102,15,56,0,214	; bytes 66 0F 38 00 D6 = pshufb xmm2,xmm6
+	rol	edx,5
+	add	ecx,esi
+	xor	edi,eax
+	ror	ebp,7
+	paddd	xmm1,xmm9
+	add	ecx,edx
+	add	ebx,DWORD[36+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	movdqa	XMMWORD[16+rsp],xmm1	; scratch[16..31] for the next pass (old contents consumed above)
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	ror	edx,7
+	psubd	xmm1,xmm9
+	add	ebx,ecx
+	add	eax,DWORD[40+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	rol	ebx,5
+	add	eax,esi
+	xor	edi,edx
+	ror	ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[44+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[48+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+DB	102,15,56,0,222	; bytes 66 0F 38 00 DE = pshufb xmm3,xmm6
+	rol	ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	ror	eax,7
+	paddd	xmm2,xmm9
+	add	edx,ebp
+	add	ecx,DWORD[52+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	movdqa	XMMWORD[32+rsp],xmm2	; scratch[32..47] for the next pass (old contents consumed above)
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,eax
+	ror	ebp,7
+	psubd	xmm2,xmm9
+	add	ecx,edx
+	add	ebx,DWORD[56+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	rol	ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[60+rsp]
+	xor	edi,edx
+	mov	esi,ebx	; esi = copy of ebx; the copy is what gets folded into the state word at r8+4 below
+	rol	ebx,5
+	add	eax,edi
+	ror	ecx,7
+	add	eax,ebx
+	add	eax,DWORD[r8]	; add the five state dwords at r8 to the working values, then store the sums back
+	add	esi,DWORD[4+r8]
+	add	ecx,DWORD[8+r8]
+	add	edx,DWORD[12+r8]
+	mov	DWORD[r8],eax
+	add	ebp,DWORD[16+r8]
+	mov	DWORD[4+r8],esi
+	mov	ebx,esi	; ebx = updated state word 1 for the next pass
+	mov	DWORD[8+r8],ecx
+	mov	edi,ecx
+	mov	DWORD[12+r8],edx
+	xor	edi,edx
+	mov	DWORD[16+r8],ebp
+	and	esi,edi	; esi = ebx & (ecx ^ edx), the same seed value computed before the first loop entry
+	jmp	NEAR $L$oop_ssse3
+
+ALIGN	16
+$L$done_ssse3:	; reached when r9 == r10: consume scratch offsets 16..60 with no further input loads, update the state, restore registers
+	add	ebx,DWORD[16+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	rol	ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[20+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	rol	ebx,5
+	add	eax,edi
+	xor	esi,edx
+	ror	ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[24+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	rol	eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[28+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	rol	ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[32+rsp]
+	xor	esi,eax
+	mov	edi,edx
+	rol	edx,5
+	add	ecx,esi
+	xor	edi,eax
+	ror	ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[36+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	rol	ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[40+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	rol	ebx,5
+	add	eax,esi
+	xor	edi,edx
+	ror	ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[44+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	rol	eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	ror	ebx,7
+	add	ebp,eax
+	add	edx,DWORD[48+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	rol	ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	ror	eax,7
+	add	edx,ebp
+	add	ecx,DWORD[52+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	rol	edx,5
+	add	ecx,edi
+	xor	esi,eax
+	ror	ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[56+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	rol	ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	ror	edx,7
+	add	ebx,ecx
+	add	eax,DWORD[60+rsp]
+	xor	edi,edx
+	mov	esi,ebx	; esi = copy of ebx; the copy is what gets folded into the state word at r8+4 below
+	rol	ebx,5
+	add	eax,edi
+	ror	ecx,7
+	add	eax,ebx
+	add	eax,DWORD[r8]	; add the five state dwords at r8 to the working values, then store the sums back
+	add	esi,DWORD[4+r8]
+	add	ecx,DWORD[8+r8]
+	mov	DWORD[r8],eax
+	add	edx,DWORD[12+r8]
+	mov	DWORD[4+r8],esi
+	add	ebp,DWORD[16+r8]
+	mov	DWORD[8+r8],ecx
+	mov	DWORD[12+r8],edx
+	mov	DWORD[16+r8],ebp
+	movaps	xmm6,XMMWORD[((-40-96))+r11]	; restore xmm6..xmm11 from the slots filled at function entry (r11-136 .. r11-56)
+	movaps	xmm7,XMMWORD[((-40-80))+r11]
+	movaps	xmm8,XMMWORD[((-40-64))+r11]
+	movaps	xmm9,XMMWORD[((-40-48))+r11]
+	movaps	xmm10,XMMWORD[((-40-32))+r11]
+	movaps	xmm11,XMMWORD[((-40-16))+r11]
+	mov	r14,QWORD[((-40))+r11]	; reload the five pushed registers by r11-relative address, not by pop (rsp was realigned)
+
+	mov	r13,QWORD[((-32))+r11]
+
+	mov	r12,QWORD[((-24))+r11]
+
+	mov	rbp,QWORD[((-16))+r11]
+
+	mov	rbx,QWORD[((-8))+r11]
+
+	lea	rsp,[r11]	; rsp = value it had at _ssse3_shortcut, releasing pushes and scratch in one step
+
+$L$epilogue_ssse3:	; rsp is back at its entry value here (restored from r11 just above)
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue: reload rdi/rsi from the slots written at function entry
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret (bytes F3 C3 = rep ret)
+
+$L$SEH_end_sha1_block_data_order_ssse3:
+
+ALIGN	16
+sha1_block_data_order_avx:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha1_block_data_order_avx:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+_avx_shortcut:
+
+	mov	r11,rsp
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	lea	rsp,[((-160))+rsp]
+	vzeroupper
+	vmovaps	XMMWORD[(-40-96)+r11],xmm6
+	vmovaps	XMMWORD[(-40-80)+r11],xmm7
+	vmovaps	XMMWORD[(-40-64)+r11],xmm8
+	vmovaps	XMMWORD[(-40-48)+r11],xmm9
+	vmovaps	XMMWORD[(-40-32)+r11],xmm10
+	vmovaps	XMMWORD[(-40-16)+r11],xmm11
+$L$prologue_avx:
+	and	rsp,-64
+	mov	r8,rdi
+	mov	r9,rsi
+	mov	r10,rdx
+
+	shl	r10,6
+	add	r10,r9
+	lea	r14,[((K_XX_XX+64))]
+
+	mov	eax,DWORD[r8]
+	mov	ebx,DWORD[4+r8]
+	mov	ecx,DWORD[8+r8]
+	mov	edx,DWORD[12+r8]
+	mov	esi,ebx
+	mov	ebp,DWORD[16+r8]
+	mov	edi,ecx
+	xor	edi,edx
+	and	esi,edi
+
+	vmovdqa	xmm6,XMMWORD[64+r14]
+	vmovdqa	xmm11,XMMWORD[((-64))+r14]
+	vmovdqu	xmm0,XMMWORD[r9]
+	vmovdqu	xmm1,XMMWORD[16+r9]
+	vmovdqu	xmm2,XMMWORD[32+r9]
+	vmovdqu	xmm3,XMMWORD[48+r9]
+	vpshufb	xmm0,xmm0,xmm6
+	add	r9,64
+	vpshufb	xmm1,xmm1,xmm6
+	vpshufb	xmm2,xmm2,xmm6
+	vpshufb	xmm3,xmm3,xmm6
+	vpaddd	xmm4,xmm0,xmm11
+	vpaddd	xmm5,xmm1,xmm11
+	vpaddd	xmm6,xmm2,xmm11
+	vmovdqa	XMMWORD[rsp],xmm4
+	vmovdqa	XMMWORD[16+rsp],xmm5
+	vmovdqa	XMMWORD[32+rsp],xmm6
+	jmp	NEAR $L$oop_avx
+ALIGN	16
+$L$oop_avx:
+	shrd	ebx,ebx,2
+	xor	esi,edx
+	vpalignr	xmm4,xmm1,xmm0,8
+	mov	edi,eax
+	add	ebp,DWORD[rsp]
+	vpaddd	xmm9,xmm11,xmm3
+	xor	ebx,ecx
+	shld	eax,eax,5
+	vpsrldq	xmm8,xmm3,4
+	add	ebp,esi
+	and	edi,ebx
+	vpxor	xmm4,xmm4,xmm0
+	xor	ebx,ecx
+	add	ebp,eax
+	vpxor	xmm8,xmm8,xmm2
+	shrd	eax,eax,7
+	xor	edi,ecx
+	mov	esi,ebp
+	add	edx,DWORD[4+rsp]
+	vpxor	xmm4,xmm4,xmm8
+	xor	eax,ebx
+	shld	ebp,ebp,5
+	vmovdqa	XMMWORD[48+rsp],xmm9
+	add	edx,edi
+	and	esi,eax
+	vpsrld	xmm8,xmm4,31
+	xor	eax,ebx
+	add	edx,ebp
+	shrd	ebp,ebp,7
+	xor	esi,ebx
+	vpslldq	xmm10,xmm4,12
+	vpaddd	xmm4,xmm4,xmm4
+	mov	edi,edx
+	add	ecx,DWORD[8+rsp]
+	xor	ebp,eax
+	shld	edx,edx,5
+	vpsrld	xmm9,xmm10,30
+	vpor	xmm4,xmm4,xmm8
+	add	ecx,esi
+	and	edi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	vpslld	xmm10,xmm10,2
+	vpxor	xmm4,xmm4,xmm9
+	shrd	edx,edx,7
+	xor	edi,eax
+	mov	esi,ecx
+	add	ebx,DWORD[12+rsp]
+	vpxor	xmm4,xmm4,xmm10
+	xor	edx,ebp
+	shld	ecx,ecx,5
+	add	ebx,edi
+	and	esi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	shrd	ecx,ecx,7
+	xor	esi,ebp
+	vpalignr	xmm5,xmm2,xmm1,8
+	mov	edi,ebx
+	add	eax,DWORD[16+rsp]
+	vpaddd	xmm9,xmm11,xmm4
+	xor	ecx,edx
+	shld	ebx,ebx,5
+	vpsrldq	xmm8,xmm4,4
+	add	eax,esi
+	and	edi,ecx
+	vpxor	xmm5,xmm5,xmm1
+	xor	ecx,edx
+	add	eax,ebx
+	vpxor	xmm8,xmm8,xmm3
+	shrd	ebx,ebx,7
+	xor	edi,edx
+	mov	esi,eax
+	add	ebp,DWORD[20+rsp]
+	vpxor	xmm5,xmm5,xmm8
+	xor	ebx,ecx
+	shld	eax,eax,5
+	vmovdqa	XMMWORD[rsp],xmm9
+	add	ebp,edi
+	and	esi,ebx
+	vpsrld	xmm8,xmm5,31
+	xor	ebx,ecx
+	add	ebp,eax
+	shrd	eax,eax,7
+	xor	esi,ecx
+	vpslldq	xmm10,xmm5,12
+	vpaddd	xmm5,xmm5,xmm5
+	mov	edi,ebp
+	add	edx,DWORD[24+rsp]
+	xor	eax,ebx
+	shld	ebp,ebp,5
+	vpsrld	xmm9,xmm10,30
+	vpor	xmm5,xmm5,xmm8
+	add	edx,esi
+	and	edi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	vpslld	xmm10,xmm10,2
+	vpxor	xmm5,xmm5,xmm9
+	shrd	ebp,ebp,7
+	xor	edi,ebx
+	mov	esi,edx
+	add	ecx,DWORD[28+rsp]
+	vpxor	xmm5,xmm5,xmm10
+	xor	ebp,eax
+	shld	edx,edx,5
+	vmovdqa	xmm11,XMMWORD[((-32))+r14]
+	add	ecx,edi
+	and	esi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	shrd	edx,edx,7
+	xor	esi,eax
+	vpalignr	xmm6,xmm3,xmm2,8
+	mov	edi,ecx
+	add	ebx,DWORD[32+rsp]
+	vpaddd	xmm9,xmm11,xmm5
+	xor	edx,ebp
+	shld	ecx,ecx,5
+	vpsrldq	xmm8,xmm5,4
+	add	ebx,esi
+	and	edi,edx
+	vpxor	xmm6,xmm6,xmm2
+	xor	edx,ebp
+	add	ebx,ecx
+	vpxor	xmm8,xmm8,xmm4
+	shrd	ecx,ecx,7
+	xor	edi,ebp
+	mov	esi,ebx
+	add	eax,DWORD[36+rsp]
+	vpxor	xmm6,xmm6,xmm8
+	xor	ecx,edx
+	shld	ebx,ebx,5
+	vmovdqa	XMMWORD[16+rsp],xmm9
+	add	eax,edi
+	and	esi,ecx
+	vpsrld	xmm8,xmm6,31
+	xor	ecx,edx
+	add	eax,ebx
+	shrd	ebx,ebx,7
+	xor	esi,edx
+	vpslldq	xmm10,xmm6,12
+	vpaddd	xmm6,xmm6,xmm6
+	mov	edi,eax
+	add	ebp,DWORD[40+rsp]
+	xor	ebx,ecx
+	shld	eax,eax,5
+	vpsrld	xmm9,xmm10,30
+	vpor	xmm6,xmm6,xmm8
+	add	ebp,esi
+	and	edi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	vpslld	xmm10,xmm10,2
+	vpxor	xmm6,xmm6,xmm9
+	shrd	eax,eax,7
+	xor	edi,ecx
+	mov	esi,ebp
+	add	edx,DWORD[44+rsp]
+	vpxor	xmm6,xmm6,xmm10
+	xor	eax,ebx
+	shld	ebp,ebp,5
+	add	edx,edi
+	and	esi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	shrd	ebp,ebp,7
+	xor	esi,ebx
+	vpalignr	xmm7,xmm4,xmm3,8
+	mov	edi,edx
+	add	ecx,DWORD[48+rsp]
+	vpaddd	xmm9,xmm11,xmm6
+	xor	ebp,eax
+	shld	edx,edx,5
+	vpsrldq	xmm8,xmm6,4
+	add	ecx,esi
+	and	edi,ebp
+	vpxor	xmm7,xmm7,xmm3
+	xor	ebp,eax
+	add	ecx,edx
+	vpxor	xmm8,xmm8,xmm5
+	shrd	edx,edx,7
+	xor	edi,eax
+	mov	esi,ecx
+	add	ebx,DWORD[52+rsp]
+	vpxor	xmm7,xmm7,xmm8
+	xor	edx,ebp
+	shld	ecx,ecx,5
+	vmovdqa	XMMWORD[32+rsp],xmm9
+	add	ebx,edi
+	and	esi,edx
+	vpsrld	xmm8,xmm7,31
+	xor	edx,ebp
+	add	ebx,ecx
+	shrd	ecx,ecx,7
+	xor	esi,ebp
+	vpslldq	xmm10,xmm7,12
+	vpaddd	xmm7,xmm7,xmm7
+	mov	edi,ebx
+	add	eax,DWORD[56+rsp]
+	xor	ecx,edx
+	shld	ebx,ebx,5
+	vpsrld	xmm9,xmm10,30
+	vpor	xmm7,xmm7,xmm8
+	add	eax,esi
+	and	edi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	vpslld	xmm10,xmm10,2
+	vpxor	xmm7,xmm7,xmm9
+	shrd	ebx,ebx,7
+	xor	edi,edx
+	mov	esi,eax
+	add	ebp,DWORD[60+rsp]
+	vpxor	xmm7,xmm7,xmm10
+	xor	ebx,ecx
+	shld	eax,eax,5
+	add	ebp,edi
+	and	esi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	vpalignr	xmm8,xmm7,xmm6,8
+	vpxor	xmm0,xmm0,xmm4
+	shrd	eax,eax,7
+	xor	esi,ecx
+	mov	edi,ebp
+	add	edx,DWORD[rsp]
+	vpxor	xmm0,xmm0,xmm1
+	xor	eax,ebx
+	shld	ebp,ebp,5
+	vpaddd	xmm9,xmm11,xmm7
+	add	edx,esi
+	and	edi,eax
+	vpxor	xmm0,xmm0,xmm8
+	xor	eax,ebx
+	add	edx,ebp
+	shrd	ebp,ebp,7
+	xor	edi,ebx
+	vpsrld	xmm8,xmm0,30
+	vmovdqa	XMMWORD[48+rsp],xmm9
+	mov	esi,edx
+	add	ecx,DWORD[4+rsp]
+	xor	ebp,eax
+	shld	edx,edx,5
+	vpslld	xmm0,xmm0,2
+	add	ecx,edi
+	and	esi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	shrd	edx,edx,7
+	xor	esi,eax
+	mov	edi,ecx
+	add	ebx,DWORD[8+rsp]
+	vpor	xmm0,xmm0,xmm8
+	xor	edx,ebp
+	shld	ecx,ecx,5
+	add	ebx,esi
+	and	edi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[12+rsp]
+	xor	edi,ebp
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpalignr	xmm8,xmm0,xmm7,8
+	vpxor	xmm1,xmm1,xmm5
+	add	ebp,DWORD[16+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	shld	eax,eax,5
+	vpxor	xmm1,xmm1,xmm2
+	add	ebp,esi
+	xor	edi,ecx
+	vpaddd	xmm9,xmm11,xmm0
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	vpxor	xmm1,xmm1,xmm8
+	add	edx,DWORD[20+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	shld	ebp,ebp,5
+	vpsrld	xmm8,xmm1,30
+	vmovdqa	XMMWORD[rsp],xmm9
+	add	edx,edi
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	vpslld	xmm1,xmm1,2
+	add	ecx,DWORD[24+rsp]
+	xor	esi,eax
+	mov	edi,edx
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	edi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vpor	xmm1,xmm1,xmm8
+	add	ebx,DWORD[28+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpalignr	xmm8,xmm1,xmm0,8
+	vpxor	xmm2,xmm2,xmm6
+	add	eax,DWORD[32+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	shld	ebx,ebx,5
+	vpxor	xmm2,xmm2,xmm3
+	add	eax,esi
+	xor	edi,edx
+	vpaddd	xmm9,xmm11,xmm1
+	vmovdqa	xmm11,XMMWORD[r14]
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpxor	xmm2,xmm2,xmm8
+	add	ebp,DWORD[36+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	vpsrld	xmm8,xmm2,30
+	vmovdqa	XMMWORD[16+rsp],xmm9
+	add	ebp,edi
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	vpslld	xmm2,xmm2,2
+	add	edx,DWORD[40+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	shld	ebp,ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	vpor	xmm2,xmm2,xmm8
+	add	ecx,DWORD[44+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vpalignr	xmm8,xmm2,xmm1,8
+	vpxor	xmm3,xmm3,xmm7
+	add	ebx,DWORD[48+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	vpxor	xmm3,xmm3,xmm4
+	add	ebx,esi
+	xor	edi,ebp
+	vpaddd	xmm9,xmm11,xmm2
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpxor	xmm3,xmm3,xmm8
+	add	eax,DWORD[52+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	vpsrld	xmm8,xmm3,30
+	vmovdqa	XMMWORD[32+rsp],xmm9
+	add	eax,edi
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpslld	xmm3,xmm3,2
+	add	ebp,DWORD[56+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	shld	eax,eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	vpor	xmm3,xmm3,xmm8
+	add	edx,DWORD[60+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	shld	ebp,ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	vpalignr	xmm8,xmm3,xmm2,8
+	vpxor	xmm4,xmm4,xmm0
+	add	ecx,DWORD[rsp]
+	xor	esi,eax
+	mov	edi,edx
+	shld	edx,edx,5
+	vpxor	xmm4,xmm4,xmm5
+	add	ecx,esi
+	xor	edi,eax
+	vpaddd	xmm9,xmm11,xmm3
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vpxor	xmm4,xmm4,xmm8
+	add	ebx,DWORD[4+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	vpsrld	xmm8,xmm4,30
+	vmovdqa	XMMWORD[48+rsp],xmm9
+	add	ebx,edi
+	xor	esi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpslld	xmm4,xmm4,2
+	add	eax,DWORD[8+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	edi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vpor	xmm4,xmm4,xmm8
+	add	ebp,DWORD[12+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	vpalignr	xmm8,xmm4,xmm3,8
+	vpxor	xmm5,xmm5,xmm1
+	add	edx,DWORD[16+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	shld	ebp,ebp,5
+	vpxor	xmm5,xmm5,xmm6
+	add	edx,esi
+	xor	edi,ebx
+	vpaddd	xmm9,xmm11,xmm4
+	shrd	eax,eax,7
+	add	edx,ebp
+	vpxor	xmm5,xmm5,xmm8
+	add	ecx,DWORD[20+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	vpsrld	xmm8,xmm5,30
+	vmovdqa	XMMWORD[rsp],xmm9
+	add	ecx,edi
+	xor	esi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vpslld	xmm5,xmm5,2
+	add	ebx,DWORD[24+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vpor	xmm5,xmm5,xmm8
+	add	eax,DWORD[28+rsp]
+	shrd	ecx,ecx,7
+	mov	esi,ebx
+	xor	edi,edx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	vpalignr	xmm8,xmm5,xmm4,8
+	vpxor	xmm6,xmm6,xmm2
+	add	ebp,DWORD[32+rsp]
+	and	esi,ecx
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	vpxor	xmm6,xmm6,xmm7
+	mov	edi,eax
+	xor	esi,ecx
+	vpaddd	xmm9,xmm11,xmm5
+	shld	eax,eax,5
+	add	ebp,esi
+	vpxor	xmm6,xmm6,xmm8
+	xor	edi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	add	edx,DWORD[36+rsp]
+	vpsrld	xmm8,xmm6,30
+	vmovdqa	XMMWORD[16+rsp],xmm9
+	and	edi,ebx
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	mov	esi,ebp
+	vpslld	xmm6,xmm6,2
+	xor	edi,ebx
+	shld	ebp,ebp,5
+	add	edx,edi
+	xor	esi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	add	ecx,DWORD[40+rsp]
+	and	esi,eax
+	vpor	xmm6,xmm6,xmm8
+	xor	eax,ebx
+	shrd	ebp,ebp,7
+	mov	edi,edx
+	xor	esi,eax
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	edi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	add	ebx,DWORD[44+rsp]
+	and	edi,ebp
+	xor	ebp,eax
+	shrd	edx,edx,7
+	mov	esi,ecx
+	xor	edi,ebp
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	vpalignr	xmm8,xmm6,xmm5,8
+	vpxor	xmm7,xmm7,xmm3
+	add	eax,DWORD[48+rsp]
+	and	esi,edx
+	xor	edx,ebp
+	shrd	ecx,ecx,7
+	vpxor	xmm7,xmm7,xmm0
+	mov	edi,ebx
+	xor	esi,edx
+	vpaddd	xmm9,xmm11,xmm6
+	vmovdqa	xmm11,XMMWORD[32+r14]
+	shld	ebx,ebx,5
+	add	eax,esi
+	vpxor	xmm7,xmm7,xmm8
+	xor	edi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	ebp,DWORD[52+rsp]
+	vpsrld	xmm8,xmm7,30
+	vmovdqa	XMMWORD[32+rsp],xmm9
+	and	edi,ecx
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	mov	esi,eax
+	vpslld	xmm7,xmm7,2
+	xor	edi,ecx
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	add	edx,DWORD[56+rsp]
+	and	esi,ebx
+	vpor	xmm7,xmm7,xmm8
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	mov	edi,ebp
+	xor	esi,ebx
+	shld	ebp,ebp,5
+	add	edx,esi
+	xor	edi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	add	ecx,DWORD[60+rsp]
+	and	edi,eax
+	xor	eax,ebx
+	shrd	ebp,ebp,7
+	mov	esi,edx
+	xor	edi,eax
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	vpalignr	xmm8,xmm7,xmm6,8
+	vpxor	xmm0,xmm0,xmm4
+	add	ebx,DWORD[rsp]
+	and	esi,ebp
+	xor	ebp,eax
+	shrd	edx,edx,7
+	vpxor	xmm0,xmm0,xmm1
+	mov	edi,ecx
+	xor	esi,ebp
+	vpaddd	xmm9,xmm11,xmm7
+	shld	ecx,ecx,5
+	add	ebx,esi
+	vpxor	xmm0,xmm0,xmm8
+	xor	edi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[4+rsp]
+	vpsrld	xmm8,xmm0,30
+	vmovdqa	XMMWORD[48+rsp],xmm9
+	and	edi,edx
+	xor	edx,ebp
+	shrd	ecx,ecx,7
+	mov	esi,ebx
+	vpslld	xmm0,xmm0,2
+	xor	edi,edx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	ebp,DWORD[8+rsp]
+	and	esi,ecx
+	vpor	xmm0,xmm0,xmm8
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	mov	edi,eax
+	xor	esi,ecx
+	shld	eax,eax,5
+	add	ebp,esi
+	xor	edi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	add	edx,DWORD[12+rsp]
+	and	edi,ebx
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	mov	esi,ebp
+	xor	edi,ebx
+	shld	ebp,ebp,5
+	add	edx,edi
+	xor	esi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	vpalignr	xmm8,xmm0,xmm7,8
+	vpxor	xmm1,xmm1,xmm5
+	add	ecx,DWORD[16+rsp]
+	and	esi,eax
+	xor	eax,ebx
+	shrd	ebp,ebp,7
+	vpxor	xmm1,xmm1,xmm2
+	mov	edi,edx
+	xor	esi,eax
+	vpaddd	xmm9,xmm11,xmm0
+	shld	edx,edx,5
+	add	ecx,esi
+	vpxor	xmm1,xmm1,xmm8
+	xor	edi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	add	ebx,DWORD[20+rsp]
+	vpsrld	xmm8,xmm1,30
+	vmovdqa	XMMWORD[rsp],xmm9
+	and	edi,ebp
+	xor	ebp,eax
+	shrd	edx,edx,7
+	mov	esi,ecx
+	vpslld	xmm1,xmm1,2
+	xor	edi,ebp
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[24+rsp]
+	and	esi,edx
+	vpor	xmm1,xmm1,xmm8
+	xor	edx,ebp
+	shrd	ecx,ecx,7
+	mov	edi,ebx
+	xor	esi,edx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	edi,ecx
+	xor	ecx,edx
+	add	eax,ebx
+	add	ebp,DWORD[28+rsp]
+	and	edi,ecx
+	xor	ecx,edx
+	shrd	ebx,ebx,7
+	mov	esi,eax
+	xor	edi,ecx
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ebx
+	xor	ebx,ecx
+	add	ebp,eax
+	vpalignr	xmm8,xmm1,xmm0,8
+	vpxor	xmm2,xmm2,xmm6
+	add	edx,DWORD[32+rsp]
+	and	esi,ebx
+	xor	ebx,ecx
+	shrd	eax,eax,7
+	vpxor	xmm2,xmm2,xmm3
+	mov	edi,ebp
+	xor	esi,ebx
+	vpaddd	xmm9,xmm11,xmm1
+	shld	ebp,ebp,5
+	add	edx,esi
+	vpxor	xmm2,xmm2,xmm8
+	xor	edi,eax
+	xor	eax,ebx
+	add	edx,ebp
+	add	ecx,DWORD[36+rsp]
+	vpsrld	xmm8,xmm2,30
+	vmovdqa	XMMWORD[16+rsp],xmm9
+	and	edi,eax
+	xor	eax,ebx
+	shrd	ebp,ebp,7
+	mov	esi,edx
+	vpslld	xmm2,xmm2,2
+	xor	edi,eax
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,ebp
+	xor	ebp,eax
+	add	ecx,edx
+	add	ebx,DWORD[40+rsp]
+	and	esi,ebp
+	vpor	xmm2,xmm2,xmm8
+	xor	ebp,eax
+	shrd	edx,edx,7
+	mov	edi,ecx
+	xor	esi,ebp
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	edi,edx
+	xor	edx,ebp
+	add	ebx,ecx
+	add	eax,DWORD[44+rsp]
+	and	edi,edx
+	xor	edx,ebp
+	shrd	ecx,ecx,7
+	mov	esi,ebx
+	xor	edi,edx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,edx
+	add	eax,ebx
+	vpalignr	xmm8,xmm2,xmm1,8
+	vpxor	xmm3,xmm3,xmm7
+	add	ebp,DWORD[48+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	shld	eax,eax,5
+	vpxor	xmm3,xmm3,xmm4
+	add	ebp,esi
+	xor	edi,ecx
+	vpaddd	xmm9,xmm11,xmm2
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	vpxor	xmm3,xmm3,xmm8
+	add	edx,DWORD[52+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	shld	ebp,ebp,5
+	vpsrld	xmm8,xmm3,30
+	vmovdqa	XMMWORD[32+rsp],xmm9
+	add	edx,edi
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	vpslld	xmm3,xmm3,2
+	add	ecx,DWORD[56+rsp]
+	xor	esi,eax
+	mov	edi,edx
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	edi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vpor	xmm3,xmm3,xmm8
+	add	ebx,DWORD[60+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[rsp]
+	vpaddd	xmm9,xmm11,xmm3
+	xor	esi,edx
+	mov	edi,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	vmovdqa	XMMWORD[48+rsp],xmm9
+	xor	edi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[4+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	add	edx,DWORD[8+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	shld	ebp,ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	add	ecx,DWORD[12+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	cmp	r9,r10
+	je	NEAR $L$done_avx
+	vmovdqa	xmm6,XMMWORD[64+r14]
+	vmovdqa	xmm11,XMMWORD[((-64))+r14]
+	vmovdqu	xmm0,XMMWORD[r9]
+	vmovdqu	xmm1,XMMWORD[16+r9]
+	vmovdqu	xmm2,XMMWORD[32+r9]
+	vmovdqu	xmm3,XMMWORD[48+r9]
+	vpshufb	xmm0,xmm0,xmm6
+	add	r9,64
+	add	ebx,DWORD[16+rsp]
+	xor	esi,ebp
+	vpshufb	xmm1,xmm1,xmm6
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	vpaddd	xmm4,xmm0,xmm11
+	add	ebx,esi
+	xor	edi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	vmovdqa	XMMWORD[rsp],xmm4
+	add	eax,DWORD[20+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[24+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	shld	eax,eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	add	edx,DWORD[28+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	shld	ebp,ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	add	ecx,DWORD[32+rsp]
+	xor	esi,eax
+	vpshufb	xmm2,xmm2,xmm6
+	mov	edi,edx
+	shld	edx,edx,5
+	vpaddd	xmm5,xmm1,xmm11
+	add	ecx,esi
+	xor	edi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	vmovdqa	XMMWORD[16+rsp],xmm5
+	add	ebx,DWORD[36+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[40+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	edi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[44+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	add	edx,DWORD[48+rsp]
+	xor	esi,ebx
+	vpshufb	xmm3,xmm3,xmm6
+	mov	edi,ebp
+	shld	ebp,ebp,5
+	vpaddd	xmm6,xmm2,xmm11
+	add	edx,esi
+	xor	edi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	vmovdqa	XMMWORD[32+rsp],xmm6
+	add	ecx,DWORD[52+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[56+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[60+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,edi
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	eax,DWORD[r8]
+	add	esi,DWORD[4+r8]
+	add	ecx,DWORD[8+r8]
+	add	edx,DWORD[12+r8]
+	mov	DWORD[r8],eax
+	add	ebp,DWORD[16+r8]
+	mov	DWORD[4+r8],esi
+	mov	ebx,esi
+	mov	DWORD[8+r8],ecx
+	mov	edi,ecx
+	mov	DWORD[12+r8],edx
+	xor	edi,edx
+	mov	DWORD[16+r8],ebp
+	and	esi,edi
+	jmp	NEAR $L$oop_avx
+
+ALIGN	16
+$L$done_avx:
+	add	ebx,DWORD[16+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[20+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,edi
+	xor	esi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[24+rsp]
+	xor	esi,ecx
+	mov	edi,eax
+	shld	eax,eax,5
+	add	ebp,esi
+	xor	edi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	add	edx,DWORD[28+rsp]
+	xor	edi,ebx
+	mov	esi,ebp
+	shld	ebp,ebp,5
+	add	edx,edi
+	xor	esi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	add	ecx,DWORD[32+rsp]
+	xor	esi,eax
+	mov	edi,edx
+	shld	edx,edx,5
+	add	ecx,esi
+	xor	edi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[36+rsp]
+	xor	edi,ebp
+	mov	esi,ecx
+	shld	ecx,ecx,5
+	add	ebx,edi
+	xor	esi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[40+rsp]
+	xor	esi,edx
+	mov	edi,ebx
+	shld	ebx,ebx,5
+	add	eax,esi
+	xor	edi,edx
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	add	ebp,DWORD[44+rsp]
+	xor	edi,ecx
+	mov	esi,eax
+	shld	eax,eax,5
+	add	ebp,edi
+	xor	esi,ecx
+	shrd	ebx,ebx,7
+	add	ebp,eax
+	add	edx,DWORD[48+rsp]
+	xor	esi,ebx
+	mov	edi,ebp
+	shld	ebp,ebp,5
+	add	edx,esi
+	xor	edi,ebx
+	shrd	eax,eax,7
+	add	edx,ebp
+	add	ecx,DWORD[52+rsp]
+	xor	edi,eax
+	mov	esi,edx
+	shld	edx,edx,5
+	add	ecx,edi
+	xor	esi,eax
+	shrd	ebp,ebp,7
+	add	ecx,edx
+	add	ebx,DWORD[56+rsp]
+	xor	esi,ebp
+	mov	edi,ecx
+	shld	ecx,ecx,5
+	add	ebx,esi
+	xor	edi,ebp
+	shrd	edx,edx,7
+	add	ebx,ecx
+	add	eax,DWORD[60+rsp]
+	xor	edi,edx
+	mov	esi,ebx
+	shld	ebx,ebx,5
+	add	eax,edi
+	shrd	ecx,ecx,7
+	add	eax,ebx
+	vzeroupper
+
+	add	eax,DWORD[r8]
+	add	esi,DWORD[4+r8]
+	add	ecx,DWORD[8+r8]
+	mov	DWORD[r8],eax
+	add	edx,DWORD[12+r8]
+	mov	DWORD[4+r8],esi
+	add	ebp,DWORD[16+r8]
+	mov	DWORD[8+r8],ecx
+	mov	DWORD[12+r8],edx
+	mov	DWORD[16+r8],ebp
+	movaps	xmm6,XMMWORD[((-40-96))+r11]
+	movaps	xmm7,XMMWORD[((-40-80))+r11]
+	movaps	xmm8,XMMWORD[((-40-64))+r11]
+	movaps	xmm9,XMMWORD[((-40-48))+r11]
+	movaps	xmm10,XMMWORD[((-40-32))+r11]
+	movaps	xmm11,XMMWORD[((-40-16))+r11]
+	mov	r14,QWORD[((-40))+r11]
+
+	mov	r13,QWORD[((-32))+r11]
+
+	mov	r12,QWORD[((-24))+r11]
+
+	mov	rbp,QWORD[((-16))+r11]
+
+	mov	rbx,QWORD[((-8))+r11]
+
+	lea	rsp,[r11]
+
+$L$epilogue_avx:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_sha1_block_data_order_avx:
+
+ALIGN	16
+sha1_block_data_order_avx2:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha1_block_data_order_avx2:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+_avx2_shortcut:
+
+	mov	r11,rsp
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	vzeroupper
+	lea	rsp,[((-96))+rsp]
+	vmovaps	XMMWORD[(-40-96)+r11],xmm6
+	vmovaps	XMMWORD[(-40-80)+r11],xmm7
+	vmovaps	XMMWORD[(-40-64)+r11],xmm8
+	vmovaps	XMMWORD[(-40-48)+r11],xmm9
+	vmovaps	XMMWORD[(-40-32)+r11],xmm10
+	vmovaps	XMMWORD[(-40-16)+r11],xmm11
+$L$prologue_avx2:
+	mov	r8,rdi
+	mov	r9,rsi
+	mov	r10,rdx
+
+	lea	rsp,[((-640))+rsp]
+	shl	r10,6
+	lea	r13,[64+r9]
+	and	rsp,-128
+	add	r10,r9
+	lea	r14,[((K_XX_XX+64))]
+
+	mov	eax,DWORD[r8]
+	cmp	r13,r10
+	cmovae	r13,r9
+	mov	ebp,DWORD[4+r8]
+	mov	ecx,DWORD[8+r8]
+	mov	edx,DWORD[12+r8]
+	mov	esi,DWORD[16+r8]
+	vmovdqu	ymm6,YMMWORD[64+r14]
+
+	vmovdqu	xmm0,XMMWORD[r9]
+	vmovdqu	xmm1,XMMWORD[16+r9]
+	vmovdqu	xmm2,XMMWORD[32+r9]
+	vmovdqu	xmm3,XMMWORD[48+r9]
+	lea	r9,[64+r9]
+	vinserti128	ymm0,ymm0,XMMWORD[r13],1
+	vinserti128	ymm1,ymm1,XMMWORD[16+r13],1
+	vpshufb	ymm0,ymm0,ymm6
+	vinserti128	ymm2,ymm2,XMMWORD[32+r13],1
+	vpshufb	ymm1,ymm1,ymm6
+	vinserti128	ymm3,ymm3,XMMWORD[48+r13],1
+	vpshufb	ymm2,ymm2,ymm6
+	vmovdqu	ymm11,YMMWORD[((-64))+r14]
+	vpshufb	ymm3,ymm3,ymm6
+
+	vpaddd	ymm4,ymm0,ymm11
+	vpaddd	ymm5,ymm1,ymm11
+	vmovdqu	YMMWORD[rsp],ymm4
+	vpaddd	ymm6,ymm2,ymm11
+	vmovdqu	YMMWORD[32+rsp],ymm5
+	vpaddd	ymm7,ymm3,ymm11
+	vmovdqu	YMMWORD[64+rsp],ymm6
+	vmovdqu	YMMWORD[96+rsp],ymm7
+	vpalignr	ymm4,ymm1,ymm0,8
+	vpsrldq	ymm8,ymm3,4
+	vpxor	ymm4,ymm4,ymm0
+	vpxor	ymm8,ymm8,ymm2
+	vpxor	ymm4,ymm4,ymm8
+	vpsrld	ymm8,ymm4,31
+	vpslldq	ymm10,ymm4,12
+	vpaddd	ymm4,ymm4,ymm4
+	vpsrld	ymm9,ymm10,30
+	vpor	ymm4,ymm4,ymm8
+	vpslld	ymm10,ymm10,2
+	vpxor	ymm4,ymm4,ymm9
+	vpxor	ymm4,ymm4,ymm10
+	vpaddd	ymm9,ymm4,ymm11
+	vmovdqu	YMMWORD[128+rsp],ymm9
+	vpalignr	ymm5,ymm2,ymm1,8
+	vpsrldq	ymm8,ymm4,4
+	vpxor	ymm5,ymm5,ymm1
+	vpxor	ymm8,ymm8,ymm3
+	vpxor	ymm5,ymm5,ymm8
+	vpsrld	ymm8,ymm5,31
+	vmovdqu	ymm11,YMMWORD[((-32))+r14]
+	vpslldq	ymm10,ymm5,12
+	vpaddd	ymm5,ymm5,ymm5
+	vpsrld	ymm9,ymm10,30
+	vpor	ymm5,ymm5,ymm8
+	vpslld	ymm10,ymm10,2
+	vpxor	ymm5,ymm5,ymm9
+	vpxor	ymm5,ymm5,ymm10
+	vpaddd	ymm9,ymm5,ymm11
+	vmovdqu	YMMWORD[160+rsp],ymm9
+	vpalignr	ymm6,ymm3,ymm2,8
+	vpsrldq	ymm8,ymm5,4
+	vpxor	ymm6,ymm6,ymm2
+	vpxor	ymm8,ymm8,ymm4
+	vpxor	ymm6,ymm6,ymm8
+	vpsrld	ymm8,ymm6,31
+	vpslldq	ymm10,ymm6,12
+	vpaddd	ymm6,ymm6,ymm6
+	vpsrld	ymm9,ymm10,30
+	vpor	ymm6,ymm6,ymm8
+	vpslld	ymm10,ymm10,2
+	vpxor	ymm6,ymm6,ymm9
+	vpxor	ymm6,ymm6,ymm10
+	vpaddd	ymm9,ymm6,ymm11
+	vmovdqu	YMMWORD[192+rsp],ymm9
+	vpalignr	ymm7,ymm4,ymm3,8
+	vpsrldq	ymm8,ymm6,4
+	vpxor	ymm7,ymm7,ymm3
+	vpxor	ymm8,ymm8,ymm5
+	vpxor	ymm7,ymm7,ymm8
+	vpsrld	ymm8,ymm7,31
+	vpslldq	ymm10,ymm7,12
+	vpaddd	ymm7,ymm7,ymm7
+	vpsrld	ymm9,ymm10,30
+	vpor	ymm7,ymm7,ymm8
+	vpslld	ymm10,ymm10,2
+	vpxor	ymm7,ymm7,ymm9
+	vpxor	ymm7,ymm7,ymm10
+	vpaddd	ymm9,ymm7,ymm11
+	vmovdqu	YMMWORD[224+rsp],ymm9
+	lea	r13,[128+rsp]
+	jmp	NEAR $L$oop_avx2
+ALIGN	32
+$L$oop_avx2:
+	rorx	ebx,ebp,2
+	andn	edi,ebp,edx
+	and	ebp,ecx
+	xor	ebp,edi
+	jmp	NEAR $L$align32_1
+ALIGN	32
+$L$align32_1:
+	vpalignr	ymm8,ymm7,ymm6,8
+	vpxor	ymm0,ymm0,ymm4
+	add	esi,DWORD[((-128))+r13]
+	andn	edi,eax,ecx
+	vpxor	ymm0,ymm0,ymm1
+	add	esi,ebp
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	vpxor	ymm0,ymm0,ymm8
+	and	eax,ebx
+	add	esi,r12d
+	xor	eax,edi
+	vpsrld	ymm8,ymm0,30
+	vpslld	ymm0,ymm0,2
+	add	edx,DWORD[((-124))+r13]
+	andn	edi,esi,ebx
+	add	edx,eax
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	and	esi,ebp
+	vpor	ymm0,ymm0,ymm8
+	add	edx,r12d
+	xor	esi,edi
+	add	ecx,DWORD[((-120))+r13]
+	andn	edi,edx,ebp
+	vpaddd	ymm9,ymm0,ymm11
+	add	ecx,esi
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	and	edx,eax
+	vmovdqu	YMMWORD[256+rsp],ymm9
+	add	ecx,r12d
+	xor	edx,edi
+	add	ebx,DWORD[((-116))+r13]
+	andn	edi,ecx,eax
+	add	ebx,edx
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	and	ecx,esi
+	add	ebx,r12d
+	xor	ecx,edi
+	add	ebp,DWORD[((-96))+r13]
+	andn	edi,ebx,esi
+	add	ebp,ecx
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	and	ebx,edx
+	add	ebp,r12d
+	xor	ebx,edi
+	vpalignr	ymm8,ymm0,ymm7,8
+	vpxor	ymm1,ymm1,ymm5
+	add	eax,DWORD[((-92))+r13]
+	andn	edi,ebp,edx
+	vpxor	ymm1,ymm1,ymm2
+	add	eax,ebx
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	vpxor	ymm1,ymm1,ymm8
+	and	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edi
+	vpsrld	ymm8,ymm1,30
+	vpslld	ymm1,ymm1,2
+	add	esi,DWORD[((-88))+r13]
+	andn	edi,eax,ecx
+	add	esi,ebp
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	and	eax,ebx
+	vpor	ymm1,ymm1,ymm8
+	add	esi,r12d
+	xor	eax,edi
+	add	edx,DWORD[((-84))+r13]
+	andn	edi,esi,ebx
+	vpaddd	ymm9,ymm1,ymm11
+	add	edx,eax
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	and	esi,ebp
+	vmovdqu	YMMWORD[288+rsp],ymm9
+	add	edx,r12d
+	xor	esi,edi
+	add	ecx,DWORD[((-64))+r13]
+	andn	edi,edx,ebp
+	add	ecx,esi
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	and	edx,eax
+	add	ecx,r12d
+	xor	edx,edi
+	add	ebx,DWORD[((-60))+r13]
+	andn	edi,ecx,eax
+	add	ebx,edx
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	and	ecx,esi
+	add	ebx,r12d
+	xor	ecx,edi
+	vpalignr	ymm8,ymm1,ymm0,8
+	vpxor	ymm2,ymm2,ymm6
+	add	ebp,DWORD[((-56))+r13]
+	andn	edi,ebx,esi
+	vpxor	ymm2,ymm2,ymm3
+	vmovdqu	ymm11,YMMWORD[r14]
+	add	ebp,ecx
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	vpxor	ymm2,ymm2,ymm8
+	and	ebx,edx
+	add	ebp,r12d
+	xor	ebx,edi
+	vpsrld	ymm8,ymm2,30
+	vpslld	ymm2,ymm2,2
+	add	eax,DWORD[((-52))+r13]
+	andn	edi,ebp,edx
+	add	eax,ebx
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	and	ebp,ecx
+	vpor	ymm2,ymm2,ymm8
+	add	eax,r12d
+	xor	ebp,edi
+	add	esi,DWORD[((-32))+r13]
+	andn	edi,eax,ecx
+	vpaddd	ymm9,ymm2,ymm11
+	add	esi,ebp
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	and	eax,ebx
+	vmovdqu	YMMWORD[320+rsp],ymm9
+	add	esi,r12d
+	xor	eax,edi
+	add	edx,DWORD[((-28))+r13]
+	andn	edi,esi,ebx
+	add	edx,eax
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	and	esi,ebp
+	add	edx,r12d
+	xor	esi,edi
+	add	ecx,DWORD[((-24))+r13]
+	andn	edi,edx,ebp
+	add	ecx,esi
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	and	edx,eax
+	add	ecx,r12d
+	xor	edx,edi
+	vpalignr	ymm8,ymm2,ymm1,8
+	vpxor	ymm3,ymm3,ymm7
+	add	ebx,DWORD[((-20))+r13]
+	andn	edi,ecx,eax
+	vpxor	ymm3,ymm3,ymm4
+	add	ebx,edx
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	vpxor	ymm3,ymm3,ymm8
+	and	ecx,esi
+	add	ebx,r12d
+	xor	ecx,edi
+	vpsrld	ymm8,ymm3,30
+	vpslld	ymm3,ymm3,2
+	add	ebp,DWORD[r13]
+	andn	edi,ebx,esi
+	add	ebp,ecx
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	and	ebx,edx
+	vpor	ymm3,ymm3,ymm8
+	add	ebp,r12d
+	xor	ebx,edi
+	add	eax,DWORD[4+r13]
+	andn	edi,ebp,edx
+	vpaddd	ymm9,ymm3,ymm11
+	add	eax,ebx
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	and	ebp,ecx
+	vmovdqu	YMMWORD[352+rsp],ymm9
+	add	eax,r12d
+	xor	ebp,edi
+	add	esi,DWORD[8+r13]
+	andn	edi,eax,ecx
+	add	esi,ebp
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	and	eax,ebx
+	add	esi,r12d
+	xor	eax,edi
+	add	edx,DWORD[12+r13]
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	xor	esi,ebx
+	vpalignr	ymm8,ymm3,ymm2,8
+	vpxor	ymm4,ymm4,ymm0
+	add	ecx,DWORD[32+r13]
+	lea	ecx,[rsi*1+rcx]
+	vpxor	ymm4,ymm4,ymm5
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	vpxor	ymm4,ymm4,ymm8
+	add	ecx,r12d
+	xor	edx,ebp
+	add	ebx,DWORD[36+r13]
+	vpsrld	ymm8,ymm4,30
+	vpslld	ymm4,ymm4,2
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	vpor	ymm4,ymm4,ymm8
+	add	ebp,DWORD[40+r13]
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	vpaddd	ymm9,ymm4,ymm11
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	add	eax,DWORD[44+r13]
+	vmovdqu	YMMWORD[384+rsp],ymm9
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	add	esi,DWORD[64+r13]
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	xor	eax,ecx
+	vpalignr	ymm8,ymm4,ymm3,8
+	vpxor	ymm5,ymm5,ymm1
+	add	edx,DWORD[68+r13]
+	lea	edx,[rax*1+rdx]
+	vpxor	ymm5,ymm5,ymm6
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	vpxor	ymm5,ymm5,ymm8
+	add	edx,r12d
+	xor	esi,ebx
+	add	ecx,DWORD[72+r13]
+	vpsrld	ymm8,ymm5,30
+	vpslld	ymm5,ymm5,2
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	vpor	ymm5,ymm5,ymm8
+	add	ebx,DWORD[76+r13]
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	vpaddd	ymm9,ymm5,ymm11
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	add	ebp,DWORD[96+r13]
+	vmovdqu	YMMWORD[416+rsp],ymm9
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	add	eax,DWORD[100+r13]
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	vpalignr	ymm8,ymm5,ymm4,8
+	vpxor	ymm6,ymm6,ymm2
+	add	esi,DWORD[104+r13]
+	lea	esi,[rbp*1+rsi]
+	vpxor	ymm6,ymm6,ymm7
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	vpxor	ymm6,ymm6,ymm8
+	add	esi,r12d
+	xor	eax,ecx
+	add	edx,DWORD[108+r13]
+	lea	r13,[256+r13]
+	vpsrld	ymm8,ymm6,30
+	vpslld	ymm6,ymm6,2
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	xor	esi,ebx
+	vpor	ymm6,ymm6,ymm8
+	add	ecx,DWORD[((-128))+r13]
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	vpaddd	ymm9,ymm6,ymm11
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	add	ebx,DWORD[((-124))+r13]
+	vmovdqu	YMMWORD[448+rsp],ymm9
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	add	ebp,DWORD[((-120))+r13]
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	vpalignr	ymm8,ymm6,ymm5,8
+	vpxor	ymm7,ymm7,ymm3
+	add	eax,DWORD[((-116))+r13]
+	lea	eax,[rbx*1+rax]
+	vpxor	ymm7,ymm7,ymm0
+	vmovdqu	ymm11,YMMWORD[32+r14]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	vpxor	ymm7,ymm7,ymm8
+	add	eax,r12d
+	xor	ebp,edx
+	add	esi,DWORD[((-96))+r13]
+	vpsrld	ymm8,ymm7,30
+	vpslld	ymm7,ymm7,2
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	xor	eax,ecx
+	vpor	ymm7,ymm7,ymm8
+	add	edx,DWORD[((-92))+r13]
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	vpaddd	ymm9,ymm7,ymm11
+	xor	esi,ebp
+	add	edx,r12d
+	xor	esi,ebx
+	add	ecx,DWORD[((-88))+r13]
+	vmovdqu	YMMWORD[480+rsp],ymm9
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	add	ebx,DWORD[((-84))+r13]
+	mov	edi,esi
+	xor	edi,eax
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	and	ecx,edi
+	jmp	NEAR $L$align32_2
+ALIGN	32
+$L$align32_2:
+	vpalignr	ymm8,ymm7,ymm6,8
+	vpxor	ymm0,ymm0,ymm4
+	add	ebp,DWORD[((-64))+r13]
+	xor	ecx,esi
+	vpxor	ymm0,ymm0,ymm1
+	mov	edi,edx
+	xor	edi,esi
+	lea	ebp,[rbp*1+rcx]
+	vpxor	ymm0,ymm0,ymm8
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	vpsrld	ymm8,ymm0,30
+	vpslld	ymm0,ymm0,2
+	add	ebp,r12d
+	and	ebx,edi
+	add	eax,DWORD[((-60))+r13]
+	xor	ebx,edx
+	mov	edi,ecx
+	xor	edi,edx
+	vpor	ymm0,ymm0,ymm8
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	vpaddd	ymm9,ymm0,ymm11
+	add	eax,r12d
+	and	ebp,edi
+	add	esi,DWORD[((-56))+r13]
+	xor	ebp,ecx
+	vmovdqu	YMMWORD[512+rsp],ymm9
+	mov	edi,ebx
+	xor	edi,ecx
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	and	eax,edi
+	add	edx,DWORD[((-52))+r13]
+	xor	eax,ebx
+	mov	edi,ebp
+	xor	edi,ebx
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	and	esi,edi
+	add	ecx,DWORD[((-32))+r13]
+	xor	esi,ebp
+	mov	edi,eax
+	xor	edi,ebp
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	and	edx,edi
+	vpalignr	ymm8,ymm0,ymm7,8
+	vpxor	ymm1,ymm1,ymm5
+	add	ebx,DWORD[((-28))+r13]
+	xor	edx,eax
+	vpxor	ymm1,ymm1,ymm2
+	mov	edi,esi
+	xor	edi,eax
+	lea	ebx,[rdx*1+rbx]
+	vpxor	ymm1,ymm1,ymm8
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	vpsrld	ymm8,ymm1,30
+	vpslld	ymm1,ymm1,2
+	add	ebx,r12d
+	and	ecx,edi
+	add	ebp,DWORD[((-24))+r13]
+	xor	ecx,esi
+	mov	edi,edx
+	xor	edi,esi
+	vpor	ymm1,ymm1,ymm8
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	vpaddd	ymm9,ymm1,ymm11
+	add	ebp,r12d
+	and	ebx,edi
+	add	eax,DWORD[((-20))+r13]
+	xor	ebx,edx
+	vmovdqu	YMMWORD[544+rsp],ymm9
+	mov	edi,ecx
+	xor	edi,edx
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	and	ebp,edi
+	add	esi,DWORD[r13]
+	xor	ebp,ecx
+	mov	edi,ebx
+	xor	edi,ecx
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	and	eax,edi
+	add	edx,DWORD[4+r13]
+	xor	eax,ebx
+	mov	edi,ebp
+	xor	edi,ebx
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	and	esi,edi
+	vpalignr	ymm8,ymm1,ymm0,8
+	vpxor	ymm2,ymm2,ymm6
+	add	ecx,DWORD[8+r13]
+	xor	esi,ebp
+	vpxor	ymm2,ymm2,ymm3
+	mov	edi,eax
+	xor	edi,ebp
+	lea	ecx,[rsi*1+rcx]
+	vpxor	ymm2,ymm2,ymm8
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	vpsrld	ymm8,ymm2,30
+	vpslld	ymm2,ymm2,2
+	add	ecx,r12d
+	and	edx,edi
+	add	ebx,DWORD[12+r13]
+	xor	edx,eax
+	mov	edi,esi
+	xor	edi,eax
+	vpor	ymm2,ymm2,ymm8
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	vpaddd	ymm9,ymm2,ymm11
+	add	ebx,r12d
+	and	ecx,edi
+	add	ebp,DWORD[32+r13]
+	xor	ecx,esi
+	vmovdqu	YMMWORD[576+rsp],ymm9
+	mov	edi,edx
+	xor	edi,esi
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	and	ebx,edi
+	add	eax,DWORD[36+r13]
+	xor	ebx,edx
+	mov	edi,ecx
+	xor	edi,edx
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	and	ebp,edi
+	add	esi,DWORD[40+r13]
+	xor	ebp,ecx
+	mov	edi,ebx
+	xor	edi,ecx
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	and	eax,edi
+	vpalignr	ymm8,ymm2,ymm1,8
+	vpxor	ymm3,ymm3,ymm7
+	add	edx,DWORD[44+r13]
+	xor	eax,ebx
+	vpxor	ymm3,ymm3,ymm4
+	mov	edi,ebp
+	xor	edi,ebx
+	lea	edx,[rax*1+rdx]
+	vpxor	ymm3,ymm3,ymm8
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	vpsrld	ymm8,ymm3,30
+	vpslld	ymm3,ymm3,2
+	add	edx,r12d
+	and	esi,edi
+	add	ecx,DWORD[64+r13]
+	xor	esi,ebp
+	mov	edi,eax
+	xor	edi,ebp
+	vpor	ymm3,ymm3,ymm8
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	vpaddd	ymm9,ymm3,ymm11
+	add	ecx,r12d
+	and	edx,edi
+	add	ebx,DWORD[68+r13]
+	xor	edx,eax
+	vmovdqu	YMMWORD[608+rsp],ymm9
+	mov	edi,esi
+	xor	edi,eax
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	and	ecx,edi
+	add	ebp,DWORD[72+r13]
+	xor	ecx,esi
+	mov	edi,edx
+	xor	edi,esi
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	and	ebx,edi
+	add	eax,DWORD[76+r13]
+	xor	ebx,edx
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	add	esi,DWORD[96+r13]
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	xor	eax,ecx
+	add	edx,DWORD[100+r13]
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	xor	esi,ebx
+	add	ecx,DWORD[104+r13]
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	add	ebx,DWORD[108+r13]
+	lea	r13,[256+r13]
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	add	ebp,DWORD[((-128))+r13]
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	add	eax,DWORD[((-124))+r13]
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	add	esi,DWORD[((-120))+r13]
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	xor	eax,ecx
+	add	edx,DWORD[((-116))+r13]
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	xor	esi,ebx
+	add	ecx,DWORD[((-96))+r13]
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	add	ebx,DWORD[((-92))+r13]
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	add	ebp,DWORD[((-88))+r13]
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	add	eax,DWORD[((-84))+r13]
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	add	esi,DWORD[((-64))+r13]
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	xor	eax,ecx
+	add	edx,DWORD[((-60))+r13]
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	xor	esi,ebx
+	add	ecx,DWORD[((-56))+r13]
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	add	ebx,DWORD[((-52))+r13]
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	add	ebp,DWORD[((-32))+r13]
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	add	eax,DWORD[((-28))+r13]
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	add	esi,DWORD[((-24))+r13]
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	xor	eax,ecx
+	add	edx,DWORD[((-20))+r13]
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	add	edx,r12d
+	lea	r13,[128+r9]
+	lea	rdi,[128+r9]
+	cmp	r13,r10
+	cmovae	r13,r9
+
+
+	add	edx,DWORD[r8]
+	add	esi,DWORD[4+r8]
+	add	ebp,DWORD[8+r8]
+	mov	DWORD[r8],edx
+	add	ebx,DWORD[12+r8]
+	mov	DWORD[4+r8],esi
+	mov	eax,edx
+	add	ecx,DWORD[16+r8]
+	mov	r12d,ebp
+	mov	DWORD[8+r8],ebp
+	mov	edx,ebx
+
+	mov	DWORD[12+r8],ebx
+	mov	ebp,esi
+	mov	DWORD[16+r8],ecx
+
+	mov	esi,ecx
+	mov	ecx,r12d
+
+
+	cmp	r9,r10
+	je	NEAR $L$done_avx2
+	vmovdqu	ymm6,YMMWORD[64+r14]
+	cmp	rdi,r10
+	ja	NEAR $L$ast_avx2
+
+	vmovdqu	xmm0,XMMWORD[((-64))+rdi]
+	vmovdqu	xmm1,XMMWORD[((-48))+rdi]
+	vmovdqu	xmm2,XMMWORD[((-32))+rdi]
+	vmovdqu	xmm3,XMMWORD[((-16))+rdi]
+	vinserti128	ymm0,ymm0,XMMWORD[r13],1
+	vinserti128	ymm1,ymm1,XMMWORD[16+r13],1
+	vinserti128	ymm2,ymm2,XMMWORD[32+r13],1
+	vinserti128	ymm3,ymm3,XMMWORD[48+r13],1
+	jmp	NEAR $L$ast_avx2
+
+ALIGN	32
+$L$ast_avx2:
+	lea	r13,[((128+16))+rsp]
+	rorx	ebx,ebp,2
+	andn	edi,ebp,edx
+	and	ebp,ecx
+	xor	ebp,edi
+	sub	r9,-128
+	add	esi,DWORD[((-128))+r13]
+	andn	edi,eax,ecx
+	add	esi,ebp
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	and	eax,ebx
+	add	esi,r12d
+	xor	eax,edi
+	add	edx,DWORD[((-124))+r13]
+	andn	edi,esi,ebx
+	add	edx,eax
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	and	esi,ebp
+	add	edx,r12d
+	xor	esi,edi
+	add	ecx,DWORD[((-120))+r13]
+	andn	edi,edx,ebp
+	add	ecx,esi
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	and	edx,eax
+	add	ecx,r12d
+	xor	edx,edi
+	add	ebx,DWORD[((-116))+r13]
+	andn	edi,ecx,eax
+	add	ebx,edx
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	and	ecx,esi
+	add	ebx,r12d
+	xor	ecx,edi
+	add	ebp,DWORD[((-96))+r13]
+	andn	edi,ebx,esi
+	add	ebp,ecx
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	and	ebx,edx
+	add	ebp,r12d
+	xor	ebx,edi
+	add	eax,DWORD[((-92))+r13]
+	andn	edi,ebp,edx
+	add	eax,ebx
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	and	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edi
+	add	esi,DWORD[((-88))+r13]
+	andn	edi,eax,ecx
+	add	esi,ebp
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	and	eax,ebx
+	add	esi,r12d
+	xor	eax,edi
+	add	edx,DWORD[((-84))+r13]
+	andn	edi,esi,ebx
+	add	edx,eax
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	and	esi,ebp
+	add	edx,r12d
+	xor	esi,edi
+	add	ecx,DWORD[((-64))+r13]
+	andn	edi,edx,ebp
+	add	ecx,esi
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	and	edx,eax
+	add	ecx,r12d
+	xor	edx,edi
+	add	ebx,DWORD[((-60))+r13]
+	andn	edi,ecx,eax
+	add	ebx,edx
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	and	ecx,esi
+	add	ebx,r12d
+	xor	ecx,edi
+	add	ebp,DWORD[((-56))+r13]
+	andn	edi,ebx,esi
+	add	ebp,ecx
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	and	ebx,edx
+	add	ebp,r12d
+	xor	ebx,edi
+	add	eax,DWORD[((-52))+r13]
+	andn	edi,ebp,edx
+	add	eax,ebx
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	and	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edi
+	add	esi,DWORD[((-32))+r13]
+	andn	edi,eax,ecx
+	add	esi,ebp
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	and	eax,ebx
+	add	esi,r12d
+	xor	eax,edi
+	add	edx,DWORD[((-28))+r13]
+	andn	edi,esi,ebx
+	add	edx,eax
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	and	esi,ebp
+	add	edx,r12d
+	xor	esi,edi
+	add	ecx,DWORD[((-24))+r13]
+	andn	edi,edx,ebp
+	add	ecx,esi
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	and	edx,eax
+	add	ecx,r12d
+	xor	edx,edi
+	add	ebx,DWORD[((-20))+r13]
+	andn	edi,ecx,eax
+	add	ebx,edx
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	and	ecx,esi
+	add	ebx,r12d
+	xor	ecx,edi
+	add	ebp,DWORD[r13]
+	andn	edi,ebx,esi
+	add	ebp,ecx
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	and	ebx,edx
+	add	ebp,r12d
+	xor	ebx,edi
+	add	eax,DWORD[4+r13]
+	andn	edi,ebp,edx
+	add	eax,ebx
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	and	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edi
+	add	esi,DWORD[8+r13]
+	andn	edi,eax,ecx
+	add	esi,ebp
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	and	eax,ebx
+	add	esi,r12d
+	xor	eax,edi
+	add	edx,DWORD[12+r13]
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	xor	esi,ebx
+	add	ecx,DWORD[32+r13]
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	add	ebx,DWORD[36+r13]
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	add	ebp,DWORD[40+r13]
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	add	eax,DWORD[44+r13]
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	add	esi,DWORD[64+r13]
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	xor	eax,ecx
+	vmovdqu	ymm11,YMMWORD[((-64))+r14]
+	vpshufb	ymm0,ymm0,ymm6
+	add	edx,DWORD[68+r13]
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	xor	esi,ebx
+	add	ecx,DWORD[72+r13]
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	add	ebx,DWORD[76+r13]
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	add	ebp,DWORD[96+r13]
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	add	eax,DWORD[100+r13]
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	vpshufb	ymm1,ymm1,ymm6
+	vpaddd	ymm8,ymm0,ymm11
+	add	esi,DWORD[104+r13]
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	xor	eax,ecx
+	add	edx,DWORD[108+r13]
+	lea	r13,[256+r13]
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	xor	esi,ebx
+	add	ecx,DWORD[((-128))+r13]
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	add	ebx,DWORD[((-124))+r13]
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	add	ebp,DWORD[((-120))+r13]
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	vmovdqu	YMMWORD[rsp],ymm8
+	vpshufb	ymm2,ymm2,ymm6
+	vpaddd	ymm9,ymm1,ymm11
+	add	eax,DWORD[((-116))+r13]
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	add	esi,DWORD[((-96))+r13]
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	xor	eax,ecx
+	add	edx,DWORD[((-92))+r13]
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	xor	esi,ebx
+	add	ecx,DWORD[((-88))+r13]
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	add	ebx,DWORD[((-84))+r13]
+	mov	edi,esi
+	xor	edi,eax
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	and	ecx,edi
+	vmovdqu	YMMWORD[32+rsp],ymm9
+	vpshufb	ymm3,ymm3,ymm6
+	vpaddd	ymm6,ymm2,ymm11
+	add	ebp,DWORD[((-64))+r13]
+	xor	ecx,esi
+	mov	edi,edx
+	xor	edi,esi
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	and	ebx,edi
+	add	eax,DWORD[((-60))+r13]
+	xor	ebx,edx
+	mov	edi,ecx
+	xor	edi,edx
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	and	ebp,edi
+	add	esi,DWORD[((-56))+r13]
+	xor	ebp,ecx
+	mov	edi,ebx
+	xor	edi,ecx
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	and	eax,edi
+	add	edx,DWORD[((-52))+r13]
+	xor	eax,ebx
+	mov	edi,ebp
+	xor	edi,ebx
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	and	esi,edi
+	add	ecx,DWORD[((-32))+r13]
+	xor	esi,ebp
+	mov	edi,eax
+	xor	edi,ebp
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	and	edx,edi
+	jmp	NEAR $L$align32_3
+ALIGN	32
+$L$align32_3:
+	vmovdqu	YMMWORD[64+rsp],ymm6
+	vpaddd	ymm7,ymm3,ymm11
+	add	ebx,DWORD[((-28))+r13]
+	xor	edx,eax
+	mov	edi,esi
+	xor	edi,eax
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	and	ecx,edi
+	add	ebp,DWORD[((-24))+r13]
+	xor	ecx,esi
+	mov	edi,edx
+	xor	edi,esi
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	and	ebx,edi
+	add	eax,DWORD[((-20))+r13]
+	xor	ebx,edx
+	mov	edi,ecx
+	xor	edi,edx
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	and	ebp,edi
+	add	esi,DWORD[r13]
+	xor	ebp,ecx
+	mov	edi,ebx
+	xor	edi,ecx
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	and	eax,edi
+	add	edx,DWORD[4+r13]
+	xor	eax,ebx
+	mov	edi,ebp
+	xor	edi,ebx
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	and	esi,edi
+	vmovdqu	YMMWORD[96+rsp],ymm7
+	add	ecx,DWORD[8+r13]
+	xor	esi,ebp
+	mov	edi,eax
+	xor	edi,ebp
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	and	edx,edi
+	add	ebx,DWORD[12+r13]
+	xor	edx,eax
+	mov	edi,esi
+	xor	edi,eax
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	and	ecx,edi
+	add	ebp,DWORD[32+r13]
+	xor	ecx,esi
+	mov	edi,edx
+	xor	edi,esi
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	and	ebx,edi
+	add	eax,DWORD[36+r13]
+	xor	ebx,edx
+	mov	edi,ecx
+	xor	edi,edx
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	and	ebp,edi
+	add	esi,DWORD[40+r13]
+	xor	ebp,ecx
+	mov	edi,ebx
+	xor	edi,ecx
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	and	eax,edi
+	vpalignr	ymm4,ymm1,ymm0,8
+	add	edx,DWORD[44+r13]
+	xor	eax,ebx
+	mov	edi,ebp
+	xor	edi,ebx
+	vpsrldq	ymm8,ymm3,4
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	vpxor	ymm4,ymm4,ymm0
+	vpxor	ymm8,ymm8,ymm2
+	xor	esi,ebp
+	add	edx,r12d
+	vpxor	ymm4,ymm4,ymm8
+	and	esi,edi
+	add	ecx,DWORD[64+r13]
+	xor	esi,ebp
+	mov	edi,eax
+	vpsrld	ymm8,ymm4,31
+	xor	edi,ebp
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	vpslldq	ymm10,ymm4,12
+	vpaddd	ymm4,ymm4,ymm4
+	rorx	esi,edx,2
+	xor	edx,eax
+	vpsrld	ymm9,ymm10,30
+	vpor	ymm4,ymm4,ymm8
+	add	ecx,r12d
+	and	edx,edi
+	vpslld	ymm10,ymm10,2
+	vpxor	ymm4,ymm4,ymm9
+	add	ebx,DWORD[68+r13]
+	xor	edx,eax
+	vpxor	ymm4,ymm4,ymm10
+	mov	edi,esi
+	xor	edi,eax
+	lea	ebx,[rdx*1+rbx]
+	vpaddd	ymm9,ymm4,ymm11
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	vmovdqu	YMMWORD[128+rsp],ymm9
+	add	ebx,r12d
+	and	ecx,edi
+	add	ebp,DWORD[72+r13]
+	xor	ecx,esi
+	mov	edi,edx
+	xor	edi,esi
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	and	ebx,edi
+	add	eax,DWORD[76+r13]
+	xor	ebx,edx
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	vpalignr	ymm5,ymm2,ymm1,8
+	add	esi,DWORD[96+r13]
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	vpsrldq	ymm8,ymm4,4
+	xor	eax,ebx
+	add	esi,r12d
+	xor	eax,ecx
+	vpxor	ymm5,ymm5,ymm1
+	vpxor	ymm8,ymm8,ymm3
+	add	edx,DWORD[100+r13]
+	lea	edx,[rax*1+rdx]
+	vpxor	ymm5,ymm5,ymm8
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	xor	esi,ebp
+	add	edx,r12d
+	vpsrld	ymm8,ymm5,31
+	vmovdqu	ymm11,YMMWORD[((-32))+r14]
+	xor	esi,ebx
+	add	ecx,DWORD[104+r13]
+	lea	ecx,[rsi*1+rcx]
+	vpslldq	ymm10,ymm5,12
+	vpaddd	ymm5,ymm5,ymm5
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	vpsrld	ymm9,ymm10,30
+	vpor	ymm5,ymm5,ymm8
+	xor	edx,eax
+	add	ecx,r12d
+	vpslld	ymm10,ymm10,2
+	vpxor	ymm5,ymm5,ymm9
+	xor	edx,ebp
+	add	ebx,DWORD[108+r13]
+	lea	r13,[256+r13]
+	vpxor	ymm5,ymm5,ymm10
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	vpaddd	ymm9,ymm5,ymm11
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	vmovdqu	YMMWORD[160+rsp],ymm9
+	add	ebp,DWORD[((-128))+r13]
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	vpalignr	ymm6,ymm3,ymm2,8
+	add	eax,DWORD[((-124))+r13]
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	vpsrldq	ymm8,ymm5,4
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	vpxor	ymm6,ymm6,ymm2
+	vpxor	ymm8,ymm8,ymm4
+	add	esi,DWORD[((-120))+r13]
+	lea	esi,[rbp*1+rsi]
+	vpxor	ymm6,ymm6,ymm8
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	vpsrld	ymm8,ymm6,31
+	xor	eax,ecx
+	add	edx,DWORD[((-116))+r13]
+	lea	edx,[rax*1+rdx]
+	vpslldq	ymm10,ymm6,12
+	vpaddd	ymm6,ymm6,ymm6
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	vpsrld	ymm9,ymm10,30
+	vpor	ymm6,ymm6,ymm8
+	xor	esi,ebp
+	add	edx,r12d
+	vpslld	ymm10,ymm10,2
+	vpxor	ymm6,ymm6,ymm9
+	xor	esi,ebx
+	add	ecx,DWORD[((-96))+r13]
+	vpxor	ymm6,ymm6,ymm10
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	vpaddd	ymm9,ymm6,ymm11
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	vmovdqu	YMMWORD[192+rsp],ymm9
+	add	ebx,DWORD[((-92))+r13]
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	vpalignr	ymm7,ymm4,ymm3,8
+	add	ebp,DWORD[((-88))+r13]
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	vpsrldq	ymm8,ymm6,4
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	vpxor	ymm7,ymm7,ymm3
+	vpxor	ymm8,ymm8,ymm5
+	add	eax,DWORD[((-84))+r13]
+	lea	eax,[rbx*1+rax]
+	vpxor	ymm7,ymm7,ymm8
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	vpsrld	ymm8,ymm7,31
+	xor	ebp,edx
+	add	esi,DWORD[((-64))+r13]
+	lea	esi,[rbp*1+rsi]
+	vpslldq	ymm10,ymm7,12
+	vpaddd	ymm7,ymm7,ymm7
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	vpsrld	ymm9,ymm10,30
+	vpor	ymm7,ymm7,ymm8
+	xor	eax,ebx
+	add	esi,r12d
+	vpslld	ymm10,ymm10,2
+	vpxor	ymm7,ymm7,ymm9
+	xor	eax,ecx
+	add	edx,DWORD[((-60))+r13]
+	vpxor	ymm7,ymm7,ymm10
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	rorx	eax,esi,2
+	vpaddd	ymm9,ymm7,ymm11
+	xor	esi,ebp
+	add	edx,r12d
+	xor	esi,ebx
+	vmovdqu	YMMWORD[224+rsp],ymm9
+	add	ecx,DWORD[((-56))+r13]
+	lea	ecx,[rsi*1+rcx]
+	rorx	r12d,edx,27
+	rorx	esi,edx,2
+	xor	edx,eax
+	add	ecx,r12d
+	xor	edx,ebp
+	add	ebx,DWORD[((-52))+r13]
+	lea	ebx,[rdx*1+rbx]
+	rorx	r12d,ecx,27
+	rorx	edx,ecx,2
+	xor	ecx,esi
+	add	ebx,r12d
+	xor	ecx,eax
+	add	ebp,DWORD[((-32))+r13]
+	lea	ebp,[rbp*1+rcx]
+	rorx	r12d,ebx,27
+	rorx	ecx,ebx,2
+	xor	ebx,edx
+	add	ebp,r12d
+	xor	ebx,esi
+	add	eax,DWORD[((-28))+r13]
+	lea	eax,[rbx*1+rax]
+	rorx	r12d,ebp,27
+	rorx	ebx,ebp,2
+	xor	ebp,ecx
+	add	eax,r12d
+	xor	ebp,edx
+	add	esi,DWORD[((-24))+r13]
+	lea	esi,[rbp*1+rsi]
+	rorx	r12d,eax,27
+	rorx	ebp,eax,2
+	xor	eax,ebx
+	add	esi,r12d
+	xor	eax,ecx
+	add	edx,DWORD[((-20))+r13]
+	lea	edx,[rax*1+rdx]
+	rorx	r12d,esi,27
+	add	edx,r12d
+	lea	r13,[128+rsp]
+
+
+	add	edx,DWORD[r8]
+	add	esi,DWORD[4+r8]
+	add	ebp,DWORD[8+r8]
+	mov	DWORD[r8],edx
+	add	ebx,DWORD[12+r8]
+	mov	DWORD[4+r8],esi
+	mov	eax,edx
+	add	ecx,DWORD[16+r8]
+	mov	r12d,ebp
+	mov	DWORD[8+r8],ebp
+	mov	edx,ebx
+
+	mov	DWORD[12+r8],ebx
+	mov	ebp,esi
+	mov	DWORD[16+r8],ecx
+
+	mov	esi,ecx
+	mov	ecx,r12d
+
+
+	cmp	r9,r10
+	jbe	NEAR $L$oop_avx2
+
+$L$done_avx2:
+	vzeroupper
+	movaps	xmm6,XMMWORD[((-40-96))+r11]
+	movaps	xmm7,XMMWORD[((-40-80))+r11]
+	movaps	xmm8,XMMWORD[((-40-64))+r11]
+	movaps	xmm9,XMMWORD[((-40-48))+r11]
+	movaps	xmm10,XMMWORD[((-40-32))+r11]
+	movaps	xmm11,XMMWORD[((-40-16))+r11]
+	mov	r14,QWORD[((-40))+r11]
+
+	mov	r13,QWORD[((-32))+r11]
+
+	mov	r12,QWORD[((-24))+r11]
+
+	mov	rbp,QWORD[((-16))+r11]
+
+	mov	rbx,QWORD[((-8))+r11]
+
+	lea	rsp,[r11]
+
+$L$epilogue_avx2:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_sha1_block_data_order_avx2:
+ALIGN	64
+K_XX_XX:	; constant pool: four SHA-1 round constants, each repeated across 8 dwords, then byte-index patterns
+	DD	0x5a827999,0x5a827999,0x5a827999,0x5a827999	; SHA-1 K for rounds 0-19
+	DD	0x5a827999,0x5a827999,0x5a827999,0x5a827999
+	DD	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1	; SHA-1 K for rounds 20-39
+	DD	0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1
+	DD	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc	; SHA-1 K for rounds 40-59
+	DD	0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc
+	DD	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6	; SHA-1 K for rounds 60-79
+	DD	0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6
+	DD	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	; byte-index pattern; presumably the pshufb byte-swap mask - confirm against its users
+	DD	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f
+DB	0xf,0xe,0xd,0xc,0xb,0xa,0x9,0x8,0x7,0x6,0x5,0x4,0x3,0x2,0x1,0x0	; descending byte indices 15..0
+DB	83,72,65,49,32,98,108,111,99,107,32,116,114,97,110,115	; ASCII: "SHA1 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
+DB	102,111,114,109,32,102,111,114,32,120,56,54,95,54,52,44
+DB	32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60
+DB	97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114
+DB	103,62,0
+ALIGN	64
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	; Win64 SEH handler registered for sha1_block_data_order; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
+	push	rsi	; preserve the registers and flags this handler modifies
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; room for the RtlVirtualUnwind stack arguments written at [32..56+rsp]
+
+	mov	rax,QWORD[120+r8]	; context->Rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	lea	r10,[$L$prologue]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip below $L$prologue: keep rax = context->Rax
+
+	mov	rax,QWORD[152+r8]	; context->Rsp
+
+	lea	r10,[$L$epilogue]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; Rip at/after $L$epilogue: keep rax = context->Rsp
+
+	mov	rax,QWORD[64+rax]	; Rip inside the body: reload rax from [Rsp+64] (presumably the saved entry rsp - confirm against the prologue)
+
+	mov	rbx,QWORD[((-8))+rax]	; read the five qwords just below rax...
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	QWORD[144+r8],rbx	; ...and store them as context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[232+r8],r14	; context->R14
+
+	jmp	NEAR $L$common_seh_tail	; finish in the tail shared with the other handlers
+
+
+ALIGN	16
+shaext_handler:	; Win64 SEH handler registered for sha1_block_data_order_shaext; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
+	push	rsi	; preserve the registers and flags this handler modifies
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; room for the RtlVirtualUnwind stack arguments written at [32..56+rsp]
+
+	mov	rax,QWORD[120+r8]	; context->Rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	lea	r10,[$L$prologue_shaext]
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip below $L$prologue_shaext: nothing extra to restore
+
+	lea	r10,[$L$epilogue_shaext]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; Rip at/after $L$epilogue_shaext: nothing extra to restore
+
+	lea	rsi,[((-8-64))+rax]	; copy source: the 64 bytes starting at rax-72
+	lea	rdi,[512+r8]	; copy destination: context->Xmm6
+	mov	ecx,8	; 8 qwords = 64 bytes (four XMM slots, Xmm6..Xmm9)
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+
+	jmp	NEAR $L$common_seh_tail	; finish in the tail shared with the other handlers
+
+
+ALIGN	16
+ssse3_handler:	; Win64 SEH handler used by the ssse3, avx and avx2 entries in .xdata; r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
+	push	rsi	; preserve the registers and flags this handler modifies
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; room for the RtlVirtualUnwind stack arguments written at [32..56+rsp]
+
+	mov	rax,QWORD[120+r8]	; context->Rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	mov	rsi,QWORD[8+r9]	; disp->ImageBase
+	mov	r11,QWORD[56+r9]	; disp->HandlerData (the two RVAs that follow the handler RVA in .xdata)
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]: prologue label RVA
+	lea	r10,[r10*1+rsi]	; RVA + ImageBase = absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip below the prologue label: keep rax = context->Rax
+
+	mov	rax,QWORD[208+r8]	; context->R11
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]: epilogue label RVA
+	lea	r10,[r10*1+rsi]	; RVA + ImageBase = absolute address
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; Rip at/after the epilogue label: keep rax = context->R11
+
+	lea	rsi,[((-40-96))+rax]	; copy source: 96-byte XMM save area below the five saved GPRs
+	lea	rdi,[512+r8]	; copy destination: context->Xmm6
+	mov	ecx,12	; 12 qwords = 96 bytes (xmm6..xmm11)
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rbx,QWORD[((-8))+rax]	; read the five saved GPR slots just below rax...
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	QWORD[144+r8],rbx	; ...and store them as context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[232+r8],r14	; context->R14
+
+$L$common_seh_tail:	; shared tail for all three handlers; rax = frame address chosen above
+	mov	rdi,QWORD[8+rax]	; rdi as spilled at [8+rsp] by the WIN64 prologue
+	mov	rsi,QWORD[16+rax]	; rsi as spilled at [16+rsp] by the WIN64 prologue
+	mov	QWORD[152+r8],rax	; context->Rsp = rax
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; copy destination: disp->ContextRecord
+	mov	rsi,r8	; copy source: the context updated above
+	mov	ecx,154	; 154 qwords = 1232 bytes, i.e. sizeof(CONTEXT)
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rsi,r9	; keep disp in rsi; r9 is reused as an argument register below
+	xor	rcx,rcx	; arg1 = 0 (UNW_FLAG_NHANDLER)
+	mov	rdx,QWORD[8+rsi]	; arg2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; arg5 = ContextRecord
+	mov	QWORD[40+rsp],r11	; arg6 = &HandlerData
+	mov	QWORD[48+rsp],r12	; arg7 = &EstablisherFrame
+	mov	QWORD[56+rsp],rcx	; arg8 = NULL
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return 1 (ExceptionContinueSearch)
+	add	rsp,64	; release scratch space, then restore flags and registers in reverse push order
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4	; function table: one (begin RVA, end RVA, unwind-info RVA) triple per entry point
+ALIGN	4
+	DD	$L$SEH_begin_sha1_block_data_order wrt ..imagebase	; sha1_block_data_order: begin
+	DD	$L$SEH_end_sha1_block_data_order wrt ..imagebase	; end
+	DD	$L$SEH_info_sha1_block_data_order wrt ..imagebase	; unwind info in .xdata
+	DD	$L$SEH_begin_sha1_block_data_order_shaext wrt ..imagebase	; sha1_block_data_order_shaext: begin
+	DD	$L$SEH_end_sha1_block_data_order_shaext wrt ..imagebase	; end
+	DD	$L$SEH_info_sha1_block_data_order_shaext wrt ..imagebase	; unwind info in .xdata
+	DD	$L$SEH_begin_sha1_block_data_order_ssse3 wrt ..imagebase	; sha1_block_data_order_ssse3: begin
+	DD	$L$SEH_end_sha1_block_data_order_ssse3 wrt ..imagebase	; end
+	DD	$L$SEH_info_sha1_block_data_order_ssse3 wrt ..imagebase	; unwind info in .xdata
+	DD	$L$SEH_begin_sha1_block_data_order_avx wrt ..imagebase	; sha1_block_data_order_avx: begin
+	DD	$L$SEH_end_sha1_block_data_order_avx wrt ..imagebase	; end
+	DD	$L$SEH_info_sha1_block_data_order_avx wrt ..imagebase	; unwind info in .xdata
+	DD	$L$SEH_begin_sha1_block_data_order_avx2 wrt ..imagebase	; sha1_block_data_order_avx2: begin
+	DD	$L$SEH_end_sha1_block_data_order_avx2 wrt ..imagebase	; end
+	DD	$L$SEH_info_sha1_block_data_order_avx2 wrt ..imagebase	; unwind info in .xdata
+section	.xdata rdata align=8	; unwind info records referenced from .pdata
+ALIGN	8
+$L$SEH_info_sha1_block_data_order:
+DB	9,0,0,0	; UNWIND_INFO header: version 1 + UNW_FLAG_EHANDLER (byte 9), prolog size 0, no unwind codes, no frame register
+	DD	se_handler wrt ..imagebase	; handler RVA
+$L$SEH_info_sha1_block_data_order_shaext:
+DB	9,0,0,0	; same header: version 1, UNW_FLAG_EHANDLER, no unwind codes
+	DD	shaext_handler wrt ..imagebase	; handler RVA
+$L$SEH_info_sha1_block_data_order_ssse3:
+DB	9,0,0,0	; same header: version 1, UNW_FLAG_EHANDLER, no unwind codes
+	DD	ssse3_handler wrt ..imagebase	; handler RVA
+	DD	$L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase	; HandlerData[0..1]: prologue/epilogue label RVAs read by ssse3_handler
+$L$SEH_info_sha1_block_data_order_avx:
+DB	9,0,0,0	; same header: version 1, UNW_FLAG_EHANDLER, no unwind codes
+	DD	ssse3_handler wrt ..imagebase	; handler RVA (shared with the ssse3 entry)
+	DD	$L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase	; HandlerData[0..1]: prologue/epilogue label RVAs read by ssse3_handler
+$L$SEH_info_sha1_block_data_order_avx2:
+DB	9,0,0,0	; same header: version 1, UNW_FLAG_EHANDLER, no unwind codes
+	DD	ssse3_handler wrt ..imagebase	; handler RVA (shared with the ssse3 entry)
+	DD	$L$prologue_avx2 wrt ..imagebase,$L$epilogue_avx2 wrt ..imagebase	; HandlerData[0..1]: prologue/epilogue label RVAs read by ssse3_handler
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm
new file mode 100644
index 0000000..68c74cc
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/sha256-x86_64.asm
@@ -0,0 +1,4142 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+EXTERN	OPENSSL_ia32cap_P
+global	sha256_block_data_order
+
+ALIGN	16
+sha256_block_data_order:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha256_block_data_order:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+
+	lea	r11,[OPENSSL_ia32cap_P]
+	mov	r9d,DWORD[r11]
+	mov	r10d,DWORD[4+r11]
+	mov	r11d,DWORD[8+r11]
+	and	r9d,1073741824
+	and	r10d,268435968
+	or	r10d,r9d
+	cmp	r10d,1342177792
+	je	NEAR $L$avx_shortcut
+	test	r10d,512
+	jnz	NEAR $L$ssse3_shortcut
+	mov	rax,rsp
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	shl	rdx,4
+	sub	rsp,16*4+4*8
+	lea	rdx,[rdx*4+rsi]
+	and	rsp,-64
+	mov	QWORD[((64+0))+rsp],rdi
+	mov	QWORD[((64+8))+rsp],rsi
+	mov	QWORD[((64+16))+rsp],rdx
+	mov	QWORD[88+rsp],rax
+
+$L$prologue:
+
+	mov	eax,DWORD[rdi]
+	mov	ebx,DWORD[4+rdi]
+	mov	ecx,DWORD[8+rdi]
+	mov	edx,DWORD[12+rdi]
+	mov	r8d,DWORD[16+rdi]
+	mov	r9d,DWORD[20+rdi]
+	mov	r10d,DWORD[24+rdi]
+	mov	r11d,DWORD[28+rdi]
+	jmp	NEAR $L$loop
+
+ALIGN	16
+$L$loop:
+	mov	edi,ebx
+	lea	rbp,[K256]
+	xor	edi,ecx
+	mov	r12d,DWORD[rsi]
+	mov	r13d,r8d
+	mov	r14d,eax
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r9d
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d
+
+	mov	DWORD[rsp],r12d
+	xor	r14d,eax
+	and	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r15d,r10d
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d
+
+	mov	r15d,eax
+	add	r12d,DWORD[rbp]
+	xor	r14d,eax
+
+	xor	r15d,ebx
+	ror	r13d,6
+	mov	r11d,ebx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r11d,edi
+	add	edx,r12d
+	add	r11d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r11d,r14d
+	mov	r12d,DWORD[4+rsi]
+	mov	r13d,edx
+	mov	r14d,r11d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD[4+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r10d,r14d
+	mov	r12d,DWORD[8+rsi]
+	mov	r13d,ecx
+	mov	r14d,r10d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD[8+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r9d,r14d
+	mov	r12d,DWORD[12+rsi]
+	mov	r13d,ebx
+	mov	r14d,r9d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD[12+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,[20+rbp]
+	add	r8d,r14d
+	mov	r12d,DWORD[16+rsi]
+	mov	r13d,eax
+	mov	r14d,r8d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD[16+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,[4+rbp]
+	add	edx,r14d
+	mov	r12d,DWORD[20+rsi]
+	mov	r13d,r11d
+	mov	r14d,edx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD[20+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,[4+rbp]
+	add	ecx,r14d
+	mov	r12d,DWORD[24+rsi]
+	mov	r13d,r10d
+	mov	r14d,ecx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD[24+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,[4+rbp]
+	add	ebx,r14d
+	mov	r12d,DWORD[28+rsi]
+	mov	r13d,r9d
+	mov	r14d,ebx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD[28+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,[20+rbp]
+	add	eax,r14d
+	mov	r12d,DWORD[32+rsi]
+	mov	r13d,r8d
+	mov	r14d,eax
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r9d
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d
+
+	mov	DWORD[32+rsp],r12d
+	xor	r14d,eax
+	and	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r15d,r10d
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d
+
+	mov	r15d,eax
+	add	r12d,DWORD[rbp]
+	xor	r14d,eax
+
+	xor	r15d,ebx
+	ror	r13d,6
+	mov	r11d,ebx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r11d,edi
+	add	edx,r12d
+	add	r11d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r11d,r14d
+	mov	r12d,DWORD[36+rsi]
+	mov	r13d,edx
+	mov	r14d,r11d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD[36+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r10d,r14d
+	mov	r12d,DWORD[40+rsi]
+	mov	r13d,ecx
+	mov	r14d,r10d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD[40+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,[4+rbp]
+	add	r9d,r14d
+	mov	r12d,DWORD[44+rsi]
+	mov	r13d,ebx
+	mov	r14d,r9d
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD[44+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,[20+rbp]
+	add	r8d,r14d
+	mov	r12d,DWORD[48+rsi]
+	mov	r13d,eax
+	mov	r14d,r8d
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD[48+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,[4+rbp]
+	add	edx,r14d
+	mov	r12d,DWORD[52+rsi]
+	mov	r13d,r11d
+	mov	r14d,edx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD[52+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,[4+rbp]
+	add	ecx,r14d
+	mov	r12d,DWORD[56+rsi]
+	mov	r13d,r10d
+	mov	r14d,ecx
+	bswap	r12d
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD[56+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,[4+rbp]
+	add	ebx,r14d
+	mov	r12d,DWORD[60+rsi]
+	mov	r13d,r9d
+	mov	r14d,ebx
+	bswap	r12d
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD[60+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,[20+rbp]
+	jmp	NEAR $L$rounds_16_xx
+ALIGN	16
+$L$rounds_16_xx:
+	mov	r13d,DWORD[4+rsp]
+	mov	r15d,DWORD[56+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	eax,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[36+rsp]
+
+	add	r12d,DWORD[rsp]
+	mov	r13d,r8d
+	add	r12d,r15d
+	mov	r14d,eax
+	ror	r13d,14
+	mov	r15d,r9d
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d
+
+	mov	DWORD[rsp],r12d
+	xor	r14d,eax
+	and	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r15d,r10d
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d
+
+	mov	r15d,eax
+	add	r12d,DWORD[rbp]
+	xor	r14d,eax
+
+	xor	r15d,ebx
+	ror	r13d,6
+	mov	r11d,ebx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r11d,edi
+	add	edx,r12d
+	add	r11d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[8+rsp]
+	mov	edi,DWORD[60+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r11d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[40+rsp]
+
+	add	r12d,DWORD[4+rsp]
+	mov	r13d,edx
+	add	r12d,edi
+	mov	r14d,r11d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD[4+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[12+rsp]
+	mov	r15d,DWORD[rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r10d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[44+rsp]
+
+	add	r12d,DWORD[8+rsp]
+	mov	r13d,ecx
+	add	r12d,r15d
+	mov	r14d,r10d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD[8+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[16+rsp]
+	mov	edi,DWORD[4+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r9d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[48+rsp]
+
+	add	r12d,DWORD[12+rsp]
+	mov	r13d,ebx
+	add	r12d,edi
+	mov	r14d,r9d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD[12+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,[20+rbp]
+	mov	r13d,DWORD[20+rsp]
+	mov	r15d,DWORD[8+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r8d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[52+rsp]
+
+	add	r12d,DWORD[16+rsp]
+	mov	r13d,eax
+	add	r12d,r15d
+	mov	r14d,r8d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD[16+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[24+rsp]
+	mov	edi,DWORD[12+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	edx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[56+rsp]
+
+	add	r12d,DWORD[20+rsp]
+	mov	r13d,r11d
+	add	r12d,edi
+	mov	r14d,edx
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD[20+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[28+rsp]
+	mov	r15d,DWORD[16+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ecx,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[60+rsp]
+
+	add	r12d,DWORD[24+rsp]
+	mov	r13d,r10d
+	add	r12d,r15d
+	mov	r14d,ecx
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD[24+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[32+rsp]
+	mov	edi,DWORD[20+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ebx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[rsp]
+
+	add	r12d,DWORD[28+rsp]
+	mov	r13d,r9d
+	add	r12d,edi
+	mov	r14d,ebx
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD[28+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,[20+rbp]
+	mov	r13d,DWORD[36+rsp]
+	mov	r15d,DWORD[24+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	eax,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[4+rsp]
+
+	add	r12d,DWORD[32+rsp]
+	mov	r13d,r8d
+	add	r12d,r15d
+	mov	r14d,eax
+	ror	r13d,14
+	mov	r15d,r9d
+
+	xor	r13d,r8d
+	ror	r14d,9
+	xor	r15d,r10d
+
+	mov	DWORD[32+rsp],r12d
+	xor	r14d,eax
+	and	r15d,r8d
+
+	ror	r13d,5
+	add	r12d,r11d
+	xor	r15d,r10d
+
+	ror	r14d,11
+	xor	r13d,r8d
+	add	r12d,r15d
+
+	mov	r15d,eax
+	add	r12d,DWORD[rbp]
+	xor	r14d,eax
+
+	xor	r15d,ebx
+	ror	r13d,6
+	mov	r11d,ebx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r11d,edi
+	add	edx,r12d
+	add	r11d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[40+rsp]
+	mov	edi,DWORD[28+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r11d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[8+rsp]
+
+	add	r12d,DWORD[36+rsp]
+	mov	r13d,edx
+	add	r12d,edi
+	mov	r14d,r11d
+	ror	r13d,14
+	mov	edi,r8d
+
+	xor	r13d,edx
+	ror	r14d,9
+	xor	edi,r9d
+
+	mov	DWORD[36+rsp],r12d
+	xor	r14d,r11d
+	and	edi,edx
+
+	ror	r13d,5
+	add	r12d,r10d
+	xor	edi,r9d
+
+	ror	r14d,11
+	xor	r13d,edx
+	add	r12d,edi
+
+	mov	edi,r11d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r11d
+
+	xor	edi,eax
+	ror	r13d,6
+	mov	r10d,eax
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r10d,r15d
+	add	ecx,r12d
+	add	r10d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[44+rsp]
+	mov	r15d,DWORD[32+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r10d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[12+rsp]
+
+	add	r12d,DWORD[40+rsp]
+	mov	r13d,ecx
+	add	r12d,r15d
+	mov	r14d,r10d
+	ror	r13d,14
+	mov	r15d,edx
+
+	xor	r13d,ecx
+	ror	r14d,9
+	xor	r15d,r8d
+
+	mov	DWORD[40+rsp],r12d
+	xor	r14d,r10d
+	and	r15d,ecx
+
+	ror	r13d,5
+	add	r12d,r9d
+	xor	r15d,r8d
+
+	ror	r14d,11
+	xor	r13d,ecx
+	add	r12d,r15d
+
+	mov	r15d,r10d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r10d
+
+	xor	r15d,r11d
+	ror	r13d,6
+	mov	r9d,r11d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r9d,edi
+	add	ebx,r12d
+	add	r9d,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[48+rsp]
+	mov	edi,DWORD[36+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r9d,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[16+rsp]
+
+	add	r12d,DWORD[44+rsp]
+	mov	r13d,ebx
+	add	r12d,edi
+	mov	r14d,r9d
+	ror	r13d,14
+	mov	edi,ecx
+
+	xor	r13d,ebx
+	ror	r14d,9
+	xor	edi,edx
+
+	mov	DWORD[44+rsp],r12d
+	xor	r14d,r9d
+	and	edi,ebx
+
+	ror	r13d,5
+	add	r12d,r8d
+	xor	edi,edx
+
+	ror	r14d,11
+	xor	r13d,ebx
+	add	r12d,edi
+
+	mov	edi,r9d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r9d
+
+	xor	edi,r10d
+	ror	r13d,6
+	mov	r8d,r10d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	r8d,r15d
+	add	eax,r12d
+	add	r8d,r12d
+
+	lea	rbp,[20+rbp]
+	mov	r13d,DWORD[52+rsp]
+	mov	r15d,DWORD[40+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	r8d,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[20+rsp]
+
+	add	r12d,DWORD[48+rsp]
+	mov	r13d,eax
+	add	r12d,r15d
+	mov	r14d,r8d
+	ror	r13d,14
+	mov	r15d,ebx
+
+	xor	r13d,eax
+	ror	r14d,9
+	xor	r15d,ecx
+
+	mov	DWORD[48+rsp],r12d
+	xor	r14d,r8d
+	and	r15d,eax
+
+	ror	r13d,5
+	add	r12d,edx
+	xor	r15d,ecx
+
+	ror	r14d,11
+	xor	r13d,eax
+	add	r12d,r15d
+
+	mov	r15d,r8d
+	add	r12d,DWORD[rbp]
+	xor	r14d,r8d
+
+	xor	r15d,r9d
+	ror	r13d,6
+	mov	edx,r9d
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	edx,edi
+	add	r11d,r12d
+	add	edx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[56+rsp]
+	mov	edi,DWORD[44+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	edx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[24+rsp]
+
+	add	r12d,DWORD[52+rsp]
+	mov	r13d,r11d
+	add	r12d,edi
+	mov	r14d,edx
+	ror	r13d,14
+	mov	edi,eax
+
+	xor	r13d,r11d
+	ror	r14d,9
+	xor	edi,ebx
+
+	mov	DWORD[52+rsp],r12d
+	xor	r14d,edx
+	and	edi,r11d
+
+	ror	r13d,5
+	add	r12d,ecx
+	xor	edi,ebx
+
+	ror	r14d,11
+	xor	r13d,r11d
+	add	r12d,edi
+
+	mov	edi,edx
+	add	r12d,DWORD[rbp]
+	xor	r14d,edx
+
+	xor	edi,r8d
+	ror	r13d,6
+	mov	ecx,r8d
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ecx,r15d
+	add	r10d,r12d
+	add	ecx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[60+rsp]
+	mov	r15d,DWORD[48+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ecx,r14d
+	mov	r14d,r15d
+	ror	r15d,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	r15d,r14d
+	shr	r14d,10
+
+	ror	r15d,17
+	xor	r12d,r13d
+	xor	r15d,r14d
+	add	r12d,DWORD[28+rsp]
+
+	add	r12d,DWORD[56+rsp]
+	mov	r13d,r10d
+	add	r12d,r15d
+	mov	r14d,ecx
+	ror	r13d,14
+	mov	r15d,r11d
+
+	xor	r13d,r10d
+	ror	r14d,9
+	xor	r15d,eax
+
+	mov	DWORD[56+rsp],r12d
+	xor	r14d,ecx
+	and	r15d,r10d
+
+	ror	r13d,5
+	add	r12d,ebx
+	xor	r15d,eax
+
+	ror	r14d,11
+	xor	r13d,r10d
+	add	r12d,r15d
+
+	mov	r15d,ecx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ecx
+
+	xor	r15d,edx
+	ror	r13d,6
+	mov	ebx,edx
+
+	and	edi,r15d
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	ebx,edi
+	add	r9d,r12d
+	add	ebx,r12d
+
+	lea	rbp,[4+rbp]
+	mov	r13d,DWORD[rsp]
+	mov	edi,DWORD[52+rsp]
+
+	mov	r12d,r13d
+	ror	r13d,11
+	add	ebx,r14d
+	mov	r14d,edi
+	ror	edi,2
+
+	xor	r13d,r12d
+	shr	r12d,3
+	ror	r13d,7
+	xor	edi,r14d
+	shr	r14d,10
+
+	ror	edi,17
+	xor	r12d,r13d
+	xor	edi,r14d
+	add	r12d,DWORD[32+rsp]
+
+	add	r12d,DWORD[60+rsp]
+	mov	r13d,r9d
+	add	r12d,edi
+	mov	r14d,ebx
+	ror	r13d,14
+	mov	edi,r10d
+
+	xor	r13d,r9d
+	ror	r14d,9
+	xor	edi,r11d
+
+	mov	DWORD[60+rsp],r12d
+	xor	r14d,ebx
+	and	edi,r9d
+
+	ror	r13d,5
+	add	r12d,eax
+	xor	edi,r11d
+
+	ror	r14d,11
+	xor	r13d,r9d
+	add	r12d,edi
+
+	mov	edi,ebx
+	add	r12d,DWORD[rbp]
+	xor	r14d,ebx
+
+	xor	edi,ecx
+	ror	r13d,6
+	mov	eax,ecx
+
+	and	r15d,edi
+	ror	r14d,2
+	add	r12d,r13d
+
+	xor	eax,r15d
+	add	r8d,r12d
+	add	eax,r12d
+
+	lea	rbp,[20+rbp]
+	cmp	BYTE[3+rbp],0
+	jnz	NEAR $L$rounds_16_xx
+
+	mov	rdi,QWORD[((64+0))+rsp]
+	add	eax,r14d
+	lea	rsi,[64+rsi]
+
+	add	eax,DWORD[rdi]
+	add	ebx,DWORD[4+rdi]
+	add	ecx,DWORD[8+rdi]
+	add	edx,DWORD[12+rdi]
+	add	r8d,DWORD[16+rdi]
+	add	r9d,DWORD[20+rdi]
+	add	r10d,DWORD[24+rdi]
+	add	r11d,DWORD[28+rdi]
+
+	cmp	rsi,QWORD[((64+16))+rsp]
+
+	mov	DWORD[rdi],eax
+	mov	DWORD[4+rdi],ebx
+	mov	DWORD[8+rdi],ecx
+	mov	DWORD[12+rdi],edx
+	mov	DWORD[16+rdi],r8d
+	mov	DWORD[20+rdi],r9d
+	mov	DWORD[24+rdi],r10d
+	mov	DWORD[28+rdi],r11d
+	jb	NEAR $L$loop
+
+	mov	rsi,QWORD[88+rsp]
+
+	mov	r15,QWORD[((-48))+rsi]
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]
+
+$L$epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_sha256_block_data_order:
+ALIGN	64
+
+K256:	; SHA-256 round-constant table; each 16-byte row of four constants is emitted twice, so 4 constants occupy 32 bytes
+	DD	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	; K[0..3]
+	DD	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	; duplicate of the row above (same pattern for every pair below)
+	DD	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5	; K[4..7]
+	DD	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
+	DD	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3	; K[8..11]
+	DD	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
+	DD	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174	; K[12..15]
+	DD	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
+	DD	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc	; K[16..19] (table offset +128)
+	DD	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
+	DD	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da	; K[20..23]
+	DD	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
+	DD	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7	; K[24..27]
+	DD	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
+	DD	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967	; K[28..31]
+	DD	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
+	DD	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13	; K[32..35] (table offset +256)
+	DD	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
+	DD	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85	; K[36..39]
+	DD	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
+	DD	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3	; K[40..43]
+	DD	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
+	DD	0xd192e819,0xd6990624,0xf40e3585,0x106aa070	; K[44..47]
+	DD	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
+	DD	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5	; K[48..51] (table offset +384)
+	DD	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
+	DD	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3	; K[52..55]
+	DD	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
+	DD	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208	; K[56..59]
+	DD	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
+	DD	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2	; K[60..63]
+	DD	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
+
+	DD	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	; K256+512: pshufb mask that byte-swaps each dword; its byte 3 is 0x00, which the round loops test to detect the end of the constants
+	DD	0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f	; duplicate row
+	DD	0x03020100,0x0b0a0908,0xffffffff,0xffffffff	; K256+512+32: shuffle mask the AVX path keeps in xmm8
+	DD	0x03020100,0x0b0a0908,0xffffffff,0xffffffff	; duplicate row
+	DD	0xffffffff,0xffffffff,0x03020100,0x0b0a0908	; K256+512+64: shuffle mask the AVX path keeps in xmm9
+	DD	0xffffffff,0xffffffff,0x03020100,0x0b0a0908	; duplicate row
+DB	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97	; NUL-terminated ASCII ident string:
+DB	110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54	; "SHA256 block transform for x86_64,
+DB	52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121	;  CRYPTOGAMS by <appro@openssl.org>"
+DB	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+DB	111,114,103,62,0
+
+ALIGN	64
+sha256_block_data_order_ssse3:	; SSSE3 SHA-256 block routine; after the shim below: rdi = eight-dword hash state, rsi = input bytes, rdx = number of 64-byte blocks
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are parked in the caller's stack slots and reloaded in the epilogue
+	mov	rax,rsp
+$L$SEH_begin_sha256_block_data_order_ssse3:
+	mov	rdi,rcx	; move the Win64 argument registers (rcx, rdx, r8) into
+	mov	rsi,rdx	; the rdi/rsi/rdx registers that the body below works with
+	mov	rdx,r8
+
+
+
+$L$ssse3_shortcut:	; NOTE(review): presumably the target the generic entry point jumps to when SSSE3 is available - confirm
+	mov	rax,rsp	; rax = stack pointer on entry; stored at [88+rsp] below so the epilogue can unwind
+
+	push	rbx	; the six pushes land at -8 .. -48 from the entry rsp, where the epilogue reads them back
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	shl	rdx,4	; rdx = blocks*16 ...
+	sub	rsp,160	; reserve the local frame
+	lea	rdx,[rdx*4+rsi]	; ... rdx = rsi + blocks*64 = end-of-input pointer
+	and	rsp,-64	; round the frame base down to a 64-byte boundary
+	mov	QWORD[((64+0))+rsp],rdi	; frame+64: state pointer
+	mov	QWORD[((64+8))+rsp],rsi	; frame+72: input pointer
+	mov	QWORD[((64+16))+rsp],rdx	; frame+80: end-of-input pointer
+	mov	QWORD[88+rsp],rax	; frame+88: stack pointer on entry
+
+	movaps	XMMWORD[(64+32)+rsp],xmm6	; save xmm6-xmm9 (non-volatile under the Win64 ABI); restored before return
+	movaps	XMMWORD[(64+48)+rsp],xmm7
+	movaps	XMMWORD[(64+64)+rsp],xmm8
+	movaps	XMMWORD[(64+80)+rsp],xmm9
+$L$prologue_ssse3:
+
+	mov	eax,DWORD[rdi]	; load the eight 32-bit state words into
+	mov	ebx,DWORD[4+rdi]	; eax, ebx, ecx, edx, r8d, r9d, r10d, r11d
+	mov	ecx,DWORD[8+rdi]
+	mov	edx,DWORD[12+rdi]
+	mov	r8d,DWORD[16+rdi]
+	mov	r9d,DWORD[20+rdi]
+	mov	r10d,DWORD[24+rdi]
+	mov	r11d,DWORD[28+rdi]
+
+
+	jmp	NEAR $L$loop_ssse3
+ALIGN	16
+$L$loop_ssse3:	; per-block setup: load 64 input bytes, byte-swap them, and stage W[0..15]+K[0..15] at [rsp..rsp+63]
+	movdqa	xmm7,XMMWORD[((K256+512))]	; xmm7 = byte-swap shuffle mask stored right after the round constants
+	movdqu	xmm0,XMMWORD[rsi]	; unaligned loads of the 64-byte input block into xmm0-xmm3
+	movdqu	xmm1,XMMWORD[16+rsi]
+	movdqu	xmm2,XMMWORD[32+rsi]
+DB	102,15,56,0,199	; pshufb xmm0,xmm7 (emitted as raw opcode bytes)
+	movdqu	xmm3,XMMWORD[48+rsi]
+	lea	rbp,[K256]	; rbp = start of the round-constant table
+DB	102,15,56,0,207	; pshufb xmm1,xmm7
+	movdqa	xmm4,XMMWORD[rbp]	; constant rows are read 32 bytes apart because each row is stored twice
+	movdqa	xmm5,XMMWORD[32+rbp]
+DB	102,15,56,0,215	; pshufb xmm2,xmm7
+	paddd	xmm4,xmm0	; xmm4 = K[0..3] + W[0..3]
+	movdqa	xmm6,XMMWORD[64+rbp]
+DB	102,15,56,0,223	; pshufb xmm3,xmm7 - last use of the mask
+	movdqa	xmm7,XMMWORD[96+rbp]	; xmm7 is reused for K[12..15]
+	paddd	xmm5,xmm1	; xmm5 = K[4..7] + W[4..7]
+	paddd	xmm6,xmm2	; xmm6 = K[8..11] + W[8..11]
+	paddd	xmm7,xmm3	; xmm7 = K[12..15] + W[12..15]
+	movdqa	XMMWORD[rsp],xmm4	; spill the K+W sums; the scalar rounds consume them as DWORD[n+rsp]
+	mov	r14d,eax	; r14d = copy of the first state word
+	movdqa	XMMWORD[16+rsp],xmm5
+	mov	edi,ebx	; edi = ebx ^ ecx (rdi no longer holds the state pointer; it is reloaded from the frame later)
+	movdqa	XMMWORD[32+rsp],xmm6
+	xor	edi,ecx
+	movdqa	XMMWORD[48+rsp],xmm7
+	mov	r13d,r8d	; r13d = copy of the fifth state word
+	jmp	NEAR $L$ssse3_00_47
+
+ALIGN	16
+$L$ssse3_00_47:	; loop body = 16 scalar rounds interleaved with the SIMD computation of the next 16 K+W values; runs three times (rounds 0-47)
+	sub	rbp,-128	; rbp += 128: advance to the next 16 constants (8 rows of 16 bytes, every row stored twice)
+	ror	r13d,14
+	movdqa	xmm4,xmm1
+	mov	eax,r14d
+	mov	r12d,r9d
+	movdqa	xmm7,xmm3
+	ror	r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	ror	r13d,5
+	xor	r14d,eax
+DB	102,15,58,15,224,4
+	and	r12d,r8d
+	xor	r13d,r8d
+DB	102,15,58,15,250,4
+	add	r11d,DWORD[rsp]
+	mov	r15d,eax
+	xor	r12d,r10d
+	ror	r14d,11
+	movdqa	xmm5,xmm4
+	xor	r15d,ebx
+	add	r11d,r12d
+	movdqa	xmm6,xmm4
+	ror	r13d,6
+	and	edi,r15d
+	psrld	xmm4,3
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	paddd	xmm0,xmm7
+	ror	r14d,2
+	add	edx,r11d
+	psrld	xmm6,7
+	add	r11d,edi
+	mov	r13d,edx
+	pshufd	xmm7,xmm3,250
+	add	r14d,r11d
+	ror	r13d,14
+	pslld	xmm5,14
+	mov	r11d,r14d
+	mov	r12d,r8d
+	pxor	xmm4,xmm6
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	ror	r13d,5
+	psrld	xmm6,11
+	xor	r14d,r11d
+	pxor	xmm4,xmm5
+	and	r12d,edx
+	xor	r13d,edx
+	pslld	xmm5,11
+	add	r10d,DWORD[4+rsp]
+	mov	edi,r11d
+	pxor	xmm4,xmm6
+	xor	r12d,r9d
+	ror	r14d,11
+	movdqa	xmm6,xmm7
+	xor	edi,eax
+	add	r10d,r12d
+	pxor	xmm4,xmm5
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	psrld	xmm7,10
+	add	r10d,r13d
+	xor	r15d,eax
+	paddd	xmm0,xmm4
+	ror	r14d,2
+	add	ecx,r10d
+	psrlq	xmm6,17
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	pxor	xmm7,xmm6
+	ror	r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	ror	r14d,9
+	psrlq	xmm6,2
+	xor	r13d,ecx
+	xor	r12d,r8d
+	pxor	xmm7,xmm6
+	ror	r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	pshufd	xmm7,xmm7,128
+	xor	r13d,ecx
+	add	r9d,DWORD[8+rsp]
+	mov	r15d,r10d
+	psrldq	xmm7,8
+	xor	r12d,r8d
+	ror	r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	ror	r13d,6
+	paddd	xmm0,xmm7
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	pshufd	xmm7,xmm0,80
+	xor	edi,r11d
+	ror	r14d,2
+	add	ebx,r9d
+	movdqa	xmm6,xmm7
+	add	r9d,edi
+	mov	r13d,ebx
+	psrld	xmm7,10
+	add	r14d,r9d
+	ror	r13d,14
+	psrlq	xmm6,17
+	mov	r9d,r14d
+	mov	r12d,ecx
+	pxor	xmm7,xmm6
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	ror	r13d,5
+	xor	r14d,r9d
+	psrlq	xmm6,2
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[12+rsp]
+	pxor	xmm7,xmm6
+	mov	edi,r9d
+	xor	r12d,edx
+	ror	r14d,11
+	pshufd	xmm7,xmm7,8
+	xor	edi,r10d
+	add	r8d,r12d
+	movdqa	xmm6,XMMWORD[rbp]
+	ror	r13d,6
+	and	r15d,edi
+	pslldq	xmm7,8
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	paddd	xmm0,xmm7
+	ror	r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	paddd	xmm6,xmm0
+	mov	r13d,eax
+	add	r14d,r8d
+	movdqa	XMMWORD[rsp],xmm6
+	ror	r13d,14
+	movdqa	xmm4,xmm2
+	mov	r8d,r14d
+	mov	r12d,ebx
+	movdqa	xmm7,xmm0
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	ror	r13d,5
+	xor	r14d,r8d
+DB	102,15,58,15,225,4
+	and	r12d,eax
+	xor	r13d,eax
+DB	102,15,58,15,251,4
+	add	edx,DWORD[16+rsp]
+	mov	r15d,r8d
+	xor	r12d,ecx
+	ror	r14d,11
+	movdqa	xmm5,xmm4
+	xor	r15d,r9d
+	add	edx,r12d
+	movdqa	xmm6,xmm4
+	ror	r13d,6
+	and	edi,r15d
+	psrld	xmm4,3
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	paddd	xmm1,xmm7
+	ror	r14d,2
+	add	r11d,edx
+	psrld	xmm6,7
+	add	edx,edi
+	mov	r13d,r11d
+	pshufd	xmm7,xmm0,250
+	add	r14d,edx
+	ror	r13d,14
+	pslld	xmm5,14
+	mov	edx,r14d
+	mov	r12d,eax
+	pxor	xmm4,xmm6
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	ror	r13d,5
+	psrld	xmm6,11
+	xor	r14d,edx
+	pxor	xmm4,xmm5
+	and	r12d,r11d
+	xor	r13d,r11d
+	pslld	xmm5,11
+	add	ecx,DWORD[20+rsp]
+	mov	edi,edx
+	pxor	xmm4,xmm6
+	xor	r12d,ebx
+	ror	r14d,11
+	movdqa	xmm6,xmm7
+	xor	edi,r8d
+	add	ecx,r12d
+	pxor	xmm4,xmm5
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	psrld	xmm7,10
+	add	ecx,r13d
+	xor	r15d,r8d
+	paddd	xmm1,xmm4
+	ror	r14d,2
+	add	r10d,ecx
+	psrlq	xmm6,17
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	pxor	xmm7,xmm6
+	ror	r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	ror	r14d,9
+	psrlq	xmm6,2
+	xor	r13d,r10d
+	xor	r12d,eax
+	pxor	xmm7,xmm6
+	ror	r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	pshufd	xmm7,xmm7,128
+	xor	r13d,r10d
+	add	ebx,DWORD[24+rsp]
+	mov	r15d,ecx
+	psrldq	xmm7,8
+	xor	r12d,eax
+	ror	r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	ror	r13d,6
+	paddd	xmm1,xmm7
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	pshufd	xmm7,xmm1,80
+	xor	edi,edx
+	ror	r14d,2
+	add	r9d,ebx
+	movdqa	xmm6,xmm7
+	add	ebx,edi
+	mov	r13d,r9d
+	psrld	xmm7,10
+	add	r14d,ebx
+	ror	r13d,14
+	psrlq	xmm6,17
+	mov	ebx,r14d
+	mov	r12d,r10d
+	pxor	xmm7,xmm6
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	ror	r13d,5
+	xor	r14d,ebx
+	psrlq	xmm6,2
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[28+rsp]
+	pxor	xmm7,xmm6
+	mov	edi,ebx
+	xor	r12d,r11d
+	ror	r14d,11
+	pshufd	xmm7,xmm7,8
+	xor	edi,ecx
+	add	eax,r12d
+	movdqa	xmm6,XMMWORD[32+rbp]
+	ror	r13d,6
+	and	r15d,edi
+	pslldq	xmm7,8
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	paddd	xmm1,xmm7
+	ror	r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	paddd	xmm6,xmm1
+	mov	r13d,r8d
+	add	r14d,eax
+	movdqa	XMMWORD[16+rsp],xmm6
+	ror	r13d,14
+	movdqa	xmm4,xmm3
+	mov	eax,r14d
+	mov	r12d,r9d
+	movdqa	xmm7,xmm1
+	ror	r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	ror	r13d,5
+	xor	r14d,eax
+DB	102,15,58,15,226,4
+	and	r12d,r8d
+	xor	r13d,r8d
+DB	102,15,58,15,248,4
+	add	r11d,DWORD[32+rsp]
+	mov	r15d,eax
+	xor	r12d,r10d
+	ror	r14d,11
+	movdqa	xmm5,xmm4
+	xor	r15d,ebx
+	add	r11d,r12d
+	movdqa	xmm6,xmm4
+	ror	r13d,6
+	and	edi,r15d
+	psrld	xmm4,3
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	paddd	xmm2,xmm7
+	ror	r14d,2
+	add	edx,r11d
+	psrld	xmm6,7
+	add	r11d,edi
+	mov	r13d,edx
+	pshufd	xmm7,xmm1,250
+	add	r14d,r11d
+	ror	r13d,14
+	pslld	xmm5,14
+	mov	r11d,r14d
+	mov	r12d,r8d
+	pxor	xmm4,xmm6
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	ror	r13d,5
+	psrld	xmm6,11
+	xor	r14d,r11d
+	pxor	xmm4,xmm5
+	and	r12d,edx
+	xor	r13d,edx
+	pslld	xmm5,11
+	add	r10d,DWORD[36+rsp]
+	mov	edi,r11d
+	pxor	xmm4,xmm6
+	xor	r12d,r9d
+	ror	r14d,11
+	movdqa	xmm6,xmm7
+	xor	edi,eax
+	add	r10d,r12d
+	pxor	xmm4,xmm5
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	psrld	xmm7,10
+	add	r10d,r13d
+	xor	r15d,eax
+	paddd	xmm2,xmm4
+	ror	r14d,2
+	add	ecx,r10d
+	psrlq	xmm6,17
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	pxor	xmm7,xmm6
+	ror	r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	ror	r14d,9
+	psrlq	xmm6,2
+	xor	r13d,ecx
+	xor	r12d,r8d
+	pxor	xmm7,xmm6
+	ror	r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	pshufd	xmm7,xmm7,128
+	xor	r13d,ecx
+	add	r9d,DWORD[40+rsp]
+	mov	r15d,r10d
+	psrldq	xmm7,8
+	xor	r12d,r8d
+	ror	r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	ror	r13d,6
+	paddd	xmm2,xmm7
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	pshufd	xmm7,xmm2,80
+	xor	edi,r11d
+	ror	r14d,2
+	add	ebx,r9d
+	movdqa	xmm6,xmm7
+	add	r9d,edi
+	mov	r13d,ebx
+	psrld	xmm7,10
+	add	r14d,r9d
+	ror	r13d,14
+	psrlq	xmm6,17
+	mov	r9d,r14d
+	mov	r12d,ecx
+	pxor	xmm7,xmm6
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	ror	r13d,5
+	xor	r14d,r9d
+	psrlq	xmm6,2
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[44+rsp]
+	pxor	xmm7,xmm6
+	mov	edi,r9d
+	xor	r12d,edx
+	ror	r14d,11
+	pshufd	xmm7,xmm7,8
+	xor	edi,r10d
+	add	r8d,r12d
+	movdqa	xmm6,XMMWORD[64+rbp]
+	ror	r13d,6
+	and	r15d,edi
+	pslldq	xmm7,8
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	paddd	xmm2,xmm7
+	ror	r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	paddd	xmm6,xmm2
+	mov	r13d,eax
+	add	r14d,r8d
+	movdqa	XMMWORD[32+rsp],xmm6
+	ror	r13d,14
+	movdqa	xmm4,xmm0
+	mov	r8d,r14d
+	mov	r12d,ebx
+	movdqa	xmm7,xmm2
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	ror	r13d,5
+	xor	r14d,r8d
+DB	102,15,58,15,227,4
+	and	r12d,eax
+	xor	r13d,eax
+DB	102,15,58,15,249,4
+	add	edx,DWORD[48+rsp]
+	mov	r15d,r8d
+	xor	r12d,ecx
+	ror	r14d,11
+	movdqa	xmm5,xmm4
+	xor	r15d,r9d
+	add	edx,r12d
+	movdqa	xmm6,xmm4
+	ror	r13d,6
+	and	edi,r15d
+	psrld	xmm4,3
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	paddd	xmm3,xmm7
+	ror	r14d,2
+	add	r11d,edx
+	psrld	xmm6,7
+	add	edx,edi
+	mov	r13d,r11d
+	pshufd	xmm7,xmm2,250
+	add	r14d,edx
+	ror	r13d,14
+	pslld	xmm5,14
+	mov	edx,r14d
+	mov	r12d,eax
+	pxor	xmm4,xmm6
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	ror	r13d,5
+	psrld	xmm6,11
+	xor	r14d,edx
+	pxor	xmm4,xmm5
+	and	r12d,r11d
+	xor	r13d,r11d
+	pslld	xmm5,11
+	add	ecx,DWORD[52+rsp]
+	mov	edi,edx
+	pxor	xmm4,xmm6
+	xor	r12d,ebx
+	ror	r14d,11
+	movdqa	xmm6,xmm7
+	xor	edi,r8d
+	add	ecx,r12d
+	pxor	xmm4,xmm5
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	psrld	xmm7,10
+	add	ecx,r13d
+	xor	r15d,r8d
+	paddd	xmm3,xmm4
+	ror	r14d,2
+	add	r10d,ecx
+	psrlq	xmm6,17
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	pxor	xmm7,xmm6
+	ror	r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	ror	r14d,9
+	psrlq	xmm6,2
+	xor	r13d,r10d
+	xor	r12d,eax
+	pxor	xmm7,xmm6
+	ror	r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	pshufd	xmm7,xmm7,128
+	xor	r13d,r10d
+	add	ebx,DWORD[56+rsp]
+	mov	r15d,ecx
+	psrldq	xmm7,8
+	xor	r12d,eax
+	ror	r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	ror	r13d,6
+	paddd	xmm3,xmm7
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	pshufd	xmm7,xmm3,80
+	xor	edi,edx
+	ror	r14d,2
+	add	r9d,ebx
+	movdqa	xmm6,xmm7
+	add	ebx,edi
+	mov	r13d,r9d
+	psrld	xmm7,10
+	add	r14d,ebx
+	ror	r13d,14
+	psrlq	xmm6,17
+	mov	ebx,r14d
+	mov	r12d,r10d
+	pxor	xmm7,xmm6
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	ror	r13d,5
+	xor	r14d,ebx
+	psrlq	xmm6,2
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[60+rsp]
+	pxor	xmm7,xmm6
+	mov	edi,ebx
+	xor	r12d,r11d
+	ror	r14d,11
+	pshufd	xmm7,xmm7,8
+	xor	edi,ecx
+	add	eax,r12d
+	movdqa	xmm6,XMMWORD[96+rbp]
+	ror	r13d,6
+	and	r15d,edi
+	pslldq	xmm7,8
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	paddd	xmm3,xmm7
+	ror	r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	paddd	xmm6,xmm3
+	mov	r13d,r8d
+	add	r14d,eax
+	movdqa	XMMWORD[48+rsp],xmm6
+	cmp	BYTE[131+rbp],0	; byte 3 of the dword at rbp+128: non-zero for every round-constant row start, 0x00 once rbp+128 reaches the 0x00010203 mask that follows the table
+	jne	NEAR $L$ssse3_00_47	; more constants remain -> run another 16 rounds with message scheduling
+	ror	r13d,14
+	mov	eax,r14d
+	mov	r12d,r9d
+	ror	r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	ror	r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	xor	r13d,r8d
+	add	r11d,DWORD[rsp]
+	mov	r15d,eax
+	xor	r12d,r10d
+	ror	r14d,11
+	xor	r15d,ebx
+	add	r11d,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	ror	r14d,2
+	add	edx,r11d
+	add	r11d,edi
+	mov	r13d,edx
+	add	r14d,r11d
+	ror	r13d,14
+	mov	r11d,r14d
+	mov	r12d,r8d
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	ror	r13d,5
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	add	r10d,DWORD[4+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	ror	r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	add	r10d,r13d
+	xor	r15d,eax
+	ror	r14d,2
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	ror	r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	ror	r14d,9
+	xor	r13d,ecx
+	xor	r12d,r8d
+	ror	r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[8+rsp]
+	mov	r15d,r10d
+	xor	r12d,r8d
+	ror	r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	xor	edi,r11d
+	ror	r14d,2
+	add	ebx,r9d
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	ror	r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	ror	r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[12+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	ror	r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	ror	r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	ror	r13d,14
+	mov	r8d,r14d
+	mov	r12d,ebx
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	ror	r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	xor	r13d,eax
+	add	edx,DWORD[16+rsp]
+	mov	r15d,r8d
+	xor	r12d,ecx
+	ror	r14d,11
+	xor	r15d,r9d
+	add	edx,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	ror	r14d,2
+	add	r11d,edx
+	add	edx,edi
+	mov	r13d,r11d
+	add	r14d,edx
+	ror	r13d,14
+	mov	edx,r14d
+	mov	r12d,eax
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	ror	r13d,5
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	add	ecx,DWORD[20+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	ror	r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	add	ecx,r13d
+	xor	r15d,r8d
+	ror	r14d,2
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	ror	r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	ror	r14d,9
+	xor	r13d,r10d
+	xor	r12d,eax
+	ror	r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[24+rsp]
+	mov	r15d,ecx
+	xor	r12d,eax
+	ror	r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	xor	edi,edx
+	ror	r14d,2
+	add	r9d,ebx
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	ror	r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	ror	r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[28+rsp]
+	mov	edi,ebx
+	xor	r12d,r11d
+	ror	r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	ror	r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax
+	ror	r13d,14
+	mov	eax,r14d
+	mov	r12d,r9d
+	ror	r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	ror	r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	xor	r13d,r8d
+	add	r11d,DWORD[32+rsp]
+	mov	r15d,eax
+	xor	r12d,r10d
+	ror	r14d,11
+	xor	r15d,ebx
+	add	r11d,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	ror	r14d,2
+	add	edx,r11d
+	add	r11d,edi
+	mov	r13d,edx
+	add	r14d,r11d
+	ror	r13d,14
+	mov	r11d,r14d
+	mov	r12d,r8d
+	ror	r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	ror	r13d,5
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	add	r10d,DWORD[36+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	ror	r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	add	r10d,r13d
+	xor	r15d,eax
+	ror	r14d,2
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	ror	r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	ror	r14d,9
+	xor	r13d,ecx
+	xor	r12d,r8d
+	ror	r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[40+rsp]
+	mov	r15d,r10d
+	xor	r12d,r8d
+	ror	r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	xor	edi,r11d
+	ror	r14d,2
+	add	ebx,r9d
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	ror	r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	ror	r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	ror	r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[44+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	ror	r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	ror	r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	ror	r13d,14
+	mov	r8d,r14d
+	mov	r12d,ebx
+	ror	r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	ror	r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	xor	r13d,eax
+	add	edx,DWORD[48+rsp]
+	mov	r15d,r8d
+	xor	r12d,ecx
+	ror	r14d,11
+	xor	r15d,r9d
+	add	edx,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	ror	r14d,2
+	add	r11d,edx
+	add	edx,edi
+	mov	r13d,r11d
+	add	r14d,edx
+	ror	r13d,14
+	mov	edx,r14d
+	mov	r12d,eax
+	ror	r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	ror	r13d,5
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	add	ecx,DWORD[52+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	ror	r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	add	ecx,r13d
+	xor	r15d,r8d
+	ror	r14d,2
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	ror	r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	ror	r14d,9
+	xor	r13d,r10d
+	xor	r12d,eax
+	ror	r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[56+rsp]
+	mov	r15d,ecx
+	xor	r12d,eax
+	ror	r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	ror	r13d,6
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	xor	edi,edx
+	ror	r14d,2
+	add	r9d,ebx
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	ror	r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	ror	r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	ror	r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[60+rsp]
+	mov	edi,ebx
+	xor	r12d,r11d
+	ror	r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	ror	r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	ror	r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax
+	mov	rdi,QWORD[((64+0))+rsp]	; reload the state pointer saved in the frame (edi was used as scratch by the rounds)
+	mov	eax,r14d	; the first working word is taken from r14d
+
+	add	eax,DWORD[rdi]	; add the previous state to the eight working words
+	lea	rsi,[64+rsi]	; advance the input pointer by one 64-byte block (lea leaves flags untouched)
+	add	ebx,DWORD[4+rdi]
+	add	ecx,DWORD[8+rdi]
+	add	edx,DWORD[12+rdi]
+	add	r8d,DWORD[16+rdi]
+	add	r9d,DWORD[20+rdi]
+	add	r10d,DWORD[24+rdi]
+	add	r11d,DWORD[28+rdi]
+
+	cmp	rsi,QWORD[((64+16))+rsp]	; compare against the end-of-input pointer; flags are consumed by the jb below
+
+	mov	DWORD[rdi],eax	; write the updated state back (mov does not modify flags)
+	mov	DWORD[4+rdi],ebx
+	mov	DWORD[8+rdi],ecx
+	mov	DWORD[12+rdi],edx
+	mov	DWORD[16+rdi],r8d
+	mov	DWORD[20+rdi],r9d
+	mov	DWORD[24+rdi],r10d
+	mov	DWORD[28+rdi],r11d
+	jb	NEAR $L$loop_ssse3	; input pointer still below the end -> process the next block
+
+	mov	rsi,QWORD[88+rsp]	; rsi = stack pointer recorded on entry
+
+	movaps	xmm6,XMMWORD[((64+32))+rsp]	; restore xmm6-xmm9 from the frame slots written in the prologue
+	movaps	xmm7,XMMWORD[((64+48))+rsp]
+	movaps	xmm8,XMMWORD[((64+64))+rsp]
+	movaps	xmm9,XMMWORD[((64+80))+rsp]
+	mov	r15,QWORD[((-48))+rsi]	; reload the six pushed registers from just below the entry stack pointer
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; drop the aligned frame: rsp = value on entry
+
+$L$epilogue_ssse3:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]	; rdi/rsi come back from the slots filled by the WIN64 prologue
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_sha256_block_data_order_ssse3:
+
+ALIGN	64
+sha256_block_data_order_avx:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha256_block_data_order_avx:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+
+$L$avx_shortcut:
+	mov	rax,rsp
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	shl	rdx,4
+	sub	rsp,160
+	lea	rdx,[rdx*4+rsi]
+	and	rsp,-64
+	mov	QWORD[((64+0))+rsp],rdi
+	mov	QWORD[((64+8))+rsp],rsi
+	mov	QWORD[((64+16))+rsp],rdx
+	mov	QWORD[88+rsp],rax
+
+	movaps	XMMWORD[(64+32)+rsp],xmm6
+	movaps	XMMWORD[(64+48)+rsp],xmm7
+	movaps	XMMWORD[(64+64)+rsp],xmm8
+	movaps	XMMWORD[(64+80)+rsp],xmm9
+$L$prologue_avx:
+
+	vzeroupper
+	mov	eax,DWORD[rdi]
+	mov	ebx,DWORD[4+rdi]
+	mov	ecx,DWORD[8+rdi]
+	mov	edx,DWORD[12+rdi]
+	mov	r8d,DWORD[16+rdi]
+	mov	r9d,DWORD[20+rdi]
+	mov	r10d,DWORD[24+rdi]
+	mov	r11d,DWORD[28+rdi]
+	vmovdqa	xmm8,XMMWORD[((K256+512+32))]
+	vmovdqa	xmm9,XMMWORD[((K256+512+64))]
+	jmp	NEAR $L$loop_avx
+ALIGN	16
+$L$loop_avx:
+	vmovdqa	xmm7,XMMWORD[((K256+512))]
+	vmovdqu	xmm0,XMMWORD[rsi]
+	vmovdqu	xmm1,XMMWORD[16+rsi]
+	vmovdqu	xmm2,XMMWORD[32+rsi]
+	vmovdqu	xmm3,XMMWORD[48+rsi]
+	vpshufb	xmm0,xmm0,xmm7
+	lea	rbp,[K256]
+	vpshufb	xmm1,xmm1,xmm7
+	vpshufb	xmm2,xmm2,xmm7
+	vpaddd	xmm4,xmm0,XMMWORD[rbp]
+	vpshufb	xmm3,xmm3,xmm7
+	vpaddd	xmm5,xmm1,XMMWORD[32+rbp]
+	vpaddd	xmm6,xmm2,XMMWORD[64+rbp]
+	vpaddd	xmm7,xmm3,XMMWORD[96+rbp]
+	vmovdqa	XMMWORD[rsp],xmm4
+	mov	r14d,eax
+	vmovdqa	XMMWORD[16+rsp],xmm5
+	mov	edi,ebx
+	vmovdqa	XMMWORD[32+rsp],xmm6
+	xor	edi,ecx
+	vmovdqa	XMMWORD[48+rsp],xmm7
+	mov	r13d,r8d
+	jmp	NEAR $L$avx_00_47
+
+ALIGN	16
+$L$avx_00_47:
+	sub	rbp,-128
+	vpalignr	xmm4,xmm1,xmm0,4
+	shrd	r13d,r13d,14
+	mov	eax,r14d
+	mov	r12d,r9d
+	vpalignr	xmm7,xmm3,xmm2,4
+	shrd	r14d,r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	vpsrld	xmm6,xmm4,7
+	shrd	r13d,r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	vpaddd	xmm0,xmm0,xmm7
+	xor	r13d,r8d
+	add	r11d,DWORD[rsp]
+	mov	r15d,eax
+	vpsrld	xmm7,xmm4,3
+	xor	r12d,r10d
+	shrd	r14d,r14d,11
+	xor	r15d,ebx
+	vpslld	xmm5,xmm4,14
+	add	r11d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	vpxor	xmm4,xmm7,xmm6
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	vpshufd	xmm7,xmm3,250
+	shrd	r14d,r14d,2
+	add	edx,r11d
+	add	r11d,edi
+	vpsrld	xmm6,xmm6,11
+	mov	r13d,edx
+	add	r14d,r11d
+	shrd	r13d,r13d,14
+	vpxor	xmm4,xmm4,xmm5
+	mov	r11d,r14d
+	mov	r12d,r8d
+	shrd	r14d,r14d,9
+	vpslld	xmm5,xmm5,11
+	xor	r13d,edx
+	xor	r12d,r9d
+	shrd	r13d,r13d,5
+	vpxor	xmm4,xmm4,xmm6
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	vpsrld	xmm6,xmm7,10
+	add	r10d,DWORD[4+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	vpxor	xmm4,xmm4,xmm5
+	shrd	r14d,r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	vpsrlq	xmm7,xmm7,17
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	vpaddd	xmm0,xmm0,xmm4
+	add	r10d,r13d
+	xor	r15d,eax
+	shrd	r14d,r14d,2
+	vpxor	xmm6,xmm6,xmm7
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	vpsrlq	xmm7,xmm7,2
+	add	r14d,r10d
+	shrd	r13d,r13d,14
+	mov	r10d,r14d
+	vpxor	xmm6,xmm6,xmm7
+	mov	r12d,edx
+	shrd	r14d,r14d,9
+	xor	r13d,ecx
+	vpshufb	xmm6,xmm6,xmm8
+	xor	r12d,r8d
+	shrd	r13d,r13d,5
+	xor	r14d,r10d
+	vpaddd	xmm0,xmm0,xmm6
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[8+rsp]
+	vpshufd	xmm7,xmm0,80
+	mov	r15d,r10d
+	xor	r12d,r8d
+	shrd	r14d,r14d,11
+	vpsrld	xmm6,xmm7,10
+	xor	r15d,r11d
+	add	r9d,r12d
+	shrd	r13d,r13d,6
+	vpsrlq	xmm7,xmm7,17
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	vpxor	xmm6,xmm6,xmm7
+	xor	edi,r11d
+	shrd	r14d,r14d,2
+	add	ebx,r9d
+	vpsrlq	xmm7,xmm7,2
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	vpxor	xmm6,xmm6,xmm7
+	shrd	r13d,r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	vpshufb	xmm6,xmm6,xmm9
+	shrd	r14d,r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	vpaddd	xmm0,xmm0,xmm6
+	shrd	r13d,r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	vpaddd	xmm6,xmm0,XMMWORD[rbp]
+	xor	r13d,ebx
+	add	r8d,DWORD[12+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	shrd	r14d,r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	shrd	r14d,r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	vmovdqa	XMMWORD[rsp],xmm6
+	vpalignr	xmm4,xmm2,xmm1,4
+	shrd	r13d,r13d,14
+	mov	r8d,r14d
+	mov	r12d,ebx
+	vpalignr	xmm7,xmm0,xmm3,4
+	shrd	r14d,r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	vpsrld	xmm6,xmm4,7
+	shrd	r13d,r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	vpaddd	xmm1,xmm1,xmm7
+	xor	r13d,eax
+	add	edx,DWORD[16+rsp]
+	mov	r15d,r8d
+	vpsrld	xmm7,xmm4,3
+	xor	r12d,ecx
+	shrd	r14d,r14d,11
+	xor	r15d,r9d
+	vpslld	xmm5,xmm4,14
+	add	edx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	vpxor	xmm4,xmm7,xmm6
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	vpshufd	xmm7,xmm0,250
+	shrd	r14d,r14d,2
+	add	r11d,edx
+	add	edx,edi
+	vpsrld	xmm6,xmm6,11
+	mov	r13d,r11d
+	add	r14d,edx
+	shrd	r13d,r13d,14
+	vpxor	xmm4,xmm4,xmm5
+	mov	edx,r14d
+	mov	r12d,eax
+	shrd	r14d,r14d,9
+	vpslld	xmm5,xmm5,11
+	xor	r13d,r11d
+	xor	r12d,ebx
+	shrd	r13d,r13d,5
+	vpxor	xmm4,xmm4,xmm6
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	vpsrld	xmm6,xmm7,10
+	add	ecx,DWORD[20+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	vpxor	xmm4,xmm4,xmm5
+	shrd	r14d,r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	vpsrlq	xmm7,xmm7,17
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	vpaddd	xmm1,xmm1,xmm4
+	add	ecx,r13d
+	xor	r15d,r8d
+	shrd	r14d,r14d,2
+	vpxor	xmm6,xmm6,xmm7
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	vpsrlq	xmm7,xmm7,2
+	add	r14d,ecx
+	shrd	r13d,r13d,14
+	mov	ecx,r14d
+	vpxor	xmm6,xmm6,xmm7
+	mov	r12d,r11d
+	shrd	r14d,r14d,9
+	xor	r13d,r10d
+	vpshufb	xmm6,xmm6,xmm8
+	xor	r12d,eax
+	shrd	r13d,r13d,5
+	xor	r14d,ecx
+	vpaddd	xmm1,xmm1,xmm6
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[24+rsp]
+	vpshufd	xmm7,xmm1,80
+	mov	r15d,ecx
+	xor	r12d,eax
+	shrd	r14d,r14d,11
+	vpsrld	xmm6,xmm7,10
+	xor	r15d,edx
+	add	ebx,r12d
+	shrd	r13d,r13d,6
+	vpsrlq	xmm7,xmm7,17
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	vpxor	xmm6,xmm6,xmm7
+	xor	edi,edx
+	shrd	r14d,r14d,2
+	add	r9d,ebx
+	vpsrlq	xmm7,xmm7,2
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	vpxor	xmm6,xmm6,xmm7
+	shrd	r13d,r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	vpshufb	xmm6,xmm6,xmm9
+	shrd	r14d,r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	vpaddd	xmm1,xmm1,xmm6
+	shrd	r13d,r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	vpaddd	xmm6,xmm1,XMMWORD[32+rbp]
+	xor	r13d,r9d
+	add	eax,DWORD[28+rsp]
+	mov	edi,ebx
+	xor	r12d,r11d
+	shrd	r14d,r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	shrd	r14d,r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax
+	vmovdqa	XMMWORD[16+rsp],xmm6
+	vpalignr	xmm4,xmm3,xmm2,4
+	shrd	r13d,r13d,14
+	mov	eax,r14d
+	mov	r12d,r9d
+	vpalignr	xmm7,xmm1,xmm0,4
+	shrd	r14d,r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	vpsrld	xmm6,xmm4,7
+	shrd	r13d,r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	vpaddd	xmm2,xmm2,xmm7
+	xor	r13d,r8d
+	add	r11d,DWORD[32+rsp]
+	mov	r15d,eax
+	vpsrld	xmm7,xmm4,3
+	xor	r12d,r10d
+	shrd	r14d,r14d,11
+	xor	r15d,ebx
+	vpslld	xmm5,xmm4,14
+	add	r11d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	vpxor	xmm4,xmm7,xmm6
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	vpshufd	xmm7,xmm1,250
+	shrd	r14d,r14d,2
+	add	edx,r11d
+	add	r11d,edi
+	vpsrld	xmm6,xmm6,11
+	mov	r13d,edx
+	add	r14d,r11d
+	shrd	r13d,r13d,14
+	vpxor	xmm4,xmm4,xmm5
+	mov	r11d,r14d
+	mov	r12d,r8d
+	shrd	r14d,r14d,9
+	vpslld	xmm5,xmm5,11
+	xor	r13d,edx
+	xor	r12d,r9d
+	shrd	r13d,r13d,5
+	vpxor	xmm4,xmm4,xmm6
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	vpsrld	xmm6,xmm7,10
+	add	r10d,DWORD[36+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	vpxor	xmm4,xmm4,xmm5
+	shrd	r14d,r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	vpsrlq	xmm7,xmm7,17
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	vpaddd	xmm2,xmm2,xmm4
+	add	r10d,r13d
+	xor	r15d,eax
+	shrd	r14d,r14d,2
+	vpxor	xmm6,xmm6,xmm7
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	vpsrlq	xmm7,xmm7,2
+	add	r14d,r10d
+	shrd	r13d,r13d,14
+	mov	r10d,r14d
+	vpxor	xmm6,xmm6,xmm7
+	mov	r12d,edx
+	shrd	r14d,r14d,9
+	xor	r13d,ecx
+	vpshufb	xmm6,xmm6,xmm8
+	xor	r12d,r8d
+	shrd	r13d,r13d,5
+	xor	r14d,r10d
+	vpaddd	xmm2,xmm2,xmm6
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[40+rsp]
+	vpshufd	xmm7,xmm2,80
+	mov	r15d,r10d
+	xor	r12d,r8d
+	shrd	r14d,r14d,11
+	vpsrld	xmm6,xmm7,10
+	xor	r15d,r11d
+	add	r9d,r12d
+	shrd	r13d,r13d,6
+	vpsrlq	xmm7,xmm7,17
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	vpxor	xmm6,xmm6,xmm7
+	xor	edi,r11d
+	shrd	r14d,r14d,2
+	add	ebx,r9d
+	vpsrlq	xmm7,xmm7,2
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	vpxor	xmm6,xmm6,xmm7
+	shrd	r13d,r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	vpshufb	xmm6,xmm6,xmm9
+	shrd	r14d,r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	vpaddd	xmm2,xmm2,xmm6
+	shrd	r13d,r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	vpaddd	xmm6,xmm2,XMMWORD[64+rbp]
+	xor	r13d,ebx
+	add	r8d,DWORD[44+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	shrd	r14d,r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	shrd	r14d,r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	vmovdqa	XMMWORD[32+rsp],xmm6
+	vpalignr	xmm4,xmm0,xmm3,4
+	shrd	r13d,r13d,14
+	mov	r8d,r14d
+	mov	r12d,ebx
+	vpalignr	xmm7,xmm2,xmm1,4
+	shrd	r14d,r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	vpsrld	xmm6,xmm4,7
+	shrd	r13d,r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	vpaddd	xmm3,xmm3,xmm7
+	xor	r13d,eax
+	add	edx,DWORD[48+rsp]
+	mov	r15d,r8d
+	vpsrld	xmm7,xmm4,3
+	xor	r12d,ecx
+	shrd	r14d,r14d,11
+	xor	r15d,r9d
+	vpslld	xmm5,xmm4,14
+	add	edx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	vpxor	xmm4,xmm7,xmm6
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	vpshufd	xmm7,xmm2,250
+	shrd	r14d,r14d,2
+	add	r11d,edx
+	add	edx,edi
+	vpsrld	xmm6,xmm6,11
+	mov	r13d,r11d
+	add	r14d,edx
+	shrd	r13d,r13d,14
+	vpxor	xmm4,xmm4,xmm5
+	mov	edx,r14d
+	mov	r12d,eax
+	shrd	r14d,r14d,9
+	vpslld	xmm5,xmm5,11
+	xor	r13d,r11d
+	xor	r12d,ebx
+	shrd	r13d,r13d,5
+	vpxor	xmm4,xmm4,xmm6
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	vpsrld	xmm6,xmm7,10
+	add	ecx,DWORD[52+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	vpxor	xmm4,xmm4,xmm5
+	shrd	r14d,r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	vpsrlq	xmm7,xmm7,17
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	vpaddd	xmm3,xmm3,xmm4
+	add	ecx,r13d
+	xor	r15d,r8d
+	shrd	r14d,r14d,2
+	vpxor	xmm6,xmm6,xmm7
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	vpsrlq	xmm7,xmm7,2
+	add	r14d,ecx
+	shrd	r13d,r13d,14
+	mov	ecx,r14d
+	vpxor	xmm6,xmm6,xmm7
+	mov	r12d,r11d
+	shrd	r14d,r14d,9
+	xor	r13d,r10d
+	vpshufb	xmm6,xmm6,xmm8
+	xor	r12d,eax
+	shrd	r13d,r13d,5
+	xor	r14d,ecx
+	vpaddd	xmm3,xmm3,xmm6
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[56+rsp]
+	vpshufd	xmm7,xmm3,80
+	mov	r15d,ecx
+	xor	r12d,eax
+	shrd	r14d,r14d,11
+	vpsrld	xmm6,xmm7,10
+	xor	r15d,edx
+	add	ebx,r12d
+	shrd	r13d,r13d,6
+	vpsrlq	xmm7,xmm7,17
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	vpxor	xmm6,xmm6,xmm7
+	xor	edi,edx
+	shrd	r14d,r14d,2
+	add	r9d,ebx
+	vpsrlq	xmm7,xmm7,2
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	vpxor	xmm6,xmm6,xmm7
+	shrd	r13d,r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	vpshufb	xmm6,xmm6,xmm9
+	shrd	r14d,r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	vpaddd	xmm3,xmm3,xmm6
+	shrd	r13d,r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	vpaddd	xmm6,xmm3,XMMWORD[96+rbp]
+	xor	r13d,r9d
+	add	eax,DWORD[60+rsp]
+	mov	edi,ebx
+	xor	r12d,r11d
+	shrd	r14d,r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	shrd	r14d,r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax
+	vmovdqa	XMMWORD[48+rsp],xmm6
+	cmp	BYTE[131+rbp],0
+	jne	NEAR $L$avx_00_47
+	shrd	r13d,r13d,14
+	mov	eax,r14d
+	mov	r12d,r9d
+	shrd	r14d,r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	shrd	r13d,r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	xor	r13d,r8d
+	add	r11d,DWORD[rsp]
+	mov	r15d,eax
+	xor	r12d,r10d
+	shrd	r14d,r14d,11
+	xor	r15d,ebx
+	add	r11d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	shrd	r14d,r14d,2
+	add	edx,r11d
+	add	r11d,edi
+	mov	r13d,edx
+	add	r14d,r11d
+	shrd	r13d,r13d,14
+	mov	r11d,r14d
+	mov	r12d,r8d
+	shrd	r14d,r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	shrd	r13d,r13d,5
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	add	r10d,DWORD[4+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	shrd	r14d,r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	add	r10d,r13d
+	xor	r15d,eax
+	shrd	r14d,r14d,2
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	shrd	r13d,r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	shrd	r14d,r14d,9
+	xor	r13d,ecx
+	xor	r12d,r8d
+	shrd	r13d,r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[8+rsp]
+	mov	r15d,r10d
+	xor	r12d,r8d
+	shrd	r14d,r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	xor	edi,r11d
+	shrd	r14d,r14d,2
+	add	ebx,r9d
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	shrd	r13d,r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	shrd	r14d,r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	shrd	r13d,r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[12+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	shrd	r14d,r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	shrd	r14d,r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	shrd	r13d,r13d,14
+	mov	r8d,r14d
+	mov	r12d,ebx
+	shrd	r14d,r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	shrd	r13d,r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	xor	r13d,eax
+	add	edx,DWORD[16+rsp]
+	mov	r15d,r8d
+	xor	r12d,ecx
+	shrd	r14d,r14d,11
+	xor	r15d,r9d
+	add	edx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	shrd	r14d,r14d,2
+	add	r11d,edx
+	add	edx,edi
+	mov	r13d,r11d
+	add	r14d,edx
+	shrd	r13d,r13d,14
+	mov	edx,r14d
+	mov	r12d,eax
+	shrd	r14d,r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	shrd	r13d,r13d,5
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	add	ecx,DWORD[20+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	shrd	r14d,r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	add	ecx,r13d
+	xor	r15d,r8d
+	shrd	r14d,r14d,2
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	shrd	r13d,r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	shrd	r14d,r14d,9
+	xor	r13d,r10d
+	xor	r12d,eax
+	shrd	r13d,r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[24+rsp]
+	mov	r15d,ecx
+	xor	r12d,eax
+	shrd	r14d,r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	xor	edi,edx
+	shrd	r14d,r14d,2
+	add	r9d,ebx
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	shrd	r13d,r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	shrd	r14d,r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	shrd	r13d,r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[28+rsp]
+	mov	edi,ebx
+	xor	r12d,r11d
+	shrd	r14d,r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	shrd	r14d,r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax
+	shrd	r13d,r13d,14
+	mov	eax,r14d
+	mov	r12d,r9d
+	shrd	r14d,r14d,9
+	xor	r13d,r8d
+	xor	r12d,r10d
+	shrd	r13d,r13d,5
+	xor	r14d,eax
+	and	r12d,r8d
+	xor	r13d,r8d
+	add	r11d,DWORD[32+rsp]
+	mov	r15d,eax
+	xor	r12d,r10d
+	shrd	r14d,r14d,11
+	xor	r15d,ebx
+	add	r11d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,eax
+	add	r11d,r13d
+	xor	edi,ebx
+	shrd	r14d,r14d,2
+	add	edx,r11d
+	add	r11d,edi
+	mov	r13d,edx
+	add	r14d,r11d
+	shrd	r13d,r13d,14
+	mov	r11d,r14d
+	mov	r12d,r8d
+	shrd	r14d,r14d,9
+	xor	r13d,edx
+	xor	r12d,r9d
+	shrd	r13d,r13d,5
+	xor	r14d,r11d
+	and	r12d,edx
+	xor	r13d,edx
+	add	r10d,DWORD[36+rsp]
+	mov	edi,r11d
+	xor	r12d,r9d
+	shrd	r14d,r14d,11
+	xor	edi,eax
+	add	r10d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r11d
+	add	r10d,r13d
+	xor	r15d,eax
+	shrd	r14d,r14d,2
+	add	ecx,r10d
+	add	r10d,r15d
+	mov	r13d,ecx
+	add	r14d,r10d
+	shrd	r13d,r13d,14
+	mov	r10d,r14d
+	mov	r12d,edx
+	shrd	r14d,r14d,9
+	xor	r13d,ecx
+	xor	r12d,r8d
+	shrd	r13d,r13d,5
+	xor	r14d,r10d
+	and	r12d,ecx
+	xor	r13d,ecx
+	add	r9d,DWORD[40+rsp]
+	mov	r15d,r10d
+	xor	r12d,r8d
+	shrd	r14d,r14d,11
+	xor	r15d,r11d
+	add	r9d,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,r10d
+	add	r9d,r13d
+	xor	edi,r11d
+	shrd	r14d,r14d,2
+	add	ebx,r9d
+	add	r9d,edi
+	mov	r13d,ebx
+	add	r14d,r9d
+	shrd	r13d,r13d,14
+	mov	r9d,r14d
+	mov	r12d,ecx
+	shrd	r14d,r14d,9
+	xor	r13d,ebx
+	xor	r12d,edx
+	shrd	r13d,r13d,5
+	xor	r14d,r9d
+	and	r12d,ebx
+	xor	r13d,ebx
+	add	r8d,DWORD[44+rsp]
+	mov	edi,r9d
+	xor	r12d,edx
+	shrd	r14d,r14d,11
+	xor	edi,r10d
+	add	r8d,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,r9d
+	add	r8d,r13d
+	xor	r15d,r10d
+	shrd	r14d,r14d,2
+	add	eax,r8d
+	add	r8d,r15d
+	mov	r13d,eax
+	add	r14d,r8d
+	shrd	r13d,r13d,14
+	mov	r8d,r14d
+	mov	r12d,ebx
+	shrd	r14d,r14d,9
+	xor	r13d,eax
+	xor	r12d,ecx
+	shrd	r13d,r13d,5
+	xor	r14d,r8d
+	and	r12d,eax
+	xor	r13d,eax
+	add	edx,DWORD[48+rsp]
+	mov	r15d,r8d
+	xor	r12d,ecx
+	shrd	r14d,r14d,11
+	xor	r15d,r9d
+	add	edx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,r8d
+	add	edx,r13d
+	xor	edi,r9d
+	shrd	r14d,r14d,2
+	add	r11d,edx
+	add	edx,edi
+	mov	r13d,r11d
+	add	r14d,edx
+	shrd	r13d,r13d,14
+	mov	edx,r14d
+	mov	r12d,eax
+	shrd	r14d,r14d,9
+	xor	r13d,r11d
+	xor	r12d,ebx
+	shrd	r13d,r13d,5
+	xor	r14d,edx
+	and	r12d,r11d
+	xor	r13d,r11d
+	add	ecx,DWORD[52+rsp]
+	mov	edi,edx
+	xor	r12d,ebx
+	shrd	r14d,r14d,11
+	xor	edi,r8d
+	add	ecx,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,edx
+	add	ecx,r13d
+	xor	r15d,r8d
+	shrd	r14d,r14d,2
+	add	r10d,ecx
+	add	ecx,r15d
+	mov	r13d,r10d
+	add	r14d,ecx
+	shrd	r13d,r13d,14
+	mov	ecx,r14d
+	mov	r12d,r11d
+	shrd	r14d,r14d,9
+	xor	r13d,r10d
+	xor	r12d,eax
+	shrd	r13d,r13d,5
+	xor	r14d,ecx
+	and	r12d,r10d
+	xor	r13d,r10d
+	add	ebx,DWORD[56+rsp]
+	mov	r15d,ecx
+	xor	r12d,eax
+	shrd	r14d,r14d,11
+	xor	r15d,edx
+	add	ebx,r12d
+	shrd	r13d,r13d,6
+	and	edi,r15d
+	xor	r14d,ecx
+	add	ebx,r13d
+	xor	edi,edx
+	shrd	r14d,r14d,2
+	add	r9d,ebx
+	add	ebx,edi
+	mov	r13d,r9d
+	add	r14d,ebx
+	shrd	r13d,r13d,14
+	mov	ebx,r14d
+	mov	r12d,r10d
+	shrd	r14d,r14d,9
+	xor	r13d,r9d
+	xor	r12d,r11d
+	shrd	r13d,r13d,5
+	xor	r14d,ebx
+	and	r12d,r9d
+	xor	r13d,r9d
+	add	eax,DWORD[60+rsp]
+	mov	edi,ebx
+	xor	r12d,r11d
+	shrd	r14d,r14d,11
+	xor	edi,ecx
+	add	eax,r12d
+	shrd	r13d,r13d,6
+	and	r15d,edi
+	xor	r14d,ebx
+	add	eax,r13d
+	xor	r15d,ecx
+	shrd	r14d,r14d,2
+	add	r8d,eax
+	add	eax,r15d
+	mov	r13d,r8d
+	add	r14d,eax
+	mov	rdi,QWORD[((64+0))+rsp]
+	mov	eax,r14d
+
+	add	eax,DWORD[rdi]
+	lea	rsi,[64+rsi]
+	add	ebx,DWORD[4+rdi]
+	add	ecx,DWORD[8+rdi]
+	add	edx,DWORD[12+rdi]
+	add	r8d,DWORD[16+rdi]
+	add	r9d,DWORD[20+rdi]
+	add	r10d,DWORD[24+rdi]
+	add	r11d,DWORD[28+rdi]
+
+	cmp	rsi,QWORD[((64+16))+rsp]
+
+	mov	DWORD[rdi],eax
+	mov	DWORD[4+rdi],ebx
+	mov	DWORD[8+rdi],ecx
+	mov	DWORD[12+rdi],edx
+	mov	DWORD[16+rdi],r8d
+	mov	DWORD[20+rdi],r9d
+	mov	DWORD[24+rdi],r10d
+	mov	DWORD[28+rdi],r11d
+	jb	NEAR $L$loop_avx
+
+	mov	rsi,QWORD[88+rsp]
+
+	vzeroupper
+	movaps	xmm6,XMMWORD[((64+32))+rsp]
+	movaps	xmm7,XMMWORD[((64+48))+rsp]
+	movaps	xmm8,XMMWORD[((64+64))+rsp]
+	movaps	xmm9,XMMWORD[((64+80))+rsp]
+	mov	r15,QWORD[((-48))+rsi]
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]
+
+$L$epilogue_avx:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_sha256_block_data_order_avx:
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	; exception handler named by every $L$SEH_info_* record in .xdata; patches the context at r8, then calls RtlVirtualUnwind
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; scratch area; slots 32..56 are filled before the RtlVirtualUnwind call below
+
+	mov	rax,QWORD[120+r8]	; presumably CONTEXT.Rax (r8 = context record) - TODO confirm offset against winnt.h
+	mov	rbx,QWORD[248+r8]	; presumably CONTEXT.Rip - TODO confirm
+
+	mov	rsi,QWORD[8+r9]	; presumably DISPATCHER_CONTEXT.ImageBase (r9 = dispatcher context) - TODO confirm
+	mov	r11,QWORD[56+r9]	; presumably DISPATCHER_CONTEXT.HandlerData - TODO confirm
+
+	mov	r10d,DWORD[r11]	; first 32-bit image-relative value of the handler data
+	lea	r10,[r10*1+rsi]	; rebase it onto rsi to get an absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$in_prologue	; rbx below first bound: skip the frame walk, use rax as loaded from [120+r8]
+
+	mov	rax,QWORD[152+r8]	; presumably CONTEXT.Rsp - TODO confirm
+
+	mov	r10d,DWORD[4+r11]	; second 32-bit image-relative value of the handler data
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$in_prologue	; rbx at/after second bound: skip the frame walk, use rax as loaded from [152+r8]
+	mov	rsi,rax	; keep the frame base for the xmm copy below
+	mov	rax,QWORD[((64+24))+rax]	; saved stack pointer slot (the AVX epilogue above reads the same slot as QWORD[88+rsp])
+
+	mov	rbx,QWORD[((-8))+rax]	; reload the six pushed registers, same offsets the function epilogue uses
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	; write them into the context record (presumably CONTEXT.Rbx - TODO confirm)
+	mov	QWORD[160+r8],rbp	; presumably CONTEXT.Rbp
+	mov	QWORD[216+r8],r12	; presumably CONTEXT.R12
+	mov	QWORD[224+r8],r13	; presumably CONTEXT.R13
+	mov	QWORD[232+r8],r14	; presumably CONTEXT.R14
+	mov	QWORD[240+r8],r15	; presumably CONTEXT.R15
+
+	lea	r10,[$L$epilogue]
+	cmp	rbx,r10	; NOTE(review): rbx was reloaded from [-8+rax] above, so this no longer compares the value read from [248+r8] - confirm against the upstream generator
+	jb	NEAR $L$in_prologue	; taken: skip the xmm copy
+
+	lea	rsi,[((64+32))+rsi]	; source: frame offset 64+32, where the AVX epilogue above reloads xmm6..xmm9 from
+	lea	rdi,[512+r8]	; destination inside the context record (presumably CONTEXT.Xmm6 - TODO confirm)
+	mov	ecx,8	; 8 qwords = 64 bytes = four 16-byte registers
+	DD	0xa548f3fc	; raw bytes FC F3 48 A5 = cld; rep movsq
+
+$L$in_prologue:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi as stored by the ";WIN64 prologue" at [8+rsp]/[16+rsp]
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; store the recovered stack pointer (presumably CONTEXT.Rsp)
+	mov	QWORD[168+r8],rsi	; presumably CONTEXT.Rsi
+	mov	QWORD[176+r8],rdi	; presumably CONTEXT.Rdi
+
+	mov	rdi,QWORD[40+r9]	; destination pointer read from the dispatcher context (presumably ->ContextRecord)
+	mov	rsi,r8	; source: the context record patched above
+	mov	ecx,154	; 154 qwords = 1232 bytes (presumably sizeof(CONTEXT) - TODO confirm)
+	DD	0xa548f3fc	; raw bytes FC F3 48 A5 = cld; rep movsq
+
+	mov	rsi,r9	; rsi = dispatcher context, used as base for the argument loads below
+	xor	rcx,rcx	; 1st argument = 0 (presumably HandlerType = UNW_FLAG_NHANDLER)
+	mov	rdx,QWORD[8+rsi]	; 2nd argument (presumably ImageBase)
+	mov	r8,QWORD[rsi]	; 3rd argument (presumably ControlPc)
+	mov	r9,QWORD[16+rsi]	; 4th argument (presumably FunctionEntry)
+	mov	r10,QWORD[40+rsi]	; same pointer the context was copied to above
+	lea	r11,[56+rsi]	; address of the handler-data field
+	lea	r12,[24+rsi]	; address of the field at offset 24 (presumably EstablisherFrame)
+	mov	QWORD[32+rsp],r10	; stack arguments 5..8
+	mov	QWORD[40+rsp],r11
+	mov	QWORD[48+rsp],r12
+	mov	QWORD[56+rsp],rcx	; rcx is still 0 here
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return value 1 (presumably ExceptionContinueSearch - TODO confirm)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+section	.pdata rdata align=4	; one (begin, end, info) image-relative triple per sha256 entry point
+ALIGN	4
+	DD	$L$SEH_begin_sha256_block_data_order wrt ..imagebase	; base (non-SIMD-suffixed) variant
+	DD	$L$SEH_end_sha256_block_data_order wrt ..imagebase
+	DD	$L$SEH_info_sha256_block_data_order wrt ..imagebase
+	DD	$L$SEH_begin_sha256_block_data_order_ssse3 wrt ..imagebase	; SSSE3 variant
+	DD	$L$SEH_end_sha256_block_data_order_ssse3 wrt ..imagebase
+	DD	$L$SEH_info_sha256_block_data_order_ssse3 wrt ..imagebase
+	DD	$L$SEH_begin_sha256_block_data_order_avx wrt ..imagebase	; AVX variant
+	DD	$L$SEH_end_sha256_block_data_order_avx wrt ..imagebase
+	DD	$L$SEH_info_sha256_block_data_order_avx wrt ..imagebase
+section	.xdata rdata align=8	; info records referenced by the third DD of each .pdata triple
+ALIGN	8
+$L$SEH_info_sha256_block_data_order:
+DB	9,0,0,0	; presumably UNWIND_INFO header: version 1 + UNW_FLAG_EHANDLER, no unwind codes - TODO confirm
+	DD	se_handler wrt ..imagebase	; handler routine for this function
+	DD	$L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase	; two bounds; presumably the pair se_handler reads via [r11] and [4+r11]
+$L$SEH_info_sha256_block_data_order_ssse3:
+DB	9,0,0,0	; same header bytes as the record above
+	DD	se_handler wrt ..imagebase
+	DD	$L$prologue_ssse3 wrt ..imagebase,$L$epilogue_ssse3 wrt ..imagebase	; bounds for the SSSE3 variant
+$L$SEH_info_sha256_block_data_order_avx:
+DB	9,0,0,0	; same header bytes as the record above
+	DD	se_handler wrt ..imagebase
+	DD	$L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase	; bounds for the AVX variant
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/sha512-x86_64.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/sha512-x86_64.asm
new file mode 100644
index 0000000..33dc2c2
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/sha512-x86_64.asm
@@ -0,0 +1,3139 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+EXTERN	OPENSSL_ia32cap_P
+global	sha512_block_data_order
+
+ALIGN	16
+sha512_block_data_order:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_sha512_block_data_order:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+
+	lea	r11,[OPENSSL_ia32cap_P]
+	mov	r9d,DWORD[r11]
+	mov	r10d,DWORD[4+r11]
+	mov	r11d,DWORD[8+r11]
+	and	r9d,1073741824
+	and	r10d,268435968
+	or	r10d,r9d
+	cmp	r10d,1342177792
+	je	NEAR $L$avx_shortcut
+	mov	rax,rsp
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	shl	rdx,4
+	sub	rsp,16*8+4*8
+	lea	rdx,[rdx*8+rsi]
+	and	rsp,-64
+	mov	QWORD[((128+0))+rsp],rdi
+	mov	QWORD[((128+8))+rsp],rsi
+	mov	QWORD[((128+16))+rsp],rdx
+	mov	QWORD[152+rsp],rax
+
+$L$prologue:
+
+	mov	rax,QWORD[rdi]
+	mov	rbx,QWORD[8+rdi]
+	mov	rcx,QWORD[16+rdi]
+	mov	rdx,QWORD[24+rdi]
+	mov	r8,QWORD[32+rdi]
+	mov	r9,QWORD[40+rdi]
+	mov	r10,QWORD[48+rdi]
+	mov	r11,QWORD[56+rdi]
+	jmp	NEAR $L$loop
+
+ALIGN	16
+$L$loop:
+	mov	rdi,rbx
+	lea	rbp,[K512]
+	xor	rdi,rcx
+	mov	r12,QWORD[rsi]
+	mov	r13,r8
+	mov	r14,rax
+	bswap	r12
+	ror	r13,23
+	mov	r15,r9
+
+	xor	r13,r8
+	ror	r14,5
+	xor	r15,r10
+
+	mov	QWORD[rsp],r12
+	xor	r14,rax
+	and	r15,r8
+
+	ror	r13,4
+	add	r12,r11
+	xor	r15,r10
+
+	ror	r14,6
+	xor	r13,r8
+	add	r12,r15
+
+	mov	r15,rax
+	add	r12,QWORD[rbp]
+	xor	r14,rax
+
+	xor	r15,rbx
+	ror	r13,14
+	mov	r11,rbx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r11,rdi
+	add	rdx,r12
+	add	r11,r12
+
+	lea	rbp,[8+rbp]
+	add	r11,r14
+	mov	r12,QWORD[8+rsi]
+	mov	r13,rdx
+	mov	r14,r11
+	bswap	r12
+	ror	r13,23
+	mov	rdi,r8
+
+	xor	r13,rdx
+	ror	r14,5
+	xor	rdi,r9
+
+	mov	QWORD[8+rsp],r12
+	xor	r14,r11
+	and	rdi,rdx
+
+	ror	r13,4
+	add	r12,r10
+	xor	rdi,r9
+
+	ror	r14,6
+	xor	r13,rdx
+	add	r12,rdi
+
+	mov	rdi,r11
+	add	r12,QWORD[rbp]
+	xor	r14,r11
+
+	xor	rdi,rax
+	ror	r13,14
+	mov	r10,rax
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r10,r15
+	add	rcx,r12
+	add	r10,r12
+
+	lea	rbp,[24+rbp]
+	add	r10,r14
+	mov	r12,QWORD[16+rsi]
+	mov	r13,rcx
+	mov	r14,r10
+	bswap	r12
+	ror	r13,23
+	mov	r15,rdx
+
+	xor	r13,rcx
+	ror	r14,5
+	xor	r15,r8
+
+	mov	QWORD[16+rsp],r12
+	xor	r14,r10
+	and	r15,rcx
+
+	ror	r13,4
+	add	r12,r9
+	xor	r15,r8
+
+	ror	r14,6
+	xor	r13,rcx
+	add	r12,r15
+
+	mov	r15,r10
+	add	r12,QWORD[rbp]
+	xor	r14,r10
+
+	xor	r15,r11
+	ror	r13,14
+	mov	r9,r11
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r9,rdi
+	add	rbx,r12
+	add	r9,r12
+
+	lea	rbp,[8+rbp]
+	add	r9,r14
+	mov	r12,QWORD[24+rsi]
+	mov	r13,rbx
+	mov	r14,r9
+	bswap	r12
+	ror	r13,23
+	mov	rdi,rcx
+
+	xor	r13,rbx
+	ror	r14,5
+	xor	rdi,rdx
+
+	mov	QWORD[24+rsp],r12
+	xor	r14,r9
+	and	rdi,rbx
+
+	ror	r13,4
+	add	r12,r8
+	xor	rdi,rdx
+
+	ror	r14,6
+	xor	r13,rbx
+	add	r12,rdi
+
+	mov	rdi,r9
+	add	r12,QWORD[rbp]
+	xor	r14,r9
+
+	xor	rdi,r10
+	ror	r13,14
+	mov	r8,r10
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r8,r15
+	add	rax,r12
+	add	r8,r12
+
+	lea	rbp,[24+rbp]
+	add	r8,r14
+	mov	r12,QWORD[32+rsi]
+	mov	r13,rax
+	mov	r14,r8
+	bswap	r12
+	ror	r13,23
+	mov	r15,rbx
+
+	xor	r13,rax
+	ror	r14,5
+	xor	r15,rcx
+
+	mov	QWORD[32+rsp],r12
+	xor	r14,r8
+	and	r15,rax
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r15,rcx
+
+	ror	r14,6
+	xor	r13,rax
+	add	r12,r15
+
+	mov	r15,r8
+	add	r12,QWORD[rbp]
+	xor	r14,r8
+
+	xor	r15,r9
+	ror	r13,14
+	mov	rdx,r9
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rdx,rdi
+	add	r11,r12
+	add	rdx,r12
+
+	lea	rbp,[8+rbp]
+	add	rdx,r14
+	mov	r12,QWORD[40+rsi]
+	mov	r13,r11
+	mov	r14,rdx
+	bswap	r12
+	ror	r13,23
+	mov	rdi,rax
+
+	xor	r13,r11
+	ror	r14,5
+	xor	rdi,rbx
+
+	mov	QWORD[40+rsp],r12
+	xor	r14,rdx
+	and	rdi,r11
+
+	ror	r13,4
+	add	r12,rcx
+	xor	rdi,rbx
+
+	ror	r14,6
+	xor	r13,r11
+	add	r12,rdi
+
+	mov	rdi,rdx
+	add	r12,QWORD[rbp]
+	xor	r14,rdx
+
+	xor	rdi,r8
+	ror	r13,14
+	mov	rcx,r8
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rcx,r15
+	add	r10,r12
+	add	rcx,r12
+
+	lea	rbp,[24+rbp]
+	add	rcx,r14
+	mov	r12,QWORD[48+rsi]
+	mov	r13,r10
+	mov	r14,rcx
+	bswap	r12
+	ror	r13,23
+	mov	r15,r11
+
+	xor	r13,r10
+	ror	r14,5
+	xor	r15,rax
+
+	mov	QWORD[48+rsp],r12
+	xor	r14,rcx
+	and	r15,r10
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r15,rax
+
+	ror	r14,6
+	xor	r13,r10
+	add	r12,r15
+
+	mov	r15,rcx
+	add	r12,QWORD[rbp]
+	xor	r14,rcx
+
+	xor	r15,rdx
+	ror	r13,14
+	mov	rbx,rdx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rbx,rdi
+	add	r9,r12
+	add	rbx,r12
+
+	lea	rbp,[8+rbp]
+	add	rbx,r14
+	mov	r12,QWORD[56+rsi]
+	mov	r13,r9
+	mov	r14,rbx
+	bswap	r12
+	ror	r13,23
+	mov	rdi,r10
+
+	xor	r13,r9
+	ror	r14,5
+	xor	rdi,r11
+
+	mov	QWORD[56+rsp],r12
+	xor	r14,rbx
+	and	rdi,r9
+
+	ror	r13,4
+	add	r12,rax
+	xor	rdi,r11
+
+	ror	r14,6
+	xor	r13,r9
+	add	r12,rdi
+
+	mov	rdi,rbx
+	add	r12,QWORD[rbp]
+	xor	r14,rbx
+
+	xor	rdi,rcx
+	ror	r13,14
+	mov	rax,rcx
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rax,r15
+	add	r8,r12
+	add	rax,r12
+
+	lea	rbp,[24+rbp]
+	add	rax,r14
+	mov	r12,QWORD[64+rsi]
+	mov	r13,r8
+	mov	r14,rax
+	bswap	r12
+	ror	r13,23
+	mov	r15,r9
+
+	xor	r13,r8
+	ror	r14,5
+	xor	r15,r10
+
+	mov	QWORD[64+rsp],r12
+	xor	r14,rax
+	and	r15,r8
+
+	ror	r13,4
+	add	r12,r11
+	xor	r15,r10
+
+	ror	r14,6
+	xor	r13,r8
+	add	r12,r15
+
+	mov	r15,rax
+	add	r12,QWORD[rbp]
+	xor	r14,rax
+
+	xor	r15,rbx
+	ror	r13,14
+	mov	r11,rbx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r11,rdi
+	add	rdx,r12
+	add	r11,r12
+
+	lea	rbp,[8+rbp]
+	add	r11,r14
+	mov	r12,QWORD[72+rsi]
+	mov	r13,rdx
+	mov	r14,r11
+	bswap	r12
+	ror	r13,23
+	mov	rdi,r8
+
+	xor	r13,rdx
+	ror	r14,5
+	xor	rdi,r9
+
+	mov	QWORD[72+rsp],r12
+	xor	r14,r11
+	and	rdi,rdx
+
+	ror	r13,4
+	add	r12,r10
+	xor	rdi,r9
+
+	ror	r14,6
+	xor	r13,rdx
+	add	r12,rdi
+
+	mov	rdi,r11
+	add	r12,QWORD[rbp]
+	xor	r14,r11
+
+	xor	rdi,rax
+	ror	r13,14
+	mov	r10,rax
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r10,r15
+	add	rcx,r12
+	add	r10,r12
+
+	lea	rbp,[24+rbp]
+	add	r10,r14
+	mov	r12,QWORD[80+rsi]
+	mov	r13,rcx
+	mov	r14,r10
+	bswap	r12
+	ror	r13,23
+	mov	r15,rdx
+
+	xor	r13,rcx
+	ror	r14,5
+	xor	r15,r8
+
+	mov	QWORD[80+rsp],r12
+	xor	r14,r10
+	and	r15,rcx
+
+	ror	r13,4
+	add	r12,r9
+	xor	r15,r8
+
+	ror	r14,6
+	xor	r13,rcx
+	add	r12,r15
+
+	mov	r15,r10
+	add	r12,QWORD[rbp]
+	xor	r14,r10
+
+	xor	r15,r11
+	ror	r13,14
+	mov	r9,r11
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r9,rdi
+	add	rbx,r12
+	add	r9,r12
+
+	lea	rbp,[8+rbp]
+	add	r9,r14
+	mov	r12,QWORD[88+rsi]
+	mov	r13,rbx
+	mov	r14,r9
+	bswap	r12
+	ror	r13,23
+	mov	rdi,rcx
+
+	xor	r13,rbx
+	ror	r14,5
+	xor	rdi,rdx
+
+	mov	QWORD[88+rsp],r12
+	xor	r14,r9
+	and	rdi,rbx
+
+	ror	r13,4
+	add	r12,r8
+	xor	rdi,rdx
+
+	ror	r14,6
+	xor	r13,rbx
+	add	r12,rdi
+
+	mov	rdi,r9
+	add	r12,QWORD[rbp]
+	xor	r14,r9
+
+	xor	rdi,r10
+	ror	r13,14
+	mov	r8,r10
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r8,r15
+	add	rax,r12
+	add	r8,r12
+
+	lea	rbp,[24+rbp]
+	add	r8,r14
+	mov	r12,QWORD[96+rsi]
+	mov	r13,rax
+	mov	r14,r8
+	bswap	r12
+	ror	r13,23
+	mov	r15,rbx
+
+	xor	r13,rax
+	ror	r14,5
+	xor	r15,rcx
+
+	mov	QWORD[96+rsp],r12
+	xor	r14,r8
+	and	r15,rax
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r15,rcx
+
+	ror	r14,6
+	xor	r13,rax
+	add	r12,r15
+
+	mov	r15,r8
+	add	r12,QWORD[rbp]
+	xor	r14,r8
+
+	xor	r15,r9
+	ror	r13,14
+	mov	rdx,r9
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rdx,rdi
+	add	r11,r12
+	add	rdx,r12
+
+	lea	rbp,[8+rbp]
+	add	rdx,r14
+	mov	r12,QWORD[104+rsi]
+	mov	r13,r11
+	mov	r14,rdx
+	bswap	r12
+	ror	r13,23
+	mov	rdi,rax
+
+	xor	r13,r11
+	ror	r14,5
+	xor	rdi,rbx
+
+	mov	QWORD[104+rsp],r12
+	xor	r14,rdx
+	and	rdi,r11
+
+	ror	r13,4
+	add	r12,rcx
+	xor	rdi,rbx
+
+	ror	r14,6
+	xor	r13,r11
+	add	r12,rdi
+
+	mov	rdi,rdx
+	add	r12,QWORD[rbp]
+	xor	r14,rdx
+
+	xor	rdi,r8
+	ror	r13,14
+	mov	rcx,r8
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rcx,r15
+	add	r10,r12
+	add	rcx,r12
+
+	lea	rbp,[24+rbp]
+	add	rcx,r14
+	mov	r12,QWORD[112+rsi]
+	mov	r13,r10
+	mov	r14,rcx
+	bswap	r12
+	ror	r13,23
+	mov	r15,r11
+
+	xor	r13,r10
+	ror	r14,5
+	xor	r15,rax
+
+	mov	QWORD[112+rsp],r12
+	xor	r14,rcx
+	and	r15,r10
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r15,rax
+
+	ror	r14,6
+	xor	r13,r10
+	add	r12,r15
+
+	mov	r15,rcx
+	add	r12,QWORD[rbp]
+	xor	r14,rcx
+
+	xor	r15,rdx
+	ror	r13,14
+	mov	rbx,rdx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rbx,rdi
+	add	r9,r12
+	add	rbx,r12
+
+	lea	rbp,[8+rbp]
+	add	rbx,r14
+	mov	r12,QWORD[120+rsi]
+	mov	r13,r9
+	mov	r14,rbx
+	bswap	r12
+	ror	r13,23
+	mov	rdi,r10
+
+	xor	r13,r9
+	ror	r14,5
+	xor	rdi,r11
+
+	mov	QWORD[120+rsp],r12
+	xor	r14,rbx
+	and	rdi,r9
+
+	ror	r13,4
+	add	r12,rax
+	xor	rdi,r11
+
+	ror	r14,6
+	xor	r13,r9
+	add	r12,rdi
+
+	mov	rdi,rbx
+	add	r12,QWORD[rbp]
+	xor	r14,rbx
+
+	xor	rdi,rcx
+	ror	r13,14
+	mov	rax,rcx
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rax,r15
+	add	r8,r12
+	add	rax,r12
+
+	lea	rbp,[24+rbp]
+	jmp	NEAR $L$rounds_16_xx
+ALIGN	16
+$L$rounds_16_xx:
+	mov	r13,QWORD[8+rsp]
+	mov	r15,QWORD[112+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rax,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[72+rsp]
+
+	add	r12,QWORD[rsp]
+	mov	r13,r8
+	add	r12,r15
+	mov	r14,rax
+	ror	r13,23
+	mov	r15,r9
+
+	xor	r13,r8
+	ror	r14,5
+	xor	r15,r10
+
+	mov	QWORD[rsp],r12
+	xor	r14,rax
+	and	r15,r8
+
+	ror	r13,4
+	add	r12,r11
+	xor	r15,r10
+
+	ror	r14,6
+	xor	r13,r8
+	add	r12,r15
+
+	mov	r15,rax
+	add	r12,QWORD[rbp]
+	xor	r14,rax
+
+	xor	r15,rbx
+	ror	r13,14
+	mov	r11,rbx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r11,rdi
+	add	rdx,r12
+	add	r11,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[16+rsp]
+	mov	rdi,QWORD[120+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r11,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[80+rsp]
+
+	add	r12,QWORD[8+rsp]
+	mov	r13,rdx
+	add	r12,rdi
+	mov	r14,r11
+	ror	r13,23
+	mov	rdi,r8
+
+	xor	r13,rdx
+	ror	r14,5
+	xor	rdi,r9
+
+	mov	QWORD[8+rsp],r12
+	xor	r14,r11
+	and	rdi,rdx
+
+	ror	r13,4
+	add	r12,r10
+	xor	rdi,r9
+
+	ror	r14,6
+	xor	r13,rdx
+	add	r12,rdi
+
+	mov	rdi,r11
+	add	r12,QWORD[rbp]
+	xor	r14,r11
+
+	xor	rdi,rax
+	ror	r13,14
+	mov	r10,rax
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r10,r15
+	add	rcx,r12
+	add	r10,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[24+rsp]
+	mov	r15,QWORD[rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r10,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[88+rsp]
+
+	add	r12,QWORD[16+rsp]
+	mov	r13,rcx
+	add	r12,r15
+	mov	r14,r10
+	ror	r13,23
+	mov	r15,rdx
+
+	xor	r13,rcx
+	ror	r14,5
+	xor	r15,r8
+
+	mov	QWORD[16+rsp],r12
+	xor	r14,r10
+	and	r15,rcx
+
+	ror	r13,4
+	add	r12,r9
+	xor	r15,r8
+
+	ror	r14,6
+	xor	r13,rcx
+	add	r12,r15
+
+	mov	r15,r10
+	add	r12,QWORD[rbp]
+	xor	r14,r10
+
+	xor	r15,r11
+	ror	r13,14
+	mov	r9,r11
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r9,rdi
+	add	rbx,r12
+	add	r9,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[32+rsp]
+	mov	rdi,QWORD[8+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r9,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[96+rsp]
+
+	add	r12,QWORD[24+rsp]
+	mov	r13,rbx
+	add	r12,rdi
+	mov	r14,r9
+	ror	r13,23
+	mov	rdi,rcx
+
+	xor	r13,rbx
+	ror	r14,5
+	xor	rdi,rdx
+
+	mov	QWORD[24+rsp],r12
+	xor	r14,r9
+	and	rdi,rbx
+
+	ror	r13,4
+	add	r12,r8
+	xor	rdi,rdx
+
+	ror	r14,6
+	xor	r13,rbx
+	add	r12,rdi
+
+	mov	rdi,r9
+	add	r12,QWORD[rbp]
+	xor	r14,r9
+
+	xor	rdi,r10
+	ror	r13,14
+	mov	r8,r10
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r8,r15
+	add	rax,r12
+	add	r8,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[40+rsp]
+	mov	r15,QWORD[16+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r8,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[104+rsp]
+
+	add	r12,QWORD[32+rsp]
+	mov	r13,rax
+	add	r12,r15
+	mov	r14,r8
+	ror	r13,23
+	mov	r15,rbx
+
+	xor	r13,rax
+	ror	r14,5
+	xor	r15,rcx
+
+	mov	QWORD[32+rsp],r12
+	xor	r14,r8
+	and	r15,rax
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r15,rcx
+
+	ror	r14,6
+	xor	r13,rax
+	add	r12,r15
+
+	mov	r15,r8
+	add	r12,QWORD[rbp]
+	xor	r14,r8
+
+	xor	r15,r9
+	ror	r13,14
+	mov	rdx,r9
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rdx,rdi
+	add	r11,r12
+	add	rdx,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[48+rsp]
+	mov	rdi,QWORD[24+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rdx,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[112+rsp]
+
+	add	r12,QWORD[40+rsp]
+	mov	r13,r11
+	add	r12,rdi
+	mov	r14,rdx
+	ror	r13,23
+	mov	rdi,rax
+
+	xor	r13,r11
+	ror	r14,5
+	xor	rdi,rbx
+
+	mov	QWORD[40+rsp],r12
+	xor	r14,rdx
+	and	rdi,r11
+
+	ror	r13,4
+	add	r12,rcx
+	xor	rdi,rbx
+
+	ror	r14,6
+	xor	r13,r11
+	add	r12,rdi
+
+	mov	rdi,rdx
+	add	r12,QWORD[rbp]
+	xor	r14,rdx
+
+	xor	rdi,r8
+	ror	r13,14
+	mov	rcx,r8
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rcx,r15
+	add	r10,r12
+	add	rcx,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[56+rsp]
+	mov	r15,QWORD[32+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rcx,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[120+rsp]
+
+	add	r12,QWORD[48+rsp]
+	mov	r13,r10
+	add	r12,r15
+	mov	r14,rcx
+	ror	r13,23
+	mov	r15,r11
+
+	xor	r13,r10
+	ror	r14,5
+	xor	r15,rax
+
+	mov	QWORD[48+rsp],r12
+	xor	r14,rcx
+	and	r15,r10
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r15,rax
+
+	ror	r14,6
+	xor	r13,r10
+	add	r12,r15
+
+	mov	r15,rcx
+	add	r12,QWORD[rbp]
+	xor	r14,rcx
+
+	xor	r15,rdx
+	ror	r13,14
+	mov	rbx,rdx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rbx,rdi
+	add	r9,r12
+	add	rbx,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[64+rsp]
+	mov	rdi,QWORD[40+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rbx,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[rsp]
+
+	add	r12,QWORD[56+rsp]
+	mov	r13,r9
+	add	r12,rdi
+	mov	r14,rbx
+	ror	r13,23
+	mov	rdi,r10
+
+	xor	r13,r9
+	ror	r14,5
+	xor	rdi,r11
+
+	mov	QWORD[56+rsp],r12
+	xor	r14,rbx
+	and	rdi,r9
+
+	ror	r13,4
+	add	r12,rax
+	xor	rdi,r11
+
+	ror	r14,6
+	xor	r13,r9
+	add	r12,rdi
+
+	mov	rdi,rbx
+	add	r12,QWORD[rbp]
+	xor	r14,rbx
+
+	xor	rdi,rcx
+	ror	r13,14
+	mov	rax,rcx
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rax,r15
+	add	r8,r12
+	add	rax,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[72+rsp]
+	mov	r15,QWORD[48+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rax,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[8+rsp]
+
+	add	r12,QWORD[64+rsp]
+	mov	r13,r8
+	add	r12,r15
+	mov	r14,rax
+	ror	r13,23
+	mov	r15,r9
+
+	xor	r13,r8
+	ror	r14,5
+	xor	r15,r10
+
+	mov	QWORD[64+rsp],r12
+	xor	r14,rax
+	and	r15,r8
+
+	ror	r13,4
+	add	r12,r11
+	xor	r15,r10
+
+	ror	r14,6
+	xor	r13,r8
+	add	r12,r15
+
+	mov	r15,rax
+	add	r12,QWORD[rbp]
+	xor	r14,rax
+
+	xor	r15,rbx
+	ror	r13,14
+	mov	r11,rbx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r11,rdi
+	add	rdx,r12
+	add	r11,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[80+rsp]
+	mov	rdi,QWORD[56+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r11,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[16+rsp]
+
+	add	r12,QWORD[72+rsp]
+	mov	r13,rdx
+	add	r12,rdi
+	mov	r14,r11
+	ror	r13,23
+	mov	rdi,r8
+
+	xor	r13,rdx
+	ror	r14,5
+	xor	rdi,r9
+
+	mov	QWORD[72+rsp],r12
+	xor	r14,r11
+	and	rdi,rdx
+
+	ror	r13,4
+	add	r12,r10
+	xor	rdi,r9
+
+	ror	r14,6
+	xor	r13,rdx
+	add	r12,rdi
+
+	mov	rdi,r11
+	add	r12,QWORD[rbp]
+	xor	r14,r11
+
+	xor	rdi,rax
+	ror	r13,14
+	mov	r10,rax
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r10,r15
+	add	rcx,r12
+	add	r10,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[88+rsp]
+	mov	r15,QWORD[64+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r10,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[24+rsp]
+
+	add	r12,QWORD[80+rsp]
+	mov	r13,rcx
+	add	r12,r15
+	mov	r14,r10
+	ror	r13,23
+	mov	r15,rdx
+
+	xor	r13,rcx
+	ror	r14,5
+	xor	r15,r8
+
+	mov	QWORD[80+rsp],r12
+	xor	r14,r10
+	and	r15,rcx
+
+	ror	r13,4
+	add	r12,r9
+	xor	r15,r8
+
+	ror	r14,6
+	xor	r13,rcx
+	add	r12,r15
+
+	mov	r15,r10
+	add	r12,QWORD[rbp]
+	xor	r14,r10
+
+	xor	r15,r11
+	ror	r13,14
+	mov	r9,r11
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	r9,rdi
+	add	rbx,r12
+	add	r9,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[96+rsp]
+	mov	rdi,QWORD[72+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r9,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[32+rsp]
+
+	add	r12,QWORD[88+rsp]
+	mov	r13,rbx
+	add	r12,rdi
+	mov	r14,r9
+	ror	r13,23
+	mov	rdi,rcx
+
+	xor	r13,rbx
+	ror	r14,5
+	xor	rdi,rdx
+
+	mov	QWORD[88+rsp],r12
+	xor	r14,r9
+	and	rdi,rbx
+
+	ror	r13,4
+	add	r12,r8
+	xor	rdi,rdx
+
+	ror	r14,6
+	xor	r13,rbx
+	add	r12,rdi
+
+	mov	rdi,r9
+	add	r12,QWORD[rbp]
+	xor	r14,r9
+
+	xor	rdi,r10
+	ror	r13,14
+	mov	r8,r10
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	r8,r15
+	add	rax,r12
+	add	r8,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[104+rsp]
+	mov	r15,QWORD[80+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	r8,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[40+rsp]
+
+	add	r12,QWORD[96+rsp]
+	mov	r13,rax
+	add	r12,r15
+	mov	r14,r8
+	ror	r13,23
+	mov	r15,rbx
+
+	xor	r13,rax
+	ror	r14,5
+	xor	r15,rcx
+
+	mov	QWORD[96+rsp],r12
+	xor	r14,r8
+	and	r15,rax
+
+	ror	r13,4
+	add	r12,rdx
+	xor	r15,rcx
+
+	ror	r14,6
+	xor	r13,rax
+	add	r12,r15
+
+	mov	r15,r8
+	add	r12,QWORD[rbp]
+	xor	r14,r8
+
+	xor	r15,r9
+	ror	r13,14
+	mov	rdx,r9
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rdx,rdi
+	add	r11,r12
+	add	rdx,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[112+rsp]
+	mov	rdi,QWORD[88+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rdx,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[48+rsp]
+
+	add	r12,QWORD[104+rsp]
+	mov	r13,r11
+	add	r12,rdi
+	mov	r14,rdx
+	ror	r13,23
+	mov	rdi,rax
+
+	xor	r13,r11
+	ror	r14,5
+	xor	rdi,rbx
+
+	mov	QWORD[104+rsp],r12
+	xor	r14,rdx
+	and	rdi,r11
+
+	ror	r13,4
+	add	r12,rcx
+	xor	rdi,rbx
+
+	ror	r14,6
+	xor	r13,r11
+	add	r12,rdi
+
+	mov	rdi,rdx
+	add	r12,QWORD[rbp]
+	xor	r14,rdx
+
+	xor	rdi,r8
+	ror	r13,14
+	mov	rcx,r8
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rcx,r15
+	add	r10,r12
+	add	rcx,r12
+
+	lea	rbp,[24+rbp]
+	mov	r13,QWORD[120+rsp]
+	mov	r15,QWORD[96+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rcx,r14
+	mov	r14,r15
+	ror	r15,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	r15,r14
+	shr	r14,6
+
+	ror	r15,19
+	xor	r12,r13
+	xor	r15,r14
+	add	r12,QWORD[56+rsp]
+
+	add	r12,QWORD[112+rsp]
+	mov	r13,r10
+	add	r12,r15
+	mov	r14,rcx
+	ror	r13,23
+	mov	r15,r11
+
+	xor	r13,r10
+	ror	r14,5
+	xor	r15,rax
+
+	mov	QWORD[112+rsp],r12
+	xor	r14,rcx
+	and	r15,r10
+
+	ror	r13,4
+	add	r12,rbx
+	xor	r15,rax
+
+	ror	r14,6
+	xor	r13,r10
+	add	r12,r15
+
+	mov	r15,rcx
+	add	r12,QWORD[rbp]
+	xor	r14,rcx
+
+	xor	r15,rdx
+	ror	r13,14
+	mov	rbx,rdx
+
+	and	rdi,r15
+	ror	r14,28
+	add	r12,r13
+
+	xor	rbx,rdi
+	add	r9,r12
+	add	rbx,r12
+
+	lea	rbp,[8+rbp]
+	mov	r13,QWORD[rsp]
+	mov	rdi,QWORD[104+rsp]
+
+	mov	r12,r13
+	ror	r13,7
+	add	rbx,r14
+	mov	r14,rdi
+	ror	rdi,42
+
+	xor	r13,r12
+	shr	r12,7
+	ror	r13,1
+	xor	rdi,r14
+	shr	r14,6
+
+	ror	rdi,19
+	xor	r12,r13
+	xor	rdi,r14
+	add	r12,QWORD[64+rsp]
+
+	add	r12,QWORD[120+rsp]
+	mov	r13,r9
+	add	r12,rdi
+	mov	r14,rbx
+	ror	r13,23
+	mov	rdi,r10
+
+	xor	r13,r9
+	ror	r14,5
+	xor	rdi,r11
+
+	mov	QWORD[120+rsp],r12
+	xor	r14,rbx
+	and	rdi,r9
+
+	ror	r13,4
+	add	r12,rax
+	xor	rdi,r11
+
+	ror	r14,6
+	xor	r13,r9
+	add	r12,rdi
+
+	mov	rdi,rbx
+	add	r12,QWORD[rbp]
+	xor	r14,rbx
+
+	xor	rdi,rcx
+	ror	r13,14
+	mov	rax,rcx
+
+	and	r15,rdi
+	ror	r14,28
+	add	r12,r13
+
+	xor	rax,r15
+	add	r8,r12
+	add	rax,r12
+
+	lea	rbp,[24+rbp]
+	cmp	BYTE[7+rbp],0
+	jnz	NEAR $L$rounds_16_xx
+
+	mov	rdi,QWORD[((128+0))+rsp]
+	add	rax,r14
+	lea	rsi,[128+rsi]
+
+	add	rax,QWORD[rdi]
+	add	rbx,QWORD[8+rdi]
+	add	rcx,QWORD[16+rdi]
+	add	rdx,QWORD[24+rdi]
+	add	r8,QWORD[32+rdi]
+	add	r9,QWORD[40+rdi]
+	add	r10,QWORD[48+rdi]
+	add	r11,QWORD[56+rdi]
+
+	cmp	rsi,QWORD[((128+16))+rsp]
+
+	mov	QWORD[rdi],rax
+	mov	QWORD[8+rdi],rbx
+	mov	QWORD[16+rdi],rcx
+	mov	QWORD[24+rdi],rdx
+	mov	QWORD[32+rdi],r8
+	mov	QWORD[40+rdi],r9
+	mov	QWORD[48+rdi],r10
+	mov	QWORD[56+rdi],r11
+	jb	NEAR $L$loop
+
+	mov	rsi,QWORD[152+rsp]
+
+	mov	r15,QWORD[((-48))+rsi]
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]
+
+$L$epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_sha512_block_data_order:
+ALIGN	64
+
+K512:	; SHA-512 round-constant table; every 16-byte row is emitted twice, so distinct rows sit 32 bytes apart (80 DQ lines = 1280 bytes)
+	DQ	0x428a2f98d728ae22,0x7137449123ef65cd
+	DQ	0x428a2f98d728ae22,0x7137449123ef65cd
+	DQ	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+	DQ	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
+	DQ	0x3956c25bf348b538,0x59f111f1b605d019
+	DQ	0x3956c25bf348b538,0x59f111f1b605d019
+	DQ	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+	DQ	0x923f82a4af194f9b,0xab1c5ed5da6d8118
+	DQ	0xd807aa98a3030242,0x12835b0145706fbe
+	DQ	0xd807aa98a3030242,0x12835b0145706fbe
+	DQ	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+	DQ	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
+	DQ	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+	DQ	0x72be5d74f27b896f,0x80deb1fe3b1696b1
+	DQ	0x9bdc06a725c71235,0xc19bf174cf692694
+	DQ	0x9bdc06a725c71235,0xc19bf174cf692694
+	DQ	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+	DQ	0xe49b69c19ef14ad2,0xefbe4786384f25e3
+	DQ	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+	DQ	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
+	DQ	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+	DQ	0x2de92c6f592b0275,0x4a7484aa6ea6e483
+	DQ	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+	DQ	0x5cb0a9dcbd41fbd4,0x76f988da831153b5
+	DQ	0x983e5152ee66dfab,0xa831c66d2db43210
+	DQ	0x983e5152ee66dfab,0xa831c66d2db43210
+	DQ	0xb00327c898fb213f,0xbf597fc7beef0ee4
+	DQ	0xb00327c898fb213f,0xbf597fc7beef0ee4
+	DQ	0xc6e00bf33da88fc2,0xd5a79147930aa725
+	DQ	0xc6e00bf33da88fc2,0xd5a79147930aa725
+	DQ	0x06ca6351e003826f,0x142929670a0e6e70
+	DQ	0x06ca6351e003826f,0x142929670a0e6e70
+	DQ	0x27b70a8546d22ffc,0x2e1b21385c26c926
+	DQ	0x27b70a8546d22ffc,0x2e1b21385c26c926
+	DQ	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+	DQ	0x4d2c6dfc5ac42aed,0x53380d139d95b3df
+	DQ	0x650a73548baf63de,0x766a0abb3c77b2a8
+	DQ	0x650a73548baf63de,0x766a0abb3c77b2a8
+	DQ	0x81c2c92e47edaee6,0x92722c851482353b
+	DQ	0x81c2c92e47edaee6,0x92722c851482353b
+	DQ	0xa2bfe8a14cf10364,0xa81a664bbc423001
+	DQ	0xa2bfe8a14cf10364,0xa81a664bbc423001
+	DQ	0xc24b8b70d0f89791,0xc76c51a30654be30
+	DQ	0xc24b8b70d0f89791,0xc76c51a30654be30
+	DQ	0xd192e819d6ef5218,0xd69906245565a910
+	DQ	0xd192e819d6ef5218,0xd69906245565a910
+	DQ	0xf40e35855771202a,0x106aa07032bbd1b8
+	DQ	0xf40e35855771202a,0x106aa07032bbd1b8
+	DQ	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+	DQ	0x19a4c116b8d2d0c8,0x1e376c085141ab53
+	DQ	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+	DQ	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
+	DQ	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+	DQ	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
+	DQ	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+	DQ	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
+	DQ	0x748f82ee5defb2fc,0x78a5636f43172f60
+	DQ	0x748f82ee5defb2fc,0x78a5636f43172f60
+	DQ	0x84c87814a1f0ab72,0x8cc702081a6439ec
+	DQ	0x84c87814a1f0ab72,0x8cc702081a6439ec
+	DQ	0x90befffa23631e28,0xa4506cebde82bde9
+	DQ	0x90befffa23631e28,0xa4506cebde82bde9
+	DQ	0xbef9a3f7b2c67915,0xc67178f2e372532b
+	DQ	0xbef9a3f7b2c67915,0xc67178f2e372532b
+	DQ	0xca273eceea26619c,0xd186b8c721c0c207
+	DQ	0xca273eceea26619c,0xd186b8c721c0c207
+	DQ	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+	DQ	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
+	DQ	0x06f067aa72176fba,0x0a637dc5a2c898a6
+	DQ	0x06f067aa72176fba,0x0a637dc5a2c898a6
+	DQ	0x113f9804bef90dae,0x1b710b35131c471b
+	DQ	0x113f9804bef90dae,0x1b710b35131c471b
+	DQ	0x28db77f523047d84,0x32caab7b40c72493
+	DQ	0x28db77f523047d84,0x32caab7b40c72493
+	DQ	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+	DQ	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
+	DQ	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+	DQ	0x4cc5d4becb3e42b6,0x597f299cfc657e2a
+	DQ	0x5fcb6fab3ad6faec,0x6c44198c4a475817
+	DQ	0x5fcb6fab3ad6faec,0x6c44198c4a475817
+
+	DQ	0x0001020304050607,0x08090a0b0c0d0e0f	; at K512+1280: vpshufb mask used by the AVX path; reverses the bytes of each 64-bit lane. Its byte 7 is 0x00, which the AVX schedule loop uses as its end marker
+	DQ	0x0001020304050607,0x08090a0b0c0d0e0f
+DB	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97	; NUL-terminated ASCII banner: "SHA512 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>"
+DB	110,115,102,111,114,109,32,102,111,114,32,120,56,54,95,54
+DB	52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
+DB	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
+DB	111,114,103,62,0
+
+ALIGN	64
+sha512_block_data_order_avx:	; AVX SHA-512 block routine, Win64 entry: rcx = pointer to the 8-qword state, rdx = input pointer, r8 = number of 128-byte blocks
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are parked in the caller-provided slots and reloaded from there in the epilogue
+	mov	rax,rsp
+$L$SEH_begin_sha512_block_data_order_avx:
+	mov	rdi,rcx	; remap the Win64 argument registers onto rdi/rsi/rdx, which the body below uses
+	mov	rsi,rdx
+	mov	rdx,r8
+
+
+
+$L$avx_shortcut:
+	mov	rax,rsp	; rax = rsp at entry; stored in the frame below and read back by the epilogue and by se_handler
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+	shl	rdx,4
+	sub	rsp,256	; 256-byte frame: 128 bytes of W+K slots, 4 saved qwords, then the xmm6-xmm11 save area
+	lea	rdx,[rdx*8+rsi]	; together with the shl above: rdx = rsi + blocks*128 = end of input
+	and	rsp,-64	; align the frame down to 64 bytes
+	mov	QWORD[((128+0))+rsp],rdi	; frame slot: state pointer
+	mov	QWORD[((128+8))+rsp],rsi	; frame slot: input pointer
+	mov	QWORD[((128+16))+rsp],rdx	; frame slot: end-of-input pointer (loop bound)
+	mov	QWORD[152+rsp],rax	; frame slot: entry rsp
+
+	movaps	XMMWORD[(128+32)+rsp],xmm6	; save xmm6-xmm11 (restored in the epilogue; nonvolatile under the Win64 ABI)
+	movaps	XMMWORD[(128+48)+rsp],xmm7
+	movaps	XMMWORD[(128+64)+rsp],xmm8
+	movaps	XMMWORD[(128+80)+rsp],xmm9
+	movaps	XMMWORD[(128+96)+rsp],xmm10
+	movaps	XMMWORD[(128+112)+rsp],xmm11
+$L$prologue_avx:
+
+	vzeroupper
+	mov	rax,QWORD[rdi]	; load the eight state words into rax,rbx,rcx,rdx,r8,r9,r10,r11
+	mov	rbx,QWORD[8+rdi]
+	mov	rcx,QWORD[16+rdi]
+	mov	rdx,QWORD[24+rdi]
+	mov	r8,QWORD[32+rdi]
+	mov	r9,QWORD[40+rdi]
+	mov	r10,QWORD[48+rdi]
+	mov	r11,QWORD[56+rdi]
+	jmp	NEAR $L$loop_avx
+ALIGN	16
+$L$loop_avx:	; per-block setup: load 128 input bytes, byte-swap them, and precompute W+K for the first 16 rounds
+	vmovdqa	xmm11,XMMWORD[((K512+1280))]	; byte-reversal mask stored right after the 80 constant rows
+	vmovdqu	xmm0,XMMWORD[rsi]	; xmm0..xmm7 <- the 16 message qwords of this block (unaligned loads)
+	lea	rbp,[((K512+128))]	; rbp points 128 bytes into K512 so rows are addressed as -128..+96 in 32-byte steps
+	vmovdqu	xmm1,XMMWORD[16+rsi]
+	vmovdqu	xmm2,XMMWORD[32+rsi]
+	vpshufb	xmm0,xmm0,xmm11	; reverse bytes within each 64-bit lane
+	vmovdqu	xmm3,XMMWORD[48+rsi]
+	vpshufb	xmm1,xmm1,xmm11
+	vmovdqu	xmm4,XMMWORD[64+rsi]
+	vpshufb	xmm2,xmm2,xmm11
+	vmovdqu	xmm5,XMMWORD[80+rsi]
+	vpshufb	xmm3,xmm3,xmm11
+	vmovdqu	xmm6,XMMWORD[96+rsi]
+	vpshufb	xmm4,xmm4,xmm11
+	vmovdqu	xmm7,XMMWORD[112+rsi]
+	vpshufb	xmm5,xmm5,xmm11
+	vpaddq	xmm8,xmm0,XMMWORD[((-128))+rbp]	; W + K for each pair of rounds, staged through xmm8..xmm11
+	vpshufb	xmm6,xmm6,xmm11
+	vpaddq	xmm9,xmm1,XMMWORD[((-96))+rbp]
+	vpshufb	xmm7,xmm7,xmm11	; last use of the mask; xmm11 is reused as a temporary from here on
+	vpaddq	xmm10,xmm2,XMMWORD[((-64))+rbp]
+	vpaddq	xmm11,xmm3,XMMWORD[((-32))+rbp]
+	vmovdqa	XMMWORD[rsp],xmm8	; W+K slots live at [rsp]..[127+rsp]; the scalar rounds read them from there
+	vpaddq	xmm8,xmm4,XMMWORD[rbp]
+	vmovdqa	XMMWORD[16+rsp],xmm9
+	vpaddq	xmm9,xmm5,XMMWORD[32+rbp]
+	vmovdqa	XMMWORD[32+rsp],xmm10
+	vpaddq	xmm10,xmm6,XMMWORD[64+rbp]
+	vmovdqa	XMMWORD[48+rsp],xmm11
+	vpaddq	xmm11,xmm7,XMMWORD[96+rbp]
+	vmovdqa	XMMWORD[64+rsp],xmm8
+	mov	r14,rax	; seed the round temporaries: r14 = a
+	vmovdqa	XMMWORD[80+rsp],xmm9
+	mov	rdi,rbx	; rdi = b ^ c (completed by the xor below); rdi no longer holds the state pointer
+	vmovdqa	XMMWORD[96+rsp],xmm10
+	xor	rdi,rcx
+	vmovdqa	XMMWORD[112+rsp],xmm11
+	mov	r13,r8	; r13 = e
+	jmp	NEAR $L$avx_00_47
+
+ALIGN	16
+$L$avx_00_47:	; one pass = 16 scalar rounds interleaved with the vector update of all 16 schedule words (xmm0..xmm7) and their W+K slots
+	add	rbp,256	; advance to the next 8 rows (16 constants) of the duplicated K512 table
+	vpalignr	xmm8,xmm1,xmm0,8
+	shrd	r13,r13,23	; shrd r,r,n is a rotate right by n; with the xors below, 23/4/14 on r13 yields e>>>41 ^ e>>>18 ^ e>>>14
+	mov	rax,r14
+	vpalignr	xmm11,xmm5,xmm4,8
+	mov	r12,r9
+	shrd	r14,r14,5	; likewise 5/6/28 on r14 yields a>>>39 ^ a>>>34 ^ a>>>28
+	vpsrlq	xmm10,xmm8,1	; vector side: (x>>1 ^ x<<63) ^ (x>>8 ^ x<<56) ^ x>>7 is assembled in xmm8 over the next lines
+	xor	r13,r8
+	xor	r12,r10
+	vpaddq	xmm0,xmm0,xmm11
+	shrd	r13,r13,4
+	xor	r14,rax
+	vpsrlq	xmm11,xmm8,7
+	and	r12,r8
+	xor	r13,r8
+	vpsllq	xmm9,xmm8,56
+	add	r11,QWORD[rsp]	; h += W+K slot precomputed by the vector code
+	mov	r15,rax
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,r10
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,rbx
+	add	r11,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,rax
+	add	r11,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rbx
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm7,6	; second term, from the previous two words: x>>6 ^ (x<<3 ^ x>>61) ^ (x>>19 ^ x<<45), assembled in xmm11
+	add	rdx,r11
+	add	r11,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,rdx
+	add	r14,r11
+	vpsllq	xmm10,xmm7,3
+	shrd	r13,r13,23
+	mov	r11,r14
+	vpaddq	xmm0,xmm0,xmm8
+	mov	r12,r8
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm7,19
+	xor	r13,rdx
+	xor	r12,r9
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,r11
+	vpsllq	xmm10,xmm10,42
+	and	r12,rdx
+	xor	r13,rdx
+	vpxor	xmm11,xmm11,xmm9
+	add	r10,QWORD[8+rsp]
+	mov	rdi,r11
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,r9
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,rax
+	add	r10,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm0,xmm0,xmm11
+	xor	r14,r11
+	add	r10,r13
+	vpaddq	xmm10,xmm0,XMMWORD[((-128))+rbp]	; new schedule words + their round constants
+	xor	r15,rax
+	shrd	r14,r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	vmovdqa	XMMWORD[rsp],xmm10	; refresh slot 0 (just consumed) for the next pass
+	vpalignr	xmm8,xmm2,xmm1,8	; same pattern repeats for xmm1 / slots at [16+rsp]
+	shrd	r13,r13,23
+	mov	r10,r14
+	vpalignr	xmm11,xmm6,xmm5,8
+	mov	r12,rdx
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,rcx
+	xor	r12,r8
+	vpaddq	xmm1,xmm1,xmm11
+	shrd	r13,r13,4
+	xor	r14,r10
+	vpsrlq	xmm11,xmm8,7
+	and	r12,rcx
+	xor	r13,rcx
+	vpsllq	xmm9,xmm8,56
+	add	r9,QWORD[16+rsp]
+	mov	r15,r10
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,r8
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,r11
+	add	r9,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,r10
+	add	r9,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r11
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm0,6
+	add	rbx,r9
+	add	r9,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,rbx
+	add	r14,r9
+	vpsllq	xmm10,xmm0,3
+	shrd	r13,r13,23
+	mov	r9,r14
+	vpaddq	xmm1,xmm1,xmm8
+	mov	r12,rcx
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm0,19
+	xor	r13,rbx
+	xor	r12,rdx
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,r9
+	vpsllq	xmm10,xmm10,42
+	and	r12,rbx
+	xor	r13,rbx
+	vpxor	xmm11,xmm11,xmm9
+	add	r8,QWORD[24+rsp]
+	mov	rdi,r9
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,rdx
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,r10
+	add	r8,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm1,xmm1,xmm11
+	xor	r14,r9
+	add	r8,r13
+	vpaddq	xmm10,xmm1,XMMWORD[((-96))+rbp]
+	xor	r15,r10
+	shrd	r14,r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	vmovdqa	XMMWORD[16+rsp],xmm10
+	vpalignr	xmm8,xmm3,xmm2,8	; xmm2 / slots at [32+rsp]
+	shrd	r13,r13,23
+	mov	r8,r14
+	vpalignr	xmm11,xmm7,xmm6,8
+	mov	r12,rbx
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,rax
+	xor	r12,rcx
+	vpaddq	xmm2,xmm2,xmm11
+	shrd	r13,r13,4
+	xor	r14,r8
+	vpsrlq	xmm11,xmm8,7
+	and	r12,rax
+	xor	r13,rax
+	vpsllq	xmm9,xmm8,56
+	add	rdx,QWORD[32+rsp]
+	mov	r15,r8
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,rcx
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,r9
+	add	rdx,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,r8
+	add	rdx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r9
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm1,6
+	add	r11,rdx
+	add	rdx,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,r11
+	add	r14,rdx
+	vpsllq	xmm10,xmm1,3
+	shrd	r13,r13,23
+	mov	rdx,r14
+	vpaddq	xmm2,xmm2,xmm8
+	mov	r12,rax
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm1,19
+	xor	r13,r11
+	xor	r12,rbx
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,rdx
+	vpsllq	xmm10,xmm10,42
+	and	r12,r11
+	xor	r13,r11
+	vpxor	xmm11,xmm11,xmm9
+	add	rcx,QWORD[40+rsp]
+	mov	rdi,rdx
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,rbx
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,r8
+	add	rcx,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm2,xmm2,xmm11
+	xor	r14,rdx
+	add	rcx,r13
+	vpaddq	xmm10,xmm2,XMMWORD[((-64))+rbp]
+	xor	r15,r8
+	shrd	r14,r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	vmovdqa	XMMWORD[32+rsp],xmm10
+	vpalignr	xmm8,xmm4,xmm3,8	; xmm3 / slots at [48+rsp]
+	shrd	r13,r13,23
+	mov	rcx,r14
+	vpalignr	xmm11,xmm0,xmm7,8
+	mov	r12,r11
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,r10
+	xor	r12,rax
+	vpaddq	xmm3,xmm3,xmm11
+	shrd	r13,r13,4
+	xor	r14,rcx
+	vpsrlq	xmm11,xmm8,7
+	and	r12,r10
+	xor	r13,r10
+	vpsllq	xmm9,xmm8,56
+	add	rbx,QWORD[48+rsp]
+	mov	r15,rcx
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,rax
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,rdx
+	add	rbx,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,rcx
+	add	rbx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rdx
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm2,6
+	add	r9,rbx
+	add	rbx,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,r9
+	add	r14,rbx
+	vpsllq	xmm10,xmm2,3
+	shrd	r13,r13,23
+	mov	rbx,r14
+	vpaddq	xmm3,xmm3,xmm8
+	mov	r12,r10
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm2,19
+	xor	r13,r9
+	xor	r12,r11
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,rbx
+	vpsllq	xmm10,xmm10,42
+	and	r12,r9
+	xor	r13,r9
+	vpxor	xmm11,xmm11,xmm9
+	add	rax,QWORD[56+rsp]
+	mov	rdi,rbx
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,r11
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,rcx
+	add	rax,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm3,xmm3,xmm11
+	xor	r14,rbx
+	add	rax,r13
+	vpaddq	xmm10,xmm3,XMMWORD[((-32))+rbp]
+	xor	r15,rcx
+	shrd	r14,r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	vmovdqa	XMMWORD[48+rsp],xmm10
+	vpalignr	xmm8,xmm5,xmm4,8	; xmm4 / slots at [64+rsp]
+	shrd	r13,r13,23
+	mov	rax,r14
+	vpalignr	xmm11,xmm1,xmm0,8
+	mov	r12,r9
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,r8
+	xor	r12,r10
+	vpaddq	xmm4,xmm4,xmm11
+	shrd	r13,r13,4
+	xor	r14,rax
+	vpsrlq	xmm11,xmm8,7
+	and	r12,r8
+	xor	r13,r8
+	vpsllq	xmm9,xmm8,56
+	add	r11,QWORD[64+rsp]
+	mov	r15,rax
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,r10
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,rbx
+	add	r11,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,rax
+	add	r11,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rbx
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm3,6
+	add	rdx,r11
+	add	r11,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,rdx
+	add	r14,r11
+	vpsllq	xmm10,xmm3,3
+	shrd	r13,r13,23
+	mov	r11,r14
+	vpaddq	xmm4,xmm4,xmm8
+	mov	r12,r8
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm3,19
+	xor	r13,rdx
+	xor	r12,r9
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,r11
+	vpsllq	xmm10,xmm10,42
+	and	r12,rdx
+	xor	r13,rdx
+	vpxor	xmm11,xmm11,xmm9
+	add	r10,QWORD[72+rsp]
+	mov	rdi,r11
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,r9
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,rax
+	add	r10,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm4,xmm4,xmm11
+	xor	r14,r11
+	add	r10,r13
+	vpaddq	xmm10,xmm4,XMMWORD[rbp]
+	xor	r15,rax
+	shrd	r14,r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	vmovdqa	XMMWORD[64+rsp],xmm10
+	vpalignr	xmm8,xmm6,xmm5,8	; xmm5 / slots at [80+rsp]
+	shrd	r13,r13,23
+	mov	r10,r14
+	vpalignr	xmm11,xmm2,xmm1,8
+	mov	r12,rdx
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,rcx
+	xor	r12,r8
+	vpaddq	xmm5,xmm5,xmm11
+	shrd	r13,r13,4
+	xor	r14,r10
+	vpsrlq	xmm11,xmm8,7
+	and	r12,rcx
+	xor	r13,rcx
+	vpsllq	xmm9,xmm8,56
+	add	r9,QWORD[80+rsp]
+	mov	r15,r10
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,r8
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,r11
+	add	r9,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,r10
+	add	r9,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r11
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm4,6
+	add	rbx,r9
+	add	r9,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,rbx
+	add	r14,r9
+	vpsllq	xmm10,xmm4,3
+	shrd	r13,r13,23
+	mov	r9,r14
+	vpaddq	xmm5,xmm5,xmm8
+	mov	r12,rcx
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm4,19
+	xor	r13,rbx
+	xor	r12,rdx
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,r9
+	vpsllq	xmm10,xmm10,42
+	and	r12,rbx
+	xor	r13,rbx
+	vpxor	xmm11,xmm11,xmm9
+	add	r8,QWORD[88+rsp]
+	mov	rdi,r9
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,rdx
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,r10
+	add	r8,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm5,xmm5,xmm11
+	xor	r14,r9
+	add	r8,r13
+	vpaddq	xmm10,xmm5,XMMWORD[32+rbp]
+	xor	r15,r10
+	shrd	r14,r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	vmovdqa	XMMWORD[80+rsp],xmm10
+	vpalignr	xmm8,xmm7,xmm6,8	; xmm6 / slots at [96+rsp]
+	shrd	r13,r13,23
+	mov	r8,r14
+	vpalignr	xmm11,xmm3,xmm2,8
+	mov	r12,rbx
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,rax
+	xor	r12,rcx
+	vpaddq	xmm6,xmm6,xmm11
+	shrd	r13,r13,4
+	xor	r14,r8
+	vpsrlq	xmm11,xmm8,7
+	and	r12,rax
+	xor	r13,rax
+	vpsllq	xmm9,xmm8,56
+	add	rdx,QWORD[96+rsp]
+	mov	r15,r8
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,rcx
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,r9
+	add	rdx,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,r8
+	add	rdx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,r9
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm5,6
+	add	r11,rdx
+	add	rdx,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,r11
+	add	r14,rdx
+	vpsllq	xmm10,xmm5,3
+	shrd	r13,r13,23
+	mov	rdx,r14
+	vpaddq	xmm6,xmm6,xmm8
+	mov	r12,rax
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm5,19
+	xor	r13,r11
+	xor	r12,rbx
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,rdx
+	vpsllq	xmm10,xmm10,42
+	and	r12,r11
+	xor	r13,r11
+	vpxor	xmm11,xmm11,xmm9
+	add	rcx,QWORD[104+rsp]
+	mov	rdi,rdx
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,rbx
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,r8
+	add	rcx,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm6,xmm6,xmm11
+	xor	r14,rdx
+	add	rcx,r13
+	vpaddq	xmm10,xmm6,XMMWORD[64+rbp]
+	xor	r15,r8
+	shrd	r14,r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	vmovdqa	XMMWORD[96+rsp],xmm10
+	vpalignr	xmm8,xmm0,xmm7,8	; xmm7 / slots at [112+rsp]
+	shrd	r13,r13,23
+	mov	rcx,r14
+	vpalignr	xmm11,xmm4,xmm3,8
+	mov	r12,r11
+	shrd	r14,r14,5
+	vpsrlq	xmm10,xmm8,1
+	xor	r13,r10
+	xor	r12,rax
+	vpaddq	xmm7,xmm7,xmm11
+	shrd	r13,r13,4
+	xor	r14,rcx
+	vpsrlq	xmm11,xmm8,7
+	and	r12,r10
+	xor	r13,r10
+	vpsllq	xmm9,xmm8,56
+	add	rbx,QWORD[112+rsp]
+	mov	r15,rcx
+	vpxor	xmm8,xmm11,xmm10
+	xor	r12,rax
+	shrd	r14,r14,6
+	vpsrlq	xmm10,xmm10,7
+	xor	r15,rdx
+	add	rbx,r12
+	vpxor	xmm8,xmm8,xmm9
+	shrd	r13,r13,14
+	and	rdi,r15
+	vpsllq	xmm9,xmm9,7
+	xor	r14,rcx
+	add	rbx,r13
+	vpxor	xmm8,xmm8,xmm10
+	xor	rdi,rdx
+	shrd	r14,r14,28
+	vpsrlq	xmm11,xmm6,6
+	add	r9,rbx
+	add	rbx,rdi
+	vpxor	xmm8,xmm8,xmm9
+	mov	r13,r9
+	add	r14,rbx
+	vpsllq	xmm10,xmm6,3
+	shrd	r13,r13,23
+	mov	rbx,r14
+	vpaddq	xmm7,xmm7,xmm8
+	mov	r12,r10
+	shrd	r14,r14,5
+	vpsrlq	xmm9,xmm6,19
+	xor	r13,r9
+	xor	r12,r11
+	vpxor	xmm11,xmm11,xmm10
+	shrd	r13,r13,4
+	xor	r14,rbx
+	vpsllq	xmm10,xmm10,42
+	and	r12,r9
+	xor	r13,r9
+	vpxor	xmm11,xmm11,xmm9
+	add	rax,QWORD[120+rsp]
+	mov	rdi,rbx
+	vpsrlq	xmm9,xmm9,42
+	xor	r12,r11
+	shrd	r14,r14,6
+	vpxor	xmm11,xmm11,xmm10
+	xor	rdi,rcx
+	add	rax,r12
+	vpxor	xmm11,xmm11,xmm9
+	shrd	r13,r13,14
+	and	r15,rdi
+	vpaddq	xmm7,xmm7,xmm11
+	xor	r14,rbx
+	add	rax,r13
+	vpaddq	xmm10,xmm7,XMMWORD[96+rbp]
+	xor	r15,rcx
+	shrd	r14,r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	vmovdqa	XMMWORD[112+rsp],xmm10
+	cmp	BYTE[135+rbp],0	; end test: this byte is the top byte of a K512 entry (non-zero) until, on the 4th pass, rbp+135 = K512+1287 lands on the 0x00 byte of the shuffle mask
+	jne	NEAR $L$avx_00_47	; 4 passes x 16 rounds = 64 rounds with schedule updates
+	shrd	r13,r13,23	; fall-through: 16 more rounds that only consume the W+K slots at [rsp]..[120+rsp]; no further schedule updates
+	mov	rax,r14
+	mov	r12,r9
+	shrd	r14,r14,5
+	xor	r13,r8
+	xor	r12,r10
+	shrd	r13,r13,4
+	xor	r14,rax
+	and	r12,r8
+	xor	r13,r8
+	add	r11,QWORD[rsp]
+	mov	r15,rax
+	xor	r12,r10
+	shrd	r14,r14,6
+	xor	r15,rbx
+	add	r11,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,rax
+	add	r11,r13
+	xor	rdi,rbx
+	shrd	r14,r14,28
+	add	rdx,r11
+	add	r11,rdi
+	mov	r13,rdx
+	add	r14,r11
+	shrd	r13,r13,23
+	mov	r11,r14
+	mov	r12,r8
+	shrd	r14,r14,5
+	xor	r13,rdx
+	xor	r12,r9
+	shrd	r13,r13,4
+	xor	r14,r11
+	and	r12,rdx
+	xor	r13,rdx
+	add	r10,QWORD[8+rsp]
+	mov	rdi,r11
+	xor	r12,r9
+	shrd	r14,r14,6
+	xor	rdi,rax
+	add	r10,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,r11
+	add	r10,r13
+	xor	r15,rax
+	shrd	r14,r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	shrd	r13,r13,23
+	mov	r10,r14
+	mov	r12,rdx
+	shrd	r14,r14,5
+	xor	r13,rcx
+	xor	r12,r8
+	shrd	r13,r13,4
+	xor	r14,r10
+	and	r12,rcx
+	xor	r13,rcx
+	add	r9,QWORD[16+rsp]
+	mov	r15,r10
+	xor	r12,r8
+	shrd	r14,r14,6
+	xor	r15,r11
+	add	r9,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,r10
+	add	r9,r13
+	xor	rdi,r11
+	shrd	r14,r14,28
+	add	rbx,r9
+	add	r9,rdi
+	mov	r13,rbx
+	add	r14,r9
+	shrd	r13,r13,23
+	mov	r9,r14
+	mov	r12,rcx
+	shrd	r14,r14,5
+	xor	r13,rbx
+	xor	r12,rdx
+	shrd	r13,r13,4
+	xor	r14,r9
+	and	r12,rbx
+	xor	r13,rbx
+	add	r8,QWORD[24+rsp]
+	mov	rdi,r9
+	xor	r12,rdx
+	shrd	r14,r14,6
+	xor	rdi,r10
+	add	r8,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,r9
+	add	r8,r13
+	xor	r15,r10
+	shrd	r14,r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	shrd	r13,r13,23
+	mov	r8,r14
+	mov	r12,rbx
+	shrd	r14,r14,5
+	xor	r13,rax
+	xor	r12,rcx
+	shrd	r13,r13,4
+	xor	r14,r8
+	and	r12,rax
+	xor	r13,rax
+	add	rdx,QWORD[32+rsp]
+	mov	r15,r8
+	xor	r12,rcx
+	shrd	r14,r14,6
+	xor	r15,r9
+	add	rdx,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,r8
+	add	rdx,r13
+	xor	rdi,r9
+	shrd	r14,r14,28
+	add	r11,rdx
+	add	rdx,rdi
+	mov	r13,r11
+	add	r14,rdx
+	shrd	r13,r13,23
+	mov	rdx,r14
+	mov	r12,rax
+	shrd	r14,r14,5
+	xor	r13,r11
+	xor	r12,rbx
+	shrd	r13,r13,4
+	xor	r14,rdx
+	and	r12,r11
+	xor	r13,r11
+	add	rcx,QWORD[40+rsp]
+	mov	rdi,rdx
+	xor	r12,rbx
+	shrd	r14,r14,6
+	xor	rdi,r8
+	add	rcx,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,rdx
+	add	rcx,r13
+	xor	r15,r8
+	shrd	r14,r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	shrd	r13,r13,23
+	mov	rcx,r14
+	mov	r12,r11
+	shrd	r14,r14,5
+	xor	r13,r10
+	xor	r12,rax
+	shrd	r13,r13,4
+	xor	r14,rcx
+	and	r12,r10
+	xor	r13,r10
+	add	rbx,QWORD[48+rsp]
+	mov	r15,rcx
+	xor	r12,rax
+	shrd	r14,r14,6
+	xor	r15,rdx
+	add	rbx,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,rcx
+	add	rbx,r13
+	xor	rdi,rdx
+	shrd	r14,r14,28
+	add	r9,rbx
+	add	rbx,rdi
+	mov	r13,r9
+	add	r14,rbx
+	shrd	r13,r13,23
+	mov	rbx,r14
+	mov	r12,r10
+	shrd	r14,r14,5
+	xor	r13,r9
+	xor	r12,r11
+	shrd	r13,r13,4
+	xor	r14,rbx
+	and	r12,r9
+	xor	r13,r9
+	add	rax,QWORD[56+rsp]
+	mov	rdi,rbx
+	xor	r12,r11
+	shrd	r14,r14,6
+	xor	rdi,rcx
+	add	rax,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,rbx
+	add	rax,r13
+	xor	r15,rcx
+	shrd	r14,r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	shrd	r13,r13,23
+	mov	rax,r14
+	mov	r12,r9
+	shrd	r14,r14,5
+	xor	r13,r8
+	xor	r12,r10
+	shrd	r13,r13,4
+	xor	r14,rax
+	and	r12,r8
+	xor	r13,r8
+	add	r11,QWORD[64+rsp]
+	mov	r15,rax
+	xor	r12,r10
+	shrd	r14,r14,6
+	xor	r15,rbx
+	add	r11,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,rax
+	add	r11,r13
+	xor	rdi,rbx
+	shrd	r14,r14,28
+	add	rdx,r11
+	add	r11,rdi
+	mov	r13,rdx
+	add	r14,r11
+	shrd	r13,r13,23
+	mov	r11,r14
+	mov	r12,r8
+	shrd	r14,r14,5
+	xor	r13,rdx
+	xor	r12,r9
+	shrd	r13,r13,4
+	xor	r14,r11
+	and	r12,rdx
+	xor	r13,rdx
+	add	r10,QWORD[72+rsp]
+	mov	rdi,r11
+	xor	r12,r9
+	shrd	r14,r14,6
+	xor	rdi,rax
+	add	r10,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,r11
+	add	r10,r13
+	xor	r15,rax
+	shrd	r14,r14,28
+	add	rcx,r10
+	add	r10,r15
+	mov	r13,rcx
+	add	r14,r10
+	shrd	r13,r13,23
+	mov	r10,r14
+	mov	r12,rdx
+	shrd	r14,r14,5
+	xor	r13,rcx
+	xor	r12,r8
+	shrd	r13,r13,4
+	xor	r14,r10
+	and	r12,rcx
+	xor	r13,rcx
+	add	r9,QWORD[80+rsp]
+	mov	r15,r10
+	xor	r12,r8
+	shrd	r14,r14,6
+	xor	r15,r11
+	add	r9,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,r10
+	add	r9,r13
+	xor	rdi,r11
+	shrd	r14,r14,28
+	add	rbx,r9
+	add	r9,rdi
+	mov	r13,rbx
+	add	r14,r9
+	shrd	r13,r13,23
+	mov	r9,r14
+	mov	r12,rcx
+	shrd	r14,r14,5
+	xor	r13,rbx
+	xor	r12,rdx
+	shrd	r13,r13,4
+	xor	r14,r9
+	and	r12,rbx
+	xor	r13,rbx
+	add	r8,QWORD[88+rsp]
+	mov	rdi,r9
+	xor	r12,rdx
+	shrd	r14,r14,6
+	xor	rdi,r10
+	add	r8,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,r9
+	add	r8,r13
+	xor	r15,r10
+	shrd	r14,r14,28
+	add	rax,r8
+	add	r8,r15
+	mov	r13,rax
+	add	r14,r8
+	shrd	r13,r13,23
+	mov	r8,r14
+	mov	r12,rbx
+	shrd	r14,r14,5
+	xor	r13,rax
+	xor	r12,rcx
+	shrd	r13,r13,4
+	xor	r14,r8
+	and	r12,rax
+	xor	r13,rax
+	add	rdx,QWORD[96+rsp]
+	mov	r15,r8
+	xor	r12,rcx
+	shrd	r14,r14,6
+	xor	r15,r9
+	add	rdx,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,r8
+	add	rdx,r13
+	xor	rdi,r9
+	shrd	r14,r14,28
+	add	r11,rdx
+	add	rdx,rdi
+	mov	r13,r11
+	add	r14,rdx
+	shrd	r13,r13,23
+	mov	rdx,r14
+	mov	r12,rax
+	shrd	r14,r14,5
+	xor	r13,r11
+	xor	r12,rbx
+	shrd	r13,r13,4
+	xor	r14,rdx
+	and	r12,r11
+	xor	r13,r11
+	add	rcx,QWORD[104+rsp]
+	mov	rdi,rdx
+	xor	r12,rbx
+	shrd	r14,r14,6
+	xor	rdi,r8
+	add	rcx,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,rdx
+	add	rcx,r13
+	xor	r15,r8
+	shrd	r14,r14,28
+	add	r10,rcx
+	add	rcx,r15
+	mov	r13,r10
+	add	r14,rcx
+	shrd	r13,r13,23
+	mov	rcx,r14
+	mov	r12,r11
+	shrd	r14,r14,5
+	xor	r13,r10
+	xor	r12,rax
+	shrd	r13,r13,4
+	xor	r14,rcx
+	and	r12,r10
+	xor	r13,r10
+	add	rbx,QWORD[112+rsp]
+	mov	r15,rcx
+	xor	r12,rax
+	shrd	r14,r14,6
+	xor	r15,rdx
+	add	rbx,r12
+	shrd	r13,r13,14
+	and	rdi,r15
+	xor	r14,rcx
+	add	rbx,r13
+	xor	rdi,rdx
+	shrd	r14,r14,28
+	add	r9,rbx
+	add	rbx,rdi
+	mov	r13,r9
+	add	r14,rbx
+	shrd	r13,r13,23
+	mov	rbx,r14
+	mov	r12,r10
+	shrd	r14,r14,5
+	xor	r13,r9
+	xor	r12,r11
+	shrd	r13,r13,4
+	xor	r14,rbx
+	and	r12,r9
+	xor	r13,r9
+	add	rax,QWORD[120+rsp]
+	mov	rdi,rbx
+	xor	r12,r11
+	shrd	r14,r14,6
+	xor	rdi,rcx
+	add	rax,r12
+	shrd	r13,r13,14
+	and	r15,rdi
+	xor	r14,rbx
+	add	rax,r13
+	xor	r15,rcx
+	shrd	r14,r14,28
+	add	r8,rax
+	add	rax,r15
+	mov	r13,r8
+	add	r14,rax
+	mov	rdi,QWORD[((128+0))+rsp]	; reload the state pointer (rdi was used as a round temporary)
+	mov	rax,r14
+
+	add	rax,QWORD[rdi]	; add the previous state into the working variables
+	lea	rsi,[128+rsi]	; advance the input pointer by one 128-byte block
+	add	rbx,QWORD[8+rdi]
+	add	rcx,QWORD[16+rdi]
+	add	rdx,QWORD[24+rdi]
+	add	r8,QWORD[32+rdi]
+	add	r9,QWORD[40+rdi]
+	add	r10,QWORD[48+rdi]
+	add	r11,QWORD[56+rdi]
+
+	cmp	rsi,QWORD[((128+16))+rsp]	; compare against the end pointer; the mov stores below leave the flags intact for the jb
+
+	mov	QWORD[rdi],rax	; write the updated state back
+	mov	QWORD[8+rdi],rbx
+	mov	QWORD[16+rdi],rcx
+	mov	QWORD[24+rdi],rdx
+	mov	QWORD[32+rdi],r8
+	mov	QWORD[40+rdi],r9
+	mov	QWORD[48+rdi],r10
+	mov	QWORD[56+rdi],r11
+	jb	NEAR $L$loop_avx	; more input left: process the next block
+
+	mov	rsi,QWORD[152+rsp]	; rsi = entry rsp saved by the prologue
+
+	vzeroupper
+	movaps	xmm6,XMMWORD[((128+32))+rsp]	; restore xmm6-xmm11 from the frame save area
+	movaps	xmm7,XMMWORD[((128+48))+rsp]
+	movaps	xmm8,XMMWORD[((128+64))+rsp]
+	movaps	xmm9,XMMWORD[((128+80))+rsp]
+	movaps	xmm10,XMMWORD[((128+96))+rsp]
+	movaps	xmm11,XMMWORD[((128+112))+rsp]
+	mov	r15,QWORD[((-48))+rsi]	; reload the six pushed registers relative to the entry rsp
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; drop the aligned frame and the pushes in one step
+
+$L$epilogue_avx:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_sha512_block_data_order_avx:
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	; Win64 exception handler shared by both SHA-512 routines (registered in .xdata). r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*; field names in the comments follow the documented Win64 layouts -- NOTE(review): confirm offsets against winnt.h
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; room for the stack arguments of the RtlVirtualUnwind call below
+
+	mov	rax,QWORD[120+r8]	; CONTEXT.Rax; inside the prologue it still holds the routine's entry rsp
+	mov	rbx,QWORD[248+r8]	; CONTEXT.Rip
+
+	mov	rsi,QWORD[8+r9]	; DISPATCHER_CONTEXT.ImageBase
+	mov	r11,QWORD[56+r9]	; DISPATCHER_CONTEXT.HandlerData: the two label RVAs that follow the handler RVA in .xdata
+
+	mov	r10d,DWORD[r11]	; HandlerData[0] = prologue label RVA
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$in_prologue	; Rip before the prologue label: frame not complete, rax is the entry rsp
+
+	mov	rax,QWORD[152+r8]	; CONTEXT.Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1] = epilogue label RVA
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$in_prologue	; Rip at/after the epilogue label: registers and rsp already restored
+	mov	rsi,rax	; keep the frame base for the XMM save area lookup
+	mov	rax,QWORD[((128+24))+rax]	; entry rsp stored by the prologue at [152+rsp]
+
+	mov	rbx,QWORD[((-8))+rax]	; recover the six pushed registers (this overwrites the Rip copy in rbx)
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	; ...and publish them as CONTEXT.Rbx/Rbp/R12..R15
+	mov	QWORD[160+r8],rbp
+	mov	QWORD[216+r8],r12
+	mov	QWORD[224+r8],r13
+	mov	QWORD[232+r8],r14
+	mov	QWORD[240+r8],r15
+
+	lea	r10,[$L$epilogue]
+	cmp	QWORD[248+r8],r10	; FIX: test CONTEXT.Rip itself; rbx was reloaded with the saved rbx above, so the old "cmp rbx,r10" tested an unrelated value. Generated file: the same fix belongs in the Perl source
+	jb	NEAR $L$in_prologue	; Rip below $L$epilogue = scalar routine, which has no XMM save area
+
+	lea	rsi,[((128+32))+rsi]	; AVX routine: source = xmm6..xmm11 save area in its frame
+	lea	rdi,[512+r8]	; destination = CONTEXT.Xmm6
+	mov	ecx,12	; 12 qwords = 6 XMM registers
+	DD	0xa548f3fc	; encoded bytes fc f3 48 a5 = cld; rep movsq
+
+$L$in_prologue:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi as parked by the "WIN64 prologue" stores above the entry rsp
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; CONTEXT.Rsp = entry rsp
+	mov	QWORD[168+r8],rsi	; CONTEXT.Rsi
+	mov	QWORD[176+r8],rdi	; CONTEXT.Rdi
+
+	mov	rdi,QWORD[40+r9]	; DISPATCHER_CONTEXT.ContextRecord
+	mov	rsi,r8
+	mov	ecx,154	; 154 qwords = 1232 bytes copied from the patched CONTEXT
+	DD	0xa548f3fc	; cld; rep movsq
+
+	mov	rsi,r9	; marshal RtlVirtualUnwind arguments from the DISPATCHER_CONTEXT
+	xor	rcx,rcx	; arg1 = 0
+	mov	rdx,QWORD[8+rsi]	; arg2 = ImageBase
+	mov	r8,QWORD[rsi]	; arg3 = first field (ControlPc)
+	mov	r9,QWORD[16+rsi]	; arg4 = FunctionEntry
+	mov	r10,QWORD[40+rsi]	; arg5 = ContextRecord
+	lea	r11,[56+rsi]	; arg6 = &HandlerData
+	lea	r12,[24+rsi]	; arg7 = &EstablisherFrame
+	mov	QWORD[32+rsp],r10
+	mov	QWORD[40+rsp],r11
+	mov	QWORD[48+rsp],r12
+	mov	QWORD[56+rsp],rcx	; arg8 = NULL
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return 1 (ExceptionContinueSearch)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+section	.pdata rdata align=4	; function table: one (begin RVA, end RVA, unwind-info RVA) triple per routine
+ALIGN	4
+	DD	$L$SEH_begin_sha512_block_data_order wrt ..imagebase
+	DD	$L$SEH_end_sha512_block_data_order wrt ..imagebase
+	DD	$L$SEH_info_sha512_block_data_order wrt ..imagebase
+	DD	$L$SEH_begin_sha512_block_data_order_avx wrt ..imagebase
+	DD	$L$SEH_end_sha512_block_data_order_avx wrt ..imagebase
+	DD	$L$SEH_info_sha512_block_data_order_avx wrt ..imagebase
+section	.xdata rdata align=8	; unwind info records referenced from .pdata
+ALIGN	8
+$L$SEH_info_sha512_block_data_order:
+DB	9,0,0,0	; NOTE(review): presumably UNWIND_INFO version 1 with the exception-handler flag and no unwind codes -- confirm against the Win64 unwind-data spec
+	DD	se_handler wrt ..imagebase	; handler RVA
+	DD	$L$prologue wrt ..imagebase,$L$epilogue wrt ..imagebase	; handler data: the two label RVAs se_handler compares the faulting Rip against
+$L$SEH_info_sha512_block_data_order_avx:
+DB	9,0,0,0
+	DD	se_handler wrt ..imagebase
+	DD	$L$prologue_avx wrt ..imagebase,$L$epilogue_avx wrt ..imagebase
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
new file mode 100644
index 0000000..ccfc870
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/vpaes-x86_64.asm
@@ -0,0 +1,1472 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+
+
+
+
+
+
+
+
+; _vpaes_encrypt_core: internal single-block encryption core.
+; In:  xmm0 = input block, rdx = key schedule (32-bit round counter is read
+;      from offset 240), xmm9-xmm15 = tables as loaded by _vpaes_preheat.
+; Out: xmm0 = result.  Overwrites rax, r9, r10, r11 and xmm1-xmm5.
+; "DB 102,15,56,0,<modrm>" lines are hand-encoded SSSE3 pshufb
+; (66 0F 38 00 /r); the decoded operands are noted beside each one.
+; The jnz closing $L$enc_entry consumes ZF set by an earlier integer
+; instruction; the SSE instructions in between do not modify EFLAGS.
+ALIGN	16
+_vpaes_encrypt_core:
+
+	mov	r9,rdx	; r9 walks the round keys
+	mov	r11,16	; r11 = byte offset into the 64-byte rotating tables
+	mov	eax,DWORD[240+rdx]	; round counter stored with the key schedule
+	movdqa	xmm1,xmm9
+	movdqa	xmm2,XMMWORD[$L$k_ipt]
+	pandn	xmm1,xmm0	; xmm1 = bits of each byte outside the 0x0F mask
+	movdqu	xmm5,XMMWORD[r9]	; round key 0
+	psrld	xmm1,4	; ...moved down: high nibbles
+	pand	xmm0,xmm9	; xmm0 = low nibbles
+DB	102,15,56,0,208	; pshufb xmm2,xmm0
+	movdqa	xmm0,XMMWORD[(($L$k_ipt+16))]
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	pxor	xmm2,xmm5
+	add	r9,16	; next round key; last flag-setting instruction before the first jnz
+	pxor	xmm0,xmm2
+	lea	r10,[$L$k_mc_backward]	; k_mc_forward = r10-64, k_sbo = r10-96, k_sr = r10+64
+	jmp	NEAR $L$enc_entry
+
+ALIGN	16
+$L$enc_loop:
+
+	movdqa	xmm4,xmm13	; k_sb1 (see _vpaes_preheat)
+	movdqa	xmm0,xmm12	; k_sb1+16
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+DB	102,15,56,0,195	; pshufb xmm0,xmm3
+	pxor	xmm4,xmm5	; fold in the round key loaded at the end of $L$enc_entry
+	movdqa	xmm5,xmm15	; k_sb2
+	pxor	xmm0,xmm4
+	movdqa	xmm1,XMMWORD[((-64))+r10*1+r11]	; k_mc_forward + r11
+DB	102,15,56,0,234	; pshufb xmm5,xmm2
+	movdqa	xmm4,XMMWORD[r10*1+r11]	; k_mc_backward + r11
+	movdqa	xmm2,xmm14	; k_sb2+16
+DB	102,15,56,0,211	; pshufb xmm2,xmm3
+	movdqa	xmm3,xmm0
+	pxor	xmm2,xmm5
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	add	r9,16
+	pxor	xmm0,xmm2
+DB	102,15,56,0,220	; pshufb xmm3,xmm4
+	add	r11,16
+	pxor	xmm3,xmm0
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	and	r11,0x30	; r11 = (r11 + 16) mod 64
+	sub	rax,1	; ZF set here ends the loop at the jnz below
+	pxor	xmm0,xmm3
+
+$L$enc_entry:
+
+	movdqa	xmm1,xmm9
+	movdqa	xmm5,xmm11	; k_inv+16
+	pandn	xmm1,xmm0
+	psrld	xmm1,4	; xmm1 = high nibbles
+	pand	xmm0,xmm9	; xmm0 = low nibbles
+DB	102,15,56,0,232	; pshufb xmm5,xmm0
+	movdqa	xmm3,xmm10	; k_inv
+	pxor	xmm0,xmm1
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	movdqa	xmm4,xmm10
+	pxor	xmm3,xmm5
+DB	102,15,56,0,224	; pshufb xmm4,xmm0
+	movdqa	xmm2,xmm10
+	pxor	xmm4,xmm5
+DB	102,15,56,0,211	; pshufb xmm2,xmm3
+	movdqa	xmm3,xmm10
+	pxor	xmm2,xmm0
+DB	102,15,56,0,220	; pshufb xmm3,xmm4
+	movdqu	xmm5,XMMWORD[r9]	; next round key
+	pxor	xmm3,xmm1
+	jnz	NEAR $L$enc_loop
+
+; final round: k_sbo lookups, round key, then a byte permutation from k_sr
+	movdqa	xmm4,XMMWORD[((-96))+r10]	; k_sbo
+	movdqa	xmm0,XMMWORD[((-80))+r10]	; k_sbo+16
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+	pxor	xmm4,xmm5
+DB	102,15,56,0,195	; pshufb xmm0,xmm3
+	movdqa	xmm1,XMMWORD[64+r10*1+r11]	; k_sr + r11
+	pxor	xmm0,xmm4
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; _vpaes_encrypt_core_2x: two-block variant of _vpaes_encrypt_core; every
+; step is issued once for xmm0 and once for xmm6.
+; In:  xmm0, xmm6 = input blocks, rdx = key schedule, xmm9 / xmm10 as set
+;      by _vpaes_preheat.  Out: xmm0, xmm6.
+; Overwrites rax, r9-r11, xmm1-xmm5, xmm7, xmm8, xmm11-xmm13; the k_sb1,
+; k_sb2 and k_inv+16 tables are re-read from memory instead of registers.
+ALIGN	16
+_vpaes_encrypt_core_2x:
+
+	mov	r9,rdx
+	mov	r11,16
+	mov	eax,DWORD[240+rdx]	; round counter stored with the key schedule
+	movdqa	xmm1,xmm9
+	movdqa	xmm7,xmm9
+	movdqa	xmm2,XMMWORD[$L$k_ipt]
+	movdqa	xmm8,xmm2
+	pandn	xmm1,xmm0
+	pandn	xmm7,xmm6
+	movdqu	xmm5,XMMWORD[r9]	; round key 0, shared by both blocks
+
+	psrld	xmm1,4
+	psrld	xmm7,4
+	pand	xmm0,xmm9
+	pand	xmm6,xmm9
+DB	102,15,56,0,208	; pshufb xmm2,xmm0
+DB	102,68,15,56,0,198	; pshufb xmm8,xmm6 (extra byte 68/65/69 is a REX prefix selecting xmm8-xmm15)
+	movdqa	xmm0,XMMWORD[(($L$k_ipt+16))]
+	movdqa	xmm6,xmm0
+DB	102,15,56,0,193
+DB	102,15,56,0,247
+	pxor	xmm2,xmm5
+	pxor	xmm8,xmm5
+	add	r9,16
+	pxor	xmm0,xmm2
+	pxor	xmm6,xmm8
+	lea	r10,[$L$k_mc_backward]	; k_mc_forward = r10-64, k_sbo = r10-96, k_sr = r10+64
+	jmp	NEAR $L$enc2x_entry
+
+ALIGN	16
+$L$enc2x_loop:
+
+	movdqa	xmm4,XMMWORD[$L$k_sb1]
+	movdqa	xmm0,XMMWORD[(($L$k_sb1+16))]
+	movdqa	xmm12,xmm4
+	movdqa	xmm6,xmm0
+DB	102,15,56,0,226
+DB	102,69,15,56,0,224
+DB	102,15,56,0,195
+DB	102,65,15,56,0,243
+	pxor	xmm4,xmm5
+	pxor	xmm12,xmm5
+	movdqa	xmm5,XMMWORD[$L$k_sb2]
+	movdqa	xmm13,xmm5
+	pxor	xmm0,xmm4
+	pxor	xmm6,xmm12
+	movdqa	xmm1,XMMWORD[((-64))+r10*1+r11]	; k_mc_forward + r11
+
+DB	102,15,56,0,234
+DB	102,69,15,56,0,232
+	movdqa	xmm4,XMMWORD[r10*1+r11]	; k_mc_backward + r11
+
+	movdqa	xmm2,XMMWORD[(($L$k_sb2+16))]
+	movdqa	xmm8,xmm2
+DB	102,15,56,0,211
+DB	102,69,15,56,0,195
+	movdqa	xmm3,xmm0
+	movdqa	xmm11,xmm6
+	pxor	xmm2,xmm5
+	pxor	xmm8,xmm13
+DB	102,15,56,0,193
+DB	102,15,56,0,241
+	add	r9,16
+	pxor	xmm0,xmm2
+	pxor	xmm6,xmm8
+DB	102,15,56,0,220
+DB	102,68,15,56,0,220
+	add	r11,16
+	pxor	xmm3,xmm0
+	pxor	xmm11,xmm6
+DB	102,15,56,0,193
+DB	102,15,56,0,241
+	and	r11,0x30	; r11 = (r11 + 16) mod 64
+	sub	rax,1	; ZF set here ends the loop at the jnz below
+	pxor	xmm0,xmm3
+	pxor	xmm6,xmm11
+
+$L$enc2x_entry:
+
+	movdqa	xmm1,xmm9
+	movdqa	xmm7,xmm9
+	movdqa	xmm5,XMMWORD[(($L$k_inv+16))]
+	movdqa	xmm13,xmm5
+	pandn	xmm1,xmm0
+	pandn	xmm7,xmm6
+	psrld	xmm1,4
+	psrld	xmm7,4
+	pand	xmm0,xmm9
+	pand	xmm6,xmm9
+DB	102,15,56,0,232
+DB	102,68,15,56,0,238
+	movdqa	xmm3,xmm10
+	movdqa	xmm11,xmm10
+	pxor	xmm0,xmm1
+	pxor	xmm6,xmm7
+DB	102,15,56,0,217
+DB	102,68,15,56,0,223
+	movdqa	xmm4,xmm10
+	movdqa	xmm12,xmm10
+	pxor	xmm3,xmm5
+	pxor	xmm11,xmm13
+DB	102,15,56,0,224
+DB	102,68,15,56,0,230
+	movdqa	xmm2,xmm10
+	movdqa	xmm8,xmm10
+	pxor	xmm4,xmm5
+	pxor	xmm12,xmm13
+DB	102,15,56,0,211
+DB	102,69,15,56,0,195
+	movdqa	xmm3,xmm10
+	movdqa	xmm11,xmm10
+	pxor	xmm2,xmm0
+	pxor	xmm8,xmm6
+DB	102,15,56,0,220
+DB	102,69,15,56,0,220
+	movdqu	xmm5,XMMWORD[r9]	; next round key
+
+	pxor	xmm3,xmm1
+	pxor	xmm11,xmm7
+	jnz	NEAR $L$enc2x_loop
+
+; final round for both blocks
+	movdqa	xmm4,XMMWORD[((-96))+r10]	; k_sbo
+	movdqa	xmm0,XMMWORD[((-80))+r10]	; k_sbo+16
+	movdqa	xmm12,xmm4
+	movdqa	xmm6,xmm0
+DB	102,15,56,0,226
+DB	102,69,15,56,0,224
+	pxor	xmm4,xmm5
+	pxor	xmm12,xmm5
+DB	102,15,56,0,195
+DB	102,65,15,56,0,243
+	movdqa	xmm1,XMMWORD[64+r10*1+r11]	; k_sr + r11
+
+	pxor	xmm0,xmm4
+	pxor	xmm6,xmm12
+DB	102,15,56,0,193
+DB	102,15,56,0,241
+	DB	0F3h,0C3h		;repret
+
+
+
+
+; _vpaes_decrypt_core: internal single-block decryption core.
+; In:  xmm0 = input block, rdx = key schedule (round counter at offset 240),
+;      xmm9-xmm11 = nibble mask and both k_inv halves from _vpaes_preheat.
+; Out: xmm0.  Overwrites rax, r9, r10, r11 and xmm1-xmm5.
+; "DB 102,15,56,0,x" = pshufb, "DB 102,15,58,15,x,imm" = palignr.
+ALIGN	16
+_vpaes_decrypt_core:
+
+	mov	r9,rdx	; r9 walks the round keys
+	mov	eax,DWORD[240+rdx]	; round counter stored with the key schedule
+	movdqa	xmm1,xmm9
+	movdqa	xmm2,XMMWORD[$L$k_dipt]
+	pandn	xmm1,xmm0
+	mov	r11,rax
+	psrld	xmm1,4	; xmm1 = high nibbles
+	movdqu	xmm5,XMMWORD[r9]	; round key 0
+	shl	r11,4
+	pand	xmm0,xmm9	; xmm0 = low nibbles
+DB	102,15,56,0,208	; pshufb xmm2,xmm0
+	movdqa	xmm0,XMMWORD[(($L$k_dipt+16))]
+	xor	r11,0x30
+	lea	r10,[$L$k_dsbd]	; k_dsb9 = r10-32, k_dsbb = r10+32, k_dsbe = r10+64, k_dsbo = r10+96
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	and	r11,0x30	; r11 = ((rounds*16) xor 0x30) and 0x30
+	pxor	xmm2,xmm5
+	movdqa	xmm5,XMMWORD[(($L$k_mc_forward+48))]
+	pxor	xmm0,xmm2
+	add	r9,16
+	add	r11,r10	; r11 = k_dsbd + offset, so [-352+r11] below addresses k_sr + offset
+	jmp	NEAR $L$dec_entry
+
+ALIGN	16
+$L$dec_loop:
+
+
+; four table pairs in turn (k_dsb9, k_dsbd, k_dsbb, k_dsbe), each result xored into xmm0
+	movdqa	xmm4,XMMWORD[((-32))+r10]
+	movdqa	xmm1,XMMWORD[((-16))+r10]
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+DB	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,XMMWORD[r10]
+	pxor	xmm0,xmm1
+	movdqa	xmm1,XMMWORD[16+r10]
+
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+DB	102,15,56,0,197	; pshufb xmm0,xmm5
+DB	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,XMMWORD[32+r10]
+	pxor	xmm0,xmm1
+	movdqa	xmm1,XMMWORD[48+r10]
+
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+DB	102,15,56,0,197	; pshufb xmm0,xmm5
+DB	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	movdqa	xmm4,XMMWORD[64+r10]
+	pxor	xmm0,xmm1
+	movdqa	xmm1,XMMWORD[80+r10]
+
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+DB	102,15,56,0,197	; pshufb xmm0,xmm5
+DB	102,15,56,0,203	; pshufb xmm1,xmm3
+	pxor	xmm0,xmm4
+	add	r9,16
+DB	102,15,58,15,237,12	; palignr xmm5,xmm5,12 (byte-rotates the shuffle mask in xmm5)
+	pxor	xmm0,xmm1
+	sub	rax,1	; ZF set here ends the loop at the jnz below
+
+$L$dec_entry:
+
+	movdqa	xmm1,xmm9
+	pandn	xmm1,xmm0
+	movdqa	xmm2,xmm11	; k_inv+16
+	psrld	xmm1,4	; xmm1 = high nibbles
+	pand	xmm0,xmm9	; xmm0 = low nibbles
+DB	102,15,56,0,208	; pshufb xmm2,xmm0
+	movdqa	xmm3,xmm10	; k_inv
+	pxor	xmm0,xmm1
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	movdqa	xmm4,xmm10
+	pxor	xmm3,xmm2
+DB	102,15,56,0,224	; pshufb xmm4,xmm0
+	pxor	xmm4,xmm2
+	movdqa	xmm2,xmm10
+DB	102,15,56,0,211	; pshufb xmm2,xmm3
+	movdqa	xmm3,xmm10
+	pxor	xmm2,xmm0
+DB	102,15,56,0,220	; pshufb xmm3,xmm4
+	movdqu	xmm0,XMMWORD[r9]	; next round key, xored into the state by the following round
+	pxor	xmm3,xmm1
+	jnz	NEAR $L$dec_loop
+
+; final round: k_dsbo lookups, round key, then a byte permutation from k_sr
+	movdqa	xmm4,XMMWORD[96+r10]
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+	pxor	xmm4,xmm0
+	movdqa	xmm0,XMMWORD[112+r10]
+	movdqa	xmm2,XMMWORD[((-352))+r11]
+DB	102,15,56,0,195	; pshufb xmm0,xmm3
+	pxor	xmm0,xmm4
+DB	102,15,56,0,194	; pshufb xmm0,xmm2
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+ALIGN	16
+_vpaes_schedule_core:
+; Shared key-expansion routine behind vpaes_set_encrypt_key and
+; vpaes_set_decrypt_key.  Register inputs, as those callers set them up:
+;   rdi = user key, esi = key length in bits, rdx = where the first round
+;   key is stored, rcx = 0 for an encryption schedule / non-zero for a
+;   decryption schedule, r8 = offset (multiple of 16, 0..0x30) into k_sr.
+; xmm0-xmm7 are cleared before returning; xmm8 is left holding k_rcon state.
+	call	_vpaes_preheat
+	movdqa	xmm8,XMMWORD[$L$k_rcon]
+	movdqu	xmm0,XMMWORD[rdi]	; first 16 key bytes
+
+
+	movdqa	xmm3,xmm0	; untransformed copy, used on the decrypt path below
+	lea	r11,[$L$k_ipt]	; table pair consumed by _vpaes_schedule_transform
+	call	_vpaes_schedule_transform
+	movdqa	xmm7,xmm0	; xmm7 carries the previous round key into _vpaes_schedule_round
+
+	lea	r10,[$L$k_sr]
+	test	rcx,rcx
+	jnz	NEAR $L$schedule_am_decrypting
+
+; encryption schedule: store the transformed key as the first round key
+	movdqu	XMMWORD[rdx],xmm0
+	jmp	NEAR $L$schedule_go
+
+$L$schedule_am_decrypting:
+; decryption schedule: store the raw key permuted by k_sr + r8
+	movdqa	xmm1,XMMWORD[r10*1+r8]
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	movdqu	XMMWORD[rdx],xmm3
+	xor	r8,0x30
+
+$L$schedule_go:
+	cmp	esi,192	; above 192 -> 256-bit path, equal -> 192-bit path, below -> falls into 128-bit path
+	ja	NEAR $L$schedule_256
+	je	NEAR $L$schedule_192
+
+
+
+
+
+
+
+
+
+; 128-bit path: 10 rounds, each stored through _vpaes_schedule_mangle except the last
+$L$schedule_128:
+	mov	esi,10
+
+$L$oop_schedule_128:
+	call	_vpaes_schedule_round
+	dec	rsi
+	jz	NEAR $L$schedule_mangle_last
+	call	_vpaes_schedule_mangle
+	jmp	NEAR $L$oop_schedule_128
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; 192-bit path: 4 iterations; each runs two rounds and stores three round
+; keys, except the last, which stores two and exits via $L$schedule_mangle_last.
+ALIGN	16
+$L$schedule_192:
+	movdqu	xmm0,XMMWORD[8+rdi]	; key bytes 8..23
+	call	_vpaes_schedule_transform
+	movdqa	xmm6,xmm0
+	pxor	xmm4,xmm4
+	movhlps	xmm6,xmm4	; clear the low 64 bits of xmm6
+	mov	esi,4
+
+$L$oop_schedule_192:
+	call	_vpaes_schedule_round
+DB	102,15,58,15,198,8	; palignr xmm0,xmm6,8
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_round
+	dec	rsi
+	jz	NEAR $L$schedule_mangle_last
+	call	_vpaes_schedule_mangle
+	call	_vpaes_schedule_192_smear
+	jmp	NEAR $L$oop_schedule_192
+
+
+
+
+
+
+
+
+
+; 256-bit path: 7 iterations alternating a full round with a "low" round on the other key half
+
+ALIGN	16
+$L$schedule_256:
+	movdqu	xmm0,XMMWORD[16+rdi]	; key bytes 16..31
+	call	_vpaes_schedule_transform
+	mov	esi,7
+
+$L$oop_schedule_256:
+	call	_vpaes_schedule_mangle
+	movdqa	xmm6,xmm0	; keep this half for the low round below
+
+
+	call	_vpaes_schedule_round
+	dec	rsi
+	jz	NEAR $L$schedule_mangle_last
+	call	_vpaes_schedule_mangle
+
+; low round: temporarily swap xmm6 in as xmm7, restoring xmm7 afterwards
+	pshufd	xmm0,xmm0,0xFF	; broadcast the top dword
+	movdqa	xmm5,xmm7
+	movdqa	xmm7,xmm6
+	call	_vpaes_schedule_low_round
+	movdqa	xmm7,xmm5
+
+	jmp	NEAR $L$oop_schedule_256
+
+
+
+
+
+
+
+
+
+
+; last round key: uses k_opt (encrypt) or k_deskew (decrypt) as the output transform
+
+ALIGN	16
+$L$schedule_mangle_last:
+
+	lea	r11,[$L$k_deskew]
+	test	rcx,rcx
+	jnz	NEAR $L$schedule_mangle_last_dec
+
+
+	movdqa	xmm1,XMMWORD[r10*1+r8]
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	lea	r11,[$L$k_opt]
+	add	rdx,32	; with the -16 below: net +16 when encrypting, -16 when decrypting
+
+$L$schedule_mangle_last_dec:
+	add	rdx,-16
+	pxor	xmm0,XMMWORD[$L$k_s63]
+	call	_vpaes_schedule_transform
+	movdqu	XMMWORD[rdx],xmm0
+
+; wipe the registers that held key material
+	pxor	xmm0,xmm0
+	pxor	xmm1,xmm1
+	pxor	xmm2,xmm2
+	pxor	xmm3,xmm3
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	pxor	xmm6,xmm6
+	pxor	xmm7,xmm7
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; _vpaes_schedule_192_smear: helper for the 192-bit key schedule.
+; In:  xmm6, xmm7.  Out: xmm0 = xmm6 xor two dword-shuffled terms (below),
+;      xmm6 = the same value with its low 64 bits cleared, xmm1 = 0.
+; xmm7 is only read.
+ALIGN	16
+_vpaes_schedule_192_smear:
+
+	pshufd	xmm1,xmm6,0x80	; xmm1 dwords = {xmm6[0], xmm6[0], xmm6[0], xmm6[2]} (low to high)
+	pshufd	xmm0,xmm7,0xFE	; xmm0 dwords = {xmm7[2], xmm7[3], xmm7[3], xmm7[3]}
+	pxor	xmm6,xmm1
+	pxor	xmm1,xmm1
+	pxor	xmm6,xmm0
+	movdqa	xmm0,xmm6
+	movhlps	xmm6,xmm1	; low qword of xmm6 <- high qword of xmm1 (zero)
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; _vpaes_schedule_round: one key-schedule round.
+; In:  xmm0 = current word material, xmm7 = previous round key, xmm8 =
+;      round-constant state, xmm9-xmm13 = tables from _vpaes_preheat.
+; Out: xmm0 = xmm7 = new round key; xmm8 rotated by one byte.  Uses xmm1-xmm4.
+ALIGN	16
+_vpaes_schedule_round:
+
+; pull one byte out of xmm8 into xmm7, then rotate xmm8 for the next call
+	pxor	xmm1,xmm1
+DB	102,65,15,58,15,200,15	; palignr xmm1,xmm8,15: byte 0 of xmm1 = byte 15 of xmm8, rest zero
+DB	102,69,15,58,15,192,15	; palignr xmm8,xmm8,15: rotate xmm8 by one byte
+	pxor	xmm7,xmm1
+
+
+	pshufd	xmm0,xmm0,0xFF	; broadcast the top dword
+DB	102,15,58,15,192,1	; palignr xmm0,xmm0,1: rotate xmm0 by one byte
+
+
+
+; execution falls through; _vpaes_schedule_low_round is also called directly (256-bit path)
+_vpaes_schedule_low_round:
+
+	movdqa	xmm1,xmm7
+	pslldq	xmm7,4
+	pxor	xmm7,xmm1	; xmm7 ^= xmm7 shifted up by 4 bytes
+	movdqa	xmm1,xmm7
+	pslldq	xmm7,8
+	pxor	xmm7,xmm1	; ...and again by 8 bytes
+	pxor	xmm7,XMMWORD[$L$k_s63]
+
+; nibble-split table lookups on xmm0 (same sequence as $L$enc_entry in _vpaes_encrypt_core)
+	movdqa	xmm1,xmm9
+	pandn	xmm1,xmm0
+	psrld	xmm1,4
+	pand	xmm0,xmm9
+	movdqa	xmm2,xmm11
+DB	102,15,56,0,208	; pshufb xmm2,xmm0
+	pxor	xmm0,xmm1
+	movdqa	xmm3,xmm10
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+	movdqa	xmm4,xmm10
+DB	102,15,56,0,224	; pshufb xmm4,xmm0
+	pxor	xmm4,xmm2
+	movdqa	xmm2,xmm10
+DB	102,15,56,0,211	; pshufb xmm2,xmm3
+	pxor	xmm2,xmm0
+	movdqa	xmm3,xmm10
+DB	102,15,56,0,220	; pshufb xmm3,xmm4
+	pxor	xmm3,xmm1
+	movdqa	xmm4,xmm13	; k_sb1
+DB	102,15,56,0,226	; pshufb xmm4,xmm2
+	movdqa	xmm0,xmm12	; k_sb1+16
+DB	102,15,56,0,195	; pshufb xmm0,xmm3
+	pxor	xmm0,xmm4
+
+; combine with the smeared previous key and make it the new xmm7
+	pxor	xmm0,xmm7
+	movdqa	xmm7,xmm0
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+; _vpaes_schedule_transform: per-byte transform through a pair of 16-entry
+; tables.  In: xmm0 = data, r11 -> table pair (low-nibble table at [r11],
+; high-nibble table at [16+r11]), xmm9 = 0x0F mask.
+; Out: xmm0 = lo_table[low nibble] xor hi_table[high nibble].  Uses xmm1, xmm2.
+ALIGN	16
+_vpaes_schedule_transform:
+
+	movdqa	xmm1,xmm9
+	pandn	xmm1,xmm0
+	psrld	xmm1,4	; xmm1 = high nibbles
+	pand	xmm0,xmm9	; xmm0 = low nibbles
+	movdqa	xmm2,XMMWORD[r11]
+DB	102,15,56,0,208	; pshufb xmm2,xmm0
+	movdqa	xmm0,XMMWORD[16+r11]
+DB	102,15,56,0,193	; pshufb xmm0,xmm1
+	pxor	xmm0,xmm2
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+; _vpaes_schedule_mangle: convert the round key in xmm0 to its stored form
+; and write it to the schedule.  xmm0 itself is left unchanged.
+; In:  rcx = 0 (encrypt) / non-zero (decrypt), rdx = last written slot,
+;      r8 = current k_sr offset, r10 = k_sr.  rdx moves +16 (encrypt) or
+;      -16 (decrypt); r8 = (r8 - 16) and 0x30.  Uses xmm1-xmm5, r11 (decrypt).
+ALIGN	16
+_vpaes_schedule_mangle:
+
+	movdqa	xmm4,xmm0
+	movdqa	xmm5,XMMWORD[$L$k_mc_forward]
+	test	rcx,rcx
+	jnz	NEAR $L$schedule_mangle_dec
+
+; encrypt: xmm3 = xor of the key (xor k_s63) shuffled by k_mc_forward once, twice and three times
+	add	rdx,16
+	pxor	xmm4,XMMWORD[$L$k_s63]
+DB	102,15,56,0,229	; pshufb xmm4,xmm5
+	movdqa	xmm3,xmm4
+DB	102,15,56,0,229	; pshufb xmm4,xmm5
+	pxor	xmm3,xmm4
+DB	102,15,56,0,229	; pshufb xmm4,xmm5
+	pxor	xmm3,xmm4
+
+	jmp	NEAR $L$schedule_mangle_both
+ALIGN	16
+$L$schedule_mangle_dec:
+; decrypt: four nibble-table pairs (k_dksd, k_dksb, k_dkse, k_dks9), shuffling by xmm5 between them
+	lea	r11,[$L$k_dksd]
+	movdqa	xmm1,xmm9
+	pandn	xmm1,xmm4
+	psrld	xmm1,4	; xmm1 = high nibbles
+	pand	xmm4,xmm9	; xmm4 = low nibbles
+
+	movdqa	xmm2,XMMWORD[r11]
+DB	102,15,56,0,212	; pshufb xmm2,xmm4
+	movdqa	xmm3,XMMWORD[16+r11]
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+DB	102,15,56,0,221	; pshufb xmm3,xmm5
+
+	movdqa	xmm2,XMMWORD[32+r11]
+DB	102,15,56,0,212	; pshufb xmm2,xmm4
+	pxor	xmm2,xmm3
+	movdqa	xmm3,XMMWORD[48+r11]
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+DB	102,15,56,0,221	; pshufb xmm3,xmm5
+
+	movdqa	xmm2,XMMWORD[64+r11]
+DB	102,15,56,0,212	; pshufb xmm2,xmm4
+	pxor	xmm2,xmm3
+	movdqa	xmm3,XMMWORD[80+r11]
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+DB	102,15,56,0,221	; pshufb xmm3,xmm5
+
+	movdqa	xmm2,XMMWORD[96+r11]
+DB	102,15,56,0,212	; pshufb xmm2,xmm4
+	pxor	xmm2,xmm3
+	movdqa	xmm3,XMMWORD[112+r11]
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	pxor	xmm3,xmm2
+
+	add	rdx,-16
+
+$L$schedule_mangle_both:
+	movdqa	xmm1,XMMWORD[r10*1+r8]	; k_sr + r8
+DB	102,15,56,0,217	; pshufb xmm3,xmm1
+	add	r8,-16
+	and	r8,0x30
+	movdqu	XMMWORD[rdx],xmm3
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+global	vpaes_set_encrypt_key
+; vpaes_set_encrypt_key(arg1 = user key, arg2 = key length in bits, arg3 = key schedule); always returns 0 in eax.
+ALIGN	16
+vpaes_set_encrypt_key:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_vpaes_set_encrypt_key:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+; The three moves above copy the Win64 argument registers (rcx, rdx, r8)
+; into rdi, rsi, rdx, which the internal routines read.  The caller's rdi and
+; rsi were parked in the two stack slots above the return address first.
+%ifdef BORINGSSL_DISPATCH_TEST
+EXTERN	BORINGSSL_function_hit
+	mov	BYTE[((BORINGSSL_function_hit+5))],1
+%endif
+
+	lea	rsp,[((-184))+rsp]	; frame holding xmm6-xmm15, which Win64 callees must preserve
+	movaps	XMMWORD[16+rsp],xmm6
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$enc_key_body:
+	mov	eax,esi
+	shr	eax,5
+	add	eax,5	; bits/32 + 5: 9, 11 or 13 for 128-, 192- or 256-bit keys
+	mov	DWORD[240+rdx],eax	; read back as the loop counter by the encrypt/decrypt cores
+
+	mov	ecx,0	; 0 = build an encryption schedule
+	mov	r8d,0x30	; initial k_sr offset
+	call	_vpaes_schedule_core
+	movaps	xmm6,XMMWORD[16+rsp]
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	xmm15,XMMWORD[160+rsp]
+	lea	rsp,[184+rsp]
+$L$enc_key_epilogue:
+	xor	eax,eax
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_vpaes_set_encrypt_key:
+
+global	vpaes_set_decrypt_key
+; vpaes_set_decrypt_key(arg1 = user key, arg2 = key length in bits, arg3 = key schedule); always returns 0 in eax.
+ALIGN	16
+vpaes_set_decrypt_key:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_vpaes_set_decrypt_key:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+; Win64 arguments (rcx, rdx, r8) are now in rdi, rsi, rdx for the internal
+; routines; the caller's rdi/rsi sit in the stack slots above the return
+; address until the epilogue reloads them.
+	lea	rsp,[((-184))+rsp]	; frame holding xmm6-xmm15, which Win64 callees must preserve
+	movaps	XMMWORD[16+rsp],xmm6
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$dec_key_body:
+	mov	eax,esi
+	shr	eax,5
+	add	eax,5	; bits/32 + 5: 9, 11 or 13 for 128-, 192- or 256-bit keys
+	mov	DWORD[240+rdx],eax
+	shl	eax,4
+	lea	rdx,[16+rax*1+rdx]	; start at schedule + 16*counter + 16; the decrypt path steps rdx downwards
+
+	mov	ecx,1	; non-zero = build a decryption schedule
+	mov	r8d,esi
+	shr	r8d,1
+	and	r8d,32
+	xor	r8d,32	; k_sr offset: 32 for 128/256-bit keys, 0 for 192-bit keys
+	call	_vpaes_schedule_core
+	movaps	xmm6,XMMWORD[16+rsp]
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	xmm15,XMMWORD[160+rsp]
+	lea	rsp,[184+rsp]
+$L$dec_key_epilogue:
+	xor	eax,eax
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_vpaes_set_decrypt_key:
+
+global	vpaes_encrypt
+; vpaes_encrypt(arg1 = 16-byte input, arg2 = 16-byte output, arg3 = key schedule): encrypts one block.
+ALIGN	16
+vpaes_encrypt:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_vpaes_encrypt:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+; Win64 arguments (rcx, rdx, r8) are now in rdi, rsi, rdx; the caller's
+; rdi/rsi sit in the stack slots above the return address until the
+; epilogue reloads them.
+%ifdef BORINGSSL_DISPATCH_TEST
+EXTERN	BORINGSSL_function_hit
+	mov	BYTE[((BORINGSSL_function_hit+4))],1
+%endif
+	lea	rsp,[((-184))+rsp]	; frame holding xmm6-xmm15, which Win64 callees must preserve
+	movaps	XMMWORD[16+rsp],xmm6
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$enc_body:
+	movdqu	xmm0,XMMWORD[rdi]	; load the input block (unaligned access allowed)
+	call	_vpaes_preheat	; load tables into xmm9-xmm15
+	call	_vpaes_encrypt_core	; xmm0 -> xmm0, key schedule in rdx
+	movdqu	XMMWORD[rsi],xmm0
+	movaps	xmm6,XMMWORD[16+rsp]
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	xmm15,XMMWORD[160+rsp]
+	lea	rsp,[184+rsp]
+$L$enc_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_vpaes_encrypt:
+
+global	vpaes_decrypt
+; vpaes_decrypt(arg1 = 16-byte input, arg2 = 16-byte output, arg3 = key schedule): decrypts one block.
+ALIGN	16
+vpaes_decrypt:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_vpaes_decrypt:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+; Win64 arguments (rcx, rdx, r8) are now in rdi, rsi, rdx; the caller's
+; rdi/rsi sit in the stack slots above the return address until the
+; epilogue reloads them.
+	lea	rsp,[((-184))+rsp]	; frame holding xmm6-xmm15, which Win64 callees must preserve
+	movaps	XMMWORD[16+rsp],xmm6
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$dec_body:
+	movdqu	xmm0,XMMWORD[rdi]	; load the input block (unaligned access allowed)
+	call	_vpaes_preheat	; load tables into xmm9-xmm15
+	call	_vpaes_decrypt_core	; xmm0 -> xmm0, key schedule in rdx
+	movdqu	XMMWORD[rsi],xmm0
+	movaps	xmm6,XMMWORD[16+rsp]
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	xmm15,XMMWORD[160+rsp]
+	lea	rsp,[184+rsp]
+$L$dec_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_vpaes_decrypt:
+global	vpaes_cbc_encrypt
+; vpaes_cbc_encrypt(arg1 = input, arg2 = output, arg3 = byte length, arg4 = key schedule, arg5 = 16-byte chaining value, arg6 = non-zero to encrypt / zero to decrypt)
+ALIGN	16
+vpaes_cbc_encrypt:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_vpaes_cbc_encrypt:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]	; 5th argument, from the caller's stack
+	mov	r9,QWORD[48+rsp]	; 6th argument
+
+; Processes whole 16-byte blocks only: returns immediately when the length
+; is below 16, and the loops stop once fewer than 16 bytes remain.
+	xchg	rdx,rcx	; rdx = key schedule (as the cores expect), rcx = byte length
+	sub	rcx,16
+	jc	NEAR $L$cbc_abort
+	lea	rsp,[((-184))+rsp]	; frame holding xmm6-xmm15, which Win64 callees must preserve
+	movaps	XMMWORD[16+rsp],xmm6
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$cbc_body:
+	movdqu	xmm6,XMMWORD[r8]	; xmm6 = current chaining value
+	sub	rsi,rdi	; rsi = output - input, so [rdi+rsi] addresses the output block
+	call	_vpaes_preheat
+	cmp	r9d,0
+	je	NEAR $L$cbc_dec_loop
+	jmp	NEAR $L$cbc_enc_loop
+ALIGN	16
+$L$cbc_enc_loop:
+	movdqu	xmm0,XMMWORD[rdi]
+	pxor	xmm0,xmm6	; xor the input block with the chaining value
+	call	_vpaes_encrypt_core
+	movdqa	xmm6,xmm0	; the output block becomes the next chaining value
+	movdqu	XMMWORD[rdi*1+rsi],xmm0
+	lea	rdi,[16+rdi]
+	sub	rcx,16
+	jnc	NEAR $L$cbc_enc_loop
+	jmp	NEAR $L$cbc_done
+ALIGN	16
+$L$cbc_dec_loop:
+	movdqu	xmm0,XMMWORD[rdi]
+	movdqa	xmm7,xmm0	; keep the input block; it is the next chaining value
+	call	_vpaes_decrypt_core
+	pxor	xmm0,xmm6
+	movdqa	xmm6,xmm7
+	movdqu	XMMWORD[rdi*1+rsi],xmm0
+	lea	rdi,[16+rdi]
+	sub	rcx,16
+	jnc	NEAR $L$cbc_dec_loop
+$L$cbc_done:
+	movdqu	XMMWORD[r8],xmm6	; write the final chaining value back through arg5
+	movaps	xmm6,XMMWORD[16+rsp]
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	xmm15,XMMWORD[160+rsp]
+	lea	rsp,[184+rsp]
+$L$cbc_epilogue:
+$L$cbc_abort:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_vpaes_cbc_encrypt:
+global	vpaes_ctr32_encrypt_blocks
+; vpaes_ctr32_encrypt_blocks(arg1 = input, arg2 = output, arg3 = number of 16-byte blocks, arg4 = key schedule, arg5 = 16-byte counter block)
+ALIGN	16
+vpaes_ctr32_encrypt_blocks:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_vpaes_ctr32_encrypt_blocks:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]	; 5th argument, from the caller's stack
+
+; Only the last 4 bytes of the counter block are incremented (32-bit
+; wrap-around, no carry into the other 12 bytes).  The counter block at
+; arg5 is read but never written back.
+
+	xchg	rdx,rcx	; rdx = key schedule (as the cores expect), rcx = block count
+	test	rcx,rcx
+	jz	NEAR $L$ctr32_abort
+	lea	rsp,[((-184))+rsp]	; frame holding xmm6-xmm15, which Win64 callees must preserve
+	movaps	XMMWORD[16+rsp],xmm6
+	movaps	XMMWORD[32+rsp],xmm7
+	movaps	XMMWORD[48+rsp],xmm8
+	movaps	XMMWORD[64+rsp],xmm9
+	movaps	XMMWORD[80+rsp],xmm10
+	movaps	XMMWORD[96+rsp],xmm11
+	movaps	XMMWORD[112+rsp],xmm12
+	movaps	XMMWORD[128+rsp],xmm13
+	movaps	XMMWORD[144+rsp],xmm14
+	movaps	XMMWORD[160+rsp],xmm15
+$L$ctr32_body:
+	movdqu	xmm0,XMMWORD[r8]	; xmm0 = counter block as given
+	movdqa	xmm8,XMMWORD[$L$ctr_add_one]
+	sub	rsi,rdi	; rsi = output - input, so [rdi+rsi] addresses the output block
+	call	_vpaes_preheat
+	movdqa	xmm6,xmm0
+	pshufb	xmm6,XMMWORD[$L$rev_ctr]	; xmm6 = counter block with its last 4 bytes byte-swapped so paddd can increment it
+
+	test	rcx,1
+	jz	NEAR $L$ctr32_prep_loop
+
+
+; odd block count: handle one block alone so the main loop can go two at a time
+	movdqu	xmm7,XMMWORD[rdi]
+	call	_vpaes_encrypt_core
+	pxor	xmm0,xmm7	; output = encrypted counter block xor input
+	paddd	xmm6,xmm8
+	movdqu	XMMWORD[rdi*1+rsi],xmm0
+	sub	rcx,1
+	lea	rdi,[16+rdi]	; lea leaves the flags from "sub rcx,1" intact for the jz
+	jz	NEAR $L$ctr32_done
+
+$L$ctr32_prep_loop:
+
+; xmm14 = counter for the even block, xmm15 = counter + 1 for the odd block
+	movdqa	xmm14,xmm6
+	movdqa	xmm15,xmm6
+	paddd	xmm15,xmm8
+
+$L$ctr32_loop:
+	movdqa	xmm1,XMMWORD[$L$rev_ctr]
+	movdqa	xmm0,xmm14
+	movdqa	xmm6,xmm15
+DB	102,15,56,0,193	; pshufb xmm0,xmm1: swap the counter bytes back to stored order
+DB	102,15,56,0,241	; pshufb xmm6,xmm1
+	call	_vpaes_encrypt_core_2x
+	movdqu	xmm1,XMMWORD[rdi]
+	movdqu	xmm2,XMMWORD[16+rdi]
+	movdqa	xmm3,XMMWORD[$L$ctr_add_two]
+	pxor	xmm0,xmm1
+	pxor	xmm6,xmm2
+	paddd	xmm14,xmm3
+	paddd	xmm15,xmm3
+	movdqu	XMMWORD[rdi*1+rsi],xmm0
+	movdqu	XMMWORD[16+rdi*1+rsi],xmm6
+	sub	rcx,2
+	lea	rdi,[32+rdi]	; lea leaves the flags from "sub rcx,2" intact for the jnz
+	jnz	NEAR $L$ctr32_loop
+
+$L$ctr32_done:
+	movaps	xmm6,XMMWORD[16+rsp]
+	movaps	xmm7,XMMWORD[32+rsp]
+	movaps	xmm8,XMMWORD[48+rsp]
+	movaps	xmm9,XMMWORD[64+rsp]
+	movaps	xmm10,XMMWORD[80+rsp]
+	movaps	xmm11,XMMWORD[96+rsp]
+	movaps	xmm12,XMMWORD[112+rsp]
+	movaps	xmm13,XMMWORD[128+rsp]
+	movaps	xmm14,XMMWORD[144+rsp]
+	movaps	xmm15,XMMWORD[160+rsp]
+	lea	rsp,[184+rsp]
+$L$ctr32_epilogue:
+$L$ctr32_abort:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_vpaes_ctr32_encrypt_blocks:
+
+
+
+; _vpaes_preheat: load the lookup tables the cores keep in registers.
+; Offsets are relative to k_s0F inside _vpaes_consts: k_inv sits 32 bytes
+; before it, k_sb1 48 bytes after it and k_sb2 80 bytes after it.
+; Out: r10 = k_s0F, xmm9-xmm15 as noted below.  No inputs.
+ALIGN	16
+_vpaes_preheat:
+
+	lea	r10,[$L$k_s0F]
+	movdqa	xmm10,XMMWORD[((-32))+r10]	; k_inv
+	movdqa	xmm11,XMMWORD[((-16))+r10]	; k_inv+16
+	movdqa	xmm9,XMMWORD[r10]	; k_s0F: 0x0F in every byte
+	movdqa	xmm13,XMMWORD[48+r10]	; k_sb1
+	movdqa	xmm12,XMMWORD[64+r10]	; k_sb1+16
+	movdqa	xmm15,XMMWORD[80+r10]	; k_sb2
+	movdqa	xmm14,XMMWORD[96+r10]	; k_sb2+16
+	DB	0F3h,0C3h		;repret
+
+
+
+
+; _vpaes_consts: read-only tables used by every vpaes routine.  Code
+; addresses some tables relative to a neighbouring label (for example
+; [-64+r10] from k_mc_backward reaches k_mc_forward), so the order and size
+; of the entries below must not change.
+ALIGN	64
+_vpaes_consts:
+$L$k_inv:
+	DQ	0x0E05060F0D080180,0x040703090A0B0C02
+	DQ	0x01040A060F0B0780,0x030D0E0C02050809
+
+$L$k_s0F:
+	DQ	0x0F0F0F0F0F0F0F0F,0x0F0F0F0F0F0F0F0F
+
+$L$k_ipt:
+	DQ	0xC2B2E8985A2A7000,0xCABAE09052227808
+	DQ	0x4C01307D317C4D00,0xCD80B1FCB0FDCC81
+
+$L$k_sb1:
+	DQ	0xB19BE18FCB503E00,0xA5DF7A6E142AF544
+	DQ	0x3618D415FAE22300,0x3BF7CCC10D2ED9EF
+$L$k_sb2:
+	DQ	0xE27A93C60B712400,0x5EB7E955BC982FCD
+	DQ	0x69EB88400AE12900,0xC2A163C8AB82234A
+$L$k_sbo:
+	DQ	0xD0D26D176FBDC700,0x15AABF7AC502A878
+	DQ	0xCFE474A55FBB6A00,0x8E1E90D1412B35FA
+
+$L$k_mc_forward:
+	DQ	0x0407060500030201,0x0C0F0E0D080B0A09
+	DQ	0x080B0A0904070605,0x000302010C0F0E0D
+	DQ	0x0C0F0E0D080B0A09,0x0407060500030201
+	DQ	0x000302010C0F0E0D,0x080B0A0904070605
+
+$L$k_mc_backward:
+	DQ	0x0605040702010003,0x0E0D0C0F0A09080B
+	DQ	0x020100030E0D0C0F,0x0A09080B06050407
+	DQ	0x0E0D0C0F0A09080B,0x0605040702010003
+	DQ	0x0A09080B06050407,0x020100030E0D0C0F
+
+$L$k_sr:
+	DQ	0x0706050403020100,0x0F0E0D0C0B0A0908
+	DQ	0x030E09040F0A0500,0x0B06010C07020D08
+	DQ	0x0F060D040B020900,0x070E050C030A0108
+	DQ	0x0B0E0104070A0D00,0x0306090C0F020508
+
+$L$k_rcon:
+	DQ	0x1F8391B9AF9DEEB6,0x702A98084D7C7D81
+
+$L$k_s63:
+	DQ	0x5B5B5B5B5B5B5B5B,0x5B5B5B5B5B5B5B5B
+
+$L$k_opt:
+	DQ	0xFF9F4929D6B66000,0xF7974121DEBE6808
+	DQ	0x01EDBD5150BCEC00,0xE10D5DB1B05C0CE0
+
+$L$k_deskew:
+	DQ	0x07E4A34047A4E300,0x1DFEB95A5DBEF91A
+	DQ	0x5F36B5DC83EA6900,0x2841C2ABF49D1E77
+
+
+
+; Tables read by the decrypt branch of _vpaes_schedule_mangle, which
+; addresses all four pairs from k_dksd at +0, +32, +64 and +96.
+$L$k_dksd:
+	DQ	0xFEB91A5DA3E44700,0x0740E3A45A1DBEF9
+	DQ	0x41C277F4B5368300,0x5FDC69EAAB289D1E
+$L$k_dksb:
+	DQ	0x9A4FCA1F8550D500,0x03D653861CC94C99
+	DQ	0x115BEDA7B6FC4A00,0xD993256F7E3482C8
+$L$k_dkse:
+	DQ	0xD5031CCA1FC9D600,0x53859A4C994F5086
+	DQ	0xA23196054FDC7BE8,0xCD5EF96A20B31487
+$L$k_dks9:
+	DQ	0xB6116FC87ED9A700,0x4AED933482255BFC
+	DQ	0x4576516227143300,0x8BB89FACE9DAFDCE
+
+
+
+; Tables read by _vpaes_decrypt_core, which addresses k_dsb9..k_dsbo
+; relative to k_dsbd (-32 .. +112).
+$L$k_dipt:
+	DQ	0x0F505B040B545F00,0x154A411E114E451A
+	DQ	0x86E383E660056500,0x12771772F491F194
+
+$L$k_dsb9:
+	DQ	0x851C03539A86D600,0xCAD51F504F994CC9
+	DQ	0xC03B1789ECD74900,0x725E2C9EB2FBA565
+$L$k_dsbd:
+	DQ	0x7D57CCDFE6B1A200,0xF56E9B13882A4439
+	DQ	0x3CE2FAF724C6CB00,0x2931180D15DEEFD3
+$L$k_dsbb:
+	DQ	0xD022649296B44200,0x602646F6B0F2D404
+	DQ	0xC19498A6CD596700,0xF3FF0C3E3255AA6B
+$L$k_dsbe:
+	DQ	0x46F2929626D4D000,0x2242600464B4F6B0
+	DQ	0x0C55A6CDFFAAC100,0x9467F36B98593E32
+$L$k_dsbo:
+	DQ	0x1387EA537EF94000,0xC7AA6DB9D4943E2D
+	DQ	0x12D7560F93441D00,0xCA4B8159D8C58E9C
+
+; pshufb mask: bytes 0..11 stay in place, bytes 12..15 are reversed
+$L$rev_ctr:
+	DQ	0x0706050403020100,0x0c0d0e0f0b0a0908
+
+; paddd operands: add 1 (or 2) to the highest 32-bit lane only
+$L$ctr_add_one:
+	DQ	0x0000000000000000,0x0000000100000000
+$L$ctr_add_two:
+	DQ	0x0000000000000000,0x0000000200000000
+; NUL-terminated ASCII: "Vector Permutation AES for x86_64/SSSE3, Mike Hamburg (Stanford University)"
+DB	86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105
+DB	111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54
+DB	52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97
+DB	109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32
+DB	85,110,105,118,101,114,115,105,116,121,41,0
+ALIGN	64
+
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+se_handler:	; Win64 exception handler shared by all vpaes entry points (see .xdata); r8 = CONTEXT*, r9 = DISPATCHER_CONTEXT*
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64
+; offsets below follow the Windows x64 CONTEXT / DISPATCHER_CONTEXT layouts
+	mov	rax,QWORD[120+r8]	; context->Rax (entry points copy rsp into rax first thing)
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	mov	rsi,QWORD[8+r9]	; disp->ImageBase
+	mov	r11,QWORD[56+r9]	; disp->HandlerData: the two RVAs stored after the handler in .xdata
+
+	mov	r10d,DWORD[r11]	; HandlerData[0] = RVA of the "body" label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$in_prologue
+; Rip is past the body label: the 184-byte frame may exist, so start from context->Rsp
+	mov	rax,QWORD[152+r8]	; context->Rsp
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1] = RVA of the "epilogue" label
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$in_prologue
+; inside the body: recover xmm6-xmm15 from the frame and step over it
+	lea	rsi,[16+rax]
+	lea	rdi,[512+r8]	; context->Xmm6
+	mov	ecx,20	; 20 qwords = 10 xmm registers
+	DD	0xa548f3fc	; bytes FC F3 48 A5 = cld; rep movsq
+	lea	rax,[184+rax]
+
+$L$in_prologue:
+	mov	rdi,QWORD[8+rax]	; caller's rdi/rsi as parked by the WIN64 prologue
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+; copy the adjusted CONTEXT into disp->ContextRecord
+	mov	rdi,QWORD[40+r9]
+	mov	rsi,r8
+	mov	ecx,154	; 154 qwords = 1232 bytes
+	DD	0xa548f3fc	; cld; rep movsq
+; RtlVirtualUnwind(0, ImageBase, ControlPc, FunctionEntry, ContextRecord, &HandlerData, &EstablisherFrame, NULL)
+	mov	rsi,r9
+	xor	rcx,rcx
+	mov	rdx,QWORD[8+rsi]
+	mov	r8,QWORD[rsi]
+	mov	r9,QWORD[16+rsi]
+	mov	r10,QWORD[40+rsi]
+	lea	r11,[56+rsi]
+	lea	r12,[24+rsi]
+	mov	QWORD[32+rsp],r10
+	mov	QWORD[40+rsp],r11
+	mov	QWORD[48+rsp],r12
+	mov	QWORD[56+rsp],rcx
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; ExceptionContinueSearch
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_vpaes_set_encrypt_key wrt ..imagebase	; function start (image-relative)
+	DD	$L$SEH_end_vpaes_set_encrypt_key wrt ..imagebase	; function end
+	DD	$L$SEH_info_vpaes_set_encrypt_key wrt ..imagebase	; its unwind record in .xdata
+; the remaining entries repeat the same (begin, end, info) triple per function
+	DD	$L$SEH_begin_vpaes_set_decrypt_key wrt ..imagebase
+	DD	$L$SEH_end_vpaes_set_decrypt_key wrt ..imagebase
+	DD	$L$SEH_info_vpaes_set_decrypt_key wrt ..imagebase
+
+	DD	$L$SEH_begin_vpaes_encrypt wrt ..imagebase
+	DD	$L$SEH_end_vpaes_encrypt wrt ..imagebase
+	DD	$L$SEH_info_vpaes_encrypt wrt ..imagebase
+
+	DD	$L$SEH_begin_vpaes_decrypt wrt ..imagebase
+	DD	$L$SEH_end_vpaes_decrypt wrt ..imagebase
+	DD	$L$SEH_info_vpaes_decrypt wrt ..imagebase
+
+	DD	$L$SEH_begin_vpaes_cbc_encrypt wrt ..imagebase
+	DD	$L$SEH_end_vpaes_cbc_encrypt wrt ..imagebase
+	DD	$L$SEH_info_vpaes_cbc_encrypt wrt ..imagebase
+
+	DD	$L$SEH_begin_vpaes_ctr32_encrypt_blocks wrt ..imagebase
+	DD	$L$SEH_end_vpaes_ctr32_encrypt_blocks wrt ..imagebase
+	DD	$L$SEH_info_vpaes_ctr32_encrypt_blocks wrt ..imagebase
+
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_vpaes_set_encrypt_key:
+DB	9,0,0,0	; byte 0 = version 1 + handler-present flag (1<<3); no prologue size, unwind codes or frame register
+	DD	se_handler wrt ..imagebase	; handler routine for this function
+	DD	$L$enc_key_body wrt ..imagebase,$L$enc_key_epilogue wrt ..imagebase	; handler data read by se_handler: body start, epilogue start
+$L$SEH_info_vpaes_set_decrypt_key:
+DB	9,0,0,0
+	DD	se_handler wrt ..imagebase
+	DD	$L$dec_key_body wrt ..imagebase,$L$dec_key_epilogue wrt ..imagebase
+$L$SEH_info_vpaes_encrypt:
+DB	9,0,0,0
+	DD	se_handler wrt ..imagebase
+	DD	$L$enc_body wrt ..imagebase,$L$enc_epilogue wrt ..imagebase
+$L$SEH_info_vpaes_decrypt:
+DB	9,0,0,0
+	DD	se_handler wrt ..imagebase
+	DD	$L$dec_body wrt ..imagebase,$L$dec_epilogue wrt ..imagebase
+$L$SEH_info_vpaes_cbc_encrypt:
+DB	9,0,0,0
+	DD	se_handler wrt ..imagebase
+	DD	$L$cbc_body wrt ..imagebase,$L$cbc_epilogue wrt ..imagebase
+$L$SEH_info_vpaes_ctr32_encrypt_blocks:
+DB	9,0,0,0
+	DD	se_handler wrt ..imagebase
+	DD	$L$ctr32_body wrt ..imagebase,$L$ctr32_epilogue wrt ..imagebase
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont.asm
new file mode 100644
index 0000000..d6d8bdd
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont.asm
@@ -0,0 +1,1481 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+EXTERN	OPENSSL_ia32cap_P
+
+global	bn_mul_mont
+; bn_mul_mont(arg1 = result, arg2 = a, arg3 = b, arg4 = modulus, arg5 = pointer to the n0 word, arg6 = length in 64-bit words); the path in this block returns 1 in rax.
+ALIGN	16
+bn_mul_mont:
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_bn_mul_mont:
+	mov	rdi,rcx
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+; Dispatch: lengths that are not a multiple of 4, or below 8, use the
+; generic loop at $L$mul_enter; otherwise the 4x / squaring code is used.
+	mov	r9d,r9d	; keep only the low 32 bits of the length
+	mov	rax,rsp
+
+	test	r9d,3
+	jnz	NEAR $L$mul_enter
+	cmp	r9d,8
+	jb	NEAR $L$mul_enter
+	lea	r11,[OPENSSL_ia32cap_P]
+	mov	r11d,DWORD[8+r11]	; capability word tested at $L$mul4x_enter
+	cmp	rdx,rsi
+	jne	NEAR $L$mul4x_enter
+	test	r9d,7	; a == b and length a multiple of 8 -> squaring path
+	jz	NEAR $L$sqr8x_enter
+	jmp	NEAR $L$mul4x_enter
+
+ALIGN	16
+$L$mul_enter:
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+; carve out the temporary vector below rsp, aligned down to 1024 bytes
+	neg	r9
+	mov	r11,rsp
+	lea	r10,[((-16))+r9*8+rsp]
+	neg	r9
+	and	r10,-1024
+
+
+
+
+
+
+
+; move rsp down at most one 4096-byte page at a time, reading from each page on the way
+
+	sub	r11,r10
+	and	r11,-4096
+	lea	rsp,[r11*1+r10]
+	mov	r11,QWORD[rsp]
+	cmp	rsp,r10
+	ja	NEAR $L$mul_page_walk
+	jmp	NEAR $L$mul_page_walk_done
+
+ALIGN	16
+$L$mul_page_walk:
+	lea	rsp,[((-4096))+rsp]
+	mov	r11,QWORD[rsp]
+	cmp	rsp,r10
+	ja	NEAR $L$mul_page_walk
+$L$mul_page_walk_done:
+
+	mov	QWORD[8+r9*8+rsp],rax	; remember the entry rsp just above the temporary vector
+
+$L$mul_body:
+	mov	r12,rdx	; r12 = b (rdx is clobbered by mul)
+	mov	r8,QWORD[r8]	; r8 = n0 value
+	mov	rbx,QWORD[r12]	; rbx = b[0]
+	mov	rax,QWORD[rsi]
+
+	xor	r14,r14	; r14 = outer index
+	xor	r15,r15	; r15 = inner index
+
+	mov	rbp,r8
+	mul	rbx
+	mov	r10,rax
+	mov	rax,QWORD[rcx]
+
+	imul	rbp,r10	; rbp = low product word * n0: the per-row multiplier for the modulus
+	mov	r11,rdx
+
+	mul	rbp
+	add	r10,rax
+	mov	rax,QWORD[8+rsi]
+	adc	rdx,0
+	mov	r13,rdx
+
+	lea	r15,[1+r15]
+	jmp	NEAR $L$1st_enter
+
+ALIGN	16
+$L$1st:
+	add	r13,rax
+	mov	rax,QWORD[r15*8+rsi]
+	adc	rdx,0
+	add	r13,r11
+	mov	r11,r10
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13
+	mov	r13,rdx
+
+$L$1st_enter:
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[r15*8+rcx]
+	adc	rdx,0
+	lea	r15,[1+r15]
+	mov	r10,rdx
+
+	mul	rbp
+	cmp	r15,r9
+	jne	NEAR $L$1st
+
+	add	r13,rax
+	mov	rax,QWORD[rsi]
+	adc	rdx,0
+	add	r13,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13
+	mov	r13,rdx
+	mov	r11,r10
+
+	xor	rdx,rdx
+	add	r13,r11
+	adc	rdx,0
+	mov	QWORD[((-8))+r9*8+rsp],r13
+	mov	QWORD[r9*8+rsp],rdx	; top carry word of the temporary vector
+
+	lea	r14,[1+r14]
+	jmp	NEAR $L$outer
+ALIGN	16
+$L$outer:
+	mov	rbx,QWORD[r14*8+r12]	; rbx = b[r14]
+	xor	r15,r15
+	mov	rbp,r8
+	mov	r10,QWORD[rsp]
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[rcx]
+	adc	rdx,0
+
+	imul	rbp,r10
+	mov	r11,rdx
+
+	mul	rbp
+	add	r10,rax
+	mov	rax,QWORD[8+rsi]
+	adc	rdx,0
+	mov	r10,QWORD[8+rsp]
+	mov	r13,rdx
+
+	lea	r15,[1+r15]
+	jmp	NEAR $L$inner_enter
+
+ALIGN	16
+$L$inner:
+	add	r13,rax
+	mov	rax,QWORD[r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	mov	r10,QWORD[r15*8+rsp]
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13
+	mov	r13,rdx
+
+$L$inner_enter:
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[r15*8+rcx]
+	adc	rdx,0
+	add	r10,r11
+	mov	r11,rdx
+	adc	r11,0
+	lea	r15,[1+r15]
+
+	mul	rbp
+	cmp	r15,r9
+	jne	NEAR $L$inner
+
+	add	r13,rax
+	mov	rax,QWORD[rsi]
+	adc	rdx,0
+	add	r13,r10
+	mov	r10,QWORD[r15*8+rsp]
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13
+	mov	r13,rdx
+
+	xor	rdx,rdx
+	add	r13,r11
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-8))+r9*8+rsp],r13
+	mov	QWORD[r9*8+rsp],rdx
+
+	lea	r14,[1+r14]
+	cmp	r14,r9
+	jb	NEAR $L$outer
+
+	xor	r14,r14	; also clears CF for the first sbb below
+	mov	rax,QWORD[rsp]
+	mov	r15,r9
+
+ALIGN	16
+$L$sub:	sbb	rax,QWORD[r14*8+rcx]
+	mov	QWORD[r14*8+rdi],rax	; result = temporary - modulus, word by word
+	mov	rax,QWORD[8+r14*8+rsp]
+	lea	r14,[1+r14]
+	dec	r15	; dec and lea leave CF alone, so the borrow chains across iterations
+	jnz	NEAR $L$sub
+
+	sbb	rax,0	; rax = top word minus the final borrow; used below as the select mask for the temporary
+	mov	rbx,-1
+	xor	rbx,rax	; rbx = complement of that mask
+	xor	r14,r14
+	mov	r15,r9
+; branch-free select per word: result = (result and rbx) or (temporary and rax)
+$L$copy:
+	mov	rcx,QWORD[r14*8+rdi]
+	mov	rdx,QWORD[r14*8+rsp]
+	and	rcx,rbx
+	and	rdx,rax
+	mov	QWORD[r14*8+rsp],r9	; overwrite the temporary word (with the length value)
+	or	rdx,rcx
+	mov	QWORD[r14*8+rdi],rdx
+	lea	r14,[1+r14]
+	sub	r15,1
+	jnz	NEAR $L$copy
+
+	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = rsp as it was before the pushes at $L$mul_enter
+
+	mov	rax,1
+	mov	r15,QWORD[((-48))+rsi]
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]
+
+$L$mul_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_mul_mont:
+
+ALIGN	16
+bn_mul4x_mont:	; multiply path whose loops ($L$1st4x, $L$inner4x) handle four words per iteration
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are parked at [rsp+8]/[rsp+16] and reloaded at $L$mul4x_epilogue
+	mov	rax,rsp
+$L$SEH_begin_bn_mul4x_mont:
+	mov	rdi,rcx	; move incoming rcx,rdx,r8,r9 and two stack words into rdi,rsi,rdx,rcx,r8,r9
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+	mov	r9d,r9d	; 32-bit self-move: clears the upper 32 bits of r9 (the word count)
+	mov	rax,rsp	; rax = entry rsp; stored at [8+r9*8+rsp] below for the epilogue
+
+$L$mul4x_enter:
+	and	r11d,0x80100	; NOTE(review): r11d is not written earlier in this routine - presumably set by code that jumps to $L$mul4x_enter; confirm
+	cmp	r11d,0x80100
+	je	NEAR $L$mulx4x_enter	; both 0x80100 bits set: continue in bn_mulx4x_mont instead
+	push	rbx	; save rbx,rbp,r12-r15; restored from [rsi-8]..[rsi-48] before $L$mul4x_epilogue
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+
+	neg	r9
+	mov	r11,rsp
+	lea	r10,[((-32))+r9*8+rsp]	; r10 = rsp - 8*count - 32 (r9 is negated only around this lea)
+	neg	r9
+	and	r10,-1024	; round the frame base down to a 1024-byte boundary
+
+	sub	r11,r10
+	and	r11,-4096
+	lea	rsp,[r11*1+r10]	; rsp = r10 + ((old rsp - r10) rounded down to a multiple of 4096)
+	mov	r11,QWORD[rsp]	; read from the new rsp (touches that page)
+	cmp	rsp,r10
+	ja	NEAR $L$mul4x_page_walk
+	jmp	NEAR $L$mul4x_page_walk_done
+
+$L$mul4x_page_walk:
+	lea	rsp,[((-4096))+rsp]	; step down 4096 bytes at a time, reading each page, until rsp reaches r10
+	mov	r11,QWORD[rsp]
+	cmp	rsp,r10
+	ja	NEAR $L$mul4x_page_walk
+$L$mul4x_page_walk_done:
+
+	mov	QWORD[8+r9*8+rsp],rax	; keep entry rsp just above the scratch words
+
+$L$mul4x_body:
+	mov	QWORD[16+r9*8+rsp],rdi	; keep rdi; reloaded before $L$sub4x as the store pointer
+	mov	r12,rdx	; r12 = base of the words fetched as [r14*8+r12] in $L$outer4x
+	mov	r8,QWORD[r8]	; replace the pointer in r8 with the 64-bit word it points to
+	mov	rbx,QWORD[r12]	; rbx = word 0 at r12
+	mov	rax,QWORD[rsi]
+
+	xor	r14,r14	; r14 = outer index
+	xor	r15,r15	; r15 = inner index
+
+	mov	rbp,r8
+	mul	rbx
+	mov	r10,rax
+	mov	rax,QWORD[rcx]
+
+	imul	rbp,r10	; rbp = r8 * low product word (low 64 bits)
+	mov	r11,rdx
+
+	mul	rbp
+	add	r10,rax
+	mov	rax,QWORD[8+rsi]
+	adc	rdx,0
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[16+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	lea	r15,[4+r15]
+	adc	rdx,0
+	mov	QWORD[rsp],rdi	; first scratch word
+	mov	r13,rdx
+	jmp	NEAR $L$1st4x
+ALIGN	16
+$L$1st4x:	; first pass: per iteration four [rsi]*rbx and four [rcx]*rbp products, four scratch words stored
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[((-16))+r15*8+rcx]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r15*8+rsp],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+r15*8+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[r15*8+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],rdi
+	mov	r13,rdx
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[r15*8+rcx]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[8+r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-8))+r15*8+rsp],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+r15*8+rcx]
+	adc	rdx,0
+	lea	r15,[4+r15]	; advance the inner index by 4 (lea leaves the flags alone)
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[((-16))+r15*8+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-32))+r15*8+rsp],rdi
+	mov	r13,rdx
+	cmp	r15,r9
+	jb	NEAR $L$1st4x	; loop while r15 < r9
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[((-16))+r15*8+rcx]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r15*8+rsp],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+r15*8+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[rsi]	; reload word 0 at rsi for the next outer pass
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],rdi
+	mov	r13,rdx
+
+	xor	rdi,rdi
+	add	r13,r10
+	adc	rdi,0	; rdi = carry out of the last addition (0 or 1)
+	mov	QWORD[((-8))+r15*8+rsp],r13
+	mov	QWORD[r15*8+rsp],rdi	; carry word stored one slot past the last sum word
+
+	lea	r14,[1+r14]
+ALIGN	4
+$L$outer4x:	; one pass per remaining word at [r14*8+r12], accumulating into the scratch at rsp
+	mov	rbx,QWORD[r14*8+r12]
+	xor	r15,r15
+	mov	r10,QWORD[rsp]
+	mov	rbp,r8
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[rcx]
+	adc	rdx,0
+
+	imul	rbp,r10	; rbp = r8 * low word of the running sum
+	mov	r11,rdx
+
+	mul	rbp
+	add	r10,rax
+	mov	rax,QWORD[8+rsi]
+	adc	rdx,0
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]
+	adc	rdx,0
+	add	r11,QWORD[8+rsp]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[16+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	lea	r15,[4+r15]
+	adc	rdx,0
+	mov	QWORD[rsp],rdi
+	mov	r13,rdx
+	jmp	NEAR $L$inner4x
+ALIGN	16
+$L$inner4x:	; same 4-word body as $L$1st4x, with the previous scratch words added in
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[((-16))+r15*8+rcx]
+	adc	rdx,0
+	add	r10,QWORD[((-16))+r15*8+rsp]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r15*8+rsp],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+r15*8+rcx]
+	adc	rdx,0
+	add	r11,QWORD[((-8))+r15*8+rsp]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[r15*8+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],rdi
+	mov	r13,rdx
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[r15*8+rcx]
+	adc	rdx,0
+	add	r10,QWORD[r15*8+rsp]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[8+r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-8))+r15*8+rsp],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+r15*8+rcx]
+	adc	rdx,0
+	add	r11,QWORD[8+r15*8+rsp]
+	adc	rdx,0
+	lea	r15,[4+r15]
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[((-16))+r15*8+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-32))+r15*8+rsp],rdi
+	mov	r13,rdx
+	cmp	r15,r9
+	jb	NEAR $L$inner4x	; loop while r15 < r9
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[((-16))+r15*8+rcx]
+	adc	rdx,0
+	add	r10,QWORD[((-16))+r15*8+rsp]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r15*8+rsp],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+r15*8+rcx]
+	adc	rdx,0
+	add	r11,QWORD[((-8))+r15*8+rsp]
+	adc	rdx,0
+	lea	r14,[1+r14]	; advance the outer index
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],rdi
+	mov	r13,rdx
+
+	xor	rdi,rdi
+	add	r13,r10
+	adc	rdi,0
+	add	r13,QWORD[r9*8+rsp]	; add the carry word the previous pass left at scratch[r9]
+	adc	rdi,0
+	mov	QWORD[((-8))+r15*8+rsp],r13
+	mov	QWORD[r15*8+rsp],rdi
+
+	cmp	r14,r9
+	jb	NEAR $L$outer4x	; repeat while r14 < r9
+	mov	rdi,QWORD[16+r9*8+rsp]	; rdi = pointer saved at $L$mul4x_body
+	lea	r15,[((-4))+r9]
+	mov	rax,QWORD[rsp]
+	mov	rdx,QWORD[8+rsp]
+	shr	r15,2	; r15 = (r9-4)/4 iterations of $L$sub4x
+	lea	rsi,[rsp]	; rsi = scratch base
+	xor	r14,r14
+
+	sub	rax,QWORD[rcx]	; start of the borrow chain: scratch minus the words at rcx, results stored to rdi
+	mov	rbx,QWORD[16+rsi]
+	mov	rbp,QWORD[24+rsi]
+	sbb	rdx,QWORD[8+rcx]
+
+$L$sub4x:	; mov/lea/dec/jnz do not modify CF, so the sbb borrow carries across iterations
+	mov	QWORD[r14*8+rdi],rax
+	mov	QWORD[8+r14*8+rdi],rdx
+	sbb	rbx,QWORD[16+r14*8+rcx]
+	mov	rax,QWORD[32+r14*8+rsi]
+	mov	rdx,QWORD[40+r14*8+rsi]
+	sbb	rbp,QWORD[24+r14*8+rcx]
+	mov	QWORD[16+r14*8+rdi],rbx
+	mov	QWORD[24+r14*8+rdi],rbp
+	sbb	rax,QWORD[32+r14*8+rcx]
+	mov	rbx,QWORD[48+r14*8+rsi]
+	mov	rbp,QWORD[56+r14*8+rsi]
+	sbb	rdx,QWORD[40+r14*8+rcx]
+	lea	r14,[4+r14]
+	dec	r15
+	jnz	NEAR $L$sub4x
+
+	mov	QWORD[r14*8+rdi],rax
+	mov	rax,QWORD[32+r14*8+rsi]	; rax = scratch word following the last subtracted word
+	sbb	rbx,QWORD[16+r14*8+rcx]
+	mov	QWORD[8+r14*8+rdi],rdx
+	sbb	rbp,QWORD[24+r14*8+rcx]
+	mov	QWORD[16+r14*8+rdi],rbx
+
+	sbb	rax,0	; rax = that word minus the final borrow; used as the select mask below
+	mov	QWORD[24+r14*8+rdi],rbp
+	pxor	xmm0,xmm0	; xmm0 = 0, written over the scratch in $L$copy4x
+DB	102,72,15,110,224	; bytes encode: movq xmm4,rax
+	pcmpeqd	xmm5,xmm5	; xmm5 = all ones
+	pshufd	xmm4,xmm4,0	; broadcast the low dword of xmm4 to all four lanes
+	mov	r15,r9
+	pxor	xmm5,xmm4	; xmm5 = NOT xmm4
+	shr	r15,2	; r15 = r9/4 iterations, 32 bytes each
+	xor	eax,eax	; rax = byte offset into scratch and output
+
+	jmp	NEAR $L$copy4x
+ALIGN	16
+$L$copy4x:	; [rdi] = ([rsp] AND xmm4) OR ([rdi] AND xmm5); the scratch at rsp is overwritten with zeros
+	movdqa	xmm1,XMMWORD[rax*1+rsp]
+	movdqu	xmm2,XMMWORD[rax*1+rdi]
+	pand	xmm1,xmm4
+	pand	xmm2,xmm5
+	movdqa	xmm3,XMMWORD[16+rax*1+rsp]
+	movdqa	XMMWORD[rax*1+rsp],xmm0
+	por	xmm1,xmm2
+	movdqu	xmm2,XMMWORD[16+rax*1+rdi]
+	movdqu	XMMWORD[rax*1+rdi],xmm1
+	pand	xmm3,xmm4
+	pand	xmm2,xmm5
+	movdqa	XMMWORD[16+rax*1+rsp],xmm0
+	por	xmm3,xmm2
+	movdqu	XMMWORD[16+rax*1+rdi],xmm3
+	lea	rax,[32+rax]
+	dec	r15
+	jnz	NEAR $L$copy4x
+	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = entry rsp saved at $L$mul4x_page_walk_done
+
+	mov	rax,1	; return value 1
+	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed after $L$mul4x_enter
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; back to the entry rsp
+
+$L$mul4x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_mul4x_mont:
+EXTERN	bn_sqrx8x_internal
+EXTERN	bn_sqr8x_internal
+
+
+ALIGN	32
+bn_sqr8x_mont:	; builds a stack frame, calls bn_sqrx8x_internal or bn_sqr8x_internal (both EXTERN), then subtracts and conditionally copies
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are parked at [rsp+8]/[rsp+16] and reloaded at $L$sqr8x_epilogue
+	mov	rax,rsp
+$L$SEH_begin_bn_sqr8x_mont:
+	mov	rdi,rcx	; move incoming rcx,rdx,r8,r9 and two stack words into rdi,rsi,rdx,rcx,r8,r9
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+	mov	rax,rsp	; rax = entry rsp; stored at [rsp+40] of the new frame below
+
+$L$sqr8x_enter:
+	push	rbx	; save rbx,rbp,r12-r15; restored from [rsi-8]..[rsi-48] before $L$sqr8x_epilogue
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$sqr8x_prologue:
+
+	mov	r10d,r9d	; r10 = zero-extended 32-bit word count
+	shl	r9d,3	; r9 = count * 8 (byte length)
+	shl	r10,3+2	; r10 = count * 32
+	neg	r9	; r9 = -byte length
+
+
+
+
+
+
+	lea	r11,[((-64))+r9*2+rsp]	; r11 = rsp - 2*byte length - 64
+	mov	rbp,rsp
+	mov	r8,QWORD[r8]	; replace the pointer in r8 with the 64-bit word it points to
+	sub	r11,rsi
+	and	r11,4095	; (candidate frame - rsi) modulo 4096; NOTE(review): reason for this placement rule is not visible here
+	cmp	r10,r11
+	jb	NEAR $L$sqr8x_sp_alt
+	sub	rbp,r11
+	lea	rbp,[((-64))+r9*2+rbp]
+	jmp	NEAR $L$sqr8x_sp_done
+
+ALIGN	32
+$L$sqr8x_sp_alt:
+	lea	r10,[((4096-64))+r9*2]
+	lea	rbp,[((-64))+r9*2+rbp]
+	sub	r11,r10
+	mov	r10,0	; mov (not xor) so the flags from the sub survive for cmovc
+	cmovc	r11,r10	; if the sub borrowed, use 0 instead
+	sub	rbp,r11
+$L$sqr8x_sp_done:
+	and	rbp,-64	; 64-byte align the frame base
+	mov	r11,rsp
+	sub	r11,rbp
+	and	r11,-4096
+	lea	rsp,[rbp*1+r11]	; rsp = rbp + ((old rsp - rbp) rounded down to a multiple of 4096)
+	mov	r10,QWORD[rsp]	; read from the new rsp (touches that page)
+	cmp	rsp,rbp
+	ja	NEAR $L$sqr8x_page_walk
+	jmp	NEAR $L$sqr8x_page_walk_done
+
+ALIGN	16
+$L$sqr8x_page_walk:
+	lea	rsp,[((-4096))+rsp]	; step down 4096 bytes at a time, reading each page, until rsp reaches rbp
+	mov	r10,QWORD[rsp]
+	cmp	rsp,rbp
+	ja	NEAR $L$sqr8x_page_walk
+$L$sqr8x_page_walk_done:
+
+	mov	r10,r9	; r10 = -byte length
+	neg	r9	; r9 = +byte length
+
+	mov	QWORD[32+rsp],r8	; frame slot 32: the word loaded from [r8]
+	mov	QWORD[40+rsp],rax	; frame slot 40: entry rsp (reloaded into rsi before $L$sqr8x_cond_copy)
+
+$L$sqr8x_body:
+
+DB	102,72,15,110,209	; bytes encode: movq xmm2,rcx
+	pxor	xmm0,xmm0
+DB	102,72,15,110,207	; bytes encode: movq xmm1,rdi
+DB	102,73,15,110,218	; bytes encode: movq xmm3,r10
+	lea	rax,[OPENSSL_ia32cap_P]
+	mov	eax,DWORD[8+rax]	; eax = dword at OPENSSL_ia32cap_P+8
+	and	eax,0x80100	; NOTE(review): presumably the BMI2 and ADX feature bits - confirm
+	cmp	eax,0x80100
+	jne	NEAR $L$sqr8x_nox	; unless both bits are set, use bn_sqr8x_internal
+
+	call	bn_sqrx8x_internal
+
+
+
+
+	lea	rbx,[rcx*1+r8]	; register contents here come from the EXTERN callee; not visible in this file
+	mov	r9,rcx
+	mov	rdx,rcx
+DB	102,72,15,126,207	; bytes encode: movq rdi,xmm1 (the rdi stashed before the call)
+	sar	rcx,3+2	; rcx /= 32; $L$sqr8x_sub counts it up to zero with inc
+	jmp	NEAR $L$sqr8x_sub
+
+ALIGN	32
+$L$sqr8x_nox:
+	call	bn_sqr8x_internal
+
+
+
+
+	lea	rbx,[r9*1+rdi]	; register contents here come from the EXTERN callee; not visible in this file
+	mov	rcx,r9
+	mov	rdx,r9
+DB	102,72,15,126,207	; bytes encode: movq rdi,xmm1 (the rdi stashed before the call)
+	sar	rcx,3+2	; rcx /= 32; $L$sqr8x_sub counts it up to zero with inc
+	jmp	NEAR $L$sqr8x_sub
+
+ALIGN	32
+$L$sqr8x_sub:	; [rdi] = [rbx] - [rbp] with borrow, four words per pass; CF on first entry is what the sar above left
+	mov	r12,QWORD[rbx]
+	mov	r13,QWORD[8+rbx]
+	mov	r14,QWORD[16+rbx]
+	mov	r15,QWORD[24+rbx]
+	lea	rbx,[32+rbx]
+	sbb	r12,QWORD[rbp]
+	sbb	r13,QWORD[8+rbp]
+	sbb	r14,QWORD[16+rbp]
+	sbb	r15,QWORD[24+rbp]
+	lea	rbp,[32+rbp]
+	mov	QWORD[rdi],r12
+	mov	QWORD[8+rdi],r13
+	mov	QWORD[16+rdi],r14
+	mov	QWORD[24+rdi],r15
+	lea	rdi,[32+rdi]
+	inc	rcx	; inc leaves CF untouched, so the borrow carries into the next pass
+	jnz	NEAR $L$sqr8x_sub
+
+	sbb	rax,0	; rax -= final borrow; broadcast below as the select mask
+	lea	rbx,[r9*1+rbx]	; add r9 to rbx and rdi (r9 then counts up by 32 to zero in the copy loop)
+	lea	rdi,[r9*1+rdi]
+
+DB	102,72,15,110,200	; bytes encode: movq xmm1,rax
+	pxor	xmm0,xmm0
+	pshufd	xmm1,xmm1,0	; broadcast the low dword of xmm1 to all four lanes
+	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp saved before $L$sqr8x_body
+
+	jmp	NEAR $L$sqr8x_cond_copy
+
+ALIGN	32
+$L$sqr8x_cond_copy:	; [rdi] = ([rbx] AND xmm1) OR ([rdi] AND (xmm1 == 0)), 32 bytes per pass
+	movdqa	xmm2,XMMWORD[rbx]
+	movdqa	xmm3,XMMWORD[16+rbx]
+	lea	rbx,[32+rbx]
+	movdqu	xmm4,XMMWORD[rdi]
+	movdqu	xmm5,XMMWORD[16+rdi]
+	lea	rdi,[32+rdi]
+	movdqa	XMMWORD[(-32)+rbx],xmm0	; zero the 32 bytes just read from rbx ...
+	movdqa	XMMWORD[(-16)+rbx],xmm0
+	movdqa	XMMWORD[(-32)+rdx*1+rbx],xmm0	; ... and the 32 bytes at the same position offset by rdx
+	movdqa	XMMWORD[(-16)+rdx*1+rbx],xmm0
+	pcmpeqd	xmm0,xmm1	; xmm0 (zero) compared with xmm1: all ones in lanes where xmm1 is 0
+	pand	xmm2,xmm1
+	pand	xmm3,xmm1
+	pand	xmm4,xmm0
+	pand	xmm5,xmm0
+	pxor	xmm0,xmm0	; back to zero for the next pass
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqu	XMMWORD[(-32)+rdi],xmm4
+	movdqu	XMMWORD[(-16)+rdi],xmm5
+	add	r9,32
+	jnz	NEAR $L$sqr8x_cond_copy	; loop until r9 reaches zero
+
+	mov	rax,1	; return value 1
+	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed after $L$sqr8x_enter
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; back to the entry rsp
+
+$L$sqr8x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_sqr8x_mont:
+
+ALIGN	32
+bn_mulx4x_mont:	; multiply path built on mulx/adcx/adox, four words per inner iteration; also entered at $L$mulx4x_enter from bn_mul4x_mont
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are parked at [rsp+8]/[rsp+16] and reloaded at $L$mulx4x_epilogue
+	mov	rax,rsp
+$L$SEH_begin_bn_mulx4x_mont:
+	mov	rdi,rcx	; move incoming rcx,rdx,r8,r9 and two stack words into rdi,rsi,rdx,rcx,r8,r9
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+	mov	rax,rsp	; rax = entry rsp; stored at [rsp+40] of the new frame below
+
+$L$mulx4x_enter:
+	push	rbx	; save rbx,rbp,r12-r15; restored from [rsi-8]..[rsi-48] before $L$mulx4x_epilogue
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$mulx4x_prologue:
+
+	shl	r9d,3	; r9 = count * 8 (byte length); the 32-bit shift also clears the upper half
+	xor	r10,r10
+	sub	r10,r9	; r10 = -byte length
+	mov	r8,QWORD[r8]	; replace the pointer in r8 with the 64-bit word it points to
+	lea	rbp,[((-72))+r10*1+rsp]	; rbp = rsp - byte length - 72
+	and	rbp,-128	; 128-byte align the frame base
+	mov	r11,rsp
+	sub	r11,rbp
+	and	r11,-4096
+	lea	rsp,[rbp*1+r11]	; rsp = rbp + ((old rsp - rbp) rounded down to a multiple of 4096)
+	mov	r10,QWORD[rsp]	; read from the new rsp (touches that page)
+	cmp	rsp,rbp
+	ja	NEAR $L$mulx4x_page_walk
+	jmp	NEAR $L$mulx4x_page_walk_done
+
+ALIGN	16
+$L$mulx4x_page_walk:
+	lea	rsp,[((-4096))+rsp]	; step down 4096 bytes at a time, reading each page, until rsp reaches rbp
+	mov	r10,QWORD[rsp]
+	cmp	rsp,rbp
+	ja	NEAR $L$mulx4x_page_walk
+$L$mulx4x_page_walk_done:
+
+	lea	r10,[r9*1+rdx]	; r10 = rdx + byte length: end pointer for the outer loop
+
+
+
+
+
+
+
+
+
+
+
+
+	mov	QWORD[rsp],r9	; frame slot 0: byte length
+	shr	r9,5
+	mov	QWORD[16+rsp],r10	; frame slot 16: end pointer (compared with rdi after $L$mulx4x_inner)
+	sub	r9,1	; r9 = byte length/32 - 1: trip count of the 1st/inner loops
+	mov	QWORD[24+rsp],r8	; frame slot 24: the word loaded from [r8]
+	mov	QWORD[32+rsp],rdi	; frame slot 32: rdi (reloaded before $L$mulx4x_sub as the store pointer)
+	mov	QWORD[40+rsp],rax	; frame slot 40: entry rsp
+
+	mov	QWORD[48+rsp],r9	; frame slot 48: loop trip count
+	jmp	NEAR $L$mulx4x_body
+
+ALIGN	32
+$L$mulx4x_body:
+	lea	rdi,[8+rdx]	; rdi = address of the next word after [rdx]
+	mov	rdx,QWORD[rdx]	; rdx = current word; mulx uses rdx as its implicit source
+	lea	rbx,[((64+32))+rsp]	; rbx = scratch cursor
+	mov	r9,rdx	; r9 keeps that word; rdx alternates between r9 and r8 below
+
+	mulx	rax,r8,QWORD[rsi]
+	mulx	r14,r11,QWORD[8+rsi]
+	add	r11,rax
+	mov	QWORD[8+rsp],rdi	; frame slot 8: pointer to the next word
+	mulx	r13,r12,QWORD[16+rsi]
+	adc	r12,r14
+	adc	r13,0
+
+	mov	rdi,r8
+	imul	r8,QWORD[24+rsp]	; r8 = low product word * word saved in frame slot 24 (low 64 bits)
+	xor	rbp,rbp	; rbp = 0; xor also clears CF and OF for the adcx/adox chains
+
+	mulx	r14,rax,QWORD[24+rsi]
+	mov	rdx,r8
+	lea	rsi,[32+rsi]
+	adcx	r13,rax
+	adcx	r14,rbp
+
+	mulx	r10,rax,QWORD[rcx]
+	adcx	rdi,rax
+	adox	r10,r11
+	mulx	r11,rax,QWORD[8+rcx]
+	adcx	r10,rax
+	adox	r11,r12
+DB	0xc4,0x62,0xfb,0xf6,0xa1,0x10,0x00,0x00,0x00	; bytes encode: mulx r12,rax,QWORD[16+rcx] with a 32-bit displacement
+	mov	rdi,QWORD[48+rsp]	; rdi = loop trip count
+	mov	QWORD[((-32))+rbx],r10
+	adcx	r11,rax
+	adox	r12,r13
+	mulx	r15,rax,QWORD[24+rcx]
+	mov	rdx,r9
+	mov	QWORD[((-24))+rbx],r11
+	adcx	r12,rax
+	adox	r15,rbp
+	lea	rcx,[32+rcx]
+	mov	QWORD[((-16))+rbx],r12
+
+	jmp	NEAR $L$mulx4x_1st
+
+ALIGN	32
+$L$mulx4x_1st:	; first pass: four [rsi]*r9 and four [rcx]*r8 products per iteration, results stored via rbx
+	adcx	r15,rbp
+	mulx	rax,r10,QWORD[rsi]
+	adcx	r10,r14
+	mulx	r14,r11,QWORD[8+rsi]
+	adcx	r11,rax
+	mulx	rax,r12,QWORD[16+rsi]
+	adcx	r12,r14
+	mulx	r14,r13,QWORD[24+rsi]
+DB	0x67,0x67	; two raw 0x67 prefix bytes ahead of the next instruction; NOTE(review): presumably padding - confirm
+	mov	rdx,r8
+	adcx	r13,rax
+	adcx	r14,rbp
+	lea	rsi,[32+rsi]
+	lea	rbx,[32+rbx]
+
+	adox	r10,r15
+	mulx	r15,rax,QWORD[rcx]
+	adcx	r10,rax
+	adox	r11,r15
+	mulx	r15,rax,QWORD[8+rcx]
+	adcx	r11,rax
+	adox	r12,r15
+	mulx	r15,rax,QWORD[16+rcx]
+	mov	QWORD[((-40))+rbx],r10
+	adcx	r12,rax
+	mov	QWORD[((-32))+rbx],r11
+	adox	r13,r15
+	mulx	r15,rax,QWORD[24+rcx]
+	mov	rdx,r9
+	mov	QWORD[((-24))+rbx],r12
+	adcx	r13,rax
+	adox	r15,rbp
+	lea	rcx,[32+rcx]
+	mov	QWORD[((-16))+rbx],r13
+
+	dec	rdi	; dec leaves CF untouched
+	jnz	NEAR $L$mulx4x_1st
+
+	mov	rax,QWORD[rsp]	; rax = byte length
+	mov	rdi,QWORD[8+rsp]	; rdi = pointer to the next word
+	adc	r15,rbp
+	add	r14,r15
+	sbb	r15,r15	; r15 = 0 or -1 depending on the carry out of the add
+	mov	QWORD[((-8))+rbx],r14
+	jmp	NEAR $L$mulx4x_outer
+
+ALIGN	32
+$L$mulx4x_outer:	; one pass per remaining word at [rdi], accumulating into the scratch
+	mov	rdx,QWORD[rdi]
+	lea	rdi,[8+rdi]
+	sub	rsi,rax	; rewind rsi by the byte length
+	mov	QWORD[rbx],r15	; store the carry word from the previous pass
+	lea	rbx,[((64+32))+rsp]	; reset the scratch cursor
+	sub	rcx,rax	; rewind rcx by the byte length
+
+	mulx	r11,r8,QWORD[rsi]
+	xor	ebp,ebp	; rbp = 0, CF = OF = 0
+	mov	r9,rdx
+	mulx	r12,r14,QWORD[8+rsi]
+	adox	r8,QWORD[((-32))+rbx]
+	adcx	r11,r14
+	mulx	r13,r15,QWORD[16+rsi]
+	adox	r11,QWORD[((-24))+rbx]
+	adcx	r12,r15
+	adox	r12,QWORD[((-16))+rbx]
+	adcx	r13,rbp
+	adox	r13,rbp
+
+	mov	QWORD[8+rsp],rdi	; frame slot 8: updated next-word pointer
+	mov	r15,r8
+	imul	r8,QWORD[24+rsp]	; r8 = low word of the running sum * word saved in frame slot 24
+	xor	ebp,ebp
+
+	mulx	r14,rax,QWORD[24+rsi]
+	mov	rdx,r8
+	adcx	r13,rax
+	adox	r13,QWORD[((-8))+rbx]
+	adcx	r14,rbp
+	lea	rsi,[32+rsi]
+	adox	r14,rbp
+
+	mulx	r10,rax,QWORD[rcx]
+	adcx	r15,rax
+	adox	r10,r11
+	mulx	r11,rax,QWORD[8+rcx]
+	adcx	r10,rax
+	adox	r11,r12
+	mulx	r12,rax,QWORD[16+rcx]
+	mov	QWORD[((-32))+rbx],r10
+	adcx	r11,rax
+	adox	r12,r13
+	mulx	r15,rax,QWORD[24+rcx]
+	mov	rdx,r9
+	mov	QWORD[((-24))+rbx],r11
+	lea	rcx,[32+rcx]
+	adcx	r12,rax
+	adox	r15,rbp
+	mov	rdi,QWORD[48+rsp]	; rdi = loop trip count
+	mov	QWORD[((-16))+rbx],r12
+
+	jmp	NEAR $L$mulx4x_inner
+
+ALIGN	32
+$L$mulx4x_inner:	; same shape as $L$mulx4x_1st, with the previous scratch words at [rbx] added in
+	mulx	rax,r10,QWORD[rsi]
+	adcx	r15,rbp
+	adox	r10,r14
+	mulx	r14,r11,QWORD[8+rsi]
+	adcx	r10,QWORD[rbx]
+	adox	r11,rax
+	mulx	rax,r12,QWORD[16+rsi]
+	adcx	r11,QWORD[8+rbx]
+	adox	r12,r14
+	mulx	r14,r13,QWORD[24+rsi]
+	mov	rdx,r8
+	adcx	r12,QWORD[16+rbx]
+	adox	r13,rax
+	adcx	r13,QWORD[24+rbx]
+	adox	r14,rbp
+	lea	rsi,[32+rsi]
+	lea	rbx,[32+rbx]
+	adcx	r14,rbp
+
+	adox	r10,r15
+	mulx	r15,rax,QWORD[rcx]
+	adcx	r10,rax
+	adox	r11,r15
+	mulx	r15,rax,QWORD[8+rcx]
+	adcx	r11,rax
+	adox	r12,r15
+	mulx	r15,rax,QWORD[16+rcx]
+	mov	QWORD[((-40))+rbx],r10
+	adcx	r12,rax
+	adox	r13,r15
+	mulx	r15,rax,QWORD[24+rcx]
+	mov	rdx,r9
+	mov	QWORD[((-32))+rbx],r11
+	mov	QWORD[((-24))+rbx],r12
+	adcx	r13,rax
+	adox	r15,rbp
+	lea	rcx,[32+rcx]
+	mov	QWORD[((-16))+rbx],r13
+
+	dec	rdi
+	jnz	NEAR $L$mulx4x_inner
+
+	mov	rax,QWORD[rsp]	; rax = byte length
+	mov	rdi,QWORD[8+rsp]	; rdi = next-word pointer
+	adc	r15,rbp
+	sub	rbp,QWORD[rbx]	; rbp is 0 here, so CF is set exactly when the word at [rbx] is non-zero
+	adc	r14,r15
+	sbb	r15,r15	; r15 = 0 or -1 depending on the carry out
+	mov	QWORD[((-8))+rbx],r14
+
+	cmp	rdi,QWORD[16+rsp]
+	jne	NEAR $L$mulx4x_outer	; loop until rdi reaches the end pointer in frame slot 16
+
+	lea	rbx,[64+rsp]	; rbx = start of the scratch words
+	sub	rcx,rax	; rewind rcx by the byte length
+	neg	r15	; r15 = 0 or 1; neg also sets CF when r15 was non-zero
+	mov	rdx,rax	; rdx = byte length (counted down in $L$mulx4x_cond_copy)
+	shr	rax,3+2	; rax = number of 32-byte chunks
+	mov	rdi,QWORD[32+rsp]	; rdi = pointer saved in frame slot 32
+	jmp	NEAR $L$mulx4x_sub
+
+ALIGN	32
+$L$mulx4x_sub:	; [rdi] = [rbx] - [rcx] with borrow, four words per pass
+	mov	r11,QWORD[rbx]
+	mov	r12,QWORD[8+rbx]
+	mov	r13,QWORD[16+rbx]
+	mov	r14,QWORD[24+rbx]
+	lea	rbx,[32+rbx]
+	sbb	r11,QWORD[rcx]
+	sbb	r12,QWORD[8+rcx]
+	sbb	r13,QWORD[16+rcx]
+	sbb	r14,QWORD[24+rcx]
+	lea	rcx,[32+rcx]
+	mov	QWORD[rdi],r11
+	mov	QWORD[8+rdi],r12
+	mov	QWORD[16+rdi],r13
+	mov	QWORD[24+rdi],r14
+	lea	rdi,[32+rdi]
+	dec	rax	; dec leaves CF untouched, so the borrow carries into the next pass
+	jnz	NEAR $L$mulx4x_sub
+
+	sbb	r15,0	; r15 -= final borrow; broadcast below as the select mask
+	lea	rbx,[64+rsp]	; rbx back to the start of the scratch words
+	sub	rdi,rdx	; rewind rdi by the byte length
+
+DB	102,73,15,110,207	; bytes encode: movq xmm1,r15
+	pxor	xmm0,xmm0
+	pshufd	xmm1,xmm1,0	; broadcast the low dword of xmm1 to all four lanes
+	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp from frame slot 40
+
+	jmp	NEAR $L$mulx4x_cond_copy
+
+ALIGN	32
+$L$mulx4x_cond_copy:	; [rdi] = ([rbx] AND xmm1) OR ([rdi] AND (xmm1 == 0)), 32 bytes per pass; [rbx] is zeroed
+	movdqa	xmm2,XMMWORD[rbx]
+	movdqa	xmm3,XMMWORD[16+rbx]
+	lea	rbx,[32+rbx]
+	movdqu	xmm4,XMMWORD[rdi]
+	movdqu	xmm5,XMMWORD[16+rdi]
+	lea	rdi,[32+rdi]
+	movdqa	XMMWORD[(-32)+rbx],xmm0
+	movdqa	XMMWORD[(-16)+rbx],xmm0
+	pcmpeqd	xmm0,xmm1	; xmm0 (zero) compared with xmm1: all ones in lanes where xmm1 is 0
+	pand	xmm2,xmm1
+	pand	xmm3,xmm1
+	pand	xmm4,xmm0
+	pand	xmm5,xmm0
+	pxor	xmm0,xmm0	; back to zero for the next pass
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqu	XMMWORD[(-32)+rdi],xmm4
+	movdqu	XMMWORD[(-16)+rdi],xmm5
+	sub	rdx,32
+	jnz	NEAR $L$mulx4x_cond_copy	; loop until rdx reaches zero
+
+	mov	QWORD[rbx],rdx	; rdx is 0 here: zero the next scratch word as well
+
+	mov	rax,1	; return value 1
+	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed after $L$mulx4x_enter
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; back to the entry rsp
+
+$L$mulx4x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_mulx4x_mont:
+DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105	; ASCII "Montgomery Multi"
+DB	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56	; ASCII "plication for x8"
+DB	54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83	; ASCII "6_64, CRYPTOGAMS"
+DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115	; ASCII " by " followed by the start of an e-mail address
+DB	115,108,46,111,114,103,62,0	; rest of the address, then the terminating NUL byte
+ALIGN	16
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+mul_handler:	; handler named in $L$SEH_info_bn_mul_mont and $L$SEH_info_bn_mul4x_mont; finishes in $L$common_pop_regs / $L$common_seh_tail
+	push	rsi	; save rsi,rdi,rbx,rbp,r12-r15 and the flags; popped in reverse order at the end of $L$common_seh_tail
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; 64 bytes of stack for the RtlVirtualUnwind call made in $L$common_seh_tail
+
+	mov	rax,QWORD[120+r8]	; NOTE(review): r8 offsets look like Win64 CONTEXT fields (120=Rax, 248=Rip, 152=Rsp, 192=R9) - confirm
+	mov	rbx,QWORD[248+r8]
+
+	mov	rsi,QWORD[8+r9]	; NOTE(review): r9 offsets look like DISPATCHER_CONTEXT fields (8=ImageBase, 56=HandlerData) - confirm
+	mov	r11,QWORD[56+r9]
+
+	mov	r10d,DWORD[r11]	; first dword at r11; the SEH_info records list the body label first
+	lea	r10,[r10*1+rsi]	; add the base in rsi to get an absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; rbx below that address: keep rax as loaded from [r8+120]
+
+	mov	rax,QWORD[152+r8]
+
+	mov	r10d,DWORD[4+r11]	; second dword; the SEH_info records list the epilogue label second
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; rbx at or past that address: keep rax as loaded from [r8+152]
+
+	mov	r10,QWORD[192+r8]
+	mov	rax,QWORD[8+r10*8+rax]	; same addressing form the bodies use to save the entry rsp (mov QWORD[8+r9*8+rsp],rax)
+
+	jmp	NEAR $L$common_pop_regs
+
+
+
+ALIGN	16
+sqr_handler:	; handler named in $L$SEH_info_bn_sqr8x_mont and $L$SEH_info_bn_mulx4x_mont; the shared tail below is also used by mul_handler
+	push	rsi	; save rsi,rdi,rbx,rbp,r12-r15 and the flags; popped in reverse order at the end
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; 64 bytes of stack for the RtlVirtualUnwind call below
+
+	mov	rax,QWORD[120+r8]	; NOTE(review): r8 offsets look like Win64 CONTEXT fields (120=Rax, 248=Rip, 152=Rsp) - confirm
+	mov	rbx,QWORD[248+r8]
+
+	mov	rsi,QWORD[8+r9]	; NOTE(review): r9 offsets look like DISPATCHER_CONTEXT fields (8=ImageBase, 56=HandlerData) - confirm
+	mov	r11,QWORD[56+r9]
+
+	mov	r10d,DWORD[r11]	; first dword at r11; the SEH_info records list the prologue label first
+	lea	r10,[r10*1+rsi]	; add the base in rsi to get an absolute address
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; rbx below that address: keep rax as loaded from [r8+120]
+
+	mov	r10d,DWORD[4+r11]	; second dword; the SEH_info records list the body label second
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_pop_regs	; between the two labels: rax (from [r8+120]) is used as the frame pointer for the pops
+
+	mov	rax,QWORD[152+r8]
+
+	mov	r10d,DWORD[8+r11]	; third dword; the SEH_info records list the epilogue label third
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; rbx at or past that address: keep rax as loaded from [r8+152]
+
+	mov	rax,QWORD[40+rax]	; same offset the sqr8x/mulx4x prologues use to save the entry rsp (mov QWORD[40+rsp],rax)
+
+$L$common_pop_regs:	; read six saved registers from below rax and store them into the record at r8
+	mov	rbx,QWORD[((-8))+rax]
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx
+	mov	QWORD[160+r8],rbp
+	mov	QWORD[216+r8],r12
+	mov	QWORD[224+r8],r13
+	mov	QWORD[232+r8],r14
+	mov	QWORD[240+r8],r15
+
+$L$common_seh_tail:
+	mov	rdi,QWORD[8+rax]	; same offsets the WIN64 prologues use to park rdi and rsi
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax
+	mov	QWORD[168+r8],rsi
+	mov	QWORD[176+r8],rdi
+
+	mov	rdi,QWORD[40+r9]	; destination pointer for the copy below, read from [r9+40]
+	mov	rsi,r8	; source = the record at r8
+	mov	ecx,154	; 154 qwords = 1232 bytes
+	DD	0xa548f3fc	; little-endian bytes FC F3 48 A5 encode: cld; rep movsq
+
+	mov	rsi,r9
+	xor	rcx,rcx	; first register argument = 0
+	mov	rdx,QWORD[8+rsi]	; remaining arguments are read from the structure at r9
+	mov	r8,QWORD[rsi]
+	mov	r9,QWORD[16+rsi]
+	mov	r10,QWORD[40+rsi]
+	lea	r11,[56+rsi]
+	lea	r12,[24+rsi]
+	mov	QWORD[32+rsp],r10	; stack arguments at rsp+32 .. rsp+56
+	mov	QWORD[40+rsp],r11
+	mov	QWORD[48+rsp],r12
+	mov	QWORD[56+rsp],rcx
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; return 1; NOTE(review): presumably ExceptionContinueSearch - confirm
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4
+ALIGN	4
+	DD	$L$SEH_begin_bn_mul_mont wrt ..imagebase	; one record per routine: image-relative begin, end and info addresses
+	DD	$L$SEH_end_bn_mul_mont wrt ..imagebase
+	DD	$L$SEH_info_bn_mul_mont wrt ..imagebase
+
+	DD	$L$SEH_begin_bn_mul4x_mont wrt ..imagebase
+	DD	$L$SEH_end_bn_mul4x_mont wrt ..imagebase
+	DD	$L$SEH_info_bn_mul4x_mont wrt ..imagebase
+
+	DD	$L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
+	DD	$L$SEH_end_bn_sqr8x_mont wrt ..imagebase
+	DD	$L$SEH_info_bn_sqr8x_mont wrt ..imagebase
+	DD	$L$SEH_begin_bn_mulx4x_mont wrt ..imagebase
+	DD	$L$SEH_end_bn_mulx4x_mont wrt ..imagebase
+	DD	$L$SEH_info_bn_mulx4x_mont wrt ..imagebase
+section	.xdata rdata align=8
+ALIGN	8
+$L$SEH_info_bn_mul_mont:
+DB	9,0,0,0	; four header bytes, then the handler address, then the label list that handler reads
+	DD	mul_handler wrt ..imagebase
+	DD	$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase	; two labels: body, epilogue (read by mul_handler at offsets 0 and 4)
+$L$SEH_info_bn_mul4x_mont:
+DB	9,0,0,0
+	DD	mul_handler wrt ..imagebase
+	DD	$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
+$L$SEH_info_bn_sqr8x_mont:
+DB	9,0,0,0
+	DD	sqr_handler wrt ..imagebase
+	DD	$L$sqr8x_prologue wrt ..imagebase,$L$sqr8x_body wrt ..imagebase,$L$sqr8x_epilogue wrt ..imagebase	; three labels: prologue, body, epilogue (read by sqr_handler at offsets 0, 4 and 8)
+ALIGN	8
+$L$SEH_info_bn_mulx4x_mont:
+DB	9,0,0,0
+	DD	sqr_handler wrt ..imagebase	; the mulx4x routine shares sqr_handler
+	DD	$L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase
+ALIGN	8
diff --git a/deps/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm b/deps/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
new file mode 100644
index 0000000..7a1d5db
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/fipsmodule/x86_64-mont5.asm
@@ -0,0 +1,4036 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+EXTERN	OPENSSL_ia32cap_P
+
+global	bn_mul_mont_gather5
+
+ALIGN	64
+bn_mul_mont_gather5:	; multiply where one operand word per pass is selected from a table with compare masks; dispatches to $L$mul4x_enter when the count is a multiple of 8
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are parked at [rsp+8]/[rsp+16] and reloaded at $L$mul_epilogue
+	mov	rax,rsp
+$L$SEH_begin_bn_mul_mont_gather5:
+	mov	rdi,rcx	; move incoming rcx,rdx,r8,r9 and two stack words into rdi,rsi,rdx,rcx,r8,r9
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+	mov	r9d,r9d	; 32-bit self-move: clears the upper 32 bits of r9 (the word count)
+	mov	rax,rsp	; rax = entry rsp; stored at [8+r9*8+rsp] below for the epilogue
+
+	test	r9d,7
+	jnz	NEAR $L$mul_enter	; count not a multiple of 8: use the one-word-per-iteration path below
+	lea	r11,[OPENSSL_ia32cap_P]
+	mov	r11d,DWORD[8+r11]	; r11d = dword at OPENSSL_ia32cap_P+8, tested at $L$mul4x_enter
+	jmp	NEAR $L$mul4x_enter
+
+ALIGN	16
+$L$mul_enter:
+	movd	xmm5,DWORD[56+rsp]	; xmm5 = 32-bit stack argument at [rsp+56]; the compare masks below are built against it
+	push	rbx	; save rbx,rbp,r12-r15; restored from [rsi-8]..[rsi-48] before $L$mul_epilogue
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+
+	neg	r9
+	mov	r11,rsp
+	lea	r10,[((-280))+r9*8+rsp]	; r10 = rsp - 8*count - 280 (r9 is negated only around this lea)
+	neg	r9
+	and	r10,-1024	; round the frame base down to a 1024-byte boundary
+
+
+
+
+
+
+
+
+
+	sub	r11,r10
+	and	r11,-4096
+	lea	rsp,[r11*1+r10]	; rsp = r10 + ((old rsp - r10) rounded down to a multiple of 4096)
+	mov	r11,QWORD[rsp]	; read from the new rsp (touches that page)
+	cmp	rsp,r10
+	ja	NEAR $L$mul_page_walk
+	jmp	NEAR $L$mul_page_walk_done
+
+$L$mul_page_walk:
+	lea	rsp,[((-4096))+rsp]	; step down 4096 bytes at a time, reading each page, until rsp reaches r10
+	mov	r11,QWORD[rsp]
+	cmp	rsp,r10
+	ja	NEAR $L$mul_page_walk
+$L$mul_page_walk_done:
+
+	lea	r10,[$L$inc]	; $L$inc is a constant defined elsewhere in this file
+	mov	QWORD[8+r9*8+rsp],rax	; keep entry rsp just above the scratch words
+
+$L$mul_body:
+
+	lea	r12,[128+rdx]	; r12 = table pointer + 128, so offsets -128..112 cover sixteen 16-byte entries
+	movdqa	xmm0,XMMWORD[r10]	; xmm0, xmm1 = the two 16-byte constants at $L$inc
+	movdqa	xmm1,XMMWORD[16+r10]
+	lea	r10,[((24-112))+r9*8+rsp]
+	and	r10,-16	; 16-byte aligned base; the sixteen masks are stored at r10+112 .. r10+352
+
+	pshufd	xmm5,xmm5,0	; broadcast the 32-bit argument to all four lanes
+	movdqa	xmm4,xmm1
+	movdqa	xmm2,xmm1
+	paddd	xmm1,xmm0	; counters advance with paddd; each is compared with xmm5 and the result stored as a mask
+	pcmpeqd	xmm0,xmm5
+DB	0x67	; raw 0x67 prefix byte ahead of the next instruction; NOTE(review): presumably padding - confirm
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[112+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[128+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[144+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[160+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[176+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[192+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[208+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[224+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[240+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[256+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[272+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[288+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[304+r10],xmm0
+
+	paddd	xmm3,xmm2
+DB	0x67	; raw 0x67 prefix byte ahead of the next instruction
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[320+r10],xmm1
+
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[336+r10],xmm2
+	pand	xmm0,XMMWORD[64+r12]	; from here: AND every table entry with its mask and OR the results; all sixteen entries are read
+
+	pand	xmm1,XMMWORD[80+r12]
+	pand	xmm2,XMMWORD[96+r12]
+	movdqa	XMMWORD[352+r10],xmm3
+	pand	xmm3,XMMWORD[112+r12]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[((-128))+r12]
+	movdqa	xmm5,XMMWORD[((-112))+r12]
+	movdqa	xmm2,XMMWORD[((-96))+r12]
+	pand	xmm4,XMMWORD[112+r10]
+	movdqa	xmm3,XMMWORD[((-80))+r12]
+	pand	xmm5,XMMWORD[128+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[144+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[160+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[((-64))+r12]
+	movdqa	xmm5,XMMWORD[((-48))+r12]
+	movdqa	xmm2,XMMWORD[((-32))+r12]
+	pand	xmm4,XMMWORD[176+r10]
+	movdqa	xmm3,XMMWORD[((-16))+r12]
+	pand	xmm5,XMMWORD[192+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[208+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[224+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[r12]
+	movdqa	xmm5,XMMWORD[16+r12]
+	movdqa	xmm2,XMMWORD[32+r12]
+	pand	xmm4,XMMWORD[240+r10]
+	movdqa	xmm3,XMMWORD[48+r12]
+	pand	xmm5,XMMWORD[256+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[272+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[288+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	por	xmm0,xmm1
+	pshufd	xmm1,xmm0,0x4e	; xmm1 = xmm0 with its two qwords swapped
+	por	xmm0,xmm1	; fold the high qword into the low qword
+	lea	r12,[256+r12]	; advance the table pointer by 256 bytes for the next pass
+DB	102,72,15,126,195	; bytes encode: movq rbx,xmm0 (rbx = the selected word)
+
+	mov	r8,QWORD[r8]	; replace the pointer in r8 with the 64-bit word it points to
+	mov	rax,QWORD[rsi]
+
+	xor	r14,r14	; r14 = outer index
+	xor	r15,r15	; r15 = inner index
+
+	mov	rbp,r8
+	mul	rbx
+	mov	r10,rax
+	mov	rax,QWORD[rcx]
+
+	imul	rbp,r10	; rbp = r8 * low product word (low 64 bits)
+	mov	r11,rdx
+
+	mul	rbp
+	add	r10,rax
+	mov	rax,QWORD[8+rsi]
+	adc	rdx,0
+	mov	r13,rdx
+
+	lea	r15,[1+r15]
+	jmp	NEAR $L$1st_enter
+
+ALIGN	16
+$L$1st:	; first pass: one [rsi]*rbx and one [rcx]*rbp product per iteration, one scratch word stored
+	add	r13,rax
+	mov	rax,QWORD[r15*8+rsi]
+	adc	rdx,0
+	add	r13,r11
+	mov	r11,r10
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13
+	mov	r13,rdx
+
+$L$1st_enter:
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[r15*8+rcx]
+	adc	rdx,0
+	lea	r15,[1+r15]
+	mov	r10,rdx
+
+	mul	rbp
+	cmp	r15,r9
+	jne	NEAR $L$1st	; loop until r15 equals r9
+
+
+	add	r13,rax
+	adc	rdx,0
+	add	r13,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r9*8+rsp],r13
+	mov	r13,rdx
+	mov	r11,r10
+
+	xor	rdx,rdx
+	add	r13,r11
+	adc	rdx,0	; rdx = carry out of the last addition (0 or 1)
+	mov	QWORD[((-8))+r9*8+rsp],r13
+	mov	QWORD[r9*8+rsp],rdx	; carry word stored at scratch[r9]
+
+	lea	r14,[1+r14]
+	jmp	NEAR $L$outer
+ALIGN	16
+$L$outer:	; per pass: select the next table word with the stored masks, then multiply-accumulate into the scratch
+	lea	rdx,[((24+128))+r9*8+rsp]
+	and	rdx,-16	; rdx-128 .. rdx+112 address the sixteen masks stored in $L$mul_body
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	movdqa	xmm0,XMMWORD[((-128))+r12]
+	movdqa	xmm1,XMMWORD[((-112))+r12]
+	movdqa	xmm2,XMMWORD[((-96))+r12]
+	movdqa	xmm3,XMMWORD[((-80))+r12]
+	pand	xmm0,XMMWORD[((-128))+rdx]
+	pand	xmm1,XMMWORD[((-112))+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-96))+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-80))+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[((-64))+r12]
+	movdqa	xmm1,XMMWORD[((-48))+r12]
+	movdqa	xmm2,XMMWORD[((-32))+r12]
+	movdqa	xmm3,XMMWORD[((-16))+r12]
+	pand	xmm0,XMMWORD[((-64))+rdx]
+	pand	xmm1,XMMWORD[((-48))+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-32))+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-16))+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[r12]
+	movdqa	xmm1,XMMWORD[16+r12]
+	movdqa	xmm2,XMMWORD[32+r12]
+	movdqa	xmm3,XMMWORD[48+r12]
+	pand	xmm0,XMMWORD[rdx]
+	pand	xmm1,XMMWORD[16+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[32+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[48+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[64+r12]
+	movdqa	xmm1,XMMWORD[80+r12]
+	movdqa	xmm2,XMMWORD[96+r12]
+	movdqa	xmm3,XMMWORD[112+r12]
+	pand	xmm0,XMMWORD[64+rdx]
+	pand	xmm1,XMMWORD[80+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[96+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[112+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	por	xmm4,xmm5
+	pshufd	xmm0,xmm4,0x4e	; xmm0 = xmm4 with its two qwords swapped
+	por	xmm0,xmm4	; fold the two halves together
+	lea	r12,[256+r12]	; advance the table pointer by 256 bytes
+
+	mov	rax,QWORD[rsi]
+DB	102,72,15,126,195	; bytes encode: movq rbx,xmm0 (rbx = the selected word)
+
+	xor	r15,r15
+	mov	rbp,r8
+	mov	r10,QWORD[rsp]
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[rcx]
+	adc	rdx,0
+
+	imul	rbp,r10	; rbp = r8 * low word of the running sum
+	mov	r11,rdx
+
+	mul	rbp
+	add	r10,rax
+	mov	rax,QWORD[8+rsi]
+	adc	rdx,0
+	mov	r10,QWORD[8+rsp]
+	mov	r13,rdx
+
+	lea	r15,[1+r15]
+	jmp	NEAR $L$inner_enter
+
+ALIGN	16
+$L$inner:	; same shape as $L$1st, with the previous scratch word (r10) added in
+	add	r13,rax
+	mov	rax,QWORD[r15*8+rsi]
+	adc	rdx,0
+	add	r13,r10
+	mov	r10,QWORD[r15*8+rsp]
+	adc	rdx,0
+	mov	QWORD[((-16))+r15*8+rsp],r13
+	mov	r13,rdx
+
+$L$inner_enter:
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[r15*8+rcx]
+	adc	rdx,0
+	add	r10,r11
+	mov	r11,rdx
+	adc	r11,0
+	lea	r15,[1+r15]
+
+	mul	rbp
+	cmp	r15,r9
+	jne	NEAR $L$inner	; loop until r15 equals r9
+
+	add	r13,rax
+	adc	rdx,0
+	add	r13,r10
+	mov	r10,QWORD[r9*8+rsp]	; r10 = carry word the previous pass left at scratch[r9]
+	adc	rdx,0
+	mov	QWORD[((-16))+r9*8+rsp],r13
+	mov	r13,rdx
+
+	xor	rdx,rdx
+	add	r13,r11
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-8))+r9*8+rsp],r13
+	mov	QWORD[r9*8+rsp],rdx
+
+	lea	r14,[1+r14]
+	cmp	r14,r9
+	jb	NEAR $L$outer	; repeat while r14 < r9
+
+	xor	r14,r14	; r14 = 0; xor also clears CF ahead of the sbb chain
+	mov	rax,QWORD[rsp]
+	lea	rsi,[rsp]	; rsi = scratch base
+	mov	r15,r9
+	jmp	NEAR $L$sub
+ALIGN	16
+$L$sub:	sbb	rax,QWORD[r14*8+rcx]
+	mov	QWORD[r14*8+rdi],rax	; [rdi] = scratch - [rcx], one word per pass
+	mov	rax,QWORD[8+r14*8+rsi]
+	lea	r14,[1+r14]
+	dec	r15	; dec leaves CF untouched, so the borrow carries into the next pass
+	jnz	NEAR $L$sub
+
+	sbb	rax,0	; rax = scratch[r9] minus the final borrow; used as the select mask
+	mov	rbx,-1
+	xor	rbx,rax	; rbx = NOT rax
+	xor	r14,r14
+	mov	r15,r9
+
+$L$copy:	; [rdi] = ([rdi] AND rbx) OR ([rsp] AND rax), one word per pass
+	mov	rcx,QWORD[r14*8+rdi]
+	mov	rdx,QWORD[r14*8+rsp]
+	and	rcx,rbx
+	and	rdx,rax
+	mov	QWORD[r14*8+rsp],r14	; overwrite the scratch word with the loop index
+	or	rdx,rcx
+	mov	QWORD[r14*8+rdi],rdx
+	lea	r14,[1+r14]
+	sub	r15,1
+	jnz	NEAR $L$copy
+
+	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = entry rsp saved at $L$mul_page_walk_done
+
+	mov	rax,1	; return value 1
+
+	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed at $L$mul_enter
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; back to the entry rsp
+
+$L$mul_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_mul_mont_gather5:
+
+ALIGN	32
+bn_mul4x_mont_gather5:	; frame setup and teardown around mul4x_internal; bn_mul_mont_gather5 jumps in at $L$mul4x_enter
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; rdi/rsi are parked at [rsp+8]/[rsp+16] and reloaded at $L$mul4x_epilogue
+	mov	rax,rsp
+$L$SEH_begin_bn_mul4x_mont_gather5:
+	mov	rdi,rcx	; move incoming rcx,rdx,r8,r9 and two stack words into rdi,rsi,rdx,rcx,r8,r9
+	mov	rsi,rdx
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+DB	0x67	; raw 0x67 prefix byte ahead of the next instruction; NOTE(review): presumably padding - confirm
+	mov	rax,rsp	; rax = entry rsp; stored at [rsp+40] of the new frame below
+
+$L$mul4x_enter:
+	and	r11d,0x80108	; r11d was loaded from OPENSSL_ia32cap_P+8 by bn_mul_mont_gather5 before it jumps here
+	cmp	r11d,0x80108
+	je	NEAR $L$mulx4x_enter	; all 0x80108 bits set: use the $L$mulx4x_enter path instead
+	push	rbx	; save rbx,rbp,r12-r15; restored from [rsi-8]..[rsi-48] before $L$mul4x_epilogue
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$mul4x_prologue:
+
+DB	0x67	; raw 0x67 prefix byte ahead of the next instruction
+	shl	r9d,3	; r9 = count * 8 (byte length)
+	lea	r10,[r9*2+r9]	; r10 = 3 * byte length
+	neg	r9	; r9 = -byte length
+
+
+
+
+
+
+
+
+
+
+	lea	r11,[((-320))+r9*2+rsp]	; r11 = rsp - 2*byte length - 320
+	mov	rbp,rsp
+	sub	r11,rdi
+	and	r11,4095	; (candidate frame - rdi) modulo 4096; NOTE(review): reason for this placement rule is not visible here
+	cmp	r10,r11
+	jb	NEAR $L$mul4xsp_alt
+	sub	rbp,r11
+	lea	rbp,[((-320))+r9*2+rbp]
+	jmp	NEAR $L$mul4xsp_done
+
+ALIGN	32
+$L$mul4xsp_alt:
+	lea	r10,[((4096-320))+r9*2]
+	lea	rbp,[((-320))+r9*2+rbp]
+	sub	r11,r10
+	mov	r10,0	; mov (not xor) so the flags from the sub survive for cmovc
+	cmovc	r11,r10	; if the sub borrowed, use 0 instead
+	sub	rbp,r11
+$L$mul4xsp_done:
+	and	rbp,-64	; 64-byte align the frame base
+	mov	r11,rsp
+	sub	r11,rbp
+	and	r11,-4096
+	lea	rsp,[rbp*1+r11]	; rsp = rbp + ((old rsp - rbp) rounded down to a multiple of 4096)
+	mov	r10,QWORD[rsp]	; read from the new rsp (touches that page)
+	cmp	rsp,rbp
+	ja	NEAR $L$mul4x_page_walk
+	jmp	NEAR $L$mul4x_page_walk_done
+
+$L$mul4x_page_walk:
+	lea	rsp,[((-4096))+rsp]	; step down 4096 bytes at a time, reading each page, until rsp reaches rbp
+	mov	r10,QWORD[rsp]
+	cmp	rsp,rbp
+	ja	NEAR $L$mul4x_page_walk
+$L$mul4x_page_walk_done:
+
+	neg	r9	; r9 = +byte length again
+
+	mov	QWORD[40+rsp],rax	; frame slot 40: entry rsp
+
+$L$mul4x_body:
+
+	call	mul4x_internal	; defined immediately after this routine
+
+	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp from frame slot 40
+
+	mov	rax,1	; return value 1
+
+	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed after $L$mul4x_enter
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; back to the entry rsp
+
+$L$mul4x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_mul4x_mont_gather5:
+
+
+ALIGN	32
+mul4x_internal:
+
+	shl	r9,5
+	movd	xmm5,DWORD[56+rax]
+	lea	rax,[$L$inc]
+	lea	r13,[128+r9*1+rdx]
+	shr	r9,5
+	movdqa	xmm0,XMMWORD[rax]
+	movdqa	xmm1,XMMWORD[16+rax]
+	lea	r10,[((88-112))+r9*1+rsp]
+	lea	r12,[128+rdx]
+
+	pshufd	xmm5,xmm5,0
+	movdqa	xmm4,xmm1
+DB	0x67,0x67
+	movdqa	xmm2,xmm1
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+DB	0x67
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[112+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[128+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[144+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[160+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[176+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[192+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[208+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[224+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[240+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[256+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[272+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[288+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[304+r10],xmm0
+
+	paddd	xmm3,xmm2
+DB	0x67
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[320+r10],xmm1
+
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[336+r10],xmm2
+	pand	xmm0,XMMWORD[64+r12]
+
+	pand	xmm1,XMMWORD[80+r12]
+	pand	xmm2,XMMWORD[96+r12]
+	movdqa	XMMWORD[352+r10],xmm3
+	pand	xmm3,XMMWORD[112+r12]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[((-128))+r12]
+	movdqa	xmm5,XMMWORD[((-112))+r12]
+	movdqa	xmm2,XMMWORD[((-96))+r12]
+	pand	xmm4,XMMWORD[112+r10]
+	movdqa	xmm3,XMMWORD[((-80))+r12]
+	pand	xmm5,XMMWORD[128+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[144+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[160+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[((-64))+r12]
+	movdqa	xmm5,XMMWORD[((-48))+r12]
+	movdqa	xmm2,XMMWORD[((-32))+r12]
+	pand	xmm4,XMMWORD[176+r10]
+	movdqa	xmm3,XMMWORD[((-16))+r12]
+	pand	xmm5,XMMWORD[192+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[208+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[224+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[r12]
+	movdqa	xmm5,XMMWORD[16+r12]
+	movdqa	xmm2,XMMWORD[32+r12]
+	pand	xmm4,XMMWORD[240+r10]
+	movdqa	xmm3,XMMWORD[48+r12]
+	pand	xmm5,XMMWORD[256+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[272+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[288+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	por	xmm0,xmm1
+	pshufd	xmm1,xmm0,0x4e
+	por	xmm0,xmm1
+	lea	r12,[256+r12]
+DB	102,72,15,126,195
+
+	mov	QWORD[((16+8))+rsp],r13
+	mov	QWORD[((56+8))+rsp],rdi
+
+	mov	r8,QWORD[r8]
+	mov	rax,QWORD[rsi]
+	lea	rsi,[r9*1+rsi]
+	neg	r9
+
+	mov	rbp,r8
+	mul	rbx
+	mov	r10,rax
+	mov	rax,QWORD[rcx]
+
+	imul	rbp,r10
+	lea	r14,[((64+8))+rsp]
+	mov	r11,rdx
+
+	mul	rbp
+	add	r10,rax
+	mov	rax,QWORD[8+r9*1+rsi]
+	adc	rdx,0
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[16+r9*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	lea	r15,[32+r9]
+	lea	rcx,[32+rcx]
+	adc	rdx,0
+	mov	QWORD[r14],rdi
+	mov	r13,rdx
+	jmp	NEAR $L$1st4x
+
+ALIGN	32
+$L$1st4x:
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[((-16))+rcx]
+	lea	r14,[32+r14]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*1+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r14],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[r15*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r14],rdi
+	mov	r13,rdx
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[rcx]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[8+r15*1+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-8))+r14],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[16+r15*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	lea	rcx,[32+rcx]
+	adc	rdx,0
+	mov	QWORD[r14],rdi
+	mov	r13,rdx
+
+	add	r15,32
+	jnz	NEAR $L$1st4x
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[((-16))+rcx]
+	lea	r14,[32+r14]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-24))+r14],r13
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+rcx]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[r9*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-16))+r14],rdi
+	mov	r13,rdx
+
+	lea	rcx,[r9*1+rcx]
+
+	xor	rdi,rdi
+	add	r13,r10
+	adc	rdi,0
+	mov	QWORD[((-8))+r14],r13
+
+	jmp	NEAR $L$outer4x
+
+ALIGN	32
+$L$outer4x:
+	lea	rdx,[((16+128))+r14]
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	movdqa	xmm0,XMMWORD[((-128))+r12]
+	movdqa	xmm1,XMMWORD[((-112))+r12]
+	movdqa	xmm2,XMMWORD[((-96))+r12]
+	movdqa	xmm3,XMMWORD[((-80))+r12]
+	pand	xmm0,XMMWORD[((-128))+rdx]
+	pand	xmm1,XMMWORD[((-112))+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-96))+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-80))+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[((-64))+r12]
+	movdqa	xmm1,XMMWORD[((-48))+r12]
+	movdqa	xmm2,XMMWORD[((-32))+r12]
+	movdqa	xmm3,XMMWORD[((-16))+r12]
+	pand	xmm0,XMMWORD[((-64))+rdx]
+	pand	xmm1,XMMWORD[((-48))+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-32))+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-16))+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[r12]
+	movdqa	xmm1,XMMWORD[16+r12]
+	movdqa	xmm2,XMMWORD[32+r12]
+	movdqa	xmm3,XMMWORD[48+r12]
+	pand	xmm0,XMMWORD[rdx]
+	pand	xmm1,XMMWORD[16+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[32+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[48+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[64+r12]
+	movdqa	xmm1,XMMWORD[80+r12]
+	movdqa	xmm2,XMMWORD[96+r12]
+	movdqa	xmm3,XMMWORD[112+r12]
+	pand	xmm0,XMMWORD[64+rdx]
+	pand	xmm1,XMMWORD[80+rdx]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[96+rdx]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[112+rdx]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	por	xmm4,xmm5
+	pshufd	xmm0,xmm4,0x4e
+	por	xmm0,xmm4
+	lea	r12,[256+r12]
+DB	102,72,15,126,195
+
+	mov	r10,QWORD[r9*1+r14]
+	mov	rbp,r8
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[rcx]
+	adc	rdx,0
+
+	imul	rbp,r10
+	mov	r11,rdx
+	mov	QWORD[r14],rdi
+
+	lea	r14,[r9*1+r14]
+
+	mul	rbp
+	add	r10,rax
+	mov	rax,QWORD[8+r9*1+rsi]
+	adc	rdx,0
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]
+	adc	rdx,0
+	add	r11,QWORD[8+r14]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[16+r9*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	lea	r15,[32+r9]
+	lea	rcx,[32+rcx]
+	adc	rdx,0
+	mov	r13,rdx
+	jmp	NEAR $L$inner4x
+
+ALIGN	32
+$L$inner4x:
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[((-16))+rcx]
+	adc	rdx,0
+	add	r10,QWORD[16+r14]
+	lea	r14,[32+r14]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+r15*1+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-32))+r14],rdi
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[((-8))+rcx]
+	adc	rdx,0
+	add	r11,QWORD[((-8))+r14]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[r15*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-24))+r14],r13
+	mov	r13,rdx
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[rcx]
+	adc	rdx,0
+	add	r10,QWORD[r14]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[8+r15*1+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-16))+r14],rdi
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[8+rcx]
+	adc	rdx,0
+	add	r11,QWORD[8+r14]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[16+r15*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	lea	rcx,[32+rcx]
+	adc	rdx,0
+	mov	QWORD[((-8))+r14],r13
+	mov	r13,rdx
+
+	add	r15,32
+	jnz	NEAR $L$inner4x
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[((-16))+rcx]
+	adc	rdx,0
+	add	r10,QWORD[16+r14]
+	lea	r14,[32+r14]
+	adc	rdx,0
+	mov	r11,rdx
+
+	mul	rbp
+	add	r13,rax
+	mov	rax,QWORD[((-8))+rsi]
+	adc	rdx,0
+	add	r13,r10
+	adc	rdx,0
+	mov	QWORD[((-32))+r14],rdi
+	mov	rdi,rdx
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,rbp
+	mov	rbp,QWORD[((-8))+rcx]
+	adc	rdx,0
+	add	r11,QWORD[((-8))+r14]
+	adc	rdx,0
+	mov	r10,rdx
+
+	mul	rbp
+	add	rdi,rax
+	mov	rax,QWORD[r9*1+rsi]
+	adc	rdx,0
+	add	rdi,r11
+	adc	rdx,0
+	mov	QWORD[((-24))+r14],r13
+	mov	r13,rdx
+
+	mov	QWORD[((-16))+r14],rdi
+	lea	rcx,[r9*1+rcx]
+
+	xor	rdi,rdi
+	add	r13,r10
+	adc	rdi,0
+	add	r13,QWORD[r14]
+	adc	rdi,0
+	mov	QWORD[((-8))+r14],r13
+
+	cmp	r12,QWORD[((16+8))+rsp]
+	jb	NEAR $L$outer4x
+	xor	rax,rax
+	sub	rbp,r13
+	adc	r15,r15
+	or	rdi,r15
+	sub	rax,rdi
+	lea	rbx,[r9*1+r14]
+	mov	r12,QWORD[rcx]
+	lea	rbp,[rcx]
+	mov	rcx,r9
+	sar	rcx,3+2
+	mov	rdi,QWORD[((56+8))+rsp]
+	dec	r12
+	xor	r10,r10
+	mov	r13,QWORD[8+rbp]
+	mov	r14,QWORD[16+rbp]
+	mov	r15,QWORD[24+rbp]
+	jmp	NEAR $L$sqr4x_sub_entry
+
+
+global	bn_power5
+
+ALIGN	32
+bn_power5:	; five (__bn_sqr8x_internal + __bn_post4x_internal) rounds, then one mul4x_internal; returns rax=1
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_bn_power5:
+	mov	rdi,rcx	; move the six incoming arguments (rcx, rdx, r8, r9,
+	mov	rsi,rdx	; [40+rsp], [48+rsp]) into rdi, rsi, rdx, rcx, r8, r9
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+	mov	rax,rsp	; rax = entry rsp; parked at [40+rsp] of the new frame and used to unwind
+
+	lea	r11,[OPENSSL_ia32cap_P]
+	mov	r11d,DWORD[8+r11]
+	and	r11d,0x80108	; NOTE(review): presumably the BMI1/BMI2/ADX capability bits - confirm
+	cmp	r11d,0x80108
+	je	NEAR $L$powerx5_enter	; all masked bits set: continue at $L$powerx5_enter (outside this block)
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$power5_prologue:
+
+	shl	r9d,3	; r9 = r9d * 8
+	lea	r10d,[r9*2+r9]	; r10 = 3 * r9
+	neg	r9
+	mov	r8,QWORD[r8]	; replace the pointer in r8 with the qword it points to
+; Pick the frame base (rbp): start 320 + 2*|r9| bytes below rsp and lower it
+; further by an amount derived from the low 12 bits of its distance to rdi.
+; NOTE(review): presumably avoids 4 KiB aliasing with the buffer at rdi - confirm.
+
+
+
+
+
+	lea	r11,[((-320))+r9*2+rsp]
+	mov	rbp,rsp
+	sub	r11,rdi
+	and	r11,4095
+	cmp	r10,r11
+	jb	NEAR $L$pwr_sp_alt
+	sub	rbp,r11
+	lea	rbp,[((-320))+r9*2+rbp]
+	jmp	NEAR $L$pwr_sp_done
+
+ALIGN	32
+$L$pwr_sp_alt:
+	lea	r10,[((4096-320))+r9*2]
+	lea	rbp,[((-320))+r9*2+rbp]
+	sub	r11,r10
+	mov	r10,0
+	cmovc	r11,r10	; clamp the adjustment to 0 when the subtraction borrowed
+	sub	rbp,r11
+$L$pwr_sp_done:
+	and	rbp,-64	; round the frame base down to a 64-byte boundary
+	mov	r11,rsp
+	sub	r11,rbp
+	and	r11,-4096
+	lea	rsp,[rbp*1+r11]	; rsp = rbp + whole 4096-byte pages of the distance
+	mov	r10,QWORD[rsp]	; read a qword at the new rsp
+	cmp	rsp,rbp
+	ja	NEAR $L$pwr_page_walk
+	jmp	NEAR $L$pwr_page_walk_done
+
+$L$pwr_page_walk:	; step rsp down 4096 bytes at a time, reading a qword each step, until rsp == rbp
+	lea	rsp,[((-4096))+rsp]
+	mov	r10,QWORD[rsp]
+	cmp	rsp,rbp
+	ja	NEAR $L$pwr_page_walk
+$L$pwr_page_walk_done:
+
+	mov	r10,r9	; r10 keeps the negated byte count
+	neg	r9	; r9 is the positive byte count again
+
+
+
+
+
+
+
+
+
+
+	mov	QWORD[32+rsp],r8	; frame slot 32: the qword loaded through r8 above
+	mov	QWORD[40+rsp],rax	; frame slot 40: entry rsp
+
+$L$power5_body:
+DB	102,72,15,110,207	; encodes movq xmm1,rdi
+DB	102,72,15,110,209	; encodes movq xmm2,rcx
+DB	102,73,15,110,218	; encodes movq xmm3,r10
+DB	102,72,15,110,226	; encodes movq xmm4,rdx
+
+	call	__bn_sqr8x_internal	; round 1 of 5
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal	; round 2
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal	; round 3
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal	; round 4
+	call	__bn_post4x_internal
+	call	__bn_sqr8x_internal	; round 5
+	call	__bn_post4x_internal
+
+DB	102,72,15,126,209	; encodes movq rcx,xmm2
+DB	102,72,15,126,226	; encodes movq rdx,xmm4
+	mov	rdi,rsi
+	mov	rax,QWORD[40+rsp]	; rax = entry rsp, as mul4x_internal reads [56+rax]
+	lea	r8,[32+rsp]	; r8 = address of frame slot 32
+
+	call	mul4x_internal
+
+	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp
+
+	mov	rax,1	; return value
+	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed in the prologue
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; drop the frame
+
+$L$power5_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_power5:
+
+global	bn_sqr8x_internal
+
+
+ALIGN	32
+bn_sqr8x_internal:
+__bn_sqr8x_internal:
+; Squaring core; bn_power5 calls it through the __ alias.
+; Register/stack use visible in this block:
+;   rsi = source words, r9 = length in bytes, r10 = negated length (rbp is seeded as 32+r10)
+;   [48+8+rsp ...] = scratch buffer that receives the double-width result
+;   xmm2 = value moved into rbp by the last line of this block
+; Phases:
+;   1. $L$sqr4x_1st / $L$sqr4x_outer / $L$sqr4x_inner accumulate products of pairs of
+;      source words (mul r14 / mul r15) into the scratch buffer.
+;   2. $L$sqr4x_shift_n_add doubles those sums (lea x*2 / shr 63) and adds word squares (mul rax).
+; Execution then falls through into __bn_sqr8x_reduction; there is no ret in this block.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+	lea	rbp,[32+r10]
+	lea	rsi,[r9*1+rsi]	; rsi now points past the source; words are addressed with negative offsets
+
+	mov	rcx,r9
+
+
+	mov	r14,QWORD[((-32))+rbp*1+rsi]
+	lea	rdi,[((48+8))+r9*2+rsp]
+	mov	rax,QWORD[((-24))+rbp*1+rsi]
+	lea	rdi,[((-32))+rbp*1+rdi]
+	mov	rbx,QWORD[((-16))+rbp*1+rsi]
+	mov	r15,rax
+
+	mul	r14
+	mov	r10,rax
+	mov	rax,rbx
+	mov	r11,rdx
+	mov	QWORD[((-24))+rbp*1+rdi],r10
+
+	mul	r14
+	add	r11,rax
+	mov	rax,rbx
+	adc	rdx,0
+	mov	QWORD[((-16))+rbp*1+rdi],r11
+	mov	r10,rdx
+
+
+	mov	rbx,QWORD[((-8))+rbp*1+rsi]
+	mul	r15
+	mov	r12,rax
+	mov	rax,rbx
+	mov	r13,rdx
+
+	lea	rcx,[rbp]
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	mov	r11,rdx
+	adc	r11,0
+	add	r10,r12
+	adc	r11,0
+	mov	QWORD[((-8))+rcx*1+rdi],r10
+	jmp	NEAR $L$sqr4x_1st
+
+ALIGN	32
+$L$sqr4x_1st:	; first pass: four source words per iteration, results stored (not accumulated) at rdi
+	mov	rbx,QWORD[rcx*1+rsi]
+	mul	r15
+	add	r13,rax
+	mov	rax,rbx
+	mov	r12,rdx
+	adc	r12,0
+
+	mul	r14
+	add	r11,rax
+	mov	rax,rbx
+	mov	rbx,QWORD[8+rcx*1+rsi]
+	mov	r10,rdx
+	adc	r10,0
+	add	r11,r13
+	adc	r10,0
+
+
+	mul	r15
+	add	r12,rax
+	mov	rax,rbx
+	mov	QWORD[rcx*1+rdi],r11
+	mov	r13,rdx
+	adc	r13,0
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	mov	rbx,QWORD[16+rcx*1+rsi]
+	mov	r11,rdx
+	adc	r11,0
+	add	r10,r12
+	adc	r11,0
+
+	mul	r15
+	add	r13,rax
+	mov	rax,rbx
+	mov	QWORD[8+rcx*1+rdi],r10
+	mov	r12,rdx
+	adc	r12,0
+
+	mul	r14
+	add	r11,rax
+	mov	rax,rbx
+	mov	rbx,QWORD[24+rcx*1+rsi]
+	mov	r10,rdx
+	adc	r10,0
+	add	r11,r13
+	adc	r10,0
+
+
+	mul	r15
+	add	r12,rax
+	mov	rax,rbx
+	mov	QWORD[16+rcx*1+rdi],r11
+	mov	r13,rdx
+	adc	r13,0
+	lea	rcx,[32+rcx]
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	mov	r11,rdx
+	adc	r11,0
+	add	r10,r12
+	adc	r11,0
+	mov	QWORD[((-8))+rcx*1+rdi],r10
+
+	cmp	rcx,0	; rcx counts up from a negative offset; the pass ends at 0
+	jne	NEAR $L$sqr4x_1st
+
+	mul	r15
+	add	r13,rax
+	lea	rbp,[16+rbp]
+	adc	rdx,0
+	add	r13,r11
+	adc	rdx,0
+
+	mov	QWORD[rdi],r13
+	mov	r12,rdx
+	mov	QWORD[8+rdi],rdx
+	jmp	NEAR $L$sqr4x_outer
+
+ALIGN	32
+$L$sqr4x_outer:	; later passes: rbp advances 16 bytes per pass, sums are added to what rdi already holds
+	mov	r14,QWORD[((-32))+rbp*1+rsi]
+	lea	rdi,[((48+8))+r9*2+rsp]
+	mov	rax,QWORD[((-24))+rbp*1+rsi]
+	lea	rdi,[((-32))+rbp*1+rdi]
+	mov	rbx,QWORD[((-16))+rbp*1+rsi]
+	mov	r15,rax
+
+	mul	r14
+	mov	r10,QWORD[((-24))+rbp*1+rdi]
+	add	r10,rax
+	mov	rax,rbx
+	adc	rdx,0
+	mov	QWORD[((-24))+rbp*1+rdi],r10
+	mov	r11,rdx
+
+	mul	r14
+	add	r11,rax
+	mov	rax,rbx
+	adc	rdx,0
+	add	r11,QWORD[((-16))+rbp*1+rdi]
+	mov	r10,rdx
+	adc	r10,0
+	mov	QWORD[((-16))+rbp*1+rdi],r11
+
+	xor	r12,r12
+
+	mov	rbx,QWORD[((-8))+rbp*1+rsi]
+	mul	r15
+	add	r12,rax
+	mov	rax,rbx
+	adc	rdx,0
+	add	r12,QWORD[((-8))+rbp*1+rdi]
+	mov	r13,rdx
+	adc	r13,0
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	adc	rdx,0
+	add	r10,r12
+	mov	r11,rdx
+	adc	r11,0
+	mov	QWORD[((-8))+rbp*1+rdi],r10
+
+	lea	rcx,[rbp]
+	jmp	NEAR $L$sqr4x_inner
+
+ALIGN	32
+$L$sqr4x_inner:	; two source words per iteration, accumulated into [rcx+rdi]
+	mov	rbx,QWORD[rcx*1+rsi]
+	mul	r15
+	add	r13,rax
+	mov	rax,rbx
+	mov	r12,rdx
+	adc	r12,0
+	add	r13,QWORD[rcx*1+rdi]
+	adc	r12,0
+
+DB	0x67
+	mul	r14
+	add	r11,rax
+	mov	rax,rbx
+	mov	rbx,QWORD[8+rcx*1+rsi]
+	mov	r10,rdx
+	adc	r10,0
+	add	r11,r13
+	adc	r10,0
+
+	mul	r15
+	add	r12,rax
+	mov	QWORD[rcx*1+rdi],r11
+	mov	rax,rbx
+	mov	r13,rdx
+	adc	r13,0
+	add	r12,QWORD[8+rcx*1+rdi]
+	lea	rcx,[16+rcx]
+	adc	r13,0
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	adc	rdx,0
+	add	r10,r12
+	mov	r11,rdx
+	adc	r11,0
+	mov	QWORD[((-8))+rcx*1+rdi],r10
+
+	cmp	rcx,0
+	jne	NEAR $L$sqr4x_inner
+
+DB	0x67
+	mul	r15
+	add	r13,rax
+	adc	rdx,0
+	add	r13,r11
+	adc	rdx,0
+
+	mov	QWORD[rdi],r13
+	mov	r12,rdx
+	mov	QWORD[8+rdi],rdx
+
+	add	rbp,16
+	jnz	NEAR $L$sqr4x_outer	; repeat until rbp reaches 0
+
+
+	mov	r14,QWORD[((-32))+rsi]	; final pass over the last four source words (rbp == 0 here)
+	lea	rdi,[((48+8))+r9*2+rsp]
+	mov	rax,QWORD[((-24))+rsi]
+	lea	rdi,[((-32))+rbp*1+rdi]
+	mov	rbx,QWORD[((-16))+rsi]
+	mov	r15,rax
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	mov	r11,rdx
+	adc	r11,0
+
+	mul	r14
+	add	r11,rax
+	mov	rax,rbx
+	mov	QWORD[((-24))+rdi],r10
+	mov	r10,rdx
+	adc	r10,0
+	add	r11,r13
+	mov	rbx,QWORD[((-8))+rsi]
+	adc	r10,0
+
+	mul	r15
+	add	r12,rax
+	mov	rax,rbx
+	mov	QWORD[((-16))+rdi],r11
+	mov	r13,rdx
+	adc	r13,0
+
+	mul	r14
+	add	r10,rax
+	mov	rax,rbx
+	mov	r11,rdx
+	adc	r11,0
+	add	r10,r12
+	adc	r11,0
+	mov	QWORD[((-8))+rdi],r10
+
+	mul	r15
+	add	r13,rax
+	mov	rax,QWORD[((-16))+rsi]
+	adc	rdx,0
+	add	r13,r11
+	adc	rdx,0
+
+	mov	QWORD[rdi],r13
+	mov	r12,rdx
+	mov	QWORD[8+rdi],rdx
+
+	mul	rbx
+	add	rbp,16
+	xor	r14,r14
+	sub	rbp,r9	; rbp = 16 - r9: negative index for the shift-and-add phase
+	xor	r15,r15
+
+	add	rax,r12
+	adc	rdx,0
+	mov	QWORD[8+rdi],rax
+	mov	QWORD[16+rdi],rdx
+	mov	QWORD[24+rdi],r15
+
+	mov	rax,QWORD[((-16))+rbp*1+rsi]
+	lea	rdi,[((48+8))+rsp]	; rdi restarts at the bottom of the scratch buffer
+	xor	r10,r10
+	mov	r11,QWORD[8+rdi]
+
+	lea	r12,[r10*2+r14]	; word*2 plus the bit shifted out of the previous word (r14)
+	shr	r10,63
+	lea	r13,[r11*2+rcx]	; rcx is 0 here (the loops above exit only when rcx == 0)
+	shr	r11,63
+	or	r13,r10
+	mov	r10,QWORD[16+rdi]
+	mov	r14,r11
+	mul	rax	; rdx:rax = square of the current source word
+	neg	r15	; turn the saved carry mask (0 or -1) back into CF
+	mov	r11,QWORD[24+rdi]
+	adc	r12,rax
+	mov	rax,QWORD[((-8))+rbp*1+rsi]
+	mov	QWORD[rdi],r12
+	adc	r13,rdx
+
+	lea	rbx,[r10*2+r14]
+	mov	QWORD[8+rdi],r13
+	sbb	r15,r15	; save CF as a 0/-1 mask across the flag-clobbering shr/or below
+	shr	r10,63
+	lea	r8,[r11*2+rcx]
+	shr	r11,63
+	or	r8,r10
+	mov	r10,QWORD[32+rdi]
+	mov	r14,r11
+	mul	rax
+	neg	r15
+	mov	r11,QWORD[40+rdi]
+	adc	rbx,rax
+	mov	rax,QWORD[rbp*1+rsi]
+	mov	QWORD[16+rdi],rbx
+	adc	r8,rdx
+	lea	rbp,[16+rbp]
+	mov	QWORD[24+rdi],r8
+	sbb	r15,r15
+	lea	rdi,[64+rdi]
+	jmp	NEAR $L$sqr4x_shift_n_add
+
+ALIGN	32
+$L$sqr4x_shift_n_add:	; per iteration: four source words squared, eight result words doubled and updated
+	lea	r12,[r10*2+r14]
+	shr	r10,63
+	lea	r13,[r11*2+rcx]
+	shr	r11,63
+	or	r13,r10
+	mov	r10,QWORD[((-16))+rdi]
+	mov	r14,r11
+	mul	rax
+	neg	r15
+	mov	r11,QWORD[((-8))+rdi]
+	adc	r12,rax
+	mov	rax,QWORD[((-8))+rbp*1+rsi]
+	mov	QWORD[((-32))+rdi],r12
+	adc	r13,rdx
+
+	lea	rbx,[r10*2+r14]
+	mov	QWORD[((-24))+rdi],r13
+	sbb	r15,r15
+	shr	r10,63
+	lea	r8,[r11*2+rcx]
+	shr	r11,63
+	or	r8,r10
+	mov	r10,QWORD[rdi]
+	mov	r14,r11
+	mul	rax
+	neg	r15
+	mov	r11,QWORD[8+rdi]
+	adc	rbx,rax
+	mov	rax,QWORD[rbp*1+rsi]
+	mov	QWORD[((-16))+rdi],rbx
+	adc	r8,rdx
+
+	lea	r12,[r10*2+r14]
+	mov	QWORD[((-8))+rdi],r8
+	sbb	r15,r15
+	shr	r10,63
+	lea	r13,[r11*2+rcx]
+	shr	r11,63
+	or	r13,r10
+	mov	r10,QWORD[16+rdi]
+	mov	r14,r11
+	mul	rax
+	neg	r15
+	mov	r11,QWORD[24+rdi]
+	adc	r12,rax
+	mov	rax,QWORD[8+rbp*1+rsi]
+	mov	QWORD[rdi],r12
+	adc	r13,rdx
+
+	lea	rbx,[r10*2+r14]
+	mov	QWORD[8+rdi],r13
+	sbb	r15,r15
+	shr	r10,63
+	lea	r8,[r11*2+rcx]
+	shr	r11,63
+	or	r8,r10
+	mov	r10,QWORD[32+rdi]
+	mov	r14,r11
+	mul	rax
+	neg	r15
+	mov	r11,QWORD[40+rdi]
+	adc	rbx,rax
+	mov	rax,QWORD[16+rbp*1+rsi]
+	mov	QWORD[16+rdi],rbx
+	adc	r8,rdx
+	mov	QWORD[24+rdi],r8
+	sbb	r15,r15
+	lea	rdi,[64+rdi]
+	add	rbp,32
+	jnz	NEAR $L$sqr4x_shift_n_add
+
+	lea	r12,[r10*2+r14]	; tail: the last two source words, same pattern as the loop body
+DB	0x67
+	shr	r10,63
+	lea	r13,[r11*2+rcx]
+	shr	r11,63
+	or	r13,r10
+	mov	r10,QWORD[((-16))+rdi]
+	mov	r14,r11
+	mul	rax
+	neg	r15
+	mov	r11,QWORD[((-8))+rdi]
+	adc	r12,rax
+	mov	rax,QWORD[((-8))+rsi]
+	mov	QWORD[((-32))+rdi],r12
+	adc	r13,rdx
+
+	lea	rbx,[r10*2+r14]
+	mov	QWORD[((-24))+rdi],r13
+	sbb	r15,r15
+	shr	r10,63
+	lea	r8,[r11*2+rcx]
+	shr	r11,63
+	or	r8,r10
+	mul	rax
+	neg	r15
+	adc	rbx,rax
+	adc	r8,rdx
+	mov	QWORD[((-16))+rdi],rbx
+	mov	QWORD[((-8))+rdi],r8
+DB	102,72,15,126,213	; encodes movq rbp,xmm2; execution continues into __bn_sqr8x_reduction
+__bn_sqr8x_reduction:	; reduces the buffer at [48+8+rsp] eight words at a time using the words at rbp and the factor at [32+8+rsp]
+	xor	rax,rax	; rax = carry between 8-word windows, initially 0
+	lea	rcx,[rbp*1+r9]
+	lea	rdx,[((48+8))+r9*2+rsp]
+	mov	QWORD[((0+8))+rsp],rcx	; slot 0+8: rbp + r9, the end marker compared against rbp below
+	lea	rdi,[((48+8))+r9*1+rsp]
+	mov	QWORD[((8+8))+rsp],rdx	; slot 8+8: end of the buffer, compared against rdi at the bottom
+	neg	r9
+	jmp	NEAR $L$8x_reduction_loop
+
+ALIGN	32
+$L$8x_reduction_loop:	; one iteration per 8-word window of the buffer
+	lea	rdi,[r9*1+rdi]
+DB	0x66
+	mov	rbx,QWORD[rdi]	; load the eight window words into rbx, r9..r15
+	mov	r9,QWORD[8+rdi]
+	mov	r10,QWORD[16+rdi]
+	mov	r11,QWORD[24+rdi]
+	mov	r12,QWORD[32+rdi]
+	mov	r13,QWORD[40+rdi]
+	mov	r14,QWORD[48+rdi]
+	mov	r15,QWORD[56+rdi]
+	mov	QWORD[rdx],rax	; park the carry from the previous window at [rdx]
+	lea	rdi,[64+rdi]
+
+DB	0x67
+	mov	r8,rbx
+	imul	rbx,QWORD[((32+8))+rsp]	; rbx = lowest window word * the factor stored at [32+8+rsp]
+	mov	rax,QWORD[rbp]
+	mov	ecx,8
+	jmp	NEAR $L$8x_reduce
+
+ALIGN	32
+$L$8x_reduce:	; 8 iterations: add rbx * (eight words at rbp) to the window, shifting it down one word
+	mul	rbx
+	mov	rax,QWORD[8+rbp]
+	neg	r8	; sets CF when r8 != 0; the low product word itself is not added
+	mov	r8,rdx
+	adc	r8,0
+
+	mul	rbx
+	add	r9,rax
+	mov	rax,QWORD[16+rbp]
+	adc	rdx,0
+	add	r8,r9
+	mov	QWORD[((48-8+8))+rcx*8+rsp],rbx	; remember this multiplier for $L$8x_tail
+	mov	r9,rdx
+	adc	r9,0
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[24+rbp]
+	adc	rdx,0
+	add	r9,r10
+	mov	rsi,QWORD[((32+8))+rsp]
+	mov	r10,rdx
+	adc	r10,0
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[32+rbp]
+	adc	rdx,0
+	imul	rsi,r8	; rsi = next multiplier, from the new lowest word
+	add	r10,r11
+	mov	r11,rdx
+	adc	r11,0
+
+	mul	rbx
+	add	r12,rax
+	mov	rax,QWORD[40+rbp]
+	adc	rdx,0
+	add	r11,r12
+	mov	r12,rdx
+	adc	r12,0
+
+	mul	rbx
+	add	r13,rax
+	mov	rax,QWORD[48+rbp]
+	adc	rdx,0
+	add	r12,r13
+	mov	r13,rdx
+	adc	r13,0
+
+	mul	rbx
+	add	r14,rax
+	mov	rax,QWORD[56+rbp]
+	adc	rdx,0
+	add	r13,r14
+	mov	r14,rdx
+	adc	r14,0
+
+	mul	rbx
+	mov	rbx,rsi
+	add	r15,rax
+	mov	rax,QWORD[rbp]
+	adc	rdx,0
+	add	r14,r15
+	mov	r15,rdx
+	adc	r15,0
+
+	dec	ecx
+	jnz	NEAR $L$8x_reduce
+
+	lea	rbp,[64+rbp]
+	xor	rax,rax
+	mov	rdx,QWORD[((8+8))+rsp]
+	cmp	rbp,QWORD[((0+8))+rsp]
+	jae	NEAR $L$8x_no_tail	; rbp already at the end marker: no further words to multiply
+
+DB	0x66
+	add	r8,QWORD[rdi]	; fold in the next eight buffer words
+	adc	r9,QWORD[8+rdi]
+	adc	r10,QWORD[16+rdi]
+	adc	r11,QWORD[24+rdi]
+	adc	r12,QWORD[32+rdi]
+	adc	r13,QWORD[40+rdi]
+	adc	r14,QWORD[48+rdi]
+	adc	r15,QWORD[56+rdi]
+	sbb	rsi,rsi	; keep the carry-out as a 0/-1 mask in rsi
+
+	mov	rbx,QWORD[((48+56+8))+rsp]	; first multiplier saved by $L$8x_reduce (stored when ecx was 8)
+	mov	ecx,8
+	mov	rax,QWORD[rbp]
+	jmp	NEAR $L$8x_tail
+
+ALIGN	32
+$L$8x_tail:	; 8 iterations: replay the saved multipliers against the next eight words at rbp
+	mul	rbx
+	add	r8,rax
+	mov	rax,QWORD[8+rbp]
+	mov	QWORD[rdi],r8
+	mov	r8,rdx
+	adc	r8,0
+
+	mul	rbx
+	add	r9,rax
+	mov	rax,QWORD[16+rbp]
+	adc	rdx,0
+	add	r8,r9
+	lea	rdi,[8+rdi]
+	mov	r9,rdx
+	adc	r9,0
+
+	mul	rbx
+	add	r10,rax
+	mov	rax,QWORD[24+rbp]
+	adc	rdx,0
+	add	r9,r10
+	mov	r10,rdx
+	adc	r10,0
+
+	mul	rbx
+	add	r11,rax
+	mov	rax,QWORD[32+rbp]
+	adc	rdx,0
+	add	r10,r11
+	mov	r11,rdx
+	adc	r11,0
+
+	mul	rbx
+	add	r12,rax
+	mov	rax,QWORD[40+rbp]
+	adc	rdx,0
+	add	r11,r12
+	mov	r12,rdx
+	adc	r12,0
+
+	mul	rbx
+	add	r13,rax
+	mov	rax,QWORD[48+rbp]
+	adc	rdx,0
+	add	r12,r13
+	mov	r13,rdx
+	adc	r13,0
+
+	mul	rbx
+	add	r14,rax
+	mov	rax,QWORD[56+rbp]
+	adc	rdx,0
+	add	r13,r14
+	mov	r14,rdx
+	adc	r14,0
+
+	mul	rbx
+	mov	rbx,QWORD[((48-16+8))+rcx*8+rsp]	; next saved multiplier
+	add	r15,rax
+	adc	rdx,0
+	add	r14,r15
+	mov	rax,QWORD[rbp]
+	mov	r15,rdx
+	adc	r15,0
+
+	dec	ecx
+	jnz	NEAR $L$8x_tail
+
+	lea	rbp,[64+rbp]
+	mov	rdx,QWORD[((8+8))+rsp]
+	cmp	rbp,QWORD[((0+8))+rsp]
+	jae	NEAR $L$8x_tail_done
+
+	mov	rbx,QWORD[((48+56+8))+rsp]
+	neg	rsi	; turn the saved carry mask back into CF
+	mov	rax,QWORD[rbp]
+	adc	r8,QWORD[rdi]
+	adc	r9,QWORD[8+rdi]
+	adc	r10,QWORD[16+rdi]
+	adc	r11,QWORD[24+rdi]
+	adc	r12,QWORD[32+rdi]
+	adc	r13,QWORD[40+rdi]
+	adc	r14,QWORD[48+rdi]
+	adc	r15,QWORD[56+rdi]
+	sbb	rsi,rsi
+
+	mov	ecx,8
+	jmp	NEAR $L$8x_tail
+
+ALIGN	32
+$L$8x_tail_done:
+	xor	rax,rax
+	add	r8,QWORD[rdx]	; add the carry parked at [rdx] by $L$8x_reduction_loop
+	adc	r9,0
+	adc	r10,0
+	adc	r11,0
+	adc	r12,0
+	adc	r13,0
+	adc	r14,0
+	adc	r15,0
+	adc	rax,0
+
+	neg	rsi
+$L$8x_no_tail:
+	adc	r8,QWORD[rdi]
+	adc	r9,QWORD[8+rdi]
+	adc	r10,QWORD[16+rdi]
+	adc	r11,QWORD[24+rdi]
+	adc	r12,QWORD[32+rdi]
+	adc	r13,QWORD[40+rdi]
+	adc	r14,QWORD[48+rdi]
+	adc	r15,QWORD[56+rdi]
+	adc	rax,0	; rax = carry handed to the next window (or left in rax on return)
+	mov	rcx,QWORD[((-8))+rbp]
+	xor	rsi,rsi
+
+DB	102,72,15,126,213	; encodes movq rbp,xmm2
+
+	mov	QWORD[rdi],r8
+	mov	QWORD[8+rdi],r9
+DB	102,73,15,126,217	; encodes movq r9,xmm3
+	mov	QWORD[16+rdi],r10
+	mov	QWORD[24+rdi],r11
+	mov	QWORD[32+rdi],r12
+	mov	QWORD[40+rdi],r13
+	mov	QWORD[48+rdi],r14
+	mov	QWORD[56+rdi],r15
+	lea	rdi,[64+rdi]
+
+	cmp	rdi,rdx
+	jb	NEAR $L$8x_reduction_loop	; continue until rdi reaches the buffer end in rdx
+	DB	0F3h,0C3h		;repret
+
+
+
+ALIGN	32
+__bn_post4x_internal:	; masked subtract: [rdi] = [rbx] + ((-[rbp]) AND mask), four words per iteration
+
+	mov	r12,QWORD[rbp]
+	lea	rbx,[r9*1+rdi]
+	mov	rcx,r9
+DB	102,72,15,126,207	; encodes movq rdi,xmm1
+	neg	rax	; rax becomes the AND mask applied below (0 -> 0, 1 -> all ones)
+DB	102,72,15,126,206	; encodes movq rsi,xmm1
+	sar	rcx,3+2	; rcx = r9 / 32; the loop does inc rcx until it reaches 0, so r9 must be negative here
+	dec	r12	; with the not below, the first word becomes -[rbp] (two's complement)
+	xor	r10,r10	; no incoming carry
+	mov	r13,QWORD[8+rbp]
+	mov	r14,QWORD[16+rbp]
+	mov	r15,QWORD[24+rbp]
+	jmp	NEAR $L$sqr4x_sub_entry
+
+ALIGN	16
+$L$sqr4x_sub:
+	mov	r12,QWORD[rbp]
+	mov	r13,QWORD[8+rbp]
+	mov	r14,QWORD[16+rbp]
+	mov	r15,QWORD[24+rbp]
+$L$sqr4x_sub_entry:	; also jumped to from the end of mul4x_internal
+	lea	rbp,[32+rbp]
+	not	r12
+	not	r13
+	not	r14
+	not	r15
+	and	r12,rax
+	and	r13,rax
+	and	r14,rax
+	and	r15,rax
+
+	neg	r10	; restore CF from the 0/-1 mask saved by the previous iteration
+	adc	r12,QWORD[rbx]
+	adc	r13,QWORD[8+rbx]
+	adc	r14,QWORD[16+rbx]
+	adc	r15,QWORD[24+rbx]
+	mov	QWORD[rdi],r12
+	lea	rbx,[32+rbx]
+	mov	QWORD[8+rdi],r13
+	sbb	r10,r10	; save CF as a 0/-1 mask (inc below does not touch CF, but neg above needs a register)
+	mov	QWORD[16+rdi],r14
+	mov	QWORD[24+rdi],r15
+	lea	rdi,[32+rdi]
+
+	inc	rcx
+	jnz	NEAR $L$sqr4x_sub
+
+	mov	r10,r9	; leave r10 = r9 as on entry and r9 = -r9 for the caller
+	neg	r9
+	DB	0F3h,0C3h		;repret
+
+
+global	bn_from_montgomery
+
+ALIGN	32
+bn_from_montgomery:	; dispatcher: tail-jumps to bn_from_mont8x, or returns 0 in eax
+
+	test	DWORD[48+rsp],7	; low three bits of the dword at [48+rsp] (the slot bn_from_mont8x loads into r9)
+	jz	NEAR bn_from_mont8x	; multiple of 8: handled by bn_from_mont8x
+	xor	eax,eax	; otherwise return 0
+	DB	0F3h,0C3h		;repret
+
+
+
+
+ALIGN	32
+bn_from_mont8x:	; copies the input into a zero-padded frame, runs one reduction + post pass, wipes the frame, returns rax=1
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_bn_from_mont8x:
+	mov	rdi,rcx	; move the six incoming arguments (rcx, rdx, r8, r9,
+	mov	rsi,rdx	; [40+rsp], [48+rsp]) into rdi, rsi, rdx, rcx, r8, r9
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+DB	0x67
+	mov	rax,rsp	; rax = entry rsp; parked at [40+rsp] of the new frame and used to unwind
+
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$from_prologue:
+
+	shl	r9d,3	; r9 = r9d * 8
+	lea	r10,[r9*2+r9]	; r10 = 3 * r9
+	neg	r9
+	mov	r8,QWORD[r8]	; replace the pointer in r8 with the qword it points to
+; Pick the frame base (rbp): start 320 + 2*|r9| bytes below rsp and lower it
+; further by an amount derived from the low 12 bits of its distance to rdi.
+; NOTE(review): presumably avoids 4 KiB aliasing with the buffer at rdi - confirm.
+
+
+
+
+
+	lea	r11,[((-320))+r9*2+rsp]
+	mov	rbp,rsp
+	sub	r11,rdi
+	and	r11,4095
+	cmp	r10,r11
+	jb	NEAR $L$from_sp_alt
+	sub	rbp,r11
+	lea	rbp,[((-320))+r9*2+rbp]
+	jmp	NEAR $L$from_sp_done
+
+ALIGN	32
+$L$from_sp_alt:
+	lea	r10,[((4096-320))+r9*2]
+	lea	rbp,[((-320))+r9*2+rbp]
+	sub	r11,r10
+	mov	r10,0
+	cmovc	r11,r10	; clamp the adjustment to 0 when the subtraction borrowed
+	sub	rbp,r11
+$L$from_sp_done:
+	and	rbp,-64	; round the frame base down to a 64-byte boundary
+	mov	r11,rsp
+	sub	r11,rbp
+	and	r11,-4096
+	lea	rsp,[rbp*1+r11]	; rsp = rbp + whole 4096-byte pages of the distance
+	mov	r10,QWORD[rsp]	; read a qword at the new rsp
+	cmp	rsp,rbp
+	ja	NEAR $L$from_page_walk
+	jmp	NEAR $L$from_page_walk_done
+
+$L$from_page_walk:	; step rsp down 4096 bytes at a time, reading a qword each step, until rsp == rbp
+	lea	rsp,[((-4096))+rsp]
+	mov	r10,QWORD[rsp]
+	cmp	rsp,rbp
+	ja	NEAR $L$from_page_walk
+$L$from_page_walk_done:
+
+	mov	r10,r9	; r10 keeps the negated byte count
+	neg	r9	; r9 is the positive byte count again
+
+
+
+
+
+
+
+
+
+
+	mov	QWORD[32+rsp],r8	; frame slot 32: the qword loaded through r8 above
+	mov	QWORD[40+rsp],rax	; frame slot 40: entry rsp
+
+$L$from_body:
+	mov	r11,r9	; r11 = bytes left to copy
+	lea	rax,[48+rsp]
+	pxor	xmm0,xmm0
+	jmp	NEAR $L$mul_by_1
+
+ALIGN	32
+$L$mul_by_1:	; per iteration: copy 64 bytes from rsi to [rax] and zero 64 bytes at [rax+r9]
+	movdqu	xmm1,XMMWORD[rsi]
+	movdqu	xmm2,XMMWORD[16+rsi]
+	movdqu	xmm3,XMMWORD[32+rsi]
+	movdqa	XMMWORD[r9*1+rax],xmm0
+	movdqu	xmm4,XMMWORD[48+rsi]
+	movdqa	XMMWORD[16+r9*1+rax],xmm0
+DB	0x48,0x8d,0xb6,0x40,0x00,0x00,0x00	; encodes lea rsi,[rsi+64] with a 32-bit displacement
+	movdqa	XMMWORD[rax],xmm1
+	movdqa	XMMWORD[32+r9*1+rax],xmm0
+	movdqa	XMMWORD[16+rax],xmm2
+	movdqa	XMMWORD[48+r9*1+rax],xmm0
+	movdqa	XMMWORD[32+rax],xmm3
+	movdqa	XMMWORD[48+rax],xmm4
+	lea	rax,[64+rax]
+	sub	r11,64
+	jnz	NEAR $L$mul_by_1
+
+DB	102,72,15,110,207	; encodes movq xmm1,rdi
+DB	102,72,15,110,209	; encodes movq xmm2,rcx
+DB	0x67
+	mov	rbp,rcx
+DB	102,73,15,110,218	; encodes movq xmm3,r10
+	lea	r11,[OPENSSL_ia32cap_P]
+	mov	r11d,DWORD[8+r11]
+	and	r11d,0x80108	; NOTE(review): presumably the BMI1/BMI2/ADX capability bits - confirm
+	cmp	r11d,0x80108
+	jne	NEAR $L$from_mont_nox	; any masked bit clear: use the non-x routines below
+
+	lea	rdi,[r9*1+rax]
+	call	__bn_sqrx8x_reduction
+	call	__bn_postx4x_internal
+
+	pxor	xmm0,xmm0
+	lea	rax,[48+rsp]
+	jmp	NEAR $L$from_mont_zero
+
+ALIGN	32
+$L$from_mont_nox:
+	call	__bn_sqr8x_reduction
+	call	__bn_post4x_internal
+
+	pxor	xmm0,xmm0
+	lea	rax,[48+rsp]
+	jmp	NEAR $L$from_mont_zero
+
+ALIGN	32
+$L$from_mont_zero:	; overwrite the frame from [48+rsp] upward with zeros: 64 bytes per iteration, r9 -= 32
+	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp (reloaded on every iteration)
+
+	movdqa	XMMWORD[rax],xmm0
+	movdqa	XMMWORD[16+rax],xmm0
+	movdqa	XMMWORD[32+rax],xmm0
+	movdqa	XMMWORD[48+rax],xmm0
+	lea	rax,[64+rax]
+	sub	r9,32
+	jnz	NEAR $L$from_mont_zero
+
+	mov	rax,1	; return value
+	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed in the prologue
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; drop the frame
+
+$L$from_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_from_mont8x:
+
+ALIGN	32
+bn_mulx4x_mont_gather5:	; frame setup/teardown wrapper around mulx4x_internal; returns rax=1
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi
+	mov	rax,rsp
+$L$SEH_begin_bn_mulx4x_mont_gather5:
+	mov	rdi,rcx	; move the six incoming arguments (rcx, rdx, r8, r9,
+	mov	rsi,rdx	; [40+rsp], [48+rsp]) into rdi, rsi, rdx, rcx, r8, r9
+	mov	rdx,r8
+	mov	rcx,r9
+	mov	r8,QWORD[40+rsp]
+	mov	r9,QWORD[48+rsp]
+
+
+
+	mov	rax,rsp	; rax = entry rsp; parked at [40+rsp] of the new frame and used to unwind
+
+$L$mulx4x_enter:	; secondary entry label; whatever jumps here is outside this block
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$mulx4x_prologue:
+
+	shl	r9d,3	; r9 = r9d * 8
+	lea	r10,[r9*2+r9]	; r10 = 3 * r9
+	neg	r9
+	mov	r8,QWORD[r8]	; replace the pointer in r8 with the qword it points to
+; Pick the frame base (rbp): start 320 + 2*|r9| bytes below rsp and lower it
+; further by an amount derived from the low 12 bits of its distance to rdi.
+; NOTE(review): presumably avoids 4 KiB aliasing with the buffer at rdi - confirm.
+
+
+
+
+
+
+
+	lea	r11,[((-320))+r9*2+rsp]
+	mov	rbp,rsp
+	sub	r11,rdi
+	and	r11,4095
+	cmp	r10,r11
+	jb	NEAR $L$mulx4xsp_alt
+	sub	rbp,r11
+	lea	rbp,[((-320))+r9*2+rbp]
+	jmp	NEAR $L$mulx4xsp_done
+
+$L$mulx4xsp_alt:
+	lea	r10,[((4096-320))+r9*2]
+	lea	rbp,[((-320))+r9*2+rbp]
+	sub	r11,r10
+	mov	r10,0
+	cmovc	r11,r10	; clamp the adjustment to 0 when the subtraction borrowed
+	sub	rbp,r11
+$L$mulx4xsp_done:
+	and	rbp,-64	; round the frame base down to a 64-byte boundary
+	mov	r11,rsp
+	sub	r11,rbp
+	and	r11,-4096
+	lea	rsp,[rbp*1+r11]	; rsp = rbp + whole 4096-byte pages of the distance
+	mov	r10,QWORD[rsp]	; read a qword at the new rsp
+	cmp	rsp,rbp
+	ja	NEAR $L$mulx4x_page_walk
+	jmp	NEAR $L$mulx4x_page_walk_done
+
+$L$mulx4x_page_walk:	; step rsp down 4096 bytes at a time, reading a qword each step, until rsp == rbp
+	lea	rsp,[((-4096))+rsp]
+	mov	r10,QWORD[rsp]
+	cmp	rsp,rbp
+	ja	NEAR $L$mulx4x_page_walk
+$L$mulx4x_page_walk_done:
+; Unlike the sibling prologues, r9 is left negated here; mulx4x_internal negates it itself.
+
+
+
+
+
+
+
+
+
+
+
+
+	mov	QWORD[32+rsp],r8	; frame slot 32: the qword loaded through r8 above
+	mov	QWORD[40+rsp],rax	; frame slot 40: entry rsp
+
+$L$mulx4x_body:
+	call	mulx4x_internal
+
+	mov	rsi,QWORD[40+rsp]	; rsi = entry rsp
+
+	mov	rax,1	; return value
+
+	mov	r15,QWORD[((-48))+rsi]	; reload the six registers pushed in the prologue
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; drop the frame
+
+$L$mulx4x_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_mulx4x_mont_gather5:
+
+
+ALIGN	32
+mulx4x_internal:
+
+	mov	QWORD[8+rsp],r9
+	mov	r10,r9
+	neg	r9
+	shl	r9,5
+	neg	r10
+	lea	r13,[128+r9*1+rdx]
+	shr	r9,5+5
+	movd	xmm5,DWORD[56+rax]
+	sub	r9,1
+	lea	rax,[$L$inc]
+	mov	QWORD[((16+8))+rsp],r13
+	mov	QWORD[((24+8))+rsp],r9
+	mov	QWORD[((56+8))+rsp],rdi
+	movdqa	xmm0,XMMWORD[rax]
+	movdqa	xmm1,XMMWORD[16+rax]
+	lea	r10,[((88-112))+r10*1+rsp]
+	lea	rdi,[128+rdx]
+
+	pshufd	xmm5,xmm5,0
+	movdqa	xmm4,xmm1
+DB	0x67
+	movdqa	xmm2,xmm1
+DB	0x67
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[112+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[128+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[144+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[160+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[176+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[192+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[208+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[224+r10],xmm3
+	movdqa	xmm3,xmm4
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[240+r10],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[256+r10],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[272+r10],xmm2
+	movdqa	xmm2,xmm4
+
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[288+r10],xmm3
+	movdqa	xmm3,xmm4
+DB	0x67
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[304+r10],xmm0
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[320+r10],xmm1
+
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[336+r10],xmm2
+
+	pand	xmm0,XMMWORD[64+rdi]
+	pand	xmm1,XMMWORD[80+rdi]
+	pand	xmm2,XMMWORD[96+rdi]
+	movdqa	XMMWORD[352+r10],xmm3
+	pand	xmm3,XMMWORD[112+rdi]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[((-128))+rdi]
+	movdqa	xmm5,XMMWORD[((-112))+rdi]
+	movdqa	xmm2,XMMWORD[((-96))+rdi]
+	pand	xmm4,XMMWORD[112+r10]
+	movdqa	xmm3,XMMWORD[((-80))+rdi]
+	pand	xmm5,XMMWORD[128+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[144+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[160+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[((-64))+rdi]
+	movdqa	xmm5,XMMWORD[((-48))+rdi]
+	movdqa	xmm2,XMMWORD[((-32))+rdi]
+	pand	xmm4,XMMWORD[176+r10]
+	movdqa	xmm3,XMMWORD[((-16))+rdi]
+	pand	xmm5,XMMWORD[192+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[208+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[224+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	movdqa	xmm4,XMMWORD[rdi]
+	movdqa	xmm5,XMMWORD[16+rdi]
+	movdqa	xmm2,XMMWORD[32+rdi]
+	pand	xmm4,XMMWORD[240+r10]
+	movdqa	xmm3,XMMWORD[48+rdi]
+	pand	xmm5,XMMWORD[256+r10]
+	por	xmm0,xmm4
+	pand	xmm2,XMMWORD[272+r10]
+	por	xmm1,xmm5
+	pand	xmm3,XMMWORD[288+r10]
+	por	xmm0,xmm2
+	por	xmm1,xmm3
+	pxor	xmm0,xmm1
+	pshufd	xmm1,xmm0,0x4e
+	por	xmm0,xmm1
+	lea	rdi,[256+rdi]
+DB	102,72,15,126,194
+	lea	rbx,[((64+32+8))+rsp]
+
+	mov	r9,rdx
+	mulx	rax,r8,QWORD[rsi]
+	mulx	r12,r11,QWORD[8+rsi]
+	add	r11,rax
+	mulx	r13,rax,QWORD[16+rsi]
+	adc	r12,rax
+	adc	r13,0
+	mulx	r14,rax,QWORD[24+rsi]
+
+	mov	r15,r8
+	imul	r8,QWORD[((32+8))+rsp]
+	xor	rbp,rbp
+	mov	rdx,r8
+
+	mov	QWORD[((8+8))+rsp],rdi
+
+	lea	rsi,[32+rsi]
+	adcx	r13,rax
+	adcx	r14,rbp
+
+	mulx	r10,rax,QWORD[rcx]
+	adcx	r15,rax
+	adox	r10,r11
+	mulx	r11,rax,QWORD[8+rcx]
+	adcx	r10,rax
+	adox	r11,r12
+	mulx	r12,rax,QWORD[16+rcx]
+	mov	rdi,QWORD[((24+8))+rsp]
+	mov	QWORD[((-32))+rbx],r10
+	adcx	r11,rax
+	adox	r12,r13
+	mulx	r15,rax,QWORD[24+rcx]
+	mov	rdx,r9
+	mov	QWORD[((-24))+rbx],r11
+	adcx	r12,rax
+	adox	r15,rbp
+	lea	rcx,[32+rcx]
+	mov	QWORD[((-16))+rbx],r12
+	jmp	NEAR $L$mulx4x_1st
+
+ALIGN	32
+$L$mulx4x_1st:
+	adcx	r15,rbp
+	mulx	rax,r10,QWORD[rsi]
+	adcx	r10,r14
+	mulx	r14,r11,QWORD[8+rsi]
+	adcx	r11,rax
+	mulx	rax,r12,QWORD[16+rsi]
+	adcx	r12,r14
+	mulx	r14,r13,QWORD[24+rsi]
+DB	0x67,0x67
+	mov	rdx,r8
+	adcx	r13,rax
+	adcx	r14,rbp
+	lea	rsi,[32+rsi]
+	lea	rbx,[32+rbx]
+
+	adox	r10,r15
+	mulx	r15,rax,QWORD[rcx]
+	adcx	r10,rax
+	adox	r11,r15
+	mulx	r15,rax,QWORD[8+rcx]
+	adcx	r11,rax
+	adox	r12,r15
+	mulx	r15,rax,QWORD[16+rcx]
+	mov	QWORD[((-40))+rbx],r10
+	adcx	r12,rax
+	mov	QWORD[((-32))+rbx],r11
+	adox	r13,r15
+	mulx	r15,rax,QWORD[24+rcx]
+	mov	rdx,r9
+	mov	QWORD[((-24))+rbx],r12
+	adcx	r13,rax
+	adox	r15,rbp
+	lea	rcx,[32+rcx]
+	mov	QWORD[((-16))+rbx],r13
+
+	dec	rdi
+	jnz	NEAR $L$mulx4x_1st
+
+	mov	rax,QWORD[8+rsp]
+	adc	r15,rbp
+	lea	rsi,[rax*1+rsi]
+	add	r14,r15
+	mov	rdi,QWORD[((8+8))+rsp]
+	adc	rbp,rbp
+	mov	QWORD[((-8))+rbx],r14
+	jmp	NEAR $L$mulx4x_outer
+
+ALIGN	32
+$L$mulx4x_outer:
+	lea	r10,[((16-256))+rbx]
+	pxor	xmm4,xmm4
+DB	0x67,0x67
+	pxor	xmm5,xmm5
+	movdqa	xmm0,XMMWORD[((-128))+rdi]
+	movdqa	xmm1,XMMWORD[((-112))+rdi]
+	movdqa	xmm2,XMMWORD[((-96))+rdi]
+	pand	xmm0,XMMWORD[256+r10]
+	movdqa	xmm3,XMMWORD[((-80))+rdi]
+	pand	xmm1,XMMWORD[272+r10]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[288+r10]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[304+r10]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[((-64))+rdi]
+	movdqa	xmm1,XMMWORD[((-48))+rdi]
+	movdqa	xmm2,XMMWORD[((-32))+rdi]
+	pand	xmm0,XMMWORD[320+r10]
+	movdqa	xmm3,XMMWORD[((-16))+rdi]
+	pand	xmm1,XMMWORD[336+r10]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[352+r10]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[368+r10]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[rdi]
+	movdqa	xmm1,XMMWORD[16+rdi]
+	movdqa	xmm2,XMMWORD[32+rdi]
+	pand	xmm0,XMMWORD[384+r10]
+	movdqa	xmm3,XMMWORD[48+rdi]
+	pand	xmm1,XMMWORD[400+r10]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[416+r10]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[432+r10]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[64+rdi]
+	movdqa	xmm1,XMMWORD[80+rdi]
+	movdqa	xmm2,XMMWORD[96+rdi]
+	pand	xmm0,XMMWORD[448+r10]
+	movdqa	xmm3,XMMWORD[112+rdi]
+	pand	xmm1,XMMWORD[464+r10]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[480+r10]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[496+r10]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	por	xmm4,xmm5
+	pshufd	xmm0,xmm4,0x4e
+	por	xmm0,xmm4
+	lea	rdi,[256+rdi]
+DB	102,72,15,126,194
+
+	mov	QWORD[rbx],rbp
+	lea	rbx,[32+rax*1+rbx]
+	mulx	r11,r8,QWORD[rsi]
+	xor	rbp,rbp
+	mov	r9,rdx
+	mulx	r12,r14,QWORD[8+rsi]
+	adox	r8,QWORD[((-32))+rbx]
+	adcx	r11,r14
+	mulx	r13,r15,QWORD[16+rsi]
+	adox	r11,QWORD[((-24))+rbx]
+	adcx	r12,r15
+	mulx	r14,rdx,QWORD[24+rsi]
+	adox	r12,QWORD[((-16))+rbx]
+	adcx	r13,rdx
+	lea	rcx,[rax*1+rcx]
+	lea	rsi,[32+rsi]
+	adox	r13,QWORD[((-8))+rbx]
+	adcx	r14,rbp
+	adox	r14,rbp
+
+	mov	r15,r8
+	imul	r8,QWORD[((32+8))+rsp]
+
+	mov	rdx,r8
+	xor	rbp,rbp
+	mov	QWORD[((8+8))+rsp],rdi
+
+	mulx	r10,rax,QWORD[rcx]
+	adcx	r15,rax
+	adox	r10,r11
+	mulx	r11,rax,QWORD[8+rcx]
+	adcx	r10,rax
+	adox	r11,r12
+	mulx	r12,rax,QWORD[16+rcx]
+	adcx	r11,rax
+	adox	r12,r13
+	mulx	r15,rax,QWORD[24+rcx]
+	mov	rdx,r9
+	mov	rdi,QWORD[((24+8))+rsp]
+	mov	QWORD[((-32))+rbx],r10
+	adcx	r12,rax
+	mov	QWORD[((-24))+rbx],r11
+	adox	r15,rbp
+	mov	QWORD[((-16))+rbx],r12
+	lea	rcx,[32+rcx]
+	jmp	NEAR $L$mulx4x_inner
+
+ALIGN	32
+$L$mulx4x_inner:
+	mulx	rax,r10,QWORD[rsi]
+	adcx	r15,rbp
+	adox	r10,r14
+	mulx	r14,r11,QWORD[8+rsi]
+	adcx	r10,QWORD[rbx]
+	adox	r11,rax
+	mulx	rax,r12,QWORD[16+rsi]
+	adcx	r11,QWORD[8+rbx]
+	adox	r12,r14
+	mulx	r14,r13,QWORD[24+rsi]
+	mov	rdx,r8
+	adcx	r12,QWORD[16+rbx]
+	adox	r13,rax
+	adcx	r13,QWORD[24+rbx]
+	adox	r14,rbp
+	lea	rsi,[32+rsi]
+	lea	rbx,[32+rbx]
+	adcx	r14,rbp
+
+	adox	r10,r15
+	mulx	r15,rax,QWORD[rcx]
+	adcx	r10,rax
+	adox	r11,r15
+	mulx	r15,rax,QWORD[8+rcx]
+	adcx	r11,rax
+	adox	r12,r15
+	mulx	r15,rax,QWORD[16+rcx]
+	mov	QWORD[((-40))+rbx],r10
+	adcx	r12,rax
+	adox	r13,r15
+	mov	QWORD[((-32))+rbx],r11
+	mulx	r15,rax,QWORD[24+rcx]
+	mov	rdx,r9
+	lea	rcx,[32+rcx]
+	mov	QWORD[((-24))+rbx],r12
+	adcx	r13,rax
+	adox	r15,rbp
+	mov	QWORD[((-16))+rbx],r13
+
+	dec	rdi
+	jnz	NEAR $L$mulx4x_inner
+
+	mov	rax,QWORD[((0+8))+rsp]
+	adc	r15,rbp
+	sub	rdi,QWORD[rbx]
+	mov	rdi,QWORD[((8+8))+rsp]
+	mov	r10,QWORD[((16+8))+rsp]
+	adc	r14,r15
+	lea	rsi,[rax*1+rsi]
+	adc	rbp,rbp
+	mov	QWORD[((-8))+rbx],r14
+
+	cmp	rdi,r10
+	jb	NEAR $L$mulx4x_outer
+
+	mov	r10,QWORD[((-8))+rcx]
+	mov	r8,rbp
+	mov	r12,QWORD[rax*1+rcx]
+	lea	rbp,[rax*1+rcx]
+	mov	rcx,rax
+	lea	rdi,[rax*1+rbx]
+	xor	eax,eax
+	xor	r15,r15
+	sub	r10,r14
+	adc	r15,r15
+	or	r8,r15
+	sar	rcx,3+2
+	sub	rax,r8
+	mov	rdx,QWORD[((56+8))+rsp]
+	dec	r12
+	mov	r13,QWORD[8+rbp]
+	xor	r8,r8
+	mov	r14,QWORD[16+rbp]
+	mov	r15,QWORD[24+rbp]
+	jmp	NEAR $L$sqrx4x_sub_entry
+
+
+
+ALIGN	32
+bn_powerx5:	; Win64 entry: remaps args, builds an aligned frame, runs 5x (sqrx8x + postx4x), then mulx4x_internal; returns 1
+	mov	QWORD[8+rsp],rdi	;WIN64 prologue
+	mov	QWORD[16+rsp],rsi	; caller's rdi/rsi are parked in the two slots above the return address
+	mov	rax,rsp
+$L$SEH_begin_bn_powerx5:
+	mov	rdi,rcx	; Win64 arg 1 -> rdi
+	mov	rsi,rdx	; Win64 arg 2 -> rsi
+	mov	rdx,r8	; Win64 arg 3 -> rdx
+	mov	rcx,r9	; Win64 arg 4 -> rcx
+	mov	r8,QWORD[40+rsp]	; stack arg 5 -> r8
+	mov	r9,QWORD[48+rsp]	; stack arg 6 -> r9
+
+
+
+	mov	rax,rsp	; rax = rsp at entry; saved at [40+rsp] below and used by the epilogue to unwind
+
+$L$powerx5_enter:
+	push	rbx
+
+	push	rbp
+
+	push	r12
+
+	push	r13
+
+	push	r14
+
+	push	r15
+
+$L$powerx5_prologue:
+
+	shl	r9d,3	; r9 *= 8 (32-bit shift also clears the upper half); presumably word count -> byte count
+	lea	r10,[r9*2+r9]	; r10 = 3*r9
+	neg	r9	; r9 = -r9
+	mov	r8,QWORD[r8]	; replace the pointer in r8 with the qword it points to
+
+; Choose the frame base in rbp: 320 bytes plus 2*|r9| bytes below rsp, adjusted
+; by the distance to rdi modulo 4096.
+; NOTE(review): looks like this avoids page-offset aliasing between the frame
+; and the buffer at rdi - confirm against the perlasm source.
+
+
+
+
+	lea	r11,[((-320))+r9*2+rsp]
+	mov	rbp,rsp
+	sub	r11,rdi
+	and	r11,4095	; r11 = (candidate frame address - rdi) mod 4096
+	cmp	r10,r11
+	jb	NEAR $L$pwrx_sp_alt
+	sub	rbp,r11
+	lea	rbp,[((-320))+r9*2+rbp]
+	jmp	NEAR $L$pwrx_sp_done
+
+ALIGN	32
+$L$pwrx_sp_alt:
+	lea	r10,[((4096-320))+r9*2]
+	lea	rbp,[((-320))+r9*2+rbp]
+	sub	r11,r10
+	mov	r10,0	; mov leaves CF from the sub intact for the cmovc
+	cmovc	r11,r10	; clamp the adjustment to 0 if the subtraction borrowed
+	sub	rbp,r11
+$L$pwrx_sp_done:
+	and	rbp,-64	; round the frame base down to a 64-byte boundary
+	mov	r11,rsp
+	sub	r11,rbp
+	and	r11,-4096	; whole 4096-byte steps between the old rsp and the new base
+	lea	rsp,[rbp*1+r11]
+	mov	r10,QWORD[rsp]	; read one qword at the new rsp (value unused)
+	cmp	rsp,rbp
+	ja	NEAR $L$pwrx_page_walk
+	jmp	NEAR $L$pwrx_page_walk_done
+
+$L$pwrx_page_walk:	; step rsp down 4096 bytes at a time, reading a qword at each step, until it reaches rbp
+	lea	rsp,[((-4096))+rsp]
+	mov	r10,QWORD[rsp]
+	cmp	rsp,rbp
+	ja	NEAR $L$pwrx_page_walk
+$L$pwrx_page_walk_done:
+
+	mov	r10,r9	; r10 keeps the negated byte count
+	neg	r9	; r9 is positive again
+
+; Park values in xmm registers so they survive the internal calls; the DB
+; lines below are hand-encoded movq instructions.
+
+
+
+
+
+
+
+
+
+
+	pxor	xmm0,xmm0	; xmm0 = 0
+DB	102,72,15,110,207	; movq xmm1,rdi
+DB	102,72,15,110,209	; movq xmm2,rcx
+DB	102,73,15,110,218	; movq xmm3,r10
+DB	102,72,15,110,226	; movq xmm4,rdx
+	mov	QWORD[32+rsp],r8	; the qword loaded through r8 above
+	mov	QWORD[40+rsp],rax	; original stack pointer, reloaded after the calls
+
+$L$powerx5_body:
+
+	call	__bn_sqrx8x_internal	; five identical (sqrx8x, postx4x) pairs follow
+	call	__bn_postx4x_internal
+	call	__bn_sqrx8x_internal
+	call	__bn_postx4x_internal
+	call	__bn_sqrx8x_internal
+	call	__bn_postx4x_internal
+	call	__bn_sqrx8x_internal
+	call	__bn_postx4x_internal
+	call	__bn_sqrx8x_internal
+	call	__bn_postx4x_internal
+
+	mov	r9,r10
+	mov	rdi,rsi
+DB	102,72,15,126,209	; movq rcx,xmm2
+DB	102,72,15,126,226	; movq rdx,xmm4
+	mov	rax,QWORD[40+rsp]
+
+	call	mulx4x_internal
+
+	mov	rsi,QWORD[40+rsp]	; rsi = stack pointer as it was before the six pushes
+
+	mov	rax,1	; return value
+
+	mov	r15,QWORD[((-48))+rsi]	; restore the pushed registers by offset instead of popping
+
+	mov	r14,QWORD[((-40))+rsi]
+
+	mov	r13,QWORD[((-32))+rsi]
+
+	mov	r12,QWORD[((-24))+rsi]
+
+	mov	rbp,QWORD[((-16))+rsi]
+
+	mov	rbx,QWORD[((-8))+rsi]
+
+	lea	rsp,[rsi]	; drop the whole frame in one step
+
+$L$powerx5_epilogue:
+	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
+	mov	rsi,QWORD[16+rsp]	; reload the caller's rdi/rsi saved by the prologue
+	DB	0F3h,0C3h		;repret
+
+$L$SEH_end_bn_powerx5:
+
+global	bn_sqrx8x_internal
+
+
+ALIGN	32
+bn_sqrx8x_internal:
+__bn_sqrx8x_internal:
+; Squaring core; bn_powerx5 calls it five times. As read from the code below:
+;   rsi = source qwords, r9 = their length in bytes (end pointer = rsi+r9)
+;   [48+8+rsp] = start of the scratch area; 64 bytes are written per 32 input bytes
+;   xmm0 = value stored to clear the scratch area (bn_powerx5 zeroes it first)
+; Stack offsets are written as (N+8): presumably slot N of the caller's frame
+; plus the return address pushed by call - confirm against the perlasm source.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+	lea	rdi,[((48+8))+rsp]
+	lea	rbp,[r9*1+rsi]	; rbp = end of the source
+	mov	QWORD[((0+8))+rsp],r9	; keep the byte length
+	mov	QWORD[((8+8))+rsp],rbp	; keep the end pointer for the outer-loop exit test
+	jmp	NEAR $L$sqr8x_zero_start
+
+ALIGN	32
+DB	0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00	; one 12-byte prefixed NOP
+$L$sqrx8x_zero:
+DB	0x3e	; prefix byte for the next instruction (presumably padding)
+	movdqa	XMMWORD[rdi],xmm0
+	movdqa	XMMWORD[16+rdi],xmm0
+	movdqa	XMMWORD[32+rdi],xmm0
+	movdqa	XMMWORD[48+rdi],xmm0
+$L$sqr8x_zero_start:	; first pass enters here, so the first 64 bytes of the area are not cleared
+	movdqa	XMMWORD[64+rdi],xmm0
+	movdqa	XMMWORD[80+rdi],xmm0
+	movdqa	XMMWORD[96+rdi],xmm0
+	movdqa	XMMWORD[112+rdi],xmm0
+	lea	rdi,[128+rdi]
+	sub	r9,64
+	jnz	NEAR $L$sqrx8x_zero	; terminates only when r9 is a multiple of 64
+
+	mov	rdx,QWORD[rsi]	; rdx = first source qword (implicit mulx multiplicand)
+
+	xor	r10,r10
+	xor	r11,r11
+	xor	r12,r12
+	xor	r13,r13
+	xor	r14,r14
+	xor	r15,r15
+	lea	rdi,[((48+8))+rsp]
+	xor	rbp,rbp	; rbp = 0, and CF = OF = 0 for the adcx/adox chains
+	jmp	NEAR $L$sqrx8x_outer_loop
+
+ALIGN	32
+$L$sqrx8x_outer_loop:	; products of distinct qwords within the current 8-qword block at rsi
+	mulx	rax,r8,QWORD[8+rsi]
+	adcx	r8,r9
+	adox	r10,rax
+	mulx	rax,r9,QWORD[16+rsi]
+	adcx	r9,r10
+	adox	r11,rax
+DB	0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00	; mulx rax,r10,QWORD[24+rsi]
+	adcx	r10,r11
+	adox	r12,rax
+DB	0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00	; mulx rax,r11,QWORD[32+rsi]
+	adcx	r11,r12
+	adox	r13,rax
+	mulx	rax,r12,QWORD[40+rsi]
+	adcx	r12,r13
+	adox	r14,rax
+	mulx	rax,r13,QWORD[48+rsi]
+	adcx	r13,r14
+	adox	rax,r15
+	mulx	r15,r14,QWORD[56+rsi]
+	mov	rdx,QWORD[8+rsi]	; next multiplicand, loaded early
+	adcx	r14,rax
+	adox	r15,rbp
+	adc	r15,QWORD[64+rdi]
+	mov	QWORD[8+rdi],r8
+	mov	QWORD[16+rdi],r9
+	sbb	rcx,rcx	; rcx = 0 or -1 from CF, so the carry survives the xor below
+	xor	rbp,rbp	; rbp = 0, CF = OF = 0
+
+
+	mulx	rbx,r8,QWORD[16+rsi]
+	mulx	rax,r9,QWORD[24+rsi]
+	adcx	r8,r10
+	adox	r9,rbx
+	mulx	rbx,r10,QWORD[32+rsi]
+	adcx	r9,r11
+	adox	r10,rax
+DB	0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00	; mulx rax,r11,QWORD[40+rsi]
+	adcx	r10,r12
+	adox	r11,rbx
+DB	0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00	; mulx rbx,r12,QWORD[48+rsi]
+	adcx	r11,r13
+	adox	r12,r14
+DB	0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00	; mulx r14,r13,QWORD[56+rsi]
+	mov	rdx,QWORD[16+rsi]
+	adcx	r12,rax
+	adox	r13,rbx
+	adcx	r13,r15
+	adox	r14,rbp
+	adcx	r14,rbp
+
+	mov	QWORD[24+rdi],r8
+	mov	QWORD[32+rdi],r9
+
+	mulx	rbx,r8,QWORD[24+rsi]
+	mulx	rax,r9,QWORD[32+rsi]
+	adcx	r8,r10
+	adox	r9,rbx
+	mulx	rbx,r10,QWORD[40+rsi]
+	adcx	r9,r11
+	adox	r10,rax
+DB	0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00	; mulx rax,r11,QWORD[48+rsi]
+	adcx	r10,r12
+	adox	r11,r13
+DB	0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00	; mulx r13,r12,QWORD[56+rsi]
+DB	0x3e	; prefix byte for the next instruction (presumably padding)
+	mov	rdx,QWORD[24+rsi]
+	adcx	r11,rbx
+	adox	r12,rax
+	adcx	r12,r14
+	mov	QWORD[40+rdi],r8
+	mov	QWORD[48+rdi],r9
+	mulx	rax,r8,QWORD[32+rsi]
+	adox	r13,rbp
+	adcx	r13,rbp
+
+	mulx	rbx,r9,QWORD[40+rsi]
+	adcx	r8,r10
+	adox	r9,rax
+	mulx	rax,r10,QWORD[48+rsi]
+	adcx	r9,r11
+	adox	r10,r12
+	mulx	r12,r11,QWORD[56+rsi]
+	mov	rdx,QWORD[32+rsi]
+	mov	r14,QWORD[40+rsi]
+	adcx	r10,rbx
+	adox	r11,rax
+	mov	r15,QWORD[48+rsi]
+	adcx	r11,r13
+	adox	r12,rbp
+	adcx	r12,rbp
+
+	mov	QWORD[56+rdi],r8
+	mov	QWORD[64+rdi],r9
+
+	mulx	rax,r9,r14
+	mov	r8,QWORD[56+rsi]
+	adcx	r9,r10
+	mulx	rbx,r10,r15
+	adox	r10,rax
+	adcx	r10,r11
+	mulx	rax,r11,r8
+	mov	rdx,r14
+	adox	r11,rbx
+	adcx	r11,r12
+
+	adcx	rax,rbp
+
+	mulx	rbx,r14,r15
+	mulx	r13,r12,r8
+	mov	rdx,r15
+	lea	rsi,[64+rsi]	; advance to the next 8-qword block
+	adcx	r11,r14
+	adox	r12,rbx
+	adcx	r12,rax
+	adox	r13,rbp
+
+DB	0x67,0x67	; prefix bytes for the next instruction (presumably padding)
+	mulx	r14,r8,r8
+	adcx	r13,r8
+	adcx	r14,rbp
+
+	cmp	rsi,QWORD[((8+8))+rsp]	; reached the saved end pointer?
+	je	NEAR $L$sqrx8x_outer_break
+
+	neg	rcx	; turn the 0/-1 saved above back into CF
+	mov	rcx,-8	; inner-loop counter; mov does not disturb CF
+	mov	r15,rbp
+	mov	r8,QWORD[64+rdi]
+	adcx	r9,QWORD[72+rdi]
+	adcx	r10,QWORD[80+rdi]
+	adcx	r11,QWORD[88+rdi]
+	adc	r12,QWORD[96+rdi]
+	adc	r13,QWORD[104+rdi]
+	adc	r14,QWORD[112+rdi]
+	adc	r15,QWORD[120+rdi]
+	lea	rbp,[rsi]
+	lea	rdi,[128+rdi]
+	sbb	rax,rax	; rax = 0 or -1 from CF
+
+	mov	rdx,QWORD[((-64))+rsi]	; first qword of the block just finished
+	mov	QWORD[((16+8))+rsp],rax	; carry kept in memory across the inner loop
+	mov	QWORD[((24+8))+rsp],rdi	; scratch position to return to
+
+
+	xor	eax,eax	; CF = OF = 0
+	jmp	NEAR $L$sqrx8x_loop
+
+ALIGN	32
+$L$sqrx8x_loop:	; one multiplier qword (rdx) times 8 qwords at rbp, added into r8..r15; runs 8 times (rcx = -8..-1)
+	mov	rbx,r8
+	mulx	r8,rax,QWORD[rbp]
+	adcx	rbx,rax
+	adox	r8,r9
+
+	mulx	r9,rax,QWORD[8+rbp]
+	adcx	r8,rax
+	adox	r9,r10
+
+	mulx	r10,rax,QWORD[16+rbp]
+	adcx	r9,rax
+	adox	r10,r11
+
+	mulx	r11,rax,QWORD[24+rbp]
+	adcx	r10,rax
+	adox	r11,r12
+
+DB	0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00	; mulx r12,rax,QWORD[32+rbp]
+	adcx	r11,rax
+	adox	r12,r13
+
+	mulx	r13,rax,QWORD[40+rbp]
+	adcx	r12,rax
+	adox	r13,r14
+
+	mulx	r14,rax,QWORD[48+rbp]
+	mov	QWORD[rcx*8+rdi],rbx	; store the finished low qword
+	mov	ebx,0	; rbx = 0 without touching flags
+	adcx	r13,rax
+	adox	r14,r15
+
+DB	0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00	; mulx r15,rax,QWORD[56+rbp]
+	mov	rdx,QWORD[8+rcx*8+rsi]	; next multiplier qword
+	adcx	r14,rax
+	adox	r15,rbx
+	adcx	r15,rbx
+
+DB	0x67	; prefix byte for the next instruction (presumably padding)
+	inc	rcx	; inc leaves CF untouched
+	jnz	NEAR $L$sqrx8x_loop
+
+	lea	rbp,[64+rbp]
+	mov	rcx,-8
+	cmp	rbp,QWORD[((8+8))+rsp]
+	je	NEAR $L$sqrx8x_break
+
+	sub	rbx,QWORD[((16+8))+rsp]	; rbx is 0 here: 0 - (0 or -1) recreates CF from the saved carry
+DB	0x66	; prefix byte for the next instruction (presumably padding)
+	mov	rdx,QWORD[((-64))+rsi]
+	adcx	r8,QWORD[rdi]
+	adcx	r9,QWORD[8+rdi]
+	adc	r10,QWORD[16+rdi]
+	adc	r11,QWORD[24+rdi]
+	adc	r12,QWORD[32+rdi]
+	adc	r13,QWORD[40+rdi]
+	adc	r14,QWORD[48+rdi]
+	adc	r15,QWORD[56+rdi]
+	lea	rdi,[64+rdi]
+DB	0x67	; prefix byte for the next instruction (presumably padding)
+	sbb	rax,rax
+	xor	ebx,ebx	; CF = OF = 0
+	mov	QWORD[((16+8))+rsp],rax
+	jmp	NEAR $L$sqrx8x_loop
+
+ALIGN	32
+$L$sqrx8x_break:
+	xor	rbp,rbp
+	sub	rbx,QWORD[((16+8))+rsp]	; recreate CF from the saved carry
+	adcx	r8,rbp
+	mov	rcx,QWORD[((24+8))+rsp]	; scratch position saved before the inner loop
+	adcx	r9,rbp
+	mov	rdx,QWORD[rsi]
+	adc	r10,0
+	mov	QWORD[rdi],r8
+	adc	r11,0
+	adc	r12,0
+	adc	r13,0
+	adc	r14,0
+	adc	r15,0
+	cmp	rdi,rcx
+	je	NEAR $L$sqrx8x_outer_loop
+
+	mov	QWORD[8+rdi],r9	; store r9..r15 at rdi and reload them from the saved position
+	mov	r9,QWORD[8+rcx]
+	mov	QWORD[16+rdi],r10
+	mov	r10,QWORD[16+rcx]
+	mov	QWORD[24+rdi],r11
+	mov	r11,QWORD[24+rcx]
+	mov	QWORD[32+rdi],r12
+	mov	r12,QWORD[32+rcx]
+	mov	QWORD[40+rdi],r13
+	mov	r13,QWORD[40+rcx]
+	mov	QWORD[48+rdi],r14
+	mov	r14,QWORD[48+rcx]
+	mov	QWORD[56+rdi],r15
+	mov	r15,QWORD[56+rcx]
+	mov	rdi,rcx
+	jmp	NEAR $L$sqrx8x_outer_loop
+
+ALIGN	32
+$L$sqrx8x_outer_break:
+	mov	QWORD[72+rdi],r9
+DB	102,72,15,126,217	; movq rcx,xmm3
+	mov	QWORD[80+rdi],r10
+	mov	QWORD[88+rdi],r11
+	mov	QWORD[96+rdi],r12
+	mov	QWORD[104+rdi],r13
+	mov	QWORD[112+rdi],r14
+	lea	rdi,[((48+8))+rsp]	; back to the start of the scratch area
+	mov	rdx,QWORD[rcx*1+rsi]	; rsi is the end pointer here; bn_powerx5 sets xmm3 to the negated byte length
+
+	mov	r11,QWORD[8+rdi]
+	xor	r10,r10	; r10 = 0, CF = OF = 0
+	mov	r9,QWORD[((0+8))+rsp]	; byte length saved at entry
+	adox	r11,r11	; r11 = 2*r11 + OF
+	mov	r12,QWORD[16+rdi]
+	mov	r13,QWORD[24+rdi]
+
+
+ALIGN	32
+$L$sqrx4x_shift_n_add:	; per source qword: square it (mulx by rdx itself), double two stored qwords (adox x,x), add, store
+	mulx	rbx,rax,rdx
+	adox	r12,r12
+	adcx	rax,r10
+DB	0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00	; mov rdx,QWORD[8+rcx*1+rsi] (long encoding)
+DB	0x4c,0x8b,0x97,0x20,0x00,0x00,0x00	; mov r10,QWORD[32+rdi] (long encoding)
+	adox	r13,r13
+	adcx	rbx,r11
+	mov	r11,QWORD[40+rdi]
+	mov	QWORD[rdi],rax
+	mov	QWORD[8+rdi],rbx
+
+	mulx	rbx,rax,rdx
+	adox	r10,r10
+	adcx	rax,r12
+	mov	rdx,QWORD[16+rcx*1+rsi]
+	mov	r12,QWORD[48+rdi]
+	adox	r11,r11
+	adcx	rbx,r13
+	mov	r13,QWORD[56+rdi]
+	mov	QWORD[16+rdi],rax
+	mov	QWORD[24+rdi],rbx
+
+	mulx	rbx,rax,rdx
+	adox	r12,r12
+	adcx	rax,r10
+	mov	rdx,QWORD[24+rcx*1+rsi]
+	lea	rcx,[32+rcx]	; rcx counts up by 32 toward zero
+	mov	r10,QWORD[64+rdi]
+	adox	r13,r13
+	adcx	rbx,r11
+	mov	r11,QWORD[72+rdi]
+	mov	QWORD[32+rdi],rax
+	mov	QWORD[40+rdi],rbx
+
+	mulx	rbx,rax,rdx
+	adox	r10,r10
+	adcx	rax,r12
+	jrcxz	$L$sqrx4x_shift_n_add_break	; exit when rcx == 0; jrcxz reads no flags, so CF/OF stay live
+DB	0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00	; mov rdx,QWORD[0+rcx*1+rsi] (long encoding)
+	adox	r11,r11
+	adcx	rbx,r13
+	mov	r12,QWORD[80+rdi]
+	mov	r13,QWORD[88+rdi]
+	mov	QWORD[48+rdi],rax
+	mov	QWORD[56+rdi],rbx
+	lea	rdi,[64+rdi]
+	nop
+	jmp	NEAR $L$sqrx4x_shift_n_add
+
+ALIGN	32
+$L$sqrx4x_shift_n_add_break:
+	adcx	rbx,r13
+	mov	QWORD[48+rdi],rax
+	mov	QWORD[56+rdi],rbx
+	lea	rdi,[64+rdi]
+DB	102,72,15,126,213	; movq rbp,xmm2; execution then falls through into __bn_sqrx8x_reduction
+__bn_sqrx8x_reduction:	; reduces the scratch area at [48+8+rsp] against the qwords at rbp, 8 at a time. NOTE(review): looks like word-by-word Montgomery reduction - confirm
+	xor	eax,eax
+	mov	rbx,QWORD[((32+8))+rsp]	; per-word multiplier constant (bn_powerx5 stores the qword loaded through r8 in this slot)
+	mov	rdx,QWORD[((48+8))+rsp]	; lowest scratch qword
+	lea	rcx,[((-64))+r9*1+rbp]	; address of the last 64-byte block at rbp
+
+	mov	QWORD[((0+8))+rsp],rcx	; loop bound for the rbp walk
+	mov	QWORD[((8+8))+rsp],rdi	; end of the scratch data
+
+	lea	rdi,[((48+8))+rsp]
+	jmp	NEAR $L$sqrx8x_reduction_loop
+
+ALIGN	32
+$L$sqrx8x_reduction_loop:
+	mov	r9,QWORD[8+rdi]
+	mov	r10,QWORD[16+rdi]
+	mov	r11,QWORD[24+rdi]
+	mov	r12,QWORD[32+rdi]
+	mov	r8,rdx
+	imul	rdx,rbx	; rdx = low qword * constant (low 64 bits)
+	mov	r13,QWORD[40+rdi]
+	mov	r14,QWORD[48+rdi]
+	mov	r15,QWORD[56+rdi]
+	mov	QWORD[((24+8))+rsp],rax	; carry from the previous pass
+
+	lea	rdi,[64+rdi]
+	xor	rsi,rsi	; rsi = 0, CF = OF = 0
+	mov	rcx,-8
+	jmp	NEAR $L$sqrx8x_reduce
+
+ALIGN	32
+$L$sqrx8x_reduce:	; 8 iterations (rcx = -8..-1); each adds rdx * (8 qwords at rbp) into r8..r15
+	mov	rbx,r8
+	mulx	r8,rax,QWORD[rbp]
+	adcx	rax,rbx	; the low sum is discarded: rax is overwritten below before being read
+	adox	r8,r9
+
+	mulx	r9,rbx,QWORD[8+rbp]
+	adcx	r8,rbx
+	adox	r9,r10
+
+	mulx	r10,rbx,QWORD[16+rbp]
+	adcx	r9,rbx
+	adox	r10,r11
+
+	mulx	r11,rbx,QWORD[24+rbp]
+	adcx	r10,rbx
+	adox	r11,r12
+
+DB	0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00	; mulx r12,rbx,QWORD[32+rbp]
+	mov	rax,rdx	; keep the current multiplier
+	mov	rdx,r8
+	adcx	r11,rbx
+	adox	r12,r13
+
+	mulx	rdx,rbx,QWORD[((32+8))+rsp]	; rbx = low 64 bits of r8 * constant = next multiplier; high half in rdx is overwritten next
+	mov	rdx,rax
+	mov	QWORD[((64+48+8))+rcx*8+rsp],rax	; record this multiplier for the tail loop
+
+	mulx	r13,rax,QWORD[40+rbp]
+	adcx	r12,rax
+	adox	r13,r14
+
+	mulx	r14,rax,QWORD[48+rbp]
+	adcx	r13,rax
+	adox	r14,r15
+
+	mulx	r15,rax,QWORD[56+rbp]
+	mov	rdx,rbx	; switch to the next multiplier
+	adcx	r14,rax
+	adox	r15,rsi
+	adcx	r15,rsi
+
+DB	0x67,0x67,0x67	; prefix bytes for the next instruction (presumably padding)
+	inc	rcx
+	jnz	NEAR $L$sqrx8x_reduce
+
+	mov	rax,rsi	; rax = 0
+	cmp	rbp,QWORD[((0+8))+rsp]
+	jae	NEAR $L$sqrx8x_no_tail	; only one 64-byte block at rbp: no tail passes needed
+
+	mov	rdx,QWORD[((48+8))+rsp]
+	add	r8,QWORD[rdi]
+	lea	rbp,[64+rbp]
+	mov	rcx,-8
+	adcx	r9,QWORD[8+rdi]
+	adcx	r10,QWORD[16+rdi]
+	adc	r11,QWORD[24+rdi]
+	adc	r12,QWORD[32+rdi]
+	adc	r13,QWORD[40+rdi]
+	adc	r14,QWORD[48+rdi]
+	adc	r15,QWORD[56+rdi]
+	lea	rdi,[64+rdi]
+	sbb	rax,rax	; rax = 0 or -1 from CF
+
+	xor	rsi,rsi	; rsi = 0, CF = OF = 0
+	mov	QWORD[((16+8))+rsp],rax	; carry kept in memory across the tail loop
+	jmp	NEAR $L$sqrx8x_tail
+
+ALIGN	32
+$L$sqrx8x_tail:	; applies the 8 recorded multipliers to the next 8 qwords at rbp
+	mov	rbx,r8
+	mulx	r8,rax,QWORD[rbp]
+	adcx	rbx,rax
+	adox	r8,r9
+
+	mulx	r9,rax,QWORD[8+rbp]
+	adcx	r8,rax
+	adox	r9,r10
+
+	mulx	r10,rax,QWORD[16+rbp]
+	adcx	r9,rax
+	adox	r10,r11
+
+	mulx	r11,rax,QWORD[24+rbp]
+	adcx	r10,rax
+	adox	r11,r12
+
+DB	0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00	; mulx r12,rax,QWORD[32+rbp]
+	adcx	r11,rax
+	adox	r12,r13
+
+	mulx	r13,rax,QWORD[40+rbp]
+	adcx	r12,rax
+	adox	r13,r14
+
+	mulx	r14,rax,QWORD[48+rbp]
+	adcx	r13,rax
+	adox	r14,r15
+
+	mulx	r15,rax,QWORD[56+rbp]
+	mov	rdx,QWORD[((72+48+8))+rcx*8+rsp]	; next recorded multiplier
+	adcx	r14,rax
+	adox	r15,rsi
+	mov	QWORD[rcx*8+rdi],rbx
+	mov	rbx,r8
+	adcx	r15,rsi
+
+	inc	rcx
+	jnz	NEAR $L$sqrx8x_tail
+
+	cmp	rbp,QWORD[((0+8))+rsp]
+	jae	NEAR $L$sqrx8x_tail_done
+
+	sub	rsi,QWORD[((16+8))+rsp]	; rsi is 0 here: recreates CF from the saved carry
+	mov	rdx,QWORD[((48+8))+rsp]
+	lea	rbp,[64+rbp]
+	adc	r8,QWORD[rdi]
+	adc	r9,QWORD[8+rdi]
+	adc	r10,QWORD[16+rdi]
+	adc	r11,QWORD[24+rdi]
+	adc	r12,QWORD[32+rdi]
+	adc	r13,QWORD[40+rdi]
+	adc	r14,QWORD[48+rdi]
+	adc	r15,QWORD[56+rdi]
+	lea	rdi,[64+rdi]
+	sbb	rax,rax
+	sub	rcx,8	; rcx is 0 after the loop, so this makes it -8
+
+	xor	rsi,rsi
+	mov	QWORD[((16+8))+rsp],rax
+	jmp	NEAR $L$sqrx8x_tail
+
+ALIGN	32
+$L$sqrx8x_tail_done:
+	xor	rax,rax
+	add	r8,QWORD[((24+8))+rsp]	; fold in the carry saved at the top of the reduction pass
+	adc	r9,0
+	adc	r10,0
+	adc	r11,0
+	adc	r12,0
+	adc	r13,0
+	adc	r14,0
+	adc	r15,0
+	adc	rax,0
+
+	sub	rsi,QWORD[((16+8))+rsp]	; recreate CF from the saved carry
+$L$sqrx8x_no_tail:
+	adc	r8,QWORD[rdi]
+DB	102,72,15,126,217	; movq rcx,xmm3
+	adc	r9,QWORD[8+rdi]
+	mov	rsi,QWORD[56+rbp]
+DB	102,72,15,126,213	; movq rbp,xmm2
+	adc	r10,QWORD[16+rdi]
+	adc	r11,QWORD[24+rdi]
+	adc	r12,QWORD[32+rdi]
+	adc	r13,QWORD[40+rdi]
+	adc	r14,QWORD[48+rdi]
+	adc	r15,QWORD[56+rdi]
+	adc	rax,0	; rax = carry out of this pass
+
+	mov	rbx,QWORD[((32+8))+rsp]
+	mov	rdx,QWORD[64+rcx*1+rdi]
+
+	mov	QWORD[rdi],r8
+	lea	r8,[64+rdi]	; r8 = end of the block just stored, compared against the saved end below
+	mov	QWORD[8+rdi],r9
+	mov	QWORD[16+rdi],r10
+	mov	QWORD[24+rdi],r11
+	mov	QWORD[32+rdi],r12
+	mov	QWORD[40+rdi],r13
+	mov	QWORD[48+rdi],r14
+	mov	QWORD[56+rdi],r15
+
+	lea	rdi,[64+rcx*1+rdi]
+	cmp	r8,QWORD[((8+8))+rsp]
+	jb	NEAR $L$sqrx8x_reduction_loop
+	DB	0F3h,0C3h		;repret
+
+ALIGN	32
+
+__bn_postx4x_internal:	; writes [rdi] + (~[rbp] & mask) + carry to [rdx], 4 qwords per pass; the first [rbp] qword is decremented before inversion
+; NOTE(review): with mask = all-ones this equals [rdi] - [rbp] when the first [rbp] qword is nonzero; with mask = 0 it is a plain copy - confirm the intent
+	mov	r12,QWORD[rbp]
+	mov	r10,rcx
+	mov	r9,rcx
+	neg	rax	; rax becomes the mask (0 -> 0, 1 -> all-ones)
+	sar	rcx,3+2	; rcx /= 32; callers pass a negative byte count, so this is the negated pass count
+
+DB	102,72,15,126,202	; movq rdx,xmm1
+DB	102,72,15,126,206	; movq rsi,xmm1
+	dec	r12
+	mov	r13,QWORD[8+rbp]
+	xor	r8,r8	; r8 = 0, so the first "neg r8" below yields CF = 0
+	mov	r14,QWORD[16+rbp]
+	mov	r15,QWORD[24+rbp]
+	jmp	NEAR $L$sqrx4x_sub_entry
+
+ALIGN	16
+$L$sqrx4x_sub:
+	mov	r12,QWORD[rbp]
+	mov	r13,QWORD[8+rbp]
+	mov	r14,QWORD[16+rbp]
+	mov	r15,QWORD[24+rbp]
+$L$sqrx4x_sub_entry:	; also reached by a jmp from the mulx4x code earlier in this file
+	andn	r12,r12,rax	; r12 = ~r12 & rax
+	lea	rbp,[32+rbp]
+	andn	r13,r13,rax
+	andn	r14,r14,rax
+	andn	r15,r15,rax
+
+	neg	r8	; CF = (r8 != 0): restores the carry saved by sbb below
+	adc	r12,QWORD[rdi]
+	adc	r13,QWORD[8+rdi]
+	adc	r14,QWORD[16+rdi]
+	adc	r15,QWORD[24+rdi]
+	mov	QWORD[rdx],r12
+	lea	rdi,[32+rdi]
+	mov	QWORD[8+rdx],r13
+	sbb	r8,r8	; r8 = 0 or -1 from CF
+	mov	QWORD[16+rdx],r14
+	mov	QWORD[24+rdx],r15
+	lea	rdx,[32+rdx]
+
+	inc	rcx
+	jnz	NEAR $L$sqrx4x_sub
+
+	neg	r9	; flip the sign of the count copied from rcx at entry
+
+	DB	0F3h,0C3h		;repret
+
+
+global	bn_scatter5
+; Copies edx qwords from [rcx] to r8 + r9*8, stepping the destination by 256 bytes per qword; returns at once when edx is 0.
+ALIGN	16
+bn_scatter5:
+
+	cmp	edx,0
+	jz	NEAR $L$scatter_epilogue
+	lea	r8,[r9*8+r8]	; destination starts r9 qwords into the area at r8
+$L$scatter:
+	mov	rax,QWORD[rcx]
+	lea	rcx,[8+rcx]	; source advances one qword
+	mov	QWORD[r8],rax
+	lea	r8,[256+r8]	; destination advances 256 bytes
+	sub	edx,1
+	jnz	NEAR $L$scatter
+$L$scatter_epilogue:
+	DB	0F3h,0C3h		;repret
+
+
+
+global	bn_gather5
+; Reads edx qwords out of 256-byte rows starting at r8, selecting the qword whose index equals r9d, and stores them at [rcx]. Every row is read in full and combined with AND/OR masks rather than indexed.
+ALIGN	32
+bn_gather5:
+
+$L$SEH_begin_bn_gather5:
+
+DB	0x4c,0x8d,0x14,0x24	; lea r10,[rsp]: remember rsp for the exit path
+
+DB	0x48,0x81,0xec,0x08,0x01,0x00,0x00	; sub rsp,0x108
+	lea	rax,[$L$inc]
+	and	rsp,-16	; 16-byte alignment required by the movdqa stores below
+
+	movd	xmm5,r9d
+	movdqa	xmm0,XMMWORD[rax]	; dwords 0,0,1,1
+	movdqa	xmm1,XMMWORD[16+rax]	; dwords 2,2,2,2 (step)
+	lea	r11,[128+r8]	; row pointer biased by 128
+	lea	rax,[128+rsp]	; mask-table pointer biased by 128
+
+	pshufd	xmm5,xmm5,0	; broadcast the wanted index to all four dwords
+	movdqa	xmm4,xmm1
+	movdqa	xmm2,xmm1
+	paddd	xmm1,xmm0	; each step below builds the next index pair and compares the previous one with xmm5
+	pcmpeqd	xmm0,xmm5
+	movdqa	xmm3,xmm4
+
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[(-128)+rax],xmm0	; 16 masks of 16 bytes are stored at [rax-128]..[rax+112]
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[(-112)+rax],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[(-96)+rax],xmm2
+	movdqa	xmm2,xmm4
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[(-80)+rax],xmm3
+	movdqa	xmm3,xmm4
+
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[(-64)+rax],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[(-48)+rax],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[(-32)+rax],xmm2
+	movdqa	xmm2,xmm4
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[(-16)+rax],xmm3
+	movdqa	xmm3,xmm4
+
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[rax],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[16+rax],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[32+rax],xmm2
+	movdqa	xmm2,xmm4
+	paddd	xmm1,xmm0
+	pcmpeqd	xmm0,xmm5
+	movdqa	XMMWORD[48+rax],xmm3
+	movdqa	xmm3,xmm4
+
+	paddd	xmm2,xmm1
+	pcmpeqd	xmm1,xmm5
+	movdqa	XMMWORD[64+rax],xmm0
+	movdqa	xmm0,xmm4
+
+	paddd	xmm3,xmm2
+	pcmpeqd	xmm2,xmm5
+	movdqa	XMMWORD[80+rax],xmm1
+	movdqa	xmm1,xmm4
+
+	paddd	xmm0,xmm3
+	pcmpeqd	xmm3,xmm5
+	movdqa	XMMWORD[96+rax],xmm2
+	movdqa	xmm2,xmm4
+	movdqa	XMMWORD[112+rax],xmm3
+	jmp	NEAR $L$gather
+
+ALIGN	32
+$L$gather:	; per output qword: AND all 16 chunks of the current 256-byte row with their masks and OR them together
+	pxor	xmm4,xmm4
+	pxor	xmm5,xmm5
+	movdqa	xmm0,XMMWORD[((-128))+r11]
+	movdqa	xmm1,XMMWORD[((-112))+r11]
+	movdqa	xmm2,XMMWORD[((-96))+r11]
+	pand	xmm0,XMMWORD[((-128))+rax]
+	movdqa	xmm3,XMMWORD[((-80))+r11]
+	pand	xmm1,XMMWORD[((-112))+rax]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-96))+rax]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-80))+rax]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[((-64))+r11]
+	movdqa	xmm1,XMMWORD[((-48))+r11]
+	movdqa	xmm2,XMMWORD[((-32))+r11]
+	pand	xmm0,XMMWORD[((-64))+rax]
+	movdqa	xmm3,XMMWORD[((-16))+r11]
+	pand	xmm1,XMMWORD[((-48))+rax]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[((-32))+rax]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[((-16))+rax]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[r11]
+	movdqa	xmm1,XMMWORD[16+r11]
+	movdqa	xmm2,XMMWORD[32+r11]
+	pand	xmm0,XMMWORD[rax]
+	movdqa	xmm3,XMMWORD[48+r11]
+	pand	xmm1,XMMWORD[16+rax]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[32+rax]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[48+rax]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	movdqa	xmm0,XMMWORD[64+r11]
+	movdqa	xmm1,XMMWORD[80+r11]
+	movdqa	xmm2,XMMWORD[96+r11]
+	pand	xmm0,XMMWORD[64+rax]
+	movdqa	xmm3,XMMWORD[112+r11]
+	pand	xmm1,XMMWORD[80+rax]
+	por	xmm4,xmm0
+	pand	xmm2,XMMWORD[96+rax]
+	por	xmm5,xmm1
+	pand	xmm3,XMMWORD[112+rax]
+	por	xmm4,xmm2
+	por	xmm5,xmm3
+	por	xmm4,xmm5
+	lea	r11,[256+r11]	; next 256-byte row
+	pshufd	xmm0,xmm4,0x4e	; swap the two qword halves
+	por	xmm0,xmm4	; fold both halves into the low qword
+	movq	QWORD[rcx],xmm0
+	lea	rcx,[8+rcx]
+	sub	edx,1
+	jnz	NEAR $L$gather
+
+	lea	rsp,[r10]	; restore the rsp captured at entry
+
+	DB	0F3h,0C3h		;repret
+$L$SEH_end_bn_gather5:
+
+
+ALIGN	64
+$L$inc:	; constants loaded by bn_gather5 (first 32 bytes), followed by an ASCII identification string
+	DD	0,0,1,1	; initial index pair, each value duplicated
+	DD	2,2,2,2	; per-step increment
+DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105	; "Montgomery Multi"
+DB	112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115	; "plication with s"
+DB	99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111	; "catter/gather fo"
+DB	114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79	; "r x86_64, CRYPTO"
+DB	71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111	; "GAMS by <appro@o"
+DB	112,101,110,115,115,108,46,111,114,103,62,0	; "penssl.org>" plus NUL terminator
+EXTERN	__imp_RtlVirtualUnwind
+
+ALIGN	16
+mul_handler:	; SEH language handler named by the .xdata entries in this file; r8 = context record, r9 = dispatcher context. Field names in the comments follow the Win64 CONTEXT / DISPATCHER_CONTEXT layouts - verify against winnt.h
+	push	rsi
+	push	rdi
+	push	rbx
+	push	rbp
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	pushfq
+	sub	rsp,64	; room for the call made below
+
+	mov	rax,QWORD[120+r8]	; context->Rax
+	mov	rbx,QWORD[248+r8]	; context->Rip
+
+	mov	rsi,QWORD[8+r9]	; disp->ImageBase
+	mov	r11,QWORD[56+r9]	; disp->HandlerData: the three RVAs stored after the handler in .xdata
+
+	mov	r10d,DWORD[r11]	; HandlerData[0]
+	lea	r10,[r10*1+rsi]	; RVA -> address
+	cmp	rbx,r10
+	jb	NEAR $L$common_seh_tail	; Rip before the first label: use context->Rax as the frame pointer, no registers to recover
+
+	mov	r10d,DWORD[4+r11]	; HandlerData[1]
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jb	NEAR $L$common_pop_regs	; between the first two labels: registers are saved just below the address in rax
+
+	mov	rax,QWORD[152+r8]	; context->Rsp
+
+	mov	r10d,DWORD[8+r11]	; HandlerData[2]
+	lea	r10,[r10*1+rsi]
+	cmp	rbx,r10
+	jae	NEAR $L$common_seh_tail	; at or past the third label: nothing left to recover
+
+	lea	r10,[$L$mul_epilogue]
+	cmp	rbx,r10
+	ja	NEAR $L$body_40
+
+	mov	r10,QWORD[192+r8]	; context->R9
+	mov	rax,QWORD[8+r10*8+rax]	; saved stack pointer read from [Rsp + 8*R9 + 8]
+
+	jmp	NEAR $L$common_pop_regs
+
+$L$body_40:
+	mov	rax,QWORD[40+rax]	; saved stack pointer read from [Rsp + 40]
+$L$common_pop_regs:	; rax = stack pointer before the six pushes; copy the saved registers into the context record
+	mov	rbx,QWORD[((-8))+rax]
+	mov	rbp,QWORD[((-16))+rax]
+	mov	r12,QWORD[((-24))+rax]
+	mov	r13,QWORD[((-32))+rax]
+	mov	r14,QWORD[((-40))+rax]
+	mov	r15,QWORD[((-48))+rax]
+	mov	QWORD[144+r8],rbx	; context->Rbx
+	mov	QWORD[160+r8],rbp	; context->Rbp
+	mov	QWORD[216+r8],r12	; context->R12
+	mov	QWORD[224+r8],r13	; context->R13
+	mov	QWORD[232+r8],r14	; context->R14
+	mov	QWORD[240+r8],r15	; context->R15
+
+$L$common_seh_tail:
+	mov	rdi,QWORD[8+rax]	; rdi/rsi parked by the WIN64 prologue of the interrupted function
+	mov	rsi,QWORD[16+rax]
+	mov	QWORD[152+r8],rax	; context->Rsp
+	mov	QWORD[168+r8],rsi	; context->Rsi
+	mov	QWORD[176+r8],rdi	; context->Rdi
+
+	mov	rdi,QWORD[40+r9]	; disp->ContextRecord (copy destination)
+	mov	rsi,r8	; copy source: the context record passed in
+	mov	ecx,154	; qword count for the copy
+	DD	0xa548f3fc	; bytes fc f3 48 a5 = cld; rep movsq
+
+	mov	rsi,r9
+	xor	rcx,rcx	; arg 1 = 0
+	mov	rdx,QWORD[8+rsi]	; arg 2 = disp->ImageBase
+	mov	r8,QWORD[rsi]	; arg 3 = disp->ControlPc
+	mov	r9,QWORD[16+rsi]	; arg 4 = disp->FunctionEntry
+	mov	r10,QWORD[40+rsi]	; disp->ContextRecord
+	lea	r11,[56+rsi]	; &disp->HandlerData
+	lea	r12,[24+rsi]	; &disp->EstablisherFrame
+	mov	QWORD[32+rsp],r10	; stack arg 5
+	mov	QWORD[40+rsp],r11	; stack arg 6
+	mov	QWORD[48+rsp],r12	; stack arg 7
+	mov	QWORD[56+rsp],rcx	; stack arg 8 = 0
+	call	QWORD[__imp_RtlVirtualUnwind]
+
+	mov	eax,1	; handler return value (presumably ExceptionContinueSearch - confirm)
+	add	rsp,64
+	popfq
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+	pop	rbp
+	pop	rbx
+	pop	rdi
+	pop	rsi
+	DB	0F3h,0C3h		;repret
+
+
+section	.pdata rdata align=4	; function table: one (begin RVA, end RVA, unwind-info RVA) triple per function
+ALIGN	4
+	DD	$L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase
+	DD	$L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase
+	DD	$L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase
+
+	DD	$L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase
+	DD	$L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase
+	DD	$L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase
+
+	DD	$L$SEH_begin_bn_power5 wrt ..imagebase
+	DD	$L$SEH_end_bn_power5 wrt ..imagebase
+	DD	$L$SEH_info_bn_power5 wrt ..imagebase
+
+	DD	$L$SEH_begin_bn_from_mont8x wrt ..imagebase
+	DD	$L$SEH_end_bn_from_mont8x wrt ..imagebase
+	DD	$L$SEH_info_bn_from_mont8x wrt ..imagebase
+	DD	$L$SEH_begin_bn_mulx4x_mont_gather5 wrt ..imagebase
+	DD	$L$SEH_end_bn_mulx4x_mont_gather5 wrt ..imagebase
+	DD	$L$SEH_info_bn_mulx4x_mont_gather5 wrt ..imagebase
+
+	DD	$L$SEH_begin_bn_powerx5 wrt ..imagebase
+	DD	$L$SEH_end_bn_powerx5 wrt ..imagebase
+	DD	$L$SEH_info_bn_powerx5 wrt ..imagebase
+	DD	$L$SEH_begin_bn_gather5 wrt ..imagebase
+	DD	$L$SEH_end_bn_gather5 wrt ..imagebase
+	DD	$L$SEH_info_bn_gather5 wrt ..imagebase
+
+
+section	.xdata rdata align=8	; unwind info records referenced from .pdata
+ALIGN	8
+$L$SEH_info_bn_mul_mont_gather5:
+DB	9,0,0,0	; header byte 9 = version 1 with the exception-handler flag; no unwind codes
+	DD	mul_handler wrt ..imagebase
+	DD	$L$mul_body wrt ..imagebase,$L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase	; three RVAs read by mul_handler as HandlerData[0..2]; the first two are the same label here
+ALIGN	8
+$L$SEH_info_bn_mul4x_mont_gather5:
+DB	9,0,0,0
+	DD	mul_handler wrt ..imagebase
+	DD	$L$mul4x_prologue wrt ..imagebase,$L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase
+ALIGN	8
+$L$SEH_info_bn_power5:
+DB	9,0,0,0
+	DD	mul_handler wrt ..imagebase
+	DD	$L$power5_prologue wrt ..imagebase,$L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase
+ALIGN	8
+$L$SEH_info_bn_from_mont8x:
+DB	9,0,0,0
+	DD	mul_handler wrt ..imagebase
+	DD	$L$from_prologue wrt ..imagebase,$L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase
+ALIGN	8
+$L$SEH_info_bn_mulx4x_mont_gather5:
+DB	9,0,0,0
+	DD	mul_handler wrt ..imagebase
+	DD	$L$mulx4x_prologue wrt ..imagebase,$L$mulx4x_body wrt ..imagebase,$L$mulx4x_epilogue wrt ..imagebase
+ALIGN	8
+$L$SEH_info_bn_powerx5:
+DB	9,0,0,0
+	DD	mul_handler wrt ..imagebase
+	DD	$L$powerx5_prologue wrt ..imagebase,$L$powerx5_body wrt ..imagebase,$L$powerx5_epilogue wrt ..imagebase
+ALIGN	8
+$L$SEH_info_bn_gather5:	; plain unwind codes, no handler
+DB	0x01,0x0b,0x03,0x0a	; version 1, prolog size 0x0b, 3 code slots, frame register 0x0a (r10)
+DB	0x0b,0x01,0x21,0x00	; at prolog offset 0x0b: large allocation of 0x21*8 = 0x108 bytes (the sub rsp,0x108)
+DB	0x04,0xa3,0x00,0x00	; at prolog offset 0x04: set frame register (the lea r10,[rsp]); last two bytes pad the record
+ALIGN	8
diff --git a/deps/boringssl/win-x86_64/crypto/test/trampoline-x86_64.asm b/deps/boringssl/win-x86_64/crypto/test/trampoline-x86_64.asm
new file mode 100644
index 0000000..9900669
--- /dev/null
+++ b/deps/boringssl/win-x86_64/crypto/test/trampoline-x86_64.asm
@@ -0,0 +1,682 @@
+; This file is generated from a similarly-named Perl script in the BoringSSL
+; source tree. Do not edit by hand.
+
+default	rel
+%define XMMWORD
+%define YMMWORD
+%define ZMMWORD
+
+%ifdef BORINGSSL_PREFIX
+%include "boringssl_prefix_symbols_nasm.inc"
+%endif
+section	.text code align=64
+
+
+
+
+
+
+
+
+
+
+global	abi_test_trampoline
+ALIGN	16
+abi_test_trampoline:
+$L$abi_test_trampoline_seh_begin:
+; Win64 args as used below: rcx = function to call, rdx = pointer to a block of
+; register values (rbx,rbp,rdi,rsi,r12-r15 at 0..56; xmm6-xmm15 at 64..208),
+; r8 = array of qword arguments, r9 = argument count, and the qword at
+; [40+rsp] on entry ([384+rsp] once the frame exists): nonzero sets the trap
+; flag around the call. The block is loaded before the call and written back after.
+
+
+
+
+
+	sub	rsp,344	; allocate the frame
+
+$L$abi_test_trampoline_seh_prolog_alloc:
+	mov	QWORD[112+rsp],rbx	; save this function's own callee-saved registers at [112..335+rsp]
+
+$L$abi_test_trampoline_seh_prolog_rbx:
+	mov	QWORD[120+rsp],rbp
+
+$L$abi_test_trampoline_seh_prolog_rbp:
+	mov	QWORD[128+rsp],rdi
+
+$L$abi_test_trampoline_seh_prolog_rdi:
+	mov	QWORD[136+rsp],rsi
+
+$L$abi_test_trampoline_seh_prolog_rsi:
+	mov	QWORD[144+rsp],r12
+
+$L$abi_test_trampoline_seh_prolog_r12:
+	mov	QWORD[152+rsp],r13
+
+$L$abi_test_trampoline_seh_prolog_r13:
+	mov	QWORD[160+rsp],r14
+
+$L$abi_test_trampoline_seh_prolog_r14:
+	mov	QWORD[168+rsp],r15
+
+$L$abi_test_trampoline_seh_prolog_r15:
+	movdqa	XMMWORD[176+rsp],xmm6
+
+$L$abi_test_trampoline_seh_prolog_xmm6:
+	movdqa	XMMWORD[192+rsp],xmm7
+
+$L$abi_test_trampoline_seh_prolog_xmm7:
+	movdqa	XMMWORD[208+rsp],xmm8
+
+$L$abi_test_trampoline_seh_prolog_xmm8:
+	movdqa	XMMWORD[224+rsp],xmm9
+
+$L$abi_test_trampoline_seh_prolog_xmm9:
+	movdqa	XMMWORD[240+rsp],xmm10
+
+$L$abi_test_trampoline_seh_prolog_xmm10:
+	movdqa	XMMWORD[256+rsp],xmm11
+
+$L$abi_test_trampoline_seh_prolog_xmm11:
+	movdqa	XMMWORD[272+rsp],xmm12
+
+$L$abi_test_trampoline_seh_prolog_xmm12:
+	movdqa	XMMWORD[288+rsp],xmm13
+
+$L$abi_test_trampoline_seh_prolog_xmm13:
+	movdqa	XMMWORD[304+rsp],xmm14
+
+$L$abi_test_trampoline_seh_prolog_xmm14:
+	movdqa	XMMWORD[320+rsp],xmm15
+
+$L$abi_test_trampoline_seh_prolog_xmm15:
+$L$abi_test_trampoline_seh_prolog_end:
+	mov	rbx,QWORD[rdx]	; load the register values supplied in the block at rdx
+	mov	rbp,QWORD[8+rdx]
+	mov	rdi,QWORD[16+rdx]
+	mov	rsi,QWORD[24+rdx]
+	mov	r12,QWORD[32+rdx]
+	mov	r13,QWORD[40+rdx]
+	mov	r14,QWORD[48+rdx]
+	mov	r15,QWORD[56+rdx]
+	movdqa	xmm6,XMMWORD[64+rdx]
+	movdqa	xmm7,XMMWORD[80+rdx]
+	movdqa	xmm8,XMMWORD[96+rdx]
+	movdqa	xmm9,XMMWORD[112+rdx]
+	movdqa	xmm10,XMMWORD[128+rdx]
+	movdqa	xmm11,XMMWORD[144+rdx]
+	movdqa	xmm12,XMMWORD[160+rdx]
+	movdqa	xmm13,XMMWORD[176+rdx]
+	movdqa	xmm14,XMMWORD[192+rdx]
+	movdqa	xmm15,XMMWORD[208+rdx]
+
+	mov	QWORD[88+rsp],rcx	; stash the call target
+	mov	QWORD[96+rsp],rdx	; stash the block pointer for the write-back
+
+; Marshal arguments: the first four go to rcx, rdx, r8, r9; the rest are
+; copied to the outgoing stack area starting at [32+rsp].
+
+
+	mov	r10,r8	; r10 walks the argument array
+	mov	r11,r9	; r11 counts the remaining arguments
+	dec	r11
+	js	NEAR $L$args_done
+	mov	rcx,QWORD[r10]
+	add	r10,8
+	dec	r11
+	js	NEAR $L$args_done
+	mov	rdx,QWORD[r10]
+	add	r10,8
+	dec	r11
+	js	NEAR $L$args_done
+	mov	r8,QWORD[r10]
+	add	r10,8
+	dec	r11
+	js	NEAR $L$args_done
+	mov	r9,QWORD[r10]
+	add	r10,8
+	lea	rax,[32+rsp]	; rax walks the outgoing stack slots
+$L$args_loop:
+	dec	r11
+	js	NEAR $L$args_done
+
+; r11 doubles as the copy scratch register, so the count is spilled to
+; [104+rsp] around each copy.
+
+
+
+
+	mov	QWORD[104+rsp],r11
+	mov	r11,QWORD[r10]
+	mov	QWORD[rax],r11
+	mov	r11,QWORD[104+rsp]
+
+	add	r10,8
+	add	rax,8
+	jmp	NEAR $L$args_loop
+
+$L$args_done:
+	mov	rax,QWORD[88+rsp]	; rax = call target
+	mov	r10,QWORD[384+rsp]	; fifth argument: 344 (frame) + 40 (entry offset)
+	test	r10,r10
+	jz	NEAR $L$no_unwind
+
+
+	pushfq
+	or	QWORD[rsp],0x100	; set bit 8 of the saved flags (the trap flag)
+	popfq
+
+
+
+	nop
+global	abi_test_unwind_start
+abi_test_unwind_start:
+
+	call	rax
+global	abi_test_unwind_return
+abi_test_unwind_return:
+
+
+
+
+	pushfq
+	and	QWORD[rsp],-0x101	; clear bit 8 again
+	popfq
+global	abi_test_unwind_stop
+abi_test_unwind_stop:
+
+	jmp	NEAR $L$call_done
+
+$L$no_unwind:
+	call	rax
+
+$L$call_done:
+
+	mov	rdx,QWORD[96+rsp]	; block pointer stashed before the call
+	mov	QWORD[rdx],rbx	; write the registers as the callee left them back into the block
+	mov	QWORD[8+rdx],rbp
+	mov	QWORD[16+rdx],rdi
+	mov	QWORD[24+rdx],rsi
+	mov	QWORD[32+rdx],r12
+	mov	QWORD[40+rdx],r13
+	mov	QWORD[48+rdx],r14
+	mov	QWORD[56+rdx],r15
+	movdqa	XMMWORD[64+rdx],xmm6
+	movdqa	XMMWORD[80+rdx],xmm7
+	movdqa	XMMWORD[96+rdx],xmm8
+	movdqa	XMMWORD[112+rdx],xmm9
+	movdqa	XMMWORD[128+rdx],xmm10
+	movdqa	XMMWORD[144+rdx],xmm11
+	movdqa	XMMWORD[160+rdx],xmm12
+	movdqa	XMMWORD[176+rdx],xmm13
+	movdqa	XMMWORD[192+rdx],xmm14
+	movdqa	XMMWORD[208+rdx],xmm15
+	mov	rbx,QWORD[112+rsp]	; restore this function's own saved registers
+
+	mov	rbp,QWORD[120+rsp]
+
+	mov	rdi,QWORD[128+rsp]
+
+	mov	rsi,QWORD[136+rsp]
+
+	mov	r12,QWORD[144+rsp]
+
+	mov	r13,QWORD[152+rsp]
+
+	mov	r14,QWORD[160+rsp]
+
+	mov	r15,QWORD[168+rsp]
+
+	movdqa	xmm6,XMMWORD[176+rsp]
+
+	movdqa	xmm7,XMMWORD[192+rsp]
+
+	movdqa	xmm8,XMMWORD[208+rsp]
+
+	movdqa	xmm9,XMMWORD[224+rsp]
+
+	movdqa	xmm10,XMMWORD[240+rsp]
+
+	movdqa	xmm11,XMMWORD[256+rsp]
+
+	movdqa	xmm12,XMMWORD[272+rsp]
+
+	movdqa	xmm13,XMMWORD[288+rsp]
+
+	movdqa	xmm14,XMMWORD[304+rsp]
+
+	movdqa	xmm15,XMMWORD[320+rsp]
+
+	add	rsp,344	; release the frame; rax still holds whatever the callee returned
+
+
+
+	DB	0F3h,0C3h		;repret
+
+$L$abi_test_trampoline_seh_end:
+
+
+global	abi_test_clobber_rax
+ALIGN	16
+abi_test_clobber_rax:	; sets rax to zero and returns
+	xor	rax,rax	; rax = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_rbx
+ALIGN	16
+abi_test_clobber_rbx:	; sets rbx to zero and returns without restoring it
+	xor	rbx,rbx	; rbx = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_rcx
+ALIGN	16
+abi_test_clobber_rcx:	; sets rcx to zero and returns
+	xor	rcx,rcx	; rcx = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_rdx
+ALIGN	16
+abi_test_clobber_rdx:	; sets rdx to zero and returns
+	xor	rdx,rdx	; rdx = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_rdi
+ALIGN	16
+abi_test_clobber_rdi:	; sets rdi to zero and returns without restoring it
+	xor	rdi,rdi	; rdi = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_rsi
+ALIGN	16
+abi_test_clobber_rsi:	; sets rsi to zero and returns without restoring it
+	xor	rsi,rsi	; rsi = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_rbp
+ALIGN	16
+abi_test_clobber_rbp:	; sets rbp to zero and returns without restoring it
+	xor	rbp,rbp	; rbp = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_r8
+ALIGN	16
+abi_test_clobber_r8:	; sets r8 to zero and returns
+	xor	r8,r8	; r8 = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_r9
+ALIGN	16
+abi_test_clobber_r9:	; sets r9 to zero and returns
+	xor	r9,r9	; r9 = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_r10
+ALIGN	16
+abi_test_clobber_r10:	; sets r10 to zero and returns
+	xor	r10,r10	; r10 = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_r11
+ALIGN	16
+abi_test_clobber_r11:	; sets r11 to zero and returns
+	xor	r11,r11	; r11 = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_r12
+ALIGN	16
+abi_test_clobber_r12:	; sets r12 to zero and returns without restoring it
+	xor	r12,r12	; r12 = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_r13
+ALIGN	16
+abi_test_clobber_r13:	; sets r13 to zero and returns without restoring it
+	xor	r13,r13	; r13 = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_r14
+ALIGN	16
+abi_test_clobber_r14:	; sets r14 to zero and returns without restoring it
+	xor	r14,r14	; r14 = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_r15
+ALIGN	16
+abi_test_clobber_r15:	; sets r15 to zero and returns without restoring it
+	xor	r15,r15	; r15 = 0 (also rewrites the arithmetic flags)
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm0
+ALIGN	16
+abi_test_clobber_xmm0:	; sets xmm0 to zero and returns
+	pxor	xmm0,xmm0	; xmm0 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm1
+ALIGN	16
+abi_test_clobber_xmm1:	; sets xmm1 to zero and returns
+	pxor	xmm1,xmm1	; xmm1 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm2
+ALIGN	16
+abi_test_clobber_xmm2:	; sets xmm2 to zero and returns
+	pxor	xmm2,xmm2	; xmm2 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm3
+ALIGN	16
+abi_test_clobber_xmm3:	; sets xmm3 to zero and returns
+	pxor	xmm3,xmm3	; xmm3 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm4
+ALIGN	16
+abi_test_clobber_xmm4:	; sets xmm4 to zero and returns
+	pxor	xmm4,xmm4	; xmm4 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm5
+ALIGN	16
+abi_test_clobber_xmm5:	; sets xmm5 to zero and returns
+	pxor	xmm5,xmm5	; xmm5 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm6
+ALIGN	16
+abi_test_clobber_xmm6:	; sets xmm6 to zero and returns without restoring it
+	pxor	xmm6,xmm6	; xmm6 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm7
+ALIGN	16
+abi_test_clobber_xmm7:	; sets xmm7 to zero and returns without restoring it
+	pxor	xmm7,xmm7	; xmm7 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm8
+ALIGN	16
+abi_test_clobber_xmm8:	; sets xmm8 to zero and returns without restoring it
+	pxor	xmm8,xmm8	; xmm8 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm9
+ALIGN	16
+abi_test_clobber_xmm9:	; sets xmm9 to zero and returns without restoring it
+	pxor	xmm9,xmm9	; xmm9 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm10
+ALIGN	16
+abi_test_clobber_xmm10:	; sets xmm10 to zero and returns without restoring it
+	pxor	xmm10,xmm10	; xmm10 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm11
+ALIGN	16
+abi_test_clobber_xmm11:	; sets xmm11 to zero and returns without restoring it
+	pxor	xmm11,xmm11	; xmm11 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm12
+ALIGN	16
+abi_test_clobber_xmm12:	; sets xmm12 to zero and returns without restoring it
+	pxor	xmm12,xmm12	; xmm12 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm13
+ALIGN	16
+abi_test_clobber_xmm13:	; sets xmm13 to zero and returns without restoring it
+	pxor	xmm13,xmm13	; xmm13 = 0
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm14
+ALIGN	16
+abi_test_clobber_xmm14:		; zeroes xmm14 and returns without restoring it
+	pxor	xmm14,xmm14
+	DB	0F3h,0C3h		;repret
+
+
+global	abi_test_clobber_xmm15
+ALIGN	16
+abi_test_clobber_xmm15:		; zeroes xmm15 and returns without restoring it
+	pxor	xmm15,xmm15
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+global	abi_test_bad_unwind_wrong_register
+ALIGN	16
+abi_test_bad_unwind_wrong_register:	; saves and restores r12, but its .xdata unwind code names r13 instead
+
+$L$abi_test_bad_unwind_wrong_register_seh_begin:
+	push	r12			; the register actually saved on the stack is r12
+
+$L$abi_test_bad_unwind_wrong_register_seh_push_r13:	; prolog-end label; the unwind code anchored here describes a push of r13
+
+
+
+	nop
+	pop	r12			; restore r12 from the slot pushed above
+
+	DB	0F3h,0C3h		;repret
+$L$abi_test_bad_unwind_wrong_register_seh_end:
+
+
+
+
+
+
+
+global	abi_test_bad_unwind_temporary
+ALIGN	16
+abi_test_bad_unwind_temporary:	; briefly stores a wrong value in the stack slot its unwind data says holds r12
+
+$L$abi_test_bad_unwind_temporary_seh_begin:
+	push	r12			; save r12; [rsp] is now the saved-r12 slot
+
+$L$abi_test_bad_unwind_temporary_seh_push_r12:	; prolog-end label referenced by the unwind code
+
+	mov	rax,r12
+	inc	rax			; rax = r12 + 1
+	mov	QWORD[rsp],rax		; overwrite the saved slot with r12 + 1
+
+
+
+	mov	QWORD[rsp],r12		; put the real r12 value back in the slot
+
+
+	pop	r12
+
+	DB	0F3h,0C3h		;repret
+$L$abi_test_bad_unwind_temporary_seh_end:
+
+
+
+
+
+
+
+global	abi_test_get_and_clear_direction_flag
+abi_test_get_and_clear_direction_flag:	; leaves the old direction flag (0 or 1) in rax, then clears the flag
+	pushfq				; push RFLAGS onto the stack
+	pop	rax			; rax = RFLAGS
+	and	rax,0x400		; keep only bit 10, the direction flag
+	shr	rax,10			; shift it down so rax is 0 or 1
+	cld				; clear the direction flag
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+global	abi_test_set_direction_flag
+abi_test_set_direction_flag:	; sets the direction flag and returns with it still set
+	std
+	DB	0F3h,0C3h		;repret
+
+
+
+
+
+
+global	abi_test_bad_unwind_epilog
+ALIGN	16
+abi_test_bad_unwind_epilog:	; saves/restores r12 correctly but places an extra nop between the pop and the return
+$L$abi_test_bad_unwind_epilog_seh_begin:
+	push	r12
+$L$abi_test_bad_unwind_epilog_seh_push_r12:	; prolog-end label referenced by the unwind code
+
+	nop
+
+
+	pop	r12
+	nop				; NOTE(review): presumably this breaks the epilog shape the Windows unwinder recognizes - confirm
+	DB	0F3h,0C3h		;repret
+$L$abi_test_bad_unwind_epilog_seh_end:
+
+section	.pdata rdata align=4
+ALIGN	4
+; Function table: one (begin, end, unwind info) triple of image-relative addresses per function.
+	DD	$L$abi_test_trampoline_seh_begin wrt ..imagebase
+	DD	$L$abi_test_trampoline_seh_end wrt ..imagebase
+	DD	$L$abi_test_trampoline_seh_info wrt ..imagebase
+
+	DD	$L$abi_test_bad_unwind_wrong_register_seh_begin wrt ..imagebase
+	DD	$L$abi_test_bad_unwind_wrong_register_seh_end wrt ..imagebase
+	DD	$L$abi_test_bad_unwind_wrong_register_seh_info wrt ..imagebase
+
+	DD	$L$abi_test_bad_unwind_temporary_seh_begin wrt ..imagebase
+	DD	$L$abi_test_bad_unwind_temporary_seh_end wrt ..imagebase
+	DD	$L$abi_test_bad_unwind_temporary_seh_info wrt ..imagebase
+
+	DD	$L$abi_test_bad_unwind_epilog_seh_begin wrt ..imagebase
+	DD	$L$abi_test_bad_unwind_epilog_seh_end wrt ..imagebase
+	DD	$L$abi_test_bad_unwind_epilog_seh_info wrt ..imagebase
+
+section	.xdata rdata align=8
+ALIGN	8
+$L$abi_test_trampoline_seh_info:	; unwind info for abi_test_trampoline (Windows x64 UNWIND_INFO layout)
+
+DB	1	; version 1, no flags
+DB	$L$abi_test_trampoline_seh_prolog_end-$L$abi_test_trampoline_seh_begin	; prolog size in bytes
+DB	38	; count of 16-bit unwind-code slots below: 10 XMM saves * 2 + 8 GPR saves * 2 + 2 for the allocation
+DB	0	; no frame register
+DB	$L$abi_test_trampoline_seh_prolog_xmm15-$L$abi_test_trampoline_seh_begin
+DB	248	; 0xF8: UWOP_SAVE_XMM128 (op 8), xmm15
+	DW	20	; saved at rsp + 20*16
+DB	$L$abi_test_trampoline_seh_prolog_xmm14-$L$abi_test_trampoline_seh_begin
+DB	232	; 0xE8: UWOP_SAVE_XMM128, xmm14
+	DW	19	; saved at rsp + 19*16
+DB	$L$abi_test_trampoline_seh_prolog_xmm13-$L$abi_test_trampoline_seh_begin
+DB	216	; 0xD8: UWOP_SAVE_XMM128, xmm13
+	DW	18	; saved at rsp + 18*16
+DB	$L$abi_test_trampoline_seh_prolog_xmm12-$L$abi_test_trampoline_seh_begin
+DB	200	; 0xC8: UWOP_SAVE_XMM128, xmm12
+	DW	17	; saved at rsp + 17*16
+DB	$L$abi_test_trampoline_seh_prolog_xmm11-$L$abi_test_trampoline_seh_begin
+DB	184	; 0xB8: UWOP_SAVE_XMM128, xmm11
+	DW	16	; saved at rsp + 16*16
+DB	$L$abi_test_trampoline_seh_prolog_xmm10-$L$abi_test_trampoline_seh_begin
+DB	168	; 0xA8: UWOP_SAVE_XMM128, xmm10
+	DW	15	; saved at rsp + 15*16
+DB	$L$abi_test_trampoline_seh_prolog_xmm9-$L$abi_test_trampoline_seh_begin
+DB	152	; 0x98: UWOP_SAVE_XMM128, xmm9
+	DW	14	; saved at rsp + 14*16
+DB	$L$abi_test_trampoline_seh_prolog_xmm8-$L$abi_test_trampoline_seh_begin
+DB	136	; 0x88: UWOP_SAVE_XMM128, xmm8
+	DW	13	; saved at rsp + 13*16
+DB	$L$abi_test_trampoline_seh_prolog_xmm7-$L$abi_test_trampoline_seh_begin
+DB	120	; 0x78: UWOP_SAVE_XMM128, xmm7
+	DW	12	; saved at rsp + 12*16
+DB	$L$abi_test_trampoline_seh_prolog_xmm6-$L$abi_test_trampoline_seh_begin
+DB	104	; 0x68: UWOP_SAVE_XMM128, xmm6
+	DW	11	; saved at rsp + 11*16
+DB	$L$abi_test_trampoline_seh_prolog_r15-$L$abi_test_trampoline_seh_begin
+DB	244	; 0xF4: UWOP_SAVE_NONVOL (op 4), r15
+	DW	21	; saved at rsp + 21*8
+DB	$L$abi_test_trampoline_seh_prolog_r14-$L$abi_test_trampoline_seh_begin
+DB	228	; 0xE4: UWOP_SAVE_NONVOL, r14
+	DW	20	; saved at rsp + 20*8
+DB	$L$abi_test_trampoline_seh_prolog_r13-$L$abi_test_trampoline_seh_begin
+DB	212	; 0xD4: UWOP_SAVE_NONVOL, r13
+	DW	19	; saved at rsp + 19*8
+DB	$L$abi_test_trampoline_seh_prolog_r12-$L$abi_test_trampoline_seh_begin
+DB	196	; 0xC4: UWOP_SAVE_NONVOL, r12
+	DW	18	; saved at rsp + 18*8
+DB	$L$abi_test_trampoline_seh_prolog_rsi-$L$abi_test_trampoline_seh_begin
+DB	100	; 0x64: UWOP_SAVE_NONVOL, rsi
+	DW	17	; saved at rsp + 17*8
+DB	$L$abi_test_trampoline_seh_prolog_rdi-$L$abi_test_trampoline_seh_begin
+DB	116	; 0x74: UWOP_SAVE_NONVOL, rdi
+	DW	16	; saved at rsp + 16*8
+DB	$L$abi_test_trampoline_seh_prolog_rbp-$L$abi_test_trampoline_seh_begin
+DB	84	; 0x54: UWOP_SAVE_NONVOL, rbp
+	DW	15	; saved at rsp + 15*8
+DB	$L$abi_test_trampoline_seh_prolog_rbx-$L$abi_test_trampoline_seh_begin
+DB	52	; 0x34: UWOP_SAVE_NONVOL, rbx
+	DW	14	; saved at rsp + 14*8
+DB	$L$abi_test_trampoline_seh_prolog_alloc-$L$abi_test_trampoline_seh_begin
+DB	1	; 0x01: UWOP_ALLOC_LARGE (op 1), info 0 => size/8 is in the next slot
+	DW	43	; stack allocation of 43*8 = 344 bytes
+
+
+ALIGN	8
+$L$abi_test_bad_unwind_wrong_register_seh_info:	; unwind info that intentionally disagrees with the function's code
+DB	1	; version 1, no flags
+DB	$L$abi_test_bad_unwind_wrong_register_seh_push_r13-$L$abi_test_bad_unwind_wrong_register_seh_begin	; prolog size
+DB	1	; one unwind-code slot
+DB	0	; no frame register
+
+DB	$L$abi_test_bad_unwind_wrong_register_seh_push_r13-$L$abi_test_bad_unwind_wrong_register_seh_begin	; prolog offset of the code
+DB	208	; 0xD0: UWOP_PUSH_NONVOL (op 0), r13 - the function itself pushes r12
+
+ALIGN	8
+$L$abi_test_bad_unwind_temporary_seh_info:	; unwind info for abi_test_bad_unwind_temporary
+DB	1	; version 1, no flags
+DB	$L$abi_test_bad_unwind_temporary_seh_push_r12-$L$abi_test_bad_unwind_temporary_seh_begin	; prolog size
+DB	1	; one unwind-code slot
+DB	0	; no frame register
+
+DB	$L$abi_test_bad_unwind_temporary_seh_push_r12-$L$abi_test_bad_unwind_temporary_seh_begin	; prolog offset of the code
+DB	192	; 0xC0: UWOP_PUSH_NONVOL (op 0), r12
+
+ALIGN	8
+$L$abi_test_bad_unwind_epilog_seh_info:	; unwind info for abi_test_bad_unwind_epilog
+DB	1	; version 1, no flags
+DB	$L$abi_test_bad_unwind_epilog_seh_push_r12-$L$abi_test_bad_unwind_epilog_seh_begin	; prolog size
+DB	1	; one unwind-code slot
+DB	0	; no frame register
+
+DB	$L$abi_test_bad_unwind_epilog_seh_push_r12-$L$abi_test_bad_unwind_epilog_seh_begin	; prolog offset of the code
+DB	192	; 0xC0: UWOP_PUSH_NONVOL (op 0), r12
diff --git a/examples/Makefile b/examples/Makefile
index bb75e7d..5660bbd 100644
--- a/examples/Makefile
+++ b/examples/Makefile
@@ -34,7 +34,7 @@
 	$(CC) $(CFLAGS) $(LDFLAGS) $< -o $@ $(INCS) $(LIBS)
 
 $(LIB_DIR)/libquiche.a: $(shell find $(SOURCE_DIR) -type f -name '*.rs')
-	cd .. && cargo build --target-dir $(BUILD_DIR)
+	cd .. && cargo build --target-dir $(BUILD_DIR) --features ffi
 
 clean:
-	@$(RM) -rf client server http3-client http3-server build/
+	@$(RM) -rf client server http3-client http3-server build/ *.dSYM/
diff --git a/examples/client.c b/examples/client.c
index bca9781..0df9665 100644
--- a/examples/client.c
+++ b/examples/client.c
@@ -61,8 +61,11 @@
 static void flush_egress(struct ev_loop *loop, struct conn_io *conn_io) {
     static uint8_t out[MAX_DATAGRAM_SIZE];
 
+    quiche_send_info send_info;
+
     while (1) {
-        ssize_t written = quiche_conn_send(conn_io->conn, out, sizeof(out));
+        ssize_t written = quiche_conn_send(conn_io->conn, out, sizeof(out),
+                                           &send_info);
 
         if (written == QUICHE_ERR_DONE) {
             fprintf(stderr, "done writing\n");
@@ -74,7 +77,10 @@
             return;
         }
 
-        ssize_t sent = send(conn_io->sock, out, written, 0);
+        ssize_t sent = sendto(conn_io->sock, out, written, 0,
+                              (struct sockaddr *) &send_info.to,
+                              send_info.to_len);
+
         if (sent != written) {
             perror("failed to send");
             return;
@@ -96,7 +102,13 @@
     static uint8_t buf[65535];
 
     while (1) {
-        ssize_t read = recv(conn_io->sock, buf, sizeof(buf), 0);
+        struct sockaddr_storage peer_addr;
+        socklen_t peer_addr_len = sizeof(peer_addr);
+        memset(&peer_addr, 0, peer_addr_len);
+
+        ssize_t read = recvfrom(conn_io->sock, buf, sizeof(buf), 0,
+                                (struct sockaddr *) &peer_addr,
+                                &peer_addr_len);
 
         if (read < 0) {
             if ((errno == EWOULDBLOCK) || (errno == EAGAIN)) {
@@ -108,7 +120,13 @@
             return;
         }
 
-        ssize_t done = quiche_conn_recv(conn_io->conn, buf, read);
+        quiche_recv_info recv_info = {
+            (struct sockaddr *) &peer_addr,
+
+            peer_addr_len,
+        };
+
+        ssize_t done = quiche_conn_recv(conn_io->conn, buf, read, &recv_info);
 
         if (done < 0) {
             fprintf(stderr, "failed to process packet\n");
@@ -228,11 +246,6 @@
         return -1;
     }
 
-    if (connect(sock, peer->ai_addr, peer->ai_addrlen) < 0) {
-        perror("failed to connect socket");
-        return -1;
-    }
-
     quiche_config *config = quiche_config_new(0xbabababa);
     if (config == NULL) {
         fprintf(stderr, "failed to create config\n");
@@ -240,10 +253,11 @@
     }
 
     quiche_config_set_application_protos(config,
-        (uint8_t *) "\x05hq-29\x05hq-28\x05hq-27\x08http/0.9", 27);
+        (uint8_t *) "\x0ahq-interop\x05hq-29\x05hq-28\x05hq-27\x08http/0.9", 38);
 
     quiche_config_set_max_idle_timeout(config, 5000);
-    quiche_config_set_max_udp_payload_size(config, MAX_DATAGRAM_SIZE);
+    quiche_config_set_max_recv_udp_payload_size(config, MAX_DATAGRAM_SIZE);
+    quiche_config_set_max_send_udp_payload_size(config, MAX_DATAGRAM_SIZE);
     quiche_config_set_initial_max_data(config, 10000000);
     quiche_config_set_initial_max_stream_data_bidi_local(config, 1000000);
     quiche_config_set_initial_max_stream_data_uni(config, 1000000);
@@ -268,8 +282,9 @@
         return -1;
     }
 
-    quiche_conn *conn = quiche_connect(host, (const uint8_t *) scid,
-                                       sizeof(scid), config);
+    quiche_conn *conn = quiche_connect(host, (const uint8_t*) scid, sizeof(scid),
+                                       peer->ai_addr, peer->ai_addrlen, config);
+
     if (conn == NULL) {
         fprintf(stderr, "failed to create connection\n");
         return -1;
diff --git a/examples/client.rs b/examples/client.rs
index 2e427f6..88490aa 100644
--- a/examples/client.rs
+++ b/examples/client.rs
@@ -69,7 +69,6 @@
     // Create the UDP socket backing the QUIC connection, and register it with
     // the event loop.
     let socket = std::net::UdpSocket::bind(bind_addr).unwrap();
-    socket.connect(peer_addr).unwrap();
 
     let socket = mio::net::UdpSocket::from_socket(socket).unwrap();
     poll.register(
@@ -87,11 +86,14 @@
     config.verify_peer(false);
 
     config
-        .set_application_protos(b"\x05hq-29\x05hq-28\x05hq-27\x08http/0.9")
+        .set_application_protos(
+            b"\x0ahq-interop\x05hq-29\x05hq-28\x05hq-27\x08http/0.9",
+        )
         .unwrap();
 
     config.set_max_idle_timeout(5000);
-    config.set_max_udp_payload_size(MAX_DATAGRAM_SIZE as u64);
+    config.set_max_recv_udp_payload_size(MAX_DATAGRAM_SIZE);
+    config.set_max_send_udp_payload_size(MAX_DATAGRAM_SIZE);
     config.set_initial_max_data(10_000_000);
     config.set_initial_max_stream_data_bidi_local(1_000_000);
     config.set_initial_max_stream_data_bidi_remote(1_000_000);
@@ -103,8 +105,11 @@
     let mut scid = [0; quiche::MAX_CONN_ID_LEN];
     SystemRandom::new().fill(&mut scid[..]).unwrap();
 
+    let scid = quiche::ConnectionId::from_ref(&scid);
+
     // Create a QUIC connection and initiate handshake.
-    let mut conn = quiche::connect(url.domain(), &scid, &mut config).unwrap();
+    let mut conn =
+        quiche::connect(url.domain(), &scid, peer_addr, &mut config).unwrap();
 
     info!(
         "connecting to {:} from {:} with scid {}",
@@ -113,9 +118,9 @@
         hex_dump(&scid)
     );
 
-    let write = conn.send(&mut out).expect("initial send failed");
+    let (write, send_info) = conn.send(&mut out).expect("initial send failed");
 
-    while let Err(e) = socket.send(&out[..write]) {
+    while let Err(e) = socket.send_to(&out[..write], &send_info.to) {
         if e.kind() == std::io::ErrorKind::WouldBlock {
             debug!("send() would block");
             continue;
@@ -146,7 +151,7 @@
                 break 'read;
             }
 
-            let len = match socket.recv(&mut buf) {
+            let (len, from) = match socket.recv_from(&mut buf) {
                 Ok(v) => v,
 
                 Err(e) => {
@@ -163,8 +168,10 @@
 
             debug!("got {} bytes", len);
 
+            let recv_info = quiche::RecvInfo { from };
+
             // Process potentially coalesced packets.
-            let read = match conn.recv(&mut buf[..len]) {
+            let read = match conn.recv(&mut buf[..len], recv_info) {
                 Ok(v) => v,
 
                 Err(e) => {
@@ -228,7 +235,7 @@
         // Generate outgoing QUIC packets and send them on the UDP socket, until
         // quiche reports that there are no more packets to be sent.
         loop {
-            let write = match conn.send(&mut out) {
+            let (write, send_info) = match conn.send(&mut out) {
                 Ok(v) => v,
 
                 Err(quiche::Error::Done) => {
@@ -244,7 +251,7 @@
                 },
             };
 
-            if let Err(e) = socket.send(&out[..write]) {
+            if let Err(e) = socket.send_to(&out[..write], &send_info.to) {
                 if e.kind() == std::io::ErrorKind::WouldBlock {
                     debug!("send() would block");
                     break;
diff --git a/examples/http3-client.c b/examples/http3-client.c
index 8c75309..6b263ff 100644
--- a/examples/http3-client.c
+++ b/examples/http3-client.c
@@ -65,8 +65,11 @@
 static void flush_egress(struct ev_loop *loop, struct conn_io *conn_io) {
     static uint8_t out[MAX_DATAGRAM_SIZE];
 
+    quiche_send_info send_info;
+
     while (1) {
-        ssize_t written = quiche_conn_send(conn_io->conn, out, sizeof(out));
+        ssize_t written = quiche_conn_send(conn_io->conn, out, sizeof(out),
+                                           &send_info);
 
         if (written == QUICHE_ERR_DONE) {
             fprintf(stderr, "done writing\n");
@@ -78,7 +81,10 @@
             return;
         }
 
-        ssize_t sent = send(conn_io->sock, out, written, 0);
+        ssize_t sent = sendto(conn_io->sock, out, written, 0,
+                              (struct sockaddr *) &send_info.to,
+                              send_info.to_len);
+
         if (sent != written) {
             perror("failed to send");
             return;
@@ -109,7 +115,13 @@
     static uint8_t buf[65535];
 
     while (1) {
-        ssize_t read = recv(conn_io->sock, buf, sizeof(buf), 0);
+        struct sockaddr_storage peer_addr;
+        socklen_t peer_addr_len = sizeof(peer_addr);
+        memset(&peer_addr, 0, peer_addr_len);
+
+        ssize_t read = recvfrom(conn_io->sock, buf, sizeof(buf), 0,
+                                (struct sockaddr *) &peer_addr,
+                                &peer_addr_len);
 
         if (read < 0) {
             if ((errno == EWOULDBLOCK) || (errno == EAGAIN)) {
@@ -121,7 +133,13 @@
             return;
         }
 
-        ssize_t done = quiche_conn_recv(conn_io->conn, buf, read);
+        quiche_recv_info recv_info = {
+            (struct sockaddr *) &peer_addr,
+
+            peer_addr_len,
+        };
+
+        ssize_t done = quiche_conn_recv(conn_io->conn, buf, read, &recv_info);
 
         if (done < 0) {
             fprintf(stderr, "failed to process packet: %zd\n", done);
@@ -239,14 +257,18 @@
                 }
 
                 case QUICHE_H3_EVENT_DATA: {
-                    ssize_t len = quiche_h3_recv_body(conn_io->http3,
-                                                      conn_io->conn, s,
-                                                      buf, sizeof(buf));
-                    if (len <= 0) {
-                        break;
+                    for (;;) {
+                        ssize_t len = quiche_h3_recv_body(conn_io->http3,
+                                                          conn_io->conn, s,
+                                                          buf, sizeof(buf));
+
+                        if (len <= 0) {
+                            break;
+                        }
+
+                        printf("%.*s", (int) len, buf);
                     }
 
-                    printf("%.*s", (int) len, buf);
                     break;
                 }
 
@@ -258,7 +280,7 @@
 
                 case QUICHE_H3_EVENT_DATAGRAM:
                     break;
-                    
+
                 case QUICHE_H3_EVENT_GOAWAY: {
                     fprintf(stderr, "got GOAWAY\n");
                     break;
@@ -322,11 +344,6 @@
         return -1;
     }
 
-    if (connect(sock, peer->ai_addr, peer->ai_addrlen) < 0) {
-        perror("failed to connect socket");
-        return -1;
-    }
-
     quiche_config *config = quiche_config_new(0xbabababa);
     if (config == NULL) {
         fprintf(stderr, "failed to create config\n");
@@ -338,7 +355,8 @@
         sizeof(QUICHE_H3_APPLICATION_PROTOCOL) - 1);
 
     quiche_config_set_max_idle_timeout(config, 5000);
-    quiche_config_set_max_udp_payload_size(config, MAX_DATAGRAM_SIZE);
+    quiche_config_set_max_recv_udp_payload_size(config, MAX_DATAGRAM_SIZE);
+    quiche_config_set_max_send_udp_payload_size(config, MAX_DATAGRAM_SIZE);
     quiche_config_set_initial_max_data(config, 10000000);
     quiche_config_set_initial_max_stream_data_bidi_local(config, 1000000);
     quiche_config_set_initial_max_stream_data_bidi_remote(config, 1000000);
@@ -366,8 +384,9 @@
         return -1;
     }
 
-    quiche_conn *conn = quiche_connect(host, (const uint8_t *) scid,
-                                       sizeof(scid), config);
+    quiche_conn *conn = quiche_connect(host, (const uint8_t*) scid, sizeof(scid),
+                                       peer->ai_addr, peer->ai_addrlen, config);
+
     if (conn == NULL) {
         fprintf(stderr, "failed to create connection\n");
         return -1;
diff --git a/examples/http3-client.rs b/examples/http3-client.rs
index a93d67e..2acb2ca 100644
--- a/examples/http3-client.rs
+++ b/examples/http3-client.rs
@@ -67,7 +67,6 @@
     // Create the UDP socket backing the QUIC connection, and register it with
     // the event loop.
     let socket = std::net::UdpSocket::bind(bind_addr).unwrap();
-    socket.connect(peer_addr).unwrap();
 
     let socket = mio::net::UdpSocket::from_socket(socket).unwrap();
     poll.register(
@@ -89,7 +88,8 @@
         .unwrap();
 
     config.set_max_idle_timeout(5000);
-    config.set_max_udp_payload_size(MAX_DATAGRAM_SIZE as u64);
+    config.set_max_recv_udp_payload_size(MAX_DATAGRAM_SIZE);
+    config.set_max_send_udp_payload_size(MAX_DATAGRAM_SIZE);
     config.set_initial_max_data(10_000_000);
     config.set_initial_max_stream_data_bidi_local(1_000_000);
     config.set_initial_max_stream_data_bidi_remote(1_000_000);
@@ -104,8 +104,11 @@
     let mut scid = [0; quiche::MAX_CONN_ID_LEN];
     SystemRandom::new().fill(&mut scid[..]).unwrap();
 
+    let scid = quiche::ConnectionId::from_ref(&scid);
+
     // Create a QUIC connection and initiate handshake.
-    let mut conn = quiche::connect(url.domain(), &scid, &mut config).unwrap();
+    let mut conn =
+        quiche::connect(url.domain(), &scid, peer_addr, &mut config).unwrap();
 
     info!(
         "connecting to {:} from {:} with scid {}",
@@ -114,9 +117,9 @@
         hex_dump(&scid)
     );
 
-    let write = conn.send(&mut out).expect("initial send failed");
+    let (write, send_info) = conn.send(&mut out).expect("initial send failed");
 
-    while let Err(e) = socket.send(&out[..write]) {
+    while let Err(e) = socket.send_to(&out[..write], &send_info.to) {
         if e.kind() == std::io::ErrorKind::WouldBlock {
             debug!("send() would block");
             continue;
@@ -138,11 +141,14 @@
     }
 
     let req = vec![
-        quiche::h3::Header::new(":method", "GET"),
-        quiche::h3::Header::new(":scheme", url.scheme()),
-        quiche::h3::Header::new(":authority", url.host_str().unwrap()),
-        quiche::h3::Header::new(":path", &path),
-        quiche::h3::Header::new("user-agent", "quiche"),
+        quiche::h3::Header::new(b":method", b"GET"),
+        quiche::h3::Header::new(b":scheme", url.scheme().as_bytes()),
+        quiche::h3::Header::new(
+            b":authority",
+            url.host_str().unwrap().as_bytes(),
+        ),
+        quiche::h3::Header::new(b":path", path.as_bytes()),
+        quiche::h3::Header::new(b"user-agent", b"quiche"),
     ];
 
     let req_start = std::time::Instant::now();
@@ -166,7 +172,7 @@
                 break 'read;
             }
 
-            let len = match socket.recv(&mut buf) {
+            let (len, from) = match socket.recv_from(&mut buf) {
                 Ok(v) => v,
 
                 Err(e) => {
@@ -183,8 +189,10 @@
 
             debug!("got {} bytes", len);
 
+            let recv_info = quiche::RecvInfo { from };
+
             // Process potentially coalesced packets.
-            let read = match conn.recv(&mut buf[..len]) {
+            let read = match conn.recv(&mut buf[..len], recv_info) {
                 Ok(v) => v,
 
                 Err(e) => {
@@ -235,7 +243,7 @@
                     },
 
                     Ok((stream_id, quiche::h3::Event::Data)) => {
-                        if let Ok(read) =
+                        while let Ok(read) =
                             http3_conn.recv_body(&mut conn, stream_id, &mut buf)
                         {
                             debug!(
@@ -280,7 +288,7 @@
         // Generate outgoing QUIC packets and send them on the UDP socket, until
         // quiche reports that there are no more packets to be sent.
         loop {
-            let write = match conn.send(&mut out) {
+            let (write, send_info) = match conn.send(&mut out) {
                 Ok(v) => v,
 
                 Err(quiche::Error::Done) => {
@@ -296,7 +304,7 @@
                 },
             };
 
-            if let Err(e) = socket.send(&out[..write]) {
+            if let Err(e) = socket.send_to(&out[..write], &send_info.to) {
                 if e.kind() == std::io::ErrorKind::WouldBlock {
                     debug!("send() would block");
                     break;
diff --git a/examples/http3-server.c b/examples/http3-server.c
index b6b3041..73c29ae 100644
--- a/examples/http3-server.c
+++ b/examples/http3-server.c
@@ -89,8 +89,11 @@
 static void flush_egress(struct ev_loop *loop, struct conn_io *conn_io) {
     static uint8_t out[MAX_DATAGRAM_SIZE];
 
+    quiche_send_info send_info;
+
     while (1) {
-        ssize_t written = quiche_conn_send(conn_io->conn, out, sizeof(out));
+        ssize_t written = quiche_conn_send(conn_io->conn, out, sizeof(out),
+                                           &send_info);
 
         if (written == QUICHE_ERR_DONE) {
             fprintf(stderr, "done writing\n");
@@ -173,7 +176,9 @@
 }
 
 static struct conn_io *create_conn(uint8_t *scid, size_t scid_len,
-                                   uint8_t *odcid, size_t odcid_len) {
+                                   uint8_t *odcid, size_t odcid_len,
+                                   struct sockaddr_storage *peer_addr,
+                                   socklen_t peer_addr_len) {
     struct conn_io *conn_io = calloc(1, sizeof(*conn_io));
     if (conn_io == NULL) {
         fprintf(stderr, "failed to allocate connection IO\n");
@@ -187,7 +192,11 @@
     memcpy(conn_io->cid, scid, LOCAL_CONN_ID_LEN);
 
     quiche_conn *conn = quiche_accept(conn_io->cid, LOCAL_CONN_ID_LEN,
-                                      odcid, odcid_len, config);
+                                      odcid, odcid_len,
+                                      (struct sockaddr *) peer_addr,
+                                      peer_addr_len,
+                                      config);
+
     if (conn == NULL) {
         fprintf(stderr, "failed to create connection\n");
         return NULL;
@@ -196,6 +205,9 @@
     conn_io->sock = conns->sock;
     conn_io->conn = conn;
 
+    memcpy(&conn_io->peer_addr, peer_addr, peer_addr_len);
+    conn_io->peer_addr_len = peer_addr_len;
+
     ev_init(&conn_io->timer, timeout_cb);
     conn_io->timer.data = conn_io;
 
@@ -334,16 +346,21 @@
                 continue;
             }
 
-            conn_io = create_conn(dcid, dcid_len, odcid, odcid_len);
+            conn_io = create_conn(dcid, dcid_len, odcid, odcid_len,
+                                  &peer_addr, peer_addr_len);
+
             if (conn_io == NULL) {
                 continue;
             }
-
-            memcpy(&conn_io->peer_addr, &peer_addr, peer_addr_len);
-            conn_io->peer_addr_len = peer_addr_len;
         }
 
-        ssize_t done = quiche_conn_recv(conn_io->conn, buf, read);
+        quiche_recv_info recv_info = {
+            (struct sockaddr *) &peer_addr,
+
+            peer_addr_len,
+        };
+
+        ssize_t done = quiche_conn_recv(conn_io->conn, buf, read, &recv_info);
 
         if (done < 0) {
             fprintf(stderr, "failed to process packet: %zd\n", done);
@@ -532,7 +549,8 @@
         sizeof(QUICHE_H3_APPLICATION_PROTOCOL) - 1);
 
     quiche_config_set_max_idle_timeout(config, 5000);
-    quiche_config_set_max_udp_payload_size(config, MAX_DATAGRAM_SIZE);
+    quiche_config_set_max_recv_udp_payload_size(config, MAX_DATAGRAM_SIZE);
+    quiche_config_set_max_send_udp_payload_size(config, MAX_DATAGRAM_SIZE);
     quiche_config_set_initial_max_data(config, 10000000);
     quiche_config_set_initial_max_stream_data_bidi_local(config, 1000000);
     quiche_config_set_initial_max_stream_data_bidi_remote(config, 1000000);
diff --git a/examples/http3-server.rs b/examples/http3-server.rs
index 4c41cbb..e84d1ea 100644
--- a/examples/http3-server.rs
+++ b/examples/http3-server.rs
@@ -53,7 +53,7 @@
     partial_responses: HashMap<u64, PartialResponse>,
 }
 
-type ClientMap = HashMap<Vec<u8>, (net::SocketAddr, Client)>;
+type ClientMap = HashMap<quiche::ConnectionId<'static>, Client>;
 
 fn main() {
     let mut buf = [0; 65535];
@@ -100,7 +100,8 @@
         .unwrap();
 
     config.set_max_idle_timeout(5000);
-    config.set_max_udp_payload_size(MAX_DATAGRAM_SIZE as u64);
+    config.set_max_recv_udp_payload_size(MAX_DATAGRAM_SIZE);
+    config.set_max_send_udp_payload_size(MAX_DATAGRAM_SIZE);
     config.set_initial_max_data(10_000_000);
     config.set_initial_max_stream_data_bidi_local(1_000_000);
     config.set_initial_max_stream_data_bidi_remote(1_000_000);
@@ -122,8 +123,7 @@
         // Find the shorter timeout from all the active connections.
         //
         // TODO: use event loop that properly supports timers
-        let timeout =
-            clients.values().filter_map(|(_, c)| c.conn.timeout()).min();
+        let timeout = clients.values().filter_map(|c| c.conn.timeout()).min();
 
         poll.poll(&mut events, timeout).unwrap();
 
@@ -136,12 +136,12 @@
             if events.is_empty() {
                 debug!("timed out");
 
-                clients.values_mut().for_each(|(_, c)| c.conn.on_timeout());
+                clients.values_mut().for_each(|c| c.conn.on_timeout());
 
                 break 'read;
             }
 
-            let (len, src) = match socket.recv_from(&mut buf) {
+            let (len, from) = match socket.recv_from(&mut buf) {
                 Ok(v) => v,
 
                 Err(e) => {
@@ -177,11 +177,12 @@
 
             let conn_id = ring::hmac::sign(&conn_id_seed, &hdr.dcid);
             let conn_id = &conn_id.as_ref()[..quiche::MAX_CONN_ID_LEN];
+            let conn_id = conn_id.to_vec().into();
 
             // Lookup a connection based on the packet's connection ID. If there
             // is no connection matching, create a new one.
-            let (_, client) = if !clients.contains_key(&hdr.dcid) &&
-                !clients.contains_key(conn_id)
+            let client = if !clients.contains_key(&hdr.dcid) &&
+                !clients.contains_key(&conn_id)
             {
                 if hdr.ty != quiche::Type::Initial {
                     error!("Packet is not Initial");
@@ -197,7 +198,7 @@
 
                     let out = &out[..len];
 
-                    if let Err(e) = socket.send_to(out, &src) {
+                    if let Err(e) = socket.send_to(out, &from) {
                         if e.kind() == std::io::ErrorKind::WouldBlock {
                             debug!("send() would block");
                             break;
@@ -211,6 +212,8 @@
                 let mut scid = [0; quiche::MAX_CONN_ID_LEN];
                 scid.copy_from_slice(&conn_id);
 
+                let scid = quiche::ConnectionId::from_ref(&scid);
+
                 // Token is always present in Initial packets.
                 let token = hdr.token.as_ref().unwrap();
 
@@ -218,7 +221,7 @@
                 if token.is_empty() {
                     warn!("Doing stateless retry");
 
-                    let new_token = mint_token(&hdr, &src);
+                    let new_token = mint_token(&hdr, &from);
 
                     let len = quiche::retry(
                         &hdr.scid,
@@ -232,7 +235,7 @@
 
                     let out = &out[..len];
 
-                    if let Err(e) = socket.send_to(out, &src) {
+                    if let Err(e) = socket.send_to(out, &from) {
                         if e.kind() == std::io::ErrorKind::WouldBlock {
                             debug!("send() would block");
                             break;
@@ -243,11 +246,11 @@
                     continue 'read;
                 }
 
-                let odcid = validate_token(&src, token);
+                let odcid = validate_token(&from, token);
 
                 // The token was not valid, meaning the retry failed, so
                 // drop the packet.
-                if odcid == None {
+                if odcid.is_none() {
                     error!("Invalid address validation token");
                     continue 'read;
                 }
@@ -257,17 +260,15 @@
                     continue 'read;
                 }
 
-                // Reuse the source connection ID we sent in the Retry
-                // packet, instead of changing it again.
-                scid.copy_from_slice(&hdr.dcid);
+                // Reuse the source connection ID we sent in the Retry packet,
+                // instead of changing it again.
+                let scid = hdr.dcid.clone();
 
-                debug!(
-                    "New connection: dcid={} scid={}",
-                    hex_dump(&hdr.dcid),
-                    hex_dump(&scid)
-                );
+                debug!("New connection: dcid={:?} scid={:?}", hdr.dcid, scid);
 
-                let conn = quiche::accept(&scid, odcid, &mut config).unwrap();
+                let conn =
+                    quiche::accept(&scid, odcid.as_ref(), from, &mut config)
+                        .unwrap();
 
                 let client = Client {
                     conn,
@@ -275,19 +276,21 @@
                     partial_responses: HashMap::new(),
                 };
 
-                clients.insert(scid.to_vec(), (src, client));
+                clients.insert(scid.clone(), client);
 
-                clients.get_mut(&scid[..]).unwrap()
+                clients.get_mut(&scid).unwrap()
             } else {
                 match clients.get_mut(&hdr.dcid) {
                     Some(v) => v,
 
-                    None => clients.get_mut(conn_id).unwrap(),
+                    None => clients.get_mut(&conn_id).unwrap(),
                 }
             };
 
+            let recv_info = quiche::RecvInfo { from };
+
             // Process potentially coalesced packets.
-            let read = match client.conn.recv(pkt_buf) {
+            let read = match client.conn.recv(pkt_buf, recv_info) {
                 Ok(v) => v,
 
                 Err(e) => {
@@ -382,9 +385,9 @@
         // Generate outgoing QUIC packets for all active connections and send
         // them on the UDP socket, until quiche reports that there are no more
         // packets to be sent.
-        for (peer, client) in clients.values_mut() {
+        for client in clients.values_mut() {
             loop {
-                let write = match client.conn.send(&mut out) {
+                let (write, send_info) = match client.conn.send(&mut out) {
                     Ok(v) => v,
 
                     Err(quiche::Error::Done) => {
@@ -400,8 +403,7 @@
                     },
                 };
 
-                // TODO: coalesce packets.
-                if let Err(e) = socket.send_to(&out[..write], &peer) {
+                if let Err(e) = socket.send_to(&out[..write], &send_info.to) {
                     if e.kind() == std::io::ErrorKind::WouldBlock {
                         debug!("send() would block");
                         break;
@@ -415,7 +417,7 @@
         }
 
         // Garbage collect closed connections.
-        clients.retain(|_, (_, ref mut c)| {
+        clients.retain(|_, ref mut c| {
             debug!("Collecting garbage");
 
             if c.conn.is_closed() {
@@ -464,7 +466,7 @@
 /// authenticate of the token. *It should not be used in production system*.
 fn validate_token<'a>(
     src: &net::SocketAddr, token: &'a [u8],
-) -> Option<&'a [u8]> {
+) -> Option<quiche::ConnectionId<'a>> {
     if token.len() < 6 {
         return None;
     }
@@ -484,9 +486,7 @@
         return None;
     }
 
-    let token = &token[addr.len()..];
-
-    Some(&token[..])
+    Some(quiche::ConnectionId::from_ref(&token[addr.len()..]))
 }
 
 /// Handles incoming HTTP/3 requests.
@@ -535,6 +535,8 @@
     let written = match http3_conn.send_body(conn, stream_id, &body, true) {
         Ok(v) => v,
 
+        Err(quiche::h3::Error::Done) => 0,
+
         Err(e) => {
             error!("{} stream send failed {:?}", conn.trace_id(), e);
             return;
@@ -558,25 +560,24 @@
 ) -> (Vec<quiche::h3::Header>, Vec<u8>) {
     let mut file_path = std::path::PathBuf::from(root);
     let mut path = std::path::Path::new("");
-    let mut method = "";
+    let mut method = None;
 
     // Look for the request's path and method.
     for hdr in request {
         match hdr.name() {
-            ":path" => {
-                path = std::path::Path::new(hdr.value());
-            },
+            b":path" =>
+                path = std::path::Path::new(
+                    std::str::from_utf8(hdr.value()).unwrap(),
+                ),
 
-            ":method" => {
-                method = hdr.value();
-            },
+            b":method" => method = Some(hdr.value()),
 
             _ => (),
         }
     }
 
     let (status, body) = match method {
-        "GET" => {
+        Some(b"GET") => {
             for c in path.components() {
                 if let std::path::Component::Normal(v) = c {
                     file_path.push(v)
@@ -594,9 +595,12 @@
     };
 
     let headers = vec![
-        quiche::h3::Header::new(":status", &status.to_string()),
-        quiche::h3::Header::new("server", "quiche"),
-        quiche::h3::Header::new("content-length", &body.len().to_string()),
+        quiche::h3::Header::new(b":status", status.to_string().as_bytes()),
+        quiche::h3::Header::new(b"server", b"quiche"),
+        quiche::h3::Header::new(
+            b"content-length",
+            body.len().to_string().as_bytes(),
+        ),
     ];
 
     (headers, body)
@@ -637,7 +641,11 @@
     let written = match http3_conn.send_body(conn, stream_id, body, true) {
         Ok(v) => v,
 
+        Err(quiche::h3::Error::Done) => 0,
+
         Err(e) => {
+            client.partial_responses.remove(&stream_id);
+
             error!("{} stream send failed {:?}", conn.trace_id(), e);
             return;
         },
@@ -649,9 +657,3 @@
         client.partial_responses.remove(&stream_id);
     }
 }
-
-fn hex_dump(buf: &[u8]) -> String {
-    let vec: Vec<String> = buf.iter().map(|b| format!("{:02x}", b)).collect();
-
-    vec.join("")
-}
diff --git a/examples/qpack-decode.rs b/examples/qpack-decode.rs
index 8468a85..d2aaaa5 100644
--- a/examples/qpack-decode.rs
+++ b/examples/qpack-decode.rs
@@ -77,7 +77,9 @@
         }
 
         for hdr in dec.decode(&data[..len], std::u64::MAX).unwrap() {
-            println!("{}\t{}", hdr.name(), hdr.value());
+            let name = std::str::from_utf8(hdr.name()).unwrap();
+            let value = std::str::from_utf8(hdr.value()).unwrap();
+            println!("{}\t{}", name, value);
         }
 
         println!();
diff --git a/examples/qpack-encode.rs b/examples/qpack-encode.rs
index e381227..5215fe4 100644
--- a/examples/qpack-encode.rs
+++ b/examples/qpack-encode.rs
@@ -83,6 +83,6 @@
         let name = line.split('\t').next().unwrap();
         let value = line.split('\t').last().unwrap();
 
-        headers.push(h3::Header::new(name, value));
+        headers.push(h3::Header::new(name.as_bytes(), value.as_bytes()));
     }
 }
diff --git a/examples/server.c b/examples/server.c
index 025e1d5..a97250f 100644
--- a/examples/server.c
+++ b/examples/server.c
@@ -86,8 +86,11 @@
 static void flush_egress(struct ev_loop *loop, struct conn_io *conn_io) {
     static uint8_t out[MAX_DATAGRAM_SIZE];
 
+    quiche_send_info send_info;
+
     while (1) {
-        ssize_t written = quiche_conn_send(conn_io->conn, out, sizeof(out));
+        ssize_t written = quiche_conn_send(conn_io->conn, out, sizeof(out),
+                                           &send_info);
 
         if (written == QUICHE_ERR_DONE) {
             fprintf(stderr, "done writing\n");
@@ -100,8 +103,9 @@
         }
 
         ssize_t sent = sendto(conn_io->sock, out, written, 0,
-                              (struct sockaddr *) &conn_io->peer_addr,
-                              conn_io->peer_addr_len);
+                              (struct sockaddr *) &send_info.to,
+                              send_info.to_len);
+
         if (sent != written) {
             perror("failed to send");
             return;
@@ -169,18 +173,28 @@
     return cid;
 }
 
-static struct conn_io *create_conn(uint8_t *dcid, size_t dcid_len, uint8_t *odcid,
-                                   size_t odcid_len) {
-    struct conn_io *conn_io = malloc(sizeof(*conn_io));
+static struct conn_io *create_conn(uint8_t *scid, size_t scid_len,
+                                   uint8_t *odcid, size_t odcid_len,
+                                   struct sockaddr_storage *peer_addr,
+                                   socklen_t peer_addr_len) {
+    struct conn_io *conn_io = calloc(1, sizeof(*conn_io));
     if (conn_io == NULL) {
         fprintf(stderr, "failed to allocate connection IO\n");
         return NULL;
     }
 
-    memcpy(conn_io->cid, dcid, LOCAL_CONN_ID_LEN);
+    if (scid_len != LOCAL_CONN_ID_LEN) {
+        fprintf(stderr, "failed, scid length too short\n");
+    }
+
+    memcpy(conn_io->cid, scid, LOCAL_CONN_ID_LEN);
 
     quiche_conn *conn = quiche_accept(conn_io->cid, LOCAL_CONN_ID_LEN,
-                                      odcid, odcid_len, config);
+                                      odcid, odcid_len,
+                                      (struct sockaddr *) peer_addr,
+                                      peer_addr_len,
+                                      config);
+
     if (conn == NULL) {
         fprintf(stderr, "failed to create connection\n");
         return NULL;
@@ -189,6 +203,9 @@
     conn_io->sock = conns->sock;
     conn_io->conn = conn;
 
+    memcpy(&conn_io->peer_addr, peer_addr, peer_addr_len); // already a pointer
+    conn_io->peer_addr_len = peer_addr_len;
+
     ev_init(&conn_io->timer, timeout_cb);
     conn_io->timer.data = conn_io;
 
@@ -318,16 +335,21 @@
                 continue;
             }
 
-            conn_io = create_conn(dcid, dcid_len, odcid, odcid_len);
+            conn_io = create_conn(dcid, dcid_len, odcid, odcid_len,
+                                  &peer_addr, peer_addr_len);
+
             if (conn_io == NULL) {
                 continue;
             }
-
-            memcpy(&conn_io->peer_addr, &peer_addr, peer_addr_len);
-            conn_io->peer_addr_len = peer_addr_len;
         }
 
-        ssize_t done = quiche_conn_recv(conn_io->conn, buf, read);
+        quiche_recv_info recv_info = {
+            (struct sockaddr *) &peer_addr,
+
+            peer_addr_len,
+        };
+
+        ssize_t done = quiche_conn_recv(conn_io->conn, buf, read, &recv_info);
 
         if (done < 0) {
             fprintf(stderr, "failed to process packet: %zd\n", done);
@@ -451,10 +473,11 @@
     quiche_config_load_priv_key_from_pem_file(config, "./cert.key");
 
     quiche_config_set_application_protos(config,
-        (uint8_t *) "\x05hq-29\x05hq-28\x05hq-27\x08http/0.9", 27);
+        (uint8_t *) "\x0ahq-interop\x05hq-29\x05hq-28\x05hq-27\x08http/0.9", 38);
 
     quiche_config_set_max_idle_timeout(config, 5000);
-    quiche_config_set_max_udp_payload_size(config, MAX_DATAGRAM_SIZE);
+    quiche_config_set_max_recv_udp_payload_size(config, MAX_DATAGRAM_SIZE);
+    quiche_config_set_max_send_udp_payload_size(config, MAX_DATAGRAM_SIZE);
     quiche_config_set_initial_max_data(config, 10000000);
     quiche_config_set_initial_max_stream_data_bidi_local(config, 1000000);
     quiche_config_set_initial_max_stream_data_bidi_remote(config, 1000000);
diff --git a/examples/server.rs b/examples/server.rs
index 8213d95..90f0102 100644
--- a/examples/server.rs
+++ b/examples/server.rs
@@ -47,7 +47,7 @@
     partial_responses: HashMap<u64, PartialResponse>,
 }
 
-type ClientMap = HashMap<Vec<u8>, (net::SocketAddr, Client)>;
+type ClientMap = HashMap<quiche::ConnectionId<'static>, Client>;
 
 fn main() {
     let mut buf = [0; 65535];
@@ -90,11 +90,14 @@
         .unwrap();
 
     config
-        .set_application_protos(b"\x05hq-29\x05hq-28\x05hq-27\x08http/0.9")
+        .set_application_protos(
+            b"\x0ahq-interop\x05hq-29\x05hq-28\x05hq-27\x08http/0.9",
+        )
         .unwrap();
 
     config.set_max_idle_timeout(5000);
-    config.set_max_udp_payload_size(MAX_DATAGRAM_SIZE as u64);
+    config.set_max_recv_udp_payload_size(MAX_DATAGRAM_SIZE);
+    config.set_max_send_udp_payload_size(MAX_DATAGRAM_SIZE);
     config.set_initial_max_data(10_000_000);
     config.set_initial_max_stream_data_bidi_local(1_000_000);
     config.set_initial_max_stream_data_bidi_remote(1_000_000);
@@ -114,8 +117,7 @@
         // Find the shorter timeout from all the active connections.
         //
         // TODO: use event loop that properly supports timers
-        let timeout =
-            clients.values().filter_map(|(_, c)| c.conn.timeout()).min();
+        let timeout = clients.values().filter_map(|c| c.conn.timeout()).min();
 
         poll.poll(&mut events, timeout).unwrap();
 
@@ -128,12 +130,12 @@
             if events.is_empty() {
                 debug!("timed out");
 
-                clients.values_mut().for_each(|(_, c)| c.conn.on_timeout());
+                clients.values_mut().for_each(|c| c.conn.on_timeout());
 
                 break 'read;
             }
 
-            let (len, src) = match socket.recv_from(&mut buf) {
+            let (len, from) = match socket.recv_from(&mut buf) {
                 Ok(v) => v,
 
                 Err(e) => {
@@ -169,11 +171,12 @@
 
             let conn_id = ring::hmac::sign(&conn_id_seed, &hdr.dcid);
             let conn_id = &conn_id.as_ref()[..quiche::MAX_CONN_ID_LEN];
+            let conn_id = conn_id.to_vec().into();
 
             // Lookup a connection based on the packet's connection ID. If there
             // is no connection matching, create a new one.
-            let (_, client) = if !clients.contains_key(&hdr.dcid) &&
-                !clients.contains_key(conn_id)
+            let client = if !clients.contains_key(&hdr.dcid) &&
+                !clients.contains_key(&conn_id)
             {
                 if hdr.ty != quiche::Type::Initial {
                     error!("Packet is not Initial");
@@ -189,7 +192,7 @@
 
                     let out = &out[..len];
 
-                    if let Err(e) = socket.send_to(out, &src) {
+                    if let Err(e) = socket.send_to(out, &from) {
                         if e.kind() == std::io::ErrorKind::WouldBlock {
                             debug!("send() would block");
                             break;
@@ -203,6 +206,8 @@
                 let mut scid = [0; quiche::MAX_CONN_ID_LEN];
                 scid.copy_from_slice(&conn_id);
 
+                let scid = quiche::ConnectionId::from_ref(&scid);
+
                 // Token is always present in Initial packets.
                 let token = hdr.token.as_ref().unwrap();
 
@@ -210,7 +215,7 @@
                 if token.is_empty() {
                     warn!("Doing stateless retry");
 
-                    let new_token = mint_token(&hdr, &src);
+                    let new_token = mint_token(&hdr, &from);
 
                     let len = quiche::retry(
                         &hdr.scid,
@@ -224,7 +229,7 @@
 
                     let out = &out[..len];
 
-                    if let Err(e) = socket.send_to(out, &src) {
+                    if let Err(e) = socket.send_to(out, &from) {
                         if e.kind() == std::io::ErrorKind::WouldBlock {
                             debug!("send() would block");
                             break;
@@ -235,11 +240,11 @@
                     continue 'read;
                 }
 
-                let odcid = validate_token(&src, token);
+                let odcid = validate_token(&from, token);
 
                 // The token was not valid, meaning the retry failed, so
                 // drop the packet.
-                if odcid == None {
+                if odcid.is_none() {
                     error!("Invalid address validation token");
                     continue 'read;
                 }
@@ -249,36 +254,36 @@
                     continue 'read;
                 }
 
-                // Reuse the source connection ID we sent in the Retry
-                // packet, instead of changing it again.
-                scid.copy_from_slice(&hdr.dcid);
+                // Reuse the source connection ID we sent in the Retry packet,
+                // instead of changing it again.
+                let scid = hdr.dcid.clone();
 
-                debug!(
-                    "New connection: dcid={} scid={}",
-                    hex_dump(&hdr.dcid),
-                    hex_dump(&scid)
-                );
+                debug!("New connection: dcid={:?} scid={:?}", hdr.dcid, scid);
 
-                let conn = quiche::accept(&scid, odcid, &mut config).unwrap();
+                let conn =
+                    quiche::accept(&scid, odcid.as_ref(), from, &mut config)
+                        .unwrap();
 
                 let client = Client {
                     conn,
                     partial_responses: HashMap::new(),
                 };
 
-                clients.insert(scid.to_vec(), (src, client));
+                clients.insert(scid.clone(), client);
 
-                clients.get_mut(&scid[..]).unwrap()
+                clients.get_mut(&scid).unwrap()
             } else {
                 match clients.get_mut(&hdr.dcid) {
                     Some(v) => v,
 
-                    None => clients.get_mut(conn_id).unwrap(),
+                    None => clients.get_mut(&conn_id).unwrap(),
                 }
             };
 
+            let recv_info = quiche::RecvInfo { from };
+
             // Process potentially coalesced packets.
-            let read = match client.conn.recv(pkt_buf) {
+            let read = match client.conn.recv(pkt_buf, recv_info) {
                 Ok(v) => v,
 
                 Err(e) => {
@@ -325,9 +330,9 @@
         // Generate outgoing QUIC packets for all active connections and send
         // them on the UDP socket, until quiche reports that there are no more
         // packets to be sent.
-        for (peer, client) in clients.values_mut() {
+        for client in clients.values_mut() {
             loop {
-                let write = match client.conn.send(&mut out) {
+                let (write, send_info) = match client.conn.send(&mut out) {
                     Ok(v) => v,
 
                     Err(quiche::Error::Done) => {
@@ -343,8 +348,7 @@
                     },
                 };
 
-                // TODO: coalesce packets.
-                if let Err(e) = socket.send_to(&out[..write], &peer) {
+                if let Err(e) = socket.send_to(&out[..write], &send_info.to) {
                     if e.kind() == std::io::ErrorKind::WouldBlock {
                         debug!("send() would block");
                         break;
@@ -358,7 +362,7 @@
         }
 
         // Garbage collect closed connections.
-        clients.retain(|_, (_, ref mut c)| {
+        clients.retain(|_, ref mut c| {
             debug!("Collecting garbage");
 
             if c.conn.is_closed() {
@@ -407,7 +411,7 @@
 /// authenticate of the token. *It should not be used in production system*.
 fn validate_token<'a>(
     src: &net::SocketAddr, token: &'a [u8],
-) -> Option<&'a [u8]> {
+) -> Option<quiche::ConnectionId<'a>> {
     if token.len() < 6 {
         return None;
     }
@@ -427,9 +431,7 @@
         return None;
     }
 
-    let token = &token[addr.len()..];
-
-    Some(&token[..])
+    Some(quiche::ConnectionId::from_ref(&token[addr.len()..]))
 }
 
 /// Handles incoming HTTP/0.9 requests.
@@ -503,6 +505,8 @@
         Err(quiche::Error::Done) => 0,
 
         Err(e) => {
+            client.partial_responses.remove(&stream_id);
+
             error!("{} stream send failed {:?}", conn.trace_id(), e);
             return;
         },
@@ -514,9 +518,3 @@
         client.partial_responses.remove(&stream_id);
     }
 }
-
-fn hex_dump(buf: &[u8]) -> String {
-    let vec: Vec<String> = buf.iter().map(|b| format!("{:02x}", b)).collect();
-
-    vec.join("")
-}
diff --git a/include/quiche.h b/include/quiche.h
index 8a25975..ba0f04a 100644
--- a/include/quiche.h
+++ b/include/quiche.h
@@ -46,7 +46,7 @@
 //
 
 // The current QUIC wire version.
-#define QUICHE_PROTOCOL_VERSION 0xff00001d
+#define QUICHE_PROTOCOL_VERSION 0x00000001
 
 // The maximum length of a connection ID.
 #define QUICHE_MAX_CONN_ID_LEN 20
@@ -94,6 +94,9 @@
     // The peer violated the local stream limits.
     QUICHE_ERR_STREAM_LIMIT = -12,
 
+    // The specified stream was stopped by the peer.
+    QUICHE_ERR_STREAM_STOPPED = -15,
+
     // The received data exceeds the stream's final size.
     QUICHE_ERR_FINAL_SIZE = -13,
 
@@ -122,6 +125,10 @@
 int quiche_config_load_priv_key_from_pem_file(quiche_config *config,
                                               const char *path);
 
+// Specifies a file where trusted CA certificates are stored for the purposes of certificate verification.
+int quiche_config_load_verify_locations_from_file(quiche_config *config,
+                                                  const char *path);
+
 // Configures whether to verify the peer's certificate.
 void quiche_config_verify_peer(quiche_config *config, bool v);
 
@@ -139,11 +146,15 @@
                                          const uint8_t *protos,
                                          size_t protos_len);
 
-// Sets the `max_idle_timeout` transport parameter.
+// Sets the `max_idle_timeout` transport parameter, in milliseconds, default is
+// no timeout.
 void quiche_config_set_max_idle_timeout(quiche_config *config, uint64_t v);
 
 // Sets the `max_udp_payload_size transport` parameter.
-void quiche_config_set_max_udp_payload_size(quiche_config *config, uint64_t v);
+void quiche_config_set_max_recv_udp_payload_size(quiche_config *config, size_t v);
+
+// Sets the maximum outgoing UDP payload size.
+void quiche_config_set_max_send_udp_payload_size(quiche_config *config, size_t v);
 
 // Sets the `initial_max_data` transport parameter.
 void quiche_config_set_initial_max_data(quiche_config *config, uint64_t v);
@@ -205,11 +216,14 @@
 // Creates a new server-side connection.
 quiche_conn *quiche_accept(const uint8_t *scid, size_t scid_len,
                            const uint8_t *odcid, size_t odcid_len,
+                           const struct sockaddr *from, size_t from_len,
                            quiche_config *config);
 
 // Creates a new client-side connection.
-quiche_conn *quiche_connect(const char *server_name, const uint8_t *scid,
-                            size_t scid_len, quiche_config *config);
+quiche_conn *quiche_connect(const char *server_name,
+                            const uint8_t *scid, size_t scid_len,
+                            const struct sockaddr *to, size_t to_len,
+                            quiche_config *config);
 
 // Writes a version negotiation packet.
 ssize_t quiche_negotiate_version(const uint8_t *scid, size_t scid_len,
@@ -228,6 +242,7 @@
 
 quiche_conn *quiche_conn_new_with_tls(const uint8_t *scid, size_t scid_len,
                                       const uint8_t *odcid, size_t odcid_len,
+                                      const struct sockaddr *peer, size_t peer_len,
                                       quiche_config *config, void *ssl,
                                       bool is_server);
 
@@ -245,14 +260,30 @@
 void quiche_conn_set_qlog_fd(quiche_conn *conn, int fd, const char *log_title,
                              const char *log_desc);
 
+// Configures the given session for resumption.
+int quiche_conn_set_session(quiche_conn *conn, const uint8_t *buf, size_t buf_len);
+
+typedef struct {
+    struct sockaddr *from;
+    socklen_t from_len;
+} quiche_recv_info;
+
 // Processes QUIC packets received from the peer.
-ssize_t quiche_conn_recv(quiche_conn *conn, uint8_t *buf, size_t buf_len);
+ssize_t quiche_conn_recv(quiche_conn *conn, uint8_t *buf, size_t buf_len,
+                         const quiche_recv_info *info);
+
+typedef struct {
+    // The address the packet should be sent to.
+    struct sockaddr_storage to;
+    socklen_t to_len;
+
+    // The time to send the packet out.
+    struct timespec at;
+} quiche_send_info;
 
 // Writes a single QUIC packet to be sent to the peer.
-ssize_t quiche_conn_send(quiche_conn *conn, uint8_t *out, size_t out_len);
-
-// Buffer holding data at a specific offset.
-typedef struct RangeBuf quiche_rangebuf;
+ssize_t quiche_conn_send(quiche_conn *conn, uint8_t *out, size_t out_len,
+                         quiche_send_info *out_info);
 
 // Reads contiguous data from a stream.
 ssize_t quiche_conn_stream_recv(quiche_conn *conn, uint64_t stream_id,
@@ -267,12 +298,18 @@
     QUICHE_SHUTDOWN_WRITE = 1,
 };
 
+// Sets the priority for a stream.
+int quiche_conn_stream_priority(quiche_conn *conn, uint64_t stream_id,
+                                uint8_t urgency, bool incremental);
+
 // Shuts down reading or writing from/to the specified stream.
 int quiche_conn_stream_shutdown(quiche_conn *conn, uint64_t stream_id,
                                 enum quiche_shutdown direction, uint64_t err);
 
 ssize_t quiche_conn_stream_capacity(quiche_conn *conn, uint64_t stream_id);
 
+bool quiche_conn_stream_readable(quiche_conn *conn, uint64_t stream_id);
+
 // Returns true if all the data has been read from the specified stream.
 bool quiche_conn_stream_finished(quiche_conn *conn, uint64_t stream_id);
 
@@ -284,6 +321,9 @@
 // Returns an iterator over streams that can be written to.
 quiche_stream_iter *quiche_conn_writable(quiche_conn *conn);
 
+// Returns the maximum possible size of egress UDP payloads.
+size_t quiche_conn_max_send_udp_payload_size(quiche_conn *conn);
+
 // Returns the amount of time until the next timeout event, in nanoseconds.
 uint64_t quiche_conn_timeout_as_nanos(quiche_conn *conn);
 
@@ -297,10 +337,22 @@
 int quiche_conn_close(quiche_conn *conn, bool app, uint64_t err,
                       const uint8_t *reason, size_t reason_len);
 
+// Returns a string uniquely representing the connection.
+void quiche_conn_trace_id(quiche_conn *conn, const uint8_t **out, size_t *out_len);
+
+// Returns the source connection ID.
+void quiche_conn_source_id(quiche_conn *conn, const uint8_t **out, size_t *out_len);
+
+// Returns the destination connection ID.
+void quiche_conn_destination_id(quiche_conn *conn, const uint8_t **out, size_t *out_len);
+
 // Returns the negotiated ALPN protocol.
 void quiche_conn_application_proto(quiche_conn *conn, const uint8_t **out,
                                    size_t *out_len);
 
+// Returns the serialized cryptographic session for the connection.
+void quiche_conn_session(quiche_conn *conn, const uint8_t **out, size_t *out_len);
+
 // Returns true if the connection handshake is complete.
 bool quiche_conn_is_established(quiche_conn *conn);
 
@@ -308,9 +360,31 @@
 // enough to send or receive early data.
 bool quiche_conn_is_in_early_data(quiche_conn *conn);
 
+// Returns whether there is stream or DATAGRAM data available to read.
+bool quiche_conn_is_readable(quiche_conn *conn);
+
+// Returns true if the connection is draining.
+bool quiche_conn_is_draining(quiche_conn *conn);
+
+// Returns the number of bidirectional streams that can be created
+// before the peer's stream count limit is reached.
+uint64_t quiche_conn_peer_streams_left_bidi(quiche_conn *conn);
+
+// Returns the number of unidirectional streams that can be created
+// before the peer's stream count limit is reached.
+uint64_t quiche_conn_peer_streams_left_uni(quiche_conn *conn);
+
 // Returns true if the connection is closed.
 bool quiche_conn_is_closed(quiche_conn *conn);
 
+// Returns true if a connection error was received, and updates the provided
+// parameters accordingly.
+bool quiche_conn_peer_error(quiche_conn *conn,
+                            bool *is_app,
+                            uint64_t *error_code,
+                            const uint8_t **reason,
+                            size_t *reason_len);
+
 // Initializes the stream's application data.
 //
 // Stream data can only be initialized once. Additional calls to this method
@@ -357,6 +431,21 @@
 // Returns the maximum DATAGRAM payload that can be sent.
 ssize_t quiche_conn_dgram_max_writable_len(quiche_conn *conn);
 
+// Returns the length of the first stored DATAGRAM.
+ssize_t quiche_conn_dgram_recv_front_len(quiche_conn *conn);
+
+// Returns the number of items in the DATAGRAM receive queue.
+ssize_t quiche_conn_dgram_recv_queue_len(quiche_conn *conn);
+
+// Returns the total size of all items in the DATAGRAM receive queue.
+ssize_t quiche_conn_dgram_recv_queue_byte_size(quiche_conn *conn);
+
+// Returns the number of items in the DATAGRAM send queue.
+ssize_t quiche_conn_dgram_send_queue_len(quiche_conn *conn);
+
+// Returns the total size of all items in the DATAGRAM send queue.
+ssize_t quiche_conn_dgram_send_queue_byte_size(quiche_conn *conn);
+
 // Reads the first received DATAGRAM.
 ssize_t quiche_conn_dgram_recv(quiche_conn *conn, uint8_t *buf,
                                size_t buf_len);
@@ -377,7 +466,7 @@
 //
 
 // List of ALPN tokens of supported HTTP/3 versions.
-#define QUICHE_H3_APPLICATION_PROTOCOL "\x05h3-29\x05h3-28\x05h3-27"
+#define QUICHE_H3_APPLICATION_PROTOCOL "\x02h3\x05h3-29\x05h3-28\x05h3-27"
 
 enum quiche_h3_error {
     /// There is no error or no work to do
@@ -423,6 +512,30 @@
     /// The underlying QUIC stream (or connection) doesn't have enough capacity
     /// for the operation to complete. The application should retry later on.
     QUICHE_H3_ERR_STREAM_BLOCKED = -13,
+
+    /// Error in the payload of a SETTINGS frame.
+    QUICHE_H3_ERR_SETTINGS_ERROR = -14,
+
+    /// Server rejected request.
+    QUICHE_H3_ERR_REQUEST_REJECTED = -15,
+
+    /// Request or its response cancelled.
+    QUICHE_H3_ERR_REQUEST_CANCELLED = -16,
+
+    /// Client's request stream terminated without containing a full-formed
+    /// request.
+    QUICHE_H3_ERR_REQUEST_INCOMPLETE = -17,
+
+    /// An HTTP message was malformed and cannot be processed.
+    QUICHE_H3_ERR_MESSAGE_ERROR = -18,
+
+    /// The TCP connection established in response to a CONNECT request was
+    /// reset or abnormally closed.
+    QUICHE_H3_ERR_CONNECT_ERROR = -19,
+
+    /// The requested operation cannot be served over HTTP/3. Peer should retry
+    /// over HTTP/1.1.
+    QUICHE_H3_ERR_VERSION_FALLBACK = -20,
 };
 
 // Stores configuration shared between multiple connections.
@@ -465,8 +578,8 @@
 typedef struct Http3Event quiche_h3_event;
 
 // Processes HTTP/3 data received from the peer.
-int quiche_h3_conn_poll(quiche_h3_conn *conn, quiche_conn *quic_conn,
-                        quiche_h3_event **ev);
+int64_t quiche_h3_conn_poll(quiche_h3_conn *conn, quiche_conn *quic_conn,
+                            quiche_h3_event **ev);
 
 // Returns the type of the event.
 enum quiche_h3_event_type quiche_h3_event_type(quiche_h3_event *ev);
@@ -522,13 +635,18 @@
 ssize_t quiche_h3_recv_body(quiche_h3_conn *conn, quiche_conn *quic_conn,
                             uint64_t stream_id, uint8_t *out, size_t out_len);
 
+// Returns whether the peer enabled HTTP/3 DATAGRAM frame support.
+bool quiche_h3_dgram_enabled_by_peer(quiche_h3_conn *conn,
+                                     quiche_conn *quic_conn);
+
 // Writes data to the DATAGRAM send queue.
 ssize_t quiche_h3_send_dgram(quiche_h3_conn *conn, quiche_conn *quic_conn,
                             uint64_t flow_id, uint8_t *data, size_t data_len);
 
 // Reads data from the DATAGRAM receive queue.
 ssize_t quiche_h3_recv_dgram(quiche_h3_conn *conn, quiche_conn *quic_conn,
-                            uint64_t *flow_id, uint8_t *out, size_t out_len);
+                            uint64_t *flow_id, size_t *flow_id_len,
+                            uint8_t *out, size_t out_len);
 
 // Frees the HTTP/3 connection object.
 void quiche_h3_conn_free(quiche_h3_conn *conn);
diff --git a/rustfmt.toml b/rustfmt.toml
index 4d853aa..f0d9d93 100644
--- a/rustfmt.toml
+++ b/rustfmt.toml
@@ -18,7 +18,7 @@
 where_single_line = false
 imports_indent = "Block"
 imports_layout = "Vertical"
-merge_imports = false
+imports_granularity = "Item"
 reorder_imports = true
 reorder_modules = true
 reorder_impl_items = true
diff --git a/src/build.rs b/src/build.rs
index 98774aa..875f556 100644
--- a/src/build.rs
+++ b/src/build.rs
@@ -28,6 +28,7 @@
     ("x86_64", &[("ANDROID_ABI", "x86_64")]),
 ];
 
+// iOS.
 const CMAKE_PARAMS_IOS: &[(&str, &[(&str, &str)])] = &[
     ("aarch64", &[
         ("CMAKE_OSX_ARCHITECTURES", "arm64"),
@@ -39,6 +40,12 @@
     ]),
 ];
 
+// ARM Linux.
+const CMAKE_PARAMS_ARM_LINUX: &[(&str, &[(&str, &str)])] = &[
+    ("aarch64", &[("CMAKE_SYSTEM_PROCESSOR", "aarch64")]),
+    ("arm", &[("CMAKE_SYSTEM_PROCESSOR", "arm")]),
+];
+
 /// Returns the platform-specific output path for lib.
 ///
 /// MSVC generator on Windows place static libs in a target sub-folder,
@@ -103,7 +110,6 @@
             for (android_arch, params) in cmake_params_android {
                 if *android_arch == arch {
                     for (name, value) in *params {
-                        eprintln!("android arch={} add {}={}", arch, name, value);
                         boringssl_cmake.define(name, value);
                     }
                 }
@@ -111,7 +117,6 @@
             let toolchain_file =
                 android_ndk_home.join("build/cmake/android.toolchain.cmake");
             let toolchain_file = toolchain_file.to_str().unwrap();
-            eprintln!("android toolchain={}", toolchain_file);
             boringssl_cmake.define("CMAKE_TOOLCHAIN_FILE", toolchain_file);
 
             // 21 is the minimum level tested. You can give higher value.
@@ -125,7 +130,6 @@
             for (ios_arch, params) in CMAKE_PARAMS_IOS {
                 if *ios_arch == arch {
                     for (name, value) in *params {
-                        eprintln!("ios arch={} add {}={}", arch, name, value);
                         boringssl_cmake.define(name, value);
                     }
                 }
@@ -149,6 +153,34 @@
             boringssl_cmake
         },
 
+        "linux" => match arch.as_ref() {
+            "aarch64" | "arm" => {
+                for (arm_arch, params) in CMAKE_PARAMS_ARM_LINUX {
+                    if *arm_arch == arch {
+                        for (name, value) in *params {
+                            boringssl_cmake.define(name, value);
+                        }
+                    }
+                }
+                boringssl_cmake.define("CMAKE_SYSTEM_NAME", "Linux");
+                boringssl_cmake.define("CMAKE_SYSTEM_VERSION", "1");
+
+                boringssl_cmake
+            },
+
+            "x86" => {
+                boringssl_cmake.define(
+                    "CMAKE_TOOLCHAIN_FILE",
+                    pwd.join("deps/boringssl/src/util/32-bit-toolchain.cmake")
+                        .as_os_str(),
+                );
+
+                boringssl_cmake
+            },
+
+            _ => boringssl_cmake,
+        },
+
         _ => {
             // Configure BoringSSL for building on 32-bit non-windows platforms.
             if arch == "x86" && os != "windows" {
@@ -197,7 +229,7 @@
 }
 
 fn main() {
-    if cfg!(feature = "boringssl-vendored") {
+    if cfg!(feature = "boringssl-vendored") && !cfg!(feature = "boring-sys") {
         let bssl_dir = std::env::var("QUICHE_BSSL_PATH").unwrap_or_else(|_| {
             let mut cfg = get_boringssl_cmake_config();
 
@@ -217,6 +249,11 @@
         println!("cargo:rustc-link-lib=static=ssl");
     }
 
+    if cfg!(feature = "boring-sys") {
+        println!("cargo:rustc-link-lib=static=crypto");
+        println!("cargo:rustc-link-lib=static=ssl");
+    }
+
     // MacOS: Allow cdylib to link with undefined symbols
     if cfg!(target_os = "macos") {
         println!("cargo:rustc-cdylib-link-arg=-Wl,-undefined,dynamic_lookup");
diff --git a/src/crypto.rs b/src/crypto.rs
index b45f4a5..6e33957 100644
--- a/src/crypto.rs
+++ b/src/crypto.rs
@@ -297,7 +297,7 @@
     let key_len = aead.key_len();
     let nonce_len = aead.nonce_len();
 
-    let initial_secret = derive_initial_secret(&cid, version)?;
+    let initial_secret = derive_initial_secret(&cid, version);
 
     // Client.
     let mut client_key = vec![0; key_len];
@@ -334,26 +334,33 @@
     Ok((open, seal))
 }
 
-fn derive_initial_secret(secret: &[u8], version: u32) -> Result<hkdf::Prk> {
+fn derive_initial_secret(secret: &[u8], version: u32) -> hkdf::Prk {
     const INITIAL_SALT: [u8; 20] = [
+        0x38, 0x76, 0x2c, 0xf7, 0xf5, 0x59, 0x34, 0xb3, 0x4d, 0x17, 0x9a, 0xe6,
+        0xa4, 0xc8, 0x0c, 0xad, 0xcc, 0xbb, 0x7f, 0x0a,
+    ];
+
+    const INITIAL_SALT_DRAFT29: [u8; 20] = [
         0xaf, 0xbf, 0xec, 0x28, 0x99, 0x93, 0xd2, 0x4c, 0x9e, 0x97, 0x86, 0xf1,
         0x9c, 0x61, 0x11, 0xe0, 0x43, 0x90, 0xa8, 0x99,
     ];
 
-    const INITIAL_SALT_OLD: [u8; 20] = [
+    const INITIAL_SALT_DRAFT27: [u8; 20] = [
         0xc3, 0xee, 0xf7, 0x12, 0xc7, 0x2e, 0xbb, 0x5a, 0x11, 0xa7, 0xd2, 0x43,
         0x2b, 0xb4, 0x63, 0x65, 0xbe, 0xf9, 0xf5, 0x02,
     ];
 
     let salt = match version {
         crate::PROTOCOL_VERSION_DRAFT27 | crate::PROTOCOL_VERSION_DRAFT28 =>
-            &INITIAL_SALT_OLD,
+            &INITIAL_SALT_DRAFT27,
+
+        crate::PROTOCOL_VERSION_DRAFT29 => &INITIAL_SALT_DRAFT29,
 
         _ => &INITIAL_SALT,
     };
 
     let salt = hkdf::Salt::new(hkdf::HKDF_SHA256, salt);
-    Ok(salt.extract(secret))
+    salt.extract(secret)
 }
 
 fn derive_client_initial_secret(prk: &hkdf::Prk, out: &mut [u8]) -> Result<()> {
@@ -458,7 +465,7 @@
     use super::*;
 
     #[test]
-    fn derive_initial_secrets() {
+    fn derive_initial_secrets_v1() {
         let dcid = [0x83, 0x94, 0xc8, 0xf0, 0x3e, 0x51, 0x57, 0x08];
 
         let mut secret = [0; 32];
@@ -469,7 +476,86 @@
         let aead = Algorithm::AES128_GCM;
 
         let initial_secret =
-            derive_initial_secret(&dcid, crate::PROTOCOL_VERSION).unwrap();
+            derive_initial_secret(&dcid, crate::PROTOCOL_VERSION_V1);
+
+        // Client.
+        assert!(
+            derive_client_initial_secret(&initial_secret, &mut secret).is_ok()
+        );
+        let expected_client_initial_secret = [
+            0xc0, 0x0c, 0xf1, 0x51, 0xca, 0x5b, 0xe0, 0x75, 0xed, 0x0e, 0xbf,
+            0xb5, 0xc8, 0x03, 0x23, 0xc4, 0x2d, 0x6b, 0x7d, 0xb6, 0x78, 0x81,
+            0x28, 0x9a, 0xf4, 0x00, 0x8f, 0x1f, 0x6c, 0x35, 0x7a, 0xea,
+        ];
+        assert_eq!(&secret, &expected_client_initial_secret);
+
+        assert!(derive_pkt_key(aead, &secret, &mut pkt_key).is_ok());
+        let expected_client_pkt_key = [
+            0x1f, 0x36, 0x96, 0x13, 0xdd, 0x76, 0xd5, 0x46, 0x77, 0x30, 0xef,
+            0xcb, 0xe3, 0xb1, 0xa2, 0x2d,
+        ];
+        assert_eq!(&pkt_key, &expected_client_pkt_key);
+
+        assert!(derive_pkt_iv(aead, &secret, &mut pkt_iv).is_ok());
+        let expected_client_pkt_iv = [
+            0xfa, 0x04, 0x4b, 0x2f, 0x42, 0xa3, 0xfd, 0x3b, 0x46, 0xfb, 0x25,
+            0x5c,
+        ];
+        assert_eq!(&pkt_iv, &expected_client_pkt_iv);
+
+        assert!(derive_hdr_key(aead, &secret, &mut hdr_key).is_ok());
+        let expected_client_hdr_key = [
+            0x9f, 0x50, 0x44, 0x9e, 0x04, 0xa0, 0xe8, 0x10, 0x28, 0x3a, 0x1e,
+            0x99, 0x33, 0xad, 0xed, 0xd2,
+        ];
+        assert_eq!(&hdr_key, &expected_client_hdr_key);
+
+        // Server.
+        assert!(
+            derive_server_initial_secret(&initial_secret, &mut secret).is_ok()
+        );
+        let expected_server_initial_secret = [
+            0x3c, 0x19, 0x98, 0x28, 0xfd, 0x13, 0x9e, 0xfd, 0x21, 0x6c, 0x15,
+            0x5a, 0xd8, 0x44, 0xcc, 0x81, 0xfb, 0x82, 0xfa, 0x8d, 0x74, 0x46,
+            0xfa, 0x7d, 0x78, 0xbe, 0x80, 0x3a, 0xcd, 0xda, 0x95, 0x1b,
+        ];
+        assert_eq!(&secret, &expected_server_initial_secret);
+
+        assert!(derive_pkt_key(aead, &secret, &mut pkt_key).is_ok());
+        let expected_server_pkt_key = [
+            0xcf, 0x3a, 0x53, 0x31, 0x65, 0x3c, 0x36, 0x4c, 0x88, 0xf0, 0xf3,
+            0x79, 0xb6, 0x06, 0x7e, 0x37,
+        ];
+        assert_eq!(&pkt_key, &expected_server_pkt_key);
+
+        assert!(derive_pkt_iv(aead, &secret, &mut pkt_iv).is_ok());
+        let expected_server_pkt_iv = [
+            0x0a, 0xc1, 0x49, 0x3c, 0xa1, 0x90, 0x58, 0x53, 0xb0, 0xbb, 0xa0,
+            0x3e,
+        ];
+        assert_eq!(&pkt_iv, &expected_server_pkt_iv);
+
+        assert!(derive_hdr_key(aead, &secret, &mut hdr_key).is_ok());
+        let expected_server_hdr_key = [
+            0xc2, 0x06, 0xb8, 0xd9, 0xb9, 0xf0, 0xf3, 0x76, 0x44, 0x43, 0x0b,
+            0x49, 0x0e, 0xea, 0xa3, 0x14,
+        ];
+        assert_eq!(&hdr_key, &expected_server_hdr_key);
+    }
+
+    #[test]
+    fn derive_initial_secrets_draft29() {
+        let dcid = [0x83, 0x94, 0xc8, 0xf0, 0x3e, 0x51, 0x57, 0x08];
+
+        let mut secret = [0; 32];
+        let mut pkt_key = [0; 16];
+        let mut pkt_iv = [0; 12];
+        let mut hdr_key = [0; 16];
+
+        let aead = Algorithm::AES128_GCM;
+
+        let initial_secret =
+            derive_initial_secret(&dcid, crate::PROTOCOL_VERSION_DRAFT29);
 
         // Client.
         assert!(
@@ -537,7 +623,7 @@
     }
 
     #[test]
-    fn derive_initial_secrets_old() {
+    fn derive_initial_secrets_draft27() {
         let dcid = [0x83, 0x94, 0xc8, 0xf0, 0x3e, 0x51, 0x57, 0x08];
 
         let mut secret = [0; 32];
@@ -548,8 +634,7 @@
         let aead = Algorithm::AES128_GCM;
 
         let initial_secret =
-            derive_initial_secret(&dcid, crate::PROTOCOL_VERSION_DRAFT28)
-                .unwrap();
+            derive_initial_secret(&dcid, crate::PROTOCOL_VERSION_DRAFT27);
 
         // Client.
         assert!(
diff --git a/src/dgram.rs b/src/dgram.rs
index 755d95f..023df5f 100644
--- a/src/dgram.rs
+++ b/src/dgram.rs
@@ -99,6 +99,10 @@
         self.queue.len() == self.queue_max_len
     }
 
+    pub fn len(&self) -> usize {
+        self.queue.len()
+    }
+
     pub fn byte_size(&self) -> usize {
         self.queue_bytes_size
     }
diff --git a/src/ffi.rs b/src/ffi.rs
index 39564dc..3290964 100644
--- a/src/ffi.rs
+++ b/src/ffi.rs
@@ -29,6 +29,8 @@
 use std::slice;
 use std::sync::atomic;
 
+use std::net::SocketAddr;
+
 #[cfg(unix)]
 use std::os::unix::io::FromRawFd;
 
@@ -36,13 +38,45 @@
 use libc::c_int;
 use libc::c_void;
 use libc::size_t;
+use libc::sockaddr;
 use libc::ssize_t;
+use libc::timespec;
+
+#[cfg(not(windows))]
+use libc::sockaddr_in;
+#[cfg(windows)]
+use winapi::shared::ws2def::SOCKADDR_IN as sockaddr_in;
+
+#[cfg(not(windows))]
+use libc::sockaddr_in6;
+#[cfg(windows)]
+use winapi::shared::ws2ipdef::SOCKADDR_IN6_LH as sockaddr_in6;
+
+#[cfg(not(windows))]
+use libc::sockaddr_storage;
+#[cfg(windows)]
+use winapi::shared::ws2def::SOCKADDR_STORAGE_LH as sockaddr_storage;
+
+#[cfg(windows)]
+use libc::c_int as socklen_t;
+#[cfg(not(windows))]
+use libc::socklen_t;
+
+#[cfg(not(windows))]
+use libc::AF_INET;
+#[cfg(windows)]
+use winapi::shared::ws2def::AF_INET;
+
+#[cfg(not(windows))]
+use libc::AF_INET6;
+#[cfg(windows)]
+use winapi::shared::ws2def::AF_INET6;
 
 use crate::*;
 
 #[no_mangle]
 pub extern fn quiche_version() -> *const u8 {
-    //static VERSION: &str = concat!(env!("CARGO_PKG_VERSION"), "\0");
+    //static VERSION: &str = concat!("0.9.0", "\0");
     // ANDROID's build system doesn't support environment variables
     // so we hardcode the package version here.
     static VERSION: &str = concat!("0.6.0", "\0");
@@ -119,6 +153,19 @@
 }
 
 #[no_mangle]
+pub extern fn quiche_config_load_verify_locations_from_file(
+    config: &mut Config, path: *const c_char,
+) -> c_int {
+    let path = unsafe { ffi::CStr::from_ptr(path).to_str().unwrap() };
+
+    match config.load_verify_locations_from_file(path) {
+        Ok(_) => 0,
+
+        Err(e) => e.to_c() as c_int,
+    }
+}
+
+#[no_mangle]
 pub extern fn quiche_config_verify_peer(config: &mut Config, v: bool) {
     config.verify_peer(v);
 }
@@ -157,10 +204,10 @@
 }
 
 #[no_mangle]
-pub extern fn quiche_config_set_max_udp_payload_size(
-    config: &mut Config, v: u64,
+pub extern fn quiche_config_set_max_recv_udp_payload_size(
+    config: &mut Config, v: size_t,
 ) {
-    config.set_max_udp_payload_size(v);
+    config.set_max_recv_udp_payload_size(v);
 }
 
 #[no_mangle]
@@ -253,6 +300,13 @@
 }
 
 #[no_mangle]
+pub extern fn quiche_config_set_max_send_udp_payload_size(
+    config: &mut Config, v: size_t,
+) {
+    config.set_max_send_udp_payload_size(v);
+}
+
+#[no_mangle]
 pub extern fn quiche_config_free(config: *mut Config) {
     unsafe { Box::from_raw(config) };
 }
@@ -325,17 +379,22 @@
 #[no_mangle]
 pub extern fn quiche_accept(
     scid: *const u8, scid_len: size_t, odcid: *const u8, odcid_len: size_t,
-    config: &mut Config,
+    from: &sockaddr, from_len: socklen_t, config: &mut Config,
 ) -> *mut Connection {
     let scid = unsafe { slice::from_raw_parts(scid, scid_len) };
+    let scid = ConnectionId::from_ref(scid);
 
-    let odcid = if !odcid.is_null() || odcid_len == 0 {
-        Some(unsafe { slice::from_raw_parts(odcid, odcid_len) })
+    let odcid = if !odcid.is_null() && odcid_len > 0 {
+        Some(ConnectionId::from_ref(unsafe {
+            slice::from_raw_parts(odcid, odcid_len)
+        }))
     } else {
         None
     };
 
-    match accept(scid, odcid, config) {
+    let from = std_addr_from_c(from, from_len);
+
+    match accept(&scid, odcid.as_ref(), from, config) {
         Ok(c) => Box::into_raw(Pin::into_inner(c)),
 
         Err(_) => ptr::null_mut(),
@@ -344,8 +403,8 @@
 
 #[no_mangle]
 pub extern fn quiche_connect(
-    server_name: *const c_char, scid: *const u8, scid_len: size_t,
-    config: &mut Config,
+    server_name: *const c_char, scid: *const u8, scid_len: size_t, to: &sockaddr,
+    to_len: socklen_t, config: &mut Config,
 ) -> *mut Connection {
     let server_name = if server_name.is_null() {
         None
@@ -354,8 +413,11 @@
     };
 
     let scid = unsafe { slice::from_raw_parts(scid, scid_len) };
+    let scid = ConnectionId::from_ref(scid);
 
-    match connect(server_name, scid, config) {
+    let to = std_addr_from_c(to, to_len);
+
+    match connect(server_name, &scid, to, config) {
         Ok(c) => Box::into_raw(Pin::into_inner(c)),
 
         Err(_) => ptr::null_mut(),
@@ -368,10 +430,14 @@
     out: *mut u8, out_len: size_t,
 ) -> ssize_t {
     let scid = unsafe { slice::from_raw_parts(scid, scid_len) };
+    let scid = ConnectionId::from_ref(scid);
+
     let dcid = unsafe { slice::from_raw_parts(dcid, dcid_len) };
+    let dcid = ConnectionId::from_ref(dcid);
+
     let out = unsafe { slice::from_raw_parts_mut(out, out_len) };
 
-    match negotiate_version(scid, dcid, out) {
+    match negotiate_version(&scid, &dcid, out) {
         Ok(v) => v as ssize_t,
 
         Err(e) => e.to_c(),
@@ -390,12 +456,18 @@
     token_len: size_t, version: u32, out: *mut u8, out_len: size_t,
 ) -> ssize_t {
     let scid = unsafe { slice::from_raw_parts(scid, scid_len) };
+    let scid = ConnectionId::from_ref(scid);
+
     let dcid = unsafe { slice::from_raw_parts(dcid, dcid_len) };
+    let dcid = ConnectionId::from_ref(dcid);
+
     let new_scid = unsafe { slice::from_raw_parts(new_scid, new_scid_len) };
+    let new_scid = ConnectionId::from_ref(new_scid);
+
     let token = unsafe { slice::from_raw_parts(token, token_len) };
     let out = unsafe { slice::from_raw_parts_mut(out, out_len) };
 
-    match retry(scid, dcid, new_scid, token, version, out) {
+    match retry(&scid, &dcid, &new_scid, token, version, out) {
         Ok(v) => v as ssize_t,
 
         Err(e) => e.to_c(),
@@ -405,19 +477,32 @@
 #[no_mangle]
 pub extern fn quiche_conn_new_with_tls(
     scid: *const u8, scid_len: size_t, odcid: *const u8, odcid_len: size_t,
-    config: &mut Config, ssl: *mut c_void, is_server: bool,
+    peer: &sockaddr, peer_len: socklen_t, config: &mut Config, ssl: *mut c_void,
+    is_server: bool,
 ) -> *mut Connection {
     let scid = unsafe { slice::from_raw_parts(scid, scid_len) };
+    let scid = ConnectionId::from_ref(scid);
 
-    let odcid = if !odcid.is_null() || odcid_len == 0 {
-        Some(unsafe { slice::from_raw_parts(odcid, odcid_len) })
+    let odcid = if !odcid.is_null() && odcid_len > 0 {
+        Some(ConnectionId::from_ref(unsafe {
+            slice::from_raw_parts(odcid, odcid_len)
+        }))
     } else {
         None
     };
 
+    let peer = std_addr_from_c(peer, peer_len);
+
     let tls = unsafe { tls::Handshake::from_ptr(ssl) };
 
-    match Connection::with_tls(scid, odcid, config, tls, is_server) {
+    match Connection::with_tls(
+        &scid,
+        odcid.as_ref(),
+        peer,
+        config,
+        tls,
+        is_server,
+    ) {
         Ok(c) => Box::into_raw(Pin::into_inner(c)),
 
         Err(_) => ptr::null_mut(),
@@ -506,8 +591,35 @@
 }
 
 #[no_mangle]
+pub extern fn quiche_conn_set_session(
+    conn: &mut Connection, buf: *const u8, buf_len: size_t,
+) -> c_int {
+    let buf = unsafe { slice::from_raw_parts(buf, buf_len) };
+
+    match conn.set_session(buf) {
+        Ok(_) => 0,
+
+        Err(e) => e.to_c() as c_int,
+    }
+}
+
+#[repr(C)]
+pub struct RecvInfo<'a> {
+    from: &'a sockaddr,
+    from_len: socklen_t,
+}
+
+impl<'a> From<&RecvInfo<'a>> for crate::RecvInfo {
+    fn from(info: &RecvInfo) -> crate::RecvInfo {
+        crate::RecvInfo {
+            from: std_addr_from_c(info.from, info.from_len),
+        }
+    }
+}
+
+#[no_mangle]
 pub extern fn quiche_conn_recv(
-    conn: &mut Connection, buf: *mut u8, buf_len: size_t,
+    conn: &mut Connection, buf: *mut u8, buf_len: size_t, info: &RecvInfo,
 ) -> ssize_t {
     if buf_len > <ssize_t>::max_value() as usize {
         panic!("The provided buffer is too large");
@@ -515,16 +627,24 @@
 
     let buf = unsafe { slice::from_raw_parts_mut(buf, buf_len) };
 
-    match conn.recv(buf) {
+    match conn.recv(buf, info.into()) {
         Ok(v) => v as ssize_t,
 
         Err(e) => e.to_c(),
     }
 }
 
+#[repr(C)]
+pub struct SendInfo {
+    to: sockaddr_storage,
+    to_len: socklen_t,
+
+    at: timespec,
+}
+
 #[no_mangle]
 pub extern fn quiche_conn_send(
-    conn: &mut Connection, out: *mut u8, out_len: size_t,
+    conn: &mut Connection, out: *mut u8, out_len: size_t, out_info: &mut SendInfo,
 ) -> ssize_t {
     if out_len > <ssize_t>::max_value() as usize {
         panic!("The provided buffer is too large");
@@ -533,7 +653,13 @@
     let out = unsafe { slice::from_raw_parts_mut(out, out_len) };
 
     match conn.send(out) {
-        Ok(v) => v as ssize_t,
+        Ok((v, info)) => {
+            out_info.to_len = std_addr_to_c(&info.to, &mut out_info.to);
+
+            std_time_to_c(&info.at, &mut out_info.at);
+
+            v as ssize_t
+        },
 
         Err(e) => e.to_c(),
     }
@@ -580,6 +706,17 @@
 }
 
 #[no_mangle]
+pub extern fn quiche_conn_stream_priority(
+    conn: &mut Connection, stream_id: u64, urgency: u8, incremental: bool,
+) -> c_int {
+    match conn.stream_priority(stream_id, urgency, incremental) {
+        Ok(_) => 0,
+
+        Err(e) => e.to_c() as c_int,
+    }
+}
+
+#[no_mangle]
 pub extern fn quiche_conn_stream_shutdown(
     conn: &mut Connection, stream_id: u64, direction: Shutdown, err: u64,
 ) -> c_int {
@@ -602,6 +739,13 @@
 }
 
 #[no_mangle]
+pub extern fn quiche_conn_stream_readable(
+    conn: &mut Connection, stream_id: u64,
+) -> bool {
+    conn.stream_readable(stream_id)
+}
+
+#[no_mangle]
 pub extern fn quiche_conn_stream_finished(
     conn: &mut Connection, stream_id: u64,
 ) -> bool {
@@ -618,8 +762,19 @@
     Box::into_raw(Box::new(conn.writable()))
 }
 
+#[no_mangle]
+pub extern fn quiche_conn_max_send_udp_payload_size(conn: &Connection) -> usize {
+    conn.max_send_udp_payload_size()
+}
+
+#[no_mangle]
+pub extern fn quiche_conn_is_readable(conn: &Connection) -> bool {
+    conn.is_readable()
+}
+
 struct AppData(*mut c_void);
 unsafe impl Send for AppData {}
+unsafe impl Sync for AppData {}
 
 #[no_mangle]
 pub extern fn quiche_conn_stream_init_application_data(
@@ -681,6 +836,37 @@
 }
 
 #[no_mangle]
+pub extern fn quiche_conn_trace_id(
+    conn: &mut Connection, out: &mut *const u8, out_len: &mut size_t,
+) {
+    let trace_id = conn.trace_id();
+
+    *out = trace_id.as_ptr();
+    *out_len = trace_id.len();
+}
+
+#[no_mangle]
+pub extern fn quiche_conn_source_id(
+    conn: &mut Connection, out: &mut *const u8, out_len: &mut size_t,
+) {
+    let conn_id = conn.source_id();
+    let id = conn_id.as_ref();
+    *out = id.as_ptr();
+    *out_len = id.len();
+}
+
+#[no_mangle]
+pub extern fn quiche_conn_destination_id(
+    conn: &mut Connection, out: &mut *const u8, out_len: &mut size_t,
+) {
+    let conn_id = conn.destination_id();
+    let id = conn_id.as_ref();
+
+    *out = id.as_ptr();
+    *out_len = id.len();
+}
+
+#[no_mangle]
 pub extern fn quiche_conn_application_proto(
     conn: &mut Connection, out: &mut *const u8, out_len: &mut size_t,
 ) {
@@ -691,6 +877,20 @@
 }
 
 #[no_mangle]
+pub extern fn quiche_conn_session(
+    conn: &mut Connection, out: &mut *const u8, out_len: &mut size_t,
+) {
+    match conn.session() {
+        Some(session) => {
+            *out = session.as_ptr();
+            *out_len = session.len();
+        },
+
+        None => *out_len = 0,
+    }
+}
+
+#[no_mangle]
 pub extern fn quiche_conn_is_established(conn: &mut Connection) -> bool {
     conn.is_established()
 }
@@ -701,11 +901,35 @@
 }
 
 #[no_mangle]
+pub extern fn quiche_conn_is_draining(conn: &mut Connection) -> bool {
+    conn.is_draining()
+}
+
+#[no_mangle]
 pub extern fn quiche_conn_is_closed(conn: &mut Connection) -> bool {
     conn.is_closed()
 }
 
 #[no_mangle]
+pub extern fn quiche_conn_peer_error(
+    conn: &mut Connection, is_app: *mut bool, error_code: *mut u64,
+    reason: &mut *const u8, reason_len: &mut size_t,
+) -> bool {
+    match &conn.peer_error {
+        Some(conn_err) => unsafe {
+            *is_app = conn_err.is_app;
+            *error_code = conn_err.error_code;
+            *reason = conn_err.reason.as_ptr();
+            *reason_len = conn_err.reason.len();
+
+            true
+        },
+
+        None => false,
+    }
+}
+
+#[no_mangle]
 pub extern fn quiche_stream_iter_next(
     iter: &mut StreamIter, stream_id: *mut u64,
 ) -> bool {
@@ -724,12 +948,12 @@
 
 #[repr(C)]
 pub struct Stats {
-    pub recv: usize,
-    pub sent: usize,
-    pub lost: usize,
-    pub rtt: u64,
-    pub cwnd: usize,
-    pub delivery_rate: u64,
+    recv: usize,
+    sent: usize,
+    lost: usize,
+    rtt: u64,
+    cwnd: usize,
+    delivery_rate: u64,
 }
 
 #[no_mangle]
@@ -754,6 +978,39 @@
 }
 
 #[no_mangle]
+pub extern fn quiche_conn_dgram_recv_front_len(conn: &Connection) -> ssize_t {
+    match conn.dgram_recv_front_len() {
+        None => Error::Done.to_c(),
+
+        Some(v) => v as ssize_t,
+    }
+}
+
+#[no_mangle]
+pub extern fn quiche_conn_dgram_recv_queue_len(conn: &Connection) -> ssize_t {
+    conn.dgram_recv_queue_len() as ssize_t
+}
+
+#[no_mangle]
+pub extern fn quiche_conn_dgram_recv_queue_byte_size(
+    conn: &Connection,
+) -> ssize_t {
+    conn.dgram_recv_queue_byte_size() as ssize_t
+}
+
+#[no_mangle]
+pub extern fn quiche_conn_dgram_send_queue_len(conn: &Connection) -> ssize_t {
+    conn.dgram_send_queue_len() as ssize_t
+}
+
+#[no_mangle]
+pub extern fn quiche_conn_dgram_send_queue_byte_size(
+    conn: &Connection,
+) -> ssize_t {
+    conn.dgram_send_queue_byte_size() as ssize_t
+}
+
+#[no_mangle]
 pub extern fn quiche_conn_dgram_send(
     conn: &mut Connection, buf: *const u8, buf_len: size_t,
 ) -> ssize_t {
@@ -805,3 +1062,79 @@
 pub extern fn quiche_conn_free(conn: *mut Connection) {
     unsafe { Box::from_raw(conn) };
 }
+
+#[no_mangle]
+pub extern fn quiche_conn_peer_streams_left_bidi(conn: &mut Connection) -> u64 {
+    conn.peer_streams_left_bidi()
+}
+
+#[no_mangle]
+pub extern fn quiche_conn_peer_streams_left_uni(conn: &mut Connection) -> u64 {
+    conn.peer_streams_left_uni()
+}
+
+fn std_addr_from_c(addr: &sockaddr, addr_len: socklen_t) -> SocketAddr {
+    unsafe {
+        match addr.sa_family as i32 {
+            AF_INET => {
+                assert!(addr_len as usize == std::mem::size_of::<sockaddr_in>());
+
+                SocketAddr::V4(
+                    *(addr as *const _ as *const sockaddr_in as *const _),
+                )
+            },
+
+            AF_INET6 => {
+                assert!(addr_len as usize == std::mem::size_of::<sockaddr_in6>());
+
+                SocketAddr::V6(
+                    *(addr as *const _ as *const sockaddr_in6 as *const _),
+                )
+            },
+
+            _ => unimplemented!("unsupported address type"),
+        }
+    }
+}
+
+fn std_addr_to_c(addr: &SocketAddr, out: &mut sockaddr_storage) -> socklen_t {
+    unsafe {
+        match addr {
+            SocketAddr::V4(addr) => {
+                let sa_len = std::mem::size_of::<sockaddr_in>();
+
+                let src = addr as *const _ as *const u8;
+                let dst = out as *mut _ as *mut u8;
+
+                std::ptr::copy_nonoverlapping(src, dst, sa_len);
+
+                sa_len as socklen_t
+            },
+
+            SocketAddr::V6(addr) => {
+                let sa_len = std::mem::size_of::<sockaddr_in6>();
+
+                let src = addr as *const _ as *const u8;
+                let dst = out as *mut _ as *mut u8;
+
+                std::ptr::copy_nonoverlapping(src, dst, sa_len);
+
+                sa_len as socklen_t
+            },
+        }
+    }
+}
+
+#[cfg(not(any(target_os = "macos", target_os = "ios", target_os = "windows")))]
+fn std_time_to_c(time: &std::time::Instant, out: &mut timespec) {
+    unsafe {
+        ptr::copy_nonoverlapping(time as *const _ as *const timespec, out, 1)
+    }
+}
+
+#[cfg(any(target_os = "macos", target_os = "ios", target_os = "windows"))]
+fn std_time_to_c(_time: &std::time::Instant, out: &mut timespec) {
+    // TODO: implement Instant conversion for systems that don't use timespec.
+    out.tv_sec = 0;
+    out.tv_nsec = 0;
+}
diff --git a/src/frame.rs b/src/frame.rs
index b191a42..c0cba7e 100644
--- a/src/frame.rs
+++ b/src/frame.rs
@@ -65,6 +65,11 @@
         data: stream::RangeBuf,
     },
 
+    CryptoHeader {
+        offset: u64,
+        length: usize,
+    },
+
     NewToken {
         token: Vec<u8>,
     },
@@ -74,6 +79,13 @@
         data: stream::RangeBuf,
     },
 
+    StreamHeader {
+        stream_id: u64,
+        offset: u64,
+        length: usize,
+        fin: bool,
+    },
+
     MaxData {
         max: u64,
     },
@@ -368,13 +380,13 @@
             },
 
             Frame::Crypto { data } => {
-                b.put_varint(0x06)?;
+                encode_crypto_header(data.off() as u64, data.len() as u64, b)?;
 
-                b.put_varint(data.off() as u64)?;
-                b.put_varint(data.len() as u64)?;
                 b.put_bytes(&data)?;
             },
 
+            Frame::CryptoHeader { .. } => (),
+
             Frame::NewToken { token } => {
                 b.put_varint(0x07)?;
 
@@ -383,26 +395,19 @@
             },
 
             Frame::Stream { stream_id, data } => {
-                let mut ty: u8 = 0x08;
+                encode_stream_header(
+                    *stream_id,
+                    data.off() as u64,
+                    data.len() as u64,
+                    data.fin(),
+                    b,
+                )?;
 
-                // Always encode offset
-                ty |= 0x04;
-
-                // Always encode length
-                ty |= 0x02;
-
-                if data.fin() {
-                    ty |= 0x01;
-                }
-
-                b.put_varint(u64::from(ty))?;
-
-                b.put_varint(*stream_id)?;
-                b.put_varint(data.off() as u64)?;
-                b.put_varint(data.len() as u64)?;
-                b.put_bytes(data.as_ref())?;
+                b.put_bytes(&data)?;
             },
 
+            Frame::StreamHeader { .. } => (),
+
             Frame::MaxData { max } => {
                 b.put_varint(0x10)?;
 
@@ -583,10 +588,17 @@
             Frame::Crypto { data } => {
                 1 + // frame type
                 octets::varint_len(data.off() as u64) + // offset
-                octets::varint_len(data.len() as u64) + // length
+                2 + // length, always encode as 2-byte varint
                 data.len() // data
             },
 
+            Frame::CryptoHeader { offset, length, .. } => {
+                1 + // frame type
+                octets::varint_len(*offset) + // offset
+                2 + // length, always encode as 2-byte varint
+                length // data
+            },
+
             Frame::NewToken { token } => {
                 1 + // frame type
                 octets::varint_len(token.len() as u64) + // token length
@@ -597,10 +609,23 @@
                 1 + // frame type
                 octets::varint_len(*stream_id) + // stream_id
                 octets::varint_len(data.off() as u64) + // offset
-                octets::varint_len(data.len() as u64) + // length
+                2 + // length, always encode as 2-byte varint
                 data.len() // data
             },
 
+            Frame::StreamHeader {
+                stream_id,
+                offset,
+                length,
+                ..
+            } => {
+                1 + // frame type
+                octets::varint_len(*stream_id) + // stream_id
+                octets::varint_len(*offset) + // offset
+                2 + // length, always encode as 2-byte varint
+                length // data
+            },
+
             Frame::MaxData { max } => {
                 1 + // frame type
                 octets::varint_len(*max) // max
@@ -706,10 +731,13 @@
 
     pub fn ack_eliciting(&self) -> bool {
         // Any other frame is ack-eliciting (note the `!`).
-        !matches!(self, Frame::Padding { .. } |
-            Frame::ACK { .. } |
-            Frame::ApplicationClose { .. } |
-            Frame::ConnectionClose { .. })
+        !matches!(
+            self,
+            Frame::Padding { .. } |
+                Frame::ACK { .. } |
+                Frame::ApplicationClose { .. } |
+                Frame::ConnectionClose { .. }
+        )
     }
 
     pub fn shrink_for_retransmission(&mut self) {
@@ -758,6 +786,9 @@
                 data.len().to_string(),
             ),
 
+            Frame::CryptoHeader { offset, length } =>
+                qlog::QuicFrame::crypto(offset.to_string(), length.to_string()),
+
             Frame::NewToken { token } => qlog::QuicFrame::new_token(
                 token.len().to_string(),
                 "TODO: https://github.com/quiclog/internet-drafts/issues/36"
@@ -772,6 +803,19 @@
                 None,
             ),
 
+            Frame::StreamHeader {
+                stream_id,
+                offset,
+                length,
+                fin,
+            } => qlog::QuicFrame::stream(
+                stream_id.to_string(),
+                offset.to_string(),
+                length.to_string(),
+                *fin,
+                None,
+            ),
+
             Frame::MaxData { max } => qlog::QuicFrame::max_data(max.to_string()),
 
             Frame::MaxStreamData { stream_id, max } =>
@@ -864,7 +908,8 @@
 
             Frame::HandshakeDone => qlog::QuicFrame::handshake_done(),
 
-            Frame::Datagram { .. } => qlog::QuicFrame::unknown(0x30),
+            Frame::Datagram { data } =>
+                qlog::QuicFrame::datagram(data.len().to_string(), None),
         }
     }
 }
@@ -911,6 +956,10 @@
                 write!(f, "CRYPTO off={} len={}", data.off(), data.len())?;
             },
 
+            Frame::CryptoHeader { offset, length } => {
+                write!(f, "CRYPTO off={} len={}", offset, length)?;
+            },
+
             Frame::NewToken { .. } => {
                 write!(f, "NEW_TOKEN (TODO)")?;
             },
@@ -926,6 +975,19 @@
                 )?;
             },
 
+            Frame::StreamHeader {
+                stream_id,
+                offset,
+                length,
+                fin,
+            } => {
+                write!(
+                    f,
+                    "STREAM id={} off={} len={} fin={}",
+                    stream_id, offset, length, fin
+                )?;
+            },
+
             Frame::MaxData { max } => {
                 write!(f, "MAX_DATA max={}", max)?;
             },
@@ -1051,6 +1113,46 @@
     Ok(Frame::ACK { ack_delay, ranges })
 }
 
+pub fn encode_crypto_header(
+    offset: u64, length: u64, b: &mut octets::OctetsMut,
+) -> Result<()> {
+    b.put_varint(0x06)?;
+
+    b.put_varint(offset)?;
+
+    // Always encode length field as 2-byte varint.
+    b.put_varint_with_len(length, 2)?;
+
+    Ok(())
+}
+
+pub fn encode_stream_header(
+    stream_id: u64, offset: u64, length: u64, fin: bool,
+    b: &mut octets::OctetsMut,
+) -> Result<()> {
+    let mut ty: u8 = 0x08;
+
+    // Always encode offset.
+    ty |= 0x04;
+
+    // Always encode length.
+    ty |= 0x02;
+
+    if fin {
+        ty |= 0x01;
+    }
+
+    b.put_varint(u64::from(ty))?;
+
+    b.put_varint(stream_id)?;
+    b.put_varint(offset)?;
+
+    // Always encode length field as 2-byte varint.
+    b.put_varint_with_len(length, 2)?;
+
+    Ok(())
+}
+
 fn parse_stream_frame(ty: u64, b: &mut octets::Octets) -> Result<Frame> {
     let first = ty as u8;
 
@@ -1262,7 +1364,7 @@
             frame.to_bytes(&mut b).unwrap()
         };
 
-        assert_eq!(wire_len, 18);
+        assert_eq!(wire_len, 19);
 
         let mut b = octets::Octets::with_slice(&d);
         assert_eq!(Frame::from_bytes(&mut b, packet::Type::Short), Ok(frame));
@@ -1321,7 +1423,7 @@
             frame.to_bytes(&mut b).unwrap()
         };
 
-        assert_eq!(wire_len, 19);
+        assert_eq!(wire_len, 20);
 
         let mut b = octets::Octets::with_slice(&d);
         assert_eq!(Frame::from_bytes(&mut b, packet::Type::Short), Ok(frame));
@@ -1352,7 +1454,7 @@
             frame.to_bytes(&mut b).unwrap()
         };
 
-        assert_eq!(wire_len, 23);
+        assert_eq!(wire_len, 24);
 
         let mut b = octets::Octets::with_slice(&d);
         assert_eq!(
diff --git a/src/h3/ffi.rs b/src/h3/ffi.rs
index 2d278c7..fc254b0 100644
--- a/src/h3/ffi.rs
+++ b/src/h3/ffi.rs
@@ -27,7 +27,6 @@
 use std::ffi;
 use std::ptr;
 use std::slice;
-use std::str;
 
 use libc::c_char;
 use libc::c_int;
@@ -264,6 +263,13 @@
 }
 
 #[no_mangle]
+pub extern fn quiche_h3_dgram_enabled_by_peer(
+    conn: &h3::Connection, quic_conn: &Connection,
+) -> bool {
+    conn.dgram_enabled_by_peer(quic_conn)
+}
+
+#[no_mangle]
 pub extern fn quiche_h3_send_dgram(
     conn: &mut h3::Connection, quic_conn: &mut Connection, flow_id: u64,
     data: *const u8, data_len: size_t,
@@ -317,15 +323,8 @@
 
     for h in headers {
         out.push({
-            let name = unsafe {
-                let slice = slice::from_raw_parts(h.name, h.name_len);
-                str::from_utf8_unchecked(slice)
-            };
-
-            let value = unsafe {
-                let slice = slice::from_raw_parts(h.value, h.value_len);
-                str::from_utf8_unchecked(slice)
-            };
+            let name = unsafe { slice::from_raw_parts(h.name, h.name_len) };
+            let value = unsafe { slice::from_raw_parts(h.value, h.value_len) };
 
             h3::HeaderRef::new(name, value)
         });
diff --git a/src/h3/frame.rs b/src/h3/frame.rs
index bee2b3b..085524b 100644
--- a/src/h3/frame.rs
+++ b/src/h3/frame.rs
@@ -39,6 +39,10 @@
 const SETTINGS_QPACK_MAX_TABLE_CAPACITY: u64 = 0x1;
 const SETTINGS_MAX_HEADER_LIST_SIZE: u64 = 0x6;
 const SETTINGS_QPACK_BLOCKED_STREAMS: u64 = 0x7;
+const SETTINGS_H3_DATAGRAM: u64 = 0x276;
+
+// Permit between 16 maximally-encoded and 128 minimally-encoded SETTINGS.
+const MAX_SETTINGS_PAYLOAD_SIZE: usize = 256;
 
 #[derive(Clone, PartialEq)]
 pub enum Frame {
@@ -58,6 +62,7 @@
         max_header_list_size: Option<u64>,
         qpack_max_table_capacity: Option<u64>,
         qpack_blocked_streams: Option<u64>,
+        h3_datagram: Option<u64>,
         grease: Option<(u64, u64)>,
     },
 
@@ -146,6 +151,7 @@
                 max_header_list_size,
                 qpack_max_table_capacity,
                 qpack_blocked_streams,
+                h3_datagram,
                 grease,
             } => {
                 let mut len = 0;
@@ -165,6 +171,11 @@
                     len += octets::varint_len(*val);
                 }
 
+                if let Some(val) = h3_datagram {
+                    len += octets::varint_len(SETTINGS_H3_DATAGRAM);
+                    len += octets::varint_len(*val);
+                }
+
                 if let Some(val) = grease {
                     len += octets::varint_len(val.0);
                     len += octets::varint_len(val.1);
@@ -188,6 +199,11 @@
                     b.put_varint(*val as u64)?;
                 }
 
+                if let Some(val) = h3_datagram {
+                    b.put_varint(SETTINGS_H3_DATAGRAM)?;
+                    b.put_varint(*val as u64)?;
+                }
+
                 if let Some(val) = grease {
                     b.put_varint(val.0)?;
                     b.put_varint(val.1)?;
@@ -286,6 +302,12 @@
     let mut max_header_list_size = None;
     let mut qpack_max_table_capacity = None;
     let mut qpack_blocked_streams = None;
+    let mut h3_datagram = None;
+
+    // Reject SETTINGS frames that are too long.
+    if settings_length > MAX_SETTINGS_PAYLOAD_SIZE {
+        return Err(super::Error::ExcessiveLoad);
+    }
 
     while b.off() < settings_length {
         let setting_ty = b.get_varint()?;
@@ -304,6 +326,18 @@
                 qpack_blocked_streams = Some(settings_val);
             },
 
+            SETTINGS_H3_DATAGRAM => {
+                if settings_val > 1 {
+                    return Err(super::Error::SettingsError);
+                }
+
+                h3_datagram = Some(settings_val);
+            },
+
+            // Reserved values overlap with HTTP/2 and MUST be rejected
+            0x0 | 0x2 | 0x3 | 0x4 | 0x5 =>
+                return Err(super::Error::SettingsError),
+
             // Unknown Settings parameters must be ignored.
             _ => (),
         }
@@ -313,6 +347,7 @@
         max_header_list_size,
         qpack_max_table_capacity,
         qpack_blocked_streams,
+        h3_datagram,
         grease: None,
     })
 }
@@ -425,10 +460,11 @@
             max_header_list_size: Some(0),
             qpack_max_table_capacity: Some(0),
             qpack_blocked_streams: Some(0),
+            h3_datagram: Some(0),
             grease: None,
         };
 
-        let frame_payload_len = 6;
+        let frame_payload_len = 9;
         let frame_header_len = 2;
 
         let wire_len = {
@@ -457,6 +493,7 @@
             max_header_list_size: Some(0),
             qpack_max_table_capacity: Some(0),
             qpack_blocked_streams: Some(0),
+            h3_datagram: Some(0),
             grease: Some((33, 33)),
         };
 
@@ -465,10 +502,11 @@
             max_header_list_size: Some(0),
             qpack_max_table_capacity: Some(0),
             qpack_blocked_streams: Some(0),
+            h3_datagram: Some(0),
             grease: None,
         };
 
-        let frame_payload_len = 8;
+        let frame_payload_len = 11;
         let frame_header_len = 2;
 
         let wire_len = {
@@ -497,6 +535,7 @@
             max_header_list_size: Some(1024),
             qpack_max_table_capacity: None,
             qpack_blocked_streams: None,
+            h3_datagram: None,
             grease: None,
         };
 
@@ -522,6 +561,71 @@
     }
 
     #[test]
+    fn settings_h3_dgram_only() {
+        let mut d = [42; 128];
+
+        let frame = Frame::Settings {
+            max_header_list_size: None,
+            qpack_max_table_capacity: None,
+            qpack_blocked_streams: None,
+            h3_datagram: Some(1),
+            grease: None,
+        };
+
+        let frame_payload_len = 3;
+        let frame_header_len = 2;
+
+        let wire_len = {
+            let mut b = octets::OctetsMut::with_slice(&mut d);
+            frame.to_bytes(&mut b).unwrap()
+        };
+
+        assert_eq!(wire_len, frame_header_len + frame_payload_len);
+
+        assert_eq!(
+            Frame::from_bytes(
+                SETTINGS_FRAME_TYPE_ID,
+                frame_payload_len as u64,
+                &d[frame_header_len..]
+            )
+            .unwrap(),
+            frame
+        );
+    }
+
+    #[test]
+    fn settings_h3_dgram_bad() {
+        let mut d = [42; 128];
+
+        let frame = Frame::Settings {
+            max_header_list_size: None,
+            qpack_max_table_capacity: None,
+            qpack_blocked_streams: None,
+            h3_datagram: Some(5),
+            grease: None,
+        };
+
+        let frame_payload_len = 3;
+        let frame_header_len = 2;
+
+        let wire_len = {
+            let mut b = octets::OctetsMut::with_slice(&mut d);
+            frame.to_bytes(&mut b).unwrap()
+        };
+
+        assert_eq!(wire_len, frame_header_len + frame_payload_len);
+
+        assert_eq!(
+            Frame::from_bytes(
+                SETTINGS_FRAME_TYPE_ID,
+                frame_payload_len as u64,
+                &d[frame_header_len..]
+            ),
+            Err(crate::h3::Error::SettingsError)
+        );
+    }
+
+    #[test]
     fn settings_qpack_only() {
         let mut d = [42; 128];
 
@@ -529,6 +633,7 @@
             max_header_list_size: None,
             qpack_max_table_capacity: Some(0),
             qpack_blocked_streams: Some(0),
+            h3_datagram: None,
             grease: None,
         };
 
@@ -554,6 +659,99 @@
     }
 
     #[test]
+    fn settings_h2_prohibited() {
+        // We need to test the prohibited values (0x0 | 0x2 | 0x3 | 0x4 | 0x5)
+        // but the quiche API doesn't support that, so use a manually created
+        // frame data buffer where d[frame_header_len] is the SETTING type field.
+        let frame_payload_len = 2u64;
+        let frame_header_len = 2;
+        let mut d = [
+            SETTINGS_FRAME_TYPE_ID as u8,
+            frame_payload_len as u8,
+            0x0,
+            1,
+        ];
+
+        assert_eq!(
+            Frame::from_bytes(
+                SETTINGS_FRAME_TYPE_ID,
+                frame_payload_len,
+                &d[frame_header_len..]
+            ),
+            Err(crate::h3::Error::SettingsError)
+        );
+
+        d[frame_header_len] = 0x2;
+
+        assert_eq!(
+            Frame::from_bytes(
+                SETTINGS_FRAME_TYPE_ID,
+                frame_payload_len,
+                &d[frame_header_len..]
+            ),
+            Err(crate::h3::Error::SettingsError)
+        );
+
+        d[frame_header_len] = 0x3;
+
+        assert_eq!(
+            Frame::from_bytes(
+                SETTINGS_FRAME_TYPE_ID,
+                frame_payload_len,
+                &d[frame_header_len..]
+            ),
+            Err(crate::h3::Error::SettingsError)
+        );
+
+        d[frame_header_len] = 0x4;
+
+        assert_eq!(
+            Frame::from_bytes(
+                SETTINGS_FRAME_TYPE_ID,
+                frame_payload_len,
+                &d[frame_header_len..]
+            ),
+            Err(crate::h3::Error::SettingsError)
+        );
+
+        d[frame_header_len] = 0x5;
+
+        assert_eq!(
+            Frame::from_bytes(
+                SETTINGS_FRAME_TYPE_ID,
+                frame_payload_len,
+                &d[frame_header_len..]
+            ),
+            Err(crate::h3::Error::SettingsError)
+        );
+    }
+
+    #[test]
+    fn settings_too_big() {
+        // We need to test a SETTINGS frame that exceeds
+        // MAX_SETTINGS_PAYLOAD_SIZE, so just craft a special buffer that looks
+        // like the frame. The payload content doesn't matter since quiche
+        // should abort before then.
+        let frame_payload_len = MAX_SETTINGS_PAYLOAD_SIZE + 1;
+        let frame_header_len = 2;
+        let d = [
+            SETTINGS_FRAME_TYPE_ID as u8,
+            frame_payload_len as u8,
+            0x1,
+            1,
+        ];
+
+        assert_eq!(
+            Frame::from_bytes(
+                SETTINGS_FRAME_TYPE_ID,
+                frame_payload_len as u64,
+                &d[frame_header_len..]
+            ),
+            Err(crate::h3::Error::ExcessiveLoad)
+        );
+    }
+
+    #[test]
     fn push_promise() {
         let mut d = [42; 128];
 
diff --git a/src/h3/mod.rs b/src/h3/mod.rs
index 6248688..dd2a51a 100644
--- a/src/h3/mod.rs
+++ b/src/h3/mod.rs
@@ -59,8 +59,9 @@
 //!
 //! ```no_run
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION).unwrap();
-//! # let scid = [0xba; 16];
-//! # let mut conn = quiche::connect(None, &scid, &mut config).unwrap();
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let from = "127.0.0.1:1234".parse().unwrap();
+//! # let mut conn = quiche::accept(&scid, None, from, &mut config).unwrap();
 //! # let h3_config = quiche::h3::Config::new()?;
 //! let h3_conn = quiche::h3::Connection::with_transport(&mut conn, &h3_config)?;
 //! # Ok::<(), quiche::h3::Error>(())
@@ -74,16 +75,17 @@
 //!
 //! ```no_run
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION).unwrap();
-//! # let scid = [0xba; 16];
-//! # let mut conn = quiche::connect(None, &scid, &mut config).unwrap();
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let to = "127.0.0.1:1234".parse().unwrap();
+//! # let mut conn = quiche::connect(None, &scid, to, &mut config).unwrap();
 //! # let h3_config = quiche::h3::Config::new()?;
 //! # let mut h3_conn = quiche::h3::Connection::with_transport(&mut conn, &h3_config)?;
 //! let req = vec![
-//!     quiche::h3::Header::new(":method", "GET"),
-//!     quiche::h3::Header::new(":scheme", "https"),
-//!     quiche::h3::Header::new(":authority", "quic.tech"),
-//!     quiche::h3::Header::new(":path", "/"),
-//!     quiche::h3::Header::new("user-agent", "quiche"),
+//!     quiche::h3::Header::new(b":method", b"GET"),
+//!     quiche::h3::Header::new(b":scheme", b"https"),
+//!     quiche::h3::Header::new(b":authority", b"quic.tech"),
+//!     quiche::h3::Header::new(b":path", b"/"),
+//!     quiche::h3::Header::new(b"user-agent", b"quiche"),
 //! ];
 //!
 //! h3_conn.send_request(&mut conn, &req, true)?;
@@ -95,16 +97,17 @@
 //!
 //! ```no_run
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION).unwrap();
-//! # let scid = [0xba; 16];
-//! # let mut conn = quiche::connect(None, &scid, &mut config).unwrap();
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let to = "127.0.0.1:1234".parse().unwrap();
+//! # let mut conn = quiche::connect(None, &scid, to, &mut config).unwrap();
 //! # let h3_config = quiche::h3::Config::new()?;
 //! # let mut h3_conn = quiche::h3::Connection::with_transport(&mut conn, &h3_config)?;
 //! let req = vec![
-//!     quiche::h3::Header::new(":method", "GET"),
-//!     quiche::h3::Header::new(":scheme", "https"),
-//!     quiche::h3::Header::new(":authority", "quic.tech"),
-//!     quiche::h3::Header::new(":path", "/"),
-//!     quiche::h3::Header::new("user-agent", "quiche"),
+//!     quiche::h3::Header::new(b":method", b"GET"),
+//!     quiche::h3::Header::new(b":scheme", b"https"),
+//!     quiche::h3::Header::new(b":authority", b"quic.tech"),
+//!     quiche::h3::Header::new(b":path", b"/"),
+//!     quiche::h3::Header::new(b"user-agent", b"quiche"),
 //! ];
 //!
 //! let stream_id = h3_conn.send_request(&mut conn, &req, false)?;
@@ -125,8 +128,9 @@
 //! use quiche::h3::NameValue;
 //!
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION).unwrap();
-//! # let scid = [0xba; 16];
-//! # let mut conn = quiche::accept(&scid, None, &mut config).unwrap();
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let from = "127.0.0.1:1234".parse().unwrap();
+//! # let mut conn = quiche::accept(&scid, None, from, &mut config).unwrap();
 //! # let h3_config = quiche::h3::Config::new()?;
 //! # let mut h3_conn = quiche::h3::Connection::with_transport(&mut conn, &h3_config)?;
 //! loop {
@@ -135,15 +139,15 @@
 //!             let mut headers = list.into_iter();
 //!
 //!             // Look for the request's method.
-//!             let method = headers.find(|h| h.name() == ":method").unwrap();
+//!             let method = headers.find(|h| h.name() == b":method").unwrap();
 //!
 //!             // Look for the request's path.
-//!             let path = headers.find(|h| h.name() == ":path").unwrap();
+//!             let path = headers.find(|h| h.name() == b":path").unwrap();
 //!
-//!             if method.value() == "GET" && path.value() == "/" {
+//!             if method.value() == b"GET" && path.value() == b"/" {
 //!                 let resp = vec![
-//!                     quiche::h3::Header::new(":status", &200.to_string()),
-//!                     quiche::h3::Header::new("server", "quiche"),
+//!                     quiche::h3::Header::new(b":status", 200.to_string().as_bytes()),
+//!                     quiche::h3::Header::new(b"server", b"quiche"),
 //!                 ];
 //!
 //!                 h3_conn.send_response(&mut conn, stream_id, &resp, false)?;
@@ -186,22 +190,25 @@
 //! use quiche::h3::NameValue;
 //!
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION).unwrap();
-//! # let scid = [0xba; 16];
-//! # let mut conn = quiche::connect(None, &scid, &mut config).unwrap();
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let to = "127.0.0.1:1234".parse().unwrap();
+//! # let mut conn = quiche::connect(None, &scid, to, &mut config).unwrap();
 //! # let h3_config = quiche::h3::Config::new()?;
 //! # let mut h3_conn = quiche::h3::Connection::with_transport(&mut conn, &h3_config)?;
 //! loop {
 //!     match h3_conn.poll(&mut conn) {
 //!         Ok((stream_id, quiche::h3::Event::Headers{list, has_body})) => {
-//!             let status = list.iter().find(|h| h.name() == ":status").unwrap();
+//!             let status = list.iter().find(|h| h.name() == b":status").unwrap();
 //!             println!("Received {} response on stream {}",
-//!                      status.value(), stream_id);
+//!                      std::str::from_utf8(status.value()).unwrap(),
+//!                      stream_id);
 //!         },
 //!
 //!         Ok((stream_id, quiche::h3::Event::Data)) => {
 //!             let mut body = vec![0; 4096];
 //!
-//!             if let Ok(read) =
+//!             // Consume all body data received on the stream.
+//!             while let Ok(read) =
 //!                 h3_conn.recv_body(&mut conn, stream_id, &mut body)
 //!             {
 //!                 println!("Received {} bytes of payload on stream {}",
@@ -274,7 +281,7 @@
 ///
 /// [`Config::set_application_protos()`]:
 /// ../struct.Config.html#method.set_application_protos
-pub const APPLICATION_PROTOCOL: &[u8] = b"\x05h3-29\x05h3-28\x05h3-27";
+pub const APPLICATION_PROTOCOL: &[u8] = b"\x02h3\x05h3-29\x05h3-28\x05h3-27";
 
 // The offset used when converting HTTP/3 urgency to quiche urgency.
 const PRIORITY_URGENCY_OFFSET: u8 = 124;
@@ -333,6 +340,30 @@
     /// The underlying QUIC stream (or connection) doesn't have enough capacity
     /// for the operation to complete. The application should retry later on.
     StreamBlocked,
+
+    /// Error in the payload of a SETTINGS frame.
+    SettingsError,
+
+    /// Server rejected request.
+    RequestRejected,
+
+    /// Request or its response cancelled.
+    RequestCancelled,
+
+    /// Client's request stream terminated without containing a fully-formed
+    /// request.
+    RequestIncomplete,
+
+    /// An HTTP message was malformed and cannot be processed.
+    MessageError,
+
+    /// The TCP connection established in response to a CONNECT request was
+    /// reset or abnormally closed.
+    ConnectError,
+
+    /// The requested operation cannot be served over HTTP/3. Peer should retry
+    /// over HTTP/1.1.
+    VersionFallback,
 }
 
 impl Error {
@@ -351,9 +382,17 @@
             Error::BufferTooShort => 0x999,
             Error::TransportError { .. } => 0xFF,
             Error::StreamBlocked => 0xFF,
+            Error::SettingsError => 0x109,
+            Error::RequestRejected => 0x10B,
+            Error::RequestCancelled => 0x10C,
+            Error::RequestIncomplete => 0x10D,
+            Error::MessageError => 0x10E,
+            Error::ConnectError => 0x10F,
+            Error::VersionFallback => 0x110,
         }
     }
 
+    #[cfg(feature = "ffi")]
     fn to_c(self) -> libc::ssize_t {
         match self {
             Error::Done => -1,
@@ -369,6 +408,13 @@
             Error::QpackDecompressionFailed => -11,
             Error::TransportError { .. } => -12,
             Error::StreamBlocked => -13,
+            Error::SettingsError => -14,
+            Error::RequestRejected => -15,
+            Error::RequestCancelled => -16,
+            Error::RequestIncomplete => -17,
+            Error::MessageError => -18,
+            Error::ConnectError => -19,
+            Error::VersionFallback => -20,
         }
     }
 }
@@ -420,7 +466,13 @@
 
     /// Sets the `SETTINGS_MAX_HEADER_LIST_SIZE` setting.
     ///
-    /// By default no limit is enforced.
+    /// By default no limit is enforced. When a request whose headers exceed
+    /// the limit set by the application is received, the call to the [`poll()`]
+    /// method will return the [`Error::ExcessiveLoad`] error, and the
+    /// connection will be closed.
+    ///
+    /// [`poll()`]: struct.Connection.html#method.poll
+    /// [`Error::ExcessiveLoad`]: enum.Error.html#variant.ExcessiveLoad
     pub fn set_max_header_list_size(&mut self, v: u64) {
         self.max_header_list_size = Some(v);
     }
@@ -443,52 +495,52 @@
 /// A trait for types with associated string name and value.
 pub trait NameValue {
     /// Returns the object's name.
-    fn name(&self) -> &str;
+    fn name(&self) -> &[u8];
 
     /// Returns the object's value.
-    fn value(&self) -> &str;
+    fn value(&self) -> &[u8];
 }
 
 /// An owned name-value pair representing a raw HTTP header.
 #[derive(Clone, Debug, PartialEq)]
-pub struct Header(String, String);
+pub struct Header(Vec<u8>, Vec<u8>);
 
 impl Header {
     /// Creates a new header.
     ///
     /// Both `name` and `value` will be cloned.
-    pub fn new(name: &str, value: &str) -> Self {
-        Self(String::from(name), String::from(value))
+    pub fn new(name: &[u8], value: &[u8]) -> Self {
+        Self(name.to_vec(), value.to_vec())
     }
 }
 
 impl NameValue for Header {
-    fn name(&self) -> &str {
+    fn name(&self) -> &[u8] {
         &self.0
     }
 
-    fn value(&self) -> &str {
+    fn value(&self) -> &[u8] {
         &self.1
     }
 }
 
 /// A non-owned name-value pair representing a raw HTTP header.
 #[derive(Clone, Debug, PartialEq)]
-pub struct HeaderRef<'a>(&'a str, &'a str);
+pub struct HeaderRef<'a>(&'a [u8], &'a [u8]);
 
 impl<'a> HeaderRef<'a> {
     /// Creates a new header.
-    pub fn new(name: &'a str, value: &'a str) -> Self {
+    pub fn new(name: &'a [u8], value: &'a [u8]) -> Self {
         Self(name, value)
     }
 }
 
 impl<'a> NameValue for HeaderRef<'a> {
-    fn name(&self) -> &str {
+    fn name(&self) -> &[u8] {
         self.0
     }
 
-    fn value(&self) -> &str {
+    fn value(&self) -> &[u8] {
         self.1
     }
 }
@@ -511,16 +563,28 @@
     /// This indicates that the application can use the [`recv_body()`] method
     /// to retrieve the data from the stream.
     ///
-    /// This event will keep being reported until all the available data is
-    /// retrieved by the application.
+    /// Note that [`recv_body()`] will need to be called repeatedly until the
+    /// [`Done`] value is returned, as the event will not be re-armed until all
+    /// buffered data is read.
     ///
     /// [`recv_body()`]: struct.Connection.html#method.recv_body
+    /// [`Done`]: enum.Error.html#variant.Done
     Data,
 
     /// Stream was closed,
     Finished,
 
     /// DATAGRAM was received.
+    ///
+    /// This indicates that the application can use the [`recv_dgram()`] method
+    /// to retrieve the HTTP/3 DATAGRAM.
+    ///
+    /// Note that [`recv_dgram()`] will need to be called repeatedly until the
+    /// [`Done`] value is returned, as the event will not be re-armed until all
+    /// buffered DATAGRAMs with the same flow ID are read.
+    ///
+    /// [`recv_dgram()`]: struct.Connection.html#method.recv_dgram
+    /// [`Done`]: enum.Error.html#variant.Done
     Datagram,
 
     /// GOAWAY was received.
@@ -531,6 +595,7 @@
     pub max_header_list_size: Option<u64>,
     pub qpack_max_table_capacity: Option<u64>,
     pub qpack_blocked_streams: Option<u64>,
+    pub h3_datagram: Option<u64>,
 }
 
 struct QpackStreams {
@@ -556,6 +621,7 @@
     qpack_encoder: qpack::Encoder,
     qpack_decoder: qpack::Decoder,
 
+    #[allow(dead_code)]
     local_qpack_streams: QpackStreams,
     peer_qpack_streams: QpackStreams,
 
@@ -567,11 +633,17 @@
 
     local_goaway_id: Option<u64>,
     peer_goaway_id: Option<u64>,
+
+    dgram_event_triggered: bool,
 }
 
 impl Connection {
-    fn new(config: &Config, is_server: bool) -> Result<Connection> {
+    #[allow(clippy::unnecessary_wraps)]
+    fn new(
+        config: &Config, is_server: bool, enable_dgram: bool,
+    ) -> Result<Connection> {
         let initial_uni_stream_id = if is_server { 0x3 } else { 0x2 };
+        let h3_datagram = if enable_dgram { Some(1) } else { None };
 
         Ok(Connection {
             is_server,
@@ -586,12 +658,14 @@
                 max_header_list_size: config.max_header_list_size,
                 qpack_max_table_capacity: config.qpack_max_table_capacity,
                 qpack_blocked_streams: config.qpack_blocked_streams,
+                h3_datagram,
             },
 
             peer_settings: ConnectionSettings {
                 max_header_list_size: None,
                 qpack_max_table_capacity: None,
                 qpack_blocked_streams: None,
+                h3_datagram: None,
             },
 
             control_stream_id: None,
@@ -618,6 +692,8 @@
 
             local_goaway_id: None,
             peer_goaway_id: None,
+
+            dgram_event_triggered: false,
         })
     }
 
@@ -625,12 +701,27 @@
     ///
     /// This will also initiate the HTTP/3 handshake with the peer by opening
     /// all control streams (including QPACK) and sending the local settings.
+    ///
+    /// On success the new connection is returned.
+    ///
+    /// The [`StreamLimit`] error is returned when the HTTP/3 control stream
+    /// cannot be created.
+    ///
+    /// [`StreamLimit`]: ../enum.Error.html#variant.StreamLimit
     pub fn with_transport(
         conn: &mut super::Connection, config: &Config,
     ) -> Result<Connection> {
-        let mut http3_conn = Connection::new(config, conn.is_server)?;
+        let mut http3_conn =
+            Connection::new(config, conn.is_server, conn.dgram_enabled())?;
 
-        http3_conn.send_settings(conn)?;
+        match http3_conn.send_settings(conn) {
+            Ok(_) => (),
+
+            Err(e) => {
+                conn.close(true, e.to_wire(), b"Error opening control stream")?;
+                return Err(e);
+            },
+        };
 
         // Try opening QPACK streams, but ignore errors if it fails since we
         // don't need them right now.
@@ -680,7 +771,11 @@
         // stream_capacity() will fail. By writing a 0-length buffer, we force
         // the creation of the QUIC stream state, without actually writing
         // anything.
-        conn.stream_send(stream_id, b"", false)?;
+        if let Err(e) = conn.stream_send(stream_id, b"", false) {
+            self.streams.remove(&stream_id);
+
+            return Err(e.into());
+        };
 
         self.send_headers(conn, stream_id, headers, fin)?;
 
@@ -737,7 +832,7 @@
             return Err(Error::FrameUnexpected);
         }
 
-        let mut urgency = 3;
+        let mut urgency = 3u8.saturating_add(PRIORITY_URGENCY_OFFSET);
         let mut incremental = false;
 
         for param in priority.split(',') {
@@ -755,11 +850,14 @@
                 // TODO: this also detects when u is not an sh-integer and
                 // clamps it in the same way. A real structured header parser
                 // would actually fail to parse.
-                let mut u =
-                    i64::from_str_radix(param.rsplit('=').next().unwrap(), 10)
-                        .unwrap_or(7);
+                let mut u = param
+                    .rsplit('=')
+                    .next()
+                    .unwrap()
+                    .parse::<i64>()
+                    .unwrap_or(7);
 
-                if u < 0 || u > 7 {
+                if !(0..=7).contains(&u) {
                     u = 7;
                 }
 
@@ -806,7 +904,17 @@
             self.frames_greased = true;
         }
 
-        let stream_cap = conn.stream_capacity(stream_id)?;
+        let stream_cap = match conn.stream_capacity(stream_id) {
+            Ok(v) => v,
+
+            Err(e) => {
+                if conn.stream_finished(stream_id) {
+                    self.streams.remove(&stream_id);
+                }
+
+                return Err(e.into());
+            },
+        };
 
         let header_block = self.encode_header_block(headers)?;
 
@@ -881,15 +989,28 @@
             },
         };
 
+        // Avoid sending 0-length DATA frames when the fin flag is false.
+        if body.is_empty() && !fin {
+            return Err(Error::Done);
+        }
+
         let overhead = octets::varint_len(frame::DATA_FRAME_TYPE_ID) +
             octets::varint_len(body.len() as u64);
 
-        let stream_cap = conn.stream_capacity(stream_id)?;
+        let stream_cap = match conn.stream_capacity(stream_id) {
+            Ok(v) => v,
 
-        // Make sure there is enough capacity to send the frame header and at
-        // least one byte of frame payload (this to avoid sending 0-length DATA
-        // frames).
-        if stream_cap <= overhead {
+            Err(e) => {
+                if conn.stream_finished(stream_id) {
+                    self.streams.remove(&stream_id);
+                }
+
+                return Err(e.into());
+            },
+        };
+
+        // Make sure there is enough capacity to send the DATA frame header.
+        if stream_cap < overhead {
             return Err(Error::Done);
         }
 
@@ -900,6 +1021,11 @@
         // application can try again later.
         let fin = if body_len != body.len() { false } else { fin };
 
+        // Again, avoid sending 0-length DATA frames when the fin flag is false.
+        if body_len == 0 && !fin {
+            return Err(Error::Done);
+        }
+
         trace!(
             "{} tx frm DATA stream={} len={} fin={}",
             conn.trace_id(),
@@ -924,6 +1050,18 @@
         Ok(written)
     }
 
+    /// Returns whether the peer enabled HTTP/3 DATAGRAM frame support.
+    ///
+    /// Support is signalled by the peer's SETTINGS, so this method always
+    /// returns false until they have been processed using the [`poll()`]
+    /// method.
+    ///
+    /// [`poll()`]: struct.Connection.html#method.poll
+    pub fn dgram_enabled_by_peer(&self, conn: &super::Connection) -> bool {
+        self.peer_settings.h3_datagram == Some(1) &&
+            conn.dgram_max_writable_len().is_some()
+    }
+
     /// Sends an HTTP/3 DATAGRAM with the specified flow ID.
     pub fn send_dgram(
         &mut self, conn: &mut super::Connection, flow_id: u64, buf: &[u8],
@@ -977,6 +1115,35 @@
         }
     }
 
+    // A helper function for determining if there is a DATAGRAM event.
+    fn process_dgrams(
+        &mut self, conn: &mut super::Connection,
+    ) -> Result<(u64, Event)> {
+        let mut d = [0; 8];
+
+        match conn.dgram_recv_peek(&mut d, 8) {
+            Ok(_) => {
+                if self.dgram_event_triggered {
+                    return Err(Error::Done);
+                }
+
+                self.dgram_event_triggered = true;
+
+                Ok((0, Event::Datagram))
+            },
+
+            Err(crate::Error::Done) => {
+                // The dgram recv queue is empty, so re-arm the Datagram event
+                // so it is issued next time a DATAGRAM is received.
+                self.dgram_event_triggered = false;
+
+                Err(Error::Done)
+            },
+
+            Err(e) => Err(Error::TransportError(e)),
+        }
+    }
+
     /// Reads request or response body data into the provided buffer.
     ///
     /// Applications should call this method whenever the [`poll()`] method
@@ -991,32 +1158,78 @@
     pub fn recv_body(
         &mut self, conn: &mut super::Connection, stream_id: u64, out: &mut [u8],
     ) -> Result<usize> {
-        let stream = self.streams.get_mut(&stream_id).ok_or(Error::Done)?;
+        let mut total = 0;
 
-        if stream.state() != stream::State::Data {
-            return Err(Error::Done);
+        // Try to consume all buffered data for the stream, even across multiple
+        // DATA frames.
+        while total < out.len() {
+            let stream = self.streams.get_mut(&stream_id).ok_or(Error::Done)?;
+
+            if stream.state() != stream::State::Data {
+                break;
+            }
+
+            let (read, fin) =
+                match stream.try_consume_data(conn, &mut out[total..]) {
+                    Ok(v) => v,
+
+                    Err(Error::Done) => break,
+
+                    Err(e) => return Err(e),
+                };
+
+            total += read;
+
+            // No more data to read, we are done.
+            if read == 0 || fin {
+                break;
+            }
+
+            // Process incoming data from the stream. For example, if a whole
+            // DATA frame was consumed, and another one is queued behind it,
+            // this will ensure the additional data will also be returned to
+            // the application.
+            match self.process_readable_stream(conn, stream_id, false) {
+                Ok(_) => unreachable!(),
+
+                Err(Error::Done) => (),
+
+                Err(e) => return Err(e),
+            };
+
+            if conn.stream_finished(stream_id) {
+                break;
+            }
         }
 
-        let read = stream.try_consume_data(conn, out)?;
-
         // While body is being received, the stream is marked as finished only
         // when all data is read by the application.
         if conn.stream_finished(stream_id) {
-            self.finished_streams.push_back(stream_id);
+            self.process_finished_stream(stream_id);
         }
 
-        Ok(read)
+        if total == 0 {
+            return Err(Error::Done);
+        }
+
+        Ok(total)
     }
 
     /// Processes HTTP/3 data received from the peer.
     ///
-    /// On success it returns an [`Event`] and an ID.
+    /// On success it returns an [`Event`] and an ID, or [`Done`] when there are
+    /// no events to report.
+    ///
+    /// Note that all events are edge-triggered, meaning that once reported they
+    /// will not be reported again by calling this method again, until the event
+    /// is re-armed.
     ///
     /// The events [`Headers`], [`Data`] and [`Finished`] return a stream ID,
     /// which is used in methods [`recv_body()`], [`send_response()`] or
     /// [`send_body()`].
     ///
-    /// The event [`Datagram`] returns a flow ID.
+    /// The event [`Datagram`] returns a dummy value of `0`; this should be
+    /// ignored by the application.
     ///
     /// The event [`GoAway`] returns an ID that depends on the connection role.
     /// A client receives the largest processed stream ID. A server receives the
@@ -1026,6 +1239,7 @@
     /// the appropriate error code, using the transport's [`close()`] method.
     ///
     /// [`Event`]: enum.Event.html
+    /// [`Done`]: enum.Error.html#variant.Done
     /// [`Headers`]: enum.Event.html#variant.Headers
     /// [`Data`]: enum.Event.html#variant.Data
     /// [`Finished`]: enum.Event.html#variant.Finished
@@ -1040,7 +1254,7 @@
         // When connection close is initiated by the local application (e.g. due
         // to a protocol error), the connection itself might be in a broken
         // state, so return early.
-        if conn.error.is_some() || conn.app_error.is_some() {
+        if conn.local_error.is_some() {
             return Err(Error::Done);
         }
 
@@ -1080,26 +1294,20 @@
             return Ok((finished, Event::Finished));
         }
 
-        // Process DATAGRAMs
-        let mut d = [0; 8];
+        // Process queued DATAGRAMs if the poll threshold allows it.
+        match self.process_dgrams(conn) {
+            Ok(v) => return Ok(v),
 
-        match conn.dgram_recv_peek(&mut d, 8) {
-            Ok(_) => {
-                let mut b = octets::Octets::with_slice(&d);
-                let flow_id = b.get_varint()?;
-                return Ok((flow_id, Event::Datagram));
-            },
+            Err(Error::Done) => (),
 
-            Err(crate::Error::Done) => (),
-
-            Err(e) => return Err(Error::TransportError(e)),
+            Err(e) => return Err(e),
         };
 
         // Process HTTP/3 data from readable streams.
         for s in conn.readable() {
             trace!("{} stream id {} is readable", conn.trace_id(), s);
 
-            let ev = match self.process_readable_stream(conn, s) {
+            let ev = match self.process_readable_stream(conn, s, true) {
                 Ok(v) => Some(v),
 
                 Err(Error::Done) => None,
@@ -1108,16 +1316,22 @@
             };
 
             if conn.stream_finished(s) {
-                self.finished_streams.push_back(s);
+                self.process_finished_stream(s);
             }
 
             // TODO: check if stream is completed so it can be freed
-
             if let Some(ev) = ev {
                 return Ok(ev);
             }
         }
 
+        // Process finished streams list once again, to make sure `Finished`
+        // events are returned when receiving empty stream frames with the fin
+        // flag set.
+        if let Some(finished) = self.finished_streams.pop_front() {
+            return Ok((finished, Event::Finished));
+        }
+
         Err(Error::Done)
     }
 
@@ -1135,9 +1349,13 @@
     pub fn send_goaway(
         &mut self, conn: &mut super::Connection, id: u64,
     ) -> Result<()> {
+        let mut id = id;
+
+        // TODO: server push
+        //
+        // In the meantime always send 0 from client.
         if !self.is_server {
-            // TODO: server push
-            return Ok(());
+            id = 0;
         }
 
         if self.is_server && id % 4 != 0 {
@@ -1237,7 +1455,17 @@
     ) -> Result<()> {
         let mut d = [0; 8];
 
-        let stream_cap = conn.stream_capacity(stream_id)?;
+        let stream_cap = match conn.stream_capacity(stream_id) {
+            Ok(v) => v,
+
+            Err(e) => {
+                if conn.stream_finished(stream_id) {
+                    self.streams.remove(&stream_id);
+                }
+
+                return Err(e.into());
+            },
+        };
 
         let grease_frame1 = grease_value();
         let grease_frame2 = grease_value();
@@ -1283,7 +1511,7 @@
             Ok(stream_id) => {
                 trace!("{} open GREASE stream {}", conn.trace_id(), stream_id);
 
-                conn.stream_send(stream_id, b"GREASE is the word", false)?;
+                conn.stream_send(stream_id, b"GREASE is the word", true)?;
             },
 
             Err(Error::IdError) => {
@@ -1316,6 +1544,7 @@
                 .local_settings
                 .qpack_max_table_capacity,
             qpack_blocked_streams: self.local_settings.qpack_blocked_streams,
+            h3_datagram: self.local_settings.h3_datagram,
             grease,
         };
 
@@ -1346,7 +1575,7 @@
             return Err(Error::ClosedCriticalStream);
         }
 
-        match self.process_readable_stream(conn, stream_id) {
+        match self.process_readable_stream(conn, stream_id, true) {
             Ok(ev) => return Ok(ev),
 
             Err(Error::Done) => (),
@@ -1368,7 +1597,7 @@
     }
 
     fn process_readable_stream(
-        &mut self, conn: &mut super::Connection, stream_id: u64,
+        &mut self, conn: &mut super::Connection, stream_id: u64, polling: bool,
     ) -> Result<(u64, Event)> {
         self.streams
             .entry(stream_id)
@@ -1541,6 +1770,11 @@
                 },
 
                 stream::State::FramePayload => {
+                    // Do not emit events when not polling.
+                    if !polling {
+                        break;
+                    }
+
                     stream.try_fill_buffer(conn)?;
 
                     let frame = match stream.try_consume_frame() {
@@ -1569,6 +1803,15 @@
                 },
 
                 stream::State::Data => {
+                    // Do not emit events when not polling.
+                    if !polling {
+                        break;
+                    }
+
+                    if !stream.try_trigger_data_event() {
+                        break;
+                    }
+
                     return Ok((stream_id, Event::Data));
                 },
 
@@ -1583,16 +1826,44 @@
 
                 stream::State::Drain => {
                     // Discard incoming data on the stream.
-                    conn.stream_shutdown(stream_id, crate::Shutdown::Read, 0)?;
+                    conn.stream_shutdown(
+                        stream_id,
+                        crate::Shutdown::Read,
+                        0x100,
+                    )?;
 
                     break;
                 },
+
+                stream::State::Finished => break,
             }
         }
 
         Err(Error::Done)
     }
 
+    /// Marks a request/push stream finished and queues it in `finished_streams`.
+    fn process_finished_stream(&mut self, stream_id: u64) {
+        let stream = match self.streams.get_mut(&stream_id) {
+            None => return,
+            Some(s) => s,
+        };
+
+        // Nothing to do when the stream is already in the Finished state.
+        if stream.state() == stream::State::Finished {
+            return;
+        }
+
+        // Streams of any other type (or none) are not queued.
+        let queue_event = match stream.ty() {
+            Some(stream::Type::Request) | Some(stream::Type::Push) => true,
+            _ => false,
+        };
+        if queue_event {
+            stream.finished();
+            self.finished_streams.push_back(stream_id);
+        }
+    }
+
     fn process_frame(
         &mut self, conn: &mut super::Connection, stream_id: u64,
         frame: frame::Frame,
@@ -1609,13 +1880,28 @@
                 max_header_list_size,
                 qpack_max_table_capacity,
                 qpack_blocked_streams,
+                h3_datagram,
                 ..
             } => {
                 self.peer_settings = ConnectionSettings {
                     max_header_list_size,
                     qpack_max_table_capacity,
                     qpack_blocked_streams,
+                    h3_datagram,
                 };
+
+                if let Some(1) = h3_datagram {
+                    // The peer MUST have also enabled DATAGRAM with a TP
+                    if conn.dgram_max_writable_len().is_none() {
+                        conn.close(
+                            true,
+                            Error::SettingsError.to_wire(),
+                            b"H3_DATAGRAM sent with value 1 but max_datagram_frame_size TP not set.",
+                        )?;
+
+                        return Err(Error::SettingsError);
+                    }
+                }
             },
 
             frame::Frame::Headers { header_block } => {
@@ -1636,14 +1922,25 @@
                     .max_header_list_size
                     .unwrap_or(std::u64::MAX);
 
-                let headers = self
+                let headers = match self
                     .qpack_decoder
                     .decode(&header_block[..], max_size)
-                    .map_err(|e| match e {
-                        qpack::Error::HeaderListTooLarge => Error::ExcessiveLoad,
+                {
+                    Ok(v) => v,
 
-                        _ => Error::QpackDecompressionFailed,
-                    })?;
+                    Err(e) => {
+                        let e = match e {
+                            qpack::Error::HeaderListTooLarge =>
+                                Error::ExcessiveLoad,
+
+                            _ => Error::QpackDecompressionFailed,
+                        };
+
+                        conn.close(true, e.to_wire(), b"Error parsing headers.")?;
+
+                        return Err(e);
+                    },
+                };
 
                 let has_body = !conn.stream_finished(stream_id);
 
@@ -1814,8 +2111,6 @@
         pub pipe: testing::Pipe,
         pub client: Connection,
         pub server: Connection,
-
-        buf: [u8; 65535],
     }
 
     impl Session {
@@ -1831,6 +2126,7 @@
             config.set_initial_max_streams_bidi(5);
             config.set_initial_max_streams_uni(5);
             config.verify_peer(false);
+            config.enable_dgram(true, 3, 3);
 
             let h3_config = Config::new()?;
             Session::with_configs(&mut config, &h3_config)
@@ -1839,47 +2135,49 @@
         pub fn with_configs(
             config: &mut crate::Config, h3_config: &Config,
         ) -> Result<Session> {
+            let pipe = testing::Pipe::with_config(config)?;
+            let client_dgram = pipe.client.dgram_enabled();
+            let server_dgram = pipe.server.dgram_enabled();
             Ok(Session {
-                pipe: testing::Pipe::with_config(config)?,
-                client: Connection::new(&h3_config, false)?,
-                server: Connection::new(&h3_config, true)?,
-                buf: [0; 65535],
+                pipe,
+                client: Connection::new(&h3_config, false, client_dgram)?,
+                server: Connection::new(&h3_config, true, server_dgram)?,
             })
         }
 
         /// Do the HTTP/3 handshake so both ends are in sane initial state.
         pub fn handshake(&mut self) -> Result<()> {
-            self.pipe.handshake(&mut self.buf)?;
+            self.pipe.handshake()?;
 
             // Client streams.
             self.client.send_settings(&mut self.pipe.client)?;
-            self.pipe.advance(&mut self.buf).ok();
+            self.pipe.advance().ok();
 
             self.client
                 .open_qpack_encoder_stream(&mut self.pipe.client)?;
-            self.pipe.advance(&mut self.buf).ok();
+            self.pipe.advance().ok();
 
             self.client
                 .open_qpack_decoder_stream(&mut self.pipe.client)?;
-            self.pipe.advance(&mut self.buf).ok();
+            self.pipe.advance().ok();
 
             if self.pipe.client.grease {
                 self.client.open_grease_stream(&mut self.pipe.client)?;
             }
 
-            self.pipe.advance(&mut self.buf).ok();
+            self.pipe.advance().ok();
 
             // Server streams.
             self.server.send_settings(&mut self.pipe.server)?;
-            self.pipe.advance(&mut self.buf).ok();
+            self.pipe.advance().ok();
 
             self.server
                 .open_qpack_encoder_stream(&mut self.pipe.server)?;
-            self.pipe.advance(&mut self.buf).ok();
+            self.pipe.advance().ok();
 
             self.server
                 .open_qpack_decoder_stream(&mut self.pipe.server)?;
-            self.pipe.advance(&mut self.buf).ok();
+            self.pipe.advance().ok();
 
             if self.pipe.server.grease {
                 self.server.open_grease_stream(&mut self.pipe.server)?;
@@ -1900,7 +2198,7 @@
 
         /// Advances the session pipe over the buffer.
         pub fn advance(&mut self) -> crate::Result<()> {
-            self.pipe.advance(&mut self.buf)
+            self.pipe.advance()
         }
 
         /// Polls the client for events.
@@ -1918,11 +2216,11 @@
         /// On success it returns the newly allocated stream and the headers.
         pub fn send_request(&mut self, fin: bool) -> Result<(u64, Vec<Header>)> {
             let req = vec![
-                Header::new(":method", "GET"),
-                Header::new(":scheme", "https"),
-                Header::new(":authority", "quic.tech"),
-                Header::new(":path", "/test"),
-                Header::new("user-agent", "quiche-test"),
+                Header::new(b":method", b"GET"),
+                Header::new(b":scheme", b"https"),
+                Header::new(b":authority", b"quic.tech"),
+                Header::new(b":path", b"/test"),
+                Header::new(b"user-agent", b"quiche-test"),
             ];
 
             let stream =
@@ -1940,8 +2238,8 @@
             &mut self, stream: u64, fin: bool,
         ) -> Result<Vec<Header>> {
             let resp = vec![
-                Header::new(":status", "200"),
-                Header::new("server", "quiche-test"),
+                Header::new(b":status", b"200"),
+                Header::new(b"server", b"quiche-test"),
             ];
 
             self.server.send_response(
@@ -2024,6 +2322,54 @@
             Ok(())
         }
 
+        /// Sends an HTTP/3 DATAGRAM with a fixed payload from the client.
+        ///
+        /// On success it returns the payload that was sent.
+        pub fn send_dgram_client(&mut self, flow_id: u64) -> Result<Vec<u8>> {
+            let payload: Vec<u8> = (1..=10).collect();
+
+            let quic = &mut self.pipe.client;
+            self.client.send_dgram(quic, flow_id, &payload)?;
+            // The result of advancing the pipe is discarded.
+            self.advance().ok();
+
+            Ok(payload)
+        }
+
+        /// Receives an HTTP/3 DATAGRAM from the server on the client side.
+        ///
+        /// Returns the DATAGRAM length, flow ID and flow ID length on success.
+        pub fn recv_dgram_client(
+            &mut self, buf: &mut [u8],
+        ) -> Result<(usize, u64, usize)> {
+            let Session { client, pipe, .. } = self;
+            client.recv_dgram(&mut pipe.client, buf)
+        }
+
+        /// Sends an HTTP/3 DATAGRAM with a fixed payload from the server.
+        ///
+        /// On success it returns the payload that was sent.
+        pub fn send_dgram_server(&mut self, flow_id: u64) -> Result<Vec<u8>> {
+            let payload: Vec<u8> = (1..=10).collect();
+
+            let quic = &mut self.pipe.server;
+            self.server.send_dgram(quic, flow_id, &payload)?;
+            // The result of advancing the pipe is discarded.
+            self.advance().ok();
+
+            Ok(payload)
+        }
+
+        /// Receives an HTTP/3 DATAGRAM from the client on the server side.
+        ///
+        /// Returns the DATAGRAM length, flow ID and flow ID length on success.
+        pub fn recv_dgram_server(
+            &mut self, buf: &mut [u8],
+        ) -> Result<(usize, u64, usize)> {
+            let Session { server, pipe, .. } = self;
+            server.recv_dgram(&mut pipe.server, buf)
+        }
+
         /// Sends a single HTTP/3 frame from the server.
         pub fn send_frame_server(
             &mut self, frame: frame::Frame, stream_id: u64, fin: bool,
@@ -2041,6 +2387,28 @@
 
             Ok(())
         }
+
+        /// Writes `data` unmodified to stream `stream_id` of the client's QUIC
+        /// connection, passing `fin` through, and then advances the pipe.
+        pub fn send_arbitrary_stream_data_client(
+            &mut self, data: &[u8], stream_id: u64, fin: bool,
+        ) -> Result<()> {
+            let quic = &mut self.pipe.client;
+            quic.stream_send(stream_id, data, fin)?;
+            self.advance().ok();
+            Ok(())
+        }
+
+        /// Writes `data` unmodified to stream `stream_id` of the server's QUIC
+        /// connection, passing `fin` through, and then advances the pipe.
+        pub fn send_arbitrary_stream_data_server(
+            &mut self, data: &[u8], stream_id: u64, fin: bool,
+        ) -> Result<()> {
+            let quic = &mut self.pipe.server;
+            quic.stream_send(stream_id, data, fin)?;
+            self.advance().ok();
+            Ok(())
+        }
     }
 }
 
@@ -2158,9 +2526,10 @@
         };
 
         assert_eq!(s.poll_client(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_client(), Ok((stream, Event::Data)));
+        assert_eq!(s.poll_client(), Err(Error::Done));
 
         for _ in 0..total_data_frames {
-            assert_eq!(s.poll_client(), Ok((stream, Event::Data)));
             assert_eq!(s.recv_body_client(stream, &mut recv_buf), Ok(body.len()));
         }
 
@@ -2227,9 +2596,10 @@
         };
 
         assert_eq!(s.poll_server(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
 
         for _ in 0..total_data_frames {
-            assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
             assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(body.len()));
         }
 
@@ -2288,7 +2658,8 @@
             assert_eq!(ev, ev_headers);
             assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
             assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(body.len()));
-            assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+            assert_eq!(s.poll_client(), Err(Error::Done));
+
             assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(body.len()));
             assert_eq!(s.poll_server(), Ok((stream, Event::Finished)));
         }
@@ -2320,6 +2691,46 @@
     }
 
     #[test]
+    /// Send a request with no body, get a response with one DATA frame, then
+    /// an empty FIN sent only after the client has read the body.
+    fn request_no_body_response_one_chunk_empty_fin() {
+        let mut s = Session::default().unwrap();
+        s.handshake().unwrap();
+
+        let (stream, req) = s.send_request(true).unwrap();
+
+        let ev_headers = Event::Headers {
+            list: req,
+            has_body: false,
+        };
+        // The server sees the headers and then the end of the request stream.
+        assert_eq!(s.poll_server(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Finished)));
+
+        let resp = s.send_response(stream, false).unwrap();
+
+        let body = s.send_body_server(stream, false).unwrap();
+
+        let mut recv_buf = vec![0; body.len()];
+
+        let ev_headers = Event::Headers {
+            list: resp,
+            has_body: true,
+        };
+
+        assert_eq!(s.poll_client(), Ok((stream, ev_headers)));
+
+        assert_eq!(s.poll_client(), Ok((stream, Event::Data)));
+        assert_eq!(s.recv_body_client(stream, &mut recv_buf), Ok(body.len()));
+        // Finish the stream from the server with an empty write carrying fin.
+        assert_eq!(s.pipe.server.stream_send(stream, &[], true), Ok(0));
+        s.advance().ok();
+        // The empty fin is expected to surface as exactly one Finished event.
+        assert_eq!(s.poll_client(), Ok((stream, Event::Finished)));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+    }
+
+    #[test]
     /// Try to send DATA frames before HEADERS.
     fn body_response_before_headers() {
         let mut s = Session::default().unwrap();
@@ -2620,12 +3031,12 @@
         let mut s = Session::default().unwrap();
         s.handshake().unwrap();
 
-        s.client.send_goaway(&mut s.pipe.client, 1).unwrap();
+        s.client.send_goaway(&mut s.pipe.client, 100).unwrap();
 
         s.advance().ok();
 
         // TODO: server push
-        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.poll_server(), Ok((0, Event::GoAway)));
     }
 
     #[test]
@@ -2703,10 +3114,10 @@
     fn uni_stream_local_counting() {
         let config = Config::new().unwrap();
 
-        let h3_cln = Connection::new(&config, false).unwrap();
+        let h3_cln = Connection::new(&config, false, false).unwrap();
         assert_eq!(h3_cln.next_uni_stream_id, 2);
 
-        let h3_srv = Connection::new(&config, true).unwrap();
+        let h3_srv = Connection::new(&config, true, false).unwrap();
         assert_eq!(h3_srv.next_uni_stream_id, 3);
     }
 
@@ -2842,10 +3253,32 @@
     #[test]
     /// Tests limits for the stream state buffer maximum size.
     fn max_state_buf_size() {
-        // DATA frames don't consume the state buffer, so can be of any size.
         let mut s = Session::default().unwrap();
         s.handshake().unwrap();
 
+        let req = vec![
+            Header::new(b":method", b"GET"),
+            Header::new(b":scheme", b"https"),
+            Header::new(b":authority", b"quic.tech"),
+            Header::new(b":path", b"/test"),
+            Header::new(b"user-agent", b"quiche-test"),
+        ];
+
+        assert_eq!(
+            s.client.send_request(&mut s.pipe.client, &req, false),
+            Ok(0)
+        );
+
+        s.advance().ok();
+
+        let ev_headers = Event::Headers {
+            list: req,
+            has_body: true,
+        };
+
+        assert_eq!(s.server.poll(&mut s.pipe.server), Ok((0, ev_headers)));
+
+        // DATA frames don't consume the state buffer, so can be of any size.
         let mut d = [42; 128];
         let mut b = octets::OctetsMut::with_slice(&mut d);
 
@@ -2919,16 +3352,16 @@
         };
 
         assert_eq!(s.poll_server(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
 
         for _ in 0..total_data_frames {
-            assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
             assert_eq!(
                 s.recv_body_server(stream, &mut recv_buf),
                 Ok(bytes.len())
             );
         }
 
-        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
         assert_eq!(
             s.recv_body_server(stream, &mut recv_buf),
             Ok(bytes.len() - 2)
@@ -2966,11 +3399,11 @@
         s.handshake().unwrap();
 
         let req = vec![
-            Header::new(":method", "GET"),
-            Header::new(":scheme", "https"),
-            Header::new(":authority", "quic.tech"),
-            Header::new(":path", "/test"),
-            Header::new("aaaaaaa", "aaaaaaaa"),
+            Header::new(b":method", b"GET"),
+            Header::new(b":scheme", b"https"),
+            Header::new(b":authority", b"quic.tech"),
+            Header::new(b":path", b"/test"),
+            Header::new(b"aaaaaaa", b"aaaaaaaa"),
         ];
 
         let stream = s
@@ -2983,6 +3416,11 @@
         assert_eq!(stream, 0);
 
         assert_eq!(s.poll_server(), Err(Error::ExcessiveLoad));
+
+        assert_eq!(
+            s.pipe.server.local_error.as_ref().unwrap().error_code,
+            Error::to_wire(Error::ExcessiveLoad)
+        );
     }
 
     #[test]
@@ -2992,11 +3430,11 @@
         s.handshake().unwrap();
 
         let req = vec![
-            Header::new(":method", "GET"),
-            Header::new(":scheme", "https"),
-            Header::new(":authority", "quic.tech"),
-            Header::new(":path", "/test"),
-            Header::new("user-agent", "quiche-test"),
+            Header::new(b":method", b"GET"),
+            Header::new(b":scheme", b"https"),
+            Header::new(b":authority", b"quic.tech"),
+            Header::new(b":path", b"/test"),
+            Header::new(b"user-agent", b"quiche-test"),
         ];
 
         // We need to open all streams in the same flight, so we can't use the
@@ -3022,9 +3460,8 @@
     }
 
     #[test]
-    /// Tests that calling poll() after an error occured does nothing.
-    fn poll_after_error() {
-        // DATA frames don't consume the state buffer, so can be of any size.
+    /// Tests that sending DATA before HEADERS causes an error.
+    fn data_before_headers() {
         let mut s = Session::default().unwrap();
         s.handshake().unwrap();
 
@@ -3034,16 +3471,22 @@
         let frame_type = b.put_varint(frame::DATA_FRAME_TYPE_ID).unwrap();
         s.pipe.client.stream_send(0, frame_type, false).unwrap();
 
-        let frame_len = b.put_varint(1 << 24).unwrap();
+        let frame_len = b.put_varint(5).unwrap();
         s.pipe.client.stream_send(0, frame_len, false).unwrap();
 
-        s.pipe.client.stream_send(0, &d, false).unwrap();
+        s.pipe.client.stream_send(0, b"hello", false).unwrap();
 
         s.advance().ok();
 
-        assert_eq!(s.server.poll(&mut s.pipe.server), Ok((0, Event::Data)));
+        assert_eq!(
+            s.server.poll(&mut s.pipe.server),
+            Err(Error::FrameUnexpected)
+        );
+    }
 
-        // GREASE frames consume the state buffer, so need to be limited.
+    #[test]
+    /// Tests that calling poll() after an error occurred does nothing.
+    fn poll_after_error() {
         let mut s = Session::default().unwrap();
         s.handshake().unwrap();
 
@@ -3092,10 +3535,10 @@
         s.handshake().unwrap();
 
         let req = vec![
-            Header::new(":method", "GET"),
-            Header::new(":scheme", "https"),
-            Header::new(":authority", "quic.tech"),
-            Header::new(":path", "/test"),
+            Header::new(b":method", b"GET"),
+            Header::new(b":scheme", b"https"),
+            Header::new(b":authority", b"quic.tech"),
+            Header::new(b":path", b"/test"),
         ];
 
         assert_eq!(s.client.send_request(&mut s.pipe.client, &req, true), Ok(0));
@@ -3113,6 +3556,61 @@
     }
 
     #[test]
+    /// Test handling of 0-length DATA writes with and without fin.
+    fn zero_length_data() {
+        let mut s = Session::default().unwrap();
+        s.handshake().unwrap();
+
+        let (stream, req) = s.send_request(false).unwrap();
+        // An empty write without fin yields Done; with fin it succeeds.
+        assert_eq!(
+            s.client.send_body(&mut s.pipe.client, 0, b"", false),
+            Err(Error::Done)
+        );
+        assert_eq!(s.client.send_body(&mut s.pipe.client, 0, b"", true), Ok(0));
+
+        s.advance().ok();
+
+        let mut recv_buf = vec![0; 100];
+
+        let ev_headers = Event::Headers {
+            list: req,
+            has_body: true,
+        };
+
+        assert_eq!(s.poll_server(), Ok((stream, ev_headers)));
+        // A Data event is expected, but there are no body bytes to read.
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Err(Error::Done));
+
+        assert_eq!(s.poll_server(), Ok((stream, Event::Finished)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        let resp = s.send_response(stream, false).unwrap();
+        // Repeat the same empty writes in the server-to-client direction.
+        assert_eq!(
+            s.server.send_body(&mut s.pipe.server, 0, b"", false),
+            Err(Error::Done)
+        );
+        assert_eq!(s.server.send_body(&mut s.pipe.server, 0, b"", true), Ok(0));
+
+        s.advance().ok();
+
+        let ev_headers = Event::Headers {
+            list: resp,
+            has_body: true,
+        };
+
+        assert_eq!(s.poll_client(), Ok((stream, ev_headers)));
+
+        assert_eq!(s.poll_client(), Ok((stream, Event::Data)));
+        assert_eq!(s.recv_body_client(stream, &mut recv_buf), Err(Error::Done));
+
+        assert_eq!(s.poll_client(), Ok((stream, Event::Finished)));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+    }
+
+    #[test]
     /// Tests that blocked 0-length DATA writes are reported correctly.
     fn zero_length_data_blocked() {
         let mut config = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
@@ -3123,7 +3621,7 @@
             .load_priv_key_from_pem_file("examples/cert.key")
             .unwrap();
         config.set_application_protos(b"\x02h3").unwrap();
-        config.set_initial_max_data(70);
+        config.set_initial_max_data(69);
         config.set_initial_max_stream_data_bidi_local(150);
         config.set_initial_max_stream_data_bidi_remote(150);
         config.set_initial_max_stream_data_uni(150);
@@ -3138,10 +3636,10 @@
         s.handshake().unwrap();
 
         let req = vec![
-            Header::new(":method", "GET"),
-            Header::new(":scheme", "https"),
-            Header::new(":authority", "quic.tech"),
-            Header::new(":path", "/test"),
+            Header::new(b":method", b"GET"),
+            Header::new(b":scheme", b"https"),
+            Header::new(b":authority", b"quic.tech"),
+            Header::new(b":path", b"/test"),
         ];
 
         assert_eq!(
@@ -3159,8 +3657,837 @@
         // Once the server gives flow control credits back, we can send the body.
         assert_eq!(s.client.send_body(&mut s.pipe.client, 0, b"", true), Ok(0));
     }
+
+    #[test]
+    /// Tests that receiving a H3_DATAGRAM setting is ok.
+    fn dgram_setting() {
+        let mut config = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config.set_application_protos(b"\x02h3").unwrap();
+        config.set_initial_max_data(70);
+        config.set_initial_max_stream_data_bidi_local(150);
+        config.set_initial_max_stream_data_bidi_remote(150);
+        config.set_initial_max_stream_data_uni(150);
+        config.set_initial_max_streams_bidi(100);
+        config.set_initial_max_streams_uni(5);
+        config.enable_dgram(true, 1000, 1000);
+        config.verify_peer(false);
+
+        let h3_config = Config::new().unwrap();
+
+        let mut s = Session::with_configs(&mut config, &h3_config).unwrap();
+        assert_eq!(s.pipe.handshake(), Ok(()));
+
+        s.client.send_settings(&mut s.pipe.client).unwrap();
+        assert_eq!(s.pipe.advance(), Ok(()));
+
+        // Before processing SETTINGS (via poll), HTTP/3 DATAGRAMS are not
+        // enabled.
+        assert!(!s.server.dgram_enabled_by_peer(&s.pipe.server));
+
+        // When everything is ok, poll returns Done and DATAGRAM is enabled.
+        assert_eq!(s.server.poll(&mut s.pipe.server), Err(Error::Done));
+        assert!(s.server.dgram_enabled_by_peer(&s.pipe.server));
+
+        // Now detect things on the client
+        s.server.send_settings(&mut s.pipe.server).unwrap();
+        assert_eq!(s.pipe.advance(), Ok(()));
+        assert!(!s.client.dgram_enabled_by_peer(&s.pipe.client));
+        assert_eq!(s.client.poll(&mut s.pipe.client), Err(Error::Done));
+        assert!(s.client.dgram_enabled_by_peer(&s.pipe.client));
+    }
+
+    #[test]
+    /// Tests that receiving a H3_DATAGRAM setting when no TP is set generates
+    /// an error.
+    fn dgram_setting_no_tp() {
+        let mut config = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config.set_application_protos(b"\x02h3").unwrap();
+        config.set_initial_max_data(70);
+        config.set_initial_max_stream_data_bidi_local(150);
+        config.set_initial_max_stream_data_bidi_remote(150);
+        config.set_initial_max_stream_data_uni(150);
+        config.set_initial_max_streams_bidi(100);
+        config.set_initial_max_streams_uni(5);
+        config.verify_peer(false);
+
+        let h3_config = Config::new().unwrap();
+
+        let mut s = Session::with_configs(&mut config, &h3_config).unwrap();
+        assert_eq!(s.pipe.handshake(), Ok(()));
+
+        s.client.control_stream_id = Some(
+            s.client
+                .open_uni_stream(
+                    &mut s.pipe.client,
+                    stream::HTTP3_CONTROL_STREAM_TYPE_ID,
+                )
+                .unwrap(),
+        );
+
+        let settings = frame::Frame::Settings {
+            max_header_list_size: None,
+            qpack_max_table_capacity: None,
+            qpack_blocked_streams: None,
+            h3_datagram: Some(1),
+            grease: None,
+        };
+
+        s.send_frame_client(settings, s.client.control_stream_id.unwrap(), false)
+            .unwrap();
+
+        assert_eq!(s.pipe.advance(), Ok(()));
+
+        assert_eq!(s.server.poll(&mut s.pipe.server), Err(Error::SettingsError));
+    }
+
+    #[test]
+    /// Tests that receiving SETTINGS with prohibited values generates an error.
+    fn settings_h2_prohibited() {
+        let mut config = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config.set_application_protos(b"\x02h3").unwrap();
+        config.set_initial_max_data(70);
+        config.set_initial_max_stream_data_bidi_local(150);
+        config.set_initial_max_stream_data_bidi_remote(150);
+        config.set_initial_max_stream_data_uni(150);
+        config.set_initial_max_streams_bidi(100);
+        config.set_initial_max_streams_uni(5);
+        config.verify_peer(false);
+
+        let h3_config = Config::new().unwrap();
+
+        let mut s = Session::with_configs(&mut config, &h3_config).unwrap();
+        assert_eq!(s.pipe.handshake(), Ok(()));
+
+        s.client.control_stream_id = Some(
+            s.client
+                .open_uni_stream(
+                    &mut s.pipe.client,
+                    stream::HTTP3_CONTROL_STREAM_TYPE_ID,
+                )
+                .unwrap(),
+        );
+
+        s.server.control_stream_id = Some(
+            s.server
+                .open_uni_stream(
+                    &mut s.pipe.server,
+                    stream::HTTP3_CONTROL_STREAM_TYPE_ID,
+                )
+                .unwrap(),
+        );
+
+        let frame_payload_len = 2u64;
+        let settings = [
+            frame::SETTINGS_FRAME_TYPE_ID as u8,
+            frame_payload_len as u8,
+            0x2, // 0x2 is a reserved setting type
+            1,
+        ];
+
+        s.send_arbitrary_stream_data_client(
+            &settings,
+            s.client.control_stream_id.unwrap(),
+            false,
+        )
+        .unwrap();
+
+        s.send_arbitrary_stream_data_server(
+            &settings,
+            s.server.control_stream_id.unwrap(),
+            false,
+        )
+        .unwrap();
+
+        assert_eq!(s.pipe.advance(), Ok(()));
+
+        assert_eq!(s.server.poll(&mut s.pipe.server), Err(Error::SettingsError));
+
+        assert_eq!(s.client.poll(&mut s.pipe.client), Err(Error::SettingsError));
+    }
+
+    #[test]
+    /// Send a single DATAGRAM.
+    fn single_dgram() {
+        let mut buf = [0; 65535];
+        let mut s = Session::default().unwrap();
+        s.handshake().unwrap();
+
+        // We'll send default data of 10 bytes on flow ID 0.
+        let result = (11, 0, 1);
+
+        s.send_dgram_client(0).unwrap();
+
+        assert_eq!(s.poll_server(), Ok((0, Event::Datagram)));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        s.send_dgram_server(0).unwrap();
+        assert_eq!(s.poll_client(), Ok((0, Event::Datagram)));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(result));
+    }
+
+    #[test]
+    /// Send multiple DATAGRAMs.
+    fn multiple_dgram() {
+        let mut buf = [0; 65535];
+        let mut s = Session::default().unwrap();
+        s.handshake().unwrap();
+
+        // We'll send default data of 10 bytes on flow ID 0.
+        let result = (11, 0, 1);
+
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(0).unwrap();
+
+        assert_eq!(s.poll_server(), Ok((0, Event::Datagram)));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Err(Error::Done));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        s.send_dgram_server(0).unwrap();
+        s.send_dgram_server(0).unwrap();
+        s.send_dgram_server(0).unwrap();
+
+        assert_eq!(s.poll_client(), Ok((0, Event::Datagram)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Err(Error::Done));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+    }
+
+    #[test]
+    /// Send more DATAGRAMs than the send queue allows.
+    fn multiple_dgram_overflow() {
+        let mut buf = [0; 65535];
+        let mut s = Session::default().unwrap();
+        s.handshake().unwrap();
+
+        // We'll send default data of 10 bytes on flow ID 0.
+        let result = (11, 0, 1);
+
+        // Five DATAGRAMs
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(0).unwrap();
+
+        // Only 3 DATAGRAMs are delivered, signalled by a single Datagram event.
+        assert_eq!(s.poll_server(), Ok((0, Event::Datagram)));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Err(Error::Done));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+    }
+
+    #[test]
+    /// Send a single DATAGRAM and request. Ensure that poll reports each
+    /// pending event once and then returns Done if the data is not read.
+    fn poll_yield_cycling() {
+        let mut config = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config.set_application_protos(b"\x02h3").unwrap();
+        config.set_initial_max_data(1500);
+        config.set_initial_max_stream_data_bidi_local(150);
+        config.set_initial_max_stream_data_bidi_remote(150);
+        config.set_initial_max_stream_data_uni(150);
+        config.set_initial_max_streams_bidi(100);
+        config.set_initial_max_streams_uni(5);
+        config.verify_peer(false);
+        config.enable_dgram(true, 100, 100);
+
+        let mut h3_config = Config::new().unwrap();
+        let mut s = Session::with_configs(&mut config, &mut h3_config).unwrap();
+        s.handshake().unwrap();
+
+        // Send request followed by DATAGRAM on client side.
+        let (stream, req) = s.send_request(false).unwrap();
+
+        s.send_body_client(stream, true).unwrap();
+
+        let ev_headers = Event::Headers {
+            list: req,
+            has_body: true,
+        };
+
+        s.send_dgram_client(0).unwrap();
+
+        // Now let's test the poll counts and yielding.
+        assert_eq!(s.poll_server(), Ok((0, Event::Datagram)));
+
+        assert_eq!(s.poll_server(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+
+        assert_eq!(s.poll_server(), Err(Error::Done));
+    }
+
+    #[test]
+    /// Send a single DATAGRAM and request. Ensure that poll
+    /// yield cycles and cleanly exits if data is read.
+    fn poll_yield_single_read() {
+        let mut buf = [0; 65535];
+
+        let mut config = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config.set_application_protos(b"\x02h3").unwrap();
+        config.set_initial_max_data(1500);
+        config.set_initial_max_stream_data_bidi_local(150);
+        config.set_initial_max_stream_data_bidi_remote(150);
+        config.set_initial_max_stream_data_uni(150);
+        config.set_initial_max_streams_bidi(100);
+        config.set_initial_max_streams_uni(5);
+        config.verify_peer(false);
+        config.enable_dgram(true, 100, 100);
+
+        let mut h3_config = Config::new().unwrap();
+        let mut s = Session::with_configs(&mut config, &mut h3_config).unwrap();
+        s.handshake().unwrap();
+
+        // We'll send default data of 10 bytes on flow ID 0.
+        let result = (11, 0, 1);
+
+        // Send request followed by DATAGRAM on client side.
+        let (stream, req) = s.send_request(false).unwrap();
+
+        let body = s.send_body_client(stream, true).unwrap();
+
+        let mut recv_buf = vec![0; body.len()];
+
+        let ev_headers = Event::Headers {
+            list: req,
+            has_body: true,
+        };
+
+        s.send_dgram_client(0).unwrap();
+
+        // Now let's test the poll counts and yielding.
+        assert_eq!(s.poll_server(), Ok((0, Event::Datagram)));
+
+        assert_eq!(s.poll_server(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(result));
+
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(body.len()));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Finished)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        // Send response followed by DATAGRAM on server side
+        let resp = s.send_response(stream, false).unwrap();
+
+        let body = s.send_body_server(stream, true).unwrap();
+
+        let mut recv_buf = vec![0; body.len()];
+
+        let ev_headers = Event::Headers {
+            list: resp,
+            has_body: true,
+        };
+
+        s.send_dgram_server(0).unwrap();
+
+        // Now let's test the poll counts and yielding.
+        assert_eq!(s.poll_client(), Ok((0, Event::Datagram)));
+
+        assert_eq!(s.poll_client(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_client(), Ok((stream, Event::Data)));
+
+        assert_eq!(s.poll_client(), Err(Error::Done));
+
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(result));
+
+        assert_eq!(s.poll_client(), Err(Error::Done));
+
+        assert_eq!(s.recv_body_client(stream, &mut recv_buf), Ok(body.len()));
+
+        assert_eq!(s.poll_client(), Ok((stream, Event::Finished)));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+    }
+
+    #[test]
+    /// Send multiple DATAGRAMs and a request. Ensure that poll
+    /// yield cycles and cleanly exits if data is read.
+    fn poll_yield_multi_read() {
+        let mut buf = [0; 65535];
+
+        let mut config = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config.set_application_protos(b"\x02h3").unwrap();
+        config.set_initial_max_data(1500);
+        config.set_initial_max_stream_data_bidi_local(150);
+        config.set_initial_max_stream_data_bidi_remote(150);
+        config.set_initial_max_stream_data_uni(150);
+        config.set_initial_max_streams_bidi(100);
+        config.set_initial_max_streams_uni(5);
+        config.verify_peer(false);
+        config.enable_dgram(true, 100, 100);
+
+        let mut h3_config = Config::new().unwrap();
+        let mut s = Session::with_configs(&mut config, &mut h3_config).unwrap();
+        s.handshake().unwrap();
+
+        // 10 bytes on flow ID 0 and 2.
+        let flow_0_result = (11, 0, 1);
+        let flow_2_result = (11, 2, 1);
+
+        // Send requests followed by DATAGRAMs on client side.
+        let (stream, req) = s.send_request(false).unwrap();
+
+        let body = s.send_body_client(stream, true).unwrap();
+
+        let mut recv_buf = vec![0; body.len()];
+
+        let ev_headers = Event::Headers {
+            list: req,
+            has_body: true,
+        };
+
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(2).unwrap();
+        s.send_dgram_client(2).unwrap();
+        s.send_dgram_client(2).unwrap();
+        s.send_dgram_client(2).unwrap();
+        s.send_dgram_client(2).unwrap();
+
+        // Now let's test the poll counts and yielding.
+        assert_eq!(s.poll_server(), Ok((0, Event::Datagram)));
+
+        assert_eq!(s.poll_server(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        // Second cycle, start to read
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_0_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_0_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_0_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(body.len()));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Finished)));
+
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        // Third cycle.
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_0_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_0_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_2_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_2_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_2_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_2_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_2_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        // Send response followed by DATAGRAM on server side
+        let resp = s.send_response(stream, false).unwrap();
+
+        let body = s.send_body_server(stream, true).unwrap();
+
+        let mut recv_buf = vec![0; body.len()];
+
+        let ev_headers = Event::Headers {
+            list: resp,
+            has_body: true,
+        };
+
+        s.send_dgram_server(0).unwrap();
+        s.send_dgram_server(0).unwrap();
+        s.send_dgram_server(0).unwrap();
+        s.send_dgram_server(0).unwrap();
+        s.send_dgram_server(0).unwrap();
+        s.send_dgram_server(2).unwrap();
+        s.send_dgram_server(2).unwrap();
+        s.send_dgram_server(2).unwrap();
+        s.send_dgram_server(2).unwrap();
+        s.send_dgram_server(2).unwrap();
+
+        assert_eq!(s.poll_client(), Ok((0, Event::Datagram)));
+
+        assert_eq!(s.poll_client(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_client(), Ok((stream, Event::Data)));
+
+        assert_eq!(s.poll_client(), Err(Error::Done));
+
+        // Second cycle, start to read
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(flow_0_result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(flow_0_result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(flow_0_result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+
+        assert_eq!(s.recv_body_client(stream, &mut recv_buf), Ok(body.len()));
+        assert_eq!(s.poll_client(), Ok((stream, Event::Finished)));
+
+        assert_eq!(s.poll_client(), Err(Error::Done));
+
+        // Third cycle.
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(flow_0_result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(flow_0_result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(flow_2_result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(flow_2_result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(flow_2_result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(flow_2_result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_client(&mut buf), Ok(flow_2_result));
+        assert_eq!(s.poll_client(), Err(Error::Done));
+    }
+
+    #[test]
+    /// Tests that the Finished event is not issued for streams of unknown type
+    /// (e.g. GREASE).
+    fn finished_is_for_requests() {
+        let mut s = Session::default().unwrap();
+        s.handshake().unwrap();
+
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        assert_eq!(s.client.open_grease_stream(&mut s.pipe.client), Ok(()));
+        assert_eq!(s.pipe.advance(), Ok(()));
+
+        assert_eq!(s.poll_client(), Err(Error::Done));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+    }
+
+    #[test]
+    /// Tests that streams are marked as finished only once.
+    fn finished_once() {
+        let mut s = Session::default().unwrap();
+        s.handshake().unwrap();
+
+        let (stream, req) = s.send_request(false).unwrap();
+        let body = s.send_body_client(stream, true).unwrap();
+
+        let mut recv_buf = vec![0; body.len()];
+
+        let ev_headers = Event::Headers {
+            list: req,
+            has_body: true,
+        };
+
+        assert_eq!(s.poll_server(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(body.len()));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Finished)));
+
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Err(Error::Done));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+    }
+
+    #[test]
+    /// Tests that the Data event is properly re-armed.
+    fn data_event_rearm() {
+        let bytes = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];
+
+        let mut s = Session::default().unwrap();
+        s.handshake().unwrap();
+
+        let (stream, req) = s.send_request(false).unwrap();
+
+        let mut recv_buf = vec![0; bytes.len()];
+
+        let ev_headers = Event::Headers {
+            list: req,
+            has_body: true,
+        };
+
+        // Manually send an incomplete DATA frame (i.e. the frame size is longer
+        // than the actual data sent).
+        {
+            let mut d = [42; 10];
+            let mut b = octets::OctetsMut::with_slice(&mut d);
+
+            b.put_varint(frame::DATA_FRAME_TYPE_ID).unwrap();
+            b.put_varint(bytes.len() as u64).unwrap();
+            let off = b.off();
+            s.pipe.client.stream_send(stream, &d[..off], false).unwrap();
+
+            assert_eq!(
+                s.pipe.client.stream_send(stream, &bytes[..5], false),
+                Ok(5)
+            );
+
+            s.advance().ok();
+        }
+
+        assert_eq!(s.poll_server(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        // Read the available body data.
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(5));
+
+        // Send the remaining DATA payload.
+        assert_eq!(s.pipe.client.stream_send(stream, &bytes[5..], false), Ok(5));
+        s.advance().ok();
+
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        // Read the rest of the body data.
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(5));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        // Send more data.
+        let body = s.send_body_client(stream, false).unwrap();
+
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(body.len()));
+
+        // Send more data, then HEADERS, then more data.
+        let body = s.send_body_client(stream, false).unwrap();
+
+        let trailers = vec![Header::new(b"hello", b"world")];
+
+        s.client
+            .send_headers(&mut s.pipe.client, stream, &trailers, false)
+            .unwrap();
+
+        let ev_trailers = Event::Headers {
+            list: trailers,
+            has_body: true,
+        };
+
+        s.advance().ok();
+
+        s.send_body_client(stream, false).unwrap();
+
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(body.len()));
+
+        assert_eq!(s.poll_server(), Ok((stream, ev_trailers)));
+
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(body.len()));
+
+        let (stream, req) = s.send_request(false).unwrap();
+
+        let ev_headers = Event::Headers {
+            list: req,
+            has_body: true,
+        };
+
+        // Manually send an incomplete DATA frame (i.e. only the header is sent).
+        {
+            let mut d = [42; 10];
+            let mut b = octets::OctetsMut::with_slice(&mut d);
+
+            b.put_varint(frame::DATA_FRAME_TYPE_ID).unwrap();
+            b.put_varint(bytes.len() as u64).unwrap();
+            let off = b.off();
+            s.pipe.client.stream_send(stream, &d[..off], false).unwrap();
+
+            s.advance().ok();
+        }
+
+        assert_eq!(s.poll_server(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Err(Error::Done));
+
+        assert_eq!(s.pipe.client.stream_send(stream, &bytes[..5], false), Ok(5));
+
+        s.advance().ok();
+
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(5));
+
+        assert_eq!(s.pipe.client.stream_send(stream, &bytes[5..], false), Ok(5));
+        s.advance().ok();
+
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(5));
+
+        // Buffer multiple data frames.
+        let body = s.send_body_client(stream, false).unwrap();
+        s.send_body_client(stream, false).unwrap();
+        s.send_body_client(stream, false).unwrap();
+
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        {
+            let mut d = [42; 10];
+            let mut b = octets::OctetsMut::with_slice(&mut d);
+
+            b.put_varint(frame::DATA_FRAME_TYPE_ID).unwrap();
+            b.put_varint(0).unwrap();
+            let off = b.off();
+            s.pipe.client.stream_send(stream, &d[..off], true).unwrap();
+
+            s.advance().ok();
+        }
+
+        let mut recv_buf = vec![0; bytes.len() * 3];
+
+        assert_eq!(
+            s.recv_body_server(stream, &mut recv_buf),
+            Ok(body.len() * 3)
+        );
+    }
+
+    #[test]
+    /// Tests that the Datagram event is properly re-armed.
+    fn dgram_event_rearm() {
+        let mut buf = [0; 65535];
+
+        let mut config = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config.set_application_protos(b"\x02h3").unwrap();
+        config.set_initial_max_data(1500);
+        config.set_initial_max_stream_data_bidi_local(150);
+        config.set_initial_max_stream_data_bidi_remote(150);
+        config.set_initial_max_stream_data_uni(150);
+        config.set_initial_max_streams_bidi(100);
+        config.set_initial_max_streams_uni(5);
+        config.verify_peer(false);
+        config.enable_dgram(true, 100, 100);
+
+        let mut h3_config = Config::new().unwrap();
+        let mut s = Session::with_configs(&mut config, &mut h3_config).unwrap();
+        s.handshake().unwrap();
+
+        // 10 bytes on flow ID 0 and 2.
+        let flow_0_result = (11, 0, 1);
+        let flow_2_result = (11, 2, 1);
+
+        // Send requests followed by DATAGRAMs on client side.
+        let (stream, req) = s.send_request(false).unwrap();
+
+        let body = s.send_body_client(stream, true).unwrap();
+
+        let mut recv_buf = vec![0; body.len()];
+
+        let ev_headers = Event::Headers {
+            list: req,
+            has_body: true,
+        };
+
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(2).unwrap();
+        s.send_dgram_client(2).unwrap();
+
+        assert_eq!(s.poll_server(), Ok((0, Event::Datagram)));
+
+        assert_eq!(s.poll_server(), Ok((stream, ev_headers)));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Data)));
+
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_0_result));
+
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_0_result));
+
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_2_result));
+
+        assert_eq!(s.poll_server(), Err(Error::Done));
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_2_result));
+
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        s.send_dgram_client(0).unwrap();
+        s.send_dgram_client(2).unwrap();
+
+        assert_eq!(s.poll_server(), Ok((0, Event::Datagram)));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_0_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        assert_eq!(s.recv_dgram_server(&mut buf), Ok(flow_2_result));
+        assert_eq!(s.poll_server(), Err(Error::Done));
+
+        assert_eq!(s.recv_body_server(stream, &mut recv_buf), Ok(body.len()));
+        assert_eq!(s.poll_server(), Ok((stream, Event::Finished)));
+    }
 }
 
+#[cfg(feature = "ffi")]
 mod ffi;
 mod frame;
 #[doc(hidden)]
diff --git a/src/h3/qpack/decoder.rs b/src/h3/qpack/decoder.rs
index 240ca69..1bc5755 100644
--- a/src/h3/qpack/decoder.rs
+++ b/src/h3/qpack/decoder.rs
@@ -149,9 +149,7 @@
                         name.to_vec()
                     };
 
-                    let name = String::from_utf8(name)
-                        .map_err(|_| Error::InvalidHeaderValue)?;
-
+                    let name = name.to_vec();
                     let value = decode_str(&mut b)?;
 
                     trace!(
@@ -198,7 +196,7 @@
                     // Instead of calling Header::new(), create Header directly
                     // from `value`, which is already String, but clone `name`
                     // as it is just a reference.
-                    let hdr = Header(name.to_string(), value);
+                    let hdr = Header(name.to_vec(), value);
                     out.push(hdr);
                 },
 
@@ -215,7 +213,7 @@
     }
 }
 
-fn lookup_static(idx: u64) -> Result<(&'static str, &'static str)> {
+fn lookup_static(idx: u64) -> Result<(&'static [u8], &'static [u8])> {
     if idx >= super::static_table::STATIC_TABLE.len() as u64 {
         return Err(Error::InvalidStaticTableIndex);
     }
@@ -254,7 +252,7 @@
     Err(Error::BufferTooShort)
 }
 
-fn decode_str<'a>(b: &'a mut octets::Octets) -> Result<String> {
+fn decode_str(b: &mut octets::Octets) -> Result<Vec<u8>> {
     let first = b.peek_u8()?;
 
     let huff = first & 0x80 == 0x80;
@@ -269,7 +267,6 @@
         val.to_vec()
     };
 
-    let val = String::from_utf8(val).map_err(|_| Error::InvalidHeaderValue)?;
     Ok(val)
 }
 
diff --git a/src/h3/qpack/encoder.rs b/src/h3/qpack/encoder.rs
index 1307df3..09c8b08 100644
--- a/src/h3/qpack/encoder.rs
+++ b/src/h3/qpack/encoder.rs
@@ -55,7 +55,7 @@
     ) -> Result<usize> {
         let mut b = octets::OctetsMut::with_slice(out);
 
-        // Request Insert Count.
+        // Required Insert Count.
         encode_int(0, 0, 8, &mut b)?;
 
         // Base.
@@ -80,14 +80,12 @@
 
                 None => {
                     // Encode as fully literal.
-                    let name_len = super::huffman::encode_output_length(
-                        h.name().as_bytes(),
-                        true,
-                    )?;
+                    let name_len =
+                        super::huffman::encode_output_length(h.name(), true)?;
 
                     encode_int(name_len as u64, LITERAL | 0x08, 3, &mut b)?;
 
-                    super::huffman::encode(h.name().as_bytes(), &mut b, true)?;
+                    super::huffman::encode(h.name(), &mut b, true)?;
 
                     encode_str(h.value(), 7, &mut b)?;
                 },
@@ -151,12 +149,12 @@
     Ok(())
 }
 
-fn encode_str(v: &str, prefix: usize, b: &mut octets::OctetsMut) -> Result<()> {
-    let len = super::huffman::encode_output_length(v.as_bytes(), false)?;
+fn encode_str(v: &[u8], prefix: usize, b: &mut octets::OctetsMut) -> Result<()> {
+    let len = super::huffman::encode_output_length(v, false)?;
 
     encode_int(len as u64, 0x80, prefix, b)?;
 
-    super::huffman::encode(v.as_bytes(), b, false)?;
+    super::huffman::encode(v, b, false)?;
 
     Ok(())
 }
diff --git a/src/h3/qpack/mod.rs b/src/h3/qpack/mod.rs
index 6f2bdda..0a23306 100644
--- a/src/h3/qpack/mod.rs
+++ b/src/h3/qpack/mod.rs
@@ -87,15 +87,15 @@
         let mut encoded = [0u8; 240];
 
         let headers = vec![
-            h3::Header::new(":path", "/rsrc.php/v3/yn/r/rIPZ9Qkrdd9.png"),
-            h3::Header::new("accept-encoding", "gzip, deflate, br"),
-            h3::Header::new("accept-language", "en-US,en;q=0.9"),
-            h3::Header::new("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.70 Safari/537.36"),
-            h3::Header::new("accept", "image/webp,image/apng,image/*,*/*;q=0.8"),
-            h3::Header::new("referer", "https://static.xx.fbcdn.net/rsrc.php/v3/yT/l/0,cross/dzXGESIlGQQ.css"),
-            h3::Header::new(":authority", "static.xx.fbcdn.net"),
-            h3::Header::new(":scheme", "https"),
-            h3::Header::new(":method", "GET"),
+            h3::Header::new(b":path", b"/rsrc.php/v3/yn/r/rIPZ9Qkrdd9.png"),
+            h3::Header::new(b"accept-encoding", b"gzip, deflate, br"),
+            h3::Header::new(b"accept-language", b"en-US,en;q=0.9"),
+            h3::Header::new(b"user-agent", b"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.70 Safari/537.36"),
+            h3::Header::new(b"accept", b"image/webp,image/apng,image/*,*/*;q=0.8"),
+            h3::Header::new(b"referer", b"https://static.xx.fbcdn.net/rsrc.php/v3/yT/l/0,cross/dzXGESIlGQQ.css"),
+            h3::Header::new(b":authority", b"static.xx.fbcdn.net"),
+            h3::Header::new(b":scheme", b"https"),
+            h3::Header::new(b":method", b"GET"),
         ];
 
         let mut enc = Encoder::new();
@@ -110,20 +110,20 @@
         let mut encoded = [0u8; 35];
 
         let headers_expected = vec![
-            crate::h3::Header::new(":status", "200"),
-            crate::h3::Header::new(":path", "/HeLlO"),
-            crate::h3::Header::new("woot", "woot"),
-            crate::h3::Header::new("hello", "WorlD"),
-            crate::h3::Header::new("foo", "BaR"),
+            crate::h3::Header::new(b":status", b"200"),
+            crate::h3::Header::new(b":path", b"/HeLlO"),
+            crate::h3::Header::new(b"woot", b"woot"),
+            crate::h3::Header::new(b"hello", b"WorlD"),
+            crate::h3::Header::new(b"foo", b"BaR"),
         ];
 
         // Header.
         let headers_in = vec![
-            crate::h3::Header::new(":StAtUs", "200"),
-            crate::h3::Header::new(":PaTh", "/HeLlO"),
-            crate::h3::Header::new("WooT", "woot"),
-            crate::h3::Header::new("hello", "WorlD"),
-            crate::h3::Header::new("fOo", "BaR"),
+            crate::h3::Header::new(b":StAtUs", b"200"),
+            crate::h3::Header::new(b":PaTh", b"/HeLlO"),
+            crate::h3::Header::new(b"WooT", b"woot"),
+            crate::h3::Header::new(b"hello", b"WorlD"),
+            crate::h3::Header::new(b"fOo", b"BaR"),
         ];
 
         let mut enc = Encoder::new();
@@ -136,11 +136,11 @@
 
         // HeaderRef.
         let headers_in = vec![
-            crate::h3::HeaderRef::new(":StAtUs", "200"),
-            crate::h3::HeaderRef::new(":PaTh", "/HeLlO"),
-            crate::h3::HeaderRef::new("WooT", "woot"),
-            crate::h3::HeaderRef::new("hello", "WorlD"),
-            crate::h3::HeaderRef::new("fOo", "BaR"),
+            crate::h3::HeaderRef::new(b":StAtUs", b"200"),
+            crate::h3::HeaderRef::new(b":PaTh", b"/HeLlO"),
+            crate::h3::HeaderRef::new(b"WooT", b"woot"),
+            crate::h3::HeaderRef::new(b"hello", b"WorlD"),
+            crate::h3::HeaderRef::new(b"fOo", b"BaR"),
         ];
 
         let mut enc = Encoder::new();
diff --git a/src/h3/qpack/static_table.rs b/src/h3/qpack/static_table.rs
index 4010d12..3cc10d4 100644
--- a/src/h3/qpack/static_table.rs
+++ b/src/h3/qpack/static_table.rs
@@ -24,113 +24,113 @@
 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-pub const STATIC_TABLE: [(&str, &str); 99] = [
-    (":authority", ""),
-    (":path", "/"),
-    ("age", "0"),
-    ("content-disposition", ""),
-    ("content-length", "0"),
-    ("cookie", ""),
-    ("date", ""),
-    ("etag", ""),
-    ("if-modified-since", ""),
-    ("if-none-match", ""),
-    ("last-modified", ""),
-    ("link", ""),
-    ("location", ""),
-    ("referer", ""),
-    ("set-cookie", ""),
-    (":method", "CONNECT"),
-    (":method", "DELETE"),
-    (":method", "GET"),
-    (":method", "HEAD"),
-    (":method", "OPTIONS"),
-    (":method", "POST"),
-    (":method", "PUT"),
-    (":scheme", "http"),
-    (":scheme", "https"),
-    (":status", "103"),
-    (":status", "200"),
-    (":status", "304"),
-    (":status", "404"),
-    (":status", "503"),
-    ("accept", "*/*"),
-    ("accept", "application/dns-message"),
-    ("accept-encoding", "gzip, deflate, br"),
-    ("accept-ranges", "bytes"),
-    ("access-control-allow-headers", "cache-control"),
-    ("access-control-allow-headers", "content-type"),
-    ("access-control-allow-origin", "*"),
-    ("cache-control", "max-age=0"),
-    ("cache-control", "max-age=2592000"),
-    ("cache-control", "max-age=604800"),
-    ("cache-control", "no-cache"),
-    ("cache-control", "no-store"),
-    ("cache-control", "public, max-age=31536000"),
-    ("content-encoding", "br"),
-    ("content-encoding", "gzip"),
-    ("content-type", "application/dns-message"),
-    ("content-type", "application/javascript"),
-    ("content-type", "application/json"),
-    ("content-type", "application/x-www-form-urlencoded"),
-    ("content-type", "image/gif"),
-    ("content-type", "image/jpeg"),
-    ("content-type", "image/png"),
-    ("content-type", "text/css"),
-    ("content-type", "text/html; charset=utf-8"),
-    ("content-type", "text/plain"),
-    ("content-type", "text/plain;charset=utf-8"),
-    ("range", "bytes=0-"),
-    ("strict-transport-security", "max-age=31536000"),
+pub const STATIC_TABLE: [(&[u8], &[u8]); 99] = [
+    (b":authority", b""),
+    (b":path", b"/"),
+    (b"age", b"0"),
+    (b"content-disposition", b""),
+    (b"content-length", b"0"),
+    (b"cookie", b""),
+    (b"date", b""),
+    (b"etag", b""),
+    (b"if-modified-since", b""),
+    (b"if-none-match", b""),
+    (b"last-modified", b""),
+    (b"link", b""),
+    (b"location", b""),
+    (b"referer", b""),
+    (b"set-cookie", b""),
+    (b":method", b"CONNECT"),
+    (b":method", b"DELETE"),
+    (b":method", b"GET"),
+    (b":method", b"HEAD"),
+    (b":method", b"OPTIONS"),
+    (b":method", b"POST"),
+    (b":method", b"PUT"),
+    (b":scheme", b"http"),
+    (b":scheme", b"https"),
+    (b":status", b"103"),
+    (b":status", b"200"),
+    (b":status", b"304"),
+    (b":status", b"404"),
+    (b":status", b"503"),
+    (b"accept", b"*/*"),
+    (b"accept", b"application/dns-message"),
+    (b"accept-encoding", b"gzip, deflate, br"),
+    (b"accept-ranges", b"bytes"),
+    (b"access-control-allow-headers", b"cache-control"),
+    (b"access-control-allow-headers", b"content-type"),
+    (b"access-control-allow-origin", b"*"),
+    (b"cache-control", b"max-age=0"),
+    (b"cache-control", b"max-age=2592000"),
+    (b"cache-control", b"max-age=604800"),
+    (b"cache-control", b"no-cache"),
+    (b"cache-control", b"no-store"),
+    (b"cache-control", b"public, max-age=31536000"),
+    (b"content-encoding", b"br"),
+    (b"content-encoding", b"gzip"),
+    (b"content-type", b"application/dns-message"),
+    (b"content-type", b"application/javascript"),
+    (b"content-type", b"application/json"),
+    (b"content-type", b"application/x-www-form-urlencoded"),
+    (b"content-type", b"image/gif"),
+    (b"content-type", b"image/jpeg"),
+    (b"content-type", b"image/png"),
+    (b"content-type", b"text/css"),
+    (b"content-type", b"text/html; charset=utf-8"),
+    (b"content-type", b"text/plain"),
+    (b"content-type", b"text/plain;charset=utf-8"),
+    (b"range", b"bytes=0-"),
+    (b"strict-transport-security", b"max-age=31536000"),
     (
-        "strict-transport-security",
-        "max-age=31536000; includesubdomains",
+        b"strict-transport-security",
+        b"max-age=31536000; includesubdomains",
     ),
     (
-        "strict-transport-security",
-        "max-age=31536000; includesubdomains; preload",
+        b"strict-transport-security",
+        b"max-age=31536000; includesubdomains; preload",
     ),
-    ("vary", "accept-encoding"),
-    ("vary", "origin"),
-    ("x-content-type-options", "nosniff"),
-    ("x-xss-protection", "1; mode=block"),
-    (":status", "100"),
-    (":status", "204"),
-    (":status", "206"),
-    (":status", "302"),
-    (":status", "400"),
-    (":status", "403"),
-    (":status", "421"),
-    (":status", "425"),
-    (":status", "500"),
-    ("accept-language", ""),
-    ("access-control-allow-credentials", "FALSE"),
-    ("access-control-allow-credentials", "TRUE"),
-    ("access-control-allow-headers", "*"),
-    ("access-control-allow-methods", "get"),
-    ("access-control-allow-methods", "get, post, options"),
-    ("access-control-allow-methods", "options"),
-    ("access-control-expose-headers", "content-length"),
-    ("access-control-request-headers", "content-type"),
-    ("access-control-request-method", "get"),
-    ("access-control-request-method", "post"),
-    ("alt-svc", "clear"),
-    ("authorization", ""),
+    (b"vary", b"accept-encoding"),
+    (b"vary", b"origin"),
+    (b"x-content-type-options", b"nosniff"),
+    (b"x-xss-protection", b"1; mode=block"),
+    (b":status", b"100"),
+    (b":status", b"204"),
+    (b":status", b"206"),
+    (b":status", b"302"),
+    (b":status", b"400"),
+    (b":status", b"403"),
+    (b":status", b"421"),
+    (b":status", b"425"),
+    (b":status", b"500"),
+    (b"accept-language", b""),
+    (b"access-control-allow-credentials", b"FALSE"),
+    (b"access-control-allow-credentials", b"TRUE"),
+    (b"access-control-allow-headers", b"*"),
+    (b"access-control-allow-methods", b"get"),
+    (b"access-control-allow-methods", b"get, post, options"),
+    (b"access-control-allow-methods", b"options"),
+    (b"access-control-expose-headers", b"content-length"),
+    (b"access-control-request-headers", b"content-type"),
+    (b"access-control-request-method", b"get"),
+    (b"access-control-request-method", b"post"),
+    (b"alt-svc", b"clear"),
+    (b"authorization", b""),
     (
-        "content-security-policy",
-        "script-src 'none'; object-src 'none'; base-uri 'none'",
+        b"content-security-policy",
+        b"script-src 'none'; object-src 'none'; base-uri 'none'",
     ),
-    ("early-data", "1"),
-    ("expect-ct", ""),
-    ("forwarded", ""),
-    ("if-range", ""),
-    ("origin", ""),
-    ("purpose", "prefetch"),
-    ("server", ""),
-    ("timing-allow-origin", "*"),
-    ("upgrade-insecure-requests", "1"),
-    ("user-agent", ""),
-    ("x-forwarded-for", ""),
-    ("x-frame-options", "deny"),
-    ("x-frame-options", "sameorigin"),
+    (b"early-data", b"1"),
+    (b"expect-ct", b""),
+    (b"forwarded", b""),
+    (b"if-range", b""),
+    (b"origin", b""),
+    (b"purpose", b"prefetch"),
+    (b"server", b""),
+    (b"timing-allow-origin", b"*"),
+    (b"upgrade-insecure-requests", b"1"),
+    (b"user-agent", b""),
+    (b"x-forwarded-for", b""),
+    (b"x-frame-options", b"deny"),
+    (b"x-frame-options", b"sameorigin"),
 ];
diff --git a/src/h3/stream.rs b/src/h3/stream.rs
index f2f8f0c..58e2873 100644
--- a/src/h3/stream.rs
+++ b/src/h3/stream.rs
@@ -73,6 +73,9 @@
 
     /// Reading and discarding data.
     Drain,
+
+    /// All data has been read.
+    Finished,
 }
 
 impl Type {
@@ -135,6 +138,9 @@
 
     /// Whether the stream has been locally initialized.
     local_initialized: bool,
+
+    /// Whether a `Data` event has been triggered for this stream.
+    data_event_triggered: bool,
 }
 
 impl Stream {
@@ -171,9 +177,15 @@
             is_local,
             remote_initialized: false,
             local_initialized: false,
+
+            data_event_triggered: false,
         }
     }
 
+    pub fn ty(&self) -> Option<Type> {
+        self.ty
+    }
+
     pub fn state(&self) -> State {
         self.state
     }
@@ -261,6 +273,9 @@
                         (frame::HEADERS_FRAME_TYPE_ID, false) =>
                             self.remote_initialized = true,
 
+                        (frame::DATA_FRAME_TYPE_ID, false) =>
+                            return Err(Error::FrameUnexpected),
+
                         (frame::CANCEL_PUSH_FRAME_TYPE_ID, _) =>
                             return Err(Error::FrameUnexpected),
 
@@ -275,9 +290,7 @@
 
                         // All other frames can be ignored regardless of stream
                         // state.
-                        (_, false) => (),
-
-                        (_, true) => (),
+                        _ => (),
                     }
                 }
             },
@@ -347,7 +360,18 @@
     ) -> Result<()> {
         let buf = &mut self.state_buf[self.state_off..self.state_len];
 
-        let (read, _) = conn.stream_recv(self.id, buf)?;
+        let read = match conn.stream_recv(self.id, buf) {
+            Ok((len, _)) => len,
+
+            Err(e) => {
+                // The stream is not readable anymore, so re-arm the Data event.
+                if e == crate::Error::Done {
+                    self.reset_data_event();
+                }
+
+                return Err(e.into());
+            },
+        };
 
         trace!(
             "{} read {} bytes on stream {}",
@@ -359,6 +383,8 @@
         self.state_off += read;
 
         if !self.state_buffer_complete() {
+            self.reset_data_event();
+
             return Err(Error::Done);
         }
 
@@ -416,6 +442,9 @@
 
     /// Tries to parse a frame from the state buffer.
     pub fn try_consume_frame(&mut self) -> Result<frame::Frame> {
+        // Processing a frame other than DATA, so re-arm the Data event.
+        self.reset_data_event();
+
         // TODO: properly propagate frame parsing errors.
         let frame = frame::Frame::from_bytes(
             self.frame_type.unwrap(),
@@ -431,18 +460,39 @@
     /// Tries to read DATA payload from the transport stream.
     pub fn try_consume_data(
         &mut self, conn: &mut crate::Connection, out: &mut [u8],
-    ) -> Result<usize> {
+    ) -> Result<(usize, bool)> {
         let left = std::cmp::min(out.len(), self.state_len - self.state_off);
 
-        let (len, _) = conn.stream_recv(self.id, &mut out[..left])?;
+        let (len, fin) = match conn.stream_recv(self.id, &mut out[..left]) {
+            Ok(v) => v,
+
+            Err(e) => {
+                // The stream is not readable anymore, so re-arm the Data event.
+                if e == crate::Error::Done {
+                    self.reset_data_event();
+                }
+
+                return Err(e.into());
+            },
+        };
 
         self.state_off += len;
 
+        // The stream is not readable anymore, so re-arm the Data event.
+        if !conn.stream_readable(self.id) {
+            self.reset_data_event();
+        }
+
         if self.state_buffer_complete() {
             self.state_transition(State::FrameType, 1, true)?;
         }
 
-        Ok(len)
+        Ok((len, fin))
+    }
+
+    /// Marks the stream as finished.
+    pub fn finished(&mut self) {
+        let _ = self.state_transition(State::Finished, 0, false);
     }
 
     /// Tries to read DATA payload from the given cursor.
@@ -466,6 +516,25 @@
         Ok(len)
     }
 
+    /// Tries to update the data triggered state for the stream.
+    ///
+    /// This returns `true` if a Data event has not been triggered since the
+    /// last reset, and marks it as triggered. Returns `false` otherwise.
+    pub fn try_trigger_data_event(&mut self) -> bool {
+        if self.data_event_triggered {
+            return false;
+        }
+
+        self.data_event_triggered = true;
+
+        true
+    }
+
+    /// Resets the data triggered state.
+    fn reset_data_event(&mut self) {
+        self.data_event_triggered = false;
+    }
+
     /// Returns true if the state buffer has enough data to complete the state.
     fn state_buffer_complete(&self) -> bool {
         self.state_off == self.state_len
@@ -514,6 +583,7 @@
             max_header_list_size: Some(0),
             qpack_max_table_capacity: Some(0),
             qpack_blocked_streams: Some(0),
+            h3_datagram: None,
             grease: None,
         };
 
@@ -569,6 +639,7 @@
             max_header_list_size: Some(0),
             qpack_max_table_capacity: Some(0),
             qpack_blocked_streams: Some(0),
+            h3_datagram: None,
             grease: None,
         };
 
@@ -633,6 +704,7 @@
             max_header_list_size: Some(0),
             qpack_max_table_capacity: Some(0),
             qpack_blocked_streams: Some(0),
+            h3_datagram: None,
             grease: None,
         };
 
@@ -675,6 +747,7 @@
             max_header_list_size: Some(0),
             qpack_max_table_capacity: Some(0),
             qpack_blocked_streams: Some(0),
+            h3_datagram: None,
             grease: None,
         };
 
@@ -910,4 +983,28 @@
             .unwrap();
         assert_eq!(stream.state, State::Drain);
     }
+
+    #[test]
+    fn data_before_headers() {
+        let mut stream = Stream::new(0, false);
+
+        let mut d = vec![42; 128];
+        let mut b = octets::OctetsMut::with_slice(&mut d);
+
+        let data = frame::Frame::Data {
+            payload: vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
+        };
+
+        data.to_bytes(&mut b).unwrap();
+
+        let mut cursor = std::io::Cursor::new(d);
+
+        // Parse the DATA frame type.
+        stream.try_fill_buffer_for_tests(&mut cursor).unwrap();
+
+        let frame_ty = stream.try_consume_varint().unwrap();
+        assert_eq!(frame_ty, frame::DATA_FRAME_TYPE_ID);
+
+        assert_eq!(stream.set_frame_type(frame_ty), Err(Error::FrameUnexpected));
+    }
 }
diff --git a/src/lib.rs b/src/lib.rs
index 351352c..8ad1194 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -54,15 +54,25 @@
 //! ```
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
 //! # let server_name = "quic.tech";
-//! # let scid = [0xba; 16];
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let to = "127.0.0.1:1234".parse().unwrap();
 //! // Client connection.
-//! let conn = quiche::connect(Some(&server_name), &scid, &mut config)?;
+//! let conn = quiche::connect(Some(&server_name), &scid, to, &mut config)?;
 //!
 //! // Server connection.
-//! let conn = quiche::accept(&scid, None, &mut config)?;
+//! # let from = "127.0.0.1:1234".parse().unwrap();
+//! let conn = quiche::accept(&scid, None, from, &mut config)?;
 //! # Ok::<(), quiche::Error>(())
 //! ```
 //!
+//! In both cases, the application is responsible for generating a new source
+//! connection ID that will be used to identify the new connection.
+//!
+//! The application also needs to pass the address of the remote peer of the
+//! connection: in the case of a client that would be the address of the server
+//! it is trying to connect to, and for a server that is the address of the
+//! client that initiated the connection.
+//!
 //! ## Handling incoming packets
 //!
 //! Using the connection's [`recv()`] method the application can process
@@ -72,12 +82,15 @@
 //! # let mut buf = [0; 512];
 //! # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-//! # let scid = [0xba; 16];
-//! # let mut conn = quiche::accept(&scid, None, &mut config)?;
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let from = "127.0.0.1:1234".parse().unwrap();
+//! # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
 //! loop {
-//!     let read = socket.recv(&mut buf).unwrap();
+//!     let (read, from) = socket.recv_from(&mut buf).unwrap();
 //!
-//!     let read = match conn.recv(&mut buf[..read]) {
+//!     let recv_info = quiche::RecvInfo { from };
+//!
+//!     let read = match conn.recv(&mut buf[..read], recv_info) {
 //!         Ok(v) => v,
 //!
 //!         Err(quiche::Error::Done) => {
@@ -94,6 +107,10 @@
 //! # Ok::<(), quiche::Error>(())
 //! ```
 //!
+//! The application has to pass a [`RecvInfo`] structure in order to provide
+//! additional information about the received packet (such as the address it
+//! was received from).
+//!
 //! ## Generating outgoing packets
 //!
 //! Outgoing packet are generated using the connection's [`send()`] method
@@ -103,10 +120,11 @@
 //! # let mut out = [0; 512];
 //! # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-//! # let scid = [0xba; 16];
-//! # let mut conn = quiche::accept(&scid, None, &mut config)?;
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let from = "127.0.0.1:1234".parse().unwrap();
+//! # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
 //! loop {
-//!     let write = match conn.send(&mut out) {
+//!     let (write, send_info) = match conn.send(&mut out) {
 //!         Ok(v) => v,
 //!
 //!         Err(quiche::Error::Done) => {
@@ -120,19 +138,24 @@
 //!         },
 //!     };
 //!
-//!     socket.send(&out[..write]).unwrap();
+//!     socket.send_to(&out[..write], &send_info.to).unwrap();
 //! }
 //! # Ok::<(), quiche::Error>(())
 //! ```
 //!
+//! The application will be provided with a [`SendInfo`] structure providing
+//! additional information about the newly created packet (such as the address
+//! the packet should be sent to).
+//!
 //! When packets are sent, the application is responsible for maintaining a
 //! timer to react to time-based connection events. The timer expiration can be
 //! obtained using the connection's [`timeout()`] method.
 //!
 //! ```
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-//! # let scid = [0xba; 16];
-//! # let mut conn = quiche::accept(&scid, None, &mut config)?;
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let from = "127.0.0.1:1234".parse().unwrap();
+//! # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
 //! let timeout = conn.timeout();
 //! # Ok::<(), quiche::Error>(())
 //! ```
@@ -146,14 +169,15 @@
 //! # let mut out = [0; 512];
 //! # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-//! # let scid = [0xba; 16];
-//! # let mut conn = quiche::accept(&scid, None, &mut config)?;
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let from = "127.0.0.1:1234".parse().unwrap();
+//! # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
 //! // Timeout expired, handle it.
 //! conn.on_timeout();
 //!
 //! // Send more packets as needed after timeout.
 //! loop {
-//!     let write = match conn.send(&mut out) {
+//!     let (write, send_info) = match conn.send(&mut out) {
 //!         Ok(v) => v,
 //!
 //!         Err(quiche::Error::Done) => {
@@ -167,7 +191,7 @@
 //!         },
 //!     };
 //!
-//!     socket.send(&out[..write]).unwrap();
+//!     socket.send_to(&out[..write], &send_info.to).unwrap();
 //! }
 //! # Ok::<(), quiche::Error>(())
 //! ```
@@ -181,8 +205,9 @@
 //!
 //! ```no_run
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-//! # let scid = [0xba; 16];
-//! # let mut conn = quiche::accept(&scid, None, &mut config)?;
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let from = "127.0.0.1:1234".parse().unwrap();
+//! # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
 //! if conn.is_established() {
 //!     // Handshake completed, send some data on stream 0.
 //!     conn.stream_send(0, b"hello", true)?;
@@ -200,8 +225,9 @@
 //! ```no_run
 //! # let mut buf = [0; 512];
 //! # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-//! # let scid = [0xba; 16];
-//! # let mut conn = quiche::accept(&scid, None, &mut config)?;
+//! # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+//! # let from = "127.0.0.1:1234".parse().unwrap();
+//! # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
 //! if conn.is_established() {
 //!     // Iterate over readable streams.
 //!     for stream_id in conn.readable() {
@@ -222,7 +248,9 @@
 //! [`connect()`]: fn.connect.html
 //! [`accept()`]: fn.accept.html
 //! [`recv()`]: struct.Connection.html#method.recv
+//! [`RecvInfo`]: struct.RecvInfo.html
 //! [`send()`]: struct.Connection.html#method.send
+//! [`SendInfo`]: struct.SendInfo.html
 //! [`timeout()`]: struct.Connection.html#method.timeout
 //! [`on_timeout()`]: struct.Connection.html#method.on_timeout
 //! [`stream_send()`]: struct.Connection.html#method.stream_send
@@ -260,6 +288,8 @@
 //! [`CongestionControlAlgorithm`]: enum.CongestionControlAlgorithm.html
 
 #![allow(improper_ctypes)]
+#![allow(clippy::suspicious_operation_groupings)]
+#![allow(clippy::upper_case_acronyms)]
 #![warn(missing_docs)]
 
 #[macro_use]
@@ -268,15 +298,22 @@
 use std::cmp;
 use std::time;
 
+use std::net::SocketAddr;
+
 use std::pin::Pin;
 use std::str::FromStr;
 
+use std::sync::Mutex;
+
+use std::collections::VecDeque;
+
 /// The current QUIC wire version.
-pub const PROTOCOL_VERSION: u32 = PROTOCOL_VERSION_DRAFT29;
+pub const PROTOCOL_VERSION: u32 = PROTOCOL_VERSION_V1;
 
 /// Supported QUIC versions.
 ///
 /// Note that the older ones might not be fully supported.
+const PROTOCOL_VERSION_V1: u32 = 0x0000_0001;
 const PROTOCOL_VERSION_DRAFT27: u32 = 0xff00_001b;
 const PROTOCOL_VERSION_DRAFT28: u32 = 0xff00_001c;
 const PROTOCOL_VERSION_DRAFT29: u32 = 0xff00_001d;
@@ -306,6 +343,9 @@
 // The highest possible stream ID allowed.
 const MAX_STREAM_ID: u64 = 1 << 60;
 
+// The default max_datagram_size used in congestion control.
+const MAX_SEND_UDP_PAYLOAD_SIZE: usize = 1200;
+
 // The default length of DATAGRAM queues.
 const DEFAULT_MAX_DGRAM_QUEUE_LEN: usize = 0;
 
@@ -313,6 +353,14 @@
 // frames size. We enforce the recommendation for forward compatibility.
 const MAX_DGRAM_FRAME_SIZE: u64 = 65536;
 
+// The length of the payload length field.
+const PAYLOAD_LENGTH_LEN: usize = 2;
+
+// The number of undecryptable packets that can be buffered.
+const MAX_UNDECRYPTABLE_PACKETS: usize = 10;
+
+const RESERVED_VERSION_MASK: u32 = 0xfafafafa;
+
 /// A specialized [`Result`] type for quiche operations.
 ///
 /// This type is used throughout quiche's public API for any operation that
@@ -323,52 +371,59 @@
 
 /// A QUIC error.
 #[derive(Clone, Copy, Debug, PartialEq)]
-#[repr(C)]
 pub enum Error {
     /// There is no more work to do.
-    Done               = -1,
+    Done,
 
     /// The provided buffer is too short.
-    BufferTooShort     = -2,
+    BufferTooShort,
 
     /// The provided packet cannot be parsed because its version is unknown.
-    UnknownVersion     = -3,
+    UnknownVersion,
 
     /// The provided packet cannot be parsed because it contains an invalid
     /// frame.
-    InvalidFrame       = -4,
+    InvalidFrame,
 
     /// The provided packet cannot be parsed.
-    InvalidPacket      = -5,
+    InvalidPacket,
 
     /// The operation cannot be completed because the connection is in an
     /// invalid state.
-    InvalidState       = -6,
+    InvalidState,
 
     /// The operation cannot be completed because the stream is in an
     /// invalid state.
-    InvalidStreamState = -7,
+    ///
+    /// The stream ID is provided as associated data.
+    InvalidStreamState(u64),
 
     /// The peer's transport params cannot be parsed.
-    InvalidTransportParam = -8,
+    InvalidTransportParam,
 
     /// A cryptographic operation failed.
-    CryptoFail         = -9,
+    CryptoFail,
 
     /// The TLS handshake failed.
-    TlsFail            = -10,
+    TlsFail,
 
     /// The peer violated the local flow control limits.
-    FlowControl        = -11,
+    FlowControl,
 
     /// The peer violated the local stream limits.
-    StreamLimit        = -12,
+    StreamLimit,
+
+    /// The specified stream was stopped by the peer.
+    ///
+    /// The error code sent as part of the `STOP_SENDING` frame is provided as
+    /// associated data.
+    StreamStopped(u64),
 
     /// The received data exceeds the stream's final size.
-    FinalSize          = -13,
+    FinalSize,
 
     /// Error in congestion control.
-    CongestionControl  = -14,
+    CongestionControl,
 }
 
 impl Error {
@@ -376,7 +431,7 @@
         match self {
             Error::Done => 0x0,
             Error::InvalidFrame => 0x7,
-            Error::InvalidStreamState => 0x5,
+            Error::InvalidStreamState(..) => 0x5,
             Error::InvalidTransportParam => 0x8,
             Error::FlowControl => 0x3,
             Error::StreamLimit => 0x4,
@@ -385,8 +440,25 @@
         }
     }
 
+    #[cfg(feature = "ffi")]
     fn to_c(self) -> libc::ssize_t {
-        self as _
+        match self {
+            Error::Done => -1,
+            Error::BufferTooShort => -2,
+            Error::UnknownVersion => -3,
+            Error::InvalidFrame => -4,
+            Error::InvalidPacket => -5,
+            Error::InvalidState => -6,
+            Error::InvalidStreamState(_) => -7,
+            Error::InvalidTransportParam => -8,
+            Error::CryptoFail => -9,
+            Error::TlsFail => -10,
+            Error::FlowControl => -11,
+            Error::StreamLimit => -12,
+            Error::FinalSize => -13,
+            Error::CongestionControl => -14,
+            Error::StreamStopped { .. } => -15,
+        }
     }
 }
 
@@ -408,6 +480,36 @@
     }
 }
 
+/// Ancillary information about incoming packets.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct RecvInfo {
+    /// The address the packet was received from.
+    pub from: SocketAddr,
+}
+
+/// Ancillary information about outgoing packets.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct SendInfo {
+    /// The address the packet should be sent to.
+    pub to: SocketAddr,
+
+    /// The time to send the packet out.
+    pub at: time::Instant,
+}
+
+/// Represents information carried by `CONNECTION_CLOSE` frames.
+#[derive(Clone, Debug, PartialEq)]
+pub struct ConnectionError {
+    /// Whether the error came from the application or the transport layer.
+    pub is_app: bool,
+
+    /// The error code carried by the `CONNECTION_CLOSE` frame.
+    pub error_code: u64,
+
+    /// The reason carried by the `CONNECTION_CLOSE` frame.
+    pub reason: Vec<u8>,
+}
+
 /// The stream's side to shutdown.
 ///
 /// This should be used when calling [`stream_shutdown()`].
@@ -428,7 +530,11 @@
 
     version: u32,
 
-    tls_ctx: tls::Context,
+    // BoringSSL's SSL_CTX structure is technically safe to share across threads
+    // but once shared, functions that modify it can't be used any more. We can't
+    // encode that in Rust, so just make it Send+Sync with a mutex to fulfill
+    // the Sync constraint.
+    tls_ctx: Mutex<tls::Context>,
 
     application_protos: Vec<Vec<u8>>,
 
@@ -440,6 +546,13 @@
 
     dgram_recv_max_queue_len: usize,
     dgram_send_max_queue_len: usize,
+
+    max_send_udp_payload_size: usize,
+}
+
+// See https://quicwg.org/base-drafts/rfc9000.html#section-15
+fn is_reserved_version(version: u32) -> bool {
+    version & RESERVED_VERSION_MASK == version
 }
 
 impl Config {
@@ -452,7 +565,11 @@
     /// # Ok::<(), quiche::Error>(())
     /// ```
     pub fn new(version: u32) -> Result<Config> {
-        let tls_ctx = tls::Context::new()?;
+        if !is_reserved_version(version) && !version_is_supported(version) {
+            return Err(Error::UnknownVersion);
+        }
+
+        let tls_ctx = Mutex::new(tls::Context::new()?);
 
         Ok(Config {
             local_transport_params: TransportParams::default(),
@@ -465,6 +582,8 @@
 
             dgram_recv_max_queue_len: DEFAULT_MAX_DGRAM_QUEUE_LEN,
             dgram_send_max_queue_len: DEFAULT_MAX_DGRAM_QUEUE_LEN,
+
+            max_send_udp_payload_size: MAX_SEND_UDP_PAYLOAD_SIZE,
         })
     }
 
@@ -481,7 +600,10 @@
     /// # Ok::<(), quiche::Error>(())
     /// ```
     pub fn load_cert_chain_from_pem_file(&mut self, file: &str) -> Result<()> {
-        self.tls_ctx.use_certificate_chain_file(file)
+        self.tls_ctx
+            .lock()
+            .unwrap()
+            .use_certificate_chain_file(file)
     }
 
     /// Configures the given private key.
@@ -496,7 +618,7 @@
     /// # Ok::<(), quiche::Error>(())
     /// ```
     pub fn load_priv_key_from_pem_file(&mut self, file: &str) -> Result<()> {
-        self.tls_ctx.use_privkey_file(file)
+        self.tls_ctx.lock().unwrap().use_privkey_file(file)
     }
 
     /// Specifies a file where trusted CA certificates are stored for the
@@ -512,7 +634,10 @@
     /// # Ok::<(), quiche::Error>(())
     /// ```
     pub fn load_verify_locations_from_file(&mut self, file: &str) -> Result<()> {
-        self.tls_ctx.load_verify_locations_from_file(file)
+        self.tls_ctx
+            .lock()
+            .unwrap()
+            .load_verify_locations_from_file(file)
     }
 
     /// Specifies a directory where trusted CA certificates are stored for the
@@ -530,7 +655,10 @@
     pub fn load_verify_locations_from_directory(
         &mut self, dir: &str,
     ) -> Result<()> {
-        self.tls_ctx.load_verify_locations_from_directory(dir)
+        self.tls_ctx
+            .lock()
+            .unwrap()
+            .load_verify_locations_from_directory(dir)
     }
 
     /// Configures whether to verify the peer's certificate.
@@ -538,7 +666,7 @@
     /// The default value is `true` for client connections, and `false` for
     /// server ones.
     pub fn verify_peer(&mut self, verify: bool) {
-        self.tls_ctx.set_verify(verify);
+        self.tls_ctx.lock().unwrap().set_verify(verify);
     }
 
     /// Configures whether to send GREASE values.
@@ -557,12 +685,26 @@
     /// [`set_keylog()`]: struct.Connection.html#method.set_keylog
     /// [keylog]: https://developer.mozilla.org/en-US/docs/Mozilla/Projects/NSS/Key_Log_Format
     pub fn log_keys(&mut self) {
-        self.tls_ctx.enable_keylog();
+        self.tls_ctx.lock().unwrap().enable_keylog();
+    }
+
+    /// Configures the session ticket key material.
+    ///
+    /// On the server this key will be used to encrypt and decrypt session
+    /// tickets, used to perform session resumption without server-side state.
+    ///
+    /// By default a key is generated internally, and rotated regularly, so
+    /// applications don't need to call this unless they need to use a
+    /// specific key (e.g. in order to support resumption across multiple
+    /// servers), in which case the application is also responsible for
+    /// rotating the key to provide forward secrecy.
+    pub fn set_ticket_key(&mut self, key: &[u8]) -> Result<()> {
+        self.tls_ctx.lock().unwrap().set_ticket_key(key)
     }
 
     /// Enables sending or receiving early data.
     pub fn enable_early_data(&mut self) {
-        self.tls_ctx.set_early_data_enabled(true);
+        self.tls_ctx.lock().unwrap().set_early_data_enabled(true);
     }
 
     /// Configures the list of supported application protocols.
@@ -596,10 +738,13 @@
 
         self.application_protos = protos_list;
 
-        self.tls_ctx.set_alpn(&self.application_protos)
+        self.tls_ctx
+            .lock()
+            .unwrap()
+            .set_alpn(&self.application_protos)
     }
 
-    /// Sets the `max_idle_timeout` transport parameter.
+    /// Sets the `max_idle_timeout` transport parameter, in milliseconds.
     ///
     /// The default value is infinite, that is, no timeout is used.
     pub fn set_max_idle_timeout(&mut self, v: u64) {
@@ -609,8 +754,15 @@
     /// Sets the `max_udp_payload_size transport` parameter.
     ///
     /// The default value is `65527`.
-    pub fn set_max_udp_payload_size(&mut self, v: u64) {
-        self.local_transport_params.max_udp_payload_size = v;
+    pub fn set_max_recv_udp_payload_size(&mut self, v: usize) {
+        self.local_transport_params.max_udp_payload_size = v as u64;
+    }
+
+    /// Sets the maximum outgoing UDP payload size.
+    ///
+    /// The default and minimum value is `1200`.
+    pub fn set_max_send_udp_payload_size(&mut self, v: usize) {
+        self.max_send_udp_payload_size = cmp::max(v, MAX_SEND_UDP_PAYLOAD_SIZE);
     }
 
     /// Sets the `initial_max_data` transport parameter.
@@ -722,7 +874,7 @@
 
     /// Sets the congestion control algorithm used by string.
     ///
-    /// The default value is `reno`. On error `Error::CongestionControl`
+    /// The default value is `cubic`. On error `Error::CongestionControl`
     /// will be returned.
     ///
     /// ## Examples:
@@ -777,10 +929,10 @@
     version: u32,
 
     /// Peer's connection ID.
-    dcid: Vec<u8>,
+    dcid: ConnectionId<'static>,
 
     /// Local connection ID.
-    scid: Vec<u8>,
+    scid: ConnectionId<'static>,
 
     /// Unique opaque ID for the connection that can be used for logging.
     trace_id: String,
@@ -795,11 +947,23 @@
     local_transport_params: TransportParams,
 
     /// TLS handshake state.
-    handshake: tls::Handshake,
+    ///
+    /// Due to the requirement for `Connection` to be Send+Sync, and the fact
+    /// that BoringSSL's SSL structure is not thread safe, we need to wrap the
+    /// handshake object in a mutex.
+    handshake: Mutex<tls::Handshake>,
+
+    /// Serialized TLS session buffer.
+    ///
+    /// This field is populated when a new session ticket is processed on the
+    /// client. On the server this is empty.
+    session: Option<Vec<u8>>,
 
     /// Loss recovery and congestion control state.
     recovery: recovery::Recovery,
 
+    peer_addr: SocketAddr,
+
     /// List of supported application protocols.
     application_protos: Vec<Vec<u8>>,
 
@@ -822,6 +986,9 @@
     /// Whether we send MAX_DATA frame.
     almost_full: bool,
 
+    /// Number of stream data bytes that can be buffered.
+    tx_cap: usize,
+
     /// Total number of bytes sent to the peer.
     tx_data: u64,
 
@@ -837,23 +1004,22 @@
 
     /// Peer's original destination connection ID. Used by the client to
     /// validate the server's transport parameter.
-    odcid: Option<Vec<u8>>,
+    odcid: Option<ConnectionId<'static>>,
 
     /// Peer's retry source connection ID. Used by the client during stateless
     /// retry to validate the server's transport parameter.
-    rscid: Option<Vec<u8>>,
+    rscid: Option<ConnectionId<'static>>,
 
     /// Received address verification token.
     token: Option<Vec<u8>>,
 
-    /// Error code to be sent to the peer in CONNECTION_CLOSE.
-    error: Option<u64>,
+    /// Error code and reason to be sent to the peer in a CONNECTION_CLOSE
+    /// frame.
+    local_error: Option<ConnectionError>,
 
-    /// Error code to be sent to the peer in APPLICATION_CLOSE.
-    app_error: Option<u64>,
-
-    /// Error reason to be sent to the peer in APPLICATION_CLOSE.
-    app_reason: Vec<u8>,
+    /// Error code and reason received from the peer in a CONNECTION_CLOSE
+    /// frame.
+    peer_error: Option<ConnectionError>,
 
     /// Received path challenge.
     challenge: Option<Vec<u8>>,
@@ -867,6 +1033,12 @@
     /// Draining timeout expiration time.
     draining_timer: Option<time::Instant>,
 
+    /// List of raw packets that were received before they could be decrypted.
+    undecryptable_pkts: VecDeque<(Vec<u8>, RecvInfo)>,
+
+    /// The negotiated ALPN protocol.
+    alpn: Vec<u8>,
+
     /// Whether this is a server-side connection.
     is_server: bool,
 
@@ -892,6 +1064,9 @@
     /// Whether the peer's transport parameters were parsed.
     parsed_peer_transport_params: bool,
 
+    /// Whether the connection handshake has been completed.
+    handshake_completed: bool,
+
     /// Whether the HANDSHAKE_DONE has been sent.
     handshake_done_sent: bool,
 
@@ -909,7 +1084,7 @@
     grease: bool,
 
     /// TLS keylog writer.
-    keylog: Option<Box<dyn std::io::Write + Send>>,
+    keylog: Option<Box<dyn std::io::Write + Send + Sync>>,
 
     /// Qlog streaming output.
     #[cfg(feature = "qlog")]
@@ -922,6 +1097,9 @@
     /// DATAGRAM queues.
     dgram_recv_queue: dgram::DatagramQueue,
     dgram_send_queue: dgram::DatagramQueue,
+
+    /// Whether to emit DATAGRAM frames in the next packet.
+    emit_dgram: bool,
 }
 
 /// Creates a new server-side connection.
@@ -937,14 +1115,17 @@
 ///
 /// ```no_run
 /// # let mut config = quiche::Config::new(0xbabababa)?;
-/// # let scid = [0xba; 16];
-/// let conn = quiche::accept(&scid, None, &mut config)?;
+/// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+/// # let from = "127.0.0.1:1234".parse().unwrap();
+/// let conn = quiche::accept(&scid, None, from, &mut config)?;
 /// # Ok::<(), quiche::Error>(())
 /// ```
+#[inline]
 pub fn accept(
-    scid: &[u8], odcid: Option<&[u8]>, config: &mut Config,
+    scid: &ConnectionId, odcid: Option<&ConnectionId>, from: SocketAddr,
+    config: &mut Config,
 ) -> Result<Pin<Box<Connection>>> {
-    let conn = Connection::new(scid, odcid, config, true)?;
+    let conn = Connection::new(scid, odcid, from, config, true)?;
 
     Ok(conn)
 }
@@ -960,17 +1141,20 @@
 /// ```no_run
 /// # let mut config = quiche::Config::new(0xbabababa)?;
 /// # let server_name = "quic.tech";
-/// # let scid = [0xba; 16];
-/// let conn = quiche::connect(Some(&server_name), &scid, &mut config)?;
+/// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+/// # let to = "127.0.0.1:1234".parse().unwrap();
+/// let conn = quiche::connect(Some(&server_name), &scid, to, &mut config)?;
 /// # Ok::<(), quiche::Error>(())
 /// ```
+#[inline]
 pub fn connect(
-    server_name: Option<&str>, scid: &[u8], config: &mut Config,
+    server_name: Option<&str>, scid: &ConnectionId, to: SocketAddr,
+    config: &mut Config,
 ) -> Result<Pin<Box<Connection>>> {
-    let conn = Connection::new(scid, None, config, false)?;
+    let conn = Connection::new(scid, None, to, config, false)?;
 
     if let Some(server_name) = server_name {
-        conn.handshake.set_host_name(server_name)?;
+        conn.handshake.lock().unwrap().set_host_name(server_name)?;
     }
 
     Ok(conn)
@@ -999,8 +1183,9 @@
 /// }
 /// # Ok::<(), quiche::Error>(())
 /// ```
+#[inline]
 pub fn negotiate_version(
-    scid: &[u8], dcid: &[u8], out: &mut [u8],
+    scid: &ConnectionId, dcid: &ConnectionId, out: &mut [u8],
 ) -> Result<usize> {
     packet::negotiate_version(scid, dcid, out)
 }
@@ -1026,12 +1211,12 @@
 /// # let mut config = quiche::Config::new(0xbabababa)?;
 /// # let mut buf = [0; 512];
 /// # let mut out = [0; 512];
-/// # let scid = [0xba; 16];
+/// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
 /// # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
 /// # fn mint_token(hdr: &quiche::Header, src: &std::net::SocketAddr) -> Vec<u8> {
 /// #     vec![]
 /// # }
-/// # fn validate_token<'a>(src: &std::net::SocketAddr, token: &'a [u8]) -> Option<&'a [u8]> {
+/// # fn validate_token<'a>(src: &std::net::SocketAddr, token: &'a [u8]) -> Option<quiche::ConnectionId<'a>> {
 /// #     None
 /// # }
 /// let (len, src) = socket.recv_from(&mut buf).unwrap();
@@ -1055,26 +1240,29 @@
 /// // Client sent token, validate it.
 /// let odcid = validate_token(&src, token);
 ///
-/// if odcid == None {
+/// if odcid.is_none() {
 ///     // Invalid address validation token.
 ///     return Ok(());
 /// }
 ///
-/// let conn = quiche::accept(&scid, odcid, &mut config)?;
+/// let conn = quiche::accept(&scid, odcid.as_ref(), src, &mut config)?;
 /// # Ok::<(), quiche::Error>(())
 /// ```
+#[inline]
 pub fn retry(
-    scid: &[u8], dcid: &[u8], new_scid: &[u8], token: &[u8], version: u32,
-    out: &mut [u8],
+    scid: &ConnectionId, dcid: &ConnectionId, new_scid: &ConnectionId,
+    token: &[u8], version: u32, out: &mut [u8],
 ) -> Result<usize> {
     packet::retry(scid, dcid, new_scid, token, version, out)
 }
 
 /// Returns true if the given protocol version is supported.
+#[inline]
 pub fn version_is_supported(version: u32) -> bool {
     matches!(
         version,
-        PROTOCOL_VERSION_DRAFT27 |
+        PROTOCOL_VERSION_V1 |
+            PROTOCOL_VERSION_DRAFT27 |
             PROTOCOL_VERSION_DRAFT28 |
             PROTOCOL_VERSION_DRAFT29
     )
@@ -1086,11 +1274,12 @@
 /// there is no room to add the frame in the packet. You may retry to add the
 /// frame later.
 macro_rules! push_frame_to_pkt {
-    ($frames:expr, $frame:expr, $payload_len: expr, $left:expr) => {{
+    ($out:expr, $frames:expr, $frame:expr, $left:expr) => {{
         if $frame.wire_len() <= $left {
-            $payload_len += $frame.wire_len();
             $left -= $frame.wire_len();
 
+            $frame.to_bytes(&mut $out)?;
+
             $frames.push($frame);
 
             true
@@ -1117,15 +1306,16 @@
 
 impl Connection {
     fn new(
-        scid: &[u8], odcid: Option<&[u8]>, config: &mut Config, is_server: bool,
+        scid: &ConnectionId, odcid: Option<&ConnectionId>, peer: SocketAddr,
+        config: &mut Config, is_server: bool,
     ) -> Result<Pin<Box<Connection>>> {
-        let tls = config.tls_ctx.new_handshake()?;
-        Connection::with_tls(scid, odcid, config, tls, is_server)
+        let tls = config.tls_ctx.lock().unwrap().new_handshake()?;
+        Connection::with_tls(scid, odcid, peer, config, tls, is_server)
     }
 
     fn with_tls(
-        scid: &[u8], odcid: Option<&[u8]>, config: &mut Config,
-        tls: tls::Handshake, is_server: bool,
+        scid: &ConnectionId, odcid: Option<&ConnectionId>, peer: SocketAddr,
+        config: &mut Config, tls: tls::Handshake, is_server: bool,
     ) -> Result<Pin<Box<Connection>>> {
         let max_rx_data = config.local_transport_params.initial_max_data;
 
@@ -1135,8 +1325,8 @@
         let mut conn = Box::pin(Connection {
             version: config.version,
 
-            dcid: Vec::new(),
-            scid: scid.to_vec(),
+            dcid: ConnectionId::default(),
+            scid: scid.to_vec().into(),
 
             trace_id: scid_as_hex.join(""),
 
@@ -1150,10 +1340,14 @@
 
             local_transport_params: config.local_transport_params.clone(),
 
-            handshake: tls,
+            handshake: Mutex::new(tls),
+
+            session: None,
 
             recovery: recovery::Recovery::new(&config),
 
+            peer_addr: peer,
+
             application_protos: config.application_protos.clone(),
 
             recv_count: 0,
@@ -1164,6 +1358,8 @@
             max_rx_data_next: max_rx_data,
             almost_full: false,
 
+            tx_cap: 0,
+
             tx_data: 0,
             max_tx_data: 0,
 
@@ -1180,10 +1376,9 @@
 
             token: None,
 
-            error: None,
+            local_error: None,
 
-            app_error: None,
-            app_reason: Vec::new(),
+            peer_error: None,
 
             challenge: None,
 
@@ -1193,6 +1388,10 @@
 
             draining_timer: None,
 
+            undecryptable_pkts: VecDeque::new(),
+
+            alpn: Vec::new(),
+
             is_server,
 
             derived_initial_secrets: false,
@@ -1211,6 +1410,8 @@
 
             parsed_peer_transport_params: false,
 
+            handshake_completed: false,
+
             handshake_done_sent: false,
 
             handshake_confirmed: false,
@@ -1236,22 +1437,29 @@
             dgram_send_queue: dgram::DatagramQueue::new(
                 config.dgram_send_max_queue_len,
             ),
+
+            emit_dgram: true,
         });
 
         if let Some(odcid) = odcid {
             conn.local_transport_params
-                .original_destination_connection_id = Some(odcid.to_vec());
+                .original_destination_connection_id = Some(odcid.to_vec().into());
 
             conn.local_transport_params.retry_source_connection_id =
-                Some(scid.to_vec());
+                Some(scid.to_vec().into());
 
             conn.did_retry = true;
         }
 
         conn.local_transport_params.initial_source_connection_id =
-            Some(scid.to_vec());
+            Some(scid.to_vec().into());
 
-        conn.handshake.init(&conn)?;
+        conn.handshake.lock().unwrap().init(&conn)?;
+
+        conn.handshake
+            .lock()
+            .unwrap()
+            .use_legacy_codepoint(config.version != PROTOCOL_VERSION_V1);
 
         conn.encode_transport_params()?;
 
@@ -1267,7 +1475,7 @@
                 conn.is_server,
             )?;
 
-            conn.dcid.extend_from_slice(&dcid);
+            conn.dcid = dcid.to_vec().into();
 
             conn.pkt_num_spaces[packet::EPOCH_INITIAL].crypto_open =
                 Some(aead_open);
@@ -1286,7 +1494,8 @@
     /// missing some early logs.
     ///
     /// [`Writer`]: https://doc.rust-lang.org/std/io/trait.Write.html
-    pub fn set_keylog(&mut self, writer: Box<dyn std::io::Write + Send>) {
+    #[inline]
+    pub fn set_keylog(&mut self, writer: Box<dyn std::io::Write + Send + Sync>) {
         self.keylog = Some(writer);
     }
 
@@ -1298,7 +1507,7 @@
     /// [`Writer`]: https://doc.rust-lang.org/std/io/trait.Write.html
     #[cfg(feature = "qlog")]
     pub fn set_qlog(
-        &mut self, writer: Box<dyn std::io::Write + Send>, title: String,
+        &mut self, writer: Box<dyn std::io::Write + Send + Sync>, title: String,
         description: String,
     ) {
         let vp = if self.is_server {
@@ -1335,11 +1544,13 @@
 
         streamer.start_log().ok();
 
+        let handshake = self.handshake.lock().unwrap();
+
         let ev = self.local_transport_params.to_qlog(
             qlog::TransportOwner::Local,
             self.version,
-            self.handshake.alpn_protocol(),
-            self.handshake.cipher(),
+            handshake.alpn_protocol(),
+            handshake.cipher(),
         );
 
         streamer.add_event(ev).ok();
@@ -1347,6 +1558,38 @@
         self.qlog_streamer = Some(streamer);
     }
 
+    /// Configures the given session for resumption.
+    ///
+    /// On the client, this can be used to offer the given serialized session,
+    /// as returned by [`session()`], for resumption.
+    ///
+    /// This must only be called immediately after creating a connection, that
+    /// is, before any packet is sent or received.
+    ///
+    /// [`session()`]: struct.Connection.html#method.session
+    #[inline]
+    pub fn set_session(&mut self, session: &[u8]) -> Result<()> {
+        let mut b = octets::Octets::with_slice(session);
+
+        let session_len = b.get_u64()? as usize;
+        let session_bytes = b.get_bytes(session_len)?;
+
+        self.handshake
+            .lock()
+            .unwrap()
+            .set_session(session_bytes.as_ref())?;
+
+        let raw_params_len = b.get_u64()? as usize;
+        let raw_params_bytes = b.get_bytes(raw_params_len)?;
+
+        let peer_params =
+            TransportParams::decode(raw_params_bytes.as_ref(), self.is_server)?;
+
+        self.process_peer_transport_params(peer_params);
+
+        Ok(())
+    }
+
     /// Processes QUIC packets received from the peer.
     ///
     /// On success the number of bytes processed from the input buffer is
@@ -1366,12 +1609,15 @@
     /// # let mut buf = [0; 512];
     /// # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
     /// # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-    /// # let scid = [0xba; 16];
-    /// # let mut conn = quiche::accept(&scid, None, &mut config)?;
+    /// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+    /// # let from = "127.0.0.1:1234".parse().unwrap();
+    /// # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
     /// loop {
-    ///     let read = socket.recv(&mut buf).unwrap();
+    ///     let (read, from) = socket.recv_from(&mut buf).unwrap();
     ///
-    ///     let read = match conn.recv(&mut buf[..read]) {
+    ///     let recv_info = quiche::RecvInfo { from };
+    ///
+    ///     let read = match conn.recv(&mut buf[..read], recv_info) {
     ///         Ok(v) => v,
     ///
     ///         Err(e) => {
@@ -1382,9 +1628,13 @@
     /// }
     /// # Ok::<(), quiche::Error>(())
     /// ```
-    pub fn recv(&mut self, buf: &mut [u8]) -> Result<usize> {
+    pub fn recv(&mut self, buf: &mut [u8], info: RecvInfo) -> Result<usize> {
         let len = buf.len();
 
+        if len == 0 {
+            return Err(Error::BufferTooShort);
+        }
+
         // Keep track of how many bytes we received from the client, so we
         // can limit bytes sent back before address validation, to a multiple
         // of this. The limit needs to be increased early on, so that if there
@@ -1401,7 +1651,7 @@
 
         // Process coalesced packets.
         while left > 0 {
-            let read = match self.recv_single(&mut buf[len - left..len]) {
+            let read = match self.recv_single(&mut buf[len - left..len], &info) {
                 Ok(v) => v,
 
                 Err(Error::Done) => left,
@@ -1418,6 +1668,25 @@
             left -= read;
         }
 
+        // Process previously undecryptable 0-RTT packets if the decryption key
+        // is now available.
+        if self.pkt_num_spaces[packet::EPOCH_APPLICATION]
+            .crypto_0rtt_open
+            .is_some()
+        {
+            while let Some((mut pkt, info)) = self.undecryptable_pkts.pop_front()
+            {
+                if let Err(e) = self.recv(&mut pkt, info) {
+                    self.undecryptable_pkts.clear();
+
+                    // Even though the packet was previously "accepted", it
+                    // should be safe to forward the error, as it also comes
+                    // from the `recv()` method.
+                    return Err(e);
+                }
+            }
+        }
+
         Ok(done)
     }
 
@@ -1430,18 +1699,18 @@
     /// On error, an error other than [`Done`] is returned.
     ///
     /// [`Done`]: enum.Error.html#variant.Done
-    fn recv_single(&mut self, buf: &mut [u8]) -> Result<usize> {
+    fn recv_single(&mut self, buf: &mut [u8], info: &RecvInfo) -> Result<usize> {
         let now = time::Instant::now();
 
         if buf.is_empty() {
             return Err(Error::Done);
         }
 
-        if self.is_closed() || self.draining_timer.is_some() {
+        if self.is_closed() || self.is_draining() {
             return Err(Error::Done);
         }
 
-        let is_closing = self.error.is_some() || self.app_error.is_some();
+        let is_closing = self.local_error.is_some();
 
         if is_closing {
             return Err(Error::Done);
@@ -1494,20 +1763,33 @@
                 return Err(Error::Done);
             }
 
-            match versions.iter().filter(|&&v| version_is_supported(v)).max() {
-                Some(v) => self.version = *v,
+            let supported_versions =
+                versions.iter().filter(|&&v| version_is_supported(v));
 
-                None => {
-                    // We don't support any of the versions offered.
-                    //
-                    // While a man-in-the-middle attacker might be able to
-                    // inject a version negotiation packet that triggers this
-                    // failure, the window of opportunity is very small and
-                    // this error is quite useful for debugging, so don't just
-                    // ignore the packet.
-                    return Err(Error::UnknownVersion);
-                },
-            };
+            let mut found_version = false;
+
+            for &v in supported_versions {
+                found_version = true;
+
+                // The final version takes precedence over draft ones.
+                if v == PROTOCOL_VERSION_V1 {
+                    self.version = v;
+                    break;
+                }
+
+                self.version = cmp::max(self.version, v);
+            }
+
+            if !found_version {
+                // We don't support any of the versions offered.
+                //
+                // While a man-in-the-middle attacker might be able to
+                // inject a version negotiation packet that triggers this
+                // failure, the window of opportunity is very small and
+                // this error is quite useful for debugging, so don't just
+                // ignore the packet.
+                return Err(Error::UnknownVersion);
+            }
 
             self.did_version_negotiation = true;
 
@@ -1521,13 +1803,18 @@
             // Reset connection state to force sending another Initial packet.
             self.drop_epoch_state(packet::EPOCH_INITIAL, now);
             self.got_peer_conn_id = false;
-            self.handshake.clear()?;
+            self.handshake.lock().unwrap().clear()?;
 
             self.pkt_num_spaces[packet::EPOCH_INITIAL].crypto_open =
                 Some(aead_open);
             self.pkt_num_spaces[packet::EPOCH_INITIAL].crypto_seal =
                 Some(aead_seal);
 
+            self.handshake
+                .lock()
+                .unwrap()
+                .use_legacy_codepoint(self.version != PROTOCOL_VERSION_V1);
+
             // Encode transport parameters again, as the new version might be
             // using a different format.
             self.encode_transport_params()?;
@@ -1561,8 +1848,7 @@
             // Remember peer's new connection ID.
             self.odcid = Some(self.dcid.clone());
 
-            self.dcid.resize(hdr.scid.len(), 0);
-            self.dcid.copy_from_slice(&hdr.scid);
+            self.dcid = hdr.scid.clone();
 
             self.rscid = Some(self.dcid.clone());
 
@@ -1576,7 +1862,7 @@
             // Reset connection state to force sending another Initial packet.
             self.drop_epoch_state(packet::EPOCH_INITIAL, now);
             self.got_peer_conn_id = false;
-            self.handshake.clear()?;
+            self.handshake.lock().unwrap().clear()?;
 
             self.pkt_num_spaces[packet::EPOCH_INITIAL].crypto_open =
                 Some(aead_open);
@@ -1594,6 +1880,11 @@
             self.version = hdr.version;
             self.did_version_negotiation = true;
 
+            self.handshake
+                .lock()
+                .unwrap()
+                .use_legacy_codepoint(self.version != PROTOCOL_VERSION_V1);
+
             // Encode transport parameters again, as the new version might be
             // using a different format.
             self.encode_transport_params()?;
@@ -1620,6 +1911,17 @@
             })? as usize
         };
 
+        // Make sure the buffer is same or larger than an explicit
+        // payload length.
+        if payload_len > b.cap() {
+            return Err(drop_pkt_on_err(
+                Error::InvalidPacket,
+                self.recv_count,
+                self.is_server,
+                &self.trace_id,
+            ));
+        }
+
         // Derive initial secrets on the server.
         if !self.derived_initial_secrets {
             let (aead_open, aead_seal) = crypto::derive_initial_key_material(
@@ -1640,24 +1942,45 @@
         let epoch = hdr.ty.to_epoch()?;
 
         // Select AEAD context used to open incoming packet.
-        #[allow(clippy::or_fun_call)]
-        let aead = (self.pkt_num_spaces[epoch].crypto_0rtt_open.as_ref())
+        let aead = if hdr.ty == packet::Type::ZeroRTT {
             // Only use 0-RTT key if incoming packet is 0-RTT.
-            .filter(|_| hdr.ty == packet::Type::ZeroRTT)
+            self.pkt_num_spaces[epoch].crypto_0rtt_open.as_ref()
+        } else {
             // Otherwise use the packet number space's main key.
-            .or(self.pkt_num_spaces[epoch].crypto_open.as_ref())
-            // Finally, discard packet if no usable key is available.
-            //
-            // TODO: buffer 0-RTT/1-RTT packets instead of discarding when the
-            // required key is not available yet, as an optimization.
-            .ok_or_else(|| {
-                drop_pkt_on_err(
+            self.pkt_num_spaces[epoch].crypto_open.as_ref()
+        };
+
+        // Finally, discard packet if no usable key is available.
+        let aead = match aead {
+            Some(v) => v,
+
+            None => {
+                if hdr.ty == packet::Type::ZeroRTT &&
+                    self.undecryptable_pkts.len() < MAX_UNDECRYPTABLE_PACKETS &&
+                    !self.is_established()
+                {
+                    // Buffer 0-RTT packets when the required read key is not
+                    // available yet, and process them later.
+                    //
+                    // TODO: in the future we might want to buffer other types
+                    // of undecryptable packets as well.
+                    let pkt_len = b.off() + payload_len;
+                    let pkt = (b.buf()[..pkt_len]).to_vec();
+
+                    self.undecryptable_pkts.push_back((pkt, *info));
+                    return Ok(pkt_len);
+                }
+
+                let e = drop_pkt_on_err(
                     Error::CryptoFail,
                     self.recv_count,
                     self.is_server,
                     &self.trace_id,
-                )
-            })?;
+                );
+
+                return Err(e);
+            },
+        };
 
         let aead_tag_len = aead.alg().tag_len();
 
@@ -1721,6 +2044,11 @@
             return Err(Error::Done);
         }
 
+        // Packets with no frames are invalid.
+        if payload.cap() == 0 {
+            return Err(Error::InvalidPacket);
+        }
+
         if !self.is_server && !self.got_peer_conn_id {
             if self.odcid.is_none() {
                 self.odcid = Some(self.dcid.clone());
@@ -1728,18 +2056,21 @@
 
             // Replace the randomly generated destination connection ID with
             // the one supplied by the server.
-            self.dcid.resize(hdr.scid.len(), 0);
-            self.dcid.copy_from_slice(&hdr.scid);
+            self.dcid = hdr.scid.clone();
 
             self.got_peer_conn_id = true;
         }
 
         if self.is_server && !self.got_peer_conn_id {
-            self.dcid.extend_from_slice(&hdr.scid);
+            self.dcid = hdr.scid.clone();
 
-            if !self.did_retry && self.version >= PROTOCOL_VERSION_DRAFT28 {
+            if !self.did_retry &&
+                (self.version >= PROTOCOL_VERSION_DRAFT28 ||
+                    self.version == PROTOCOL_VERSION_V1)
+            {
                 self.local_transport_params
-                    .original_destination_connection_id = Some(hdr.dcid.to_vec());
+                    .original_destination_connection_id =
+                    Some(hdr.dcid.to_vec().into());
 
                 self.encode_transport_params()?;
             }
@@ -1790,11 +2121,13 @@
         if self.is_established() {
             qlog_with!(self.qlog_streamer, q, {
                 if !self.qlogged_peer_params {
+                    let handshake = self.handshake.lock().unwrap();
+
                     let ev = self.peer_transport_params.to_qlog(
                         qlog::TransportOwner::Remote,
                         self.version,
-                        self.handshake.alpn_protocol(),
-                        self.handshake.cipher(),
+                        handshake.alpn_protocol(),
+                        handshake.cipher(),
                     );
 
                     q.add_event(ev).ok();
@@ -1818,23 +2151,47 @@
                     }
                 },
 
-                frame::Frame::Crypto { data } => {
+                frame::Frame::CryptoHeader { offset, length } => {
                     self.pkt_num_spaces[epoch]
                         .crypto_stream
                         .send
-                        .ack(data.off(), data.len());
+                        .ack_and_drop(offset, length);
                 },
 
-                frame::Frame::Stream { stream_id, data } => {
+                frame::Frame::StreamHeader {
+                    stream_id,
+                    offset,
+                    length,
+                    ..
+                } => {
                     let stream = match self.streams.get_mut(stream_id) {
                         Some(v) => v,
 
                         None => continue,
                     };
 
-                    stream.send.ack(data.off(), data.len());
+                    stream.send.ack_and_drop(offset, length);
 
-                    if stream.is_complete() {
+                    // Only collect the stream if it is complete and not
+                    // readable. If it is readable, it will get collected when
+                    // stream_recv() is used.
+                    if stream.is_complete() && !stream.is_readable() {
+                        let local = stream.local;
+                        self.streams.collect(stream_id, local);
+                    }
+                },
+
+                frame::Frame::ResetStream { stream_id, .. } => {
+                    let stream = match self.streams.get_mut(stream_id) {
+                        Some(v) => v,
+
+                        None => continue,
+                    };
+
+                    // Only collect the stream if it is complete and not
+                    // readable. If it is readable, it will get collected when
+                    // stream_recv() is used.
+                    if stream.is_complete() && !stream.is_readable() {
                         let local = stream.local;
                         self.streams.collect(stream_id, local);
                     }
@@ -1864,6 +2221,12 @@
             self.idle_timer = Some(now + idle_timeout);
         }
 
+        // Update send capacity.
+        self.tx_cap = cmp::min(
+            self.recovery.cwnd_available() as u64,
+            self.max_tx_data - self.tx_data,
+        ) as usize;
+
         self.recv_count += 1;
 
         let read = b.off() + aead_tag_len;
@@ -1912,10 +2275,11 @@
     /// # let mut out = [0; 512];
     /// # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
     /// # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-    /// # let scid = [0xba; 16];
-    /// # let mut conn = quiche::accept(&scid, None, &mut config)?;
+    /// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+    /// # let from = "127.0.0.1:1234".parse().unwrap();
+    /// # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
     /// loop {
-    ///     let write = match conn.send(&mut out) {
+    ///     let (write, send_info) = match conn.send(&mut out) {
     ///         Ok(v) => v,
     ///
     ///         Err(quiche::Error::Done) => {
@@ -1929,47 +2293,162 @@
     ///         },
     ///     };
     ///
-    ///     socket.send(&out[..write]).unwrap();
+    ///     socket.send_to(&out[..write], &send_info.to).unwrap();
     /// }
     /// # Ok::<(), quiche::Error>(())
     /// ```
-    pub fn send(&mut self, out: &mut [u8]) -> Result<usize> {
+    pub fn send(&mut self, out: &mut [u8]) -> Result<(usize, SendInfo)> {
+        if out.is_empty() {
+            return Err(Error::BufferTooShort);
+        }
+
+        if self.is_closed() || self.is_draining() {
+            return Err(Error::Done);
+        }
+
+        if self.local_error.is_none() {
+            self.do_handshake()?;
+        }
+
+        // Process previously undecryptable 0-RTT packets if the decryption key
+        // is now available.
+        if self.pkt_num_spaces[packet::EPOCH_APPLICATION]
+            .crypto_0rtt_open
+            .is_some()
+        {
+            while let Some((mut pkt, info)) = self.undecryptable_pkts.pop_front()
+            {
+                if self.recv(&mut pkt, info).is_err() {
+                    self.undecryptable_pkts.clear();
+
+                    // Forwarding the error value here could confuse
+                    // applications, as they may not expect getting a `recv()`
+                    // error when calling `send()`.
+                    //
+                    // We simply fall-through to sending packets, which should
+                    // take care of terminating the connection as needed.
+                    break;
+                }
+            }
+        }
+
+        // There's no point in trying to send a packet if the Initial secrets
+        // have not been derived yet, so return early.
+        if !self.derived_initial_secrets {
+            return Err(Error::Done);
+        }
+
+        let mut has_initial = false;
+
+        let mut done = 0;
+
+        // Limit output packet size to respect the sender and receiver's
+        // maximum UDP payload size limit.
+        let mut left = cmp::min(out.len(), self.max_send_udp_payload_size());
+
+        // Limit data sent by the server based on the amount of data received
+        // from the client before its address is validated.
+        if !self.verified_peer_address && self.is_server {
+            left = cmp::min(left, self.max_send_bytes);
+        }
+
+        // Generate coalesced packets.
+        while left > 0 {
+            let (ty, written) = match self
+                .send_single(&mut out[done..done + left], has_initial)
+            {
+                Ok(v) => v,
+
+                Err(Error::BufferTooShort) | Err(Error::Done) => break,
+
+                Err(e) => return Err(e),
+            };
+
+            done += written;
+            left -= written;
+
+            match ty {
+                packet::Type::Initial => has_initial = true,
+
+                // No more packets can be coalesced after a 1-RTT.
+                packet::Type::Short => break,
+
+                _ => (),
+            };
+
+            // When sending multiple PTO probes, don't coalesce them together,
+            // so they are sent on separate UDP datagrams.
+            if let Ok(epoch) = ty.to_epoch() {
+                if self.recovery.loss_probes[epoch] > 0 {
+                    break;
+                }
+            }
+        }
+
+        if done == 0 {
+            return Err(Error::Done);
+        }
+
+        // Pad UDP datagram if it contains a QUIC Initial packet.
+        if has_initial && left > 0 && done < MIN_CLIENT_INITIAL_LEN {
+            let pad_len = cmp::min(left, MIN_CLIENT_INITIAL_LEN - done);
+
+            // Fill padding area with null bytes, to avoid leaking information
+            // in case the application reuses the packet buffer.
+            out[done..done + pad_len].fill(0);
+
+            done += pad_len;
+        }
+
+        let info = SendInfo {
+            to: self.peer_addr,
+
+            at: self
+                .recovery
+                .get_packet_send_time()
+                .unwrap_or_else(time::Instant::now),
+        };
+
+        Ok((done, info))
+    }
+
+    fn send_single(
+        &mut self, out: &mut [u8], has_initial: bool,
+    ) -> Result<(packet::Type, usize)> {
         let now = time::Instant::now();
 
         if out.is_empty() {
             return Err(Error::BufferTooShort);
         }
 
-        if self.is_closed() || self.draining_timer.is_some() {
+        if self.is_draining() {
             return Err(Error::Done);
         }
 
-        // If the Initial secrets have not been derived yet, there's no point
-        // in trying to send a packet, so return early.
-        if !self.derived_initial_secrets {
-            return Err(Error::Done);
-        }
-
-        let is_closing = self.error.is_some() || self.app_error.is_some();
-
-        if !is_closing {
-            self.do_handshake()?;
-        }
+        let is_closing = self.local_error.is_some();
 
         let mut b = octets::OctetsMut::with_slice(out);
 
-        let epoch = self.write_epoch()?;
+        let pkt_type = self.write_pkt_type()?;
 
-        let pkt_type = packet::Type::from_epoch(epoch);
+        let epoch = pkt_type.to_epoch()?;
 
         // Process lost frames.
         for lost in self.recovery.lost[epoch].drain(..) {
             match lost {
-                frame::Frame::Crypto { data } => {
-                    self.pkt_num_spaces[epoch].crypto_stream.send.push(data)?;
+                frame::Frame::CryptoHeader { offset, length } => {
+                    self.pkt_num_spaces[epoch]
+                        .crypto_stream
+                        .send
+                        .retransmit(offset, length);
                 },
 
-                frame::Frame::Stream { stream_id, data } => {
+                frame::Frame::StreamHeader {
+                    stream_id,
+                    offset,
+                    length,
+                    fin,
+                } => {
                     let stream = match self.streams.get_mut(stream_id) {
                         Some(v) => v,
 
@@ -1978,9 +2457,9 @@
 
                     let was_flushable = stream.is_flushable();
 
-                    let empty_fin = data.is_empty() && data.fin();
+                    let empty_fin = length == 0 && fin;
 
-                    stream.send.push(data)?;
+                    stream.send.retransmit(offset, length);
 
                     // If the stream is now flushable push it to the flushable
                     // queue, but only if it wasn't already queued.
@@ -2002,6 +2481,16 @@
                     self.pkt_num_spaces[epoch].ack_elicited = true;
                 },
 
+                frame::Frame::ResetStream {
+                    stream_id,
+                    error_code,
+                    final_size,
+                } =>
+                    if self.streams.get(stream_id).is_some() {
+                        self.streams
+                            .mark_reset(stream_id, true, error_code, final_size);
+                    },
+
                 frame::Frame::HandshakeDone => {
                     self.handshake_done_sent = false;
                 },
@@ -2022,18 +2511,9 @@
 
         let mut left = b.cap();
 
-        // Limit output packet size to respect peer's max_packet_size limit.
-        left = cmp::min(left, self.max_send_udp_payload_len());
-
         // Limit output packet size by congestion window size.
         left = cmp::min(left, self.recovery.cwnd_available());
 
-        // Limit data sent by the server based on the amount of data received
-        // from the client before its address is validated.
-        if !self.verified_peer_address && self.is_server {
-            left = cmp::min(left, self.max_send_bytes);
-        }
-
         let pn = self.pkt_num_spaces[epoch].next_pkt_num;
         let pn_len = packet::pkt_num_len(pn)?;
 
@@ -2044,16 +2524,11 @@
 
         let hdr = Header {
             ty: pkt_type,
-            version: self.version,
-            dcid: self.dcid.clone(),
 
-            // Don't needlessly clone the source connection ID for 1-RTT packets
-            // as it is not used.
-            scid: if pkt_type != packet::Type::Short {
-                self.scid.clone()
-            } else {
-                Vec::new()
-            },
+            version: self.version,
+
+            dcid: ConnectionId::from_ref(&self.dcid),
+            scid: ConnectionId::from_ref(&self.scid),
 
             pkt_num: 0,
             pkt_num_len: pn_len,
@@ -2080,10 +2555,10 @@
         // We assume that the payload length, which is only present in long
         // header packets, can always be encoded with a 2-byte varint.
         if pkt_type != packet::Type::Short {
-            overhead += 2;
+            overhead += PAYLOAD_LENGTH_LEN;
         }
 
-        // Make sure we have enough space left for the packet.
+        // Make sure we have enough space left for the packet overhead.
         match left.checked_sub(overhead) {
             Some(v) => left = v,
 
@@ -2099,13 +2574,31 @@
             },
         }
 
+        // Make sure there is enough space for the minimum payload length.
+        if left < PAYLOAD_MIN_LEN {
+            self.recovery.update_app_limited(false);
+            return Err(Error::Done);
+        }
+
         let mut frames: Vec<frame::Frame> = Vec::new();
 
         let mut ack_eliciting = false;
         let mut in_flight = false;
         let mut has_data = false;
 
-        let mut payload_len = 0;
+        let header_offset = b.off();
+
+        // Reserve space for payload length in advance. Since we don't yet know
+        // what the final length will be, we reserve 2 bytes in all cases.
+        //
+        // Only long header packets have an explicit length field.
+        if pkt_type != packet::Type::Short {
+            b.skip(PAYLOAD_LENGTH_LEN)?;
+        }
+
+        packet::encode_pkt_num(pn, &mut b)?;
+
+        let payload_offset = b.off();
 
         // Create ACK frame.
         if self.pkt_num_spaces[epoch].recv_pkt_need_ack.len() > 0 &&
@@ -2125,7 +2618,7 @@
                 ranges: self.pkt_num_spaces[epoch].recv_pkt_need_ack.clone(),
             };
 
-            if push_frame_to_pkt!(frames, frame, payload_len, left) {
+            if push_frame_to_pkt!(b, frames, frame, left) {
                 self.pkt_num_spaces[epoch].ack_elicited = false;
             }
         }
@@ -2138,7 +2631,7 @@
             {
                 let frame = frame::Frame::HandshakeDone;
 
-                if push_frame_to_pkt!(frames, frame, payload_len, left) {
+                if push_frame_to_pkt!(b, frames, frame, left) {
                     self.handshake_done_sent = true;
 
                     ack_eliciting = true;
@@ -2152,7 +2645,7 @@
                     max: self.streams.max_streams_bidi_next(),
                 };
 
-                if push_frame_to_pkt!(frames, frame, payload_len, left) {
+                if push_frame_to_pkt!(b, frames, frame, left) {
                     self.streams.update_max_streams_bidi();
 
                     ack_eliciting = true;
@@ -2166,7 +2659,7 @@
                     max: self.streams.max_streams_uni_next(),
                 };
 
-                if push_frame_to_pkt!(frames, frame, payload_len, left) {
+                if push_frame_to_pkt!(b, frames, frame, left) {
                     self.streams.update_max_streams_uni();
 
                     ack_eliciting = true;
@@ -2174,28 +2667,11 @@
                 }
             }
 
-            // Create MAX_DATA frame as needed.
-            if self.almost_full {
-                let frame = frame::Frame::MaxData {
-                    max: self.max_rx_data_next,
-                };
-
-                if push_frame_to_pkt!(frames, frame, payload_len, left) {
-                    self.almost_full = false;
-
-                    // Commits the new max_rx_data limit.
-                    self.max_rx_data = self.max_rx_data_next;
-
-                    ack_eliciting = true;
-                    in_flight = true;
-                }
-            }
-
             // Create DATA_BLOCKED frame.
             if let Some(limit) = self.blocked_limit {
                 let frame = frame::Frame::DataBlocked { limit };
 
-                if push_frame_to_pkt!(frames, frame, payload_len, left) {
+                if push_frame_to_pkt!(b, frames, frame, left) {
                     self.blocked_limit = None;
 
                     ack_eliciting = true;
@@ -2221,13 +2697,75 @@
                     max: stream.recv.max_data_next(),
                 };
 
-                if push_frame_to_pkt!(frames, frame, payload_len, left) {
+                if push_frame_to_pkt!(b, frames, frame, left) {
                     stream.recv.update_max_data();
 
                     self.streams.mark_almost_full(stream_id, false);
 
                     ack_eliciting = true;
                     in_flight = true;
+
+                    // Also send MAX_DATA when MAX_STREAM_DATA is sent, to avoid a
+                    // potential race condition.
+                    self.almost_full = true;
+                }
+            }
+
+            // Create MAX_DATA frame as needed.
+            if self.almost_full && self.max_rx_data < self.max_rx_data_next {
+                let frame = frame::Frame::MaxData {
+                    max: self.max_rx_data_next,
+                };
+
+                if push_frame_to_pkt!(b, frames, frame, left) {
+                    self.almost_full = false;
+
+                    // Commits the new max_rx_data limit.
+                    self.max_rx_data = self.max_rx_data_next;
+
+                    ack_eliciting = true;
+                    in_flight = true;
+                }
+            }
+
+            // Create STOP_SENDING frames as needed.
+            for (stream_id, error_code) in self
+                .streams
+                .stopped()
+                .map(|(&k, &v)| (k, v))
+                .collect::<Vec<(u64, u64)>>()
+            {
+                let frame = frame::Frame::StopSending {
+                    stream_id,
+                    error_code,
+                };
+
+                if push_frame_to_pkt!(b, frames, frame, left) {
+                    self.streams.mark_stopped(stream_id, false, 0);
+
+                    ack_eliciting = true;
+                    in_flight = true;
+                }
+            }
+
+            // Create RESET_STREAM frames as needed.
+            for (stream_id, (error_code, final_size)) in self
+                .streams
+                .reset()
+                .map(|(&k, &v)| (k, v))
+                .collect::<Vec<(u64, (u64, u64))>>()
+            {
+                let frame = frame::Frame::ResetStream {
+                    stream_id,
+                    error_code,
+                    final_size,
+                };
+
+                if push_frame_to_pkt!(b, frames, frame, left) {
+                    self.streams.mark_reset(stream_id, false, 0, 0);
+
+                    ack_eliciting = true;
+                    in_flight = true;
                 }
             }
 
@@ -2240,7 +2778,7 @@
             {
                 let frame = frame::Frame::StreamDataBlocked { stream_id, limit };
 
-                if push_frame_to_pkt!(frames, frame, payload_len, left) {
+                if push_frame_to_pkt!(b, frames, frame, left) {
                     self.streams.mark_blocked(stream_id, false, 0);
 
                     ack_eliciting = true;
@@ -2250,30 +2788,32 @@
         }
 
         // Create CONNECTION_CLOSE frame.
-        if let Some(err) = self.error {
-            let frame = frame::Frame::ConnectionClose {
-                error_code: err,
-                frame_type: 0,
-                reason: Vec::new(),
-            };
+        if let Some(conn_err) = self.local_error.as_ref() {
+            if conn_err.is_app {
+                // Create ApplicationClose frame.
+                if pkt_type == packet::Type::Short {
+                    let frame = frame::Frame::ApplicationClose {
+                        error_code: conn_err.error_code,
+                        reason: conn_err.reason.clone(),
+                    };
 
-            if push_frame_to_pkt!(frames, frame, payload_len, left) {
-                self.draining_timer = Some(now + (self.recovery.pto() * 3));
+                    if push_frame_to_pkt!(b, frames, frame, left) {
+                        self.draining_timer =
+                            Some(now + (self.recovery.pto() * 3));
 
-                ack_eliciting = true;
-                in_flight = true;
-            }
-        }
-
-        // Create APPLICATION_CLOSE frame.
-        if let Some(err) = self.app_error {
-            if pkt_type == packet::Type::Short {
-                let frame = frame::Frame::ApplicationClose {
-                    error_code: err,
-                    reason: self.app_reason.clone(),
+                        ack_eliciting = true;
+                        in_flight = true;
+                    }
+                }
+            } else {
+                // Create ConnectionClose frame.
+                let frame = frame::Frame::ConnectionClose {
+                    error_code: conn_err.error_code,
+                    frame_type: 0,
+                    reason: conn_err.reason.clone(),
                 };
 
-                if push_frame_to_pkt!(frames, frame, payload_len, left) {
+                if push_frame_to_pkt!(b, frames, frame, left) {
                     self.draining_timer = Some(now + (self.recovery.pto() * 3));
 
                     ack_eliciting = true;
@@ -2288,7 +2828,7 @@
                 data: challenge.clone(),
             };
 
-            if push_frame_to_pkt!(frames, frame, payload_len, left) {
+            if push_frame_to_pkt!(b, frames, frame, left) {
                 self.challenge = None;
 
                 ack_eliciting = true;
@@ -2301,25 +2841,88 @@
             left > frame::MAX_CRYPTO_OVERHEAD &&
             !is_closing
         {
-            let crypto_len = left - frame::MAX_CRYPTO_OVERHEAD;
-            let crypto_buf = self.pkt_num_spaces[epoch]
-                .crypto_stream
-                .send
-                .pop(crypto_len)?;
+            let crypto_off =
+                self.pkt_num_spaces[epoch].crypto_stream.send.off_front();
 
-            let frame = frame::Frame::Crypto { data: crypto_buf };
+            // Encode the frame.
+            //
+            // Instead of creating a `frame::Frame` object, encode the frame
+            // directly into the packet buffer.
+            //
+            // First we reserve some space in the output buffer for writing the
+            // frame header (we assume the length field is always a 2-byte
+            // varint as we don't know the value yet).
+            //
+            // Then we emit the data from the crypto stream's send buffer.
+            //
+            // Finally we go back and encode the frame header with the now
+            // available information.
+            let hdr_off = b.off();
+            let hdr_len = 1 + // frame type
+                octets::varint_len(crypto_off) + // offset
+                2; // length, always encode as 2-byte varint
 
-            if push_frame_to_pkt!(frames, frame, payload_len, left) {
-                ack_eliciting = true;
-                in_flight = true;
-                has_data = true;
+            if let Some(max_len) = left.checked_sub(hdr_len) {
+                let (mut crypto_hdr, mut crypto_payload) =
+                    b.split_at(hdr_off + hdr_len)?;
+
+                // Write stream data into the packet buffer.
+                let (len, _) = self.pkt_num_spaces[epoch]
+                    .crypto_stream
+                    .send
+                    .emit(&mut crypto_payload.as_mut()[..max_len])?;
+
+                // Encode the frame's header.
+                //
+                // Due to how `OctetsMut::split_at()` works, `crypto_hdr` starts
+                // from the initial offset of `b` (rather than the current
+                // offset), so it needs to be advanced to the
+                // initial frame offset.
+                crypto_hdr.skip(hdr_off)?;
+
+                frame::encode_crypto_header(
+                    crypto_off,
+                    len as u64,
+                    &mut crypto_hdr,
+                )?;
+
+                // Advance the packet buffer's offset.
+                b.skip(hdr_len + len)?;
+
+                let frame = frame::Frame::CryptoHeader {
+                    offset: crypto_off,
+                    length: len,
+                };
+
+                if push_frame_to_pkt!(b, frames, frame, left) {
+                    ack_eliciting = true;
+                    in_flight = true;
+                    has_data = true;
+                }
             }
         }
 
+        // The preference of data-bearing frame to include in a packet
+        // is managed by `self.emit_dgram`. However, whether any frames
+        // can be sent depends on the state of their buffers. In the case
+        // where one type is preferred but its buffer is empty, fall back
+        // to the other type in order not to waste this function call.
+        let mut dgram_emitted = false;
+        let dgrams_to_emit = self.dgram_max_writable_len().is_some();
+        let stream_to_emit = self.streams.has_flushable();
+
+        let mut do_dgram = self.emit_dgram && dgrams_to_emit;
+        let do_stream = !self.emit_dgram && stream_to_emit;
+
+        if !do_stream && dgrams_to_emit {
+            do_dgram = true;
+        }
+
         // Create DATAGRAM frame.
-        if pkt_type == packet::Type::Short &&
+        if (pkt_type == packet::Type::Short || pkt_type == packet::Type::ZeroRTT) &&
             left > frame::MAX_DGRAM_OVERHEAD &&
-            !is_closing
+            !is_closing &&
+            do_dgram
         {
             if let Some(max_dgram_payload) = self.dgram_max_writable_len() {
                 while let Some(len) = self.dgram_send_queue.peek_front_len() {
@@ -2329,14 +2932,10 @@
                             Some(data) => {
                                 let frame = frame::Frame::Datagram { data };
 
-                                if push_frame_to_pkt!(
-                                    frames,
-                                    frame,
-                                    payload_len,
-                                    left
-                                ) {
+                                if push_frame_to_pkt!(b, frames, frame, left) {
                                     ack_eliciting = true;
                                     in_flight = true;
+                                    dgram_emitted = true;
                                 }
                             },
 
@@ -2353,9 +2952,10 @@
         }
 
         // Create a single STREAM frame for the first stream that is flushable.
-        if pkt_type == packet::Type::Short &&
+        if (pkt_type == packet::Type::Short || pkt_type == packet::Type::ZeroRTT) &&
             left > frame::MAX_STREAM_OVERHEAD &&
-            !is_closing
+            !is_closing &&
+            !dgram_emitted
         {
             while let Some(stream_id) = self.streams.pop_flushable() {
                 let stream = match self.streams.get_mut(stream_id) {
@@ -2364,34 +2964,76 @@
                     None => continue,
                 };
 
-                let off = stream.send.off_front();
+                // Avoid sending frames for streams that were already stopped.
+                //
+                // This might happen if stream data was buffered but not yet
+                // flushed on the wire when a STOP_SENDING frame is received.
+                if stream.send.is_stopped() {
+                    continue;
+                }
 
-                // Try to accurately account for the STREAM frame's overhead,
-                // such that we can fill as much of the packet buffer as
-                // possible.
-                let overhead = 1 +
-                    octets::varint_len(stream_id) +
-                    octets::varint_len(off) +
-                    octets::varint_len(left as u64);
+                let stream_off = stream.send.off_front();
 
-                let max_len = match left.checked_sub(overhead) {
+                // Encode the frame.
+                //
+                // Instead of creating a `frame::Frame` object, encode the frame
+                // directly into the packet buffer.
+                //
+                // First we reserve some space in the output buffer for writing
+                // the frame header (we assume the length field is
+                // always a 2-byte varint as we don't know the
+                // value yet).
+                //
+                // Then we emit the data from the stream's send buffer.
+                //
+                // Finally we go back and encode the frame header with the now
+                // available information.
+                let hdr_off = b.off();
+                let hdr_len = 1 + // frame type
+                    octets::varint_len(stream_id) + // stream_id
+                    octets::varint_len(stream_off) + // offset
+                    2; // length, always encode as 2-byte varint
+
+                let max_len = match left.checked_sub(hdr_len) {
                     Some(v) => v,
 
                     None => continue,
                 };
 
-                let stream_buf = stream.send.pop(max_len)?;
+                let (mut stream_hdr, mut stream_payload) =
+                    b.split_at(hdr_off + hdr_len)?;
 
-                if stream_buf.is_empty() && !stream_buf.fin() {
-                    continue;
-                }
+                // Write stream data into the packet buffer.
+                let (len, fin) =
+                    stream.send.emit(&mut stream_payload.as_mut()[..max_len])?;
 
-                let frame = frame::Frame::Stream {
+                // Encode the frame's header.
+                //
+                // Due to how `OctetsMut::split_at()` works, `stream_hdr` starts
+                // from the initial offset of `b` (rather than the current
+                // offset), so it needs to be advanced to the initial frame
+                // offset.
+                stream_hdr.skip(hdr_off)?;
+
+                frame::encode_stream_header(
                     stream_id,
-                    data: stream_buf,
+                    stream_off,
+                    len as u64,
+                    fin,
+                    &mut stream_hdr,
+                )?;
+
+                // Advance the packet buffer's offset.
+                b.skip(hdr_len + len)?;
+
+                let frame = frame::Frame::StreamHeader {
+                    stream_id,
+                    offset: stream_off,
+                    length: len,
+                    fin,
                 };
 
-                if push_frame_to_pkt!(frames, frame, payload_len, left) {
+                if push_frame_to_pkt!(b, frames, frame, left) {
                     ack_eliciting = true;
                     in_flight = true;
                     has_data = true;
@@ -2416,6 +3058,9 @@
             }
         }
 
+        // Alternate trying to send DATAGRAMs next time.
+        self.emit_dgram = !dgram_emitted;
+
         // Create PING for PTO probe if no other ack-elicitng frame is sent.
         if self.recovery.loss_probes[epoch] > 0 &&
             !ack_eliciting &&
@@ -2424,7 +3069,7 @@
         {
             let frame = frame::Frame::Ping;
 
-            if push_frame_to_pkt!(frames, frame, payload_len, left) {
+            if push_frame_to_pkt!(b, frames, frame, left) {
                 ack_eliciting = true;
                 in_flight = true;
             }
@@ -2442,46 +3087,45 @@
             return Err(Error::Done);
         }
 
-        // Pad the client's initial packet.
-        if !self.is_server && pkt_type == packet::Type::Initial {
-            let pkt_len = pn_len + payload_len + crypto_overhead;
+        // When coalescing a 1-RTT packet, we can't add padding in the UDP
+        // datagram, so use PADDING frames instead.
+        //
+        // This is only needed if an Initial packet has already been written to
+        // the UDP datagram, as Initial always requires padding.
+        if has_initial && pkt_type == packet::Type::Short && left >= 1 {
+            let frame = frame::Frame::Padding { len: left };
 
-            let frame = frame::Frame::Padding {
-                len: cmp::min(MIN_CLIENT_INITIAL_LEN - pkt_len, left),
-            };
-
-            payload_len += frame.wire_len();
-
-            frames.push(frame);
-
-            in_flight = true;
+            if push_frame_to_pkt!(b, frames, frame, left) {
+                in_flight = true;
+            }
         }
 
         // Pad payload so that it's always at least 4 bytes.
-        if payload_len < PAYLOAD_MIN_LEN {
+        if b.off() - payload_offset < PAYLOAD_MIN_LEN {
+            let payload_len = b.off() - payload_offset;
+
             let frame = frame::Frame::Padding {
                 len: PAYLOAD_MIN_LEN - payload_len,
             };
 
-            payload_len += frame.wire_len();
-
-            frames.push(frame);
-
-            in_flight = true;
+            #[allow(unused_assignments)]
+            if push_frame_to_pkt!(b, frames, frame, left) {
+                in_flight = true;
+            }
         }
 
-        payload_len += crypto_overhead;
+        let payload_len = b.off() - payload_offset;
+        let payload_len = payload_len + crypto_overhead;
 
-        // Only long header packets have an explicit length field.
+        // Fill in payload length.
         if pkt_type != packet::Type::Short {
             let len = pn_len + payload_len;
-            b.put_varint(len as u64)?;
+
+            let (_, mut payload_with_len) = b.split_at(header_offset)?;
+            payload_with_len
+                .put_varint_with_len(len as u64, PAYLOAD_LENGTH_LEN)?;
         }
 
-        packet::encode_pkt_num(pn, &mut b)?;
-
-        let payload_offset = b.off();
-
         trace!(
             "{} tx pkt {:?} len={} pn={}",
             self.trace_id,
@@ -2510,12 +3154,9 @@
             q.add_event(packet_sent_ev).ok();
         });
 
-        // Encode frames into the output packet.
         for frame in &mut frames {
             trace!("{} tx frm {:?}", self.trace_id, frame);
 
-            frame.to_bytes(&mut b)?;
-
             qlog_with!(self.qlog_streamer, q, {
                 q.add_frame(frame.to_qlog(), false).ok();
             });
@@ -2600,23 +3241,7 @@
             self.ack_eliciting_sent = true;
         }
 
-        Ok(written)
-    }
-
-    // Returns the maximum len of a packet to be sent. This is max_packet_size
-    // as sent by the peer, except during the handshake when we haven't parsed
-    // transport parameters yet, so use a default value then.
-    fn max_send_udp_payload_len(&self) -> usize {
-        if self.is_established() {
-            // We cap the maximum packet size to 16KB or so, so that it can be
-            // always encoded with a 2-byte varint.
-            cmp::min(16383, self.peer_transport_params.max_udp_payload_size)
-                as usize
-        } else {
-            // Allow for 1200 bytes (minimum QUIC packet size) during the
-            // handshake.
-            MIN_CLIENT_INITIAL_LEN
-        }
+        Ok((pkt_type, written))
     }
 
     /// Reads contiguous data from a stream into the provided slice.
@@ -2635,8 +3260,9 @@
     /// # let mut buf = [0; 512];
     /// # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
     /// # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-    /// # let scid = [0xba; 16];
-    /// # let mut conn = quiche::accept(&scid, None, &mut config)?;
+    /// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+    /// # let from = "127.0.0.1:1234".parse().unwrap();
+    /// # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
     /// # let stream_id = 0;
     /// while let Ok((read, fin)) = conn.stream_recv(stream_id, &mut buf) {
     ///     println!("Got {} bytes on stream {}", read, stream_id);
@@ -2650,13 +3276,13 @@
         if !stream::is_bidi(stream_id) &&
             stream::is_local(stream_id, self.is_server)
         {
-            return Err(Error::InvalidStreamState);
+            return Err(Error::InvalidStreamState(stream_id));
         }
 
         let stream = self
             .streams
             .get_mut(stream_id)
-            .ok_or(Error::InvalidStreamState)?;
+            .ok_or(Error::InvalidStreamState(stream_id))?;
 
         if !stream.is_readable() {
             return Err(Error::Done);
@@ -2665,7 +3291,7 @@
         #[cfg(feature = "qlog")]
         let offset = stream.recv.off_front();
 
-        let (read, fin) = stream.recv.pop(out)?;
+        let (read, fin) = stream.recv.emit(out)?;
 
         self.max_rx_data_next = self.max_rx_data_next.saturating_add(read as u64);
 
@@ -2711,6 +3337,10 @@
     /// On success the number of bytes written is returned, or [`Done`] if no
     /// data was written (e.g. because the stream has no capacity).
     ///
+    /// In addition, if the peer has signalled that it doesn't want to receive
+    /// any more data from this stream by sending the `STOP_SENDING` frame, the
+    /// [`StreamStopped`] error will be returned instead of any data.
+    ///
     /// Note that in order to avoid buffering an infinite amount of data in the
     /// stream's send buffer, streams are only allowed to buffer outgoing data
     /// up to the amount that the peer allows it to send (that is, up to the
@@ -2726,6 +3356,7 @@
     /// early data if enabled (whenever [`is_in_early_data()`] returns `true`).
     ///
     /// [`Done`]: enum.Error.html#variant.Done
+    /// [`StreamStopped`]: enum.Error.html#variant.StreamStopped
     /// [`is_established()`]: struct.Connection.html#method.is_established
     /// [`is_in_early_data()`]: struct.Connection.html#method.is_in_early_data
     ///
@@ -2735,8 +3366,9 @@
     /// # let mut buf = [0; 512];
     /// # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
     /// # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-    /// # let scid = [0xba; 16];
-    /// # let mut conn = quiche::accept(&scid, None, &mut config)?;
+    /// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+    /// # let from = "127.0.0.1:1234".parse().unwrap();
+    /// # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
     /// # let stream_id = 0;
     /// conn.stream_send(stream_id, b"hello", true)?;
     /// # Ok::<(), quiche::Error>(())
@@ -2748,7 +3380,7 @@
         if !stream::is_bidi(stream_id) &&
             !stream::is_local(stream_id, self.is_server)
         {
-            return Err(Error::InvalidStreamState);
+            return Err(Error::InvalidStreamState(stream_id));
         }
 
         // Mark the connection as blocked if the connection-level flow control
@@ -2762,7 +3394,7 @@
 
         // Truncate the input buffer based on the connection's send capacity if
         // necessary.
-        let cap = self.send_capacity();
+        let cap = self.tx_cap;
 
         let (buf, fin) = if cap < buf.len() {
             (&buf[..cap], false)
@@ -2778,7 +3410,14 @@
 
         let was_flushable = stream.is_flushable();
 
-        let sent = stream.send.push_slice(buf, fin)?;
+        let sent = match stream.send.write(buf, fin) {
+            Ok(v) => v,
+
+            Err(e) => {
+                self.streams.mark_writable(stream_id, false);
+                return Err(e);
+            },
+        };
 
         let urgency = stream.urgency;
         let incremental = stream.incremental;
@@ -2810,6 +3449,8 @@
             self.streams.mark_writable(stream_id, false);
         }
 
+        self.tx_cap -= sent;
+
         self.tx_data += sent as u64;
 
         self.recovery.rate_check_app_limited();
@@ -2868,7 +3509,8 @@
     /// data in the stream's receive buffer is dropped, and no additional data
     /// is added to it. Data received after calling this method is still
     /// validated and acked but not stored, and [`stream_recv()`] will not
-    /// return it to the application.
+    /// return it to the application. In addition, a `STOP_SENDING` frame will
+    /// be sent to the peer to signal it to stop sending data.
     ///
     /// When the `direction` argument is set to [`Shutdown::Write`], outstanding
     /// data in the stream's send buffer is dropped, and no additional data
@@ -2880,23 +3522,27 @@
     /// [`stream_recv()`]: struct.Connection.html#method.stream_recv
     /// [`stream_send()`]: struct.Connection.html#method.stream_send
     pub fn stream_shutdown(
-        &mut self, stream_id: u64, direction: Shutdown, _err: u64,
+        &mut self, stream_id: u64, direction: Shutdown, err: u64,
     ) -> Result<()> {
         // Get existing stream.
         let stream = self.streams.get_mut(stream_id).ok_or(Error::Done)?;
 
         match direction {
-            // TODO: send STOP_SENDING
             Shutdown::Read => {
                 stream.recv.shutdown()?;
 
+                if !stream.recv.is_fin() {
+                    self.streams.mark_stopped(stream_id, true, err);
+                }
+
                 // Once shutdown, the stream is guaranteed to be non-readable.
                 self.streams.mark_readable(stream_id, false);
             },
 
-            // TODO: send RESET_STREAM
             Shutdown::Write => {
-                stream.send.shutdown()?;
+                let final_size = stream.send.shutdown()?;
+
+                self.streams.mark_reset(stream_id, true, err, final_size);
 
                 // Once shutdown, the stream is guaranteed to be non-writable.
                 self.streams.mark_writable(stream_id, false);
@@ -2907,13 +3553,36 @@
     }
 
     /// Returns the stream's send capacity in bytes.
+    ///
+    /// If the specified stream doesn't exist (including when it has already
+    /// been completed and closed), the [`InvalidStreamState`] error will be
+    /// returned.
+    ///
+    /// In addition, if the peer has signalled that it doesn't want to receive
+    /// any more data from this stream by sending the `STOP_SENDING` frame, the
+    /// [`StreamStopped`] error will be returned.
+    ///
+    /// [`InvalidStreamState`]: enum.Error.html#variant.InvalidStreamState
+    /// [`StreamStopped`]: enum.Error.html#variant.StreamStopped
+    #[inline]
     pub fn stream_capacity(&self, stream_id: u64) -> Result<usize> {
         if let Some(stream) = self.streams.get(stream_id) {
-            let cap = cmp::min(self.send_capacity(), stream.send.cap());
+            let cap = cmp::min(self.tx_cap, stream.send.cap()?);
             return Ok(cap);
         };
 
-        Err(Error::InvalidStreamState)
+        Err(Error::InvalidStreamState(stream_id))
+    }
+
+    /// Returns true if the stream has data that can be read.
+    pub fn stream_readable(&self, stream_id: u64) -> bool {
+        let stream = match self.streams.get(stream_id) {
+            Some(v) => v,
+
+            None => return false,
+        };
+
+        stream.is_readable()
     }
 
     /// Returns true if all the data has been read from the specified stream.
@@ -2924,6 +3593,7 @@
     ///
     /// Basically this returns true when the peer either set the `fin` flag
     /// for the stream, or sent `RESET_STREAM`.
+    #[inline]
     pub fn stream_finished(&self, stream_id: u64) -> bool {
         let stream = match self.streams.get(stream_id) {
             Some(v) => v,
@@ -2934,6 +3604,26 @@
         stream.recv.is_fin()
     }
 
+    /// Returns the number of bidirectional streams that can be created
+    /// before the peer's stream count limit is reached.
+    ///
+    /// This can be useful to know if it's possible to create a bidirectional
+    /// stream without trying it first.
+    #[inline]
+    pub fn peer_streams_left_bidi(&self) -> u64 {
+        self.streams.peer_streams_left_bidi()
+    }
+
+    /// Returns the number of unidirectional streams that can be created
+    /// before the peer's stream count limit is reached.
+    ///
+    /// This can be useful to know if it's possible to create a unidirectional
+    /// stream without trying it first.
+    #[inline]
+    pub fn peer_streams_left_uni(&self) -> u64 {
+        self.streams.peer_streams_left_uni()
+    }
+
     /// Initializes the stream's application data.
     ///
     /// This can be used by applications to store per-stream information without
@@ -2947,7 +3637,7 @@
         &mut self, stream_id: u64, data: T,
     ) -> Result<()>
     where
-        T: std::any::Any + Send,
+        T: std::any::Any + Send + Sync,
     {
         // Get existing stream.
         let stream = self.streams.get_mut(stream_id).ok_or(Error::Done)?;
@@ -2994,8 +3684,9 @@
     /// # let mut buf = [0; 512];
     /// # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
     /// # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-    /// # let scid = [0xba; 16];
-    /// # let mut conn = quiche::accept(&scid, None, &mut config)?;
+    /// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+    /// # let from = "127.0.0.1:1234".parse().unwrap();
+    /// # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
     /// // Iterate over readable streams.
     /// for stream_id in conn.readable() {
     ///     // Stream is readable, read until there's no more data.
@@ -3005,6 +3696,7 @@
     /// }
     /// # Ok::<(), quiche::Error>(())
     /// ```
+    #[inline]
     pub fn readable(&self) -> StreamIter {
         self.streams.readable()
     }
@@ -3027,8 +3719,9 @@
     /// # let mut buf = [0; 512];
     /// # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
     /// # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-    /// # let scid = [0xba; 16];
-    /// # let mut conn = quiche::accept(&scid, None, &mut config)?;
+    /// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+    /// # let from = "127.0.0.1:1234".parse().unwrap();
+    /// # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
     /// // Iterate over writable streams.
     /// for stream_id in conn.writable() {
     ///     // Stream is writable, write some data.
@@ -3038,16 +3731,42 @@
     /// }
     /// # Ok::<(), quiche::Error>(())
     /// ```
+    #[inline]
     pub fn writable(&self) -> StreamIter {
         // If there is not enough connection-level send capacity, none of the
         // streams are writable, so return an empty iterator.
-        if self.send_capacity() == 0 {
+        if self.tx_cap == 0 {
             return StreamIter::default();
         }
 
         self.streams.writable()
     }
 
+    /// Returns the maximum possible size of egress UDP payloads.
+    ///
+    /// This is the maximum size of UDP payloads that can be sent, and depends
+    /// on both the configured maximum send payload size of the local endpoint
+    /// (as configured with [`set_max_send_udp_payload_size()`]), as well as
+    /// the transport parameter advertised by the remote peer.
+    ///
+    /// Note that this value can change during the lifetime of the connection,
+    /// but should remain stable across consecutive calls to [`send()`].
+    ///
+    /// [`set_max_send_udp_payload_size()`]:
+    ///     struct.Config.html#method.set_max_send_udp_payload_size
+    /// [`send()`]: struct.Connection.html#method.send
+    pub fn max_send_udp_payload_size(&self) -> usize {
+        if self.is_established() {
+            // We cap the maximum packet size to 16KB or so, so that it can be
+            // always encoded with a 2-byte varint.
+            cmp::min(16383, self.recovery.max_datagram_size())
+        } else {
+            // Allow for 1200 bytes (minimum QUIC packet size) during the
+            // handshake.
+            MIN_CLIENT_INITIAL_LEN
+        }
+    }
+
     /// Reads the first received DATAGRAM.
     ///
     /// On success the DATAGRAM's data is returned along with its size.
@@ -3066,14 +3785,16 @@
     /// # let mut buf = [0; 512];
     /// # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
     /// # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-    /// # let scid = [0xba; 16];
-    /// # let mut conn = quiche::accept(&scid, None, &mut config)?;
+    /// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+    /// # let from = "127.0.0.1:1234".parse().unwrap();
+    /// # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
     /// let mut dgram_buf = [0; 512];
     /// while let Ok((len)) = conn.dgram_recv(&mut dgram_buf) {
     ///     println!("Got {} bytes of DATAGRAM", len);
     /// }
     /// # Ok::<(), quiche::Error>(())
     /// ```
+    #[inline]
     pub fn dgram_recv(&mut self, buf: &mut [u8]) -> Result<usize> {
         match self.dgram_recv_queue.pop() {
             Some(d) => {
@@ -3102,15 +3823,41 @@
     ///
     /// [`Done`]: enum.Error.html#variant.Done
     /// [`BufferTooShort`]: enum.Error.html#variant.BufferTooShort
+    #[inline]
     pub fn dgram_recv_peek(&self, buf: &mut [u8], len: usize) -> Result<usize> {
         self.dgram_recv_queue.peek_front_bytes(buf, len)
     }
 
     /// Returns the length of the first stored DATAGRAM.
+    #[inline]
     pub fn dgram_recv_front_len(&self) -> Option<usize> {
         self.dgram_recv_queue.peek_front_len()
     }
 
+    /// Returns the number of items in the DATAGRAM receive queue.
+    #[inline]
+    pub fn dgram_recv_queue_len(&self) -> usize {
+        self.dgram_recv_queue.len()
+    }
+
+    /// Returns the total size of all items in the DATAGRAM receive queue.
+    #[inline]
+    pub fn dgram_recv_queue_byte_size(&self) -> usize {
+        self.dgram_recv_queue.byte_size()
+    }
+
+    /// Returns the number of items in the DATAGRAM send queue.
+    #[inline]
+    pub fn dgram_send_queue_len(&self) -> usize {
+        self.dgram_send_queue.len()
+    }
+
+    /// Returns the total size of all items in the DATAGRAM send queue.
+    #[inline]
+    pub fn dgram_send_queue_byte_size(&self) -> usize {
+        self.dgram_send_queue.byte_size()
+    }
+
     /// Sends data in a DATAGRAM frame.
     ///
     /// [`Done`] is returned if no data was written.
@@ -3136,8 +3883,9 @@
     /// # let mut buf = [0; 512];
     /// # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
     /// # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-    /// # let scid = [0xba; 16];
-    /// # let mut conn = quiche::accept(&scid, None, &mut config)?;
+    /// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+    /// # let from = "127.0.0.1:1234".parse().unwrap();
+    /// # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
     /// conn.dgram_send(b"hello")?;
     /// # Ok::<(), quiche::Error>(())
     /// ```
@@ -3170,12 +3918,14 @@
     /// ```no_run
     /// # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
     /// # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-    /// # let scid = [0xba; 16];
-    /// # let mut conn = quiche::accept(&scid, None, &mut config)?;
+    /// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+    /// # let from = "127.0.0.1:1234".parse().unwrap();
+    /// # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
     /// conn.dgram_send(b"hello")?;
     /// conn.dgram_purge_outgoing(&|d: &[u8]| -> bool { d[0] == 0 });
     /// # Ok::<(), quiche::Error>(())
     /// ```
+    #[inline]
     pub fn dgram_purge_outgoing<F: Fn(&[u8]) -> bool>(&mut self, f: F) {
         self.dgram_send_queue.purge(f);
     }
@@ -3191,8 +3941,9 @@
     /// # let mut buf = [0; 512];
     /// # let socket = std::net::UdpSocket::bind("127.0.0.1:0").unwrap();
     /// # let mut config = quiche::Config::new(quiche::PROTOCOL_VERSION)?;
-    /// # let scid = [0xba; 16];
-    /// # let mut conn = quiche::accept(&scid, None, &mut config)?;
+    /// # let scid = quiche::ConnectionId::from_ref(&[0xba; 16]);
+    /// # let from = "127.0.0.1:1234".parse().unwrap();
+    /// # let mut conn = quiche::accept(&scid, None, from, &mut config)?;
     /// if let Some(payload_size) = conn.dgram_max_writable_len() {
     ///     if payload_size > 5 {
     ///         conn.dgram_send(b"hello")?;
@@ -3200,12 +3951,13 @@
     /// }
     /// # Ok::<(), quiche::Error>(())
     /// ```
+    #[inline]
     pub fn dgram_max_writable_len(&self) -> Option<usize> {
         match self.peer_transport_params.max_datagram_frame_size {
             None => None,
             Some(peer_frame_len) => {
                 // Start from the maximum packet size...
-                let mut max_len = self.max_send_udp_payload_len();
+                let mut max_len = self.max_send_udp_payload_size();
                 // ...subtract the Short packet header overhead...
                 // (1 byte of pkt_len + len of dcid)
                 max_len = max_len.saturating_sub(1 + self.dcid.len());
@@ -3219,7 +3971,8 @@
                 // ...clamp to what peer can support...
                 max_len = cmp::min(peer_frame_len as usize, max_len);
                 // ...subtract frame overhead, checked for underflow.
-                max_len.checked_sub(frame::MAX_DGRAM_OVERHEAD)
+                // (1 byte of frame type + len of length )
+                max_len.checked_sub(1 + frame::MAX_DGRAM_OVERHEAD)
             },
         }
     }
@@ -3227,7 +3980,7 @@
     fn dgram_enabled(&self) -> bool {
         self.local_transport_params
             .max_datagram_frame_size
-            .is_none()
+            .is_some()
     }
 
     /// Returns the amount of time until the next timeout event.
@@ -3241,7 +3994,7 @@
             return None;
         }
 
-        let timeout = if self.draining_timer.is_some() {
+        let timeout = if self.is_draining() {
             // Draining timer takes precedence over all other timers. If it is
             // set it means the connection is closing so there's no point in
             // processing the other timers.
@@ -3333,29 +4086,30 @@
     /// Returns [`Done`] if the connection had already been closed.
     ///
     /// Note that the connection will not be closed immediately. An application
-    /// should continue calling [`recv()`], [`send()`] and [`timeout()`] as
-    /// normal, until the [`is_closed()`] method returns `true`.
+    /// should continue calling the [`recv()`], [`send()`], [`timeout()`] and
+    /// [`on_timeout()`] methods as normal, until the [`is_closed()`] method
+    /// returns `true`.
     ///
     /// [`Done`]: enum.Error.html#variant.Done
     /// [`recv()`]: struct.Connection.html#method.recv
     /// [`send()`]: struct.Connection.html#method.send
     /// [`timeout()`]: struct.Connection.html#method.timeout
+    /// [`on_timeout()`]: struct.Connection.html#method.on_timeout
     /// [`is_closed()`]: struct.Connection.html#method.is_closed
     pub fn close(&mut self, app: bool, err: u64, reason: &[u8]) -> Result<()> {
-        if self.is_closed() || self.draining_timer.is_some() {
+        if self.is_closed() || self.is_draining() {
             return Err(Error::Done);
         }
 
-        if self.error.is_some() || self.app_error.is_some() {
+        if self.local_error.is_some() {
             return Err(Error::Done);
         }
 
-        if app {
-            self.app_error = Some(err);
-            self.app_reason.extend_from_slice(reason);
-        } else {
-            self.error = Some(err);
-        }
+        self.local_error = Some(ConnectionError {
+            is_app: app,
+            error_code: err,
+            reason: reason.to_vec(),
+        });
 
         // When no packet was successfully processed close connection immediately.
         if self.recv_count == 0 {
@@ -3369,6 +4123,7 @@
     ///
     /// This can be used for logging purposes to differentiate between multiple
     /// connections.
+    #[inline]
     pub fn trace_id(&self) -> &str {
         &self.trace_id
     }
@@ -3376,39 +4131,114 @@
     /// Returns the negotiated ALPN protocol.
     ///
     /// If no protocol has been negotiated, the returned value is empty.
+    #[inline]
     pub fn application_proto(&self) -> &[u8] {
-        self.handshake.alpn_protocol()
+        self.alpn.as_ref()
     }
 
     /// Returns the peer's leaf certificate (if any) as a DER-encoded buffer.
+    #[inline]
     pub fn peer_cert(&self) -> Option<Vec<u8>> {
-        self.handshake.peer_cert()
+        self.handshake.lock().unwrap().peer_cert()
+    }
+
+    /// Returns the serialized cryptographic session for the connection.
+    ///
+    /// This can be used by a client to cache a connection's session, and resume
+    /// it later using the [`set_session()`] method.
+    ///
+    /// [`set_session()`]: struct.Connection.html#method.set_session
+    #[inline]
+    pub fn session(&self) -> Option<Vec<u8>> {
+        self.session.clone()
+    }
+
+    /// Returns the source connection ID.
+    ///
+    /// Note that the value returned can change throughout the connection's
+    /// lifetime.
+    #[inline]
+    pub fn source_id(&self) -> ConnectionId {
+        ConnectionId::from_ref(self.scid.as_ref())
+    }
+
+    /// Returns the destination connection ID.
+    ///
+    /// Note that the value returned can change throughout the connection's
+    /// lifetime.
+    #[inline]
+    pub fn destination_id(&self) -> ConnectionId {
+        ConnectionId::from_ref(self.dcid.as_ref())
     }
 
     /// Returns true if the connection handshake is complete.
+    #[inline]
     pub fn is_established(&self) -> bool {
-        self.handshake.is_completed()
+        self.handshake_completed
     }
 
     /// Returns true if the connection is resumed.
+    #[inline]
     pub fn is_resumed(&self) -> bool {
-        self.handshake.is_resumed()
+        self.handshake.lock().unwrap().is_resumed()
     }
 
     /// Returns true if the connection has a pending handshake that has
     /// progressed enough to send or receive early data.
+    #[inline]
     pub fn is_in_early_data(&self) -> bool {
-        self.handshake.is_in_early_data()
+        self.handshake.lock().unwrap().is_in_early_data()
+    }
+
+    /// Returns whether there is stream or DATAGRAM data available to read.
+    #[inline]
+    pub fn is_readable(&self) -> bool {
+        self.streams.has_readable() || self.dgram_recv_front_len().is_some()
+    }
+
+    /// Returns true if the connection is draining.
+    ///
+    /// If this returns true, the connection object cannot yet be dropped, but
+    /// no new application data can be sent or received. An application should
+    /// continue calling the [`recv()`], [`send()`], [`timeout()`], and
+    /// [`on_timeout()`] methods as normal, until the [`is_closed()`] method
+    /// returns `true`.
+    ///
+    /// [`recv()`]: struct.Connection.html#method.recv
+    /// [`send()`]: struct.Connection.html#method.send
+    /// [`timeout()`]: struct.Connection.html#method.timeout
+    /// [`on_timeout()`]: struct.Connection.html#method.on_timeout
+    /// [`is_closed()`]: struct.Connection.html#method.is_closed
+    #[inline]
+    pub fn is_draining(&self) -> bool {
+        self.draining_timer.is_some()
     }
 
     /// Returns true if the connection is closed.
     ///
     /// If this returns true, the connection object can be dropped.
+    #[inline]
     pub fn is_closed(&self) -> bool {
         self.closed
     }
 
+    /// Returns the error received from the peer, if any.
+    ///
+    /// The fields of the returned `ConnectionError` are symmetric with the
+    /// arguments of the [`close()`] method.
+    ///
+    /// Note that a `Some` return value does not necessarily imply
+    /// [`is_closed()`] or any other connection state.
+    ///
+    /// [`close()`]: struct.Connection.html#method.close
+    /// [`is_closed()`]: struct.Connection.html#method.is_closed
+    #[inline]
+    pub fn peer_error(&self) -> Option<&ConnectionError> {
+        self.peer_error.as_ref()
+    }
+
     /// Collects and returns statistics about the connection.
+    #[inline]
     pub fn stats(&self) -> Stats {
         Stats {
             recv: self.recv_count,
@@ -3429,53 +4259,185 @@
             &mut raw_params,
         )?;
 
-        self.handshake.set_quic_transport_params(raw_params)?;
+        self.handshake
+            .lock()
+            .unwrap()
+            .set_quic_transport_params(raw_params)?;
 
         Ok(())
     }
 
+    fn parse_peer_transport_params(
+        &mut self, peer_params: TransportParams,
+    ) -> Result<()> {
+        if self.version >= PROTOCOL_VERSION_DRAFT28 ||
+            self.version == PROTOCOL_VERSION_V1
+        {
+            // Validate initial_source_connection_id.
+            match &peer_params.initial_source_connection_id {
+                Some(v) if v != &self.dcid =>
+                    return Err(Error::InvalidTransportParam),
+
+                Some(_) => (),
+
+                // initial_source_connection_id must be sent by
+                // both endpoints.
+                None => return Err(Error::InvalidTransportParam),
+            }
+
+            // Validate original_destination_connection_id.
+            if let Some(odcid) = &self.odcid {
+                match &peer_params.original_destination_connection_id {
+                    Some(v) if v != odcid =>
+                        return Err(Error::InvalidTransportParam),
+
+                    Some(_) => (),
+
+                    // original_destination_connection_id must be
+                    // sent by the server.
+                    None if !self.is_server =>
+                        return Err(Error::InvalidTransportParam),
+
+                    None => (),
+                }
+            }
+
+            // Validate retry_source_connection_id.
+            if let Some(rscid) = &self.rscid {
+                match &peer_params.retry_source_connection_id {
+                    Some(v) if v != rscid =>
+                        return Err(Error::InvalidTransportParam),
+
+                    Some(_) => (),
+
+                    // retry_source_connection_id must be sent by
+                    // the server.
+                    None => return Err(Error::InvalidTransportParam),
+                }
+            }
+        } else {
+            // Legacy validation of the original connection ID when
+            // stateless retry is performed, for drafts < 28.
+            if self.did_retry &&
+                peer_params.original_destination_connection_id != self.odcid
+            {
+                return Err(Error::InvalidTransportParam);
+            }
+        }
+
+        self.process_peer_transport_params(peer_params);
+
+        self.parsed_peer_transport_params = true;
+
+        Ok(())
+    }
+
+    fn process_peer_transport_params(&mut self, peer_params: TransportParams) {
+        self.max_tx_data = peer_params.initial_max_data;
+
+        // Update send capacity.
+        self.tx_cap = cmp::min(
+            self.recovery.cwnd_available() as u64,
+            self.max_tx_data - self.tx_data,
+        ) as usize;
+
+        self.streams
+            .update_peer_max_streams_bidi(peer_params.initial_max_streams_bidi);
+        self.streams
+            .update_peer_max_streams_uni(peer_params.initial_max_streams_uni);
+
+        self.recovery.max_ack_delay =
+            time::Duration::from_millis(peer_params.max_ack_delay);
+
+        self.recovery
+            .update_max_datagram_size(peer_params.max_udp_payload_size as usize);
+
+        self.peer_transport_params = peer_params;
+    }
+
     /// Continues the handshake.
     ///
     /// If the connection is already established, it does nothing.
     fn do_handshake(&mut self) -> Result<()> {
-        // Handshake is already complete, there's nothing to do.
-        if self.is_established() {
+        let handshake = self.handshake.lock().unwrap();
+
+        // Handshake is already complete, nothing more to do.
+        if handshake.is_completed() {
             return Ok(());
         }
 
-        match self.handshake.do_handshake() {
+        match handshake.do_handshake() {
             Ok(_) => (),
 
-            Err(Error::Done) => return Ok(()),
+            Err(Error::Done) => {
+                // Try to parse transport parameters as soon as the first flight
+                // of handshake data is processed.
+                //
+                // This is potentially dangerous as the handshake hasn't been
+                // completed yet, though it's required to be able to send data
+                // in 0.5 RTT.
+                let raw_params = handshake.quic_transport_params();
+
+                if !self.parsed_peer_transport_params && !raw_params.is_empty() {
+                    let peer_params =
+                        TransportParams::decode(&raw_params, self.is_server)?;
+
+                    // Unlock handshake object.
+                    drop(handshake);
+
+                    self.parse_peer_transport_params(peer_params)?;
+                }
+
+                return Ok(());
+            },
 
             Err(e) => return Err(e),
         };
 
-        if self.application_proto().is_empty() {
-            // Send no_application_proto TLS alert when no protocol
-            // can be negotiated.
-            self.error = Some(0x178);
-            return Err(Error::TlsFail);
+        self.handshake_completed = handshake.is_completed();
+
+        self.alpn = handshake.alpn_protocol().to_vec();
+
+        let cipher = handshake.cipher();
+        let curve = handshake.curve();
+        let sigalg = handshake.sigalg();
+        let is_resumed = handshake.is_resumed();
+
+        let raw_params = handshake.quic_transport_params();
+
+        if !self.parsed_peer_transport_params && !raw_params.is_empty() {
+            let peer_params =
+                TransportParams::decode(&raw_params, self.is_server)?;
+
+            // Unlock handshake object.
+            drop(handshake);
+
+            self.parse_peer_transport_params(peer_params)?;
+        }
+
+        // Once the handshake is completed there's no point in processing 0-RTT
+        // packets anymore, so clear the buffer now.
+        if self.handshake_completed {
+            self.undecryptable_pkts.clear();
         }
 
         trace!("{} connection established: proto={:?} cipher={:?} curve={:?} sigalg={:?} resumed={} {:?}",
-               &self.trace_id,
-               std::str::from_utf8(self.application_proto()),
-               self.handshake.cipher(),
-               self.handshake.curve(),
-               self.handshake.sigalg(),
-               self.is_resumed(),
-               self.peer_transport_params);
+               &self.trace_id, std::str::from_utf8(self.application_proto()),
+               cipher, curve, sigalg, is_resumed, self.peer_transport_params);
 
         Ok(())
     }
 
-    /// Selects the packet number space for outgoing packets.
-    fn write_epoch(&self) -> Result<packet::Epoch> {
+    /// Selects the packet type for the next outgoing packet.
+    fn write_pkt_type(&self) -> Result<packet::Type> {
         // On error send packet in the latest epoch available, but only send
         // 1-RTT ones when the handshake is completed.
-        if self.error.is_some() {
-            let epoch = match self.handshake.write_level() {
+        if self
+            .local_error
+            .as_ref()
+            .map_or(false, |conn_err| !conn_err.is_app)
+        {
+            let epoch = match self.handshake.lock().unwrap().write_level() {
                 crypto::Level::Initial => packet::EPOCH_INITIAL,
                 crypto::Level::ZeroRTT => unreachable!(),
                 crypto::Level::Handshake => packet::EPOCH_HANDSHAKE,
@@ -3485,10 +4447,10 @@
             if epoch == packet::EPOCH_APPLICATION && !self.is_established() {
                 // Downgrade the epoch to handshake as the handshake is not
                 // completed yet.
-                return Ok(packet::EPOCH_HANDSHAKE);
+                return Ok(packet::Type::Handshake);
             }
 
-            return Ok(epoch);
+            return Ok(packet::Type::from_epoch(epoch));
         }
 
         for epoch in packet::EPOCH_INITIAL..packet::EPOCH_COUNT {
@@ -3499,33 +4461,43 @@
 
             // We are ready to send data for this packet number space.
             if self.pkt_num_spaces[epoch].ready() {
-                return Ok(epoch);
+                return Ok(packet::Type::from_epoch(epoch));
             }
 
             // There are lost frames in this packet number space.
             if !self.recovery.lost[epoch].is_empty() {
-                return Ok(epoch);
+                return Ok(packet::Type::from_epoch(epoch));
             }
 
             // We need to send PTO probe packets.
             if self.recovery.loss_probes[epoch] > 0 {
-                return Ok(epoch);
+                return Ok(packet::Type::from_epoch(epoch));
             }
         }
 
         // If there are flushable, almost full or blocked streams, use the
         // Application epoch.
         if (self.is_established() || self.is_in_early_data()) &&
-            (self.almost_full ||
+            ((self.is_server && !self.handshake_done_sent) ||
+                self.almost_full ||
                 self.blocked_limit.is_some() ||
                 self.dgram_send_queue.has_pending() ||
+                self.local_error
+                    .as_ref()
+                    .map_or(false, |conn_err| conn_err.is_app) ||
                 self.streams.should_update_max_streams_bidi() ||
                 self.streams.should_update_max_streams_uni() ||
                 self.streams.has_flushable() ||
                 self.streams.has_almost_full() ||
-                self.streams.has_blocked())
+                self.streams.has_blocked() ||
+                self.streams.has_reset() ||
+                self.streams.has_stopped())
         {
-            return Ok(packet::EPOCH_APPLICATION);
+            if self.is_in_early_data() && !self.is_server {
+                return Ok(packet::Type::ZeroRTT);
+            }
+
+            return Ok(packet::Type::Short);
         }
 
         Err(Error::Done)
@@ -3599,7 +4571,7 @@
                 if !stream::is_bidi(stream_id) &&
                     stream::is_local(stream_id, self.is_server)
                 {
-                    return Err(Error::InvalidStreamState);
+                    return Err(Error::InvalidStreamState(stream_id));
                 }
 
                 // Get existing stream or create a new one, but if the stream
@@ -3627,18 +4599,51 @@
                 }
             },
 
-            frame::Frame::StopSending { stream_id, .. } => {
+            frame::Frame::StopSending {
+                stream_id,
+                error_code,
+            } => {
                 // STOP_SENDING on a receive-only stream is a fatal error.
                 if !stream::is_local(stream_id, self.is_server) &&
                     !stream::is_bidi(stream_id)
                 {
-                    return Err(Error::InvalidStreamState);
+                    return Err(Error::InvalidStreamState(stream_id));
+                }
+
+                // Get existing stream or create a new one, but if the stream
+                // has already been closed and collected, ignore the frame.
+                //
+                // This can happen if e.g. an ACK frame is lost, and the peer
+                // retransmits another frame before it realizes that the stream
+                // is gone.
+                //
+                // Note that it makes it impossible to check if the frame is
+                // illegal, since we have no state, but since we ignore the
+                // frame, it should be fine.
+                let stream = match self.get_or_create_stream(stream_id, false) {
+                    Ok(v) => v,
+
+                    Err(Error::Done) => return Ok(()),
+
+                    Err(e) => return Err(e),
+                };
+
+                let was_writable = stream.is_writable();
+
+                // Try stopping the stream.
+                if let Ok(final_size) = stream.send.stop(error_code) {
+                    self.streams
+                        .mark_reset(stream_id, true, error_code, final_size);
+
+                    if !was_writable {
+                        self.streams.mark_writable(stream_id, true);
+                    }
                 }
             },
 
             frame::Frame::Crypto { data } => {
                 // Push the data to the stream so it can be re-ordered.
-                self.pkt_num_spaces[epoch].crypto_stream.recv.push(data)?;
+                self.pkt_num_spaces[epoch].crypto_stream.recv.write(data)?;
 
                 // Feed crypto data to the TLS state, if there's data
                 // available at the expected offset.
@@ -3648,99 +4653,23 @@
 
                 let stream = &mut self.pkt_num_spaces[epoch].crypto_stream;
 
-                while let Ok((read, _)) = stream.recv.pop(&mut crypto_buf) {
+                while let Ok((read, _)) = stream.recv.emit(&mut crypto_buf) {
                     let recv_buf = &crypto_buf[..read];
-                    self.handshake.provide_data(level, &recv_buf)?;
+                    self.handshake
+                        .lock()
+                        .unwrap()
+                        .provide_data(level, &recv_buf)?;
                 }
 
-                self.do_handshake()?;
-
-                // Try to parse transport parameters as soon as the first flight
-                // of handshake data is processed.
-                //
-                // This is potentially dangerous as the handshake hasn't been
-                // completed yet, though it's required to be able to send data
-                // in 0.5 RTT.
-                let raw_params = self.handshake.quic_transport_params();
-
-                if !self.parsed_peer_transport_params && !raw_params.is_empty() {
-                    let peer_params =
-                        TransportParams::decode(&raw_params, self.is_server)?;
-
-                    if self.version >= PROTOCOL_VERSION_DRAFT28 {
-                        // Validate initial_source_connection_id.
-                        match &peer_params.initial_source_connection_id {
-                            Some(v) if v != &self.dcid =>
-                                return Err(Error::InvalidTransportParam),
-
-                            Some(_) => (),
-
-                            // initial_source_connection_id must be sent by
-                            // both endpoints.
-                            None => return Err(Error::InvalidTransportParam),
-                        }
-
-                        // Validate original_destination_connection_id.
-                        if let Some(odcid) = &self.odcid {
-                            match &peer_params.original_destination_connection_id
-                            {
-                                Some(v) if v != odcid =>
-                                    return Err(Error::InvalidTransportParam),
-
-                                Some(_) => (),
-
-                                // original_destination_connection_id must be
-                                // sent by the server.
-                                None if !self.is_server =>
-                                    return Err(Error::InvalidTransportParam),
-
-                                None => (),
-                            }
-                        }
-
-                        // Validate retry_source_connection_id.
-                        if let Some(rscid) = &self.rscid {
-                            match &peer_params.retry_source_connection_id {
-                                Some(v) if v != rscid =>
-                                    return Err(Error::InvalidTransportParam),
-
-                                Some(_) => (),
-
-                                // retry_source_connection_id must be sent by
-                                // the server.
-                                None => return Err(Error::InvalidTransportParam),
-                            }
-                        }
-                    } else {
-                        // Legacy validation of the original connection ID when
-                        // stateless retry is performed, for drafts < 28.
-                        if self.did_retry &&
-                            peer_params.original_destination_connection_id !=
-                                self.odcid
-                        {
-                            return Err(Error::InvalidTransportParam);
-                        }
-                    }
-
-                    // Update flow control limits.
-                    self.max_tx_data = peer_params.initial_max_data;
-
-                    self.streams.update_peer_max_streams_bidi(
-                        peer_params.initial_max_streams_bidi,
-                    );
-                    self.streams.update_peer_max_streams_uni(
-                        peer_params.initial_max_streams_uni,
-                    );
-
-                    self.recovery.max_ack_delay =
-                        time::Duration::from_millis(peer_params.max_ack_delay);
-
-                    self.peer_transport_params = peer_params;
-
-                    self.parsed_peer_transport_params = true;
+                if self.is_established() {
+                    self.handshake.lock().unwrap().process_post_handshake()?;
+                } else {
+                    self.do_handshake()?;
                 }
             },
 
+            frame::Frame::CryptoHeader { .. } => unreachable!(),
+
             // TODO: implement stateless retry
             frame::Frame::NewToken { .. } => (),
 
@@ -3749,7 +4678,7 @@
                 if !stream::is_bidi(stream_id) &&
                     stream::is_local(stream_id, self.is_server)
                 {
-                    return Err(Error::InvalidStreamState);
+                    return Err(Error::InvalidStreamState(stream_id));
                 }
 
                 let max_rx_data_left = self.max_rx_data - self.rx_data;
@@ -3780,7 +4709,7 @@
                     return Err(Error::FlowControl);
                 }
 
-                stream.recv.push(data)?;
+                stream.recv.write(data)?;
 
                 if stream.is_readable() {
                     self.streams.mark_readable(stream_id, true);
@@ -3789,11 +4718,20 @@
                 self.rx_data += max_off_delta;
             },
 
+            frame::Frame::StreamHeader { .. } => unreachable!(),
+
             frame::Frame::MaxData { max } => {
                 self.max_tx_data = cmp::max(self.max_tx_data, max);
             },
 
             frame::Frame::MaxStreamData { stream_id, max } => {
+                // Peer can't receive on its own unidirectional streams.
+                if !stream::is_bidi(stream_id) &&
+                    !stream::is_local(stream_id, self.is_server)
+                {
+                    return Err(Error::InvalidStreamState(stream_id));
+                }
+
                 // Get existing stream or create a new one, but if the stream
                 // has already been closed and collected, ignore the frame.
                 //
@@ -3873,11 +4811,23 @@
 
             frame::Frame::PathResponse { .. } => (),
 
-            frame::Frame::ConnectionClose { .. } => {
+            frame::Frame::ConnectionClose {
+                error_code, reason, ..
+            } => {
+                self.peer_error = Some(ConnectionError {
+                    is_app: false,
+                    error_code,
+                    reason,
+                });
                 self.draining_timer = Some(now + (self.recovery.pto() * 3));
             },
 
-            frame::Frame::ApplicationClose { .. } => {
+            frame::Frame::ApplicationClose { error_code, reason } => {
+                self.peer_error = Some(ConnectionError {
+                    is_app: true,
+                    error_code,
+                    reason,
+                });
                 self.draining_timer = Some(now + (self.recovery.pto() * 3));
             },
 
@@ -3899,7 +4849,7 @@
                 // quiche always advertises support for 64K sized DATAGRAM
                 // frames, as recommended by the standard, so we don't need a
                 // size check.
-                if self.dgram_enabled() {
+                if !self.dgram_enabled() {
                     return Err(Error::InvalidState);
                 }
 
@@ -3975,12 +4925,6 @@
         Some(idle_timeout)
     }
 
-    /// Returns the connection's overall send capacity.
-    fn send_capacity(&self) -> usize {
-        let cap = self.max_tx_data - self.tx_data;
-        cmp::min(cap, self.recovery.cwnd_available() as u64) as usize
-    }
-
     /// Returns the connection's handshake status for use in loss recovery.
     fn handshake_status(&self) -> recovery::HandshakeStatus {
         recovery::HandshakeStatus {
@@ -4031,15 +4975,15 @@
 
 /// Statistics about the connection.
 ///
-/// A connections's statistics can be collected using the [`stats()`] method.
+/// A connection's statistics can be collected using the [`stats()`] method.
 ///
 /// [`stats()`]: struct.Connection.html#method.stats
 #[derive(Clone)]
 pub struct Stats {
-    /// The number of QUIC packets received on this connection.
+    /// The number of QUIC packets received.
     pub recv: usize,
 
-    /// The number of QUIC packets sent on this connection.
+    /// The number of QUIC packets sent.
     pub sent: usize,
 
     /// The number of QUIC packets that were lost.
@@ -4051,28 +4995,24 @@
     /// The size of the connection's congestion window in bytes.
     pub cwnd: usize,
 
-    /// The estimated data delivery rate in bytes/s.
+    /// The most recent data delivery rate estimate in bytes/s.
     pub delivery_rate: u64,
 }
 
 impl std::fmt::Debug for Stats {
+    #[inline]
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         write!(
             f,
-            "recv={} sent={} lost={} rtt={:?} cwnd={} delivery_rate={}",
-            self.recv,
-            self.sent,
-            self.lost,
-            self.rtt,
-            self.cwnd,
-            self.delivery_rate
+            "recv={} sent={} lost={} rtt={:?} cwnd={}",
+            self.recv, self.sent, self.lost, self.rtt, self.cwnd,
         )
     }
 }
 
 #[derive(Clone, Debug, PartialEq)]
 struct TransportParams {
-    pub original_destination_connection_id: Option<Vec<u8>>,
+    pub original_destination_connection_id: Option<ConnectionId<'static>>,
     pub max_idle_timeout: u64,
     pub stateless_reset_token: Option<Vec<u8>>,
     pub max_udp_payload_size: u64,
@@ -4087,8 +5027,8 @@
     pub disable_active_migration: bool,
     // pub preferred_address: ...,
     pub active_conn_id_limit: u64,
-    pub initial_source_connection_id: Option<Vec<u8>>,
-    pub retry_source_connection_id: Option<Vec<u8>>,
+    pub initial_source_connection_id: Option<ConnectionId<'static>>,
+    pub retry_source_connection_id: Option<ConnectionId<'static>>,
     pub max_datagram_frame_size: Option<u64>,
 }
 
@@ -4135,7 +5075,8 @@
                         return Err(Error::InvalidTransportParam);
                     }
 
-                    tp.original_destination_connection_id = Some(val.to_vec());
+                    tp.original_destination_connection_id =
+                        Some(val.to_vec().into());
                 },
 
                 0x0001 => {
@@ -4237,7 +5178,7 @@
                 },
 
                 0x000f => {
-                    tp.initial_source_connection_id = Some(val.to_vec());
+                    tp.initial_source_connection_id = Some(val.to_vec().into());
                 },
 
                 0x00010 => {
@@ -4245,7 +5186,7 @@
                         return Err(Error::InvalidTransportParam);
                     }
 
-                    tp.retry_source_connection_id = Some(val.to_vec());
+                    tp.retry_source_connection_id = Some(val.to_vec().into());
                 },
 
                 0x0020 => {
@@ -4480,6 +5421,7 @@
             config.set_initial_max_streams_uni(3);
             config.set_max_idle_timeout(180_000);
             config.verify_peer(false);
+            config.set_ack_delay_exponent(5);
 
             Pipe::with_config(&mut config)
         }
@@ -4487,22 +5429,35 @@
         pub fn with_config(config: &mut Config) -> Result<Pipe> {
             let mut client_scid = [0; 16];
             rand::rand_bytes(&mut client_scid[..]);
+            let client_scid = ConnectionId::from_ref(&client_scid);
+            let client_addr = "127.0.0.1:1234".parse().unwrap();
 
             let mut server_scid = [0; 16];
             rand::rand_bytes(&mut server_scid[..]);
+            let server_scid = ConnectionId::from_ref(&server_scid);
+            let server_addr = "127.0.0.1:4321".parse().unwrap();
 
             Ok(Pipe {
-                client: connect(Some("quic.tech"), &client_scid, config)?,
-                server: accept(&server_scid, None, config)?,
+                client: connect(
+                    Some("quic.tech"),
+                    &client_scid,
+                    client_addr,
+                    config,
+                )?,
+                server: accept(&server_scid, None, server_addr, config)?,
             })
         }
 
         pub fn with_client_config(client_config: &mut Config) -> Result<Pipe> {
             let mut client_scid = [0; 16];
             rand::rand_bytes(&mut client_scid[..]);
+            let client_scid = ConnectionId::from_ref(&client_scid);
+            let client_addr = "127.0.0.1:1234".parse().unwrap();
 
             let mut server_scid = [0; 16];
             rand::rand_bytes(&mut server_scid[..]);
+            let server_scid = ConnectionId::from_ref(&server_scid);
+            let server_addr = "127.0.0.1:4321".parse().unwrap();
 
             let mut config = Config::new(crate::PROTOCOL_VERSION)?;
             config.load_cert_chain_from_pem_file("examples/cert.crt")?;
@@ -4515,17 +5470,26 @@
             config.set_initial_max_streams_uni(3);
 
             Ok(Pipe {
-                client: connect(Some("quic.tech"), &client_scid, client_config)?,
-                server: accept(&server_scid, None, &mut config)?,
+                client: connect(
+                    Some("quic.tech"),
+                    &client_scid,
+                    client_addr,
+                    client_config,
+                )?,
+                server: accept(&server_scid, None, server_addr, &mut config)?,
             })
         }
 
         pub fn with_server_config(server_config: &mut Config) -> Result<Pipe> {
             let mut client_scid = [0; 16];
             rand::rand_bytes(&mut client_scid[..]);
+            let client_scid = ConnectionId::from_ref(&client_scid);
+            let client_addr = "127.0.0.1:1234".parse().unwrap();
 
             let mut server_scid = [0; 16];
             rand::rand_bytes(&mut server_scid[..]);
+            let server_scid = ConnectionId::from_ref(&server_scid);
+            let server_addr = "127.0.0.1:4321".parse().unwrap();
 
             let mut config = Config::new(crate::PROTOCOL_VERSION)?;
             config.set_application_protos(b"\x06proto1\x06proto2")?;
@@ -4536,85 +5500,69 @@
             config.set_initial_max_streams_uni(3);
 
             Ok(Pipe {
-                client: connect(Some("quic.tech"), &client_scid, &mut config)?,
-                server: accept(&server_scid, None, server_config)?,
+                client: connect(
+                    Some("quic.tech"),
+                    &client_scid,
+                    client_addr,
+                    &mut config,
+                )?,
+                server: accept(&server_scid, None, server_addr, server_config)?,
             })
         }
 
-        pub fn handshake(&mut self, buf: &mut [u8]) -> Result<()> {
-            let mut len = self.client.send(buf)?;
+        pub fn handshake(&mut self) -> Result<()> {
+            while !self.client.is_established() || !self.server.is_established() {
+                let flight = emit_flight(&mut self.client)?;
+                process_flight(&mut self.server, flight)?;
 
-            while !self.client.is_established() && !self.server.is_established() {
-                len = recv_send(&mut self.server, buf, len)?;
-                len = recv_send(&mut self.client, buf, len)?;
-            }
-
-            recv_send(&mut self.server, buf, len)?;
-
-            Ok(())
-        }
-
-        pub fn flush_client(&mut self, buf: &mut [u8]) -> Result<()> {
-            loop {
-                let len = match self.client.send(buf) {
-                    Ok(v) => v,
-
-                    Err(Error::Done) => break,
-
-                    Err(e) => return Err(e),
-                };
-
-                match self.server.recv(&mut buf[..len]) {
-                    Ok(_) => (),
-
-                    Err(Error::Done) => (),
-
-                    Err(e) => return Err(e),
-                }
+                let flight = emit_flight(&mut self.server)?;
+                process_flight(&mut self.client, flight)?;
             }
 
             Ok(())
         }
 
-        pub fn flush_server(&mut self, buf: &mut [u8]) -> Result<()> {
-            loop {
-                let len = match self.server.send(buf) {
-                    Ok(v) => v,
-
-                    Err(Error::Done) => break,
-
-                    Err(e) => return Err(e),
-                };
-
-                match self.client.recv(&mut buf[..len]) {
-                    Ok(_) => (),
-
-                    Err(Error::Done) => (),
-
-                    Err(e) => return Err(e),
-                }
-            }
-
-            Ok(())
-        }
-
-        pub fn advance(&mut self, buf: &mut [u8]) -> Result<()> {
+        pub fn advance(&mut self) -> Result<()> {
             let mut client_done = false;
             let mut server_done = false;
 
-            let mut len = 0;
-
             while !client_done || !server_done {
-                len = recv_send(&mut self.client, buf, len)?;
-                client_done = len == 0;
+                match emit_flight(&mut self.client) {
+                    Ok(flight) => process_flight(&mut self.server, flight)?,
 
-                len = recv_send(&mut self.server, buf, len)?;
-                server_done = len == 0;
+                    Err(Error::Done) => client_done = true,
+
+                    Err(e) => return Err(e),
+                };
+
+                match emit_flight(&mut self.server) {
+                    Ok(flight) => process_flight(&mut self.client, flight)?,
+
+                    Err(Error::Done) => server_done = true,
+
+                    Err(e) => return Err(e),
+                };
             }
 
             Ok(())
         }
 
+        pub fn client_recv(&mut self, buf: &mut [u8]) -> Result<usize> {
+            let info = RecvInfo {
+                from: self.client.peer_addr,
+            };
+
+            self.client.recv(buf, info)
+        }
+
+        pub fn server_recv(&mut self, buf: &mut [u8]) -> Result<usize> {
+            let info = RecvInfo {
+                from: self.server.peer_addr,
+            };
+
+            self.server.recv(buf, info)
+        }
+
         pub fn send_pkt_to_server(
             &mut self, pkt_type: packet::Type, frames: &[frame::Frame],
             buf: &mut [u8],
@@ -4627,33 +5575,61 @@
     pub fn recv_send(
         conn: &mut Connection, buf: &mut [u8], len: usize,
     ) -> Result<usize> {
-        let mut left = len;
+        let info = RecvInfo {
+            from: conn.peer_addr,
+        };
 
-        while left > 0 {
-            match conn.recv(&mut buf[len - left..len]) {
-                Ok(read) => left -= read,
-
-                Err(Error::Done) => break,
-
-                Err(e) => return Err(e),
-            }
-        }
-
-        assert_eq!(left, 0);
+        conn.recv(&mut buf[..len], info)?;
 
         let mut off = 0;
 
-        while off < buf.len() {
-            match conn.send(&mut buf[off..]) {
-                Ok(write) => off += write,
+        match conn.send(&mut buf[off..]) {
+            Ok((write, _)) => off += write,
+
+            Err(Error::Done) => (),
+
+            Err(e) => return Err(e),
+        }
+
+        Ok(off)
+    }
+
+    pub fn process_flight(
+        conn: &mut Connection, flight: Vec<Vec<u8>>,
+    ) -> Result<()> {
+        for mut pkt in flight {
+            let info = RecvInfo {
+                from: conn.peer_addr,
+            };
+
+            conn.recv(&mut pkt, info)?;
+        }
+
+        Ok(())
+    }
+
+    pub fn emit_flight(conn: &mut Connection) -> Result<Vec<Vec<u8>>> {
+        let mut flight = Vec::new();
+
+        loop {
+            let mut out = vec![0u8; 65535];
+
+            match conn.send(&mut out) {
+                Ok((written, _)) => out.truncate(written),
 
                 Err(Error::Done) => break,
 
                 Err(e) => return Err(e),
-            }
+            };
+
+            flight.push(out);
         }
 
-        Ok(off)
+        if flight.is_empty() {
+            return Err(Error::Done);
+        }
+
+        Ok(flight)
     }
 
     pub fn encode_pkt(
@@ -4667,13 +5643,13 @@
         let space = &mut conn.pkt_num_spaces[epoch];
 
         let pn = space.next_pkt_num;
-        let pn_len = packet::pkt_num_len(pn)?;
+        let pn_len = 4;
 
         let hdr = Header {
             ty: pkt_type,
             version: conn.version,
-            dcid: conn.dcid.clone(),
-            scid: conn.scid.clone(),
+            dcid: ConnectionId::from_ref(&conn.dcid),
+            scid: ConnectionId::from_ref(&conn.scid),
             pkt_num: 0,
             pkt_num_len: pn_len,
             token: conn.token.clone(),
@@ -4691,7 +5667,9 @@
             b.put_varint(len as u64)?;
         }
 
-        packet::encode_pkt_num(pn, &mut b)?;
+        // Always encode packet number in 4 bytes, to allow encoding packets
+        // with empty payloads.
+        b.put_u32(pn as u32)?;
 
         let payload_offset = b.off();
 
@@ -4776,8 +5754,8 @@
             max_ack_delay: 2_u64.pow(14) - 1,
             disable_active_migration: true,
             active_conn_id_limit: 8,
-            initial_source_connection_id: Some(b"woot woot".to_vec()),
-            retry_source_connection_id: Some(b"retry".to_vec()),
+            initial_source_connection_id: Some(b"woot woot".to_vec().into()),
+            retry_source_connection_id: Some(b"retry".to_vec().into()),
             max_datagram_frame_size: Some(32),
         };
 
@@ -4806,7 +5784,7 @@
             max_ack_delay: 2_u64.pow(14) - 1,
             disable_active_migration: true,
             active_conn_id_limit: 8,
-            initial_source_connection_id: Some(b"woot woot".to_vec()),
+            initial_source_connection_id: Some(b"woot woot".to_vec().into()),
             retry_source_connection_id: None,
             max_datagram_frame_size: Some(32),
         };
@@ -4824,14 +5802,28 @@
     #[test]
     #[ignore = "Android: failure reason unkown."]
     fn unknown_version() {
-        let mut buf = [0; 65535];
-
         let mut config = Config::new(0xbabababa).unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_client_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Err(Error::UnknownVersion));
+    }
 
-        assert_eq!(pipe.handshake(&mut buf), Err(Error::UnknownVersion));
+    #[test]
+    fn config_version_reserved() {
+        Config::new(0xbabababa).unwrap();
+        Config::new(0x1a2a3a4a).unwrap();
+    }
+
+    #[test]
+    fn config_version_invalid() {
+        assert_eq!(
+            Config::new(0xb1bababa).err().unwrap(),
+            Error::UnknownVersion
+        );
     }
 
     #[test]
@@ -4846,14 +5838,14 @@
 
         let mut pipe = testing::Pipe::with_client_config(&mut config).unwrap();
 
-        let mut len = pipe.client.send(&mut buf).unwrap();
+        let (mut len, _) = pipe.client.send(&mut buf).unwrap();
 
         let hdr = packet::Header::from_slice(&mut buf[..len], 0).unwrap();
         len = crate::negotiate_version(&hdr.scid, &hdr.dcid, &mut buf).unwrap();
 
-        assert_eq!(pipe.client.recv(&mut buf[..len]), Ok(len));
+        assert_eq!(pipe.client_recv(&mut buf[..len]), Ok(len));
 
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(pipe.client.version, PROTOCOL_VERSION);
         assert_eq!(pipe.server.version, PROTOCOL_VERSION);
@@ -4861,8 +5853,6 @@
 
     #[test]
     fn verify_custom_root() {
-        let mut buf = [0; 65535];
-
         let mut config = Config::new(PROTOCOL_VERSION).unwrap();
         config.verify_peer(true);
         config
@@ -4873,7 +5863,7 @@
             .unwrap();
 
         let mut pipe = testing::Pipe::with_client_config(&mut config).unwrap();
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
     }
 
     #[test]
@@ -4889,11 +5879,11 @@
         assert_eq!(pipe.client.encode_transport_params(), Ok(()));
 
         // Client sends initial flight.
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
 
         // Server rejects transport parameters.
         assert_eq!(
-            testing::recv_send(&mut pipe.server, &mut buf, len),
+            pipe.server_recv(&mut buf[..len]),
             Err(Error::InvalidTransportParam)
         );
     }
@@ -4907,26 +5897,23 @@
         // Scramble initial_source_connection_id.
         pipe.client
             .local_transport_params
-            .initial_source_connection_id = Some(b"bogus value".to_vec());
+            .initial_source_connection_id = Some(b"bogus value".to_vec().into());
         assert_eq!(pipe.client.encode_transport_params(), Ok(()));
 
         // Client sends initial flight.
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
 
         // Server rejects transport parameters.
         assert_eq!(
-            testing::recv_send(&mut pipe.server, &mut buf, len),
+            pipe.server_recv(&mut buf[..len]),
             Err(Error::InvalidTransportParam)
         );
     }
 
     #[test]
     fn handshake() {
-        let mut buf = [0; 65535];
-
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(
             pipe.client.application_proto(),
@@ -4935,16 +5922,32 @@
     }
 
     #[test]
-    fn handshake_confirmation() {
-        let mut buf = [0; 65535];
+    fn handshake_done() {
+        let mut pipe = testing::Pipe::default().unwrap();
 
+        // Disable session tickets on the server (SSL_OP_NO_TICKET) to avoid
+        // triggering 1-RTT packet send with a CRYPTO frame.
+        pipe.server
+            .handshake
+            .lock()
+            .unwrap()
+            .set_options(0x0000_4000);
+
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert!(pipe.server.handshake_done_sent);
+    }
+
+    #[test]
+    fn handshake_confirmation() {
         let mut pipe = testing::Pipe::default().unwrap();
 
         // Client sends initial flight.
-        let mut len = pipe.client.send(&mut buf).unwrap();
+        let flight = testing::emit_flight(&mut pipe.client).unwrap();
+        testing::process_flight(&mut pipe.server, flight).unwrap();
 
         // Server sends initial flight.
-        len = testing::recv_send(&mut pipe.server, &mut buf, len).unwrap();
+        let flight = testing::emit_flight(&mut pipe.server).unwrap();
 
         assert!(!pipe.client.is_established());
         assert!(!pipe.client.handshake_confirmed);
@@ -4952,8 +5955,10 @@
         assert!(!pipe.server.is_established());
         assert!(!pipe.server.handshake_confirmed);
 
+        testing::process_flight(&mut pipe.client, flight).unwrap();
+
         // Client sends Handshake packet and completes handshake.
-        len = testing::recv_send(&mut pipe.client, &mut buf, len).unwrap();
+        let flight = testing::emit_flight(&mut pipe.client).unwrap();
 
         assert!(pipe.client.is_established());
         assert!(!pipe.client.handshake_confirmed);
@@ -4961,8 +5966,10 @@
         assert!(!pipe.server.is_established());
         assert!(!pipe.server.handshake_confirmed);
 
+        testing::process_flight(&mut pipe.server, flight).unwrap();
+
         // Server completes handshake and sends HANDSHAKE_DONE.
-        len = testing::recv_send(&mut pipe.server, &mut buf, len).unwrap();
+        let flight = testing::emit_flight(&mut pipe.server).unwrap();
 
         assert!(pipe.client.is_established());
         assert!(!pipe.client.handshake_confirmed);
@@ -4970,8 +5977,10 @@
         assert!(pipe.server.is_established());
         assert!(!pipe.server.handshake_confirmed);
 
+        testing::process_flight(&mut pipe.client, flight).unwrap();
+
         // Client acks 1-RTT packet, and confirms handshake.
-        len = testing::recv_send(&mut pipe.client, &mut buf, len).unwrap();
+        let flight = testing::emit_flight(&mut pipe.client).unwrap();
 
         assert!(pipe.client.is_established());
         assert!(pipe.client.handshake_confirmed);
@@ -4979,9 +5988,9 @@
         assert!(pipe.server.is_established());
         assert!(!pipe.server.handshake_confirmed);
 
-        // Server handshake is confirmed.
-        testing::recv_send(&mut pipe.server, &mut buf, len).unwrap();
+        testing::process_flight(&mut pipe.server, flight).unwrap();
 
+        // Server handshake is confirmed.
         assert!(pipe.client.is_established());
         assert!(pipe.client.handshake_confirmed);
 
@@ -4990,6 +5999,68 @@
     }
 
     #[test]
+    fn handshake_resumption() {
+        const SESSION_TICKET_KEY: [u8; 48] = [0xa; 48];
+
+        let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        config.set_initial_max_data(30);
+        config.set_initial_max_stream_data_bidi_local(15);
+        config.set_initial_max_stream_data_bidi_remote(15);
+        config.set_initial_max_streams_bidi(3);
+        config.set_ticket_key(&SESSION_TICKET_KEY).unwrap();
+
+        // Perform initial handshake.
+        let mut pipe = testing::Pipe::with_server_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(pipe.client.is_established(), true);
+        assert_eq!(pipe.server.is_established(), true);
+
+        assert_eq!(pipe.client.is_resumed(), false);
+        assert_eq!(pipe.server.is_resumed(), false);
+
+        // Extract session.
+        let session = pipe.client.session().unwrap();
+
+        // Configure session on new connection and perform handshake.
+        let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        config.set_initial_max_data(30);
+        config.set_initial_max_stream_data_bidi_local(15);
+        config.set_initial_max_stream_data_bidi_remote(15);
+        config.set_initial_max_streams_bidi(3);
+        config.set_ticket_key(&SESSION_TICKET_KEY).unwrap();
+
+        let mut pipe = testing::Pipe::with_server_config(&mut config).unwrap();
+
+        assert_eq!(pipe.client.set_session(&session), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(pipe.client.is_established(), true);
+        assert_eq!(pipe.server.is_established(), true);
+
+        assert_eq!(pipe.client.is_resumed(), true);
+        assert_eq!(pipe.server.is_resumed(), true);
+    }
+
+    #[test]
     fn handshake_alpn_mismatch() {
         let mut buf = [0; 65535];
 
@@ -5000,17 +6071,228 @@
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_client_config(&mut config).unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Err(Error::TlsFail));
+        assert_eq!(pipe.handshake(), Err(Error::TlsFail));
 
         assert_eq!(pipe.client.application_proto(), b"");
         assert_eq!(pipe.server.application_proto(), b"");
+
+        // Server should only send one packet in response to ALPN mismatch.
+        let (len, _) = pipe.server.send(&mut buf).unwrap();
+        assert_eq!(len, 1200);
+
+        assert_eq!(pipe.server.send(&mut buf), Err(Error::Done));
+        assert_eq!(pipe.server.sent_count, 1);
+    }
+
+    #[test]
+    fn handshake_0rtt() {
+        let mut buf = [0; 65535];
+
+        let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        config.set_initial_max_data(30);
+        config.set_initial_max_stream_data_bidi_local(15);
+        config.set_initial_max_stream_data_bidi_remote(15);
+        config.set_initial_max_streams_bidi(3);
+        config.enable_early_data();
+        config.verify_peer(false);
+
+        // Perform initial handshake.
+        let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // Extract session.
+        let session = pipe.client.session().unwrap();
+
+        // Configure session on new connection.
+        let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.client.set_session(&session), Ok(()));
+
+        // Client sends initial flight.
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(len));
+
+        // Client sends 0-RTT packet.
+        let pkt_type = packet::Type::ZeroRTT;
+
+        let frames = [frame::Frame::Stream {
+            stream_id: 4,
+            data: stream::RangeBuf::from(b"aaaaa", 0, true),
+        }];
+
+        assert_eq!(
+            pipe.send_pkt_to_server(pkt_type, &frames, &mut buf),
+            Ok(1200)
+        );
+
+        assert_eq!(pipe.server.undecryptable_pkts.len(), 0);
+
+        // 0-RTT stream data is readable.
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        let mut b = [0; 15];
+        assert_eq!(pipe.server.stream_recv(4, &mut b), Ok((5, true)));
+        assert_eq!(&b[..5], b"aaaaa");
+    }
+
+    #[test]
+    fn handshake_0rtt_reordered() {
+        let mut buf = [0; 65535];
+
+        let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        config.set_initial_max_data(30);
+        config.set_initial_max_stream_data_bidi_local(15);
+        config.set_initial_max_stream_data_bidi_remote(15);
+        config.set_initial_max_streams_bidi(3);
+        config.enable_early_data();
+        config.verify_peer(false);
+
+        // Perform initial handshake.
+        let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // Extract session.
+        let session = pipe.client.session().unwrap();
+
+        // Configure session on new connection.
+        let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.client.set_session(&session), Ok(()));
+
+        // Client sends initial flight.
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+        let mut initial = (&buf[..len]).to_vec();
+
+        // Client sends 0-RTT packet.
+        let pkt_type = packet::Type::ZeroRTT;
+
+        let frames = [frame::Frame::Stream {
+            stream_id: 4,
+            data: stream::RangeBuf::from(b"aaaaa", 0, true),
+        }];
+
+        let len =
+            testing::encode_pkt(&mut pipe.client, pkt_type, &frames, &mut buf)
+                .unwrap();
+        let mut zrtt = (&buf[..len]).to_vec();
+
+        // 0-RTT packet is received before the Initial one.
+        assert_eq!(pipe.server_recv(&mut zrtt), Ok(zrtt.len()));
+
+        assert_eq!(pipe.server.undecryptable_pkts.len(), 1);
+        assert_eq!(pipe.server.undecryptable_pkts[0].0.len(), zrtt.len());
+
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), None);
+
+        // Initial packet is also received.
+        assert_eq!(pipe.server_recv(&mut initial), Ok(initial.len()));
+
+        // 0-RTT stream data is readable.
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        let mut b = [0; 15];
+        assert_eq!(pipe.server.stream_recv(4, &mut b), Ok((5, true)));
+        assert_eq!(&b[..5], b"aaaaa");
+    }
+
+    #[test]
+    fn handshake_0rtt_truncated() {
+        let mut buf = [0; 65535];
+
+        let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        config.set_initial_max_data(30);
+        config.set_initial_max_stream_data_bidi_local(15);
+        config.set_initial_max_stream_data_bidi_remote(15);
+        config.set_initial_max_streams_bidi(3);
+        config.enable_early_data();
+        config.verify_peer(false);
+
+        // Perform initial handshake.
+        let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // Extract session.
+        let session = pipe.client.session().unwrap();
+
+        // Configure session on new connection.
+        let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.client.set_session(&session), Ok(()));
+
+        // Client sends initial flight.
+        pipe.client.send(&mut buf).unwrap();
+
+        // Client sends 0-RTT packet.
+        let pkt_type = packet::Type::ZeroRTT;
+
+        let frames = [frame::Frame::Stream {
+            stream_id: 4,
+            data: stream::RangeBuf::from(b"aaaaa", 0, true),
+        }];
+
+        let len =
+            testing::encode_pkt(&mut pipe.client, pkt_type, &frames, &mut buf)
+                .unwrap();
+
+        // Simulate a truncated packet by sending one byte less.
+        let mut zrtt = (&buf[..len - 1]).to_vec();
+
+        // 0-RTT packet is received before the Initial one.
+        assert_eq!(pipe.server_recv(&mut zrtt), Err(Error::InvalidPacket));
+
+        assert_eq!(pipe.server.undecryptable_pkts.len(), 0);
+
+        assert!(pipe.server.is_closed());
+    }
+
+    #[test]
+    /// Tests that a pre-v1 client can connect to a v1-enabled server, by making
+    /// the server downgrade to the pre-v1 version.
+    fn handshake_downgrade_v1() {
+        let mut config = Config::new(PROTOCOL_VERSION_DRAFT29).unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        config.verify_peer(false);
+
+        let mut pipe = testing::Pipe::with_client_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(pipe.client.version, PROTOCOL_VERSION_DRAFT29);
+        assert_eq!(pipe.server.version, PROTOCOL_VERSION_DRAFT29);
     }
 
     #[test]
     fn limit_handshake_data() {
-        let mut buf = [0; 65535];
-
         let mut config = Config::new(PROTOCOL_VERSION).unwrap();
         config
             .load_cert_chain_from_pem_file("examples/cert-big.crt")
@@ -5024,24 +6306,23 @@
 
         let mut pipe = testing::Pipe::with_server_config(&mut config).unwrap();
 
-        let client_sent = pipe.client.send(&mut buf).unwrap();
-        let server_sent =
-            testing::recv_send(&mut pipe.server, &mut buf, client_sent).unwrap();
+        let flight = testing::emit_flight(&mut pipe.client).unwrap();
+        let client_sent = flight.iter().fold(0, |out, p| out + p.len());
+        testing::process_flight(&mut pipe.server, flight).unwrap();
 
-        assert_eq!(server_sent, (client_sent - 1) * MAX_AMPLIFICATION_FACTOR);
+        let flight = testing::emit_flight(&mut pipe.server).unwrap();
+        let server_sent = flight.iter().fold(0, |out, p| out + p.len());
+
+        assert_eq!(server_sent, client_sent * MAX_AMPLIFICATION_FACTOR);
     }
 
     #[test]
     fn stream() {
-        let mut buf = [0; 65535];
-
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(4, b"hello, world", true), Ok(12));
-
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert!(!pipe.server.stream_finished(4));
 
@@ -5057,7 +6338,7 @@
     }
 
     #[test]
-    fn stream_send_on_32bit_arch() {
+    fn zero_rtt() {
         let mut buf = [0; 65535];
 
         let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
@@ -5070,6 +6351,64 @@
         config
             .set_application_protos(b"\x06proto1\x06proto2")
             .unwrap();
+        config.set_initial_max_data(30);
+        config.set_initial_max_stream_data_bidi_local(15);
+        config.set_initial_max_stream_data_bidi_remote(15);
+        config.set_initial_max_streams_bidi(3);
+        config.enable_early_data();
+        config.verify_peer(false);
+
+        // Perform initial handshake.
+        let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // Extract session.
+        let session = pipe.client.session().unwrap();
+
+        // Configure session on new connection.
+        let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.client.set_session(&session), Ok(()));
+
+        // Client sends initial flight.
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+        let mut initial = (&buf[..len]).to_vec();
+
+        assert_eq!(pipe.client.is_in_early_data(), true);
+
+        // Client sends 0-RTT data.
+        assert_eq!(pipe.client.stream_send(4, b"hello, world", true), Ok(12));
+
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+        let mut zrtt = (&buf[..len]).to_vec();
+
+        // Server receives packets.
+        assert_eq!(pipe.server_recv(&mut initial), Ok(initial.len()));
+        assert_eq!(pipe.server.is_in_early_data(), true);
+
+        assert_eq!(pipe.server_recv(&mut zrtt), Ok(zrtt.len()));
+
+        // 0-RTT stream data is readable.
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        let mut b = [0; 15];
+        assert_eq!(pipe.server.stream_recv(4, &mut b), Ok((12, true)));
+        assert_eq!(&b[..12], b"hello, world");
+    }
+
+    #[test]
+    fn stream_send_on_32bit_arch() {
+        let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
         config.set_initial_max_data(2_u64.pow(32) + 5);
         config.set_initial_max_stream_data_bidi_local(15);
         config.set_initial_max_stream_data_bidi_remote(15);
@@ -5079,14 +6418,13 @@
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         // In 32bit arch, send_capacity() should be min(2^32+5, cwnd),
         // not min(5, cwnd)
         assert_eq!(pipe.client.stream_send(4, b"hello, world", true), Ok(12));
 
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert!(!pipe.server.stream_finished(4));
     }
@@ -5096,8 +6434,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [frame::Frame::Stream {
             stream_id: 4,
@@ -5138,12 +6475,78 @@
     }
 
     #[test]
-    fn flow_control_limit() {
+    /// Tests that receiving a MAX_STREAM_DATA frame for a receive-only
+    /// unidirectional stream is forbidden.
+    fn max_stream_data_receive_uni() {
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // Client opens unidirectional stream.
+        assert_eq!(pipe.client.stream_send(2, b"hello", false), Ok(5));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Client sends MAX_STREAM_DATA on local unidirectional stream.
+        let frames = [frame::Frame::MaxStreamData {
+            stream_id: 2,
+            max: 1024,
+        }];
+
+        let pkt_type = packet::Type::Short;
+        assert_eq!(
+            pipe.send_pkt_to_server(pkt_type, &frames, &mut buf),
+            Err(Error::InvalidStreamState(2)),
+        );
+    }
+
+    #[test]
+    fn empty_payload() {
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // Send a packet with no frames.
+        let pkt_type = packet::Type::Short;
+        assert_eq!(
+            pipe.send_pkt_to_server(pkt_type, &[], &mut buf),
+            Err(Error::InvalidPacket)
+        );
+    }
+
+    #[test]
+    fn min_payload() {
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
 
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        // Send a non-ack-eliciting packet.
+        let frames = [frame::Frame::Padding { len: 4 }];
+
+        let pkt_type = packet::Type::Initial;
+        let written =
+            testing::encode_pkt(&mut pipe.client, pkt_type, &frames, &mut buf)
+                .unwrap();
+        assert_eq!(pipe.server_recv(&mut buf[..written]), Ok(written));
+
+        assert_eq!(pipe.server.max_send_bytes, 195);
+
+        // Force server to send a single PING frame.
+        pipe.server.recovery.loss_probes[packet::EPOCH_INITIAL] = 1;
+
+        // Artificially limit the number of bytes the server can send.
+        pipe.server.max_send_bytes = 60;
+
+        assert_eq!(pipe.server.send(&mut buf), Err(Error::Done));
+    }
+
+    #[test]
+    fn flow_control_limit() {
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [
             frame::Frame::Stream {
@@ -5172,8 +6575,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [
             // One byte less than stream limit.
@@ -5201,8 +6603,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [
             frame::Frame::Stream {
@@ -5240,16 +6641,56 @@
         // Ignore ACK.
         iter.next().unwrap();
 
+        assert_eq!(
+            iter.next(),
+            Some(&frame::Frame::MaxStreamData {
+                stream_id: 4,
+                max: 30
+            })
+        );
         assert_eq!(iter.next(), Some(&frame::Frame::MaxData { max: 46 }));
     }
 
     #[test]
+    /// Tests that flow control is properly updated even when a stream is shut
+    /// down.
+    fn flow_control_drain() {
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // Client opens a stream and sends some data.
+        assert_eq!(pipe.client.stream_send(4, b"aaaaa", false), Ok(5));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Server receives data, without reading it.
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        // In the meantime, client sends more data.
+        assert_eq!(pipe.client.stream_send(4, b"aaaaa", false), Ok(5));
+        assert_eq!(pipe.client.stream_send(4, b"aaaaa", true), Ok(5));
+
+        assert_eq!(pipe.client.stream_send(8, b"aaaaa", false), Ok(5));
+        assert_eq!(pipe.client.stream_send(8, b"aaaaa", false), Ok(5));
+        assert_eq!(pipe.client.stream_send(8, b"aaaaa", true), Ok(5));
+
+        // Server shuts down one stream.
+        assert_eq!(pipe.server.stream_shutdown(4, Shutdown::Read, 42), Ok(()));
+
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), None);
+
+        // Flush connection.
+        assert_eq!(pipe.advance(), Ok(()));
+    }
+
+    #[test]
     fn stream_flow_control_limit_bidi() {
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [frame::Frame::Stream {
             stream_id: 4,
@@ -5268,8 +6709,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [frame::Frame::Stream {
             stream_id: 2,
@@ -5288,8 +6728,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [frame::Frame::Stream {
             stream_id: 4,
@@ -5330,12 +6769,63 @@
     }
 
     #[test]
+    fn stream_left_bidi() {
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(3, pipe.client.peer_streams_left_bidi());
+        assert_eq!(3, pipe.server.peer_streams_left_bidi());
+
+        pipe.server.stream_send(1, b"a", false).ok();
+        assert_eq!(2, pipe.server.peer_streams_left_bidi());
+        pipe.server.stream_send(5, b"a", false).ok();
+        assert_eq!(1, pipe.server.peer_streams_left_bidi());
+
+        pipe.server.stream_send(9, b"a", false).ok();
+        assert_eq!(0, pipe.server.peer_streams_left_bidi());
+
+        let frames = [frame::Frame::MaxStreamsBidi { max: MAX_STREAM_ID }];
+
+        let pkt_type = packet::Type::Short;
+        assert!(pipe.send_pkt_to_server(pkt_type, &frames, &mut buf).is_ok());
+
+        assert_eq!(MAX_STREAM_ID - 3, pipe.server.peer_streams_left_bidi());
+    }
+
+    #[test]
+    fn stream_left_uni() {
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(3, pipe.client.peer_streams_left_uni());
+        assert_eq!(3, pipe.server.peer_streams_left_uni());
+
+        pipe.server.stream_send(3, b"a", false).ok();
+        assert_eq!(2, pipe.server.peer_streams_left_uni());
+        pipe.server.stream_send(7, b"a", false).ok();
+        assert_eq!(1, pipe.server.peer_streams_left_uni());
+
+        pipe.server.stream_send(11, b"a", false).ok();
+        assert_eq!(0, pipe.server.peer_streams_left_uni());
+
+        let frames = [frame::Frame::MaxStreamsUni { max: MAX_STREAM_ID }];
+
+        let pkt_type = packet::Type::Short;
+        assert!(pipe.send_pkt_to_server(pkt_type, &frames, &mut buf).is_ok());
+
+        assert_eq!(MAX_STREAM_ID - 3, pipe.server.peer_streams_left_uni());
+    }
+
+    #[test]
     fn stream_limit_bidi() {
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [
             frame::Frame::Stream {
@@ -5380,8 +6870,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [frame::Frame::MaxStreamsBidi { max: MAX_STREAM_ID }];
 
@@ -5404,8 +6893,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [
             frame::Frame::Stream {
@@ -5450,8 +6938,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [frame::Frame::MaxStreamsUni { max: MAX_STREAM_ID }];
 
@@ -5474,8 +6961,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [frame::Frame::StreamsBlockedBidi {
             limit: MAX_STREAM_ID,
@@ -5500,8 +6986,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [frame::Frame::StreamsBlockedUni {
             limit: MAX_STREAM_ID,
@@ -5526,8 +7011,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [
             frame::Frame::Stream {
@@ -5557,8 +7041,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [
             frame::Frame::Stream {
@@ -5588,8 +7071,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [
             frame::Frame::Stream {
@@ -5623,8 +7105,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [frame::Frame::PathChallenge {
             data: vec![0xba; 8],
@@ -5662,18 +7143,21 @@
         let mut pipe = testing::Pipe::default().unwrap();
 
         // Client sends initial flight
-        let mut len = pipe.client.send(&mut buf).unwrap();
+        let flight = testing::emit_flight(&mut pipe.client).unwrap();
+        testing::process_flight(&mut pipe.server, flight).unwrap();
 
-        // Server sends initial flight..
-        len = testing::recv_send(&mut pipe.server, &mut buf, len).unwrap();
+        // Server sends initial flight.
+        let flight = testing::emit_flight(&mut pipe.server).unwrap();
+        testing::process_flight(&mut pipe.client, flight).unwrap();
 
         // Client sends Handshake packet.
-        len = testing::recv_send(&mut pipe.client, &mut buf, len).unwrap();
+        let flight = testing::emit_flight(&mut pipe.client).unwrap();
 
         // Emulate handshake packet delay by not making server process client
         // packet.
-        let mut delayed = (&buf[..len]).to_vec();
-        testing::recv_send(&mut pipe.server, &mut buf, 0).unwrap();
+        let delayed = flight.clone();
+
+        testing::emit_flight(&mut pipe.server).ok();
 
         assert!(pipe.client.is_established());
 
@@ -5687,7 +7171,8 @@
         let written =
             testing::encode_pkt(&mut pipe.client, pkt_type, &frames, &mut buf)
                 .unwrap();
-        assert_eq!(pipe.server.recv(&mut buf[..written]), Ok(written));
+
+        assert_eq!(pipe.server_recv(&mut buf[..written]), Ok(written));
 
         // Send 1-RTT packet #1.
         let frames = [frame::Frame::Stream {
@@ -5698,7 +7183,8 @@
         let written =
             testing::encode_pkt(&mut pipe.client, pkt_type, &frames, &mut buf)
                 .unwrap();
-        assert_eq!(pipe.server.recv(&mut buf[..written]), Ok(written));
+
+        assert_eq!(pipe.server_recv(&mut buf[..written]), Ok(written));
 
         assert!(!pipe.server.is_established());
 
@@ -5713,7 +7199,7 @@
         );
 
         // Process delayed packet.
-        pipe.server.recv(&mut delayed).unwrap();
+        testing::process_flight(&mut pipe.server, delayed).unwrap();
 
         assert!(pipe.server.is_established());
 
@@ -5725,31 +7211,310 @@
     }
 
     #[test]
+    fn stop_sending() {
+        let mut b = [0; 15];
+
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // Client sends some data, and closes stream.
+        assert_eq!(pipe.client.stream_send(4, b"hello", true), Ok(5));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Server gets data.
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        assert_eq!(pipe.server.stream_recv(4, &mut b), Ok((5, true)));
+        assert!(pipe.server.stream_finished(4));
+
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), None);
+
+        // Server sends data, until blocked.
+        let mut r = pipe.server.writable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        loop {
+            if pipe.server.stream_send(4, b"world", false) == Ok(0) {
+                break;
+            }
+
+            assert_eq!(pipe.advance(), Ok(()));
+        }
+
+        let mut r = pipe.server.writable();
+        assert_eq!(r.next(), None);
+
+        // Client sends STOP_SENDING.
+        let frames = [frame::Frame::StopSending {
+            stream_id: 4,
+            error_code: 42,
+        }];
+
+        let pkt_type = packet::Type::Short;
+        let len = pipe
+            .send_pkt_to_server(pkt_type, &frames, &mut buf)
+            .unwrap();
+
+        // Server sent a RESET_STREAM frame in response.
+        let frames =
+            testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
+
+        let mut iter = frames.iter();
+
+        // Skip ACK frame.
+        iter.next();
+
+        assert_eq!(
+            iter.next(),
+            Some(&frame::Frame::ResetStream {
+                stream_id: 4,
+                error_code: 42,
+                final_size: 15,
+            })
+        );
+
+        // Stream is writable, but writing returns an error.
+        let mut r = pipe.server.writable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        assert_eq!(
+            pipe.server.stream_send(4, b"world", true),
+            Err(Error::StreamStopped(42)),
+        );
+
+        assert_eq!(pipe.server.streams.len(), 1);
+
+        // Client acks RESET_STREAM frame.
+        let mut ranges = ranges::RangeSet::default();
+        ranges.insert(0..6);
+
+        let frames = [frame::Frame::ACK {
+            ack_delay: 15,
+            ranges,
+        }];
+
+        assert_eq!(pipe.send_pkt_to_server(pkt_type, &frames, &mut buf), Ok(0));
+
+        // Stream is collected on the server after RESET_STREAM is acked.
+        assert_eq!(pipe.server.streams.len(), 0);
+
+        // Sending STOP_SENDING again shouldn't trigger RESET_STREAM again.
+        let frames = [frame::Frame::StopSending {
+            stream_id: 4,
+            error_code: 42,
+        }];
+
+        let len = pipe
+            .send_pkt_to_server(pkt_type, &frames, &mut buf)
+            .unwrap();
+
+        let frames =
+            testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
+
+        assert_eq!(frames.len(), 1);
+
+        match frames.iter().next() {
+            Some(frame::Frame::ACK { .. }) => (),
+
+            f => panic!("expected ACK frame, got {:?}", f),
+        };
+
+        let mut r = pipe.server.writable();
+        assert_eq!(r.next(), None);
+    }
+
+    #[test]
+    fn stop_sending_fin() {
+        let mut b = [0; 15];
+
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // Client sends some data, and closes stream.
+        assert_eq!(pipe.client.stream_send(4, b"hello", true), Ok(5));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Server gets data.
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        assert_eq!(pipe.server.stream_recv(4, &mut b), Ok((5, true)));
+        assert!(pipe.server.stream_finished(4));
+
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), None);
+
+        // Server sends data, and closes stream.
+        let mut r = pipe.server.writable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        assert_eq!(pipe.server.stream_send(4, b"world", true), Ok(5));
+
+        // Client sends STOP_SENDING before server flushes stream.
+        let frames = [frame::Frame::StopSending {
+            stream_id: 4,
+            error_code: 42,
+        }];
+
+        let pkt_type = packet::Type::Short;
+        let len = pipe
+            .send_pkt_to_server(pkt_type, &frames, &mut buf)
+            .unwrap();
+
+        // Server sent a RESET_STREAM frame in response.
+        let frames =
+            testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
+
+        let mut iter = frames.iter();
+
+        // Skip ACK frame.
+        iter.next();
+
+        assert_eq!(
+            iter.next(),
+            Some(&frame::Frame::ResetStream {
+                stream_id: 4,
+                error_code: 42,
+                final_size: 5,
+            })
+        );
+
+        // No more frames are sent by the server.
+        assert_eq!(iter.next(), None);
+    }
+
+    #[test]
     fn stream_shutdown_read() {
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
 
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
-
+        // Client sends some data.
         assert_eq!(pipe.client.stream_send(4, b"hello, world", false), Ok(12));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let mut r = pipe.server.readable();
         assert_eq!(r.next(), Some(4));
         assert_eq!(r.next(), None);
 
-        assert_eq!(pipe.server.stream_shutdown(4, Shutdown::Read, 0), Ok(()));
+        assert_eq!(pipe.client.streams.len(), 1);
+        assert_eq!(pipe.server.streams.len(), 1);
+
+        // Server shuts down stream.
+        assert_eq!(pipe.server.stream_shutdown(4, Shutdown::Read, 42), Ok(()));
 
         let mut r = pipe.server.readable();
         assert_eq!(r.next(), None);
 
-        assert_eq!(pipe.client.stream_send(4, b"bye", false), Ok(3));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        let (len, _) = pipe.server.send(&mut buf).unwrap();
+
+        let mut dummy = buf[..len].to_vec();
+
+        let frames =
+            testing::decode_pkt(&mut pipe.client, &mut dummy, len).unwrap();
+        let mut iter = frames.iter();
+
+        assert_eq!(
+            iter.next(),
+            Some(&frame::Frame::StopSending {
+                stream_id: 4,
+                error_code: 42,
+            })
+        );
+
+        assert_eq!(pipe.client_recv(&mut buf[..len]), Ok(len));
+
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Sending more data is forbidden.
+        let mut r = pipe.client.writable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        assert_eq!(
+            pipe.client.stream_send(4, b"bye", false),
+            Err(Error::StreamStopped(42))
+        );
+
+        // Server sends some data, without reading the incoming data, and closes
+        // the stream.
+        assert_eq!(pipe.server.stream_send(4, b"hello, world", true), Ok(12));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Client reads the data.
+        let mut r = pipe.client.readable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        assert_eq!(pipe.client.stream_recv(4, &mut buf), Ok((12, true)));
+
+        // Stream is collected on both sides.
+        assert_eq!(pipe.client.streams.len(), 0);
+        assert_eq!(pipe.server.streams.len(), 0);
+
+        assert_eq!(
+            pipe.server.stream_shutdown(4, Shutdown::Read, 0),
+            Err(Error::Done)
+        );
+    }
+
+    #[test]
+    fn stream_shutdown_read_after_fin() {
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // Client sends some data, and closes stream.
+        assert_eq!(pipe.client.stream_send(4, b"hello, world", true), Ok(12));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        assert_eq!(pipe.client.streams.len(), 1);
+        assert_eq!(pipe.server.streams.len(), 1);
+
+        // Server shuts down stream.
+        assert_eq!(pipe.server.stream_shutdown(4, Shutdown::Read, 42), Ok(()));
 
         let mut r = pipe.server.readable();
         assert_eq!(r.next(), None);
 
+        // Server has nothing to send.
+        assert_eq!(pipe.server.send(&mut buf), Err(Error::Done));
+
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Server sends some data, without reading the incoming data, and closes
+        // the stream.
+        assert_eq!(pipe.server.stream_send(4, b"hello, world", true), Ok(12));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Client reads the data.
+        let mut r = pipe.client.readable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        assert_eq!(pipe.client.stream_recv(4, &mut buf), Ok((12, true)));
+
+        // Stream is collected on both sides.
+        assert_eq!(pipe.client.streams.len(), 0);
+        assert_eq!(pipe.server.streams.len(), 0);
+
         assert_eq!(
             pipe.server.stream_shutdown(4, Shutdown::Read, 0),
             Err(Error::Done)
@@ -5761,34 +7526,76 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
 
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
-
+        // Client sends some data.
         assert_eq!(pipe.client.stream_send(4, b"hello, world", false), Ok(12));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let mut r = pipe.server.readable();
         assert_eq!(r.next(), Some(4));
         assert_eq!(r.next(), None);
 
-        let mut b = [0; 15];
-        pipe.server.stream_recv(4, &mut b).unwrap();
-
-        assert_eq!(pipe.client.stream_send(4, b"a", false), Ok(1));
-        assert_eq!(pipe.client.stream_shutdown(4, Shutdown::Write, 0), Ok(()));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
-
-        let mut r = pipe.server.readable();
+        let mut r = pipe.server.writable();
+        assert_eq!(r.next(), Some(4));
         assert_eq!(r.next(), None);
 
-        assert_eq!(pipe.client.stream_send(4, b"bye", false), Ok(3));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.client.streams.len(), 1);
+        assert_eq!(pipe.server.streams.len(), 1);
 
-        let mut r = pipe.server.readable();
+        // Server sends some data.
+        assert_eq!(pipe.server.stream_send(4, b"goodbye, world", false), Ok(14));
+
+        // Server shuts down stream.
+        assert_eq!(pipe.server.stream_shutdown(4, Shutdown::Write, 42), Ok(()));
+
+        let mut r = pipe.server.writable();
         assert_eq!(r.next(), None);
 
+        let (len, _) = pipe.server.send(&mut buf).unwrap();
+
+        let mut dummy = buf[..len].to_vec();
+
+        let frames =
+            testing::decode_pkt(&mut pipe.client, &mut dummy, len).unwrap();
+        let mut iter = frames.iter();
+
         assert_eq!(
-            pipe.client.stream_shutdown(4, Shutdown::Write, 0),
+            iter.next(),
+            Some(&frame::Frame::ResetStream {
+                stream_id: 4,
+                error_code: 42,
+                final_size: 14,
+            })
+        );
+
+        assert_eq!(pipe.client_recv(&mut buf[..len]), Ok(len));
+
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Sending more data is forbidden.
+        assert_eq!(
+            pipe.server.stream_send(4, b"bye", false),
+            Err(Error::FinalSize)
+        );
+
+        // Client sends some data and closes the stream.
+        assert_eq!(pipe.client.stream_send(4, b"bye", true), Ok(3));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Server reads the data.
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), Some(4));
+        assert_eq!(r.next(), None);
+
+        assert_eq!(pipe.server.stream_recv(4, &mut buf), Ok((15, true)));
+
+        // Stream is collected on the server (client-side check is a TODO).
+        // TODO: assert_eq!(pipe.client.streams.len(), 0);
+        assert_eq!(pipe.server.streams.len(), 0);
+
+        assert_eq!(
+            pipe.server.stream_shutdown(4, Shutdown::Write, 0),
             Err(Error::Done)
         );
     }
@@ -5800,27 +7607,31 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(8, b"aaaaa", false), Ok(5));
         assert_eq!(pipe.client.stream_send(0, b"aaaaa", false), Ok(5));
         assert_eq!(pipe.client.stream_send(4, b"aaaaa", false), Ok(5));
 
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
 
         let frames =
             testing::decode_pkt(&mut pipe.server, &mut buf, len).unwrap();
 
+        let mut iter = frames.iter();
+
+        // Skip ACK frame.
+        iter.next();
+
         assert_eq!(
-            frames.iter().next(),
+            iter.next(),
             Some(&frame::Frame::Stream {
                 stream_id: 8,
                 data: stream::RangeBuf::from(b"aaaaa", 0, false),
             })
         );
 
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
 
         let frames =
             testing::decode_pkt(&mut pipe.server, &mut buf, len).unwrap();
@@ -5833,7 +7644,7 @@
             })
         );
 
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
 
         let frames =
             testing::decode_pkt(&mut pipe.server, &mut buf, len).unwrap();
@@ -5850,11 +7661,8 @@
     #[test]
     /// Tests the readable iterator.
     fn stream_readable() {
-        let mut buf = [0; 65535];
-
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         // No readable streams.
         let mut r = pipe.client.readable();
@@ -5868,7 +7676,7 @@
         let mut r = pipe.server.readable();
         assert_eq!(r.next(), None);
 
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server received stream.
         let mut r = pipe.server.readable();
@@ -5879,7 +7687,7 @@
             pipe.server.stream_send(4, b"aaaaaaaaaaaaaaa", false),
             Ok(15)
         );
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let mut r = pipe.client.readable();
         assert_eq!(r.next(), Some(4));
@@ -5888,7 +7696,7 @@
         // Client drains stream.
         let mut b = [0; 15];
         pipe.client.stream_recv(4, &mut b).unwrap();
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let mut r = pipe.client.readable();
         assert_eq!(r.next(), None);
@@ -5905,10 +7713,10 @@
 
         // Client creates multiple streams.
         assert_eq!(pipe.client.stream_send(8, b"aaaaa", false), Ok(5));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(12, b"aaaaa", false), Ok(5));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let mut r = pipe.server.readable();
         assert_eq!(r.len(), 2);
@@ -5923,11 +7731,8 @@
     #[test]
     /// Tests the writable iterator.
     fn stream_writable() {
-        let mut buf = [0; 65535];
-
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         // No writable streams.
         let mut w = pipe.client.writable();
@@ -5940,7 +7745,7 @@
         assert_eq!(w.next(), Some(4));
         assert_eq!(w.next(), None);
 
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server created stream.
         let mut w = pipe.server.writable();
@@ -5956,12 +7761,12 @@
         let mut w = pipe.server.writable();
         assert_eq!(w.next(), None);
 
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Client drains stream.
         let mut b = [0; 15];
         pipe.client.stream_recv(4, &mut b).unwrap();
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server stream is writable again.
         let mut w = pipe.server.writable();
@@ -5976,10 +7781,10 @@
 
         // Client creates multiple streams.
         assert_eq!(pipe.client.stream_send(8, b"aaaaa", false), Ok(5));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(12, b"aaaaa", false), Ok(5));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let mut w = pipe.server.writable();
         assert_eq!(w.len(), 2);
@@ -6002,24 +7807,21 @@
     /// Tests that we don't exceed the per-connection flow control limit set by
     /// the peer.
     fn flow_control_limit_send() {
-        let mut buf = [0; 65535];
-
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(
             pipe.client.stream_send(0, b"aaaaaaaaaaaaaaa", false),
             Ok(15)
         );
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
         assert_eq!(
             pipe.client.stream_send(4, b"aaaaaaaaaaaaaaa", false),
             Ok(15)
         );
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
         assert_eq!(pipe.client.stream_send(8, b"a", false), Ok(0));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let mut r = pipe.server.readable();
         assert!(r.next().is_some());
@@ -6052,7 +7854,7 @@
         assert_eq!(pipe.server.timeout(), None);
 
         assert_eq!(
-            pipe.server.recv(&mut buf[..written]),
+            pipe.server_recv(&mut buf[..written]),
             Err(Error::CryptoFail)
         );
 
@@ -6067,10 +7869,10 @@
         let mut pipe = testing::Pipe::default().unwrap();
 
         // Client sends initial flight.
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
 
         // Server sends initial flight.
-        assert_eq!(pipe.server.recv(&mut buf[..len]), Ok(1200));
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(1200));
 
         let frames = [frame::Frame::Padding { len: 10 }];
 
@@ -6088,7 +7890,7 @@
         buf[written - 1] = !buf[written - 1];
 
         // Client will ignore invalid packet.
-        assert_eq!(pipe.client.recv(&mut buf[..written]), Ok(68));
+        assert_eq!(pipe.client_recv(&mut buf[..written]), Ok(71));
 
         // The connection should be alive...
         assert_eq!(pipe.client.is_closed(), false);
@@ -6114,8 +7916,8 @@
         let hdr = Header {
             ty: packet::Type::Initial,
             version: pipe.client.version,
-            dcid: pipe.client.dcid.clone(),
-            scid: pipe.client.scid.clone(),
+            dcid: ConnectionId::from_ref(&pipe.client.dcid),
+            scid: ConnectionId::from_ref(&pipe.client.scid),
             pkt_num: 0,
             pkt_num_len: pn_len,
             token: pipe.client.token.clone(),
@@ -6162,8 +7964,8 @@
         assert_eq!(pipe.server.timeout(), None);
 
         assert_eq!(
-            pipe.server.recv(&mut buf[..written]),
-            Err(Error::BufferTooShort)
+            pipe.server_recv(&mut buf[..written]),
+            Err(Error::InvalidPacket)
         );
 
         assert!(pipe.server.is_closed());
@@ -6173,9 +7975,9 @@
     /// Tests that invalid packets don't cause the connection to be closed.
     fn invalid_packet() {
         let mut buf = [0; 65535];
-        let mut pipe = testing::Pipe::default().unwrap();
 
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let frames = [frame::Frame::Padding { len: 10 }];
 
@@ -6192,19 +7994,27 @@
         // cannot be authenticated during decryption).
         buf[written - 1] = !buf[written - 1];
 
-        assert_eq!(pipe.server.recv(&mut buf[..written]), Ok(written));
+        assert_eq!(pipe.server_recv(&mut buf[..written]), Ok(written));
 
         // Corrupt the packets's first byte to make the header fail decoding.
         buf[0] = 255;
 
-        assert_eq!(pipe.server.recv(&mut buf[..written]), Ok(written));
+        assert_eq!(pipe.server_recv(&mut buf[..written]), Ok(written));
+    }
+
+    #[test]
+    fn recv_empty_buffer() {
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(pipe.server_recv(&mut buf[..0]), Err(Error::BufferTooShort));
     }
 
     #[test]
     /// Tests that the MAX_STREAMS frame is sent for bidirectional streams.
     fn stream_limit_update_bidi() {
-        let mut buf = [0; 65535];
-
         let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
         config
             .load_cert_chain_from_pem_file("examples/cert.crt")
@@ -6224,51 +8034,51 @@
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         // Client sends stream data.
         assert_eq!(pipe.client.stream_send(0, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(4, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(4, b"b", true), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(0, b"b", true), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server reads stream data.
         let mut b = [0; 15];
         pipe.server.stream_recv(0, &mut b).unwrap();
         pipe.server.stream_recv(4, &mut b).unwrap();
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server sends stream data, with fin.
         assert_eq!(pipe.server.stream_send(0, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.server.stream_send(4, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.server.stream_send(4, b"b", true), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.server.stream_send(0, b"b", true), Ok(1));
 
         // Server sends MAX_STREAMS.
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Client tries to create new streams.
         assert_eq!(pipe.client.stream_send(8, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(12, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(16, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(
             pipe.client.stream_send(20, b"a", false),
@@ -6281,8 +8091,6 @@
     #[test]
     /// Tests that the MAX_STREAMS frame is sent for unirectional streams.
     fn stream_limit_update_uni() {
-        let mut buf = [0; 65535];
-
         let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
         config
             .load_cert_chain_from_pem_file("examples/cert.crt")
@@ -6302,20 +8110,20 @@
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         // Client sends stream data.
         assert_eq!(pipe.client.stream_send(2, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(6, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(6, b"b", true), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(2, b"b", true), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server reads stream data.
         let mut b = [0; 15];
@@ -6323,17 +8131,17 @@
         pipe.server.stream_recv(6, &mut b).unwrap();
 
         // Server sends MAX_STREAMS.
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Client tries to create new streams.
         assert_eq!(pipe.client.stream_send(10, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(14, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(18, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(
             pipe.client.stream_send(22, b"a", false),
@@ -6348,17 +8156,14 @@
     /// data in the buffer, and that the buffer becomes readable on the other
     /// side.
     fn stream_zero_length_fin() {
-        let mut buf = [0; 65535];
-
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(
             pipe.client.stream_send(0, b"aaaaaaaaaaaaaaa", false),
             Ok(15)
         );
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let mut r = pipe.server.readable();
         assert_eq!(r.next(), Some(0));
@@ -6366,11 +8171,11 @@
 
         let mut b = [0; 15];
         pipe.server.stream_recv(0, &mut b).unwrap();
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Client sends zero-length frame.
         assert_eq!(pipe.client.stream_send(0, b"", true), Ok(0));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Stream should be readable on the server after receiving empty fin.
         let mut r = pipe.server.readable();
@@ -6379,11 +8184,11 @@
 
         let mut b = [0; 15];
         pipe.server.stream_recv(0, &mut b).unwrap();
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Client sends zero-length frame (again).
         assert_eq!(pipe.client.stream_send(0, b"", true), Ok(0));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Stream should _not_ be readable on the server after receiving empty
         // fin, because it was already finished.
@@ -6392,19 +8197,78 @@
     }
 
     #[test]
+    /// Tests that the stream's fin flag is properly flushed even if there's no
+    /// data in the buffer, that the buffer becomes readable on the other
+    /// side and stays readable even if the stream is fin'd locally.
+    fn stream_zero_length_fin_deferred_collection() {
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(
+            pipe.client.stream_send(0, b"aaaaaaaaaaaaaaa", false),
+            Ok(15)
+        );
+        assert_eq!(pipe.advance(), Ok(()));
+
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), Some(0));
+        assert!(r.next().is_none());
+
+        let mut b = [0; 15];
+        pipe.server.stream_recv(0, &mut b).unwrap();
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Client sends zero-length frame.
+        assert_eq!(pipe.client.stream_send(0, b"", true), Ok(0));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Server sends zero-length frame.
+        assert_eq!(pipe.server.stream_send(0, b"", true), Ok(0));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Stream should be readable on the server after receiving empty fin.
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), Some(0));
+        assert!(r.next().is_none());
+
+        let mut b = [0; 15];
+        pipe.server.stream_recv(0, &mut b).unwrap();
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Client sends zero-length frame (again).
+        assert_eq!(pipe.client.stream_send(0, b"", true), Ok(0));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Stream should _not_ be readable on the server after receiving empty
+        // fin, because it was already finished.
+        let mut r = pipe.server.readable();
+        assert_eq!(r.next(), None);
+
+        // Stream _is_ readable on the client side.
+        let mut r = pipe.client.readable();
+        assert_eq!(r.next(), Some(0));
+
+        pipe.client.stream_recv(0, &mut b).unwrap();
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Stream is completed and _is not_ readable.
+        let mut r = pipe.client.readable();
+        assert_eq!(r.next(), None);
+    }
+
+    #[test]
     /// Tests that completed streams are garbage collected.
     fn collect_streams() {
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(pipe.client.streams.len(), 0);
         assert_eq!(pipe.server.streams.len(), 0);
 
         assert_eq!(pipe.client.stream_send(0, b"aaaaa", true), Ok(5));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert!(!pipe.client.stream_finished(0));
         assert!(!pipe.server.stream_finished(0));
@@ -6414,10 +8278,10 @@
 
         let mut b = [0; 5];
         pipe.server.stream_recv(0, &mut b).unwrap();
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.server.stream_send(0, b"aaaaa", true), Ok(5));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert!(!pipe.client.stream_finished(0));
         assert!(pipe.server.stream_finished(0));
@@ -6427,7 +8291,7 @@
 
         let mut b = [0; 5];
         pipe.client.stream_recv(0, &mut b).unwrap();
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.streams.len(), 0);
         assert_eq!(pipe.server.streams.len(), 0);
@@ -6461,11 +8325,8 @@
 
     #[test]
     fn peer_cert() {
-        let mut buf = [0; 65535];
-
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         match pipe.client.peer_cert() {
             Some(c) => assert_eq!(c.len(), 753),
@@ -6492,15 +8353,16 @@
         let mut pipe = testing::Pipe::with_server_config(&mut config).unwrap();
 
         // Client sends initial flight.
-        let mut len = pipe.client.send(&mut buf).unwrap();
+        let (mut len, _) = pipe.client.send(&mut buf).unwrap();
 
         // Server sends Retry packet.
         let hdr = Header::from_slice(&mut buf[..len], MAX_CONN_ID_LEN).unwrap();
 
-        let odcid = hdr.dcid.to_vec();
+        let odcid = hdr.dcid.clone();
 
         let mut scid = [0; MAX_CONN_ID_LEN];
         rand::rand_bytes(&mut scid[..]);
+        let scid = ConnectionId::from_ref(&scid);
 
         let token = b"quiche test retry token";
 
@@ -6515,19 +8377,19 @@
         .unwrap();
 
         // Client receives Retry and sends new Initial.
-        assert_eq!(pipe.client.recv(&mut buf[..len]), Ok(len));
+        assert_eq!(pipe.client_recv(&mut buf[..len]), Ok(len));
 
-        len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
 
         let hdr = Header::from_slice(&mut buf[..len], MAX_CONN_ID_LEN).unwrap();
         assert_eq!(&hdr.token.unwrap(), token);
 
-        // Server accepts connection and send first flight.
-        pipe.server = accept(&scid, Some(&odcid), &mut config).unwrap();
+        // Server accepts connection.
+        let from = "127.0.0.1:1234".parse().unwrap();
+        pipe.server = accept(&scid, Some(&odcid), from, &mut config).unwrap();
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(len));
 
-        len = testing::recv_send(&mut pipe.server, &mut buf, len).unwrap();
-        len = testing::recv_send(&mut pipe.client, &mut buf, len).unwrap();
-        testing::recv_send(&mut pipe.server, &mut buf, len).unwrap();
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert!(pipe.client.is_established());
         assert!(pipe.server.is_established());
@@ -6551,13 +8413,14 @@
         let mut pipe = testing::Pipe::with_server_config(&mut config).unwrap();
 
         // Client sends initial flight.
-        let mut len = pipe.client.send(&mut buf).unwrap();
+        let (mut len, _) = pipe.client.send(&mut buf).unwrap();
 
         // Server sends Retry packet.
         let hdr = Header::from_slice(&mut buf[..len], MAX_CONN_ID_LEN).unwrap();
 
         let mut scid = [0; MAX_CONN_ID_LEN];
         rand::rand_bytes(&mut scid[..]);
+        let scid = ConnectionId::from_ref(&scid);
 
         let token = b"quiche test retry token";
 
@@ -6572,18 +8435,20 @@
         .unwrap();
 
         // Client receives Retry and sends new Initial.
-        assert_eq!(pipe.client.recv(&mut buf[..len]), Ok(len));
+        assert_eq!(pipe.client_recv(&mut buf[..len]), Ok(len));
 
-        len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
 
         // Server accepts connection and send first flight. But original
         // destination connection ID is ignored.
-        pipe.server = accept(&scid, None, &mut config).unwrap();
+        let from = "127.0.0.1:1234".parse().unwrap();
+        pipe.server = accept(&scid, None, from, &mut config).unwrap();
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(len));
 
-        len = testing::recv_send(&mut pipe.server, &mut buf, len).unwrap();
+        let flight = testing::emit_flight(&mut pipe.server).unwrap();
 
         assert_eq!(
-            pipe.client.recv(&mut buf[..len]),
+            testing::process_flight(&mut pipe.client, flight),
             Err(Error::InvalidTransportParam)
         );
     }
@@ -6606,13 +8471,14 @@
         let mut pipe = testing::Pipe::with_server_config(&mut config).unwrap();
 
         // Client sends initial flight.
-        let mut len = pipe.client.send(&mut buf).unwrap();
+        let (mut len, _) = pipe.client.send(&mut buf).unwrap();
 
         // Server sends Retry packet.
         let hdr = Header::from_slice(&mut buf[..len], MAX_CONN_ID_LEN).unwrap();
 
         let mut scid = [0; MAX_CONN_ID_LEN];
         rand::rand_bytes(&mut scid[..]);
+        let scid = ConnectionId::from_ref(&scid);
 
         let token = b"quiche test retry token";
 
@@ -6627,18 +8493,21 @@
         .unwrap();
 
         // Client receives Retry and sends new Initial.
-        assert_eq!(pipe.client.recv(&mut buf[..len]), Ok(len));
+        assert_eq!(pipe.client_recv(&mut buf[..len]), Ok(len));
 
-        len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
 
         // Server accepts connection and send first flight. But original
         // destination connection ID is invalid.
-        pipe.server = accept(&scid, Some(b"bogus value"), &mut config).unwrap();
+        let from = "127.0.0.1:1234".parse().unwrap();
+        let odcid = ConnectionId::from_ref(b"bogus value");
+        pipe.server = accept(&scid, Some(&odcid), from, &mut config).unwrap();
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(len));
 
-        len = testing::recv_send(&mut pipe.server, &mut buf, len).unwrap();
+        let flight = testing::emit_flight(&mut pipe.server).unwrap();
 
         assert_eq!(
-            pipe.client.recv(&mut buf[..len]),
+            testing::process_flight(&mut pipe.client, flight),
             Err(Error::InvalidTransportParam)
         );
     }
@@ -6646,31 +8515,50 @@
     fn check_send(_: &mut impl Send) {}
 
     #[test]
+    fn config_must_be_send() {
+        let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        check_send(&mut config);
+    }
+
+    #[test]
     fn connection_must_be_send() {
         let mut pipe = testing::Pipe::default().unwrap();
         check_send(&mut pipe.client);
     }
 
+    fn check_sync(_: &mut impl Sync) {}
+
+    #[test]
+    fn config_must_be_sync() {
+        let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        check_sync(&mut config);
+    }
+
+    #[test]
+    fn connection_must_be_sync() {
+        let mut pipe = testing::Pipe::default().unwrap();
+        check_sync(&mut pipe.client);
+    }
+
     #[test]
     fn data_blocked() {
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(0, b"aaaaaaaaaa", false), Ok(10));
         assert_eq!(pipe.client.blocked_limit, None);
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(4, b"aaaaaaaaaa", false), Ok(10));
         assert_eq!(pipe.client.blocked_limit, None);
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(8, b"aaaaaaaaaaa", false), Ok(10));
         assert_eq!(pipe.client.blocked_limit, Some(30));
 
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
         assert_eq!(pipe.client.blocked_limit, None);
 
         let frames =
@@ -6696,8 +8584,7 @@
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(0, b"aaaaa", false), Ok(5));
         assert_eq!(pipe.client.streams.blocked().len(), 0);
@@ -6708,7 +8595,7 @@
         assert_eq!(pipe.client.stream_send(0, b"aaaaaa", false), Ok(5));
         assert_eq!(pipe.client.streams.blocked().len(), 1);
 
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
         assert_eq!(pipe.client.streams.blocked().len(), 0);
 
         let frames =
@@ -6716,6 +8603,9 @@
 
         let mut iter = frames.iter();
 
+        // Skip ACK frame.
+        iter.next();
+
         assert_eq!(
             iter.next(),
             Some(&frame::Frame::StreamDataBlocked {
@@ -6738,7 +8628,7 @@
         // again.
         assert_eq!(pipe.client.stream_send(4, b"a", false), Ok(1));
 
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
         assert_eq!(pipe.client.streams.blocked().len(), 0);
 
         let frames =
@@ -6761,7 +8651,7 @@
         assert_eq!(pipe.client.stream_send(0, b"aaaaaa", false), Ok(0));
         assert_eq!(pipe.client.streams.blocked().len(), 1);
 
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
         assert_eq!(pipe.client.streams.blocked().len(), 0);
 
         let frames =
@@ -6784,8 +8674,6 @@
 
     #[test]
     fn app_limited_true() {
-        let mut buf = [0; 65535];
-
         let mut config = Config::new(PROTOCOL_VERSION).unwrap();
         config
             .set_application_protos(b"\x06proto1\x06proto2")
@@ -6793,26 +8681,25 @@
         config.set_initial_max_data(50000);
         config.set_initial_max_stream_data_bidi_local(50000);
         config.set_initial_max_stream_data_bidi_remote(50000);
-        config.set_max_udp_payload_size(1200);
+        config.set_max_recv_udp_payload_size(1200);
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_client_config(&mut config).unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         // Client sends stream data.
         assert_eq!(pipe.client.stream_send(0, b"a", true), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server reads stream data.
         let mut b = [0; 15];
         pipe.server.stream_recv(0, &mut b).unwrap();
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server sends stream data smaller than cwnd.
         let send_buf = [0; 10000];
         assert_eq!(pipe.server.stream_send(0, &send_buf, false), Ok(10000));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // app_limited should be true because we send less than cwnd.
         assert_eq!(pipe.server.recovery.app_limited(), true);
@@ -6820,8 +8707,6 @@
 
     #[test]
     fn app_limited_false() {
-        let mut buf = [0; 65535];
-
         let mut config = Config::new(PROTOCOL_VERSION).unwrap();
         config
             .set_application_protos(b"\x06proto1\x06proto2")
@@ -6829,26 +8714,26 @@
         config.set_initial_max_data(50000);
         config.set_initial_max_stream_data_bidi_local(50000);
         config.set_initial_max_stream_data_bidi_remote(50000);
-        config.set_max_udp_payload_size(1200);
+        config.set_max_recv_udp_payload_size(1200);
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_client_config(&mut config).unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         // Client sends stream data.
         assert_eq!(pipe.client.stream_send(0, b"a", true), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server reads stream data.
         let mut b = [0; 15];
         pipe.server.stream_recv(0, &mut b).unwrap();
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server sends stream data bigger than cwnd.
         let send_buf1 = [0; 20000];
-        assert_eq!(pipe.server.stream_send(0, &send_buf1, false), Ok(14085));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.server.stream_send(0, &send_buf1, false), Ok(12000));
+
+        testing::emit_flight(&mut pipe.server).ok();
 
         // We can't create a new packet header because there is no room by cwnd.
         // app_limited should be false because we can't send more by cwnd.
@@ -6857,8 +8742,6 @@
 
     #[test]
     fn app_limited_false_no_frame() {
-        let mut buf = [0; 65535];
-
         let mut config = Config::new(PROTOCOL_VERSION).unwrap();
         config
             .set_application_protos(b"\x06proto1\x06proto2")
@@ -6866,26 +8749,26 @@
         config.set_initial_max_data(50000);
         config.set_initial_max_stream_data_bidi_local(50000);
         config.set_initial_max_stream_data_bidi_remote(50000);
-        config.set_max_udp_payload_size(1405);
+        config.set_max_recv_udp_payload_size(1405);
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_client_config(&mut config).unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         // Client sends stream data.
         assert_eq!(pipe.client.stream_send(0, b"a", true), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server reads stream data.
         let mut b = [0; 15];
         pipe.server.stream_recv(0, &mut b).unwrap();
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server sends stream data bigger than cwnd.
         let send_buf1 = [0; 20000];
-        assert_eq!(pipe.server.stream_send(0, &send_buf1, false), Ok(14085));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.server.stream_send(0, &send_buf1, false), Ok(12000));
+
+        testing::emit_flight(&mut pipe.server).ok();
 
         // We can't create a new packet header because there is no room by cwnd.
         // app_limited should be false because we can't send more by cwnd.
@@ -6894,8 +8777,6 @@
 
     #[test]
     fn app_limited_false_no_header() {
-        let mut buf = [0; 65535];
-
         let mut config = Config::new(PROTOCOL_VERSION).unwrap();
         config
             .set_application_protos(b"\x06proto1\x06proto2")
@@ -6903,26 +8784,26 @@
         config.set_initial_max_data(50000);
         config.set_initial_max_stream_data_bidi_local(50000);
         config.set_initial_max_stream_data_bidi_remote(50000);
-        config.set_max_udp_payload_size(1406);
+        config.set_max_recv_udp_payload_size(1406);
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_client_config(&mut config).unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         // Client sends stream data.
         assert_eq!(pipe.client.stream_send(0, b"a", true), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server reads stream data.
         let mut b = [0; 15];
         pipe.server.stream_recv(0, &mut b).unwrap();
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Server sends stream data bigger than cwnd.
         let send_buf1 = [0; 20000];
-        assert_eq!(pipe.server.stream_send(0, &send_buf1, false), Ok(14085));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.server.stream_send(0, &send_buf1, false), Ok(12000));
+
+        testing::emit_flight(&mut pipe.server).ok();
 
         // We can't create a new frame because there is no room by cwnd.
         // app_limited should be false because we can't send more by cwnd.
@@ -6930,12 +8811,46 @@
     }
 
     #[test]
+    fn app_limited_not_changed_on_no_new_frames() {
+        let mut config = Config::new(PROTOCOL_VERSION).unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        config.set_initial_max_data(50000);
+        config.set_initial_max_stream_data_bidi_local(50000);
+        config.set_initial_max_stream_data_bidi_remote(50000);
+        config.set_max_recv_udp_payload_size(1200);
+        config.verify_peer(false);
+
+        let mut pipe = testing::Pipe::with_client_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // Client sends stream data.
+        assert_eq!(pipe.client.stream_send(0, b"a", true), Ok(1));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Server reads stream data.
+        let mut b = [0; 15];
+        pipe.server.stream_recv(0, &mut b).unwrap();
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Client's app_limited is true because its bytes-in-flight
+        // is much smaller than the current cwnd.
+        assert_eq!(pipe.client.recovery.app_limited(), true);
+
+        // Client has no new frames to send - returns Done.
+        assert_eq!(testing::emit_flight(&mut pipe.client), Err(Error::Done));
+
+        // Client's app_limited should remain the same.
+        assert_eq!(pipe.client.recovery.app_limited(), true);
+    }
+
+    #[test]
     fn limit_ack_ranges() {
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let epoch = packet::EPOCH_APPLICATION;
 
@@ -7004,25 +8919,25 @@
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(0, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(4, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(8, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(12, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(16, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(20, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let mut b = [0; 1];
 
@@ -7074,7 +8989,8 @@
         let mut off = 0;
 
         for _ in 1..=3 {
-            let len = pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
+            let (len, _) =
+                pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
 
             let frames =
                 testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
@@ -7096,7 +9012,8 @@
         let mut off = 0;
 
         for _ in 1..=3 {
-            let len = pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
+            let (len, _) =
+                pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
 
             let frames =
                 testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
@@ -7118,7 +9035,8 @@
         let mut off = 0;
 
         for _ in 1..=3 {
-            let len = pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
+            let (len, _) =
+                pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
 
             let frames =
                 testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
@@ -7140,7 +9058,8 @@
         let mut off = 0;
 
         for _ in 1..=3 {
-            let len = pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
+            let (len, _) =
+                pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
 
             let frames =
                 testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
@@ -7153,7 +9072,8 @@
                 })
             );
 
-            let len = pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
+            let (len, _) =
+                pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
 
             let frames =
                 testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
@@ -7176,7 +9096,8 @@
         let mut off = 0;
 
         for _ in 1..=3 {
-            let len = pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
+            let (len, _) =
+                pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
 
             let frames =
                 testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
@@ -7224,19 +9145,19 @@
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(0, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(4, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(8, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         assert_eq!(pipe.client.stream_send(12, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let mut b = [0; 1];
 
@@ -7260,7 +9181,7 @@
         assert_eq!(pipe.server.stream_priority(0, 20, true), Ok(()));
 
         // First is stream 8.
-        let len = pipe.server.send(&mut buf).unwrap();
+        let (len, _) = pipe.server.send(&mut buf).unwrap();
 
         let frames =
             testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
@@ -7274,7 +9195,7 @@
         );
 
         // Then is stream 0.
-        let len = pipe.server.send(&mut buf).unwrap();
+        let (len, _) = pipe.server.send(&mut buf).unwrap();
 
         let frames =
             testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
@@ -7288,7 +9209,7 @@
         );
 
         // Then are stream 12 and 4, with the same priority.
-        let len = pipe.server.send(&mut buf).unwrap();
+        let (len, _) = pipe.server.send(&mut buf).unwrap();
 
         let frames =
             testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
@@ -7301,7 +9222,7 @@
             })
         );
 
-        let len = pipe.server.send(&mut buf).unwrap();
+        let (len, _) = pipe.server.send(&mut buf).unwrap();
 
         let frames =
             testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
@@ -7318,16 +9239,151 @@
     }
 
     #[test]
+    /// Tests that streams and datagrams are correctly scheduled.
+    fn stream_datagram_priority() {
+        // Limit 1-RTT packet size to avoid congestion control interference.
+        const MAX_TEST_PACKET_SIZE: usize = 540;
+
+        let mut buf = [0; 65535];
+
+        let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        config.set_initial_max_data(1_000_000);
+        config.set_initial_max_stream_data_bidi_local(1_000_000);
+        config.set_initial_max_stream_data_bidi_remote(1_000_000);
+        config.set_initial_max_stream_data_uni(0);
+        config.set_initial_max_streams_bidi(100);
+        config.set_initial_max_streams_uni(0);
+        config.enable_dgram(true, 10, 10);
+        config.verify_peer(false);
+
+        let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(pipe.client.stream_send(0, b"a", false), Ok(1));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        assert_eq!(pipe.client.stream_send(4, b"a", false), Ok(1));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        let mut b = [0; 1];
+
+        let out = [b'b'; 500];
+
+        // Server prioritizes Stream 0 and 4 with the same urgency with
+        // incremental, meaning the frames should be sent in round-robin
+        // fashion. It also sends DATAGRAMS which are always interleaved with
+        // STREAM frames. So we'll expect a mix of frame types regardless
+        // of the order that the application writes things in.
+
+        pipe.server.stream_recv(0, &mut b).unwrap();
+        assert_eq!(pipe.server.stream_priority(0, 255, true), Ok(()));
+        pipe.server.stream_send(0, &out, false).unwrap();
+        pipe.server.stream_send(0, &out, false).unwrap();
+        pipe.server.stream_send(0, &out, false).unwrap();
+
+        assert_eq!(pipe.server.stream_priority(4, 255, true), Ok(()));
+        pipe.server.stream_send(4, &out, false).unwrap();
+        pipe.server.stream_send(4, &out, false).unwrap();
+        pipe.server.stream_send(4, &out, false).unwrap();
+
+        for _ in 1..=6 {
+            assert_eq!(pipe.server.dgram_send(&out), Ok(()));
+        }
+
+        let mut off_0 = 0;
+        let mut off_4 = 0;
+
+        for _ in 1..=3 {
+            // DATAGRAM
+            let (len, _) =
+                pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
+
+            let frames =
+                testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
+            let mut frame_iter = frames.iter();
+
+            assert_eq!(frame_iter.next().unwrap(), &frame::Frame::Datagram {
+                data: out.into(),
+            });
+            assert_eq!(frame_iter.next(), None);
+
+            // STREAM 0
+            let (len, _) =
+                pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
+
+            let frames =
+                testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
+            let mut frame_iter = frames.iter();
+            let stream = frame_iter.next().unwrap();
+
+            assert_eq!(stream, &frame::Frame::Stream {
+                stream_id: 0,
+                data: stream::RangeBuf::from(&out, off_0, false),
+            });
+
+            off_0 = match stream {
+                frame::Frame::Stream { data, .. } => data.max_off(),
+
+                _ => unreachable!(),
+            };
+            assert_eq!(frame_iter.next(), None);
+
+            // DATAGRAM
+            let (len, _) =
+                pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
+
+            let frames =
+                testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
+            let mut frame_iter = frames.iter();
+
+            assert_eq!(frame_iter.next().unwrap(), &frame::Frame::Datagram {
+                data: out.into(),
+            });
+            assert_eq!(frame_iter.next(), None);
+
+            // STREAM 4
+            let (len, _) =
+                pipe.server.send(&mut buf[..MAX_TEST_PACKET_SIZE]).unwrap();
+
+            let frames =
+                testing::decode_pkt(&mut pipe.client, &mut buf, len).unwrap();
+            let mut frame_iter = frames.iter();
+            let stream = frame_iter.next().unwrap();
+
+            assert_eq!(stream, &frame::Frame::Stream {
+                stream_id: 4,
+                data: stream::RangeBuf::from(&out, off_4, false),
+            });
+
+            off_4 = match stream {
+                frame::Frame::Stream { data, .. } => data.max_off(),
+
+                _ => unreachable!(),
+            };
+            assert_eq!(frame_iter.next(), None);
+        }
+    }
+
+    #[test]
     /// Tests that old data is retransmitted on PTO.
     fn early_retransmit() {
         let mut buf = [0; 65535];
 
         let mut pipe = testing::Pipe::default().unwrap();
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         // Client sends stream data.
         assert_eq!(pipe.client.stream_send(0, b"a", false), Ok(1));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         // Client sends more stream data, but packet is lost
         assert_eq!(pipe.client.stream_send(4, b"b", false), Ok(1));
@@ -7343,7 +9399,7 @@
         assert_eq!(pipe.client.recovery.loss_probes[epoch], 1);
 
         // Client retransmits stream data in PTO probe.
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
         assert_eq!(pipe.client.recovery.loss_probes[epoch], 0);
 
         let frames =
@@ -7364,6 +9420,83 @@
     }
 
     #[test]
+    /// Tests that PTO probe packets are not coalesced together.
+    fn dont_coalesce_probes() {
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+
+        // Client sends Initial packet.
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+        assert_eq!(len, 1200);
+
+        // Wait for PTO to expire.
+        let timer = pipe.client.timeout().unwrap();
+        std::thread::sleep(timer + time::Duration::from_millis(1));
+
+        pipe.client.on_timeout();
+
+        let epoch = packet::EPOCH_INITIAL;
+        assert_eq!(pipe.client.recovery.loss_probes[epoch], 1);
+
+        // Client sends PTO probe.
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+        assert_eq!(len, 1200);
+        assert_eq!(pipe.client.recovery.loss_probes[epoch], 0);
+
+        // Wait for PTO to expire.
+        let timer = pipe.client.timeout().unwrap();
+        std::thread::sleep(timer + time::Duration::from_millis(1));
+
+        pipe.client.on_timeout();
+
+        assert_eq!(pipe.client.recovery.loss_probes[epoch], 2);
+
+        // Client sends first PTO probe.
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+        assert_eq!(len, 1200);
+        assert_eq!(pipe.client.recovery.loss_probes[epoch], 1);
+
+        // Client sends second PTO probe.
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+        assert_eq!(len, 1200);
+        assert_eq!(pipe.client.recovery.loss_probes[epoch], 0);
+    }
+
+    #[test]
+    fn coalesce_padding_short() {
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+
+        // Client sends first flight.
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+        assert_eq!(len, MIN_CLIENT_INITIAL_LEN);
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(len));
+
+        // Server sends first flight.
+        let (len, _) = pipe.server.send(&mut buf).unwrap();
+        assert_eq!(len, MIN_CLIENT_INITIAL_LEN);
+        assert_eq!(pipe.client_recv(&mut buf[..len]), Ok(len));
+
+        let (len, _) = pipe.server.send(&mut buf).unwrap();
+        assert_eq!(pipe.client_recv(&mut buf[..len]), Ok(len));
+
+        // Client sends stream data.
+        assert_eq!(pipe.client.is_established(), true);
+        assert_eq!(pipe.client.stream_send(4, b"hello", true), Ok(5));
+
+        // Client sends second flight.
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+        assert_eq!(len, MIN_CLIENT_INITIAL_LEN);
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(len));
+
+        // None of the sent packets should have been dropped.
+        assert_eq!(pipe.client.sent_count, pipe.server.recv_count);
+        assert_eq!(pipe.server.sent_count, pipe.client.recv_count);
+    }
+
+    #[test]
     /// Tests that client avoids handshake deadlock by arming PTO.
     fn handshake_anti_deadlock() {
         let mut buf = [0; 65535];
@@ -7387,13 +9520,13 @@
         assert_eq!(pipe.server.handshake_status().peer_verified_address, true);
 
         // Client sends padded Initial.
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
         assert_eq!(len, 1200);
 
         // Server receives client's Initial and sends own Initial and Handshake
         // until it's blocked by the anti-amplification limit.
-        let len = testing::recv_send(&mut pipe.server, &mut buf, len).unwrap();
-        assert_eq!(pipe.server.send(&mut buf[len..]), Err(Error::Done));
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(len));
+        let flight = testing::emit_flight(&mut pipe.server).unwrap();
 
         assert_eq!(pipe.client.handshake_status().has_handshake_keys, false);
         assert_eq!(pipe.client.handshake_status().peer_verified_address, false);
@@ -7402,7 +9535,8 @@
 
         // Client receives the server flight and sends Handshake ACK, but it is
         // lost.
-        assert!(testing::recv_send(&mut pipe.client, &mut buf, len).is_ok());
+        testing::process_flight(&mut pipe.client, flight).unwrap();
+        testing::emit_flight(&mut pipe.client).unwrap();
 
         assert_eq!(pipe.client.handshake_status().has_handshake_keys, true);
         assert_eq!(pipe.client.handshake_status().peer_verified_address, false);
@@ -7422,44 +9556,39 @@
         let mut pipe = testing::Pipe::default().unwrap();
 
         // Client sends padded Initial.
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
         assert_eq!(len, 1200);
 
         // Server receives client's Initial and sends own Initial and Handshake.
-        let len = testing::recv_send(&mut pipe.server, &mut buf, len).unwrap();
-        assert_eq!(pipe.client.recv(&mut buf[..len]), Ok(len));
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(len));
+
+        let flight = testing::emit_flight(&mut pipe.server).unwrap();
+        testing::process_flight(&mut pipe.client, flight).unwrap();
 
         // Client sends Initial packet with ACK.
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (ty, len) = pipe.client.send_single(&mut buf, false).unwrap();
+        assert_eq!(ty, Type::Initial);
 
-        let hdr = Header::from_slice(&mut buf[..len], 0).unwrap();
-        assert_eq!(hdr.ty, Type::Initial);
-
-        assert_eq!(pipe.server.recv(&mut buf[..len]), Ok(len));
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(len));
 
         // Client sends Handshake packet.
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (ty, len) = pipe.client.send_single(&mut buf, false).unwrap();
+        assert_eq!(ty, Type::Handshake);
 
-        let hdr = Header::from_slice(&mut buf[..len], 0).unwrap();
-        assert_eq!(hdr.ty, Type::Handshake);
-
-        // Packet type is corrupted to Initial..
+        // Packet type is corrupted to Initial.
         buf[0] &= !(0x20);
 
         let hdr = Header::from_slice(&mut buf[..len], 0).unwrap();
         assert_eq!(hdr.ty, Type::Initial);
 
         // Server receives corrupted packet without returning an error.
-        assert_eq!(pipe.server.recv(&mut buf[..len]), Ok(len));
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(len));
     }
 
     #[test]
     fn dgram_send_fails_invalidstate() {
-        let mut buf = [0; 65535];
-
         let mut pipe = testing::Pipe::default().unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(
             pipe.client.dgram_send(b"hello, world"),
@@ -7489,13 +9618,11 @@
         config.set_initial_max_streams_bidi(3);
         config.set_initial_max_streams_uni(3);
         config.enable_dgram(true, 1000, 1000);
-        config.set_max_udp_payload_size(1200);
+        config.set_max_recv_udp_payload_size(1200);
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         for _ in 0..1000 {
             assert_eq!(pipe.client.dgram_send(&send_buf), Ok(()));
@@ -7504,14 +9631,19 @@
         assert!(!pipe.client.recovery.app_limited());
         assert_eq!(pipe.client.dgram_send_queue.byte_size(), 1_000_000);
 
-        let len = pipe.client.send(&mut buf).unwrap();
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
 
         assert_ne!(pipe.client.dgram_send_queue.byte_size(), 0);
         assert_ne!(pipe.client.dgram_send_queue.byte_size(), 1_000_000);
         assert!(!pipe.client.recovery.app_limited());
 
-        testing::recv_send(&mut pipe.client, &mut buf, len).unwrap();
-        testing::recv_send(&mut pipe.server, &mut buf, len).unwrap();
+        assert_eq!(pipe.server_recv(&mut buf[..len]), Ok(len));
+
+        let flight = testing::emit_flight(&mut pipe.client).unwrap();
+        testing::process_flight(&mut pipe.server, flight).unwrap();
+
+        let flight = testing::emit_flight(&mut pipe.server).unwrap();
+        testing::process_flight(&mut pipe.client, flight).unwrap();
 
         assert_ne!(pipe.client.dgram_send_queue.byte_size(), 0);
         assert_ne!(pipe.client.dgram_send_queue.byte_size(), 1_000_000);
@@ -7543,12 +9675,11 @@
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(pipe.client.dgram_send(b"hello, world"), Ok(()));
 
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let result1 = pipe.server.dgram_recv(&mut buf);
         assert_eq!(result1, Ok(12));
@@ -7581,17 +9712,34 @@
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
 
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.client.dgram_send_queue_len(), 0);
+        assert_eq!(pipe.client.dgram_send_queue_byte_size(), 0);
 
         assert_eq!(pipe.client.dgram_send(b"hello, world"), Ok(()));
         assert_eq!(pipe.client.dgram_send(b"ciao, mondo"), Ok(()));
         assert_eq!(pipe.client.dgram_send(b"hola, mundo"), Ok(()));
 
+        assert_eq!(pipe.client.dgram_send_queue_byte_size(), 34);
+
         pipe.client
             .dgram_purge_outgoing(|d: &[u8]| -> bool { d[0] == b'c' });
 
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.client.dgram_send_queue_len(), 2);
+        assert_eq!(pipe.client.dgram_send_queue_byte_size(), 23);
+
+        // Before packets exchanged, no dgrams on server receive side.
+        assert_eq!(pipe.server.dgram_recv_queue_len(), 0);
+
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // After packets exchanged, no dgrams on client send side.
+        assert_eq!(pipe.client.dgram_send_queue_len(), 0);
+        assert_eq!(pipe.client.dgram_send_queue_byte_size(), 0);
+
+        assert_eq!(pipe.server.dgram_recv_queue_len(), 2);
+        assert_eq!(pipe.server.dgram_recv_queue_byte_size(), 23);
 
         let result1 = pipe.server.dgram_recv(&mut buf);
         assert_eq!(result1, Ok(12));
@@ -7605,6 +9753,9 @@
 
         let result3 = pipe.server.dgram_recv(&mut buf);
         assert_eq!(result3, Err(Error::Done));
+
+        assert_eq!(pipe.server.dgram_recv_queue_len(), 0);
+        assert_eq!(pipe.server.dgram_recv_queue_byte_size(), 0);
     }
 
     #[test]
@@ -7631,14 +9782,13 @@
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
-
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(pipe.client.dgram_send(b"hello, world"), Ok(()));
         assert_eq!(pipe.client.dgram_send(b"ciao, mondo"), Ok(()));
         assert_eq!(pipe.client.dgram_send(b"hola, mundo"), Err(Error::Done));
 
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let result1 = pipe.server.dgram_recv(&mut buf);
         assert_eq!(result1, Ok(12));
@@ -7675,18 +9825,17 @@
         config.set_initial_max_streams_bidi(3);
         config.set_initial_max_streams_uni(3);
         config.enable_dgram(true, 2, 10);
-        config.set_max_udp_payload_size(1200);
+        config.set_max_recv_udp_payload_size(1200);
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
-
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         assert_eq!(pipe.client.dgram_send(b"hello, world"), Ok(()));
         assert_eq!(pipe.client.dgram_send(b"ciao, mondo"), Ok(()));
         assert_eq!(pipe.client.dgram_send(b"hola, mundo"), Ok(()));
 
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let result1 = pipe.server.dgram_recv(&mut buf);
         assert_eq!(result1, Ok(11));
@@ -7723,7 +9872,7 @@
         config.set_initial_max_streams_bidi(3);
         config.set_initial_max_streams_uni(3);
         config.enable_dgram(true, 10, 10);
-        config.set_max_udp_payload_size(1452);
+        config.set_max_recv_udp_payload_size(1452);
         config.verify_peer(false);
 
         let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
@@ -7731,15 +9880,19 @@
         // Before handshake (before peer settings) we don't know max dgram size
         assert_eq!(pipe.client.dgram_max_writable_len(), None);
 
-        assert_eq!(pipe.handshake(&mut buf), Ok(()));
+        assert_eq!(pipe.handshake(), Ok(()));
 
         let max_dgram_size = pipe.client.dgram_max_writable_len().unwrap();
 
+        // Tests use a 16-byte connection ID, so the max datagram frame payload
+        // size is (1200 byte-long packet - 40 bytes overhead)
+        assert_eq!(max_dgram_size, 1160);
+
         let dgram_packet: Vec<u8> = vec![42; max_dgram_size];
 
         assert_eq!(pipe.client.dgram_send(&dgram_packet), Ok(()));
 
-        assert_eq!(pipe.advance(&mut buf), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
 
         let result1 = pipe.server.dgram_recv(&mut buf);
         assert_eq!(result1, Ok(max_dgram_size));
@@ -7747,15 +9900,314 @@
         let result2 = pipe.server.dgram_recv(&mut buf);
         assert_eq!(result2, Err(Error::Done));
     }
+
+    #[test]
+    /// Tests is_readable check.
+    fn is_readable() {
+        let mut buf = [0; 65535];
+
+        let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        config.set_initial_max_data(30);
+        config.set_initial_max_stream_data_bidi_local(15);
+        config.set_initial_max_stream_data_bidi_remote(15);
+        config.set_initial_max_stream_data_uni(10);
+        config.set_initial_max_streams_bidi(3);
+        config.set_initial_max_streams_uni(3);
+        config.enable_dgram(true, 10, 10);
+        config.set_max_recv_udp_payload_size(1452);
+        config.verify_peer(false);
+
+        let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // No readable data.
+        assert_eq!(pipe.client.is_readable(), false);
+        assert_eq!(pipe.server.is_readable(), false);
+
+        assert_eq!(pipe.client.stream_send(4, b"aaaaa", false), Ok(5));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Server received stream.
+        assert_eq!(pipe.client.is_readable(), false);
+        assert_eq!(pipe.server.is_readable(), true);
+
+        assert_eq!(
+            pipe.server.stream_send(4, b"aaaaaaaaaaaaaaa", false),
+            Ok(15)
+        );
+        assert_eq!(pipe.advance(), Ok(()));
+
+        // Client received stream.
+        assert_eq!(pipe.client.is_readable(), true);
+        assert_eq!(pipe.server.is_readable(), true);
+
+        // Client drains stream.
+        let mut b = [0; 15];
+        pipe.client.stream_recv(4, &mut b).unwrap();
+        assert_eq!(pipe.advance(), Ok(()));
+
+        assert_eq!(pipe.client.is_readable(), false);
+        assert_eq!(pipe.server.is_readable(), true);
+
+        // Server shuts down stream.
+        assert_eq!(pipe.server.stream_shutdown(4, Shutdown::Read, 0), Ok(()));
+        assert_eq!(pipe.server.is_readable(), false);
+
+        // Server received dgram.
+        assert_eq!(pipe.client.dgram_send(b"dddddddddddddd"), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        assert_eq!(pipe.client.is_readable(), false);
+        assert_eq!(pipe.server.is_readable(), true);
+
+        // Client received dgram.
+        assert_eq!(pipe.server.dgram_send(b"dddddddddddddd"), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        assert_eq!(pipe.client.is_readable(), true);
+        assert_eq!(pipe.server.is_readable(), true);
+
+        // Drain the dgram queues.
+        let r = pipe.server.dgram_recv(&mut buf);
+        assert_eq!(r, Ok(14));
+        assert_eq!(pipe.server.is_readable(), false);
+
+        let r = pipe.client.dgram_recv(&mut buf);
+        assert_eq!(r, Ok(14));
+        assert_eq!(pipe.client.is_readable(), false);
+    }
+
+    #[test]
+    fn close() {
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(pipe.client.close(false, 0x1234, b"hello?"), Ok(()));
+
+        assert_eq!(
+            pipe.client.close(false, 0x4321, b"hello?"),
+            Err(Error::Done)
+        );
+
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+
+        let frames =
+            testing::decode_pkt(&mut pipe.server, &mut buf, len).unwrap();
+
+        assert_eq!(
+            frames.iter().next(),
+            Some(&frame::Frame::ConnectionClose {
+                error_code: 0x1234,
+                frame_type: 0,
+                reason: b"hello?".to_vec(),
+            })
+        );
+    }
+
+    #[test]
+    fn app_close() {
+        let mut buf = [0; 65535];
+
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(pipe.client.close(true, 0x1234, b"hello!"), Ok(()));
+
+        assert_eq!(pipe.client.close(true, 0x4321, b"hello!"), Err(Error::Done));
+
+        let (len, _) = pipe.client.send(&mut buf).unwrap();
+
+        let frames =
+            testing::decode_pkt(&mut pipe.server, &mut buf, len).unwrap();
+
+        assert_eq!(
+            frames.iter().next(),
+            Some(&frame::Frame::ApplicationClose {
+                error_code: 0x1234,
+                reason: b"hello!".to_vec(),
+            })
+        );
+    }
+
+    #[test]
+    fn peer_error() {
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(pipe.server.close(false, 0x1234, b"hello?"), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        assert_eq!(
+            pipe.client.peer_error(),
+            Some(&ConnectionError {
+                is_app: false,
+                error_code: 0x1234u64,
+                reason: b"hello?".to_vec()
+            })
+        );
+    }
+
+    #[test]
+    fn app_peer_error() {
+        let mut pipe = testing::Pipe::default().unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(pipe.server.close(true, 0x1234, b"hello!"), Ok(()));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        assert_eq!(
+            pipe.client.peer_error(),
+            Some(&ConnectionError {
+                is_app: true,
+                error_code: 0x1234u64,
+                reason: b"hello!".to_vec()
+            })
+        );
+    }
+
+    #[test]
+    fn update_max_datagram_size() {
+        let mut client_scid = [0; 16];
+        rand::rand_bytes(&mut client_scid[..]);
+        let client_scid = ConnectionId::from_ref(&client_scid);
+        let client_addr = "127.0.0.1:1234".parse().unwrap();
+
+        let mut server_scid = [0; 16];
+        rand::rand_bytes(&mut server_scid[..]);
+        let server_scid = ConnectionId::from_ref(&server_scid);
+        let server_addr = "127.0.0.1:4321".parse().unwrap();
+
+        let mut client_config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        client_config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        client_config.set_max_recv_udp_payload_size(1200);
+
+        let mut server_config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        server_config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        server_config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        server_config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        server_config.verify_peer(false);
+        server_config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        // Larger than the client
+        server_config.set_max_send_udp_payload_size(1500);
+
+        let mut pipe = testing::Pipe {
+            client: connect(
+                Some("quic.tech"),
+                &client_scid,
+                client_addr,
+                &mut client_config,
+            )
+            .unwrap(),
+            server: accept(&server_scid, None, server_addr, &mut server_config)
+                .unwrap(),
+        };
+
+        // Before handshake
+        assert_eq!(pipe.server.recovery.max_datagram_size(), 1500);
+
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        // After handshake, max_datagram_size should match to client's
+        // max_recv_udp_payload_size which is smaller
+        assert_eq!(pipe.server.recovery.max_datagram_size(), 1200);
+        assert_eq!(pipe.server.recovery.cwnd(), 12000);
+    }
+
+    #[test]
+    /// Tests that connection-level send capacity decreases as more stream data
+    /// is buffered.
+    fn send_capacity() {
+        let mut buf = [0; 65535];
+
+        let mut config = Config::new(crate::PROTOCOL_VERSION).unwrap();
+        config
+            .load_cert_chain_from_pem_file("examples/cert.crt")
+            .unwrap();
+        config
+            .load_priv_key_from_pem_file("examples/cert.key")
+            .unwrap();
+        config
+            .set_application_protos(b"\x06proto1\x06proto2")
+            .unwrap();
+        config.set_initial_max_data(100000);
+        config.set_initial_max_stream_data_bidi_local(10000);
+        config.set_initial_max_stream_data_bidi_remote(10000);
+        config.set_initial_max_streams_bidi(10);
+        config.verify_peer(false);
+
+        let mut pipe = testing::Pipe::with_config(&mut config).unwrap();
+        assert_eq!(pipe.handshake(), Ok(()));
+
+        assert_eq!(pipe.client.stream_send(0, b"hello!", true), Ok(6));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        assert_eq!(pipe.client.stream_send(4, b"hello!", true), Ok(6));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        assert_eq!(pipe.client.stream_send(8, b"hello!", true), Ok(6));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        assert_eq!(pipe.client.stream_send(12, b"hello!", true), Ok(6));
+        assert_eq!(pipe.advance(), Ok(()));
+
+        let mut r = pipe.server.readable().collect::<Vec<u64>>();
+        assert_eq!(r.len(), 4);
+
+        r.sort();
+
+        assert_eq!(r, [0, 4, 8, 12]);
+
+        assert_eq!(pipe.server.stream_recv(0, &mut buf), Ok((6, true)));
+        assert_eq!(pipe.server.stream_recv(4, &mut buf), Ok((6, true)));
+        assert_eq!(pipe.server.stream_recv(8, &mut buf), Ok((6, true)));
+        assert_eq!(pipe.server.stream_recv(12, &mut buf), Ok((6, true)));
+
+        assert_eq!(pipe.server.tx_cap, 12000);
+
+        assert_eq!(pipe.server.stream_send(0, &buf[..5000], false), Ok(5000));
+        assert_eq!(pipe.server.stream_send(4, &buf[..5000], false), Ok(5000));
+        assert_eq!(pipe.server.stream_send(8, &buf[..5000], false), Ok(2000));
+
+        // No more connection send capacity.
+        assert_eq!(pipe.server.stream_send(12, &buf[..5000], false), Ok(0));
+        assert_eq!(pipe.server.tx_cap, 0);
+
+        assert_eq!(pipe.advance(), Ok(()));
+    }
 }
 
+pub use crate::packet::ConnectionId;
 pub use crate::packet::Header;
 pub use crate::packet::Type;
+
 pub use crate::recovery::CongestionControlAlgorithm;
+
 pub use crate::stream::StreamIter;
 
 mod crypto;
 mod dgram;
+#[cfg(feature = "ffi")]
 mod ffi;
 mod frame;
 pub mod h3;
diff --git a/src/octets.rs b/src/octets.rs
index 2b36707..3983667 100644
--- a/src/octets.rs
+++ b/src/octets.rs
@@ -183,18 +183,15 @@
             return Err(BufferTooShortError);
         }
 
-        let mut vec = self.get_bytes(len)?.to_vec();
-
-        // Mask the 2 most significant bits to remove the encoded length.
-        vec[0] &= 0x3f;
-
-        let mut b = OctetsMut::with_slice(&mut vec);
-
         let out = match len {
-            1 => u64::from(b.get_u8()?),
-            2 => u64::from(b.get_u16()?),
-            4 => u64::from(b.get_u32()?),
-            8 => b.get_u64()?,
+            1 => u64::from(self.get_u8()?),
+
+            2 => u64::from(self.get_u16()? & 0x3fff),
+
+            4 => u64::from(self.get_u32()? & 0x3fffffff),
+
+            8 => self.get_u64()? & 0x3fffffffffffffff,
+
             _ => unreachable!(),
         };
 
@@ -275,6 +272,17 @@
         Ok(&self.buf[cap - len..])
     }
 
+    /// Advances the buffer's offset.
+    pub fn skip(&mut self, skip: usize) -> Result<()> {
+        if skip > self.cap() {
+            return Err(BufferTooShortError);
+        }
+
+        self.off += skip;
+
+        Ok(())
+    }
+
     /// Returns the remaining capacity in the buffer.
     pub fn cap(&self) -> usize {
         self.buf.len() - self.off
@@ -402,18 +410,15 @@
             return Err(BufferTooShortError);
         }
 
-        let mut vec = self.get_bytes(len)?.to_vec();
-
-        // Mask the 2 most significant bits to remove the encoded length.
-        vec[0] &= 0x3f;
-
-        let mut b = OctetsMut::with_slice(&mut vec);
-
         let out = match len {
-            1 => u64::from(b.get_u8()?),
-            2 => u64::from(b.get_u16()?),
-            4 => u64::from(b.get_u32()?),
-            8 => b.get_u64()?,
+            1 => u64::from(self.get_u8()?),
+
+            2 => u64::from(self.get_u16()? & 0x3fff),
+
+            4 => u64::from(self.get_u32()? & 0x3fffffff),
+
+            8 => self.get_u64()? & 0x3fffffffffffffff,
+
             _ => unreachable!(),
         };
 
@@ -603,6 +608,17 @@
         Ok(&mut self.buf[cap - len..])
     }
 
+    /// Advances the buffer's offset.
+    pub fn skip(&mut self, skip: usize) -> Result<()> {
+        if skip > self.cap() {
+            return Err(BufferTooShortError);
+        }
+
+        self.off += skip;
+
+        Ok(())
+    }
+
     /// Returns the remaining capacity in the buffer.
     pub fn cap(&self) -> usize {
         self.buf.len() - self.off
diff --git a/src/packet.rs b/src/packet.rs
index 0534df5..e6180f2 100644
--- a/src/packet.rs
+++ b/src/packet.rs
@@ -129,9 +129,117 @@
     }
 }
 
+/// A QUIC connection ID.
+pub struct ConnectionId<'a>(ConnectionIdInner<'a>);
+
+enum ConnectionIdInner<'a> {
+    Vec(Vec<u8>),
+    Ref(&'a [u8]),
+}
+
+impl<'a> ConnectionId<'a> {
+    /// Creates a new connection ID from the given vector.
+    #[inline]
+    pub const fn from_vec(cid: Vec<u8>) -> Self {
+        Self(ConnectionIdInner::Vec(cid))
+    }
+
+    /// Creates a new connection ID from the given slice.
+    #[inline]
+    pub const fn from_ref(cid: &'a [u8]) -> Self {
+        Self(ConnectionIdInner::Ref(cid))
+    }
+
+    /// Returns a new owning connection ID from the given existing one.
+    #[inline]
+    pub fn into_owned(self) -> ConnectionId<'static> {
+        ConnectionId::from_vec(self.into())
+    }
+}
+
+impl<'a> Default for ConnectionId<'a> {
+    #[inline]
+    fn default() -> Self {
+        Self::from_vec(Vec::new())
+    }
+}
+
+impl<'a> From<Vec<u8>> for ConnectionId<'a> {
+    #[inline]
+    fn from(v: Vec<u8>) -> Self {
+        Self::from_vec(v)
+    }
+}
+
+impl<'a> From<ConnectionId<'a>> for Vec<u8> {
+    #[inline]
+    fn from(id: ConnectionId<'a>) -> Self {
+        match id.0 {
+            ConnectionIdInner::Vec(cid) => cid,
+            ConnectionIdInner::Ref(cid) => cid.to_vec(),
+        }
+    }
+}
+
+impl<'a> PartialEq for ConnectionId<'a> {
+    #[inline]
+    fn eq(&self, other: &Self) -> bool {
+        self.as_ref() == other.as_ref()
+    }
+}
+
+impl<'a> Eq for ConnectionId<'a> {}
+
+impl<'a> AsRef<[u8]> for ConnectionId<'a> {
+    #[inline]
+    fn as_ref(&self) -> &[u8] {
+        match &self.0 {
+            ConnectionIdInner::Vec(v) => v.as_ref(),
+            ConnectionIdInner::Ref(v) => v,
+        }
+    }
+}
+
+impl<'a> std::hash::Hash for ConnectionId<'a> {
+    #[inline]
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.as_ref().hash(state);
+    }
+}
+
+impl<'a> std::ops::Deref for ConnectionId<'a> {
+    type Target = [u8];
+
+    #[inline]
+    fn deref(&self) -> &[u8] {
+        match &self.0 {
+            ConnectionIdInner::Vec(v) => v.as_ref(),
+            ConnectionIdInner::Ref(v) => v,
+        }
+    }
+}
+
+impl<'a> Clone for ConnectionId<'a> {
+    #[inline]
+    fn clone(&self) -> Self {
+        Self::from_vec(self.as_ref().to_vec())
+    }
+}
+
+impl<'a> std::fmt::Debug for ConnectionId<'a> {
+    #[inline]
+    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        for c in self.as_ref() {
+            write!(f, "{:02x}", c)?;
+        }
+
+        Ok(())
+    }
+}
+
 /// A QUIC packet's header.
 #[derive(Clone, PartialEq)]
-pub struct Header {
+pub struct Header<'a> {
     /// The type of the packet.
     pub ty: Type,
 
@@ -139,10 +247,10 @@
     pub version: u32,
 
     /// The destination connection ID of the packet.
-    pub dcid: Vec<u8>,
+    pub dcid: ConnectionId<'a>,
 
     /// The source connection ID of the packet.
-    pub scid: Vec<u8>,
+    pub scid: ConnectionId<'a>,
 
     /// The packet number. It's only meaningful after the header protection is
     /// removed.
@@ -165,7 +273,7 @@
     pub(crate) key_phase: bool,
 }
 
-impl Header {
+impl<'a> Header<'a> {
     /// Parses a QUIC packet header from the given buffer.
     ///
     /// The `dcid_len` parameter is the length of the destination connection ID,
@@ -183,14 +291,17 @@
     /// let hdr = quiche::Header::from_slice(&mut buf[..len], LOCAL_CONN_ID_LEN)?;
     /// # Ok::<(), quiche::Error>(())
     /// ```
-    pub fn from_slice(buf: &mut [u8], dcid_len: usize) -> Result<Header> {
+    #[inline]
+    pub fn from_slice<'b>(
+        buf: &'b mut [u8], dcid_len: usize,
+    ) -> Result<Header<'a>> {
         let mut b = octets::OctetsMut::with_slice(buf);
         Header::from_bytes(&mut b, dcid_len)
     }
 
-    pub(crate) fn from_bytes(
-        b: &mut octets::OctetsMut, dcid_len: usize,
-    ) -> Result<Header> {
+    pub(crate) fn from_bytes<'b>(
+        b: &'b mut octets::OctetsMut, dcid_len: usize,
+    ) -> Result<Header<'a>> {
         let first = b.get_u8()?;
 
         if !Header::is_long(first) {
@@ -200,8 +311,8 @@
             return Ok(Header {
                 ty: Type::Short,
                 version: 0,
-                dcid: dcid.to_vec(),
-                scid: Vec::new(),
+                dcid: dcid.to_vec().into(),
+                scid: ConnectionId::default(),
                 pkt_num: 0,
                 pkt_num_len: 0,
                 token: None,
@@ -274,8 +385,8 @@
         Ok(Header {
             ty,
             version,
-            dcid,
-            scid,
+            dcid: dcid.into(),
+            scid: scid.into(),
             pkt_num: 0,
             pkt_num_len: 0,
             token,
@@ -367,7 +478,7 @@
     }
 }
 
-impl std::fmt::Debug for Header {
+impl<'a> std::fmt::Debug for Header<'a> {
     fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
         write!(f, "{:?}", self.ty)?;
 
@@ -375,16 +486,10 @@
             write!(f, " version={:x}", self.version)?;
         }
 
-        write!(f, " dcid=")?;
-        for b in &self.dcid {
-            write!(f, "{:02x}", b)?;
-        }
+        write!(f, " dcid={:?}", self.dcid)?;
 
         if self.ty != Type::Short {
-            write!(f, " scid=")?;
-            for b in &self.scid {
-                write!(f, "{:02x}", b)?;
-            }
+            write!(f, " scid={:?}", self.scid)?;
         }
 
         if let Some(ref token) = self.token {
@@ -589,6 +694,7 @@
     b.put_bytes(&scid)?;
     b.put_u8(dcid.len() as u8)?;
     b.put_bytes(&dcid)?;
+    b.put_u32(crate::PROTOCOL_VERSION_V1)?;
     b.put_u32(crate::PROTOCOL_VERSION_DRAFT29)?;
     b.put_u32(crate::PROTOCOL_VERSION_DRAFT28)?;
     b.put_u32(crate::PROTOCOL_VERSION_DRAFT27)?;
@@ -609,8 +715,8 @@
     let hdr = Header {
         ty: Type::Retry,
         version,
-        dcid: scid.to_vec(),
-        scid: new_scid.to_vec(),
+        dcid: ConnectionId::from_ref(scid),
+        scid: ConnectionId::from_ref(new_scid),
         pkt_num: 0,
         pkt_num_len: 0,
         token: Some(token.to_vec()),
@@ -644,29 +750,41 @@
 fn compute_retry_integrity_tag(
     b: &octets::OctetsMut, odcid: &[u8], version: u32,
 ) -> Result<aead::Tag> {
-    const RETRY_INTEGRITY_KEY: [u8; 16] = [
+    const RETRY_INTEGRITY_KEY_V1: [u8; 16] = [
+        0xbe, 0x0c, 0x69, 0x0b, 0x9f, 0x66, 0x57, 0x5a, 0x1d, 0x76, 0x6b, 0x54,
+        0xe3, 0x68, 0xc8, 0x4e,
+    ];
+
+    const RETRY_INTEGRITY_NONCE_V1: [u8; aead::NONCE_LEN] = [
+        0x46, 0x15, 0x99, 0xd3, 0x5d, 0x63, 0x2b, 0xf2, 0x23, 0x98, 0x25, 0xbb,
+    ];
+
+    const RETRY_INTEGRITY_KEY_DRAFT29: [u8; 16] = [
         0xcc, 0xce, 0x18, 0x7e, 0xd0, 0x9a, 0x09, 0xd0, 0x57, 0x28, 0x15, 0x5a,
         0x6c, 0xb9, 0x6b, 0xe1,
     ];
 
-    const RETRY_INTEGRITY_NONCE: [u8; aead::NONCE_LEN] = [
+    const RETRY_INTEGRITY_NONCE_DRAFT29: [u8; aead::NONCE_LEN] = [
         0xe5, 0x49, 0x30, 0xf9, 0x7f, 0x21, 0x36, 0xf0, 0x53, 0x0a, 0x8c, 0x1c,
     ];
 
-    const RETRY_INTEGRITY_KEY_OLD: [u8; 16] = [
+    const RETRY_INTEGRITY_KEY_DRAFT27: [u8; 16] = [
         0x4d, 0x32, 0xec, 0xdb, 0x2a, 0x21, 0x33, 0xc8, 0x41, 0xe4, 0x04, 0x3d,
         0xf2, 0x7d, 0x44, 0x30,
     ];
 
-    const RETRY_INTEGRITY_NONCE_OLD: [u8; aead::NONCE_LEN] = [
+    const RETRY_INTEGRITY_NONCE_DRAFT27: [u8; aead::NONCE_LEN] = [
         0x4d, 0x16, 0x11, 0xd0, 0x55, 0x13, 0xa5, 0x52, 0xc5, 0x87, 0xd5, 0x75,
     ];
 
     let (key, nonce) = match version {
         crate::PROTOCOL_VERSION_DRAFT27 | crate::PROTOCOL_VERSION_DRAFT28 =>
-            (&RETRY_INTEGRITY_KEY_OLD, RETRY_INTEGRITY_NONCE_OLD),
+            (&RETRY_INTEGRITY_KEY_DRAFT27, RETRY_INTEGRITY_NONCE_DRAFT27),
 
-        _ => (&RETRY_INTEGRITY_KEY, RETRY_INTEGRITY_NONCE),
+        crate::PROTOCOL_VERSION_DRAFT29 =>
+            (&RETRY_INTEGRITY_KEY_DRAFT29, RETRY_INTEGRITY_NONCE_DRAFT29),
+
+        _ => (&RETRY_INTEGRITY_KEY_V1, RETRY_INTEGRITY_NONCE_V1),
     };
 
     let hdr_len = b.off();
@@ -823,8 +941,9 @@
         let hdr = Header {
             ty: Type::Retry,
             version: 0xafafafaf,
-            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba],
-            scid: vec![0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb],
+            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba]
+                .into(),
+            scid: vec![0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb].into(),
             pkt_num: 0,
             pkt_num_len: 0,
             token: Some(vec![0xba; 24]),
@@ -849,8 +968,9 @@
         let hdr = Header {
             ty: Type::Initial,
             version: 0xafafafaf,
-            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba],
-            scid: vec![0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb],
+            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba]
+                .into(),
+            scid: vec![0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb].into(),
             pkt_num: 0,
             pkt_num_len: 0,
             token: Some(vec![0x05, 0x06, 0x07, 0x08]),
@@ -875,8 +995,9 @@
             dcid: vec![
                 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba,
                 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba,
-            ],
-            scid: vec![0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb],
+            ]
+            .into(),
+            scid: vec![0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb].into(),
             pkt_num: 0,
             pkt_num_len: 0,
             token: Some(vec![0x05, 0x06, 0x07, 0x08]),
@@ -898,11 +1019,13 @@
         let hdr = Header {
             ty: Type::Initial,
             version: crate::PROTOCOL_VERSION,
-            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba],
+            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba]
+                .into(),
             scid: vec![
                 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb,
                 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb,
-            ],
+            ]
+            .into(),
             pkt_num: 0,
             pkt_num_len: 0,
             token: Some(vec![0x05, 0x06, 0x07, 0x08]),
@@ -924,11 +1047,13 @@
         let hdr = Header {
             ty: Type::Initial,
             version: 0xafafafaf,
-            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba],
+            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba]
+                .into(),
             scid: vec![
                 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb,
                 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb,
-            ],
+            ]
+            .into(),
             pkt_num: 0,
             pkt_num_len: 0,
             token: Some(vec![0x05, 0x06, 0x07, 0x08]),
@@ -950,8 +1075,9 @@
         let hdr = Header {
             ty: Type::Handshake,
             version: 0xafafafaf,
-            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba],
-            scid: vec![0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb],
+            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba]
+                .into(),
+            scid: vec![0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb, 0xbb].into(),
             pkt_num: 0,
             pkt_num_len: 0,
             token: None,
@@ -973,8 +1099,9 @@
         let hdr = Header {
             ty: Type::Short,
             version: 0,
-            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba],
-            scid: vec![],
+            dcid: vec![0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba, 0xba]
+                .into(),
+            scid: ConnectionId::default(),
             pkt_num: 0,
             pkt_num_len: 0,
             token: None,
@@ -1132,7 +1259,153 @@
     }
 
     #[test]
-    fn decrypt_client_initial() {
+    fn decrypt_client_initial_v1() {
+        let mut pkt = [
+            0xc0, 0x00, 0x00, 0x00, 0x01, 0x08, 0x83, 0x94, 0xc8, 0xf0, 0x3e,
+            0x51, 0x57, 0x08, 0x00, 0x00, 0x44, 0x9e, 0x7b, 0x9a, 0xec, 0x34,
+            0xd1, 0xb1, 0xc9, 0x8d, 0xd7, 0x68, 0x9f, 0xb8, 0xec, 0x11, 0xd2,
+            0x42, 0xb1, 0x23, 0xdc, 0x9b, 0xd8, 0xba, 0xb9, 0x36, 0xb4, 0x7d,
+            0x92, 0xec, 0x35, 0x6c, 0x0b, 0xab, 0x7d, 0xf5, 0x97, 0x6d, 0x27,
+            0xcd, 0x44, 0x9f, 0x63, 0x30, 0x00, 0x99, 0xf3, 0x99, 0x1c, 0x26,
+            0x0e, 0xc4, 0xc6, 0x0d, 0x17, 0xb3, 0x1f, 0x84, 0x29, 0x15, 0x7b,
+            0xb3, 0x5a, 0x12, 0x82, 0xa6, 0x43, 0xa8, 0xd2, 0x26, 0x2c, 0xad,
+            0x67, 0x50, 0x0c, 0xad, 0xb8, 0xe7, 0x37, 0x8c, 0x8e, 0xb7, 0x53,
+            0x9e, 0xc4, 0xd4, 0x90, 0x5f, 0xed, 0x1b, 0xee, 0x1f, 0xc8, 0xaa,
+            0xfb, 0xa1, 0x7c, 0x75, 0x0e, 0x2c, 0x7a, 0xce, 0x01, 0xe6, 0x00,
+            0x5f, 0x80, 0xfc, 0xb7, 0xdf, 0x62, 0x12, 0x30, 0xc8, 0x37, 0x11,
+            0xb3, 0x93, 0x43, 0xfa, 0x02, 0x8c, 0xea, 0x7f, 0x7f, 0xb5, 0xff,
+            0x89, 0xea, 0xc2, 0x30, 0x82, 0x49, 0xa0, 0x22, 0x52, 0x15, 0x5e,
+            0x23, 0x47, 0xb6, 0x3d, 0x58, 0xc5, 0x45, 0x7a, 0xfd, 0x84, 0xd0,
+            0x5d, 0xff, 0xfd, 0xb2, 0x03, 0x92, 0x84, 0x4a, 0xe8, 0x12, 0x15,
+            0x46, 0x82, 0xe9, 0xcf, 0x01, 0x2f, 0x90, 0x21, 0xa6, 0xf0, 0xbe,
+            0x17, 0xdd, 0xd0, 0xc2, 0x08, 0x4d, 0xce, 0x25, 0xff, 0x9b, 0x06,
+            0xcd, 0xe5, 0x35, 0xd0, 0xf9, 0x20, 0xa2, 0xdb, 0x1b, 0xf3, 0x62,
+            0xc2, 0x3e, 0x59, 0x6d, 0xee, 0x38, 0xf5, 0xa6, 0xcf, 0x39, 0x48,
+            0x83, 0x8a, 0x3a, 0xec, 0x4e, 0x15, 0xda, 0xf8, 0x50, 0x0a, 0x6e,
+            0xf6, 0x9e, 0xc4, 0xe3, 0xfe, 0xb6, 0xb1, 0xd9, 0x8e, 0x61, 0x0a,
+            0xc8, 0xb7, 0xec, 0x3f, 0xaf, 0x6a, 0xd7, 0x60, 0xb7, 0xba, 0xd1,
+            0xdb, 0x4b, 0xa3, 0x48, 0x5e, 0x8a, 0x94, 0xdc, 0x25, 0x0a, 0xe3,
+            0xfd, 0xb4, 0x1e, 0xd1, 0x5f, 0xb6, 0xa8, 0xe5, 0xeb, 0xa0, 0xfc,
+            0x3d, 0xd6, 0x0b, 0xc8, 0xe3, 0x0c, 0x5c, 0x42, 0x87, 0xe5, 0x38,
+            0x05, 0xdb, 0x05, 0x9a, 0xe0, 0x64, 0x8d, 0xb2, 0xf6, 0x42, 0x64,
+            0xed, 0x5e, 0x39, 0xbe, 0x2e, 0x20, 0xd8, 0x2d, 0xf5, 0x66, 0xda,
+            0x8d, 0xd5, 0x99, 0x8c, 0xca, 0xbd, 0xae, 0x05, 0x30, 0x60, 0xae,
+            0x6c, 0x7b, 0x43, 0x78, 0xe8, 0x46, 0xd2, 0x9f, 0x37, 0xed, 0x7b,
+            0x4e, 0xa9, 0xec, 0x5d, 0x82, 0xe7, 0x96, 0x1b, 0x7f, 0x25, 0xa9,
+            0x32, 0x38, 0x51, 0xf6, 0x81, 0xd5, 0x82, 0x36, 0x3a, 0xa5, 0xf8,
+            0x99, 0x37, 0xf5, 0xa6, 0x72, 0x58, 0xbf, 0x63, 0xad, 0x6f, 0x1a,
+            0x0b, 0x1d, 0x96, 0xdb, 0xd4, 0xfa, 0xdd, 0xfc, 0xef, 0xc5, 0x26,
+            0x6b, 0xa6, 0x61, 0x17, 0x22, 0x39, 0x5c, 0x90, 0x65, 0x56, 0xbe,
+            0x52, 0xaf, 0xe3, 0xf5, 0x65, 0x63, 0x6a, 0xd1, 0xb1, 0x7d, 0x50,
+            0x8b, 0x73, 0xd8, 0x74, 0x3e, 0xeb, 0x52, 0x4b, 0xe2, 0x2b, 0x3d,
+            0xcb, 0xc2, 0xc7, 0x46, 0x8d, 0x54, 0x11, 0x9c, 0x74, 0x68, 0x44,
+            0x9a, 0x13, 0xd8, 0xe3, 0xb9, 0x58, 0x11, 0xa1, 0x98, 0xf3, 0x49,
+            0x1d, 0xe3, 0xe7, 0xfe, 0x94, 0x2b, 0x33, 0x04, 0x07, 0xab, 0xf8,
+            0x2a, 0x4e, 0xd7, 0xc1, 0xb3, 0x11, 0x66, 0x3a, 0xc6, 0x98, 0x90,
+            0xf4, 0x15, 0x70, 0x15, 0x85, 0x3d, 0x91, 0xe9, 0x23, 0x03, 0x7c,
+            0x22, 0x7a, 0x33, 0xcd, 0xd5, 0xec, 0x28, 0x1c, 0xa3, 0xf7, 0x9c,
+            0x44, 0x54, 0x6b, 0x9d, 0x90, 0xca, 0x00, 0xf0, 0x64, 0xc9, 0x9e,
+            0x3d, 0xd9, 0x79, 0x11, 0xd3, 0x9f, 0xe9, 0xc5, 0xd0, 0xb2, 0x3a,
+            0x22, 0x9a, 0x23, 0x4c, 0xb3, 0x61, 0x86, 0xc4, 0x81, 0x9e, 0x8b,
+            0x9c, 0x59, 0x27, 0x72, 0x66, 0x32, 0x29, 0x1d, 0x6a, 0x41, 0x82,
+            0x11, 0xcc, 0x29, 0x62, 0xe2, 0x0f, 0xe4, 0x7f, 0xeb, 0x3e, 0xdf,
+            0x33, 0x0f, 0x2c, 0x60, 0x3a, 0x9d, 0x48, 0xc0, 0xfc, 0xb5, 0x69,
+            0x9d, 0xbf, 0xe5, 0x89, 0x64, 0x25, 0xc5, 0xba, 0xc4, 0xae, 0xe8,
+            0x2e, 0x57, 0xa8, 0x5a, 0xaf, 0x4e, 0x25, 0x13, 0xe4, 0xf0, 0x57,
+            0x96, 0xb0, 0x7b, 0xa2, 0xee, 0x47, 0xd8, 0x05, 0x06, 0xf8, 0xd2,
+            0xc2, 0x5e, 0x50, 0xfd, 0x14, 0xde, 0x71, 0xe6, 0xc4, 0x18, 0x55,
+            0x93, 0x02, 0xf9, 0x39, 0xb0, 0xe1, 0xab, 0xd5, 0x76, 0xf2, 0x79,
+            0xc4, 0xb2, 0xe0, 0xfe, 0xb8, 0x5c, 0x1f, 0x28, 0xff, 0x18, 0xf5,
+            0x88, 0x91, 0xff, 0xef, 0x13, 0x2e, 0xef, 0x2f, 0xa0, 0x93, 0x46,
+            0xae, 0xe3, 0x3c, 0x28, 0xeb, 0x13, 0x0f, 0xf2, 0x8f, 0x5b, 0x76,
+            0x69, 0x53, 0x33, 0x41, 0x13, 0x21, 0x19, 0x96, 0xd2, 0x00, 0x11,
+            0xa1, 0x98, 0xe3, 0xfc, 0x43, 0x3f, 0x9f, 0x25, 0x41, 0x01, 0x0a,
+            0xe1, 0x7c, 0x1b, 0xf2, 0x02, 0x58, 0x0f, 0x60, 0x47, 0x47, 0x2f,
+            0xb3, 0x68, 0x57, 0xfe, 0x84, 0x3b, 0x19, 0xf5, 0x98, 0x40, 0x09,
+            0xdd, 0xc3, 0x24, 0x04, 0x4e, 0x84, 0x7a, 0x4f, 0x4a, 0x0a, 0xb3,
+            0x4f, 0x71, 0x95, 0x95, 0xde, 0x37, 0x25, 0x2d, 0x62, 0x35, 0x36,
+            0x5e, 0x9b, 0x84, 0x39, 0x2b, 0x06, 0x10, 0x85, 0x34, 0x9d, 0x73,
+            0x20, 0x3a, 0x4a, 0x13, 0xe9, 0x6f, 0x54, 0x32, 0xec, 0x0f, 0xd4,
+            0xa1, 0xee, 0x65, 0xac, 0xcd, 0xd5, 0xe3, 0x90, 0x4d, 0xf5, 0x4c,
+            0x1d, 0xa5, 0x10, 0xb0, 0xff, 0x20, 0xdc, 0xc0, 0xc7, 0x7f, 0xcb,
+            0x2c, 0x0e, 0x0e, 0xb6, 0x05, 0xcb, 0x05, 0x04, 0xdb, 0x87, 0x63,
+            0x2c, 0xf3, 0xd8, 0xb4, 0xda, 0xe6, 0xe7, 0x05, 0x76, 0x9d, 0x1d,
+            0xe3, 0x54, 0x27, 0x01, 0x23, 0xcb, 0x11, 0x45, 0x0e, 0xfc, 0x60,
+            0xac, 0x47, 0x68, 0x3d, 0x7b, 0x8d, 0x0f, 0x81, 0x13, 0x65, 0x56,
+            0x5f, 0xd9, 0x8c, 0x4c, 0x8e, 0xb9, 0x36, 0xbc, 0xab, 0x8d, 0x06,
+            0x9f, 0xc3, 0x3b, 0xd8, 0x01, 0xb0, 0x3a, 0xde, 0xa2, 0xe1, 0xfb,
+            0xc5, 0xaa, 0x46, 0x3d, 0x08, 0xca, 0x19, 0x89, 0x6d, 0x2b, 0xf5,
+            0x9a, 0x07, 0x1b, 0x85, 0x1e, 0x6c, 0x23, 0x90, 0x52, 0x17, 0x2f,
+            0x29, 0x6b, 0xfb, 0x5e, 0x72, 0x40, 0x47, 0x90, 0xa2, 0x18, 0x10,
+            0x14, 0xf3, 0xb9, 0x4a, 0x4e, 0x97, 0xd1, 0x17, 0xb4, 0x38, 0x13,
+            0x03, 0x68, 0xcc, 0x39, 0xdb, 0xb2, 0xd1, 0x98, 0x06, 0x5a, 0xe3,
+            0x98, 0x65, 0x47, 0x92, 0x6c, 0xd2, 0x16, 0x2f, 0x40, 0xa2, 0x9f,
+            0x0c, 0x3c, 0x87, 0x45, 0xc0, 0xf5, 0x0f, 0xba, 0x38, 0x52, 0xe5,
+            0x66, 0xd4, 0x45, 0x75, 0xc2, 0x9d, 0x39, 0xa0, 0x3f, 0x0c, 0xda,
+            0x72, 0x19, 0x84, 0xb6, 0xf4, 0x40, 0x59, 0x1f, 0x35, 0x5e, 0x12,
+            0xd4, 0x39, 0xff, 0x15, 0x0a, 0xab, 0x76, 0x13, 0x49, 0x9d, 0xbd,
+            0x49, 0xad, 0xab, 0xc8, 0x67, 0x6e, 0xef, 0x02, 0x3b, 0x15, 0xb6,
+            0x5b, 0xfc, 0x5c, 0xa0, 0x69, 0x48, 0x10, 0x9f, 0x23, 0xf3, 0x50,
+            0xdb, 0x82, 0x12, 0x35, 0x35, 0xeb, 0x8a, 0x74, 0x33, 0xbd, 0xab,
+            0xcb, 0x90, 0x92, 0x71, 0xa6, 0xec, 0xbc, 0xb5, 0x8b, 0x93, 0x6a,
+            0x88, 0xcd, 0x4e, 0x8f, 0x2e, 0x6f, 0xf5, 0x80, 0x01, 0x75, 0xf1,
+            0x13, 0x25, 0x3d, 0x8f, 0xa9, 0xca, 0x88, 0x85, 0xc2, 0xf5, 0x52,
+            0xe6, 0x57, 0xdc, 0x60, 0x3f, 0x25, 0x2e, 0x1a, 0x8e, 0x30, 0x8f,
+            0x76, 0xf0, 0xbe, 0x79, 0xe2, 0xfb, 0x8f, 0x5d, 0x5f, 0xbb, 0xe2,
+            0xe3, 0x0e, 0xca, 0xdd, 0x22, 0x07, 0x23, 0xc8, 0xc0, 0xae, 0xa8,
+            0x07, 0x8c, 0xdf, 0xcb, 0x38, 0x68, 0x26, 0x3f, 0xf8, 0xf0, 0x94,
+            0x00, 0x54, 0xda, 0x48, 0x78, 0x18, 0x93, 0xa7, 0xe4, 0x9a, 0xd5,
+            0xaf, 0xf4, 0xaf, 0x30, 0x0c, 0xd8, 0x04, 0xa6, 0xb6, 0x27, 0x9a,
+            0xb3, 0xff, 0x3a, 0xfb, 0x64, 0x49, 0x1c, 0x85, 0x19, 0x4a, 0xab,
+            0x76, 0x0d, 0x58, 0xa6, 0x06, 0x65, 0x4f, 0x9f, 0x44, 0x00, 0xe8,
+            0xb3, 0x85, 0x91, 0x35, 0x6f, 0xbf, 0x64, 0x25, 0xac, 0xa2, 0x6d,
+            0xc8, 0x52, 0x44, 0x25, 0x9f, 0xf2, 0xb1, 0x9c, 0x41, 0xb9, 0xf9,
+            0x6f, 0x3c, 0xa9, 0xec, 0x1d, 0xde, 0x43, 0x4d, 0xa7, 0xd2, 0xd3,
+            0x92, 0xb9, 0x05, 0xdd, 0xf3, 0xd1, 0xf9, 0xaf, 0x93, 0xd1, 0xaf,
+            0x59, 0x50, 0xbd, 0x49, 0x3f, 0x5a, 0xa7, 0x31, 0xb4, 0x05, 0x6d,
+            0xf3, 0x1b, 0xd2, 0x67, 0xb6, 0xb9, 0x0a, 0x07, 0x98, 0x31, 0xaa,
+            0xf5, 0x79, 0xbe, 0x0a, 0x39, 0x01, 0x31, 0x37, 0xaa, 0xc6, 0xd4,
+            0x04, 0xf5, 0x18, 0xcf, 0xd4, 0x68, 0x40, 0x64, 0x7e, 0x78, 0xbf,
+            0xe7, 0x06, 0xca, 0x4c, 0xf5, 0xe9, 0xc5, 0x45, 0x3e, 0x9f, 0x7c,
+            0xfd, 0x2b, 0x8b, 0x4c, 0x8d, 0x16, 0x9a, 0x44, 0xe5, 0x5c, 0x88,
+            0xd4, 0xa9, 0xa7, 0xf9, 0x47, 0x42, 0x41, 0x10, 0x92, 0xab, 0xbd,
+            0xf8, 0xb8, 0x89, 0xe5, 0xc1, 0x99, 0xd0, 0x96, 0xe3, 0xf2, 0x47,
+            0x88,
+        ];
+
+        let dcid = [0x83, 0x94, 0xc8, 0xf0, 0x3e, 0x51, 0x57, 0x08];
+
+        let frames = [
+            0x06, 0x00, 0x40, 0xf1, 0x01, 0x00, 0x00, 0xed, 0x03, 0x03, 0xeb,
+            0xf8, 0xfa, 0x56, 0xf1, 0x29, 0x39, 0xb9, 0x58, 0x4a, 0x38, 0x96,
+            0x47, 0x2e, 0xc4, 0x0b, 0xb8, 0x63, 0xcf, 0xd3, 0xe8, 0x68, 0x04,
+            0xfe, 0x3a, 0x47, 0xf0, 0x6a, 0x2b, 0x69, 0x48, 0x4c, 0x00, 0x00,
+            0x04, 0x13, 0x01, 0x13, 0x02, 0x01, 0x00, 0x00, 0xc0, 0x00, 0x00,
+            0x00, 0x10, 0x00, 0x0e, 0x00, 0x00, 0x0b, 0x65, 0x78, 0x61, 0x6d,
+            0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0xff, 0x01, 0x00, 0x01,
+            0x00, 0x00, 0x0a, 0x00, 0x08, 0x00, 0x06, 0x00, 0x1d, 0x00, 0x17,
+            0x00, 0x18, 0x00, 0x10, 0x00, 0x07, 0x00, 0x05, 0x04, 0x61, 0x6c,
+            0x70, 0x6e, 0x00, 0x05, 0x00, 0x05, 0x01, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x33, 0x00, 0x26, 0x00, 0x24, 0x00, 0x1d, 0x00, 0x20, 0x93,
+            0x70, 0xb2, 0xc9, 0xca, 0xa4, 0x7f, 0xba, 0xba, 0xf4, 0x55, 0x9f,
+            0xed, 0xba, 0x75, 0x3d, 0xe1, 0x71, 0xfa, 0x71, 0xf5, 0x0f, 0x1c,
+            0xe1, 0x5d, 0x43, 0xe9, 0x94, 0xec, 0x74, 0xd7, 0x48, 0x00, 0x2b,
+            0x00, 0x03, 0x02, 0x03, 0x04, 0x00, 0x0d, 0x00, 0x10, 0x00, 0x0e,
+            0x04, 0x03, 0x05, 0x03, 0x06, 0x03, 0x02, 0x03, 0x08, 0x04, 0x08,
+            0x05, 0x08, 0x06, 0x00, 0x2d, 0x00, 0x02, 0x01, 0x01, 0x00, 0x1c,
+            0x00, 0x02, 0x40, 0x01, 0xff, 0xa5, 0x00, 0x32, 0x04, 0x08, 0xff,
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x05, 0x04, 0x80, 0x00,
+            0xff, 0xff, 0x07, 0x04, 0x80, 0x00, 0xff, 0xff, 0x08, 0x01, 0x10,
+            0x01, 0x04, 0x80, 0x00, 0x75, 0x30, 0x09, 0x01, 0x10, 0x0f, 0x08,
+            0x83, 0x94, 0xc8, 0xf0, 0x3e, 0x51, 0x57, 0x08, 0x06, 0x04, 0x80,
+            0x00, 0xff, 0xff,
+        ];
+
+        assert_decrypt_initial_pkt(&mut pkt, &dcid, true, &frames, 2, 4);
+    }
+
+    #[test]
+    fn decrypt_client_initial_draft29() {
         let mut pkt = [
             0xc5, 0xff, 0x00, 0x00, 0x1d, 0x08, 0x83, 0x94, 0xc8, 0xf0, 0x3e,
             0x51, 0x57, 0x08, 0x00, 0x00, 0x44, 0x9e, 0x4a, 0x95, 0x24, 0x5b,
@@ -1274,7 +1547,7 @@
     }
 
     #[test]
-    fn decrypt_client_initial_old() {
+    fn decrypt_client_initial_draft28() {
         let mut pkt = [
             0xc0, 0xff, 0x00, 0x00, 0x1c, 0x08, 0x83, 0x94, 0xc8, 0xf0, 0x3e,
             0x51, 0x57, 0x08, 0x00, 0x00, 0x44, 0x9e, 0x3b, 0x34, 0x3a, 0xa8,
@@ -1416,7 +1689,42 @@
     }
 
     #[test]
-    fn decrypt_server_initial() {
+    fn decrypt_server_initial_v1() {
+        let mut pkt = [
+            0xcf, 0x00, 0x00, 0x00, 0x01, 0x00, 0x08, 0xf0, 0x67, 0xa5, 0x50,
+            0x2a, 0x42, 0x62, 0xb5, 0x00, 0x40, 0x75, 0xc0, 0xd9, 0x5a, 0x48,
+            0x2c, 0xd0, 0x99, 0x1c, 0xd2, 0x5b, 0x0a, 0xac, 0x40, 0x6a, 0x58,
+            0x16, 0xb6, 0x39, 0x41, 0x00, 0xf3, 0x7a, 0x1c, 0x69, 0x79, 0x75,
+            0x54, 0x78, 0x0b, 0xb3, 0x8c, 0xc5, 0xa9, 0x9f, 0x5e, 0xde, 0x4c,
+            0xf7, 0x3c, 0x3e, 0xc2, 0x49, 0x3a, 0x18, 0x39, 0xb3, 0xdb, 0xcb,
+            0xa3, 0xf6, 0xea, 0x46, 0xc5, 0xb7, 0x68, 0x4d, 0xf3, 0x54, 0x8e,
+            0x7d, 0xde, 0xb9, 0xc3, 0xbf, 0x9c, 0x73, 0xcc, 0x3f, 0x3b, 0xde,
+            0xd7, 0x4b, 0x56, 0x2b, 0xfb, 0x19, 0xfb, 0x84, 0x02, 0x2f, 0x8e,
+            0xf4, 0xcd, 0xd9, 0x37, 0x95, 0xd7, 0x7d, 0x06, 0xed, 0xbb, 0x7a,
+            0xaf, 0x2f, 0x58, 0x89, 0x18, 0x50, 0xab, 0xbd, 0xca, 0x3d, 0x20,
+            0x39, 0x8c, 0x27, 0x64, 0x56, 0xcb, 0xc4, 0x21, 0x58, 0x40, 0x7d,
+            0xd0, 0x74, 0xee,
+        ];
+
+        let dcid = [0x83, 0x94, 0xc8, 0xf0, 0x3e, 0x51, 0x57, 0x08];
+
+        let frames = [
+            0x02, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x40, 0x5a, 0x02, 0x00,
+            0x00, 0x56, 0x03, 0x03, 0xee, 0xfc, 0xe7, 0xf7, 0xb3, 0x7b, 0xa1,
+            0xd1, 0x63, 0x2e, 0x96, 0x67, 0x78, 0x25, 0xdd, 0xf7, 0x39, 0x88,
+            0xcf, 0xc7, 0x98, 0x25, 0xdf, 0x56, 0x6d, 0xc5, 0x43, 0x0b, 0x9a,
+            0x04, 0x5a, 0x12, 0x00, 0x13, 0x01, 0x00, 0x00, 0x2e, 0x00, 0x33,
+            0x00, 0x24, 0x00, 0x1d, 0x00, 0x20, 0x9d, 0x3c, 0x94, 0x0d, 0x89,
+            0x69, 0x0b, 0x84, 0xd0, 0x8a, 0x60, 0x99, 0x3c, 0x14, 0x4e, 0xca,
+            0x68, 0x4d, 0x10, 0x81, 0x28, 0x7c, 0x83, 0x4d, 0x53, 0x11, 0xbc,
+            0xf3, 0x2b, 0xb9, 0xda, 0x1a, 0x00, 0x2b, 0x00, 0x02, 0x03, 0x04,
+        ];
+
+        assert_decrypt_initial_pkt(&mut pkt, &dcid, false, &frames, 1, 2);
+    }
+
+    #[test]
+    fn decrypt_server_initial_draft29() {
         let mut pkt = [
             0xca, 0xff, 0x00, 0x00, 0x1d, 0x00, 0x08, 0xf0, 0x67, 0xa5, 0x50,
             0x2a, 0x42, 0x62, 0xb5, 0x00, 0x40, 0x74, 0xaa, 0xf2, 0xf0, 0x07,
@@ -1451,7 +1759,7 @@
     }
 
     #[test]
-    fn decrypt_server_initial_old() {
+    fn decrypt_server_initial_draft28() {
         let mut pkt = [
             0xc9, 0xff, 0x00, 0x00, 0x1c, 0x00, 0x08, 0xf0, 0x67, 0xa5, 0x50,
             0x2a, 0x42, 0x62, 0xb5, 0x00, 0x40, 0x74, 0x16, 0x8b, 0xf2, 0x2b,
@@ -1557,7 +1865,249 @@
     }
 
     #[test]
-    fn encrypt_client_initial() {
+    fn encrypt_client_initial_v1() {
+        let mut header = [
+            0xc3, 0x00, 0x00, 0x00, 0x01, 0x08, 0x83, 0x94, 0xc8, 0xf0, 0x3e,
+            0x51, 0x57, 0x08, 0x00, 0x00, 0x44, 0x9e, 0x00, 0x00, 0x00, 0x02,
+        ];
+
+        let dcid = [0x83, 0x94, 0xc8, 0xf0, 0x3e, 0x51, 0x57, 0x08];
+
+        let frames = [
+            0x06, 0x00, 0x40, 0xf1, 0x01, 0x00, 0x00, 0xed, 0x03, 0x03, 0xeb,
+            0xf8, 0xfa, 0x56, 0xf1, 0x29, 0x39, 0xb9, 0x58, 0x4a, 0x38, 0x96,
+            0x47, 0x2e, 0xc4, 0x0b, 0xb8, 0x63, 0xcf, 0xd3, 0xe8, 0x68, 0x04,
+            0xfe, 0x3a, 0x47, 0xf0, 0x6a, 0x2b, 0x69, 0x48, 0x4c, 0x00, 0x00,
+            0x04, 0x13, 0x01, 0x13, 0x02, 0x01, 0x00, 0x00, 0xc0, 0x00, 0x00,
+            0x00, 0x10, 0x00, 0x0e, 0x00, 0x00, 0x0b, 0x65, 0x78, 0x61, 0x6d,
+            0x70, 0x6c, 0x65, 0x2e, 0x63, 0x6f, 0x6d, 0xff, 0x01, 0x00, 0x01,
+            0x00, 0x00, 0x0a, 0x00, 0x08, 0x00, 0x06, 0x00, 0x1d, 0x00, 0x17,
+            0x00, 0x18, 0x00, 0x10, 0x00, 0x07, 0x00, 0x05, 0x04, 0x61, 0x6c,
+            0x70, 0x6e, 0x00, 0x05, 0x00, 0x05, 0x01, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x33, 0x00, 0x26, 0x00, 0x24, 0x00, 0x1d, 0x00, 0x20, 0x93,
+            0x70, 0xb2, 0xc9, 0xca, 0xa4, 0x7f, 0xba, 0xba, 0xf4, 0x55, 0x9f,
+            0xed, 0xba, 0x75, 0x3d, 0xe1, 0x71, 0xfa, 0x71, 0xf5, 0x0f, 0x1c,
+            0xe1, 0x5d, 0x43, 0xe9, 0x94, 0xec, 0x74, 0xd7, 0x48, 0x00, 0x2b,
+            0x00, 0x03, 0x02, 0x03, 0x04, 0x00, 0x0d, 0x00, 0x10, 0x00, 0x0e,
+            0x04, 0x03, 0x05, 0x03, 0x06, 0x03, 0x02, 0x03, 0x08, 0x04, 0x08,
+            0x05, 0x08, 0x06, 0x00, 0x2d, 0x00, 0x02, 0x01, 0x01, 0x00, 0x1c,
+            0x00, 0x02, 0x40, 0x01, 0xff, 0xa5, 0x00, 0x32, 0x04, 0x08, 0xff,
+            0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x05, 0x04, 0x80, 0x00,
+            0xff, 0xff, 0x07, 0x04, 0x80, 0x00, 0xff, 0xff, 0x08, 0x01, 0x10,
+            0x01, 0x04, 0x80, 0x00, 0x75, 0x30, 0x09, 0x01, 0x10, 0x0f, 0x08,
+            0x83, 0x94, 0xc8, 0xf0, 0x3e, 0x51, 0x57, 0x08, 0x06, 0x04, 0x80,
+            0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+            0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+        ];
+
+        let pkt = [
+            0xc0, 0x00, 0x00, 0x00, 0x01, 0x08, 0x83, 0x94, 0xc8, 0xf0, 0x3e,
+            0x51, 0x57, 0x08, 0x00, 0x00, 0x44, 0x9e, 0x7b, 0x9a, 0xec, 0x34,
+            0xd1, 0xb1, 0xc9, 0x8d, 0xd7, 0x68, 0x9f, 0xb8, 0xec, 0x11, 0xd2,
+            0x42, 0xb1, 0x23, 0xdc, 0x9b, 0xd8, 0xba, 0xb9, 0x36, 0xb4, 0x7d,
+            0x92, 0xec, 0x35, 0x6c, 0x0b, 0xab, 0x7d, 0xf5, 0x97, 0x6d, 0x27,
+            0xcd, 0x44, 0x9f, 0x63, 0x30, 0x00, 0x99, 0xf3, 0x99, 0x1c, 0x26,
+            0x0e, 0xc4, 0xc6, 0x0d, 0x17, 0xb3, 0x1f, 0x84, 0x29, 0x15, 0x7b,
+            0xb3, 0x5a, 0x12, 0x82, 0xa6, 0x43, 0xa8, 0xd2, 0x26, 0x2c, 0xad,
+            0x67, 0x50, 0x0c, 0xad, 0xb8, 0xe7, 0x37, 0x8c, 0x8e, 0xb7, 0x53,
+            0x9e, 0xc4, 0xd4, 0x90, 0x5f, 0xed, 0x1b, 0xee, 0x1f, 0xc8, 0xaa,
+            0xfb, 0xa1, 0x7c, 0x75, 0x0e, 0x2c, 0x7a, 0xce, 0x01, 0xe6, 0x00,
+            0x5f, 0x80, 0xfc, 0xb7, 0xdf, 0x62, 0x12, 0x30, 0xc8, 0x37, 0x11,
+            0xb3, 0x93, 0x43, 0xfa, 0x02, 0x8c, 0xea, 0x7f, 0x7f, 0xb5, 0xff,
+            0x89, 0xea, 0xc2, 0x30, 0x82, 0x49, 0xa0, 0x22, 0x52, 0x15, 0x5e,
+            0x23, 0x47, 0xb6, 0x3d, 0x58, 0xc5, 0x45, 0x7a, 0xfd, 0x84, 0xd0,
+            0x5d, 0xff, 0xfd, 0xb2, 0x03, 0x92, 0x84, 0x4a, 0xe8, 0x12, 0x15,
+            0x46, 0x82, 0xe9, 0xcf, 0x01, 0x2f, 0x90, 0x21, 0xa6, 0xf0, 0xbe,
+            0x17, 0xdd, 0xd0, 0xc2, 0x08, 0x4d, 0xce, 0x25, 0xff, 0x9b, 0x06,
+            0xcd, 0xe5, 0x35, 0xd0, 0xf9, 0x20, 0xa2, 0xdb, 0x1b, 0xf3, 0x62,
+            0xc2, 0x3e, 0x59, 0x6d, 0xee, 0x38, 0xf5, 0xa6, 0xcf, 0x39, 0x48,
+            0x83, 0x8a, 0x3a, 0xec, 0x4e, 0x15, 0xda, 0xf8, 0x50, 0x0a, 0x6e,
+            0xf6, 0x9e, 0xc4, 0xe3, 0xfe, 0xb6, 0xb1, 0xd9, 0x8e, 0x61, 0x0a,
+            0xc8, 0xb7, 0xec, 0x3f, 0xaf, 0x6a, 0xd7, 0x60, 0xb7, 0xba, 0xd1,
+            0xdb, 0x4b, 0xa3, 0x48, 0x5e, 0x8a, 0x94, 0xdc, 0x25, 0x0a, 0xe3,
+            0xfd, 0xb4, 0x1e, 0xd1, 0x5f, 0xb6, 0xa8, 0xe5, 0xeb, 0xa0, 0xfc,
+            0x3d, 0xd6, 0x0b, 0xc8, 0xe3, 0x0c, 0x5c, 0x42, 0x87, 0xe5, 0x38,
+            0x05, 0xdb, 0x05, 0x9a, 0xe0, 0x64, 0x8d, 0xb2, 0xf6, 0x42, 0x64,
+            0xed, 0x5e, 0x39, 0xbe, 0x2e, 0x20, 0xd8, 0x2d, 0xf5, 0x66, 0xda,
+            0x8d, 0xd5, 0x99, 0x8c, 0xca, 0xbd, 0xae, 0x05, 0x30, 0x60, 0xae,
+            0x6c, 0x7b, 0x43, 0x78, 0xe8, 0x46, 0xd2, 0x9f, 0x37, 0xed, 0x7b,
+            0x4e, 0xa9, 0xec, 0x5d, 0x82, 0xe7, 0x96, 0x1b, 0x7f, 0x25, 0xa9,
+            0x32, 0x38, 0x51, 0xf6, 0x81, 0xd5, 0x82, 0x36, 0x3a, 0xa5, 0xf8,
+            0x99, 0x37, 0xf5, 0xa6, 0x72, 0x58, 0xbf, 0x63, 0xad, 0x6f, 0x1a,
+            0x0b, 0x1d, 0x96, 0xdb, 0xd4, 0xfa, 0xdd, 0xfc, 0xef, 0xc5, 0x26,
+            0x6b, 0xa6, 0x61, 0x17, 0x22, 0x39, 0x5c, 0x90, 0x65, 0x56, 0xbe,
+            0x52, 0xaf, 0xe3, 0xf5, 0x65, 0x63, 0x6a, 0xd1, 0xb1, 0x7d, 0x50,
+            0x8b, 0x73, 0xd8, 0x74, 0x3e, 0xeb, 0x52, 0x4b, 0xe2, 0x2b, 0x3d,
+            0xcb, 0xc2, 0xc7, 0x46, 0x8d, 0x54, 0x11, 0x9c, 0x74, 0x68, 0x44,
+            0x9a, 0x13, 0xd8, 0xe3, 0xb9, 0x58, 0x11, 0xa1, 0x98, 0xf3, 0x49,
+            0x1d, 0xe3, 0xe7, 0xfe, 0x94, 0x2b, 0x33, 0x04, 0x07, 0xab, 0xf8,
+            0x2a, 0x4e, 0xd7, 0xc1, 0xb3, 0x11, 0x66, 0x3a, 0xc6, 0x98, 0x90,
+            0xf4, 0x15, 0x70, 0x15, 0x85, 0x3d, 0x91, 0xe9, 0x23, 0x03, 0x7c,
+            0x22, 0x7a, 0x33, 0xcd, 0xd5, 0xec, 0x28, 0x1c, 0xa3, 0xf7, 0x9c,
+            0x44, 0x54, 0x6b, 0x9d, 0x90, 0xca, 0x00, 0xf0, 0x64, 0xc9, 0x9e,
+            0x3d, 0xd9, 0x79, 0x11, 0xd3, 0x9f, 0xe9, 0xc5, 0xd0, 0xb2, 0x3a,
+            0x22, 0x9a, 0x23, 0x4c, 0xb3, 0x61, 0x86, 0xc4, 0x81, 0x9e, 0x8b,
+            0x9c, 0x59, 0x27, 0x72, 0x66, 0x32, 0x29, 0x1d, 0x6a, 0x41, 0x82,
+            0x11, 0xcc, 0x29, 0x62, 0xe2, 0x0f, 0xe4, 0x7f, 0xeb, 0x3e, 0xdf,
+            0x33, 0x0f, 0x2c, 0x60, 0x3a, 0x9d, 0x48, 0xc0, 0xfc, 0xb5, 0x69,
+            0x9d, 0xbf, 0xe5, 0x89, 0x64, 0x25, 0xc5, 0xba, 0xc4, 0xae, 0xe8,
+            0x2e, 0x57, 0xa8, 0x5a, 0xaf, 0x4e, 0x25, 0x13, 0xe4, 0xf0, 0x57,
+            0x96, 0xb0, 0x7b, 0xa2, 0xee, 0x47, 0xd8, 0x05, 0x06, 0xf8, 0xd2,
+            0xc2, 0x5e, 0x50, 0xfd, 0x14, 0xde, 0x71, 0xe6, 0xc4, 0x18, 0x55,
+            0x93, 0x02, 0xf9, 0x39, 0xb0, 0xe1, 0xab, 0xd5, 0x76, 0xf2, 0x79,
+            0xc4, 0xb2, 0xe0, 0xfe, 0xb8, 0x5c, 0x1f, 0x28, 0xff, 0x18, 0xf5,
+            0x88, 0x91, 0xff, 0xef, 0x13, 0x2e, 0xef, 0x2f, 0xa0, 0x93, 0x46,
+            0xae, 0xe3, 0x3c, 0x28, 0xeb, 0x13, 0x0f, 0xf2, 0x8f, 0x5b, 0x76,
+            0x69, 0x53, 0x33, 0x41, 0x13, 0x21, 0x19, 0x96, 0xd2, 0x00, 0x11,
+            0xa1, 0x98, 0xe3, 0xfc, 0x43, 0x3f, 0x9f, 0x25, 0x41, 0x01, 0x0a,
+            0xe1, 0x7c, 0x1b, 0xf2, 0x02, 0x58, 0x0f, 0x60, 0x47, 0x47, 0x2f,
+            0xb3, 0x68, 0x57, 0xfe, 0x84, 0x3b, 0x19, 0xf5, 0x98, 0x40, 0x09,
+            0xdd, 0xc3, 0x24, 0x04, 0x4e, 0x84, 0x7a, 0x4f, 0x4a, 0x0a, 0xb3,
+            0x4f, 0x71, 0x95, 0x95, 0xde, 0x37, 0x25, 0x2d, 0x62, 0x35, 0x36,
+            0x5e, 0x9b, 0x84, 0x39, 0x2b, 0x06, 0x10, 0x85, 0x34, 0x9d, 0x73,
+            0x20, 0x3a, 0x4a, 0x13, 0xe9, 0x6f, 0x54, 0x32, 0xec, 0x0f, 0xd4,
+            0xa1, 0xee, 0x65, 0xac, 0xcd, 0xd5, 0xe3, 0x90, 0x4d, 0xf5, 0x4c,
+            0x1d, 0xa5, 0x10, 0xb0, 0xff, 0x20, 0xdc, 0xc0, 0xc7, 0x7f, 0xcb,
+            0x2c, 0x0e, 0x0e, 0xb6, 0x05, 0xcb, 0x05, 0x04, 0xdb, 0x87, 0x63,
+            0x2c, 0xf3, 0xd8, 0xb4, 0xda, 0xe6, 0xe7, 0x05, 0x76, 0x9d, 0x1d,
+            0xe3, 0x54, 0x27, 0x01, 0x23, 0xcb, 0x11, 0x45, 0x0e, 0xfc, 0x60,
+            0xac, 0x47, 0x68, 0x3d, 0x7b, 0x8d, 0x0f, 0x81, 0x13, 0x65, 0x56,
+            0x5f, 0xd9, 0x8c, 0x4c, 0x8e, 0xb9, 0x36, 0xbc, 0xab, 0x8d, 0x06,
+            0x9f, 0xc3, 0x3b, 0xd8, 0x01, 0xb0, 0x3a, 0xde, 0xa2, 0xe1, 0xfb,
+            0xc5, 0xaa, 0x46, 0x3d, 0x08, 0xca, 0x19, 0x89, 0x6d, 0x2b, 0xf5,
+            0x9a, 0x07, 0x1b, 0x85, 0x1e, 0x6c, 0x23, 0x90, 0x52, 0x17, 0x2f,
+            0x29, 0x6b, 0xfb, 0x5e, 0x72, 0x40, 0x47, 0x90, 0xa2, 0x18, 0x10,
+            0x14, 0xf3, 0xb9, 0x4a, 0x4e, 0x97, 0xd1, 0x17, 0xb4, 0x38, 0x13,
+            0x03, 0x68, 0xcc, 0x39, 0xdb, 0xb2, 0xd1, 0x98, 0x06, 0x5a, 0xe3,
+            0x98, 0x65, 0x47, 0x92, 0x6c, 0xd2, 0x16, 0x2f, 0x40, 0xa2, 0x9f,
+            0x0c, 0x3c, 0x87, 0x45, 0xc0, 0xf5, 0x0f, 0xba, 0x38, 0x52, 0xe5,
+            0x66, 0xd4, 0x45, 0x75, 0xc2, 0x9d, 0x39, 0xa0, 0x3f, 0x0c, 0xda,
+            0x72, 0x19, 0x84, 0xb6, 0xf4, 0x40, 0x59, 0x1f, 0x35, 0x5e, 0x12,
+            0xd4, 0x39, 0xff, 0x15, 0x0a, 0xab, 0x76, 0x13, 0x49, 0x9d, 0xbd,
+            0x49, 0xad, 0xab, 0xc8, 0x67, 0x6e, 0xef, 0x02, 0x3b, 0x15, 0xb6,
+            0x5b, 0xfc, 0x5c, 0xa0, 0x69, 0x48, 0x10, 0x9f, 0x23, 0xf3, 0x50,
+            0xdb, 0x82, 0x12, 0x35, 0x35, 0xeb, 0x8a, 0x74, 0x33, 0xbd, 0xab,
+            0xcb, 0x90, 0x92, 0x71, 0xa6, 0xec, 0xbc, 0xb5, 0x8b, 0x93, 0x6a,
+            0x88, 0xcd, 0x4e, 0x8f, 0x2e, 0x6f, 0xf5, 0x80, 0x01, 0x75, 0xf1,
+            0x13, 0x25, 0x3d, 0x8f, 0xa9, 0xca, 0x88, 0x85, 0xc2, 0xf5, 0x52,
+            0xe6, 0x57, 0xdc, 0x60, 0x3f, 0x25, 0x2e, 0x1a, 0x8e, 0x30, 0x8f,
+            0x76, 0xf0, 0xbe, 0x79, 0xe2, 0xfb, 0x8f, 0x5d, 0x5f, 0xbb, 0xe2,
+            0xe3, 0x0e, 0xca, 0xdd, 0x22, 0x07, 0x23, 0xc8, 0xc0, 0xae, 0xa8,
+            0x07, 0x8c, 0xdf, 0xcb, 0x38, 0x68, 0x26, 0x3f, 0xf8, 0xf0, 0x94,
+            0x00, 0x54, 0xda, 0x48, 0x78, 0x18, 0x93, 0xa7, 0xe4, 0x9a, 0xd5,
+            0xaf, 0xf4, 0xaf, 0x30, 0x0c, 0xd8, 0x04, 0xa6, 0xb6, 0x27, 0x9a,
+            0xb3, 0xff, 0x3a, 0xfb, 0x64, 0x49, 0x1c, 0x85, 0x19, 0x4a, 0xab,
+            0x76, 0x0d, 0x58, 0xa6, 0x06, 0x65, 0x4f, 0x9f, 0x44, 0x00, 0xe8,
+            0xb3, 0x85, 0x91, 0x35, 0x6f, 0xbf, 0x64, 0x25, 0xac, 0xa2, 0x6d,
+            0xc8, 0x52, 0x44, 0x25, 0x9f, 0xf2, 0xb1, 0x9c, 0x41, 0xb9, 0xf9,
+            0x6f, 0x3c, 0xa9, 0xec, 0x1d, 0xde, 0x43, 0x4d, 0xa7, 0xd2, 0xd3,
+            0x92, 0xb9, 0x05, 0xdd, 0xf3, 0xd1, 0xf9, 0xaf, 0x93, 0xd1, 0xaf,
+            0x59, 0x50, 0xbd, 0x49, 0x3f, 0x5a, 0xa7, 0x31, 0xb4, 0x05, 0x6d,
+            0xf3, 0x1b, 0xd2, 0x67, 0xb6, 0xb9, 0x0a, 0x07, 0x98, 0x31, 0xaa,
+            0xf5, 0x79, 0xbe, 0x0a, 0x39, 0x01, 0x31, 0x37, 0xaa, 0xc6, 0xd4,
+            0x04, 0xf5, 0x18, 0xcf, 0xd4, 0x68, 0x40, 0x64, 0x7e, 0x78, 0xbf,
+            0xe7, 0x06, 0xca, 0x4c, 0xf5, 0xe9, 0xc5, 0x45, 0x3e, 0x9f, 0x7c,
+            0xfd, 0x2b, 0x8b, 0x4c, 0x8d, 0x16, 0x9a, 0x44, 0xe5, 0x5c, 0x88,
+            0xd4, 0xa9, 0xa7, 0xf9, 0x47, 0x42, 0x41, 0x10, 0x92, 0xab, 0xbd,
+            0xf8, 0xb8, 0x89, 0xe5, 0xc1, 0x99, 0xd0, 0x96, 0xe3, 0xf2, 0x47,
+            0x88,
+        ];
+
+        assert_encrypt_initial_pkt(
+            &mut header,
+            &dcid,
+            &frames,
+            2,
+            4,
+            false,
+            &pkt,
+        );
+    }
+
+    #[test]
+    fn encrypt_client_initial_draft29() {
         let mut header = [
             0xc3, 0xff, 0x00, 0x00, 0x1d, 0x08, 0x83, 0x94, 0xc8, 0xf0, 0x3e,
             0x51, 0x57, 0x08, 0x00, 0x00, 0x44, 0x9e, 0x00, 0x00, 0x00, 0x02,
@@ -1799,7 +2349,7 @@
     }
 
     #[test]
-    fn encrypt_client_initial_old() {
+    fn encrypt_client_initial_draft28() {
         let mut header = [
             0xc3, 0xff, 0x00, 0x00, 0x1c, 0x08, 0x83, 0x94, 0xc8, 0xf0, 0x3e,
             0x51, 0x57, 0x08, 0x00, 0x00, 0x44, 0x9e, 0x00, 0x00, 0x00, 0x02,
@@ -2041,7 +2591,47 @@
     }
 
     #[test]
-    fn encrypt_server_initial() {
+    fn encrypt_server_initial_v1() {
+        let mut header = [
+            0xc1, 0x00, 0x00, 0x00, 0x01, 0x00, 0x08, 0xf0, 0x67, 0xa5, 0x50,
+            0x2a, 0x42, 0x62, 0xb5, 0x00, 0x40, 0x75, 0x00, 0x01,
+        ];
+
+        let dcid = [0x83, 0x94, 0xc8, 0xf0, 0x3e, 0x51, 0x57, 0x08];
+
+        let frames = [
+            0x02, 0x00, 0x00, 0x00, 0x00, 0x06, 0x00, 0x40, 0x5a, 0x02, 0x00,
+            0x00, 0x56, 0x03, 0x03, 0xee, 0xfc, 0xe7, 0xf7, 0xb3, 0x7b, 0xa1,
+            0xd1, 0x63, 0x2e, 0x96, 0x67, 0x78, 0x25, 0xdd, 0xf7, 0x39, 0x88,
+            0xcf, 0xc7, 0x98, 0x25, 0xdf, 0x56, 0x6d, 0xc5, 0x43, 0x0b, 0x9a,
+            0x04, 0x5a, 0x12, 0x00, 0x13, 0x01, 0x00, 0x00, 0x2e, 0x00, 0x33,
+            0x00, 0x24, 0x00, 0x1d, 0x00, 0x20, 0x9d, 0x3c, 0x94, 0x0d, 0x89,
+            0x69, 0x0b, 0x84, 0xd0, 0x8a, 0x60, 0x99, 0x3c, 0x14, 0x4e, 0xca,
+            0x68, 0x4d, 0x10, 0x81, 0x28, 0x7c, 0x83, 0x4d, 0x53, 0x11, 0xbc,
+            0xf3, 0x2b, 0xb9, 0xda, 0x1a, 0x00, 0x2b, 0x00, 0x02, 0x03, 0x04,
+        ];
+
+        let pkt = [
+            0xcf, 0x00, 0x00, 0x00, 0x01, 0x00, 0x08, 0xf0, 0x67, 0xa5, 0x50,
+            0x2a, 0x42, 0x62, 0xb5, 0x00, 0x40, 0x75, 0xc0, 0xd9, 0x5a, 0x48,
+            0x2c, 0xd0, 0x99, 0x1c, 0xd2, 0x5b, 0x0a, 0xac, 0x40, 0x6a, 0x58,
+            0x16, 0xb6, 0x39, 0x41, 0x00, 0xf3, 0x7a, 0x1c, 0x69, 0x79, 0x75,
+            0x54, 0x78, 0x0b, 0xb3, 0x8c, 0xc5, 0xa9, 0x9f, 0x5e, 0xde, 0x4c,
+            0xf7, 0x3c, 0x3e, 0xc2, 0x49, 0x3a, 0x18, 0x39, 0xb3, 0xdb, 0xcb,
+            0xa3, 0xf6, 0xea, 0x46, 0xc5, 0xb7, 0x68, 0x4d, 0xf3, 0x54, 0x8e,
+            0x7d, 0xde, 0xb9, 0xc3, 0xbf, 0x9c, 0x73, 0xcc, 0x3f, 0x3b, 0xde,
+            0xd7, 0x4b, 0x56, 0x2b, 0xfb, 0x19, 0xfb, 0x84, 0x02, 0x2f, 0x8e,
+            0xf4, 0xcd, 0xd9, 0x37, 0x95, 0xd7, 0x7d, 0x06, 0xed, 0xbb, 0x7a,
+            0xaf, 0x2f, 0x58, 0x89, 0x18, 0x50, 0xab, 0xbd, 0xca, 0x3d, 0x20,
+            0x39, 0x8c, 0x27, 0x64, 0x56, 0xcb, 0xc4, 0x21, 0x58, 0x40, 0x7d,
+            0xd0, 0x74, 0xee,
+        ];
+
+        assert_encrypt_initial_pkt(&mut header, &dcid, &frames, 1, 2, true, &pkt);
+    }
+
+    #[test]
+    fn encrypt_server_initial_draft29() {
         let mut header = [
             0xc1, 0xff, 0x00, 0x00, 0x1d, 0x00, 0x08, 0xf0, 0x67, 0xa5, 0x50,
             0x2a, 0x42, 0x62, 0xb5, 0x00, 0x40, 0x74, 0x00, 0x01,
@@ -2081,7 +2671,7 @@
     }
 
     #[test]
-    fn encrypt_server_initial_old() {
+    fn encrypt_server_initial_draft28() {
         let mut header = [
             0xc1, 0xff, 0x00, 0x00, 0x1c, 0x00, 0x08, 0xf0, 0x67, 0xa5, 0x50,
             0x2a, 0x42, 0x62, 0xb5, 0x00, 0x40, 0x74, 0x00, 0x01,
@@ -2178,8 +2768,8 @@
         let hdr = Header {
             ty: Type::Initial,
             version: crate::PROTOCOL_VERSION,
-            dcid: Vec::new(),
-            scid: Vec::new(),
+            dcid: ConnectionId::default(),
+            scid: ConnectionId::default(),
             pkt_num: 0,
             pkt_num_len: 0,
             token: None,
diff --git a/src/recovery/cubic.rs b/src/recovery/cubic.rs
index a53c68b..8b091c4 100644
--- a/src/recovery/cubic.rs
+++ b/src/recovery/cubic.rs
@@ -26,8 +26,8 @@
 
 //! CUBIC Congestion Control
 //!
-//! This implementation is based on the following RFC:
-//! https://tools.ietf.org/html/rfc8312
+//! This implementation is based on the following draft:
+//! <https://tools.ietf.org/html/draft-ietf-tcpm-rfc8312bis-02>
 //!
 //! Note that Slow Start can use HyStart++ when enabled.
 
@@ -49,6 +49,9 @@
     on_packet_acked,
     congestion_event,
     collapse_cwnd,
+    checkpoint,
+    rollback,
+    has_custom_pacing,
 };
 
 /// CUBIC Constants.
@@ -58,23 +61,55 @@
 
 const C: f64 = 0.4;
 
+/// Threshold on the number of packets lost since the last checkpoint: if
+/// fewer packets than this were lost, the prior state is restored.
+const RESTORE_COUNT_THRESHOLD: usize = 10;
+
+/// Default value of alpha_aimd in the beginning of congestion avoidance.
+const ALPHA_AIMD: f64 = 3.0 * (1.0 - BETA_CUBIC) / (1.0 + BETA_CUBIC);
+
 /// CUBIC State Variables.
 ///
 /// We need to keep those variables across the connection.
-/// k, w_max, w_last_max is described in the RFC.
+/// k, w_max, w_est are described in the RFC.
 #[derive(Debug, Default)]
 pub struct State {
     k: f64,
 
     w_max: f64,
 
-    w_last_max: f64,
+    w_est: f64,
+
+    alpha_aimd: f64,
 
     // Used in CUBIC fix (see on_packet_sent())
     last_sent_time: Option<Instant>,
 
     // Store cwnd increment during congestion avoidance.
     cwnd_inc: usize,
+
+    // CUBIC state checkpoint preceding the last congestion event.
+    prior: PriorState,
+}
+
+/// Stores the CUBIC state from before the last congestion event.
+///
+/// <https://tools.ietf.org/id/draft-ietf-tcpm-rfc8312bis-00.html#section-4.9>
+#[derive(Debug, Default)]
+struct PriorState {
+    congestion_window: usize,
+
+    ssthresh: usize,
+
+    w_max: f64,
+
+    w_last_max: f64,
+
+    k: f64,
+
+    epoch_start: Option<Instant>,
+
+    lost_count: usize,
 }
 
 /// CUBIC Functions.
@@ -83,28 +118,27 @@
 /// not packets.
 /// Unit of t (duration) and RTT are based on seconds (f64).
 impl State {
-    // K = cbrt(w_max * (1 - beta_cubic) / C) (Eq. 2)
-    fn cubic_k(&self) -> f64 {
-        let w_max = self.w_max / recovery::MAX_DATAGRAM_SIZE as f64;
-        libm::cbrt(w_max * (1.0 - BETA_CUBIC) / C)
+    // K = cubic_root ((w_max - cwnd) / C) (Eq. 2)
+    fn cubic_k(&self, cwnd: usize, max_datagram_size: usize) -> f64 {
+        let w_max = self.w_max / max_datagram_size as f64;
+        let cwnd = cwnd as f64 / max_datagram_size as f64;
+
+        libm::cbrt((w_max - cwnd) / C)
     }
 
-    // W_cubic(t) = C * (t - K)^3 - w_max (Eq. 1)
-    fn w_cubic(&self, t: Duration) -> f64 {
-        let w_max = self.w_max / recovery::MAX_DATAGRAM_SIZE as f64;
+    // W_cubic(t) = C * (t - K)^3 + w_max (Eq. 1)
+    fn w_cubic(&self, t: Duration, max_datagram_size: usize) -> f64 {
+        let w_max = self.w_max / max_datagram_size as f64;
 
         (C * (t.as_secs_f64() - self.k).powi(3) + w_max) *
-            recovery::MAX_DATAGRAM_SIZE as f64
+            max_datagram_size as f64
     }
 
-    // W_est(t) = w_max * beta_cubic + 3 * (1 - beta_cubic) / (1 + beta_cubic) *
-    // (t / RTT) (Eq. 4)
-    fn w_est(&self, t: Duration, rtt: Duration) -> f64 {
-        let w_max = self.w_max / recovery::MAX_DATAGRAM_SIZE as f64;
-        (w_max * BETA_CUBIC +
-            3.0 * (1.0 - BETA_CUBIC) / (1.0 + BETA_CUBIC) * t.as_secs_f64() /
-                rtt.as_secs_f64()) *
-            recovery::MAX_DATAGRAM_SIZE as f64
+    // W_est = W_est + alpha_aimd * (segments_acked / cwnd)  (Eq. 4)
+    fn w_est_inc(
+        &self, acked: usize, cwnd: usize, max_datagram_size: usize,
+    ) -> f64 {
+        self.alpha_aimd * (acked as f64 / cwnd as f64) * max_datagram_size as f64
     }
 }
 
@@ -113,12 +147,14 @@
 
     r.congestion_recovery_start_time = None;
 
-    cubic.w_last_max = r.congestion_window as f64;
-    cubic.w_max = cubic.w_last_max;
+    cubic.w_max = r.congestion_window as f64;
 
     // 4.7 Timeout - reduce ssthresh based on BETA_CUBIC
     r.ssthresh = (r.congestion_window as f64 * BETA_CUBIC) as usize;
-    r.ssthresh = cmp::max(r.ssthresh, recovery::MINIMUM_WINDOW);
+    r.ssthresh = cmp::max(
+        r.ssthresh,
+        r.max_datagram_size * recovery::MINIMUM_WINDOW_PACKETS,
+    );
 
     cubic.cwnd_inc = 0;
 
@@ -165,32 +201,60 @@
         return;
     }
 
-    if r.congestion_window < r.ssthresh {
-        // Slow start.
-        if r.hystart.enabled() && epoch == packet::EPOCH_APPLICATION {
-            let (cwnd, ssthresh) = r.hystart_on_packet_acked(packet, now);
+    // Detecting spurious congestion events.
+    // <https://tools.ietf.org/id/draft-ietf-tcpm-rfc8312bis-00.html#section-4.9>
+    //
+    // When the recovery episode ends after only a few packets were
+    // lost (fewer than RESTORE_COUNT_THRESHOLD), the congestion event is
+    // considered spurious and the previous state is restored.
+    if r.congestion_recovery_start_time.is_some() {
+        let new_lost = r.lost_count - r.cubic_state.prior.lost_count;
 
-            r.congestion_window = cwnd;
-            r.ssthresh = ssthresh;
-        } else {
-            // Reno Slow Start.
-            r.congestion_window += packet.size;
+        if r.congestion_window < r.cubic_state.prior.congestion_window &&
+            new_lost < RESTORE_COUNT_THRESHOLD
+        {
+            rollback(r);
+            return;
+        }
+    }
+
+    if r.congestion_window < r.ssthresh {
+        // In Slow Start, bytes_acked_sl is used for counting
+        // acknowledged bytes.
+        r.bytes_acked_sl += packet.size;
+
+        if r.bytes_acked_sl >= r.max_datagram_size {
+            r.congestion_window += r.max_datagram_size;
+            r.bytes_acked_sl -= r.max_datagram_size;
+        }
+
+        if r.hystart.enabled() &&
+            epoch == packet::EPOCH_APPLICATION &&
+            r.hystart.try_enter_lss(
+                packet,
+                r.latest_rtt,
+                r.congestion_window,
+                now,
+                r.max_datagram_size,
+            )
+        {
+            r.ssthresh = r.congestion_window;
         }
     } else {
         // Congestion avoidance.
         let ca_start_time;
 
         // In LSS, use lss_start_time instead of congestion_recovery_start_time.
-        if r.hystart.enabled() &&
-            epoch == packet::EPOCH_APPLICATION &&
-            r.hystart.lss_start_time().is_some()
-        {
+        if r.hystart.in_lss(epoch) {
             ca_start_time = r.hystart.lss_start_time().unwrap();
 
             // Reset w_max and k when LSS started.
             if r.cubic_state.w_max == 0.0 {
                 r.cubic_state.w_max = r.congestion_window as f64;
                 r.cubic_state.k = 0.0;
+
+                r.cubic_state.w_est = r.congestion_window as f64;
+                r.cubic_state.alpha_aimd = ALPHA_AIMD;
             }
         } else {
             match r.congestion_recovery_start_time {
@@ -203,51 +267,65 @@
 
                     r.cubic_state.w_max = r.congestion_window as f64;
                     r.cubic_state.k = 0.0;
+
+                    r.cubic_state.w_est = r.congestion_window as f64;
+                    r.cubic_state.alpha_aimd = ALPHA_AIMD;
                 },
             }
         }
 
         let t = now - ca_start_time;
 
-        // w_cubic(t + rtt)
-        let w_cubic = r.cubic_state.w_cubic(t + r.min_rtt);
+        // target = w_cubic(t + rtt)
+        let target = r.cubic_state.w_cubic(t + r.min_rtt, r.max_datagram_size);
 
-        // w_est(t)
-        let w_est = r.cubic_state.w_est(t, r.min_rtt);
+        // Clipping target to [cwnd, 1.5 x cwnd]
+        let target = f64::max(target, r.congestion_window as f64);
+        let target = f64::min(target, r.congestion_window as f64 * 1.5);
+
+        // Update w_est.
+        let w_est_inc = r.cubic_state.w_est_inc(
+            packet.size,
+            r.congestion_window,
+            r.max_datagram_size,
+        );
+        r.cubic_state.w_est += w_est_inc;
+
+        if r.cubic_state.w_est >= r.cubic_state.w_max {
+            r.cubic_state.alpha_aimd = 1.0;
+        }
 
         let mut cubic_cwnd = r.congestion_window;
 
-        if w_cubic < w_est {
-            // TCP friendly region.
-            cubic_cwnd = cmp::max(cubic_cwnd, w_est as usize);
-        } else if cubic_cwnd < w_cubic as usize {
+        if r.cubic_state.w_cubic(t, r.max_datagram_size) < r.cubic_state.w_est {
+            // AIMD friendly region (W_cubic(t) < W_est)
+            cubic_cwnd = cmp::max(cubic_cwnd, r.cubic_state.w_est as usize);
+        } else {
             // Concave region or convex region use same increment.
-            let cubic_inc = (w_cubic - cubic_cwnd as f64) / cubic_cwnd as f64 *
-                recovery::MAX_DATAGRAM_SIZE as f64;
+            let cubic_inc =
+                r.max_datagram_size * (target as usize - cubic_cwnd) / cubic_cwnd;
 
-            cubic_cwnd += cubic_inc as usize;
+            cubic_cwnd += cubic_inc;
         }
 
         // When in Limited Slow Start, take the max of CA cwnd and
         // LSS cwnd.
-        if r.hystart.enabled() &&
-            epoch == packet::EPOCH_APPLICATION &&
-            r.hystart.lss_start_time().is_some()
-        {
-            let (lss_cwnd, _) = r.hystart_on_packet_acked(packet, now);
+        if r.hystart.in_lss(epoch) {
+            let lss_cwnd_inc = r.hystart.lss_cwnd_inc(
+                packet.size,
+                r.congestion_window,
+                r.ssthresh,
+            );
 
-            cubic_cwnd = cmp::max(cubic_cwnd, lss_cwnd);
+            cubic_cwnd = cmp::max(cubic_cwnd, r.congestion_window + lss_cwnd_inc);
         }
 
         // Update the increment and increase cwnd by MSS.
         r.cubic_state.cwnd_inc += cubic_cwnd - r.congestion_window;
 
-        // cwnd_inc can be more than 1 MSS in the late stage of max probing.
-        // however QUIC recovery draft 7.4 (Congestion Avoidance) limits
-        // the increase of cwnd to 1 max packet size per cwnd acknowledged.
-        if r.cubic_state.cwnd_inc >= recovery::MAX_DATAGRAM_SIZE {
-            r.congestion_window += recovery::MAX_DATAGRAM_SIZE;
-            r.cubic_state.cwnd_inc -= recovery::MAX_DATAGRAM_SIZE;
+        if r.cubic_state.cwnd_inc >= r.max_datagram_size {
+            r.congestion_window += r.max_datagram_size;
+            r.cubic_state.cwnd_inc -= r.max_datagram_size;
         }
     }
 }
@@ -263,29 +341,60 @@
         r.congestion_recovery_start_time = Some(now);
 
         // Fast convergence
-        if r.cubic_state.w_max < r.cubic_state.w_last_max {
-            r.cubic_state.w_last_max = r.cubic_state.w_max;
+        if (r.congestion_window as f64) < r.cubic_state.w_max {
             r.cubic_state.w_max =
-                r.cubic_state.w_max as f64 * (1.0 + BETA_CUBIC) / 2.0;
+                r.congestion_window as f64 * (1.0 + BETA_CUBIC) / 2.0;
         } else {
-            r.cubic_state.w_last_max = r.cubic_state.w_max;
+            r.cubic_state.w_max = r.congestion_window as f64;
         }
 
-        r.cubic_state.w_max = r.congestion_window as f64;
-        r.ssthresh = (r.cubic_state.w_max * BETA_CUBIC) as usize;
-        r.ssthresh = cmp::max(r.ssthresh, recovery::MINIMUM_WINDOW);
+        r.ssthresh = (r.congestion_window as f64 * BETA_CUBIC) as usize;
+        r.ssthresh = cmp::max(
+            r.ssthresh,
+            r.max_datagram_size * recovery::MINIMUM_WINDOW_PACKETS,
+        );
         r.congestion_window = r.ssthresh;
-        r.cubic_state.k = r.cubic_state.cubic_k();
+
+        r.cubic_state.k = if r.cubic_state.w_max < r.congestion_window as f64 {
+            0.0
+        } else {
+            r.cubic_state
+                .cubic_k(r.congestion_window, r.max_datagram_size)
+        };
 
         r.cubic_state.cwnd_inc =
             (r.cubic_state.cwnd_inc as f64 * BETA_CUBIC) as usize;
 
-        if r.hystart.enabled() && epoch == packet::EPOCH_APPLICATION {
+        r.cubic_state.w_est = r.congestion_window as f64;
+        r.cubic_state.alpha_aimd = ALPHA_AIMD;
+
+        if r.hystart.in_lss(epoch) {
             r.hystart.congestion_event();
         }
     }
 }
 
+fn checkpoint(r: &mut Recovery) {
+    r.cubic_state.prior.congestion_window = r.congestion_window;
+    r.cubic_state.prior.ssthresh = r.ssthresh;
+    r.cubic_state.prior.w_max = r.cubic_state.w_max;
+    r.cubic_state.prior.k = r.cubic_state.k;
+    r.cubic_state.prior.epoch_start = r.congestion_recovery_start_time;
+    r.cubic_state.prior.lost_count = r.lost_count;
+}
+
+fn rollback(r: &mut Recovery) {
+    r.congestion_window = r.cubic_state.prior.congestion_window;
+    r.ssthresh = r.cubic_state.prior.ssthresh;
+    r.cubic_state.w_max = r.cubic_state.prior.w_max;
+    r.cubic_state.k = r.cubic_state.prior.k;
+    r.congestion_recovery_start_time = r.cubic_state.prior.epoch_start;
+}
+
+fn has_custom_pacing() -> bool {
+    false
+}
+
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -328,7 +437,7 @@
             time_sent: now,
             time_acked: None,
             time_lost: None,
-            size: 5000,
+            size: r.max_datagram_size,
             ack_eliciting: true,
             in_flight: true,
             delivered: 0,
@@ -338,12 +447,10 @@
             has_data: false,
         };
 
-        // Send 5k x 4 = 20k, higher than default cwnd(~15k)
-        // to become no longer app limited
-        r.on_packet_sent_cc(p.size, now);
-        r.on_packet_sent_cc(p.size, now);
-        r.on_packet_sent_cc(p.size, now);
-        r.on_packet_sent_cc(p.size, now);
+        // Send initcwnd full MSS packets to become no longer app limited
+        for _ in 0..recovery::INITIAL_WINDOW_PACKETS {
+            r.on_packet_sent_cc(p.size, now);
+        }
 
         let cwnd_prev = r.cwnd();
 
@@ -360,6 +467,61 @@
     }
 
     #[test]
+    fn cubic_slow_start_multi_acks() {
+        let mut cfg = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        cfg.set_cc_algorithm(recovery::CongestionControlAlgorithm::CUBIC);
+
+        let mut r = Recovery::new(&cfg);
+        let now = Instant::now();
+
+        let p = recovery::Sent {
+            pkt_num: 0,
+            frames: vec![],
+            time_sent: now,
+            time_acked: None,
+            time_lost: None,
+            size: r.max_datagram_size,
+            ack_eliciting: true,
+            in_flight: true,
+            delivered: 0,
+            delivered_time: now,
+            recent_delivered_packet_sent_time: now,
+            is_app_limited: false,
+            has_data: false,
+        };
+
+        // Send initcwnd full MSS packets to become no longer app limited
+        for _ in 0..recovery::INITIAL_WINDOW_PACKETS {
+            r.on_packet_sent_cc(p.size, now);
+        }
+
+        let cwnd_prev = r.cwnd();
+
+        let acked = vec![
+            Acked {
+                pkt_num: p.pkt_num,
+                time_sent: p.time_sent,
+                size: p.size,
+            },
+            Acked {
+                pkt_num: p.pkt_num,
+                time_sent: p.time_sent,
+                size: p.size,
+            },
+            Acked {
+                pkt_num: p.pkt_num,
+                time_sent: p.time_sent,
+                size: p.size,
+            },
+        ];
+
+        r.on_packets_acked(acked, packet::EPOCH_APPLICATION, now);
+
+        // Acked 3 packets.
+        assert_eq!(r.cwnd(), cwnd_prev + p.size * 3);
+    }
+
+    #[test]
     fn cubic_congestion_event() {
         let mut cfg = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
         cfg.set_cc_algorithm(recovery::CongestionControlAlgorithm::CUBIC);
@@ -381,11 +543,13 @@
         cfg.set_cc_algorithm(recovery::CongestionControlAlgorithm::CUBIC);
 
         let mut r = Recovery::new(&cfg);
-        let now = Instant::now();
+        let mut now = Instant::now();
         let prev_cwnd = r.cwnd();
 
-        // Fill up bytes_in_flight to avoid app_limited=true
-        r.on_packet_sent_cc(20000, now);
+        // Send initcwnd full MSS packets to become no longer app limited
+        for _ in 0..recovery::INITIAL_WINDOW_PACKETS {
+            r.on_packet_sent_cc(r.max_datagram_size, now);
+        }
 
         // Trigger congestion event to update ssthresh
         r.congestion_event(now, packet::EPOCH_APPLICATION, now);
@@ -394,21 +558,31 @@
         let cur_cwnd = (prev_cwnd as f64 * BETA_CUBIC) as usize;
         assert_eq!(r.cwnd(), cur_cwnd);
 
+        // Shift current time by 1 RTT.
         let rtt = Duration::from_millis(100);
 
-        let acked = vec![Acked {
-            pkt_num: 0,
-            // To exit from recovery
-            time_sent: now + rtt,
-            size: 8000,
-        }];
-
-        // Ack more than cwnd bytes with rtt=100ms
         r.update_rtt(rtt, Duration::from_millis(0), now);
-        r.on_packets_acked(acked, packet::EPOCH_APPLICATION, now + rtt * 3);
 
-        // After acking more than cwnd, expect cwnd increased by MSS
-        assert_eq!(r.cwnd(), cur_cwnd + recovery::MAX_DATAGRAM_SIZE);
+        // Exit from the recovery.
+        now += rtt;
+
+        // To avoid rollback
+        r.lost_count += RESTORE_COUNT_THRESHOLD;
+
+        // During Congestion Avoidance, it will take
+        // 5 ACKs to increase cwnd by 1 MSS.
+        for _ in 0..5 {
+            let acked = vec![Acked {
+                pkt_num: 0,
+                time_sent: now,
+                size: r.max_datagram_size,
+            }];
+
+            r.on_packets_acked(acked, packet::EPOCH_APPLICATION, now);
+            now += rtt;
+        }
+
+        assert_eq!(r.cwnd(), cur_cwnd + r.max_datagram_size);
     }
 
     #[test]
@@ -425,29 +599,27 @@
         // Trigger congestion event to update ssthresh
         r.congestion_event(now, packet::EPOCH_APPLICATION, now);
 
-        // After persistent congestion, cwnd should be MINIMUM_WINDOW
+        // After persistent congestion, cwnd should be the minimum window
         r.collapse_cwnd();
-        assert_eq!(r.cwnd(), recovery::MINIMUM_WINDOW);
+        assert_eq!(
+            r.cwnd(),
+            r.max_datagram_size * recovery::MINIMUM_WINDOW_PACKETS
+        );
 
         let acked = vec![Acked {
             pkt_num: 0,
             // To exit from recovery
             time_sent: now + Duration::from_millis(1),
-            size: 10000,
+            size: r.max_datagram_size,
         }];
 
-        // rtt = 100ms
-        let rtt = Duration::from_millis(100);
-        std::thread::sleep(rtt);
-
-        // Ack 10000 x 2 to exit from slow start
-        r.on_packets_acked(acked.clone(), packet::EPOCH_APPLICATION, now);
-        std::thread::sleep(rtt);
-
-        // This will make CC into congestion avoidance mode
         r.on_packets_acked(acked, packet::EPOCH_APPLICATION, now);
 
-        assert_eq!(r.cwnd(), recovery::MINIMUM_WINDOW + 10000);
+        // Slow start again - cwnd will be increased by 1 MSS
+        assert_eq!(
+            r.cwnd(),
+            r.max_datagram_size * (recovery::MINIMUM_WINDOW_PACKETS + 1)
+        );
     }
 
     #[test]
@@ -467,7 +639,7 @@
             time_sent: now,
             time_acked: None,
             time_lost: None,
-            size: recovery::MAX_DATAGRAM_SIZE,
+            size: r.max_datagram_size,
             ack_eliciting: true,
             in_flight: true,
             delivered: 0,
@@ -511,13 +683,13 @@
         assert_eq!(r.hystart.lss_start_time().is_some(), false);
 
         // 2nd round.
-        r.hystart.start_round(pkts_1st_round * 2 + 1);
+        r.hystart.start_round(pkts_1st_round * 2);
 
         let mut rtt_2nd = 100;
         let now = now + Duration::from_millis(rtt_2nd);
 
         // Send 2nd round packets.
-        for _ in 0..n_rtt_sample + 1 {
+        for _ in 0..n_rtt_sample {
             r.on_packet_sent_cc(p.size, now);
         }
 
@@ -525,7 +697,7 @@
         // Last ack will cause to exit to LSS.
         let mut cwnd_prev = r.cwnd();
 
-        for _ in 0..n_rtt_sample + 1 {
+        for _ in 0..n_rtt_sample {
             cwnd_prev = r.cwnd();
             r.update_rtt(
                 Duration::from_millis(rtt_2nd),
@@ -547,10 +719,14 @@
 
         // Now we are in LSS.
         assert_eq!(r.hystart.lss_start_time().is_some(), true);
-        assert_eq!(r.cwnd(), cwnd_prev);
+        assert_eq!(r.cwnd(), cwnd_prev + r.max_datagram_size);
 
-        // Ack'ing more packet to increase cwnd by 1 MSS
-        for _ in 0..3 {
+        // Send a full cwnd.
+        r.on_packet_sent_cc(r.cwnd(), now);
+
+        // Ack'ing 4 packets to increase cwnd by 1 MSS during LSS
+        cwnd_prev = r.cwnd();
+        for _ in 0..4 {
             let acked = vec![Acked {
                 pkt_num: p.pkt_num,
                 time_sent: p.time_sent,
@@ -559,6 +735,115 @@
             r.on_packets_acked(acked, epoch, now);
         }
 
-        assert_eq!(r.cwnd(), cwnd_prev + recovery::MAX_DATAGRAM_SIZE);
+        // During LSS cwnd will be increased less than usual slow start.
+        assert_eq!(r.cwnd(), cwnd_prev + r.max_datagram_size);
+    }
+
+    #[test]
+    fn cubic_spurious_congestion_event() {
+        let mut cfg = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        cfg.set_cc_algorithm(recovery::CongestionControlAlgorithm::CUBIC);
+
+        let mut r = Recovery::new(&cfg);
+        let now = Instant::now();
+        let prev_cwnd = r.cwnd();
+
+        // Send initcwnd full MSS packets to become no longer app limited
+        for _ in 0..recovery::INITIAL_WINDOW_PACKETS {
+            r.on_packet_sent_cc(r.max_datagram_size, now);
+        }
+
+        // Trigger congestion event to update ssthresh
+        r.congestion_event(now, packet::EPOCH_APPLICATION, now);
+
+        // After congestion event, cwnd will be reduced.
+        let cur_cwnd = (prev_cwnd as f64 * BETA_CUBIC) as usize;
+        assert_eq!(r.cwnd(), cur_cwnd);
+
+        let rtt = Duration::from_millis(100);
+
+        let acked = vec![Acked {
+            pkt_num: 0,
+            // To exit from recovery
+            time_sent: now + rtt,
+            size: r.max_datagram_size,
+        }];
+
+        // Ack more than cwnd bytes with rtt=100ms
+        r.update_rtt(rtt, Duration::from_millis(0), now);
+
+        // Trigger detecting spurious congestion event
+        r.on_packets_acked(
+            acked,
+            packet::EPOCH_APPLICATION,
+            now + rtt + Duration::from_millis(5),
+        );
+
+        // cwnd is restored to the previous one.
+        assert_eq!(r.cwnd(), prev_cwnd);
+    }
+
+    #[test]
+    fn cubic_fast_convergence() {
+        let mut cfg = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        cfg.set_cc_algorithm(recovery::CongestionControlAlgorithm::CUBIC);
+
+        let mut r = Recovery::new(&cfg);
+        let mut now = Instant::now();
+        let prev_cwnd = r.cwnd();
+
+        // Send initcwnd full MSS packets to become no longer app limited
+        for _ in 0..recovery::INITIAL_WINDOW_PACKETS {
+            r.on_packet_sent_cc(r.max_datagram_size, now);
+        }
+
+        // Trigger congestion event to update ssthresh
+        r.congestion_event(now, packet::EPOCH_APPLICATION, now);
+
+        // After 1st congestion event, cwnd will be reduced.
+        let cur_cwnd = (prev_cwnd as f64 * BETA_CUBIC) as usize;
+        assert_eq!(r.cwnd(), cur_cwnd);
+
+        // Shift current time by 1 RTT.
+        let rtt = Duration::from_millis(100);
+        r.update_rtt(rtt, Duration::from_millis(0), now);
+
+        // Exit from the recovery.
+        now += rtt;
+
+        // To avoid rollback
+        r.lost_count += RESTORE_COUNT_THRESHOLD;
+
+        // During Congestion Avoidance, it will take
+        // 5 ACKs to increase cwnd by 1 MSS.
+        for _ in 0..5 {
+            let acked = vec![Acked {
+                pkt_num: 0,
+                time_sent: now,
+                size: r.max_datagram_size,
+            }];
+
+            r.on_packets_acked(acked, packet::EPOCH_APPLICATION, now);
+            now += rtt;
+        }
+
+        assert_eq!(r.cwnd(), cur_cwnd + r.max_datagram_size);
+
+        let prev_cwnd = r.cwnd();
+
+        // Fast convergence: now there is 2nd congestion event and
+        // cwnd is not fully recovered to w_max, w_max will be
+        // further reduced.
+        r.congestion_event(now, packet::EPOCH_APPLICATION, now);
+
+        // After 2nd congestion event, cwnd will be reduced.
+        let cur_cwnd = (prev_cwnd as f64 * BETA_CUBIC) as usize;
+        assert_eq!(r.cwnd(), cur_cwnd);
+
+        // w_max will be further reduced, not prev_cwnd
+        assert_eq!(
+            r.cubic_state.w_max,
+            prev_cwnd as f64 * (1.0 + BETA_CUBIC) / 2.0
+        );
     }
 }
diff --git a/src/recovery/delivery_rate.rs b/src/recovery/delivery_rate.rs
index 77fd248..97edeba 100644
--- a/src/recovery/delivery_rate.rs
+++ b/src/recovery/delivery_rate.rs
@@ -27,7 +27,7 @@
 //! Delivery rate estimation.
 //!
 //! This implements the algorithm for estimating delivery rate as described in
-//! https://tools.ietf.org/html/draft-cheng-iccrg-delivery-rate-estimation-00
+//! <https://tools.ietf.org/html/draft-cheng-iccrg-delivery-rate-estimation-00>
 
 use std::cmp;
 
@@ -76,7 +76,6 @@
 
         if pkt.delivered > self.rate_sample.prior_delivered {
             self.rate_sample.prior_delivered = pkt.delivered;
-            self.rate_sample.is_app_limited = pkt.is_app_limited;
 
             self.rate_sample.send_elapsed =
                 pkt.time_sent - pkt.recent_delivered_packet_sent_time;
@@ -149,8 +148,6 @@
 struct RateSample {
     delivery_rate: u64,
 
-    is_app_limited: bool,
-
     interval: Duration,
 
     delivered: usize,
diff --git a/src/recovery/hystart.rs b/src/recovery/hystart.rs
index 5d02739..6a275bc 100644
--- a/src/recovery/hystart.rs
+++ b/src/recovery/hystart.rs
@@ -28,12 +28,13 @@
 //!
 //! This implementation is based on the following I-D:
 //!
-//! https://tools.ietf.org/html/draft-balasubramanian-tcpm-hystartplusplus-02
+//! <https://tools.ietf.org/html/draft-balasubramanian-tcpm-hystartplusplus-03>
 
 use std::cmp;
 use std::time::Duration;
 use std::time::Instant;
 
+use crate::packet;
 use crate::recovery;
 
 /// Constants from I-D.
@@ -91,6 +92,12 @@
         self.lss_start_time
     }
 
+    pub fn in_lss(&self, epoch: packet::Epoch) -> bool {
+        self.enabled &&
+            epoch == packet::EPOCH_APPLICATION &&
+            self.lss_start_time().is_some()
+    }
+
     pub fn start_round(&mut self, pkt_num: u64) {
         if self.window_end.is_none() {
             *self = Hystart {
@@ -109,18 +116,12 @@
         }
     }
 
-    // Returns a new (ssthresh, cwnd) during slow start.
-    pub fn on_packet_acked(
+    // Returns true if LSS started.
+    pub fn try_enter_lss(
         &mut self, packet: &recovery::Acked, rtt: Duration, cwnd: usize,
-        ssthresh: usize, now: Instant,
-    ) -> (usize, usize) {
-        let mut ssthresh = ssthresh;
-        let mut cwnd = cwnd;
-
+        now: Instant, max_datagram_size: usize,
+    ) -> bool {
         if self.lss_start_time().is_none() {
-            // Reno Slow Start.
-            cwnd += packet.size;
-
             if let Some(current_round_min_rtt) = self.current_round_min_rtt {
                 self.current_round_min_rtt =
                     Some(cmp::min(current_round_min_rtt, rtt));
@@ -130,7 +131,7 @@
 
             self.rtt_sample_count += 1;
 
-            if cwnd >= (LOW_CWND * recovery::MAX_DATAGRAM_SIZE) &&
+            if cwnd >= (LOW_CWND * max_datagram_size) &&
                 self.rtt_sample_count >= N_RTT_SAMPLE &&
                 self.current_round_min_rtt.is_some() &&
                 self.last_round_min_rtt.is_some()
@@ -147,8 +148,6 @@
                 if self.current_round_min_rtt.unwrap() >=
                     (self.last_round_min_rtt.unwrap() + rtt_thresh)
                 {
-                    ssthresh = cwnd;
-
                     self.lss_start_time = Some(now);
                 }
             }
@@ -160,23 +159,22 @@
                     self.window_end = None;
                 }
             }
-        } else {
-            // LSS (Limited Slow Start).
-            let k = cwnd as f64 / (LSS_DIVISOR * ssthresh as f64);
-
-            cwnd += (packet.size as f64 / k) as usize;
         }
 
-        (cwnd, ssthresh)
+        self.lss_start_time.is_some()
+    }
+
+    // Return a cwnd increment during LSS (Limited Slow Start).
+    pub fn lss_cwnd_inc(
+        &self, pkt_size: usize, cwnd: usize, ssthresh: usize,
+    ) -> usize {
+        pkt_size / (cwnd as f64 / (LSS_DIVISOR * ssthresh as f64)) as usize
     }
 
     // Exit HyStart++ when entering congestion avoidance.
     pub fn congestion_event(&mut self) {
-        if self.window_end.is_some() {
-            self.window_end = None;
-
-            self.lss_start_time = None;
-        }
+        self.window_end = None;
+        self.lss_start_time = None;
     }
 }
 
@@ -196,111 +194,16 @@
     }
 
     #[test]
-    fn reno_slow_start() {
-        let mut hspp = Hystart::default();
-        let pkt_num = 100;
-        let size = 1000;
-        let now = Instant::now();
+    fn lss_cwnd_inc() {
+        let hspp = Hystart::default();
 
-        hspp.start_round(pkt_num);
+        let datagram_size = 1200;
+        let cwnd = 24000;
+        let ssthresh = 24000;
 
-        assert_eq!(hspp.window_end, Some(pkt_num));
+        let lss_cwnd_inc = hspp.lss_cwnd_inc(datagram_size, cwnd, ssthresh);
 
-        let p = recovery::Acked {
-            pkt_num,
-            time_sent: now + Duration::from_millis(10),
-            size,
-        };
-
-        let init_cwnd = 30000;
-        let init_ssthresh = 1000000;
-
-        let (cwnd, ssthresh) = hspp.on_packet_acked(
-            &p,
-            Duration::from_millis(10),
-            init_cwnd,
-            init_ssthresh,
-            now,
-        );
-
-        // Expecting Reno slow start.
-        assert_eq!(hspp.lss_start_time().is_some(), false);
-        assert_eq!((cwnd, ssthresh), (init_cwnd + size, init_ssthresh));
-    }
-
-    #[test]
-    fn limited_slow_start() {
-        let mut hspp = Hystart::default();
-        let size = 1000;
-        let now = Instant::now();
-
-        // 1st round rtt = 50ms
-        let rtt_1st = 50;
-
-        // end of 1st round
-        let pkt_1st = N_RTT_SAMPLE as u64;
-
-        hspp.start_round(pkt_1st);
-
-        assert_eq!(hspp.window_end, Some(pkt_1st));
-
-        let (mut cwnd, mut ssthresh) = (30000, 1000000);
-        let mut pkt_num = 0;
-
-        // 1st round.
-        for _ in 0..N_RTT_SAMPLE + 1 {
-            let p = recovery::Acked {
-                pkt_num,
-                time_sent: now + Duration::from_millis(pkt_num),
-                size,
-            };
-
-            // We use a fixed rtt for 1st round.
-            let rtt = Duration::from_millis(rtt_1st);
-
-            let (new_cwnd, new_ssthresh) =
-                hspp.on_packet_acked(&p, rtt, cwnd, ssthresh, now);
-
-            cwnd = new_cwnd;
-            ssthresh = new_ssthresh;
-
-            pkt_num += 1;
-        }
-
-        // 2nd round. rtt = 100ms to trigger LSS.
-        let rtt_2nd = 100;
-
-        hspp.start_round(pkt_1st * 2 + 1);
-
-        for _ in 0..N_RTT_SAMPLE + 1 {
-            let p = recovery::Acked {
-                pkt_num,
-                time_sent: now + Duration::from_millis(pkt_num),
-                size,
-            };
-
-            // Keep increasing rtt to simulate buffer queueing delay
-            // This is to exit from slow slart to LSS.
-            let rtt = Duration::from_millis(rtt_2nd + pkt_num * 4);
-
-            let (new_cwnd, new_ssthresh) =
-                hspp.on_packet_acked(&p, rtt, cwnd, ssthresh, now);
-
-            cwnd = new_cwnd;
-            ssthresh = new_ssthresh;
-
-            pkt_num += 1;
-        }
-
-        // At this point, cwnd exits to LSS mode.
-        assert_eq!(hspp.lss_start_time().is_some(), true);
-
-        // Check if current cwnd is in LSS.
-        let cur_ssthresh = 47000;
-        let k = cur_ssthresh as f64 / (LSS_DIVISOR * cur_ssthresh as f64);
-        let lss_cwnd = cur_ssthresh as f64 + size as f64 / k;
-
-        assert_eq!((cwnd, ssthresh), (lss_cwnd as usize, cur_ssthresh));
+        assert_eq!((datagram_size as f64 * LSS_DIVISOR) as usize, lss_cwnd_inc);
     }
 
     #[test]
diff --git a/src/recovery/mod.rs b/src/recovery/mod.rs
index 4621e24..2bffa79 100644
--- a/src/recovery/mod.rs
+++ b/src/recovery/mod.rs
@@ -60,11 +60,7 @@
 // Congestion Control
 const INITIAL_WINDOW_PACKETS: usize = 10;
 
-const INITIAL_WINDOW: usize = INITIAL_WINDOW_PACKETS * MAX_DATAGRAM_SIZE;
-
-const MINIMUM_WINDOW: usize = 2 * MAX_DATAGRAM_SIZE;
-
-const MAX_DATAGRAM_SIZE: usize = 1452;
+const MINIMUM_WINDOW_PACKETS: usize = 2;
 
 const LOSS_REDUCTION_FACTOR: f64 = 0.5;
 
@@ -118,14 +114,25 @@
 
     ssthresh: usize,
 
-    bytes_acked: usize,
+    bytes_acked_sl: usize,
+
+    bytes_acked_ca: usize,
+
+    bytes_sent: usize,
 
     congestion_recovery_start_time: Option<Instant>,
 
+    max_datagram_size: usize,
+
     cubic_state: cubic::State,
 
     // HyStart++.
     hystart: hystart::Hystart,
+
+    // Pacing.
+    pacing_rate: u64,
+
+    last_packet_scheduled_time: Option<Instant>,
 }
 
 impl Recovery {
@@ -171,16 +178,23 @@
 
             in_flight_count: [0; packet::EPOCH_COUNT],
 
-            congestion_window: INITIAL_WINDOW,
+            congestion_window: config.max_send_udp_payload_size *
+                INITIAL_WINDOW_PACKETS,
 
             bytes_in_flight: 0,
 
             ssthresh: std::usize::MAX,
 
-            bytes_acked: 0,
+            bytes_acked_sl: 0,
+
+            bytes_acked_ca: 0,
+
+            bytes_sent: 0,
 
             congestion_recovery_start_time: None,
 
+            max_datagram_size: config.max_send_udp_payload_size,
+
             cc_ops: config.cc_algorithm.into(),
 
             delivery_rate: delivery_rate::Rate::default(),
@@ -190,6 +204,10 @@
             app_limited: false,
 
             hystart: hystart::Hystart::new(config.hystart),
+
+            pacing_rate: 0,
+
+            last_packet_scheduled_time: None,
         }
     }
 
@@ -232,6 +250,18 @@
             self.hystart.start_round(pkt_num);
         }
 
+        // Pacing: Set the pacing rate if CC doesn't do its own.
+        if !(self.cc_ops.has_custom_pacing)() {
+            if let Some(srtt) = self.smoothed_rtt {
+                let rate = (self.congestion_window as u64 * 1000000) /
+                    srtt.as_micros() as u64;
+                self.set_pacing_rate(rate);
+            }
+        }
+
+        self.schedule_next_packet(epoch, now, sent_bytes);
+
+        self.bytes_sent += sent_bytes;
         trace!("{} {:?}", trace_id, self);
     }
 
@@ -239,6 +269,45 @@
         (self.cc_ops.on_packet_sent)(self, sent_bytes, now);
     }
 
+    pub fn set_pacing_rate(&mut self, rate: u64) {
+        if rate != 0 {
+            self.pacing_rate = rate;
+        }
+    }
+
+    pub fn get_packet_send_time(&self) -> Option<Instant> {
+        self.last_packet_scheduled_time
+    }
+
+    fn schedule_next_packet(
+        &mut self, epoch: packet::Epoch, now: Instant, packet_size: usize,
+    ) {
+        // Don't pace in any of these cases:
+        //   * Packet epoch is not EPOCH_APPLICATION.
+        //   * Packet contains only ACK frames.
+        //   * The start of the connection.
+        if epoch != packet::EPOCH_APPLICATION ||
+            packet_size == 0 ||
+            self.bytes_sent <= self.congestion_window ||
+            self.pacing_rate == 0
+        {
+            self.last_packet_scheduled_time = Some(now);
+            return;
+        }
+
+        self.last_packet_scheduled_time = match self.last_packet_scheduled_time {
+            Some(last_scheduled_time) => {
+                let interval: u64 =
+                    (packet_size as u64 * 1000000) / self.pacing_rate;
+                let interval = Duration::from_micros(interval);
+                let next_schedule_time = last_scheduled_time + interval;
+                Some(cmp::max(now, next_schedule_time))
+            },
+
+            None => Some(now),
+        };
+    }
+
     pub fn on_ack_received(
         &mut self, ranges: &ranges::RangeSet, ack_delay: u64,
         epoch: packet::Epoch, handshake_status: HandshakeStatus, now: Instant,
@@ -347,8 +416,6 @@
 
         self.drain_packets(epoch);
 
-        trace!("{} {:?}", trace_id, self);
-
         Ok(())
     }
 
@@ -469,6 +536,24 @@
         self.delivery_rate.delivery_rate()
     }
 
+    pub fn max_datagram_size(&self) -> usize {
+        self.max_datagram_size
+    }
+
+    pub fn update_max_datagram_size(&mut self, new_max_datagram_size: usize) {
+        let max_datagram_size =
+            cmp::min(self.max_datagram_size, new_max_datagram_size);
+
+        // Congestion Window is updated only when it's not updated already.
+        if self.congestion_window ==
+            self.max_datagram_size * INITIAL_WINDOW_PACKETS
+        {
+            self.congestion_window = max_datagram_size * INITIAL_WINDOW_PACKETS;
+        }
+
+        self.max_datagram_size = max_datagram_size;
+    }
+
     fn update_rtt(
         &mut self, latest_rtt: Duration, ack_delay: Duration, now: Instant,
     ) {
@@ -729,6 +814,10 @@
     fn congestion_event(
         &mut self, time_sent: Instant, epoch: packet::Epoch, now: Instant,
     ) {
+        if !self.in_congestion_recovery(time_sent) {
+            (self.cc_ops.checkpoint)(self);
+        }
+
         (self.cc_ops.congestion_event)(self, time_sent, epoch, now);
     }
 
@@ -742,18 +831,6 @@
         }
     }
 
-    fn hystart_on_packet_acked(
-        &mut self, packet: &Acked, now: Instant,
-    ) -> (usize, usize) {
-        self.hystart.on_packet_acked(
-            packet,
-            self.latest_rtt,
-            self.congestion_window,
-            self.ssthresh,
-            now,
-        )
-    }
-
     pub fn update_app_limited(&mut self, v: bool) {
         self.app_limited = v;
     }
@@ -825,6 +902,12 @@
     ),
 
     pub collapse_cwnd: fn(r: &mut Recovery),
+
+    pub checkpoint: fn(r: &mut Recovery),
+
+    pub rollback: fn(r: &mut Recovery),
+
+    pub has_custom_pacing: fn() -> bool,
 }
 
 impl From<CongestionControlAlgorithm> for &'static CongestionControlOps {
@@ -871,6 +954,12 @@
             self.congestion_recovery_start_time
         )?;
         write!(f, "{:?} ", self.delivery_rate)?;
+        write!(f, "pacing_rate={:?} ", self.pacing_rate)?;
+        write!(
+            f,
+            "last_packet_scheduled_time={:?} ",
+            self.last_packet_scheduled_time
+        )?;
 
         if self.hystart.enabled() {
             write!(f, "hystart={:?} ", self.hystart)?;
@@ -994,7 +1083,7 @@
 
         // cwnd will be reset.
         r.collapse_cwnd();
-        assert_eq!(r.cwnd(), MINIMUM_WINDOW);
+        assert_eq!(r.cwnd(), r.max_datagram_size * MINIMUM_WINDOW_PACKETS);
     }
 
     #[test]
@@ -1534,6 +1623,142 @@
         // Spurious loss.
         assert_eq!(r.lost_count, 1);
     }
+
+    #[test]
+    fn pacing() {
+        let mut cfg = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        cfg.set_cc_algorithm(CongestionControlAlgorithm::CUBIC);
+
+        let mut r = Recovery::new(&cfg);
+
+        let mut now = Instant::now();
+
+        assert_eq!(r.sent[packet::EPOCH_APPLICATION].len(), 0);
+
+        // send out first packet.
+        let p = Sent {
+            pkt_num: 0,
+            frames: vec![],
+            time_sent: now,
+            time_acked: None,
+            time_lost: None,
+            size: 6500,
+            ack_eliciting: true,
+            in_flight: true,
+            delivered: 0,
+            delivered_time: now,
+            recent_delivered_packet_sent_time: now,
+            is_app_limited: false,
+            has_data: false,
+        };
+
+        r.on_packet_sent(
+            p,
+            packet::EPOCH_APPLICATION,
+            HandshakeStatus::default(),
+            now,
+            "",
+        );
+
+        assert_eq!(r.sent[packet::EPOCH_APPLICATION].len(), 1);
+        assert_eq!(r.bytes_in_flight, 6500);
+
+        // First packet will be sent out immediately.
+        assert_eq!(r.pacing_rate, 0);
+        assert_eq!(r.get_packet_send_time().unwrap(), now);
+
+        // Wait 50ms for ACK.
+        now += Duration::from_millis(50);
+
+        let mut acked = ranges::RangeSet::default();
+        acked.insert(0..1);
+
+        assert_eq!(
+            r.on_ack_received(
+                &acked,
+                10,
+                packet::EPOCH_APPLICATION,
+                HandshakeStatus::default(),
+                now,
+                ""
+            ),
+            Ok(())
+        );
+
+        assert_eq!(r.sent[packet::EPOCH_APPLICATION].len(), 0);
+        assert_eq!(r.bytes_in_flight, 0);
+        assert_eq!(r.smoothed_rtt.unwrap(), Duration::from_millis(50));
+
+        // Send out second packet.
+        let p = Sent {
+            pkt_num: 1,
+            frames: vec![],
+            time_sent: now,
+            time_acked: None,
+            time_lost: None,
+            size: 6500,
+            ack_eliciting: true,
+            in_flight: true,
+            delivered: 0,
+            delivered_time: now,
+            recent_delivered_packet_sent_time: now,
+            is_app_limited: false,
+            has_data: false,
+        };
+
+        r.on_packet_sent(
+            p,
+            packet::EPOCH_APPLICATION,
+            HandshakeStatus::default(),
+            now,
+            "",
+        );
+
+        assert_eq!(r.sent[packet::EPOCH_APPLICATION].len(), 1);
+        assert_eq!(r.bytes_in_flight, 6500);
+
+        // Pacing is not done during initial phase of connection.
+        assert_eq!(r.get_packet_send_time().unwrap(), now);
+
+        // Send the third packet out.
+        let p = Sent {
+            pkt_num: 2,
+            frames: vec![],
+            time_sent: now,
+            time_acked: None,
+            time_lost: None,
+            size: 6500,
+            ack_eliciting: true,
+            in_flight: true,
+            delivered: 0,
+            delivered_time: now,
+            recent_delivered_packet_sent_time: now,
+            is_app_limited: false,
+            has_data: false,
+        };
+
+        r.on_packet_sent(
+            p,
+            packet::EPOCH_APPLICATION,
+            HandshakeStatus::default(),
+            now,
+            "",
+        );
+
+        assert_eq!(r.sent[packet::EPOCH_APPLICATION].len(), 2);
+        assert_eq!(r.bytes_in_flight, 13000);
+        assert_eq!(r.smoothed_rtt.unwrap(), Duration::from_millis(50));
+
+        // We pace this outgoing packet, as all conditions for pacing
+        // are met.
+        assert_eq!(r.pacing_rate, (12000.0 / 0.05) as u64);
+        assert_eq!(
+            r.get_packet_send_time().unwrap(),
+            now + Duration::from_micros(
+                (6500 * 1000000) / (12000.0 / 0.05) as u64
+            )
+        );
+    }
 }
 
 mod cubic;
diff --git a/src/recovery/reno.rs b/src/recovery/reno.rs
index b5019f7..404b63f 100644
--- a/src/recovery/reno.rs
+++ b/src/recovery/reno.rs
@@ -43,6 +43,9 @@
     on_packet_acked,
     congestion_event,
     collapse_cwnd,
+    checkpoint,
+    rollback,
+    has_custom_pacing,
 };
 
 pub fn on_packet_sent(r: &mut Recovery, sent_bytes: usize, _now: Instant) {
@@ -63,38 +66,52 @@
     }
 
     if r.congestion_window < r.ssthresh {
-        // Slow start.
-        if r.hystart.enabled() && epoch == packet::EPOCH_APPLICATION {
-            let (cwnd, ssthresh) = r.hystart_on_packet_acked(packet, now);
+        // In slow start, bytes_acked_sl is used for counting
+        // acknowledged bytes.
+        r.bytes_acked_sl += packet.size;
 
-            r.congestion_window = cwnd;
-            r.ssthresh = ssthresh;
-        } else {
-            r.congestion_window += packet.size;
+        if r.bytes_acked_sl >= r.max_datagram_size {
+            r.congestion_window += r.max_datagram_size;
+            r.bytes_acked_sl -= r.max_datagram_size;
+        }
+
+        if r.hystart.enabled() &&
+            epoch == packet::EPOCH_APPLICATION &&
+            r.hystart.try_enter_lss(
+                packet,
+                r.latest_rtt,
+                r.congestion_window,
+                now,
+                r.max_datagram_size,
+            )
+        {
+            r.ssthresh = r.congestion_window;
         }
     } else {
         // Congestion avoidance.
         let mut reno_cwnd = r.congestion_window;
 
-        r.bytes_acked += packet.size;
+        r.bytes_acked_ca += packet.size;
 
-        if r.bytes_acked >= r.congestion_window {
-            r.bytes_acked -= r.congestion_window;
-            reno_cwnd += recovery::MAX_DATAGRAM_SIZE;
+        if r.bytes_acked_ca >= r.congestion_window {
+            r.bytes_acked_ca -= r.congestion_window;
+            reno_cwnd += r.max_datagram_size;
         }
 
         // When in Limited Slow Start, take the max of CA cwnd and
         // LSS cwnd.
-        if r.hystart.enabled() &&
-            epoch == packet::EPOCH_APPLICATION &&
-            r.hystart.lss_start_time().is_some()
-        {
-            let (lss_cwnd, _) = r.hystart_on_packet_acked(packet, now);
+        if r.hystart.in_lss(epoch) {
+            let lss_cwnd_inc = r.hystart.lss_cwnd_inc(
+                packet.size,
+                r.congestion_window,
+                r.ssthresh,
+            );
 
-            reno_cwnd = cmp::max(reno_cwnd, lss_cwnd);
+            r.congestion_window =
+                cmp::max(reno_cwnd, r.congestion_window + lss_cwnd_inc);
+        } else {
+            r.congestion_window = reno_cwnd;
         }
-
-        r.congestion_window = reno_cwnd;
     }
 }
 
@@ -110,23 +127,34 @@
             recovery::LOSS_REDUCTION_FACTOR)
             as usize;
 
-        r.congestion_window =
-            cmp::max(r.congestion_window, recovery::MINIMUM_WINDOW);
+        r.congestion_window = cmp::max(
+            r.congestion_window,
+            r.max_datagram_size * recovery::MINIMUM_WINDOW_PACKETS,
+        );
 
-        r.bytes_acked = (r.congestion_window as f64 *
+        r.bytes_acked_ca = (r.congestion_window as f64 *
             recovery::LOSS_REDUCTION_FACTOR) as usize;
 
         r.ssthresh = r.congestion_window;
 
-        if r.hystart.enabled() && epoch == packet::EPOCH_APPLICATION {
+        if r.hystart.in_lss(epoch) {
             r.hystart.congestion_event();
         }
     }
 }
 
 pub fn collapse_cwnd(r: &mut Recovery) {
-    r.congestion_window = recovery::MINIMUM_WINDOW;
-    r.bytes_acked = 0;
+    r.congestion_window = r.max_datagram_size * recovery::MINIMUM_WINDOW_PACKETS;
+    r.bytes_acked_sl = 0;
+    r.bytes_acked_ca = 0;
+}
+
+fn checkpoint(_r: &mut Recovery) {}
+
+fn rollback(_r: &mut Recovery) {}
+
+fn has_custom_pacing() -> bool {
+    false
 }
 
 #[cfg(test)]
@@ -175,7 +203,7 @@
             time_sent: now,
             time_acked: None,
             time_lost: None,
-            size: 5000,
+            size: r.max_datagram_size,
             ack_eliciting: true,
             in_flight: true,
             delivered: 0,
@@ -185,12 +213,10 @@
             has_data: false,
         };
 
-        // Send 5k x 4 = 20k, higher than default cwnd(~15k)
-        // to become no longer app limited.
-        r.on_packet_sent_cc(p.size, now);
-        r.on_packet_sent_cc(p.size, now);
-        r.on_packet_sent_cc(p.size, now);
-        r.on_packet_sent_cc(p.size, now);
+        // Send initcwnd full MSS packets to become no longer app limited
+        for _ in 0..recovery::INITIAL_WINDOW_PACKETS {
+            r.on_packet_sent_cc(p.size, now);
+        }
 
         let cwnd_prev = r.cwnd();
 
@@ -207,6 +233,62 @@
     }
 
     #[test]
+    fn reno_slow_start_multi_acks() {
+        let mut cfg = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
+        cfg.set_cc_algorithm(recovery::CongestionControlAlgorithm::Reno);
+
+        let mut r = Recovery::new(&cfg);
+
+        let now = Instant::now();
+
+        let p = recovery::Sent {
+            pkt_num: 0,
+            frames: vec![],
+            time_sent: now,
+            time_acked: None,
+            time_lost: None,
+            size: r.max_datagram_size,
+            ack_eliciting: true,
+            in_flight: true,
+            delivered: 0,
+            delivered_time: std::time::Instant::now(),
+            recent_delivered_packet_sent_time: std::time::Instant::now(),
+            is_app_limited: false,
+            has_data: false,
+        };
+
+        // Send initcwnd full MSS packets to become no longer app limited
+        for _ in 0..recovery::INITIAL_WINDOW_PACKETS {
+            r.on_packet_sent_cc(p.size, now);
+        }
+
+        let cwnd_prev = r.cwnd();
+
+        let acked = vec![
+            Acked {
+                pkt_num: p.pkt_num,
+                time_sent: p.time_sent,
+                size: p.size,
+            },
+            Acked {
+                pkt_num: p.pkt_num,
+                time_sent: p.time_sent,
+                size: p.size,
+            },
+            Acked {
+                pkt_num: p.pkt_num,
+                time_sent: p.time_sent,
+                size: p.size,
+            },
+        ];
+
+        r.on_packets_acked(acked, packet::EPOCH_APPLICATION, now);
+
+        // Acked 3 packets.
+        assert_eq!(r.cwnd(), cwnd_prev + p.size * 3);
+    }
+
+    #[test]
     fn reno_congestion_event() {
         let mut cfg = crate::Config::new(crate::PROTOCOL_VERSION).unwrap();
         cfg.set_cc_algorithm(recovery::CongestionControlAlgorithm::Reno);
@@ -258,6 +340,6 @@
         r.on_packets_acked(acked, packet::EPOCH_APPLICATION, now + rtt * 2);
 
         // After acking more than cwnd, expect cwnd increased by MSS
-        assert_eq!(r.cwnd(), cur_cwnd + recovery::MAX_DATAGRAM_SIZE);
+        assert_eq!(r.cwnd(), cur_cwnd + r.max_datagram_size);
     }
 }
diff --git a/src/stream.rs b/src/stream.rs
index c2fbf36..dfb9b0d 100644
--- a/src/stream.rs
+++ b/src/stream.rs
@@ -26,7 +26,10 @@
 
 use std::cmp;
 
+use std::sync::Arc;
+
 use std::collections::hash_map;
+
 use std::collections::BTreeMap;
 use std::collections::BinaryHeap;
 use std::collections::HashMap;
@@ -40,6 +43,12 @@
 
 const DEFAULT_URGENCY: u8 = 127;
 
+#[cfg(test)]
+const SEND_BUFFER_SIZE: usize = 5;
+
+#[cfg(not(test))]
+const SEND_BUFFER_SIZE: usize = 4096;
+
 /// Keeps track of QUIC streams and enforces stream limits.
 #[derive(Default)]
 pub struct StreamMap {
@@ -113,6 +122,16 @@
     /// of the map elements represents the offset of the stream at which the
     /// blocking occurred.
     blocked: HashMap<u64, u64>,
+
+    /// Set of stream IDs corresponding to streams that are reset. The value
+    /// of the map elements is a tuple of the error code and final size values
+    /// to include in the RESET_STREAM frame.
+    reset: HashMap<u64, (u64, u64)>,
+
+    /// Set of stream IDs corresponding to streams that are shutdown on the
+    /// receive side, and need to send a STOP_SENDING frame. The value of the
+    /// map elements is the error code to include in the STOP_SENDING frame.
+    stopped: HashMap<u64, u64>,
 }
 
 impl StreamMap {
@@ -162,7 +181,7 @@
                 }
 
                 if local != is_local(id, is_server) {
-                    return Err(Error::InvalidStreamState);
+                    return Err(Error::InvalidStreamState(id));
                 }
 
                 let (max_rx_data, max_tx_data) = match (local, is_bidi(id)) {
@@ -355,6 +374,34 @@
         }
     }
 
+    /// Adds or removes the stream ID to/from the reset streams set with the
+    /// given error code and final size values.
+    ///
+    /// If the stream was already in the set, its recorded values are replaced.
+    pub fn mark_reset(
+        &mut self, stream_id: u64, reset: bool, error_code: u64, final_size: u64,
+    ) {
+        if reset {
+            self.reset.insert(stream_id, (error_code, final_size));
+        } else {
+            self.reset.remove(&stream_id);
+        }
+    }
+
+    /// Adds or removes the stream ID to/from the stopped streams set with the
+    /// given error code.
+    ///
+    /// If the stream was already in the set, its recorded error code is replaced.
+    pub fn mark_stopped(
+        &mut self, stream_id: u64, stopped: bool, error_code: u64,
+    ) {
+        if stopped {
+            self.stopped.insert(stream_id, error_code);
+        } else {
+            self.stopped.remove(&stream_id);
+        }
+    }
+
     /// Updates the peer's maximum bidirectional stream count limit.
     pub fn update_peer_max_streams_bidi(&mut self, v: u64) {
         self.peer_max_streams_bidi = cmp::max(self.peer_max_streams_bidi, v);
@@ -385,6 +432,18 @@
         self.local_max_streams_uni_next
     }
 
+    /// Returns the number of bidirectional streams that can be created
+    /// before the peer's stream count limit is reached.
+    pub fn peer_streams_left_bidi(&self) -> u64 {
+        self.peer_max_streams_bidi - self.local_opened_streams_bidi
+    }
+
+    /// Returns the number of unidirectional streams that can be created
+    /// before the peer's stream count limit is reached.
+    pub fn peer_streams_left_uni(&self) -> u64 {
+        self.peer_max_streams_uni - self.local_opened_streams_uni
+    }
+
     /// Drops completed stream.
     ///
     /// This should only be called when Stream::is_complete() returns true for
@@ -426,11 +485,26 @@
         self.blocked.iter()
     }
 
+    /// Creates an iterator over streams that need to send RESET_STREAM.
+    pub fn reset(&self) -> hash_map::Iter<u64, (u64, u64)> {
+        self.reset.iter()
+    }
+
+    /// Creates an iterator over streams that need to send STOP_SENDING.
+    pub fn stopped(&self) -> hash_map::Iter<u64, u64> {
+        self.stopped.iter()
+    }
+
     /// Returns true if there are any streams that have data to write.
     pub fn has_flushable(&self) -> bool {
         !self.flushable.is_empty()
     }
 
+    /// Returns true if there are any streams that have data to read.
+    pub fn has_readable(&self) -> bool {
+        !self.readable.is_empty()
+    }
+
     /// Returns true if there are any streams that need to update the local
     /// flow control limit.
     pub fn has_almost_full(&self) -> bool {
@@ -442,6 +516,16 @@
         !self.blocked.is_empty()
     }
 
+    /// Returns true if there are any streams that are reset.
+    pub fn has_reset(&self) -> bool {
+        !self.reset.is_empty()
+    }
+
+    /// Returns true if there are any streams that need to send STOP_SENDING.
+    pub fn has_stopped(&self) -> bool {
+        !self.stopped.is_empty()
+    }
+
     /// Returns true if the max bidirectional streams count needs to be updated
     /// by sending a MAX_STREAMS frame to the peer.
     pub fn should_update_max_streams_bidi(&self) -> bool {
@@ -481,7 +565,7 @@
     pub local: bool,
 
     /// Application data.
-    pub data: Option<Box<dyn Send + std::any::Any>>,
+    pub data: Option<Box<dyn std::any::Any + Send + Sync>>,
 
     /// The stream's urgency (lower is better). Default is `DEFAULT_URGENCY`.
     pub urgency: u8,
@@ -568,6 +652,7 @@
 }
 
 impl StreamIter {
+    #[inline]
     fn from(streams: &HashSet<u64>) -> Self {
         StreamIter {
             streams: streams.iter().copied().collect(),
@@ -578,12 +663,14 @@
 impl Iterator for StreamIter {
     type Item = u64;
 
+    #[inline]
     fn next(&mut self) -> Option<Self::Item> {
         self.streams.pop()
     }
 }
 
 impl ExactSizeIterator for StreamIter {
+    #[inline]
     fn len(&self) -> usize {
         self.streams.len()
     }
@@ -634,7 +721,7 @@
     /// This also takes care of enforcing stream flow control limits, as well
     /// as handling incoming data that overlaps data that is already in the
     /// buffer.
-    pub fn push(&mut self, buf: RangeBuf) -> Result<()> {
+    pub fn write(&mut self, buf: RangeBuf) -> Result<()> {
         if buf.max_off() > self.max_data {
             return Err(Error::FlowControl);
         }
@@ -690,10 +777,6 @@
             }
         }
 
-        if self.drain {
-            return Ok(());
-        }
-
         let mut tmp_buf = Some(buf);
 
         while let Some(mut buf) = tmp_buf {
@@ -727,7 +810,9 @@
 
             self.len = cmp::max(self.len, buf.max_off());
 
-            self.data.push(buf);
+            if !self.drain {
+                self.data.push(buf);
+            }
         }
 
         Ok(())
@@ -742,7 +827,7 @@
     ///
     /// On success the amount of data read, and a flag indicating if there is
     /// no more data in the buffer, are returned as a tuple.
-    pub fn pop(&mut self, out: &mut [u8]) -> Result<(usize, bool)> {
+    pub fn emit(&mut self, out: &mut [u8]) -> Result<(usize, bool)> {
         let mut len = 0;
         let mut cap = out.len();
 
@@ -822,6 +907,8 @@
 
         self.data.clear();
 
+        self.off = self.max_off();
+
         Ok(())
     }
 
@@ -881,12 +968,15 @@
 #[derive(Debug, Default)]
 pub struct SendBuf {
     /// Chunks of data to be sent, ordered by offset.
-    data: BinaryHeap<RangeBuf>,
+    data: VecDeque<RangeBuf>,
+
+    /// The index of the buffer that needs to be sent next.
+    pos: usize,
 
     /// The maximum offset of data buffered in the stream.
     off: u64,
 
-    /// The amount of data that was ever written to this stream.
+    /// The amount of data currently buffered.
     len: u64,
 
     /// The maximum offset we are allowed to send to the peer.
@@ -900,6 +990,9 @@
 
     /// Ranges of data offsets that have been acked.
     acked: ranges::RangeSet,
+
+    /// The error code received via STOP_SENDING.
+    error: Option<u64>,
 }
 
 impl SendBuf {
@@ -916,122 +1009,116 @@
     /// The number of bytes that were actually stored in the buffer is returned
     /// (this may be lower than the size of the input buffer, in case of partial
     /// writes).
-    pub fn push_slice(
-        &mut self, mut data: &[u8], mut fin: bool,
-    ) -> Result<usize> {
-        if self.shutdown {
-            // Since we won't write any more data anyway, pretend that we sent
-            // all data that was passed in.
-            return Ok(data.len());
-        }
+    pub fn write(&mut self, mut data: &[u8], mut fin: bool) -> Result<usize> {
+        let max_off = self.off + data.len() as u64;
 
-        if data.is_empty() {
-            // Create a dummy range buffer, in order to propagate the `fin` flag
-            // into `RangeBuf::push()`. This will be discarded later on.
-            let buf = RangeBuf::from(&[], self.off, fin);
+        // Get the stream send capacity. This will return an error if the stream
+        // was stopped.
+        let capacity = self.cap()?;
 
-            return self.push(buf).map(|_| 0);
-        }
-
-        if data.len() > self.cap() {
+        if data.len() > capacity {
             // Truncate the input buffer according to the stream's capacity.
-            let len = self.cap();
+            let len = capacity;
             data = &data[..len];
 
             // We are not buffering the full input, so clear the fin flag.
             fin = false;
         }
 
-        let buf = RangeBuf::from(data, self.off, fin);
-        self.push(buf)?;
-
-        self.off += data.len() as u64;
-
-        Ok(data.len())
-    }
-
-    /// Inserts the given chunk of data in the buffer.
-    pub fn push(&mut self, buf: RangeBuf) -> Result<()> {
         if let Some(fin_off) = self.fin_off {
             // Can't write past final offset.
-            if buf.max_off() > fin_off {
+            if max_off > fin_off {
                 return Err(Error::FinalSize);
             }
 
             // Can't "undo" final offset.
-            if buf.max_off() == fin_off && !buf.fin() {
+            if max_off == fin_off && !fin {
                 return Err(Error::FinalSize);
             }
         }
 
-        if self.shutdown {
-            return Ok(());
-        }
-
-        if buf.fin() {
-            self.fin_off = Some(buf.max_off());
+        if fin {
+            self.fin_off = Some(max_off);
         }
 
         // Don't queue data that was already fully acked.
-        if self.ack_off() >= buf.max_off() {
-            return Ok(());
+        if self.ack_off() >= max_off {
+            return Ok(data.len());
         }
 
-        self.len += buf.len() as u64;
-
         // We already recorded the final offset, so we can just discard the
         // empty buffer now.
-        if buf.is_empty() {
-            return Ok(());
+        if data.is_empty() {
+            return Ok(data.len());
         }
 
-        self.data.push(buf);
+        let mut len = 0;
 
-        Ok(())
+        // Split the remaining input data into consistently-sized buffers to
+        // avoid fragmentation.
+        for chunk in data.chunks(SEND_BUFFER_SIZE) {
+            len += chunk.len();
+
+            let fin = len == data.len() && fin;
+
+            let buf = RangeBuf::from(chunk, self.off, fin);
+
+            // The new data can simply be appended at the end of the send buffer.
+            self.data.push_back(buf);
+
+            self.off += chunk.len() as u64;
+            self.len += chunk.len() as u64;
+        }
+
+        Ok(len)
     }
 
-    /// Returns contiguous data from the send buffer as a single `RangeBuf`.
-    pub fn pop(&mut self, max_data: usize) -> Result<RangeBuf> {
-        let mut out = RangeBuf::default();
-        out.data =
-            Vec::with_capacity(cmp::min(max_data as u64, self.len) as usize);
-        out.off = self.off;
+    /// Writes data from the send buffer into the given output buffer.
+    pub fn emit(&mut self, out: &mut [u8]) -> Result<(usize, bool)> {
+        let mut out_len = out.len();
+        let out_off = self.off_front();
 
-        let mut out_len = max_data;
-        let mut out_off = self.data.peek().map_or_else(|| out.off, RangeBuf::off);
+        let mut next_off = out_off;
 
         while out_len > 0 &&
             self.ready() &&
-            self.off_front() == out_off &&
+            self.off_front() == next_off &&
             self.off_front() < self.max_data
         {
-            let mut buf = match self.data.peek_mut() {
+            let buf = match self.data.get_mut(self.pos) {
                 Some(v) => v,
 
                 None => break,
             };
 
+            if buf.is_empty() {
+                self.pos += 1;
+                continue;
+            }
+
             let buf_len = cmp::min(buf.len(), out_len);
 
-            if out.is_empty() {
-                out.off = buf.off();
-            }
+            // Copy data to the output buffer.
+            let out_pos = (next_off - out_off) as usize;
+            (&mut out[out_pos..out_pos + buf_len])
+                .copy_from_slice(&buf[..buf_len]);
 
             self.len -= buf_len as u64;
 
             out_len -= buf_len;
-            out_off = buf.off() + buf_len as u64;
 
-            out.data.extend_from_slice(&buf[..buf_len]);
+            next_off = buf.off() + buf_len as u64;
 
-            if buf_len < buf.len() {
+            if !buf.is_empty() && buf_len < buf.len() {
                 buf.consume(buf_len);
 
                 // We reached the maximum capacity, so end here.
                 break;
             }
 
-            std::collections::binary_heap::PeekMut::pop(buf);
+            buf.consume(buf_len);
+
+            self.pos += 1;
         }
 
         // Override the `fin` flag set for the output buffer by matching the
@@ -1040,9 +1127,9 @@
         // This is more efficient than tracking `fin` using the range buffers
         // themselves, and lets us avoid queueing empty buffers just so we can
         // propagate the final size.
-        out.fin = self.fin_off == Some(out.max_off());
+        let fin = self.fin_off == Some(next_off);
 
-        Ok(out)
+        Ok((out.len() - out_len, fin))
     }
 
     /// Updates the max_data limit to the given value.
@@ -1055,17 +1142,138 @@
         self.acked.insert(off..off + len as u64);
     }
 
+    pub fn ack_and_drop(&mut self, off: u64, len: usize) {
+        self.ack(off, len);
+
+        let ack_off = self.ack_off();
+
+        if self.data.is_empty() {
+            return;
+        }
+
+        if off > ack_off {
+            return;
+        }
+
+        let mut drop_until = None;
+
+        // Drop contiguously acked data from the front of the buffer.
+        for (i, buf) in self.data.iter_mut().enumerate() {
+            // Newly acked range is past highest contiguous acked range, so we
+            // can't drop it.
+            if buf.off >= ack_off {
+                break;
+            }
+
+            // Highest contiguous acked range falls within newly acked range,
+            // so we can't drop it.
+            if buf.off < ack_off && ack_off < buf.max_off() {
+                break;
+            }
+
+            // Newly acked range can be dropped.
+            drop_until = Some(i);
+        }
+
+        if let Some(drop) = drop_until {
+            self.data.drain(..=drop);
+
+            // When a buffer is marked for retransmission, but then acked before
+            // it could be retransmitted, we might end up decreasing the SendBuf
+            // position too much, so make sure that doesn't happen.
+            self.pos = self.pos.saturating_sub(drop + 1);
+        }
+    }
+
+    pub fn retransmit(&mut self, off: u64, len: usize) {
+        let max_off = off + len as u64;
+        let ack_off = self.ack_off();
+
+        if self.data.is_empty() {
+            return;
+        }
+
+        if max_off <= ack_off {
+            return;
+        }
+
+        for i in 0..self.data.len() {
+            let buf = &mut self.data[i];
+
+            if buf.off >= max_off {
+                break;
+            }
+
+            if off > buf.max_off() {
+                continue;
+            }
+
+            // Split the buffer into 2 if the retransmit range ends before the
+            // buffer's final offset.
+            let new_buf = if buf.off < max_off && max_off < buf.max_off() {
+                Some(buf.split_off((max_off - buf.off as u64) as usize))
+            } else {
+                None
+            };
+
+            // Advance the buffer's position if the retransmit range is past
+            // the buffer's starting offset.
+            buf.pos = if off > buf.off && off <= buf.max_off() {
+                cmp::min(buf.pos, buf.start + (off - buf.off) as usize)
+            } else {
+                buf.start
+            };
+
+            self.pos = cmp::min(self.pos, i);
+
+            self.len += buf.len() as u64;
+
+            if let Some(b) = new_buf {
+                self.data.insert(i + 1, b);
+            }
+        }
+    }
+
+    /// Resets the stream at the current offset and clears all buffered data.
+    pub fn reset(&mut self) -> Result<u64> {
+        self.write(b"", true)?;
+
+        // Drop all buffered data.
+        self.data.clear();
+
+        // Mark all data as acked.
+        self.ack(0, self.off as usize);
+
+        self.pos = 0;
+        self.len = 0;
+
+        Ok(self.fin_off.unwrap())
+    }
+
+    /// Resets the stream and records the received error code.
+    ///
+    /// Calling this again after the first time has no effect.
+    pub fn stop(&mut self, error_code: u64) -> Result<u64> {
+        if self.error.is_some() {
+            return Err(Error::Done);
+        }
+
+        let fin_off = self.reset()?;
+
+        self.error = Some(error_code);
+
+        Ok(fin_off)
+    }
+
     /// Shuts down sending data.
-    pub fn shutdown(&mut self) -> Result<()> {
+    pub fn shutdown(&mut self) -> Result<u64> {
         if self.shutdown {
             return Err(Error::Done);
         }
 
         self.shutdown = true;
 
-        self.data.clear();
-
-        Ok(())
+        self.reset()
     }
 
     /// Returns the largest offset of data buffered.
@@ -1076,11 +1284,18 @@
 
     /// Returns the lowest offset of data buffered.
     pub fn off_front(&self) -> u64 {
-        match self.data.peek() {
-            Some(v) => v.off(),
+        let mut pos = self.pos;
 
-            None => self.off,
+        // Skip empty buffers from the start of the queue.
+        while let Some(b) = self.data.get(pos) {
+            if !b.is_empty() {
+                return b.off();
+            }
+
+            pos += 1;
         }
+
+        self.off
     }
 
     /// The maximum offset we are allowed to send to the peer.
@@ -1114,9 +1329,14 @@
         false
     }
 
+    /// Returns true if the stream was stopped before completion.
+    pub fn is_stopped(&self) -> bool {
+        self.error.is_some()
+    }
+
     /// Returns true if there is data to be written.
     fn ready(&self) -> bool {
-        !self.data.is_empty()
+        !self.data.is_empty() && self.off_front() < self.off
     }
 
     /// Returns the highest contiguously acked offset.
@@ -1131,22 +1351,49 @@
     }
 
     /// Returns the outgoing flow control capacity.
-    pub fn cap(&self) -> usize {
-        (self.max_data - self.off) as usize
+    pub fn cap(&self) -> Result<usize> {
+        // The stream was stopped, so return the error code instead.
+        if let Some(e) = self.error {
+            return Err(Error::StreamStopped(e));
+        }
+
+        Ok((self.max_data - self.off) as usize)
     }
 }
 
 /// Buffer holding data at a specific offset.
+///
+/// The data is stored in a `Vec<u8>` in such a way that it can be shared
+/// between multiple `RangeBuf` objects.
+///
+/// Each `RangeBuf` will have its own view of that buffer, where the `start`
+/// value indicates the initial offset within the `Vec`, and `len` indicates the
+/// number of bytes, starting from `start` that are included.
+///
+/// In addition, `pos` indicates the current offset within the `Vec`, starting
+/// from the very beginning of the `Vec`.
+///
+/// Finally, `off` is the starting offset for the specific `RangeBuf` within the
+/// stream the buffer belongs to.
 #[derive(Clone, Debug, Default, Eq)]
 pub struct RangeBuf {
     /// The internal buffer holding the data.
-    data: Vec<u8>,
+    ///
+    /// To avoid needless allocations when a RangeBuf is split, this field is
+    /// reference-counted and can be shared between multiple RangeBuf objects,
+    /// and sliced using the `start` and `len` values.
+    data: Arc<Vec<u8>>,
 
-    /// The starting offset within `data`. This allows partially consuming a
-    /// buffer without duplicating the data.
+    /// The initial offset within the internal buffer.
+    start: usize,
+
+    /// The current offset within the internal buffer.
     pos: usize,
 
-    /// The starting offset within a stream.
+    /// The number of bytes in the buffer, from the initial offset.
+    len: usize,
+
+    /// The offset of the buffer within a stream.
     off: u64,
 
     /// Whether this contains the final byte in the stream.
@@ -1155,10 +1402,12 @@
 
 impl RangeBuf {
     /// Creates a new `RangeBuf` from the given slice.
-    pub(crate) fn from(buf: &[u8], off: u64, fin: bool) -> RangeBuf {
+    pub fn from(buf: &[u8], off: u64, fin: bool) -> RangeBuf {
         RangeBuf {
-            data: Vec::from(buf),
+            data: Arc::new(Vec::from(buf)),
+            start: 0,
             pos: 0,
+            len: buf.len(),
             off,
             fin,
         }
@@ -1171,7 +1420,7 @@
 
     /// Returns the starting offset of `self`.
     pub fn off(&self) -> u64 {
-        self.off + self.pos as u64
+        (self.off - self.start as u64) + self.pos as u64
     }
 
     /// Returns the final offset of `self`.
@@ -1181,7 +1430,7 @@
 
     /// Returns the length of `self`.
     pub fn len(&self) -> usize {
-        self.data.len() - self.pos
+        self.len - (self.pos - self.start)
     }
 
     /// Returns true if `self` has a length of zero bytes.
@@ -1196,13 +1445,24 @@
 
     /// Splits the buffer into two at the given index.
     pub fn split_off(&mut self, at: usize) -> RangeBuf {
+        if at > self.len {
+            panic!(
+                "`at` split index (is {}) should be <= len (is {})",
+                at, self.len
+            );
+        }
+
         let buf = RangeBuf {
-            data: self.data.split_off(at),
-            pos: 0,
+            data: self.data.clone(),
+            start: self.start + at,
+            pos: cmp::max(self.pos, self.start + at),
+            len: self.len - at,
             off: self.off + at as u64,
             fin: self.fin,
         };
 
+        self.pos = cmp::min(self.pos, self.start + at);
+        self.len = at;
         self.fin = false;
 
         buf
@@ -1213,13 +1473,7 @@
     type Target = [u8];
 
     fn deref(&self) -> &[u8] {
-        &self.data[self.pos..]
-    }
-}
-
-impl std::ops::DerefMut for RangeBuf {
-    fn deref_mut(&mut self) -> &mut [u8] {
-        &mut self.data[self.pos..]
+        &self.data[self.pos..self.start + self.len]
     }
 }
 
@@ -1253,7 +1507,7 @@
 
         let mut buf = [0; 32];
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
     }
 
     #[test]
@@ -1262,54 +1516,54 @@
         assert_eq!(recv.len, 0);
 
         let buf = RangeBuf::from(b"hello", 0, false);
-        assert!(recv.push(buf).is_ok());
+        assert!(recv.write(buf).is_ok());
         assert_eq!(recv.len, 5);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
         let mut buf = [0; 32];
-        assert_eq!(recv.pop(&mut buf), Ok((5, false)));
+        assert_eq!(recv.emit(&mut buf), Ok((5, false)));
 
         // Don't store non-fin empty buffer.
         let buf = RangeBuf::from(b"", 10, false);
-        assert!(recv.push(buf).is_ok());
+        assert!(recv.write(buf).is_ok());
         assert_eq!(recv.len, 5);
         assert_eq!(recv.off, 5);
         assert_eq!(recv.data.len(), 0);
 
         // Check flow control for empty buffer.
         let buf = RangeBuf::from(b"", 16, false);
-        assert_eq!(recv.push(buf), Err(Error::FlowControl));
+        assert_eq!(recv.write(buf), Err(Error::FlowControl));
 
         // Store fin empty buffer.
         let buf = RangeBuf::from(b"", 5, true);
-        assert!(recv.push(buf).is_ok());
+        assert!(recv.write(buf).is_ok());
         assert_eq!(recv.len, 5);
         assert_eq!(recv.off, 5);
         assert_eq!(recv.data.len(), 1);
 
         // Don't store additional fin empty buffers.
         let buf = RangeBuf::from(b"", 5, true);
-        assert!(recv.push(buf).is_ok());
+        assert!(recv.write(buf).is_ok());
         assert_eq!(recv.len, 5);
         assert_eq!(recv.off, 5);
         assert_eq!(recv.data.len(), 1);
 
         // Don't store additional fin non-empty buffers.
         let buf = RangeBuf::from(b"aa", 3, true);
-        assert!(recv.push(buf).is_ok());
+        assert!(recv.write(buf).is_ok());
         assert_eq!(recv.len, 5);
         assert_eq!(recv.off, 5);
         assert_eq!(recv.data.len(), 1);
 
         // Validate final size with fin empty buffers.
         let buf = RangeBuf::from(b"", 6, true);
-        assert_eq!(recv.push(buf), Err(Error::FinalSize));
+        assert_eq!(recv.write(buf), Err(Error::FinalSize));
         let buf = RangeBuf::from(b"", 4, true);
-        assert_eq!(recv.push(buf), Err(Error::FinalSize));
+        assert_eq!(recv.write(buf), Err(Error::FinalSize));
 
         let mut buf = [0; 32];
-        assert_eq!(recv.pop(&mut buf), Ok((0, true)));
+        assert_eq!(recv.emit(&mut buf), Ok((0, true)));
     }
 
     #[test]
@@ -1323,30 +1577,30 @@
         let second = RangeBuf::from(b"world", 5, false);
         let third = RangeBuf::from(b"something", 10, true);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 10);
         assert_eq!(recv.off, 0);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
 
-        assert!(recv.push(third).is_ok());
+        assert!(recv.write(third).is_ok());
         assert_eq!(recv.len, 19);
         assert_eq!(recv.off, 0);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 19);
         assert_eq!(recv.off, 0);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 19);
         assert_eq!(fin, true);
         assert_eq!(&buf[..len], b"helloworldsomething");
         assert_eq!(recv.len, 19);
         assert_eq!(recv.off, 19);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
     }
 
     #[test]
@@ -1359,29 +1613,29 @@
         let first = RangeBuf::from(b"something", 0, false);
         let second = RangeBuf::from(b"helloworld", 9, true);
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 19);
         assert_eq!(recv.off, 0);
 
-        let (len, fin) = recv.pop(&mut buf[..10]).unwrap();
+        let (len, fin) = recv.emit(&mut buf[..10]).unwrap();
         assert_eq!(len, 10);
         assert_eq!(fin, false);
         assert_eq!(&buf[..len], b"somethingh");
         assert_eq!(recv.len, 19);
         assert_eq!(recv.off, 10);
 
-        let (len, fin) = recv.pop(&mut buf[..5]).unwrap();
+        let (len, fin) = recv.emit(&mut buf[..5]).unwrap();
         assert_eq!(len, 5);
         assert_eq!(fin, false);
         assert_eq!(&buf[..len], b"ellow");
         assert_eq!(recv.len, 19);
         assert_eq!(recv.off, 15);
 
-        let (len, fin) = recv.pop(&mut buf[..10]).unwrap();
+        let (len, fin) = recv.emit(&mut buf[..10]).unwrap();
         assert_eq!(len, 4);
         assert_eq!(fin, true);
         assert_eq!(&buf[..len], b"orld");
@@ -1399,17 +1653,17 @@
         let first = RangeBuf::from(b"something", 0, false);
         let second = RangeBuf::from(b"helloworld", 9, true);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 19);
         assert_eq!(recv.off, 0);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 19);
         assert_eq!(recv.off, 0);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 19);
         assert_eq!(fin, true);
         assert_eq!(&buf[..len], b"somethinghelloworld");
@@ -1427,17 +1681,17 @@
         let first = RangeBuf::from(b"something", 0, false);
         let second = RangeBuf::from(b"", 9, true);
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 9);
         assert_eq!(fin, true);
         assert_eq!(&buf[..len], b"something");
@@ -1457,31 +1711,31 @@
         let third = RangeBuf::from(b"ello", 4, true);
         let fourth = RangeBuf::from(b"ello", 5, true);
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 9);
         assert_eq!(fin, false);
         assert_eq!(&buf[..len], b"something");
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 9);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 9);
         assert_eq!(recv.data.len(), 0);
 
-        assert_eq!(recv.push(third), Err(Error::FinalSize));
+        assert_eq!(recv.write(third), Err(Error::FinalSize));
 
-        assert!(recv.push(fourth).is_ok());
+        assert!(recv.write(fourth).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 9);
         assert_eq!(recv.data.len(), 0);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
     }
 
     #[test]
@@ -1494,17 +1748,17 @@
         let first = RangeBuf::from(b"something", 0, false);
         let second = RangeBuf::from(b"hello", 4, false);
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 9);
         assert_eq!(fin, false);
         assert_eq!(&buf[..len], b"something");
@@ -1512,7 +1766,7 @@
         assert_eq!(recv.off, 9);
         assert_eq!(recv.data.len(), 0);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
     }
 
     #[test]
@@ -1525,17 +1779,17 @@
         let first = RangeBuf::from(b"something", 0, false);
         let second = RangeBuf::from(b"hello", 4, false);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 2);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 9);
         assert_eq!(fin, false);
         assert_eq!(&buf[..len], b"somehello");
@@ -1543,7 +1797,7 @@
         assert_eq!(recv.off, 9);
         assert_eq!(recv.data.len(), 0);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
     }
 
     #[test]
@@ -1556,17 +1810,17 @@
         let first = RangeBuf::from(b"something", 0, false);
         let second = RangeBuf::from(b"hello", 3, false);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 8);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 3);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 9);
         assert_eq!(fin, false);
         assert_eq!(&buf[..len], b"somhellog");
@@ -1574,7 +1828,7 @@
         assert_eq!(recv.off, 9);
         assert_eq!(recv.data.len(), 0);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
     }
 
     #[test]
@@ -1588,22 +1842,22 @@
         let second = RangeBuf::from(b"hello", 3, false);
         let third = RangeBuf::from(b"hello", 12, false);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 8);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        assert!(recv.push(third).is_ok());
+        assert!(recv.write(third).is_ok());
         assert_eq!(recv.len, 17);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 2);
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 18);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 5);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 18);
         assert_eq!(fin, false);
         assert_eq!(&buf[..len], b"somhellogsomhellog");
@@ -1611,7 +1865,7 @@
         assert_eq!(recv.off, 18);
         assert_eq!(recv.data.len(), 0);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
     }
 
     #[test]
@@ -1624,24 +1878,24 @@
         let first = RangeBuf::from(b"something", 0, false);
         let second = RangeBuf::from(b"hello", 8, true);
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 13);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 2);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 13);
         assert_eq!(fin, true);
         assert_eq!(&buf[..len], b"somethingello");
         assert_eq!(recv.len, 13);
         assert_eq!(recv.off, 13);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
     }
 
     #[test]
@@ -1654,24 +1908,24 @@
         let first = RangeBuf::from(b"hello", 0, false);
         let second = RangeBuf::from(b"something", 3, true);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 12);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 12);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 2);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 12);
         assert_eq!(fin, true);
         assert_eq!(&buf[..len], b"helsomething");
         assert_eq!(recv.len, 12);
         assert_eq!(recv.off, 12);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
     }
 
     #[test]
@@ -1685,22 +1939,22 @@
         let second = RangeBuf::from(b"something", 0, false);
         let third = RangeBuf::from(b"moar", 11, true);
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 13);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 13);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 2);
 
-        assert!(recv.push(third).is_ok());
+        assert!(recv.write(third).is_ok());
         assert_eq!(recv.len, 15);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 3);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 15);
         assert_eq!(fin, true);
         assert_eq!(&buf[..len], b"somethinhelloar");
@@ -1708,7 +1962,7 @@
         assert_eq!(recv.off, 15);
         assert_eq!(recv.data.len(), 0);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
     }
 
     #[test]
@@ -1725,37 +1979,37 @@
         let fifth = RangeBuf::from(b"eee", 9, false);
         let sixth = RangeBuf::from(b"fff", 11, false);
 
-        assert!(recv.push(second).is_ok());
+        assert!(recv.write(second).is_ok());
         assert_eq!(recv.len, 5);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 1);
 
-        assert!(recv.push(fourth).is_ok());
+        assert!(recv.write(fourth).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 2);
 
-        assert!(recv.push(third).is_ok());
+        assert!(recv.write(third).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 3);
 
-        assert!(recv.push(first).is_ok());
+        assert!(recv.write(first).is_ok());
         assert_eq!(recv.len, 9);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 4);
 
-        assert!(recv.push(sixth).is_ok());
+        assert!(recv.write(sixth).is_ok());
         assert_eq!(recv.len, 14);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 5);
 
-        assert!(recv.push(fifth).is_ok());
+        assert!(recv.write(fifth).is_ok());
         assert_eq!(recv.len, 14);
         assert_eq!(recv.off, 0);
         assert_eq!(recv.data.len(), 6);
 
-        let (len, fin) = recv.pop(&mut buf).unwrap();
+        let (len, fin) = recv.emit(&mut buf).unwrap();
         assert_eq!(len, 14);
         assert_eq!(fin, false);
         assert_eq!(&buf[..len], b"aabbbcdddeefff");
@@ -1763,78 +2017,91 @@
         assert_eq!(recv.off, 14);
         assert_eq!(recv.data.len(), 0);
 
-        assert_eq!(recv.pop(&mut buf), Err(Error::Done));
+        assert_eq!(recv.emit(&mut buf), Err(Error::Done));
     }
 
     #[test]
     fn empty_write() {
+        let mut buf = [0; 5];
+
         let mut send = SendBuf::new(std::u64::MAX);
         assert_eq!(send.len, 0);
 
-        let write = send.pop(std::usize::MAX).unwrap();
-        assert_eq!(write.len(), 0);
-        assert_eq!(write.fin(), false);
+        let (written, fin) = send.emit(&mut buf).unwrap();
+        assert_eq!(written, 0);
+        assert_eq!(fin, false);
     }
 
     #[test]
     fn multi_write() {
+        let mut buf = [0; 128];
+
         let mut send = SendBuf::new(std::u64::MAX);
         assert_eq!(send.len, 0);
 
         let first = b"something";
         let second = b"helloworld";
 
-        assert!(send.push_slice(first, false).is_ok());
+        assert!(send.write(first, false).is_ok());
         assert_eq!(send.len, 9);
 
-        assert!(send.push_slice(second, true).is_ok());
+        assert!(send.write(second, true).is_ok());
         assert_eq!(send.len, 19);
 
-        let write = send.pop(128).unwrap();
-        assert_eq!(write.len(), 19);
-        assert_eq!(write.fin(), true);
-        assert_eq!(&write[..], b"somethinghelloworld");
+        let (written, fin) = send.emit(&mut buf[..128]).unwrap();
+        assert_eq!(written, 19);
+        assert_eq!(fin, true);
+        assert_eq!(&buf[..written], b"somethinghelloworld");
         assert_eq!(send.len, 0);
     }
 
     #[test]
     fn split_write() {
+        let mut buf = [0; 10];
+
         let mut send = SendBuf::new(std::u64::MAX);
         assert_eq!(send.len, 0);
 
         let first = b"something";
         let second = b"helloworld";
 
-        assert!(send.push_slice(first, false).is_ok());
+        assert!(send.write(first, false).is_ok());
         assert_eq!(send.len, 9);
 
-        assert!(send.push_slice(second, true).is_ok());
+        assert!(send.write(second, true).is_ok());
         assert_eq!(send.len, 19);
 
-        let write = send.pop(10).unwrap();
-        assert_eq!(write.off(), 0);
-        assert_eq!(write.len(), 10);
-        assert_eq!(write.fin(), false);
-        assert_eq!(&write[..], b"somethingh");
+        assert_eq!(send.off_front(), 0);
+
+        let (written, fin) = send.emit(&mut buf[..10]).unwrap();
+        assert_eq!(written, 10);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"somethingh");
         assert_eq!(send.len, 9);
 
-        let write = send.pop(5).unwrap();
-        assert_eq!(write.off(), 10);
-        assert_eq!(write.len(), 5);
-        assert_eq!(write.fin(), false);
-        assert_eq!(&write[..], b"ellow");
+        assert_eq!(send.off_front(), 10);
+
+        let (written, fin) = send.emit(&mut buf[..5]).unwrap();
+        assert_eq!(written, 5);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"ellow");
         assert_eq!(send.len, 4);
 
-        let write = send.pop(10).unwrap();
-        assert_eq!(write.off(), 15);
-        assert_eq!(write.len(), 4);
-        assert_eq!(write.fin(), true);
-        assert_eq!(&write[..], b"orld");
+        assert_eq!(send.off_front(), 15);
+
+        let (written, fin) = send.emit(&mut buf[..10]).unwrap();
+        assert_eq!(written, 4);
+        assert_eq!(fin, true);
+        assert_eq!(&buf[..written], b"orld");
         assert_eq!(send.len, 0);
+
+        assert_eq!(send.off_front(), 19);
     }
 
     #[test]
     fn resend() {
+        let mut buf = [0; 15];
+
         let mut send = SendBuf::new(std::u64::MAX);
         assert_eq!(send.len, 0);
         assert_eq!(send.off_front(), 0);
@@ -1842,143 +2109,149 @@
         let first = b"something";
         let second = b"helloworld";
 
-        assert!(send.push_slice(first, false).is_ok());
+        assert!(send.write(first, false).is_ok());
         assert_eq!(send.off_front(), 0);
 
-        assert!(send.push_slice(second, true).is_ok());
+        assert!(send.write(second, true).is_ok());
         assert_eq!(send.off_front(), 0);
 
-        let write1 = send.pop(4).unwrap();
-        assert_eq!(write1.off(), 0);
-        assert_eq!(write1.len(), 4);
-        assert_eq!(write1.fin(), false);
-        assert_eq!(&write1[..], b"some");
+        assert_eq!(send.len, 19);
+
+        let (written, fin) = send.emit(&mut buf[..4]).unwrap();
+        assert_eq!(written, 4);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"some");
         assert_eq!(send.len, 15);
         assert_eq!(send.off_front(), 4);
 
-        let write2 = send.pop(5).unwrap();
-        assert_eq!(write2.off(), 4);
-        assert_eq!(write2.len(), 5);
-        assert_eq!(write2.fin(), false);
-        assert_eq!(&write2[..], b"thing");
+        let (written, fin) = send.emit(&mut buf[..5]).unwrap();
+        assert_eq!(written, 5);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"thing");
         assert_eq!(send.len, 10);
         assert_eq!(send.off_front(), 9);
 
-        let write3 = send.pop(5).unwrap();
-        assert_eq!(write3.off(), 9);
-        assert_eq!(write3.len(), 5);
-        assert_eq!(write3.fin(), false);
-        assert_eq!(&write3[..], b"hello");
+        let (written, fin) = send.emit(&mut buf[..5]).unwrap();
+        assert_eq!(written, 5);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"hello");
         assert_eq!(send.len, 5);
         assert_eq!(send.off_front(), 14);
 
-        send.push(write2).unwrap();
+        send.retransmit(4, 5);
         assert_eq!(send.len, 10);
         assert_eq!(send.off_front(), 4);
 
-        send.push(write1).unwrap();
+        send.retransmit(0, 4);
         assert_eq!(send.len, 14);
         assert_eq!(send.off_front(), 0);
 
-        let write4 = send.pop(11).unwrap();
-        assert_eq!(write4.off(), 0);
-        assert_eq!(write4.len(), 9);
-        assert_eq!(write4.fin(), false);
-        assert_eq!(&write4[..], b"something");
+        let (written, fin) = send.emit(&mut buf[..11]).unwrap();
+        assert_eq!(written, 9);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"something");
         assert_eq!(send.len, 5);
         assert_eq!(send.off_front(), 14);
 
-        let write5 = send.pop(11).unwrap();
-        assert_eq!(write5.off(), 14);
-        assert_eq!(write5.len(), 5);
-        assert_eq!(write5.fin(), true);
-        assert_eq!(&write5[..], b"world");
+        let (written, fin) = send.emit(&mut buf[..11]).unwrap();
+        assert_eq!(written, 5);
+        assert_eq!(fin, true);
+        assert_eq!(&buf[..written], b"world");
         assert_eq!(send.len, 0);
         assert_eq!(send.off_front(), 19);
     }
 
     #[test]
     fn write_blocked_by_off() {
+        let mut buf = [0; 10];
+
         let mut send = SendBuf::default();
         assert_eq!(send.len, 0);
 
         let first = b"something";
         let second = b"helloworld";
 
-        assert_eq!(send.push_slice(first, false), Ok(0));
+        assert_eq!(send.write(first, false), Ok(0));
         assert_eq!(send.len, 0);
 
-        assert_eq!(send.push_slice(second, true), Ok(0));
+        assert_eq!(send.write(second, true), Ok(0));
         assert_eq!(send.len, 0);
 
         send.update_max_data(5);
 
-        assert_eq!(send.push_slice(first, false), Ok(5));
+        assert_eq!(send.write(first, false), Ok(5));
         assert_eq!(send.len, 5);
 
-        assert_eq!(send.push_slice(second, true), Ok(0));
+        assert_eq!(send.write(second, true), Ok(0));
         assert_eq!(send.len, 5);
 
-        let write = send.pop(10).unwrap();
-        assert_eq!(write.off(), 0);
-        assert_eq!(write.len(), 5);
-        assert_eq!(write.fin(), false);
-        assert_eq!(&write[..], b"somet");
+        assert_eq!(send.off_front(), 0);
+
+        let (written, fin) = send.emit(&mut buf[..10]).unwrap();
+        assert_eq!(written, 5);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"somet");
         assert_eq!(send.len, 0);
 
-        let write = send.pop(10).unwrap();
-        assert_eq!(write.off(), 5);
-        assert_eq!(write.len(), 0);
-        assert_eq!(write.fin(), false);
-        assert_eq!(&write[..], b"");
+        assert_eq!(send.off_front(), 5);
+
+        let (written, fin) = send.emit(&mut buf[..10]).unwrap();
+        assert_eq!(written, 0);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"");
         assert_eq!(send.len, 0);
 
         send.update_max_data(15);
 
-        assert_eq!(send.push_slice(&first[5..], false), Ok(4));
+        assert_eq!(send.write(&first[5..], false), Ok(4));
         assert_eq!(send.len, 4);
 
-        assert_eq!(send.push_slice(second, true), Ok(6));
+        assert_eq!(send.write(second, true), Ok(6));
         assert_eq!(send.len, 10);
 
-        let write = send.pop(10).unwrap();
-        assert_eq!(write.off(), 5);
-        assert_eq!(write.len(), 10);
-        assert_eq!(write.fin(), false);
-        assert_eq!(&write[..], b"hinghellow");
+        assert_eq!(send.off_front(), 5);
+
+        let (written, fin) = send.emit(&mut buf[..10]).unwrap();
+        assert_eq!(written, 10);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..10], b"hinghellow");
         assert_eq!(send.len, 0);
 
         send.update_max_data(25);
 
-        assert_eq!(send.push_slice(&second[6..], true), Ok(4));
+        assert_eq!(send.write(&second[6..], true), Ok(4));
         assert_eq!(send.len, 4);
 
-        let write = send.pop(10).unwrap();
-        assert_eq!(write.off(), 15);
-        assert_eq!(write.len(), 4);
-        assert_eq!(write.fin(), true);
-        assert_eq!(&write[..], b"orld");
+        assert_eq!(send.off_front(), 15);
+
+        let (written, fin) = send.emit(&mut buf[..10]).unwrap();
+        assert_eq!(written, 4);
+        assert_eq!(fin, true);
+        assert_eq!(&buf[..written], b"orld");
         assert_eq!(send.len, 0);
     }
 
     #[test]
     fn zero_len_write() {
+        let mut buf = [0; 10];
+
         let mut send = SendBuf::new(std::u64::MAX);
         assert_eq!(send.len, 0);
 
         let first = b"something";
 
-        assert!(send.push_slice(first, false).is_ok());
+        assert!(send.write(first, false).is_ok());
         assert_eq!(send.len, 9);
 
-        assert!(send.push_slice(&[], true).is_ok());
+        assert!(send.write(&[], true).is_ok());
         assert_eq!(send.len, 9);
 
-        let write = send.pop(10).unwrap();
-        assert_eq!(write.off(), 0);
-        assert_eq!(write.len(), 9);
-        assert_eq!(write.fin(), true);
-        assert_eq!(&write[..], b"something");
+        assert_eq!(send.off_front(), 0);
+
+        let (written, fin) = send.emit(&mut buf[..10]).unwrap();
+        assert_eq!(written, 9);
+        assert_eq!(fin, true);
+        assert_eq!(&buf[..written], b"something");
         assert_eq!(send.len, 0);
     }
 
@@ -1993,13 +2266,13 @@
         let second = RangeBuf::from(b"world", 5, false);
         let third = RangeBuf::from(b"something", 10, false);
 
-        assert_eq!(stream.recv.push(second), Ok(()));
-        assert_eq!(stream.recv.push(first), Ok(()));
+        assert_eq!(stream.recv.write(second), Ok(()));
+        assert_eq!(stream.recv.write(first), Ok(()));
         assert!(!stream.recv.almost_full());
 
-        assert_eq!(stream.recv.push(third), Err(Error::FlowControl));
+        assert_eq!(stream.recv.write(third), Err(Error::FlowControl));
 
-        let (len, fin) = stream.recv.pop(&mut buf).unwrap();
+        let (len, fin) = stream.recv.emit(&mut buf).unwrap();
         assert_eq!(&buf[..len], b"helloworld");
         assert_eq!(fin, false);
 
@@ -2010,7 +2283,7 @@
         assert!(!stream.recv.almost_full());
 
         let third = RangeBuf::from(b"something", 10, false);
-        assert_eq!(stream.recv.push(third), Ok(()));
+        assert_eq!(stream.recv.write(third), Ok(()));
     }
 
     #[test]
@@ -2021,8 +2294,8 @@
         let first = RangeBuf::from(b"hello", 0, true);
         let second = RangeBuf::from(b"world", 5, false);
 
-        assert_eq!(stream.recv.push(first), Ok(()));
-        assert_eq!(stream.recv.push(second), Err(Error::FinalSize));
+        assert_eq!(stream.recv.write(first), Ok(()));
+        assert_eq!(stream.recv.write(second), Err(Error::FinalSize));
     }
 
     #[test]
@@ -2033,12 +2306,12 @@
         let first = RangeBuf::from(b"hello", 0, true);
         let second = RangeBuf::from(b"hello", 0, true);
 
-        assert_eq!(stream.recv.push(first), Ok(()));
-        assert_eq!(stream.recv.push(second), Ok(()));
+        assert_eq!(stream.recv.write(first), Ok(()));
+        assert_eq!(stream.recv.write(second), Ok(()));
 
         let mut buf = [0; 32];
 
-        let (len, fin) = stream.recv.pop(&mut buf).unwrap();
+        let (len, fin) = stream.recv.emit(&mut buf).unwrap();
         assert_eq!(&buf[..len], b"hello");
         assert_eq!(fin, true);
     }
@@ -2051,8 +2324,8 @@
         let first = RangeBuf::from(b"hello", 0, true);
         let second = RangeBuf::from(b"world", 5, true);
 
-        assert_eq!(stream.recv.push(second), Ok(()));
-        assert_eq!(stream.recv.push(first), Err(Error::FinalSize));
+        assert_eq!(stream.recv.write(second), Ok(()));
+        assert_eq!(stream.recv.write(first), Err(Error::FinalSize));
     }
 
     #[test]
@@ -2063,8 +2336,8 @@
         let first = RangeBuf::from(b"hello", 0, true);
         let second = RangeBuf::from(b"world", 5, false);
 
-        assert_eq!(stream.recv.push(second), Ok(()));
-        assert_eq!(stream.recv.push(first), Err(Error::FinalSize));
+        assert_eq!(stream.recv.write(second), Ok(()));
+        assert_eq!(stream.recv.write(first), Err(Error::FinalSize));
     }
 
     #[test]
@@ -2077,10 +2350,10 @@
         let first = RangeBuf::from(b"hello", 0, false);
         let second = RangeBuf::from(b"world", 5, true);
 
-        assert_eq!(stream.recv.push(first), Ok(()));
-        assert_eq!(stream.recv.push(second), Ok(()));
+        assert_eq!(stream.recv.write(first), Ok(()));
+        assert_eq!(stream.recv.write(second), Ok(()));
 
-        let (len, fin) = stream.recv.pop(&mut buf).unwrap();
+        let (len, fin) = stream.recv.emit(&mut buf).unwrap();
         assert_eq!(&buf[..len], b"helloworld");
         assert_eq!(fin, true);
 
@@ -2094,7 +2367,7 @@
 
         let first = RangeBuf::from(b"hello", 0, true);
 
-        assert_eq!(stream.recv.push(first), Ok(()));
+        assert_eq!(stream.recv.write(first), Ok(()));
         assert_eq!(stream.recv.reset(10), Err(Error::FinalSize));
     }
 
@@ -2105,7 +2378,7 @@
 
         let first = RangeBuf::from(b"hello", 0, false);
 
-        assert_eq!(stream.recv.push(first), Ok(()));
+        assert_eq!(stream.recv.write(first), Ok(()));
         assert_eq!(stream.recv.reset(5), Ok(0));
         assert_eq!(stream.recv.reset(5), Ok(0));
     }
@@ -2117,7 +2390,7 @@
 
         let first = RangeBuf::from(b"hello", 0, false);
 
-        assert_eq!(stream.recv.push(first), Ok(()));
+        assert_eq!(stream.recv.write(first), Ok(()));
         assert_eq!(stream.recv.reset(5), Ok(0));
         assert_eq!(stream.recv.reset(10), Err(Error::FinalSize));
     }
@@ -2129,48 +2402,53 @@
 
         let first = RangeBuf::from(b"hello", 0, false);
 
-        assert_eq!(stream.recv.push(first), Ok(()));
+        assert_eq!(stream.recv.write(first), Ok(()));
         assert_eq!(stream.recv.reset(4), Err(Error::FinalSize));
     }
 
     #[test]
     fn send_flow_control() {
+        let mut buf = [0; 25];
+
         let mut stream = Stream::new(0, 15, true, true);
 
         let first = b"hello";
         let second = b"world";
         let third = b"something";
 
-        assert!(stream.send.push_slice(first, false).is_ok());
-        assert!(stream.send.push_slice(second, false).is_ok());
-        assert!(stream.send.push_slice(third, false).is_ok());
+        assert!(stream.send.write(first, false).is_ok());
+        assert!(stream.send.write(second, false).is_ok());
+        assert!(stream.send.write(third, false).is_ok());
 
-        let write = stream.send.pop(25).unwrap();
-        assert_eq!(write.off(), 0);
-        assert_eq!(write.len(), 15);
-        assert_eq!(write.fin(), false);
-        assert_eq!(write.data, b"helloworldsomet");
+        assert_eq!(stream.send.off_front(), 0);
 
-        let write = stream.send.pop(25).unwrap();
-        assert_eq!(write.off(), 15);
-        assert_eq!(write.len(), 0);
-        assert_eq!(write.fin(), false);
-        assert_eq!(write.data, b"");
+        let (written, fin) = stream.send.emit(&mut buf[..25]).unwrap();
+        assert_eq!(written, 15);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"helloworldsomet");
 
-        let first = RangeBuf::from(b"helloworldsomet", 0, false);
-        assert_eq!(stream.send.push(first), Ok(()));
+        assert_eq!(stream.send.off_front(), 15);
 
-        let write = stream.send.pop(10).unwrap();
-        assert_eq!(write.off(), 0);
-        assert_eq!(write.len(), 10);
-        assert_eq!(write.fin(), false);
-        assert_eq!(write.data, b"helloworld");
+        let (written, fin) = stream.send.emit(&mut buf[..25]).unwrap();
+        assert_eq!(written, 0);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"");
 
-        let write = stream.send.pop(10).unwrap();
-        assert_eq!(write.off(), 10);
-        assert_eq!(write.len(), 5);
-        assert_eq!(write.fin(), false);
-        assert_eq!(write.data, b"somet");
+        stream.send.retransmit(0, 15);
+
+        assert_eq!(stream.send.off_front(), 0);
+
+        let (written, fin) = stream.send.emit(&mut buf[..10]).unwrap();
+        assert_eq!(written, 10);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"helloworld");
+
+        assert_eq!(stream.send.off_front(), 10);
+
+        let (written, fin) = stream.send.emit(&mut buf[..10]).unwrap();
+        assert_eq!(written, 5);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"somet");
     }
 
     #[test]
@@ -2181,126 +2459,137 @@
         let second = b"world";
         let third = b"third";
 
-        assert_eq!(stream.send.push_slice(first, false), Ok(5));
+        assert_eq!(stream.send.write(first, false), Ok(5));
 
-        assert_eq!(stream.send.push_slice(second, true), Ok(5));
+        assert_eq!(stream.send.write(second, true), Ok(5));
         assert!(stream.send.is_fin());
 
-        assert_eq!(stream.send.push_slice(third, false), Err(Error::FinalSize));
+        assert_eq!(stream.send.write(third, false), Err(Error::FinalSize));
     }
 
     #[test]
     fn send_fin_dup() {
         let mut stream = Stream::new(0, 15, true, true);
 
-        let first = RangeBuf::from(b"hello", 0, true);
-        let second = RangeBuf::from(b"hello", 0, true);
+        assert_eq!(stream.send.write(b"hello", true), Ok(5));
+        assert!(stream.send.is_fin());
 
-        assert_eq!(stream.send.push(first), Ok(()));
-        assert_eq!(stream.send.push(second), Ok(()));
+        assert_eq!(stream.send.write(b"", true), Ok(0));
+        assert!(stream.send.is_fin());
     }
 
     #[test]
     fn send_undo_fin() {
         let mut stream = Stream::new(0, 15, true, true);
 
-        let first = b"hello";
-        let second = RangeBuf::from(b"hello", 0, false);
-
-        assert_eq!(stream.send.push_slice(first, true), Ok(5));
+        assert_eq!(stream.send.write(b"hello", true), Ok(5));
         assert!(stream.send.is_fin());
 
-        assert_eq!(stream.send.push(second), Err(Error::FinalSize));
+        assert_eq!(
+            stream.send.write(b"helloworld", true),
+            Err(Error::FinalSize)
+        );
     }
 
     #[test]
     fn send_fin_max_data_match() {
+        let mut buf = [0; 15];
+
         let mut stream = Stream::new(0, 15, true, true);
 
         let slice = b"hellohellohello";
 
-        assert!(stream.send.push_slice(slice, true).is_ok());
+        assert!(stream.send.write(slice, true).is_ok());
 
-        let write = stream.send.pop(15).unwrap();
-        assert_eq!(write.off(), 0);
-        assert_eq!(write.len(), 15);
-        assert_eq!(write.fin(), true);
-        assert_eq!(write.data, slice);
+        let (written, fin) = stream.send.emit(&mut buf[..15]).unwrap();
+        assert_eq!(written, 15);
+        assert_eq!(fin, true);
+        assert_eq!(&buf[..written], slice);
     }
 
     #[test]
     fn send_fin_zero_length() {
+        let mut buf = [0; 5];
+
         let mut stream = Stream::new(0, 15, true, true);
 
-        assert_eq!(stream.send.push_slice(b"hello", false), Ok(5));
-        assert_eq!(stream.send.push_slice(b"", true), Ok(0));
+        assert_eq!(stream.send.write(b"hello", false), Ok(5));
+        assert_eq!(stream.send.write(b"", true), Ok(0));
         assert!(stream.send.is_fin());
 
-        let write = stream.send.pop(5).unwrap();
-        assert_eq!(write.off(), 0);
-        assert_eq!(write.len(), 5);
-        assert_eq!(write.fin(), true);
-        assert_eq!(write.data, b"hello");
+        let (written, fin) = stream.send.emit(&mut buf[..5]).unwrap();
+        assert_eq!(written, 5);
+        assert_eq!(fin, true);
+        assert_eq!(&buf[..written], b"hello");
     }
 
     #[test]
     fn send_ack() {
+        let mut buf = [0; 5];
+
         let mut stream = Stream::new(0, 15, true, true);
 
-        assert_eq!(stream.send.push_slice(b"hello", false), Ok(5));
-        assert_eq!(stream.send.push_slice(b"world", false), Ok(5));
-        assert_eq!(stream.send.push_slice(b"", true), Ok(0));
+        assert_eq!(stream.send.write(b"hello", false), Ok(5));
+        assert_eq!(stream.send.write(b"world", false), Ok(5));
+        assert_eq!(stream.send.write(b"", true), Ok(0));
         assert!(stream.send.is_fin());
 
-        let write = stream.send.pop(5).unwrap();
-        assert_eq!(write.off(), 0);
-        assert_eq!(write.len(), 5);
-        assert_eq!(write.fin(), false);
-        assert_eq!(write.data, b"hello");
+        assert_eq!(stream.send.off_front(), 0);
 
-        stream.send.ack(write.off(), write.len());
+        let (written, fin) = stream.send.emit(&mut buf[..5]).unwrap();
+        assert_eq!(written, 5);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"hello");
 
-        assert_eq!(stream.send.push(write), Ok(()));
+        stream.send.ack_and_drop(0, 5);
 
-        let write = stream.send.pop(5).unwrap();
-        assert_eq!(write.off(), 5);
-        assert_eq!(write.len(), 5);
-        assert_eq!(write.fin(), true);
-        assert_eq!(write.data, b"world");
+        stream.send.retransmit(0, 5);
+
+        assert_eq!(stream.send.off_front(), 5);
+
+        let (written, fin) = stream.send.emit(&mut buf[..5]).unwrap();
+        assert_eq!(written, 5);
+        assert_eq!(fin, true);
+        assert_eq!(&buf[..written], b"world");
     }
 
     #[test]
     fn send_ack_reordering() {
+        let mut buf = [0; 5];
+
         let mut stream = Stream::new(0, 15, true, true);
 
-        assert_eq!(stream.send.push_slice(b"hello", false), Ok(5));
-        assert_eq!(stream.send.push_slice(b"world", false), Ok(5));
-        assert_eq!(stream.send.push_slice(b"", true), Ok(0));
+        assert_eq!(stream.send.write(b"hello", false), Ok(5));
+        assert_eq!(stream.send.write(b"world", false), Ok(5));
+        assert_eq!(stream.send.write(b"", true), Ok(0));
         assert!(stream.send.is_fin());
 
-        let write1 = stream.send.pop(5).unwrap();
-        assert_eq!(write1.off(), 0);
-        assert_eq!(write1.len(), 5);
-        assert_eq!(write1.fin(), false);
-        assert_eq!(write1.data, b"hello");
+        assert_eq!(stream.send.off_front(), 0);
 
-        let write2 = stream.send.pop(1).unwrap();
-        assert_eq!(write2.off(), 5);
-        assert_eq!(write2.len(), 1);
-        assert_eq!(write2.fin(), false);
-        assert_eq!(write2.data, b"w");
+        let (written, fin) = stream.send.emit(&mut buf[..5]).unwrap();
+        assert_eq!(written, 5);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"hello");
 
-        stream.send.ack(write2.off(), write2.len());
-        stream.send.ack(write1.off(), write1.len());
+        assert_eq!(stream.send.off_front(), 5);
 
-        assert_eq!(stream.send.push(write1), Ok(()));
-        assert_eq!(stream.send.push(write2), Ok(()));
+        let (written, fin) = stream.send.emit(&mut buf[..1]).unwrap();
+        assert_eq!(written, 1);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"w");
 
-        let write = stream.send.pop(5).unwrap();
-        assert_eq!(write.off(), 6);
-        assert_eq!(write.len(), 4);
-        assert_eq!(write.fin(), true);
-        assert_eq!(write.data, b"orld");
+        stream.send.ack_and_drop(5, 1);
+        stream.send.ack_and_drop(0, 5);
+
+        stream.send.retransmit(0, 5);
+        stream.send.retransmit(5, 1);
+
+        assert_eq!(stream.send.off_front(), 6);
+
+        let (written, fin) = stream.send.emit(&mut buf[..5]).unwrap();
+        assert_eq!(written, 4);
+        assert_eq!(fin, true);
+        assert_eq!(&buf[..written], b"orld");
     }
 
     #[test]
@@ -2309,18 +2598,18 @@
 
         let first = RangeBuf::from(b"hello", 0, false);
 
-        assert_eq!(stream.recv.push(first), Ok(()));
+        assert_eq!(stream.recv.write(first), Ok(()));
 
         let mut buf = [0; 10];
 
-        let (len, fin) = stream.recv.pop(&mut buf).unwrap();
+        let (len, fin) = stream.recv.emit(&mut buf).unwrap();
         assert_eq!(&buf[..len], b"hello");
         assert_eq!(fin, false);
 
         let first = RangeBuf::from(b"elloworld", 1, true);
-        assert_eq!(stream.recv.push(first), Ok(()));
+        assert_eq!(stream.recv.write(first), Ok(()));
 
-        let (len, fin) = stream.recv.pop(&mut buf).unwrap();
+        let (len, fin) = stream.recv.emit(&mut buf).unwrap();
         assert_eq!(&buf[..len], b"world");
         assert_eq!(fin, true);
     }
@@ -2329,26 +2618,26 @@
     fn stream_complete() {
         let mut stream = Stream::new(30, 30, true, true);
 
-        assert_eq!(stream.send.push_slice(b"hello", false), Ok(5));
-        assert_eq!(stream.send.push_slice(b"world", false), Ok(5));
+        assert_eq!(stream.send.write(b"hello", false), Ok(5));
+        assert_eq!(stream.send.write(b"world", false), Ok(5));
 
         assert!(!stream.send.is_complete());
         assert!(!stream.send.is_fin());
 
-        assert_eq!(stream.send.push_slice(b"", true), Ok(0));
+        assert_eq!(stream.send.write(b"", true), Ok(0));
 
         assert!(!stream.send.is_complete());
         assert!(stream.send.is_fin());
 
         let buf = RangeBuf::from(b"hello", 0, true);
-        assert!(stream.recv.push(buf).is_ok());
+        assert!(stream.recv.write(buf).is_ok());
         assert!(!stream.recv.is_fin());
 
         stream.send.ack(6, 4);
         assert!(!stream.send.is_complete());
 
         let mut buf = [0; 2];
-        assert_eq!(stream.recv.pop(&mut buf), Ok((2, false)));
+        assert_eq!(stream.recv.emit(&mut buf), Ok((2, false)));
         assert!(!stream.recv.is_fin());
 
         stream.send.ack(1, 5);
@@ -2360,7 +2649,7 @@
         assert!(!stream.is_complete());
 
         let mut buf = [0; 3];
-        assert_eq!(stream.recv.pop(&mut buf), Ok((3, true)));
+        assert_eq!(stream.recv.emit(&mut buf), Ok((3, true)));
         assert!(stream.recv.is_fin());
 
         assert!(stream.is_complete());
@@ -2368,24 +2657,372 @@
 
     #[test]
     fn send_fin_zero_length_output() {
+        let mut buf = [0; 5];
+
         let mut stream = Stream::new(0, 15, true, true);
 
-        assert_eq!(stream.send.push_slice(b"hello", false), Ok(5));
+        assert_eq!(stream.send.write(b"hello", false), Ok(5));
+        assert_eq!(stream.send.off_front(), 0);
         assert!(!stream.send.is_fin());
 
-        let write = stream.send.pop(5).unwrap();
-        assert_eq!(write.off(), 0);
-        assert_eq!(write.len(), 5);
-        assert_eq!(write.fin(), false);
-        assert_eq!(write.data, b"hello");
+        let (written, fin) = stream.send.emit(&mut buf).unwrap();
+        assert_eq!(written, 5);
+        assert_eq!(fin, false);
+        assert_eq!(&buf[..written], b"hello");
 
-        assert_eq!(stream.send.push_slice(b"", true), Ok(0));
+        assert_eq!(stream.send.write(b"", true), Ok(0));
         assert!(stream.send.is_fin());
+        assert_eq!(stream.send.off_front(), 5);
 
-        let write = stream.send.pop(5).unwrap();
-        assert_eq!(write.off(), 5);
-        assert_eq!(write.len(), 0);
-        assert_eq!(write.fin(), true);
-        assert_eq!(write.data, b"");
+        let (written, fin) = stream.send.emit(&mut buf).unwrap();
+        assert_eq!(written, 0);
+        assert_eq!(fin, true);
+        assert_eq!(&buf[..written], b"");
+    }
+
+    #[test]
+    fn send_emit() {
+        let mut buf = [0; 5];
+
+        let mut stream = Stream::new(0, 20, true, true);
+
+        assert_eq!(stream.send.write(b"hello", false), Ok(5));
+        assert_eq!(stream.send.write(b"world", false), Ok(5));
+        assert_eq!(stream.send.write(b"olleh", false), Ok(5));
+        assert_eq!(stream.send.write(b"dlrow", true), Ok(5));
+        assert_eq!(stream.send.off_front(), 0);
+        assert_eq!(stream.send.data.len(), 4);
+
+        assert!(stream.is_flushable());
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..4]), Ok((4, false)));
+        assert_eq!(stream.send.off_front(), 4);
+        assert_eq!(&buf[..4], b"hell");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..4]), Ok((4, false)));
+        assert_eq!(stream.send.off_front(), 8);
+        assert_eq!(&buf[..4], b"owor");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..2]), Ok((2, false)));
+        assert_eq!(stream.send.off_front(), 10);
+        assert_eq!(&buf[..2], b"ld");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..1]), Ok((1, false)));
+        assert_eq!(stream.send.off_front(), 11);
+        assert_eq!(&buf[..1], b"o");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((5, false)));
+        assert_eq!(stream.send.off_front(), 16);
+        assert_eq!(&buf[..5], b"llehd");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((4, true)));
+        assert_eq!(stream.send.off_front(), 20);
+        assert_eq!(&buf[..4], b"lrow");
+
+        assert!(!stream.is_flushable());
+
+        assert!(!stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((0, true)));
+        assert_eq!(stream.send.off_front(), 20);
+    }
+
+    #[test]
+    fn send_emit_ack() {
+        let mut buf = [0; 5];
+
+        let mut stream = Stream::new(0, 20, true, true);
+
+        assert_eq!(stream.send.write(b"hello", false), Ok(5));
+        assert_eq!(stream.send.write(b"world", false), Ok(5));
+        assert_eq!(stream.send.write(b"olleh", false), Ok(5));
+        assert_eq!(stream.send.write(b"dlrow", true), Ok(5));
+        assert_eq!(stream.send.off_front(), 0);
+        assert_eq!(stream.send.data.len(), 4);
+
+        assert!(stream.is_flushable());
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..4]), Ok((4, false)));
+        assert_eq!(stream.send.off_front(), 4);
+        assert_eq!(&buf[..4], b"hell");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..4]), Ok((4, false)));
+        assert_eq!(stream.send.off_front(), 8);
+        assert_eq!(&buf[..4], b"owor");
+
+        stream.send.ack_and_drop(0, 5);
+        assert_eq!(stream.send.data.len(), 3);
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..2]), Ok((2, false)));
+        assert_eq!(stream.send.off_front(), 10);
+        assert_eq!(&buf[..2], b"ld");
+
+        stream.send.ack_and_drop(7, 5);
+        assert_eq!(stream.send.data.len(), 3);
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..1]), Ok((1, false)));
+        assert_eq!(stream.send.off_front(), 11);
+        assert_eq!(&buf[..1], b"o");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((5, false)));
+        assert_eq!(stream.send.off_front(), 16);
+        assert_eq!(&buf[..5], b"llehd");
+
+        stream.send.ack_and_drop(5, 7);
+        assert_eq!(stream.send.data.len(), 2);
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((4, true)));
+        assert_eq!(stream.send.off_front(), 20);
+        assert_eq!(&buf[..4], b"lrow");
+
+        assert!(!stream.is_flushable());
+
+        assert!(!stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((0, true)));
+        assert_eq!(stream.send.off_front(), 20);
+
+        stream.send.ack_and_drop(22, 4);
+        assert_eq!(stream.send.data.len(), 2);
+
+        stream.send.ack_and_drop(20, 1);
+        assert_eq!(stream.send.data.len(), 2);
+    }
+
+    #[test]
+    fn send_emit_retransmit() {
+        let mut buf = [0; 5];
+
+        let mut stream = Stream::new(0, 20, true, true);
+
+        assert_eq!(stream.send.write(b"hello", false), Ok(5));
+        assert_eq!(stream.send.write(b"world", false), Ok(5));
+        assert_eq!(stream.send.write(b"olleh", false), Ok(5));
+        assert_eq!(stream.send.write(b"dlrow", true), Ok(5));
+        assert_eq!(stream.send.off_front(), 0);
+        assert_eq!(stream.send.data.len(), 4);
+
+        assert!(stream.is_flushable());
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..4]), Ok((4, false)));
+        assert_eq!(stream.send.off_front(), 4);
+        assert_eq!(&buf[..4], b"hell");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..4]), Ok((4, false)));
+        assert_eq!(stream.send.off_front(), 8);
+        assert_eq!(&buf[..4], b"owor");
+
+        stream.send.retransmit(3, 3);
+        assert_eq!(stream.send.off_front(), 3);
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..3]), Ok((3, false)));
+        assert_eq!(stream.send.off_front(), 8);
+        assert_eq!(&buf[..3], b"low");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..2]), Ok((2, false)));
+        assert_eq!(stream.send.off_front(), 10);
+        assert_eq!(&buf[..2], b"ld");
+
+        stream.send.ack_and_drop(7, 2);
+
+        stream.send.retransmit(8, 2);
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..2]), Ok((2, false)));
+        assert_eq!(stream.send.off_front(), 10);
+        assert_eq!(&buf[..2], b"ld");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..1]), Ok((1, false)));
+        assert_eq!(stream.send.off_front(), 11);
+        assert_eq!(&buf[..1], b"o");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((5, false)));
+        assert_eq!(stream.send.off_front(), 16);
+        assert_eq!(&buf[..5], b"llehd");
+
+        stream.send.retransmit(12, 2);
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..2]), Ok((2, false)));
+        assert_eq!(stream.send.off_front(), 16);
+        assert_eq!(&buf[..2], b"le");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((4, true)));
+        assert_eq!(stream.send.off_front(), 20);
+        assert_eq!(&buf[..4], b"lrow");
+
+        assert!(!stream.is_flushable());
+
+        assert!(!stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((0, true)));
+        assert_eq!(stream.send.off_front(), 20);
+
+        stream.send.retransmit(7, 12);
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((5, false)));
+        assert_eq!(stream.send.off_front(), 12);
+        assert_eq!(&buf[..5], b"rldol");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((5, false)));
+        assert_eq!(stream.send.off_front(), 17);
+        assert_eq!(&buf[..5], b"lehdl");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((2, false)));
+        assert_eq!(stream.send.off_front(), 20);
+        assert_eq!(&buf[..2], b"ro");
+
+        stream.send.ack_and_drop(12, 7);
+
+        stream.send.retransmit(7, 12);
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((5, false)));
+        assert_eq!(stream.send.off_front(), 12);
+        assert_eq!(&buf[..5], b"rldol");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((5, false)));
+        assert_eq!(stream.send.off_front(), 17);
+        assert_eq!(&buf[..5], b"lehdl");
+
+        assert!(stream.send.ready());
+        assert_eq!(stream.send.emit(&mut buf[..5]), Ok((2, false)));
+        assert_eq!(stream.send.off_front(), 20);
+        assert_eq!(&buf[..2], b"ro");
+    }
+
+    #[test]
+    fn rangebuf_split_off() {
+        let mut buf = RangeBuf::from(b"helloworld", 5, true);
+        assert_eq!(buf.start, 0);
+        assert_eq!(buf.pos, 0);
+        assert_eq!(buf.len, 10);
+        assert_eq!(buf.off, 5);
+        assert_eq!(buf.fin, true);
+
+        assert_eq!(buf.len(), 10);
+        assert_eq!(buf.off(), 5);
+        assert_eq!(buf.fin(), true);
+
+        assert_eq!(&buf[..], b"helloworld");
+
+        // Advance buffer.
+        buf.consume(5);
+
+        assert_eq!(buf.start, 0);
+        assert_eq!(buf.pos, 5);
+        assert_eq!(buf.len, 10);
+        assert_eq!(buf.off, 5);
+        assert_eq!(buf.fin, true);
+
+        assert_eq!(buf.len(), 5);
+        assert_eq!(buf.off(), 10);
+        assert_eq!(buf.fin(), true);
+
+        assert_eq!(&buf[..], b"world");
+
+        // Split buffer before position.
+        let mut new_buf = buf.split_off(3);
+
+        assert_eq!(buf.start, 0);
+        assert_eq!(buf.pos, 3);
+        assert_eq!(buf.len, 3);
+        assert_eq!(buf.off, 5);
+        assert_eq!(buf.fin, false);
+
+        assert_eq!(buf.len(), 0);
+        assert_eq!(buf.off(), 8);
+        assert_eq!(buf.fin(), false);
+
+        assert_eq!(&buf[..], b"");
+
+        assert_eq!(new_buf.start, 3);
+        assert_eq!(new_buf.pos, 5);
+        assert_eq!(new_buf.len, 7);
+        assert_eq!(new_buf.off, 8);
+        assert_eq!(new_buf.fin, true);
+
+        assert_eq!(new_buf.len(), 5);
+        assert_eq!(new_buf.off(), 10);
+        assert_eq!(new_buf.fin(), true);
+
+        assert_eq!(&new_buf[..], b"world");
+
+        // Advance buffer.
+        new_buf.consume(2);
+
+        assert_eq!(new_buf.start, 3);
+        assert_eq!(new_buf.pos, 7);
+        assert_eq!(new_buf.len, 7);
+        assert_eq!(new_buf.off, 8);
+        assert_eq!(new_buf.fin, true);
+
+        assert_eq!(new_buf.len(), 3);
+        assert_eq!(new_buf.off(), 12);
+        assert_eq!(new_buf.fin(), true);
+
+        assert_eq!(&new_buf[..], b"rld");
+
+        // Split buffer after position.
+        let mut new_new_buf = new_buf.split_off(5);
+
+        assert_eq!(new_buf.start, 3);
+        assert_eq!(new_buf.pos, 7);
+        assert_eq!(new_buf.len, 5);
+        assert_eq!(new_buf.off, 8);
+        assert_eq!(new_buf.fin, false);
+
+        assert_eq!(new_buf.len(), 1);
+        assert_eq!(new_buf.off(), 12);
+        assert_eq!(new_buf.fin(), false);
+
+        assert_eq!(&new_buf[..], b"r");
+
+        assert_eq!(new_new_buf.start, 8);
+        assert_eq!(new_new_buf.pos, 8);
+        assert_eq!(new_new_buf.len, 2);
+        assert_eq!(new_new_buf.off, 13);
+        assert_eq!(new_new_buf.fin, true);
+
+        assert_eq!(new_new_buf.len(), 2);
+        assert_eq!(new_new_buf.off(), 13);
+        assert_eq!(new_new_buf.fin(), true);
+
+        assert_eq!(&new_new_buf[..], b"ld");
+
+        // Advance buffer.
+        new_new_buf.consume(2);
+
+        assert_eq!(new_new_buf.start, 8);
+        assert_eq!(new_new_buf.pos, 10);
+        assert_eq!(new_new_buf.len, 2);
+        assert_eq!(new_new_buf.off, 13);
+        assert_eq!(new_new_buf.fin, true);
+
+        assert_eq!(new_new_buf.len(), 0);
+        assert_eq!(new_new_buf.off(), 15);
+        assert_eq!(new_new_buf.fin(), true);
+
+        assert_eq!(&new_new_buf[..], b"");
     }
 }
diff --git a/src/tls.rs b/src/tls.rs
index 8044bb7..ed7a5bf 100644
--- a/src/tls.rs
+++ b/src/tls.rs
@@ -28,6 +28,8 @@
 use std::ptr;
 use std::slice;
 
+use std::io::Write;
+
 use libc::c_char;
 use libc::c_int;
 use libc::c_long;
@@ -38,6 +40,7 @@
 use crate::Result;
 
 use crate::Connection;
+use crate::ConnectionError;
 
 use crate::crypto;
 use crate::octets;
@@ -64,6 +67,10 @@
 
 #[allow(non_camel_case_types)]
 #[repr(transparent)]
+struct SSL_SESSION(c_void);
+
+#[allow(non_camel_case_types)]
+#[repr(transparent)]
 struct X509_VERIFY_PARAM(c_void);
 
 #[allow(non_camel_case_types)]
@@ -73,8 +80,17 @@
 
 #[allow(non_camel_case_types)]
 #[repr(transparent)]
+#[cfg(windows)]
 struct X509(c_void);
 
+#[allow(non_camel_case_types)]
+#[repr(transparent)]
+struct STACK_OF(c_void);
+
+#[allow(non_camel_case_types)]
+#[repr(transparent)]
+struct CRYPTO_BUFFER(c_void);
+
 #[repr(C)]
 #[allow(non_camel_case_types)]
 struct SSL_QUIC_METHOD {
@@ -130,6 +146,8 @@
 
             let mut ctx = Context(ctx_raw);
 
+            ctx.set_session_callback();
+
             ctx.load_ca_certs()?;
 
             Ok(ctx)
@@ -235,6 +253,19 @@
         Ok(())
     }
 
+    fn set_session_callback(&mut self) {
+        unsafe {
+            // This is needed to enable the session callback on the client. On
+            // the server it doesn't do anything.
+            SSL_CTX_set_session_cache_mode(
+                self.as_ptr(),
+                0x0001, // SSL_SESS_CACHE_CLIENT
+            );
+
+            SSL_CTX_sess_set_new_cb(self.as_ptr(), new_session);
+        };
+    }
+
     pub fn set_verify(&mut self, verify: bool) {
         let mode = if verify {
             0x01 // SSL_VERIFY_PEER
@@ -276,6 +307,12 @@
         })
     }
 
+    pub fn set_ticket_key(&mut self, key: &[u8]) -> Result<()> {
+        map_result(unsafe {
+            SSL_CTX_set_tlsext_ticket_keys(self.as_ptr(), key.as_ptr(), key.len())
+        })
+    }
+
     pub fn set_early_data_enabled(&mut self, enabled: bool) {
         let enabled = if enabled { 1 } else { 0 };
 
@@ -300,6 +337,7 @@
 pub struct Handshake(*mut SSL);
 
 impl Handshake {
+    #[cfg(feature = "ffi")]
     pub unsafe fn from_ptr(ssl: *mut c_void) -> Handshake {
         let ssl = ssl as *mut SSL;
         Handshake(ssl)
@@ -328,6 +366,12 @@
         Ok(())
     }
 
+    pub fn use_legacy_codepoint(&self, use_legacy: bool) {
+        unsafe {
+            SSL_set_quic_use_legacy_codepoint(self.as_ptr(), use_legacy as c_int);
+        }
+    }
+
     pub fn set_state(&self, is_server: bool) {
         unsafe {
             if is_server {
@@ -392,6 +436,13 @@
         })
     }
 
+    #[cfg(test)]
+    pub fn set_options(&mut self, opts: u32) {
+        unsafe {
+            SSL_set_options(self.as_ptr(), opts);
+        }
+    }
+
     pub fn quic_transport_params(&self) -> &[u8] {
         let mut ptr: *const u8 = ptr::null();
         let mut len: usize = 0;
@@ -422,6 +473,28 @@
         unsafe { slice::from_raw_parts(ptr, len as usize) }
     }
 
+    pub fn set_session(&self, session: &[u8]) -> Result<()> {
+        unsafe {
+            let ctx = SSL_get_SSL_CTX(self.as_ptr());
+
+            if ctx.is_null() {
+                return Err(Error::TlsFail);
+            }
+
+            let session =
+                SSL_SESSION_from_bytes(session.as_ptr(), session.len(), ctx);
+
+            if session.is_null() {
+                return Err(Error::TlsFail);
+            }
+
+            let rc = SSL_set_session(self.as_ptr(), session);
+            SSL_SESSION_free(session);
+
+            map_result(rc)
+        }
+    }
+
     pub fn provide_data(&self, level: crypto::Level, buf: &[u8]) -> Result<()> {
         map_result_ssl(self, unsafe {
             SSL_provide_quic_data(self.as_ptr(), level, buf.as_ptr(), buf.len())
@@ -432,6 +505,16 @@
         map_result_ssl(self, unsafe { SSL_do_handshake(self.as_ptr()) })
     }
 
+    pub fn process_post_handshake(&self) -> Result<()> {
+        map_result_ssl(self, unsafe {
+            SSL_process_quic_post_handshake(self.as_ptr())
+        })
+    }
+
+    pub fn reset_early_data_reject(&self) {
+        unsafe { SSL_reset_early_data_reject(self.as_ptr()) };
+    }
+
     pub fn write_level(&self) -> crypto::Level {
         unsafe { SSL_quic_write_level(self.as_ptr()) }
     }
@@ -481,24 +564,23 @@
 
     pub fn peer_cert(&self) -> Option<Vec<u8>> {
         let peer_cert = unsafe {
-            let mut out: *mut libc::c_uchar = ptr::null_mut();
-
-            let x509 = SSL_get_peer_certificate(self.as_ptr());
-            if x509.is_null() {
+            let chain =
+                map_result_ptr(SSL_get0_peer_certificates(self.as_ptr())).ok()?;
+            if sk_num(chain) <= 0 {
                 return None;
             }
 
-            let out_len = i2d_X509(x509, &mut out);
-            if out_len <= 0 {
+            let buffer =
+                map_result_ptr(sk_value(chain, 0) as *const CRYPTO_BUFFER)
+                    .ok()?;
+            let out_len = CRYPTO_BUFFER_len(buffer);
+            if out_len == 0 {
                 return None;
             }
 
+            let out = CRYPTO_BUFFER_data(buffer);
             let der = slice::from_raw_parts(out, out_len as usize);
-            let der = der.to_vec();
-
-            OPENSSL_free(out as *mut c_void);
-
-            der
+            der.to_vec()
         };
 
         Some(peer_cert)
@@ -676,7 +758,7 @@
             &mut conn.pkt_num_spaces[packet::EPOCH_APPLICATION],
     };
 
-    if space.crypto_stream.send.push_slice(buf, false).is_err() {
+    if space.crypto_stream.send.write(buf, false).is_err() {
         return 0;
     }
 
@@ -706,7 +788,11 @@
     );
 
     let error: u64 = TLS_ALERT_ERROR + u64::from(alert);
-    conn.error = Some(error);
+    conn.local_error = Some(ConnectionError {
+        is_app: false,
+        error_code: error,
+        reason: Vec::new(),
+    });
 
     1
 }
@@ -779,6 +865,68 @@
     3 // SSL_TLSEXT_ERR_NOACK
 }
 
+#[no_mangle]
+extern fn new_session(ssl: *mut SSL, session: *mut SSL_SESSION) -> c_int {
+    let conn =
+        match get_ex_data_from_ptr::<Connection>(ssl, *QUICHE_EX_DATA_INDEX) {
+            Some(v) => v,
+
+            None => return 0,
+        };
+
+    let handshake = Handshake(ssl);
+    let peer_params = handshake.quic_transport_params();
+
+    // Serialize session object into buffer.
+    let session_bytes = unsafe {
+        let mut out: *mut u8 = std::ptr::null_mut();
+        let mut out_len: usize = 0;
+
+        if SSL_SESSION_to_bytes(session, &mut out, &mut out_len) == 0 {
+            return 0;
+        }
+
+        let session_bytes = std::slice::from_raw_parts(out, out_len).to_vec();
+        OPENSSL_free(out as *mut c_void);
+
+        session_bytes
+    };
+
+    let mut buffer =
+        Vec::with_capacity(8 + peer_params.len() + 8 + session_bytes.len());
+
+    let session_bytes_len = session_bytes.len() as u64;
+
+    if buffer.write(&session_bytes_len.to_be_bytes()).is_err() {
+        std::mem::forget(handshake);
+        return 0;
+    }
+
+    if buffer.write(&session_bytes).is_err() {
+        std::mem::forget(handshake);
+        return 0;
+    }
+
+    let peer_params_len = peer_params.len() as u64;
+
+    if buffer.write(&peer_params_len.to_be_bytes()).is_err() {
+        std::mem::forget(handshake);
+        return 0;
+    }
+
+    if buffer.write(&peer_params).is_err() {
+        std::mem::forget(handshake);
+        return 0;
+    }
+
+    conn.session = Some(buffer);
+
+    // Prevent handshake from being freed, as we still need it.
+    std::mem::forget(handshake);
+
+    0
+}
+
 fn map_result(bssl_result: c_int) -> Result<()> {
     match bssl_result {
         1 => Ok(()),
@@ -826,6 +974,9 @@
                 // SSL_ERROR_SYSCALL
                 5 => Err(Error::TlsFail),
 
+                // SSL_ERROR_PENDING_SESSION
+                11 => Err(Error::Done),
+
                 // SSL_ERROR_PENDING_CERTIFICATE
                 12 => Err(Error::Done),
 
@@ -835,6 +986,15 @@
                 // SSL_ERROR_PENDING_TICKET
                 14 => Err(Error::Done),
 
+                // SSL_ERROR_EARLY_DATA_REJECTED
+                15 => {
+                    ssl.reset_early_data_reject();
+                    Err(Error::Done)
+                },
+
+                // SSL_ERROR_WANT_CERTIFICATE_VERIFY
+                16 => Err(Error::Done),
+
                 _ => Err(Error::TlsFail),
             }
         },
@@ -884,6 +1044,10 @@
         ctx: *mut SSL_CTX, cb: extern fn(ssl: *mut SSL, line: *const c_char),
     );
 
+    fn SSL_CTX_set_tlsext_ticket_keys(
+        ctx: *mut SSL_CTX, key: *const u8, key_len: usize,
+    ) -> c_int;
+
     fn SSL_CTX_set_alpn_protos(
         ctx: *mut SSL_CTX, protos: *const u8, protos_len: usize,
     ) -> c_int;
@@ -903,6 +1067,13 @@
 
     fn SSL_CTX_set_early_data_enabled(ctx: *mut SSL_CTX, enabled: i32);
 
+    fn SSL_CTX_set_session_cache_mode(ctx: *mut SSL_CTX, mode: c_int) -> c_int;
+
+    fn SSL_CTX_sess_set_new_cb(
+        ctx: *mut SSL_CTX,
+        cb: extern fn(ssl: *mut SSL, session: *mut SSL_SESSION) -> c_int,
+    );
+
     // SSL
     fn SSL_get_ex_new_index(
         argl: c_long, argp: *const c_void, unused: *const c_void,
@@ -931,7 +1102,11 @@
         sigalg: u16, include_curve: i32,
     ) -> *const c_char;
 
-    fn SSL_get_peer_certificate(ssl: *mut SSL) -> *const X509;
+    fn SSL_set_session(ssl: *mut SSL, session: *mut SSL_SESSION) -> c_int;
+
+    fn SSL_get_SSL_CTX(ssl: *mut SSL) -> *mut SSL_CTX;
+
+    fn SSL_get0_peer_certificates(ssl: *mut SSL) -> *const STACK_OF;
 
     fn SSL_set_min_proto_version(ssl: *mut SSL, version: u16);
     fn SSL_set_max_proto_version(ssl: *mut SSL, version: u16);
@@ -944,10 +1119,15 @@
         ssl: *mut SSL, params: *const u8, params_len: usize,
     ) -> c_int;
 
+    #[cfg(test)]
+    fn SSL_set_options(ssl: *mut SSL, opts: u32) -> u32;
+
     fn SSL_set_quic_method(
         ssl: *mut SSL, quic_method: *const SSL_QUIC_METHOD,
     ) -> c_int;
 
+    fn SSL_set_quic_use_legacy_codepoint(ssl: *mut SSL, use_legacy: c_int);
+
     fn SSL_set_quic_early_data_context(
         ssl: *mut SSL, context: *const u8, context_len: usize,
     ) -> c_int;
@@ -964,6 +1144,10 @@
         ssl: *mut SSL, level: crypto::Level, data: *const u8, len: usize,
     ) -> c_int;
 
+    fn SSL_process_quic_post_handshake(ssl: *mut SSL) -> c_int;
+
+    fn SSL_reset_early_data_reject(ssl: *mut SSL);
+
     fn SSL_do_handshake(ssl: *mut SSL) -> c_int;
 
     fn SSL_quic_write_level(ssl: *mut SSL) -> crypto::Level;
@@ -981,6 +1165,17 @@
     // SSL_CIPHER
     fn SSL_CIPHER_get_id(cipher: *const SSL_CIPHER) -> c_uint;
 
+    // SSL_SESSION
+    fn SSL_SESSION_to_bytes(
+        session: *const SSL_SESSION, out: *mut *mut u8, out_len: *mut usize,
+    ) -> c_int;
+
+    fn SSL_SESSION_from_bytes(
+        input: *const u8, input_len: usize, ctx: *const SSL_CTX,
+    ) -> *mut SSL_SESSION;
+
+    fn SSL_SESSION_free(session: *mut SSL_SESSION);
+
     // X509_VERIFY_PARAM
     fn X509_VERIFY_PARAM_set1_host(
         param: *mut X509_VERIFY_PARAM, name: *const c_char, namelen: usize,
@@ -996,7 +1191,13 @@
     #[cfg(windows)]
     fn d2i_X509(px: *mut X509, input: *const *const u8, len: c_int) -> *mut X509;
 
-    fn i2d_X509(px: *const X509, out: *mut *mut u8) -> c_int;
+    // STACK_OF
+    fn sk_num(stack: *const STACK_OF) -> c_int;
+    fn sk_value(stack: *const STACK_OF, idx: c_int) -> *mut c_void;
+
+    // CRYPTO_BUFFER
+    fn CRYPTO_BUFFER_len(buffer: *const CRYPTO_BUFFER) -> usize;
+    fn CRYPTO_BUFFER_data(buffer: *const CRYPTO_BUFFER) -> *const u8;
 
     // ERR
     fn ERR_peek_error() -> c_uint;