Snap for 8564071 from d147438ec39fc518e5bd9532ab987efcf4305684 to mainline-permission-release

Change-Id: If57779a0e02b6015cfe9605bb0cc29f97244a5b2
diff --git a/Android.bp b/Android.bp
index 60cadc9..ab3fb0d 100644
--- a/Android.bp
+++ b/Android.bp
@@ -46,6 +46,24 @@
     ],
 }
 
+cc_library_headers {
+    name: "scudo_headers",
+    recovery_available: true,
+    vendor_ramdisk_available: true,
+
+    export_include_dirs: [
+        "standalone/include",
+    ],
+
+    apex_available: [
+        "com.android.runtime",
+    ],
+
+    visibility: [
+        "//system/core/debuggerd",
+    ],
+}
+
 cc_defaults {
     name: "libscudo_defaults",
     native_coverage: false,
@@ -93,8 +111,6 @@
         "external/scudo/standalone/include",
     ],
 
-    system_shared_libs: [],
-
     srcs: [
         "standalone/checksum.cpp",
         "standalone/common.cpp",
@@ -126,21 +142,17 @@
     },
 
     target: {
-        linux_glibc: {
-            enabled: true,
-        },
-        android: {
-            header_libs: ["bionic_libc_platform_headers"],
-        },
-        linux_bionic: {
-            header_libs: ["bionic_libc_platform_headers"],
+        bionic: {
+            system_shared_libs: [],
+            header_libs: [
+                "libc_headers",
+                "bionic_libc_platform_headers",
+            ],
         },
         native_bridge: {
             cflags: ["-DSCUDO_DISABLE_TBI"],
         },
     },
-
-    header_libs: ["libc_headers"],
 }
 
 cc_library_static {
@@ -199,10 +211,7 @@
         "-fno-emulated-tls",
     ],
     target: {
-        android: {
-            header_libs: ["bionic_libc_platform_headers"],
-        },
-        linux_bionic: {
+        bionic: {
             header_libs: ["bionic_libc_platform_headers"],
         },
     },
diff --git a/standalone/allocator_config.h b/standalone/allocator_config.h
index 8e103f2..e6f46b5 100644
--- a/standalone/allocator_config.h
+++ b/standalone/allocator_config.h
@@ -40,6 +40,12 @@
 //   // eg: Ptr = Base + (CompactPtr << Scale).
 //   typedef u32 PrimaryCompactPtrT;
 //   static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
+//   // Indicates support for offsetting the start of a region by
+//   // a random number of pages. Only used with primary64.
+//   static const bool PrimaryEnableRandomOffset = true;
+//   // Call map for user memory with at least this size. Only used with
+//   // primary64.
+//   static const uptr PrimaryMapSizeIncrement = 1UL << 18;
 //   // Defines the minimal & maximal release interval that can be set.
 //   static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
 //   static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
@@ -54,13 +60,15 @@
 
 struct DefaultConfig {
   using SizeClassMap = DefaultSizeClassMap;
-  static const bool MaySupportMemoryTagging = false;
+  static const bool MaySupportMemoryTagging = true;
 
 #if SCUDO_CAN_USE_PRIMARY64
   typedef SizeClassAllocator64<DefaultConfig> Primary;
   static const uptr PrimaryRegionSizeLog = 32U;
   typedef uptr PrimaryCompactPtrT;
   static const uptr PrimaryCompactPtrScale = 0;
+  static const bool PrimaryEnableRandomOffset = true;
+  static const uptr PrimaryMapSizeIncrement = 1UL << 18;
 #else
   typedef SizeClassAllocator32<DefaultConfig> Primary;
   static const uptr PrimaryRegionSizeLog = 19U;
@@ -79,7 +87,6 @@
 
   template <class A> using TSDRegistryT = TSDRegistryExT<A>; // Exclusive
 };
-
 struct AndroidConfig {
   using SizeClassMap = AndroidSizeClassMap;
   static const bool MaySupportMemoryTagging = true;
@@ -89,6 +96,8 @@
   static const uptr PrimaryRegionSizeLog = 28U;
   typedef u32 PrimaryCompactPtrT;
   static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
+  static const bool PrimaryEnableRandomOffset = true;
+  static const uptr PrimaryMapSizeIncrement = 1UL << 18;
 #else
   typedef SizeClassAllocator32<AndroidConfig> Primary;
   static const uptr PrimaryRegionSizeLog = 18U;
@@ -118,6 +127,8 @@
   static const uptr PrimaryRegionSizeLog = 27U;
   typedef u32 PrimaryCompactPtrT;
   static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
+  static const bool PrimaryEnableRandomOffset = true;
+  static const uptr PrimaryMapSizeIncrement = 1UL << 18;
 #else
   typedef SizeClassAllocator32<AndroidSvelteConfig> Primary;
   static const uptr PrimaryRegionSizeLog = 16U;
@@ -140,12 +151,14 @@
 
 #if SCUDO_CAN_USE_PRIMARY64
 struct FuchsiaConfig {
-  using SizeClassMap = DefaultSizeClassMap;
+  using SizeClassMap = FuchsiaSizeClassMap;
   static const bool MaySupportMemoryTagging = false;
 
   typedef SizeClassAllocator64<FuchsiaConfig> Primary;
   static const uptr PrimaryRegionSizeLog = 30U;
   typedef u32 PrimaryCompactPtrT;
+  static const bool PrimaryEnableRandomOffset = true;
+  static const uptr PrimaryMapSizeIncrement = 1UL << 18;
   static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
   static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
   static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
@@ -154,12 +167,34 @@
   template <class A>
   using TSDRegistryT = TSDRegistrySharedT<A, 8U, 4U>; // Shared, max 8 TSDs.
 };
+
+struct TrustyConfig {
+  using SizeClassMap = TrustySizeClassMap;
+  static const bool MaySupportMemoryTagging = false;
+
+  typedef SizeClassAllocator64<TrustyConfig> Primary;
+  // Some apps have 1 page of heap total, so small regions are necessary.
+  static const uptr PrimaryRegionSizeLog = 10U;
+  typedef u32 PrimaryCompactPtrT;
+  static const bool PrimaryEnableRandomOffset = false;
+  // Trusty is extremely memory-constrained, so round up map calls minimally.
+  static const uptr PrimaryMapSizeIncrement = 1UL << 4;
+  static const uptr PrimaryCompactPtrScale = SCUDO_MIN_ALIGNMENT_LOG;
+  static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
+  static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
+
+  typedef MapAllocatorNoCache SecondaryCache;
+  template <class A>
+  using TSDRegistryT = TSDRegistrySharedT<A, 1U, 1U>; // Shared, max 1 TSD.
+};
 #endif
 
 #if SCUDO_ANDROID
 typedef AndroidConfig Config;
 #elif SCUDO_FUCHSIA
 typedef FuchsiaConfig Config;
+#elif SCUDO_TRUSTY
+typedef TrustyConfig Config;
 #else
 typedef DefaultConfig Config;
 #endif
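
The per-config knobs documented above (PrimaryEnableRandomOffset and
PrimaryMapSizeIncrement) make the Primary64 mapping policy tunable per
platform. As a hedged illustration, a hypothetical config alongside the ones
above could look like the sketch below; MyPlatformConfig and its values are
assumptions for illustration, not part of this change:

struct MyPlatformConfig {
  using SizeClassMap = DefaultSizeClassMap;
  static const bool MaySupportMemoryTagging = false;

  typedef SizeClassAllocator64<MyPlatformConfig> Primary;
  static const uptr PrimaryRegionSizeLog = 30U;
  typedef uptr PrimaryCompactPtrT;
  static const uptr PrimaryCompactPtrScale = 0;
  // Keep the randomized region start for hardening...
  static const bool PrimaryEnableRandomOffset = true;
  // ...but grow user mappings in 64K steps to limit RSS spikes.
  static const uptr PrimaryMapSizeIncrement = 1UL << 16;
  static const s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
  static const s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;

  typedef MapAllocatorNoCache SecondaryCache;
  template <class A>
  using TSDRegistryT = TSDRegistrySharedT<A, 2U, 1U>; // Shared, max 2 TSDs.
};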
diff --git a/standalone/benchmarks/malloc_benchmark.cpp b/standalone/benchmarks/malloc_benchmark.cpp
index 661fff4..2adec88 100644
--- a/standalone/benchmarks/malloc_benchmark.cpp
+++ b/standalone/benchmarks/malloc_benchmark.cpp
@@ -29,7 +29,6 @@
   std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT,
                                                            Deleter);
   CurrentAllocator = Allocator.get();
-  Allocator->reset();
 
   const size_t NBytes = State.range(0);
   size_t PageSize = scudo::getPageSizeCached();
@@ -70,7 +69,6 @@
   std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT,
                                                            Deleter);
   CurrentAllocator = Allocator.get();
-  Allocator->reset();
 
   const size_t NumIters = State.range(0);
   size_t PageSize = scudo::getPageSizeCached();
diff --git a/standalone/bytemap.h b/standalone/bytemap.h
index e0d54f4..248e096 100644
--- a/standalone/bytemap.h
+++ b/standalone/bytemap.h
@@ -17,10 +17,9 @@
 
 template <uptr Size> class FlatByteMap {
 public:
-  void initLinkerInitialized() {}
-  void init() { memset(Map, 0, sizeof(Map)); }
+  void init() { DCHECK(Size == 0 || Map[0] == 0); }
 
-  void unmapTestOnly() {}
+  void unmapTestOnly() { memset(Map, 0, Size); }
 
   void set(uptr Index, u8 Value) {
     DCHECK_LT(Index, Size);
@@ -36,7 +35,7 @@
   void enable() {}
 
 private:
-  u8 Map[Size];
+  u8 Map[Size] = {};
 };
 
 } // namespace scudo
diff --git a/standalone/checksum.cpp b/standalone/checksum.cpp
index 05d4ba5..2c27739 100644
--- a/standalone/checksum.cpp
+++ b/standalone/checksum.cpp
@@ -8,6 +8,7 @@
 
 #include "checksum.h"
 #include "atomic_helpers.h"
+#include "chunk.h"
 
 #if defined(__x86_64__) || defined(__i386__)
 #include <cpuid.h>
diff --git a/standalone/checksum.h b/standalone/checksum.h
index a63b1b4..0f787ce 100644
--- a/standalone/checksum.h
+++ b/standalone/checksum.h
@@ -12,12 +12,16 @@
 #include "internal_defs.h"
 
 // Hardware CRC32 is supported at compilation via the following:
-// - for i386 & x86_64: -msse4.2
+// - for i386 & x86_64: -mcrc32 (earlier: -msse4.2)
 // - for ARM & AArch64: -march=armv8-a+crc or -mcrc
 // An additional check must be performed at runtime as well to make sure the
 // emitted instructions are valid on the target host.
 
-#ifdef __SSE4_2__
+#if defined(__CRC32__)
+// NB: clang has <crc32intrin.h> but GCC does not
+#include <smmintrin.h>
+#define CRC32_INTRINSIC FIRST_32_SECOND_64(__builtin_ia32_crc32si, __builtin_ia32_crc32di)
+#elif defined(__SSE4_2__)
 #include <smmintrin.h>
 #define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64)
 #endif
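
The compile-time gate now prefers __CRC32__ (defined by clang and GCC when the
crc32 feature is enabled, e.g. via -mcrc32) with __SSE4_2__ as a fallback. A
standalone sketch, not part of the tree, showing that both branches reach the
same CRC-32C instruction; build with e.g. `clang++ -mcrc32` or
`clang++ -msse4.2` on x86-64:

#include <smmintrin.h>
#include <cstdint>
#include <cstdio>

static uint32_t crc32c(uint32_t Crc, uint64_t Data) {
#if defined(__CRC32__)
  // The same builtin that CRC32_INTRINSIC resolves to on 64-bit targets.
  return static_cast<uint32_t>(__builtin_ia32_crc32di(Crc, Data));
#elif defined(__SSE4_2__)
  return static_cast<uint32_t>(_mm_crc32_u64(Crc, Data));
#else
#error "build with -mcrc32 or -msse4.2"
#endif
}

int main() {
  printf("%08x\n", crc32c(0xffffffffu, 0x0123456789abcdefull));
  return 0;
}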
diff --git a/standalone/chunk.h b/standalone/chunk.h
index 69b8e1b..0581420 100644
--- a/standalone/chunk.h
+++ b/standalone/chunk.h
@@ -25,7 +25,7 @@
   // as opposed to only for crc32_hw.cpp. This means that other hardware
   // specific instructions were likely emitted at other places, and as a result
   // there is no reason to not use it here.
-#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
   u32 Crc = static_cast<u32>(CRC32_INTRINSIC(Seed, Value));
   for (uptr I = 0; I < ArraySize; I++)
     Crc = static_cast<u32>(CRC32_INTRINSIC(Crc, Array[I]));
@@ -42,7 +42,7 @@
       Checksum = computeBSDChecksum(Checksum, Array[I]);
     return Checksum;
   }
-#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
 }
 
 namespace Chunk {
diff --git a/standalone/combined.h b/standalone/combined.h
index 8080d67..365720d 100644
--- a/standalone/combined.h
+++ b/standalone/combined.h
@@ -132,7 +132,7 @@
   typedef GlobalQuarantine<QuarantineCallback, void> QuarantineT;
   typedef typename QuarantineT::CacheT QuarantineCacheT;
 
-  void initLinkerInitialized() {
+  void init() {
     performSanityChecks();
 
     // Check if hardware CRC32 is supported in the binary and by the platform,
@@ -166,11 +166,10 @@
     QuarantineMaxChunkSize =
         static_cast<u32>(getFlags()->quarantine_max_chunk_size);
 
-    Stats.initLinkerInitialized();
+    Stats.init();
     const s32 ReleaseToOsIntervalMs = getFlags()->release_to_os_interval_ms;
-    Primary.initLinkerInitialized(ReleaseToOsIntervalMs);
-    Secondary.initLinkerInitialized(&Stats, ReleaseToOsIntervalMs);
-
+    Primary.init(ReleaseToOsIntervalMs);
+    Secondary.init(&Stats, ReleaseToOsIntervalMs);
     Quarantine.init(
         static_cast<uptr>(getFlags()->quarantine_size_kb << 10),
         static_cast<uptr>(getFlags()->thread_local_quarantine_size_kb << 10));
@@ -206,15 +205,24 @@
 #endif // GWP_ASAN_HOOKS
   }
 
+#ifdef GWP_ASAN_HOOKS
+  const gwp_asan::AllocationMetadata *getGwpAsanAllocationMetadata() {
+    return GuardedAlloc.getMetadataRegion();
+  }
+
+  const gwp_asan::AllocatorState *getGwpAsanAllocatorState() {
+    return GuardedAlloc.getAllocatorState();
+  }
+#endif // GWP_ASAN_HOOKS
+
   ALWAYS_INLINE void initThreadMaybe(bool MinimalInit = false) {
     TSDRegistry.initThreadMaybe(this, MinimalInit);
   }
 
-  void reset() { memset(this, 0, sizeof(*this)); }
-
   void unmapTestOnly() {
-    TSDRegistry.unmapTestOnly();
+    TSDRegistry.unmapTestOnly(this);
     Primary.unmapTestOnly();
+    Secondary.unmapTestOnly();
 #ifdef GWP_ASAN_HOOKS
     if (getFlags()->GWP_ASAN_InstallSignalHandlers)
       gwp_asan::segv_handler::uninstallSignalHandlers();
@@ -225,9 +233,7 @@
   TSDRegistryT *getTSDRegistry() { return &TSDRegistry; }
 
   // The Cache must be provided zero-initialized.
-  void initCache(CacheT *Cache) {
-    Cache->initLinkerInitialized(&Stats, &Primary);
-  }
+  void initCache(CacheT *Cache) { Cache->init(&Stats, &Primary); }
 
   // Release the resources used by a TSD, which involves:
   // - draining the local quarantine cache to the global quarantine;
@@ -573,9 +579,6 @@
       reportAllocationSizeTooBig(NewSize, 0, MaxAllowedMallocSize);
     }
 
-    void *OldTaggedPtr = OldPtr;
-    OldPtr = getHeaderTaggedPointer(OldPtr);
-
     // The following cases are handled by the C wrappers.
     DCHECK_NE(OldPtr, nullptr);
     DCHECK_NE(NewSize, 0);
@@ -595,6 +598,9 @@
     }
 #endif // GWP_ASAN_HOOKS
 
+    void *OldTaggedPtr = OldPtr;
+    OldPtr = getHeaderTaggedPointer(OldPtr);
+
     if (UNLIKELY(!isAligned(reinterpret_cast<uptr>(OldPtr), MinAlignment)))
       reportMisalignedPointer(AllocatorAction::Reallocating, OldPtr);
 
@@ -643,7 +649,7 @@
           if (ClassId) {
             resizeTaggedChunk(reinterpret_cast<uptr>(OldTaggedPtr) + OldSize,
                               reinterpret_cast<uptr>(OldTaggedPtr) + NewSize,
-                              NewSize, BlockEnd);
+                              NewSize, untagPointer(BlockEnd));
             storePrimaryAllocationStackMaybe(Options, OldPtr);
           } else {
             storeSecondaryAllocationStackMaybe(Options, OldPtr, NewSize);
@@ -698,7 +704,7 @@
   // function. This can be called with a null buffer or zero size for buffer
   // sizing purposes.
   uptr getStats(char *Buffer, uptr Size) {
-    ScopedString Str(1024);
+    ScopedString Str;
     disable();
     const uptr Length = getStats(&Str) + 1;
     enable();
@@ -712,7 +718,7 @@
   }
 
   void printStats() {
-    ScopedString Str(1024);
+    ScopedString Str;
     disable();
     getStats(&Str);
     enable();
@@ -731,6 +737,8 @@
   void iterateOverChunks(uptr Base, uptr Size, iterate_callback Callback,
                          void *Arg) {
     initThreadMaybe();
+    if (archSupportsMemoryTagging())
+      Base = untagPointer(Base);
     const uptr From = Base;
     const uptr To = Base + Size;
     bool MayHaveTaggedPrimary = allocatorSupportsMemoryTagging<Params>() &&
@@ -912,7 +920,7 @@
     if (!Depot->find(Hash, &RingPos, &Size))
       return;
     for (unsigned I = 0; I != Size && I != MaxTraceSize; ++I)
-      Trace[I] = (*Depot)[RingPos + I];
+      Trace[I] = static_cast<uintptr_t>((*Depot)[RingPos + I]);
   }
 
   static void getErrorInfo(struct scudo_error_info *ErrorInfo,
@@ -1156,6 +1164,7 @@
   // address tags against chunks. To allow matching in this case we store the
   // address tag in the first byte of the chunk.
   void storeEndMarker(uptr End, uptr Size, uptr BlockEnd) {
+    DCHECK_EQ(BlockEnd, untagPointer(BlockEnd));
     uptr UntaggedEnd = untagPointer(End);
     if (UntaggedEnd != BlockEnd) {
       storeTag(UntaggedEnd);
@@ -1262,8 +1271,8 @@
   }
 
   static const size_t NumErrorReports =
-      sizeof(((scudo_error_info *)0)->reports) /
-      sizeof(((scudo_error_info *)0)->reports[0]);
+      sizeof(((scudo_error_info *)nullptr)->reports) /
+      sizeof(((scudo_error_info *)nullptr)->reports[0]);
 
   static void getInlineErrorInfo(struct scudo_error_info *ErrorInfo,
                                  size_t &NextErrorReport, uintptr_t FaultAddr,
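
The new getGwpAsanAllocatorState/getGwpAsanAllocationMetadata accessors expose
the GWP-ASan regions to an out-of-process crash handler. A hedged sketch of
the intended consumption; the gwp_asan crash_handler API and the FaultAddr
value are assumptions of this sketch, not part of the change:

#ifdef GWP_ASAN_HOOKS
#include "gwp_asan/common.h"
#include "gwp_asan/crash_handler.h"

// AllocatorT is any scudo::Allocator<Config>; FaultAddr would come from a
// crash report.
template <class AllocatorT>
bool faultIsFromGwpAsan(AllocatorT &A, uintptr_t FaultAddr) {
  const gwp_asan::AllocatorState *State = A.getGwpAsanAllocatorState();
  const gwp_asan::AllocationMetadata *Metadata =
      A.getGwpAsanAllocationMetadata();
  (void)Metadata; // paired with State when diagnosing the fault
  return __gwp_asan_error_is_mine(State, FaultAddr);
}
#endif // GWP_ASAN_HOOKS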
diff --git a/standalone/common.h b/standalone/common.h
index 3f27a3d..bc3dfec 100644
--- a/standalone/common.h
+++ b/standalone/common.h
@@ -13,6 +13,7 @@
 
 #include "fuchsia.h"
 #include "linux.h"
+#include "trusty.h"
 
 #include <stddef.h>
 #include <string.h>
diff --git a/standalone/crc32_hw.cpp b/standalone/crc32_hw.cpp
index 62841ba..d13c615 100644
--- a/standalone/crc32_hw.cpp
+++ b/standalone/crc32_hw.cpp
@@ -10,10 +10,10 @@
 
 namespace scudo {
 
-#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
 u32 computeHardwareCRC32(u32 Crc, uptr Data) {
   return static_cast<u32>(CRC32_INTRINSIC(Crc, Data));
 }
-#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
+#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32)
 
 } // namespace scudo
diff --git a/standalone/flags.inc b/standalone/flags.inc
index b5cab47..690d889 100644
--- a/standalone/flags.inc
+++ b/standalone/flags.inc
@@ -37,12 +37,6 @@
 SCUDO_FLAG(bool, pattern_fill_contents, false,
            "Pattern fill chunk contents on allocation.")
 
-SCUDO_FLAG(int, rss_limit_mb, -1,
-           "Enforce an upper limit (in megabytes) to the process RSS. The "
-           "allocator will terminate or return NULL when allocations are "
-           "attempted past that limit (depending on may_return_null). Negative "
-           "values disable the feature.")
-
 SCUDO_FLAG(bool, may_return_null, true,
            "Indicate whether the allocator should terminate instead of "
            "returning NULL in otherwise non-fatal error scenarios, eg: OOM, "
diff --git a/standalone/internal_defs.h b/standalone/internal_defs.h
index bbf7631..621fc9c 100644
--- a/standalone/internal_defs.h
+++ b/standalone/internal_defs.h
@@ -78,16 +78,16 @@
 
 namespace scudo {
 
-typedef unsigned long uptr;
-typedef unsigned char u8;
-typedef unsigned short u16;
-typedef unsigned int u32;
-typedef unsigned long long u64;
-typedef signed long sptr;
-typedef signed char s8;
-typedef signed short s16;
-typedef signed int s32;
-typedef signed long long s64;
+typedef uintptr_t uptr;
+typedef uint8_t u8;
+typedef uint16_t u16;
+typedef uint32_t u32;
+typedef uint64_t u64;
+typedef intptr_t sptr;
+typedef int8_t s8;
+typedef int16_t s16;
+typedef int32_t s32;
+typedef int64_t s64;
 
 // The following two functions have platform specific implementations.
 void outputRaw(const char *Buffer);
@@ -105,14 +105,11 @@
 
 void NORETURN reportCheckFailed(const char *File, int Line,
                                 const char *Condition, u64 Value1, u64 Value2);
-
 #define CHECK_IMPL(C1, Op, C2)                                                 \
   do {                                                                         \
-    scudo::u64 V1 = (scudo::u64)(C1);                                          \
-    scudo::u64 V2 = (scudo::u64)(C2);                                          \
-    if (UNLIKELY(!(V1 Op V2))) {                                               \
-      scudo::reportCheckFailed(__FILE__, __LINE__,                             \
-                               "(" #C1 ") " #Op " (" #C2 ")", V1, V2);         \
+    if (UNLIKELY(!(C1 Op C2))) {                                               \
+      scudo::reportCheckFailed(__FILE__, __LINE__, #C1 " " #Op " " #C2,        \
+                               (scudo::u64)C1, (scudo::u64)C2);                \
       scudo::die();                                                            \
     }                                                                          \
   } while (false)
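
Note the semantic shift in CHECK_IMPL: the old macro widened both operands to
u64 before comparing, while the new one compares them with their native types
and casts only for the failure report. A simplified sketch of what
CHECK_EQ(A, B) now expands to:

if (UNLIKELY(!(A == B))) {
  scudo::reportCheckFailed(__FILE__, __LINE__, "A == B",
                           (scudo::u64)A, (scudo::u64)B);
  scudo::die();
}

The operands are evaluated a second time for the report on the failure path,
so CHECK arguments should remain side-effect free.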
diff --git a/standalone/linux.cpp b/standalone/linux.cpp
index 301bdcd..c77c1bb 100644
--- a/standalone/linux.cpp
+++ b/standalone/linux.cpp
@@ -10,7 +10,6 @@
 
 #if SCUDO_LINUX
 
-#include "atomic_helpers.h"
 #include "common.h"
 #include "linux.h"
 #include "mutex.h"
@@ -59,11 +58,8 @@
   if (Flags & MAP_MEMTAG)
     MmapProt |= PROT_MTE;
 #endif
-  if (Addr) {
-    // Currently no scenario for a noaccess mapping with a fixed address.
-    DCHECK_EQ(Flags & MAP_NOACCESS, 0);
+  if (Addr)
     MmapFlags |= MAP_FIXED;
-  }
   void *P = mmap(Addr, Size, MmapProt, MmapFlags, -1, 0);
   if (P == MAP_FAILED) {
     if (!(Flags & MAP_ALLOWNOMEM) || errno != ENOMEM)
@@ -90,41 +86,10 @@
     dieOnMapUnmapError();
 }
 
-static bool madviseNeedsMemset() {
-  const uptr Size = getPageSizeCached();
-  char *P = reinterpret_cast<char *>(mmap(0, Size, PROT_READ | PROT_WRITE,
-                                          MAP_PRIVATE | MAP_ANONYMOUS, -1, 0));
-  if (!P)
-    dieOnMapUnmapError(errno == ENOMEM ? Size : 0);
-  *P = 1;
-  while (madvise(P, Size, MADV_DONTNEED) == -1 && errno == EAGAIN) {
-  }
-  const bool R = (*P != 0);
-  if (munmap(P, Size) != 0)
-    dieOnMapUnmapError();
-  return R;
-}
-
-static bool madviseNeedsMemsetCached() {
-  static atomic_u8 Cache;
-  enum State : u8 { Unknown = 0, Yes = 1, No = 2 };
-  State NeedsMemset = static_cast<State>(atomic_load_relaxed(&Cache));
-  if (NeedsMemset == Unknown) {
-    NeedsMemset = madviseNeedsMemset() ? Yes : No;
-    atomic_store_relaxed(&Cache, NeedsMemset);
-  }
-  return NeedsMemset == Yes;
-}
-
 void releasePagesToOS(uptr BaseAddress, uptr Offset, uptr Size,
                       UNUSED MapPlatformData *Data) {
   void *Addr = reinterpret_cast<void *>(BaseAddress + Offset);
-  if (madviseNeedsMemsetCached()) {
-    // Workaround for QEMU-user ignoring MADV_DONTNEED.
-    // https://github.com/qemu/qemu/blob/b1cffefa1b163bce9aebc3416f562c1d3886eeaa/linux-user/syscall.c#L11941
-    // https://bugs.launchpad.net/qemu/+bug/1926521
-    memset(Addr, 0, Size);
-  }
+
   while (madvise(Addr, Size, MADV_DONTNEED) == -1 && errno == EAGAIN) {
   }
 }
diff --git a/standalone/local_cache.h b/standalone/local_cache.h
index 5003937..f46645f 100644
--- a/standalone/local_cache.h
+++ b/standalone/local_cache.h
@@ -49,18 +49,14 @@
     CompactPtrT Batch[MaxNumCached];
   };
 
-  void initLinkerInitialized(GlobalStats *S, SizeClassAllocator *A) {
-    Stats.initLinkerInitialized();
+  void init(GlobalStats *S, SizeClassAllocator *A) {
+    DCHECK(isEmpty());
+    Stats.init();
     if (LIKELY(S))
       S->link(&Stats);
     Allocator = A;
   }
 
-  void init(GlobalStats *S, SizeClassAllocator *A) {
-    memset(this, 0, sizeof(*this));
-    initLinkerInitialized(S, A);
-  }
-
   void destroy(GlobalStats *S) {
     drain();
     if (LIKELY(S))
diff --git a/standalone/memtag.h b/standalone/memtag.h
index 4bdce16..7578aff 100644
--- a/standalone/memtag.h
+++ b/standalone/memtag.h
@@ -18,12 +18,14 @@
 
 namespace scudo {
 
-#if defined(__aarch64__) || defined(SCUDO_FUZZ)
+#if (__clang_major__ >= 12 && defined(__aarch64__)) || defined(SCUDO_FUZZ)
 
 // We assume that Top-Byte Ignore is enabled if the architecture supports memory
 // tagging. Not all operating systems enable TBI, so we only claim architectural
 // support for memory tagging if the operating system enables TBI.
-#if SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI)
+// HWASan uses the top byte for its own purposes; Scudo should not touch it.
+#if SCUDO_LINUX && !defined(SCUDO_DISABLE_TBI) &&                              \
+    !__has_feature(hwaddress_sanitizer)
 inline constexpr bool archSupportsMemoryTagging() { return true; }
 #else
 inline constexpr bool archSupportsMemoryTagging() { return false; }
@@ -39,23 +41,23 @@
 
 inline constexpr bool archSupportsMemoryTagging() { return false; }
 
-inline uptr archMemoryTagGranuleSize() {
+inline NORETURN uptr archMemoryTagGranuleSize() {
   UNREACHABLE("memory tagging not supported");
 }
 
-inline uptr untagPointer(uptr Ptr) {
+inline NORETURN uptr untagPointer(uptr Ptr) {
   (void)Ptr;
   UNREACHABLE("memory tagging not supported");
 }
 
-inline uint8_t extractTag(uptr Ptr) {
+inline NORETURN uint8_t extractTag(uptr Ptr) {
   (void)Ptr;
   UNREACHABLE("memory tagging not supported");
 }
 
 #endif
 
-#if defined(__aarch64__)
+#if __clang_major__ >= 12 && defined(__aarch64__)
 
 #if SCUDO_LINUX
 
@@ -67,49 +69,58 @@
 }
 
 inline bool systemDetectsMemoryTagFaultsTestOnly() {
+#ifndef PR_SET_TAGGED_ADDR_CTRL
+#define PR_SET_TAGGED_ADDR_CTRL 54
+#endif
 #ifndef PR_GET_TAGGED_ADDR_CTRL
 #define PR_GET_TAGGED_ADDR_CTRL 56
 #endif
+#ifndef PR_TAGGED_ADDR_ENABLE
+#define PR_TAGGED_ADDR_ENABLE (1UL << 0)
+#endif
 #ifndef PR_MTE_TCF_SHIFT
 #define PR_MTE_TCF_SHIFT 1
 #endif
+#ifndef PR_MTE_TAG_SHIFT
+#define PR_MTE_TAG_SHIFT 3
+#endif
 #ifndef PR_MTE_TCF_NONE
 #define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
 #endif
+#ifndef PR_MTE_TCF_SYNC
+#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
+#endif
 #ifndef PR_MTE_TCF_MASK
 #define PR_MTE_TCF_MASK (3UL << PR_MTE_TCF_SHIFT)
 #endif
-  return (static_cast<unsigned long>(
-              prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0)) &
-          PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE;
+  int res = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
+  if (res == -1)
+    return false;
+  return (static_cast<unsigned long>(res) & PR_MTE_TCF_MASK) != PR_MTE_TCF_NONE;
+}
+
+inline void enableSystemMemoryTaggingTestOnly() {
+  prctl(PR_SET_TAGGED_ADDR_CTRL,
+        PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC | (0xfffe << PR_MTE_TAG_SHIFT),
+        0, 0, 0);
 }
 
 #else // !SCUDO_LINUX
 
 inline bool systemSupportsMemoryTagging() { return false; }
 
-inline bool systemDetectsMemoryTagFaultsTestOnly() { return false; }
+inline NORETURN bool systemDetectsMemoryTagFaultsTestOnly() {
+  UNREACHABLE("memory tagging not supported");
+}
+
+inline NORETURN void enableSystemMemoryTaggingTestOnly() {
+  UNREACHABLE("memory tagging not supported");
+}
 
 #endif // SCUDO_LINUX
 
-inline void disableMemoryTagChecksTestOnly() {
-  __asm__ __volatile__(
-      R"(
-      .arch_extension memtag
-      msr tco, #1
-      )");
-}
-
-inline void enableMemoryTagChecksTestOnly() {
-  __asm__ __volatile__(
-      R"(
-      .arch_extension memtag
-      msr tco, #0
-      )");
-}
-
 class ScopedDisableMemoryTagChecks {
-  size_t PrevTCO;
+  uptr PrevTCO;
 
 public:
   ScopedDisableMemoryTagChecks() {
@@ -134,6 +145,7 @@
 };
 
 inline uptr selectRandomTag(uptr Ptr, uptr ExcludeMask) {
+  ExcludeMask |= 1; // Always exclude Tag 0.
   uptr TaggedPtr;
   __asm__ __volatile__(
       R"(
@@ -145,10 +157,14 @@
   return TaggedPtr;
 }
 
-inline uptr addFixedTag(uptr Ptr, uptr Tag) { return Ptr | (Tag << 56); }
+inline uptr addFixedTag(uptr Ptr, uptr Tag) {
+  DCHECK_LT(Tag, 16);
+  DCHECK_EQ(untagPointer(Ptr), Ptr);
+  return Ptr | (Tag << 56);
+}
 
 inline uptr storeTags(uptr Begin, uptr End) {
-  DCHECK(Begin % 16 == 0);
+  DCHECK_EQ(0, Begin % 16);
   uptr LineSize, Next, Tmp;
   __asm__ __volatile__(
       R"(
@@ -208,10 +224,12 @@
         [Tmp] "=&r"(Tmp)
       : [End] "r"(End)
       : "memory");
+  DCHECK_EQ(0, Begin % 16);
   return Begin;
 }
 
 inline void storeTag(uptr Ptr) {
+  DCHECK_EQ(0, Ptr % 16);
   __asm__ __volatile__(R"(
     .arch_extension memtag
     stg %0, [%0]
@@ -222,6 +240,7 @@
 }
 
 inline uptr loadTag(uptr Ptr) {
+  DCHECK_EQ(0, Ptr % 16);
   uptr TaggedPtr = Ptr;
   __asm__ __volatile__(
       R"(
@@ -236,19 +255,15 @@
 
 #else
 
-inline bool systemSupportsMemoryTagging() {
+inline NORETURN bool systemSupportsMemoryTagging() {
   UNREACHABLE("memory tagging not supported");
 }
 
-inline bool systemDetectsMemoryTagFaultsTestOnly() {
+inline NORETURN bool systemDetectsMemoryTagFaultsTestOnly() {
   UNREACHABLE("memory tagging not supported");
 }
 
-inline void disableMemoryTagChecksTestOnly() {
-  UNREACHABLE("memory tagging not supported");
-}
-
-inline void enableMemoryTagChecksTestOnly() {
+inline NORETURN void enableSystemMemoryTaggingTestOnly() {
   UNREACHABLE("memory tagging not supported");
 }
 
@@ -256,41 +271,44 @@
   ScopedDisableMemoryTagChecks() {}
 };
 
-inline uptr selectRandomTag(uptr Ptr, uptr ExcludeMask) {
+inline NORETURN uptr selectRandomTag(uptr Ptr, uptr ExcludeMask) {
   (void)Ptr;
   (void)ExcludeMask;
   UNREACHABLE("memory tagging not supported");
 }
 
-inline uptr addFixedTag(uptr Ptr, uptr Tag) {
+inline NORETURN uptr addFixedTag(uptr Ptr, uptr Tag) {
   (void)Ptr;
   (void)Tag;
   UNREACHABLE("memory tagging not supported");
 }
 
-inline uptr storeTags(uptr Begin, uptr End) {
+inline NORETURN uptr storeTags(uptr Begin, uptr End) {
   (void)Begin;
   (void)End;
   UNREACHABLE("memory tagging not supported");
 }
 
-inline void storeTag(uptr Ptr) {
+inline NORETURN void storeTag(uptr Ptr) {
   (void)Ptr;
   UNREACHABLE("memory tagging not supported");
 }
 
-inline uptr loadTag(uptr Ptr) {
+inline NORETURN uptr loadTag(uptr Ptr) {
   (void)Ptr;
   UNREACHABLE("memory tagging not supported");
 }
 
 #endif
 
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wmissing-noreturn"
 inline void setRandomTag(void *Ptr, uptr Size, uptr ExcludeMask,
                          uptr *TaggedBegin, uptr *TaggedEnd) {
   *TaggedBegin = selectRandomTag(reinterpret_cast<uptr>(Ptr), ExcludeMask);
   *TaggedEnd = storeTags(*TaggedBegin, *TaggedBegin + Size);
 }
+#pragma GCC diagnostic pop
 
 inline void *untagPointer(void *Ptr) {
   return reinterpret_cast<void *>(untagPointer(reinterpret_cast<uptr>(Ptr)));
@@ -307,7 +325,8 @@
 
 template <typename Config>
 inline constexpr bool allocatorSupportsMemoryTagging() {
-  return archSupportsMemoryTagging() && Config::MaySupportMemoryTagging;
+  return archSupportsMemoryTagging() && Config::MaySupportMemoryTagging &&
+         (1 << SCUDO_MIN_ALIGNMENT_LOG) >= archMemoryTagGranuleSize();
 }
 
 } // namespace scudo
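
enableSystemMemoryTaggingTestOnly() opts the caller into synchronous MTE
tag-check faults. A hedged, Linux/aarch64-only standalone sketch of the same
prctl sequence, reusing the fallback defines from the patch:

#include <stdio.h>
#include <sys/prctl.h>

#ifndef PR_SET_TAGGED_ADDR_CTRL
#define PR_SET_TAGGED_ADDR_CTRL 54
#endif
#ifndef PR_GET_TAGGED_ADDR_CTRL
#define PR_GET_TAGGED_ADDR_CTRL 56
#endif
#ifndef PR_TAGGED_ADDR_ENABLE
#define PR_TAGGED_ADDR_ENABLE (1UL << 0)
#endif
#ifndef PR_MTE_TCF_SHIFT
#define PR_MTE_TCF_SHIFT 1
#endif
#ifndef PR_MTE_TCF_SYNC
#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
#endif
#ifndef PR_MTE_TAG_SHIFT
#define PR_MTE_TAG_SHIFT 3
#endif

int main() {
  // Enable the tagged-address ABI and synchronous tag-check faults; the
  // 0xfffe inclusion mask allows random tags 1..15, keeping tag 0 excluded
  // (matching the "Always exclude Tag 0" policy in selectRandomTag).
  if (prctl(PR_SET_TAGGED_ADDR_CTRL,
            PR_TAGGED_ADDR_ENABLE | PR_MTE_TCF_SYNC |
                (0xfffe << PR_MTE_TAG_SHIFT),
            0, 0, 0) == -1) {
    perror("prctl(PR_SET_TAGGED_ADDR_CTRL)");
    return 1;
  }
  printf("ctrl: %#x\n", (unsigned)prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0));
  return 0;
}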
diff --git a/standalone/mutex.h b/standalone/mutex.h
index a654d35..c8504c0 100644
--- a/standalone/mutex.h
+++ b/standalone/mutex.h
@@ -22,7 +22,6 @@
 
 class HybridMutex {
 public:
-  void init() { M = {}; }
   bool tryLock();
   NOINLINE void lock() {
     if (LIKELY(tryLock()))
diff --git a/standalone/platform.h b/standalone/platform.h
index a4c2a0b..db4217d 100644
--- a/standalone/platform.h
+++ b/standalone/platform.h
@@ -12,7 +12,7 @@
 // Transitive includes of stdint.h specify some of the defines checked below.
 #include <stdint.h>
 
-#if defined(__linux__)
+#if defined(__linux__) && !defined(__TRUSTY__)
 #define SCUDO_LINUX 1
 #else
 #define SCUDO_LINUX 0
@@ -31,7 +31,13 @@
 #define SCUDO_FUCHSIA 0
 #endif
 
-#if __LP64__
+#if defined(__TRUSTY__)
+#define SCUDO_TRUSTY 1
+#else
+#define SCUDO_TRUSTY 0
+#endif
+
+#if defined(__LP64__)
 #define SCUDO_WORDSIZE 64U
 #else
 #define SCUDO_WORDSIZE 32U
diff --git a/standalone/primary32.h b/standalone/primary32.h
index 33d8175..326c10a 100644
--- a/standalone/primary32.h
+++ b/standalone/primary32.h
@@ -60,12 +60,15 @@
 
   static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; }
 
-  void initLinkerInitialized(s32 ReleaseToOsInterval) {
+  void init(s32 ReleaseToOsInterval) {
     if (SCUDO_FUCHSIA)
       reportError("SizeClassAllocator32 is not supported on Fuchsia");
 
-    PossibleRegions.initLinkerInitialized();
+    if (SCUDO_TRUSTY)
+      reportError("SizeClassAllocator32 is not supported on Trusty");
 
+    DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT)));
+    PossibleRegions.init();
     u32 Seed;
     const u64 Time = getMonotonicTime();
     if (!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed)))
@@ -80,10 +83,6 @@
     }
     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
-  void init(s32 ReleaseToOsInterval) {
-    memset(this, 0, sizeof(*this));
-    initLinkerInitialized(ReleaseToOsInterval);
-  }
 
   void unmapTestOnly() {
     while (NumberOfStashedRegions > 0)
@@ -96,6 +95,7 @@
         MinRegionIndex = Sci->MinRegionIndex;
       if (Sci->MaxRegionIndex > MaxRegionIndex)
         MaxRegionIndex = Sci->MaxRegionIndex;
+      *Sci = {};
     }
     for (uptr I = MinRegionIndex; I < MaxRegionIndex; I++)
       if (PossibleRegions[I])
diff --git a/standalone/primary64.h b/standalone/primary64.h
index 94375fc..14784ee 100644
--- a/standalone/primary64.h
+++ b/standalone/primary64.h
@@ -25,8 +25,9 @@
 //
 // It starts by reserving NumClasses * 2^RegionSizeLog bytes, equally divided in
 // Regions, specific to each size class. Note that the base of that mapping is
-// random (based to the platform specific map() capabilities), and that each
-// Region actually starts at a random offset from its base.
+// random (based on the platform-specific map() capabilities). If
+// PrimaryEnableRandomOffset is set, each Region actually starts at a random
+// offset from its base.
 //
 // Regions are mapped incrementally on demand to fulfill allocation requests,
 // those mappings being split into equally sized Blocks based on the size class
@@ -57,7 +58,9 @@
 
   static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; }
 
-  void initLinkerInitialized(s32 ReleaseToOsInterval) {
+  void init(s32 ReleaseToOsInterval) {
+    DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT)));
+    DCHECK_EQ(PrimaryBase, 0U);
     // Reserve the space required for the Primary.
     PrimaryBase = reinterpret_cast<uptr>(
         map(nullptr, PrimarySize, nullptr, MAP_NOACCESS, &Data));
@@ -69,21 +72,27 @@
     const uptr PageSize = getPageSizeCached();
     for (uptr I = 0; I < NumClasses; I++) {
       RegionInfo *Region = getRegionInfo(I);
-      // The actual start of a region is offseted by a random number of pages.
-      Region->RegionBeg =
-          getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize;
+      // The actual start of a region is offset by a random number of pages
+      // when PrimaryEnableRandomOffset is set.
+      Region->RegionBeg = getRegionBaseByClassId(I) +
+                          (Config::PrimaryEnableRandomOffset
+                               ? ((getRandomModN(&Seed, 16) + 1) * PageSize)
+                               : 0);
       Region->RandState = getRandomU32(&Seed);
       Region->ReleaseInfo.LastReleaseAtNs = Time;
     }
     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
-  void init(s32 ReleaseToOsInterval) {
-    memset(this, 0, sizeof(*this));
-    initLinkerInitialized(ReleaseToOsInterval);
-  }
 
   void unmapTestOnly() {
-    unmap(reinterpret_cast<void *>(PrimaryBase), PrimarySize, UNMAP_ALL, &Data);
+    for (uptr I = 0; I < NumClasses; I++) {
+      RegionInfo *Region = getRegionInfo(I);
+      *Region = {};
+    }
+    if (PrimaryBase)
+      unmap(reinterpret_cast<void *>(PrimaryBase), PrimarySize, UNMAP_ALL,
+            &Data);
+    PrimaryBase = 0U;
   }
 
   TransferBatch *popBatch(CacheT *C, uptr ClassId) {
@@ -157,9 +166,9 @@
       PoppedBlocks += Region->Stats.PoppedBlocks;
       PushedBlocks += Region->Stats.PushedBlocks;
     }
-    Str->append("Stats: SizeClassAllocator64: %zuM mapped (%zuM rss) in %zu "
+    Str->append("Stats: SizeClassAllocator64: %zuM mapped (%uM rss) in %zu "
                 "allocations; remains %zu\n",
-                TotalMapped >> 20, 0, PoppedBlocks,
+                TotalMapped >> 20, 0U, PoppedBlocks,
                 PoppedBlocks - PushedBlocks);
 
     for (uptr I = 0; I < NumClasses; I++)
@@ -265,8 +274,7 @@
   static const uptr NumClasses = SizeClassMap::NumClasses;
   static const uptr PrimarySize = RegionSize * NumClasses;
 
-  // Call map for user memory with at least this size.
-  static const uptr MapSizeIncrement = 1UL << 18;
+  static const uptr MapSizeIncrement = Config::PrimaryMapSizeIncrement;
   // Fill at most this number of batches from the newly map'd memory.
   static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U;
 
@@ -339,7 +347,7 @@
       if (UNLIKELY(RegionBase + MappedUser + MapSize > RegionSize)) {
         if (!Region->Exhausted) {
           Region->Exhausted = true;
-          ScopedString Str(1024);
+          ScopedString Str;
           getStats(&Str);
           Str.append(
               "Scudo OOM: The process has exhausted %zuM for size class %zu.\n",
diff --git a/standalone/quarantine.h b/standalone/quarantine.h
index 8d4b38e..2d231c3 100644
--- a/standalone/quarantine.h
+++ b/standalone/quarantine.h
@@ -64,11 +64,7 @@
 // Per-thread cache of memory blocks.
 template <typename Callback> class QuarantineCache {
 public:
-  void initLinkerInitialized() {}
-  void init() {
-    memset(this, 0, sizeof(*this));
-    initLinkerInitialized();
-  }
+  void init() { DCHECK_EQ(atomic_load_relaxed(&Size), 0U); }
 
   // Total memory used, including internal accounting.
   uptr getSize() const { return atomic_load_relaxed(&Size); }
@@ -174,8 +170,13 @@
 template <typename Callback, typename Node> class GlobalQuarantine {
 public:
   typedef QuarantineCache<Callback> CacheT;
+  using ThisT = GlobalQuarantine<Callback, Node>;
 
-  void initLinkerInitialized(uptr Size, uptr CacheSize) {
+  void init(uptr Size, uptr CacheSize) {
+    DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT)));
+    DCHECK_EQ(atomic_load_relaxed(&MaxSize), 0U);
+    DCHECK_EQ(atomic_load_relaxed(&MinSize), 0U);
+    DCHECK_EQ(atomic_load_relaxed(&MaxCacheSize), 0U);
     // Thread local quarantine size can be zero only when global quarantine size
     // is zero (it allows us to perform just one atomic read per put() call).
     CHECK((Size == 0 && CacheSize == 0) || CacheSize != 0);
@@ -184,16 +185,7 @@
     atomic_store_relaxed(&MinSize, Size / 10 * 9); // 90% of max size.
     atomic_store_relaxed(&MaxCacheSize, CacheSize);
 
-    Cache.initLinkerInitialized();
-  }
-  void init(uptr Size, uptr CacheSize) {
-    CacheMutex.init();
     Cache.init();
-    RecycleMutex.init();
-    MinSize = {};
-    MaxSize = {};
-    MaxCacheSize = {};
-    initLinkerInitialized(Size, CacheSize);
   }
 
   uptr getMaxSize() const { return atomic_load_relaxed(&MaxSize); }
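
To make the CHECK in init() concrete, these are the argument combinations it
accepts and rejects (an illustrative sketch, not in-tree code):

Quarantine.init(/*Size=*/1 << 20, /*CacheSize=*/64 << 10); // OK
Quarantine.init(/*Size=*/0, /*CacheSize=*/0);              // OK: disabled
Quarantine.init(/*Size=*/1 << 20, /*CacheSize=*/0);        // fails the CHECK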
diff --git a/standalone/report.cpp b/standalone/report.cpp
index 80cc6ed..561c7c5 100644
--- a/standalone/report.cpp
+++ b/standalone/report.cpp
@@ -17,7 +17,7 @@
 
 class ScopedErrorReport {
 public:
-  ScopedErrorReport() : Message(512) { Message.append("Scudo ERROR: "); }
+  ScopedErrorReport() : Message() { Message.append("Scudo ERROR: "); }
   void append(const char *Format, ...) {
     va_list Args;
     va_start(Args, Format);
@@ -45,8 +45,8 @@
     trap();
   }
   ScopedErrorReport Report;
-  Report.append("CHECK failed @ %s:%d %s (%llu, %llu)\n", File, Line, Condition,
-                Value1, Value2);
+  Report.append("CHECK failed @ %s:%d %s ((u64)op1=%llu, (u64)op2=%llu)\n",
+                File, Line, Condition, Value1, Value2);
 }
 
 // Generic string fatal error message.
diff --git a/standalone/secondary.h b/standalone/secondary.h
index ea5d680..2d177576 100644
--- a/standalone/secondary.h
+++ b/standalone/secondary.h
@@ -28,7 +28,10 @@
 
 namespace LargeBlock {
 
-struct Header {
+struct alignas(Max<uptr>(archSupportsMemoryTagging()
+                             ? archMemoryTagGranuleSize()
+                             : 1,
+                         1U << SCUDO_MIN_ALIGNMENT_LOG)) Header {
   LargeBlock::Header *Prev;
   LargeBlock::Header *Next;
   uptr CommitBase;
@@ -38,9 +41,12 @@
   [[no_unique_address]] MapPlatformData Data;
 };
 
-constexpr uptr getHeaderSize() {
-  return roundUpTo(sizeof(Header), 1U << SCUDO_MIN_ALIGNMENT_LOG);
-}
+static_assert(sizeof(Header) % (1U << SCUDO_MIN_ALIGNMENT_LOG) == 0, "");
+static_assert(!archSupportsMemoryTagging() ||
+                  sizeof(Header) % archMemoryTagGranuleSize() == 0,
+              "");
+
+constexpr uptr getHeaderSize() { return sizeof(Header); }
 
 template <typename Config> static uptr addHeaderTag(uptr Ptr) {
   if (allocatorSupportsMemoryTagging<Config>())
@@ -49,8 +55,7 @@
 }
 
 template <typename Config> static Header *getHeader(uptr Ptr) {
-  return reinterpret_cast<Header *>(addHeaderTag<Config>(Ptr) -
-                                    getHeaderSize());
+  return reinterpret_cast<Header *>(addHeaderTag<Config>(Ptr)) - 1;
 }
 
 template <typename Config> static Header *getHeader(const void *Ptr) {
@@ -66,7 +71,6 @@
 
 class MapAllocatorNoCache {
 public:
-  void initLinkerInitialized(UNUSED s32 ReleaseToOsInterval) {}
   void init(UNUSED s32 ReleaseToOsInterval) {}
   bool retrieve(UNUSED Options Options, UNUSED uptr Size, UNUSED uptr Alignment,
                 UNUSED LargeBlock::Header **H, UNUSED bool *Zeroed) {
@@ -78,6 +82,7 @@
   void enable() {}
   void releaseToOS() {}
   void disableMemoryTagging() {}
+  void unmapTestOnly() {}
   bool setOption(Option O, UNUSED sptr Value) {
     if (O == Option::ReleaseInterval || O == Option::MaxCacheEntriesCount ||
         O == Option::MaxCacheEntrySize)
@@ -108,6 +113,19 @@
   }
 }
 
+// Template specialization to avoid producing a zero-length array.
+template <typename T, size_t Size> class NonZeroLengthArray {
+public:
+  T &operator[](uptr Idx) { return values[Idx]; }
+
+private:
+  T values[Size];
+};
+template <typename T> class NonZeroLengthArray<T, 0> {
+public:
+  T &operator[](uptr UNUSED Idx) { UNREACHABLE("Unsupported!"); }
+};
+
 template <typename Config> class MapAllocatorCache {
 public:
   // Ensure the default maximum specified fits the array.
@@ -115,17 +133,14 @@
                     Config::SecondaryCacheEntriesArraySize,
                 "");
 
-  void initLinkerInitialized(s32 ReleaseToOsInterval) {
+  void init(s32 ReleaseToOsInterval) {
+    DCHECK_EQ(EntriesCount, 0U);
     setOption(Option::MaxCacheEntriesCount,
               static_cast<sptr>(Config::SecondaryCacheDefaultMaxEntriesCount));
     setOption(Option::MaxCacheEntrySize,
               static_cast<sptr>(Config::SecondaryCacheDefaultMaxEntrySize));
     setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));
   }
-  void init(s32 ReleaseToOsInterval) {
-    memset(this, 0, sizeof(*this));
-    initLinkerInitialized(ReleaseToOsInterval);
-  }
 
   void store(Options Options, LargeBlock::Header *H) {
     if (!canCache(H->CommitSize))
@@ -217,7 +232,7 @@
     const u32 MaxCount = atomic_load_relaxed(&MaxEntriesCount);
     bool Found = false;
     CachedBlock Entry;
-    uptr HeaderPos;
+    uptr HeaderPos = 0;
     {
       ScopedLock L(Mutex);
       if (EntriesCount == 0)
@@ -321,6 +336,8 @@
 
   void enable() { Mutex.unlock(); }
 
+  void unmapTestOnly() { empty(); }
+
 private:
   void empty() {
     struct {
@@ -391,21 +408,20 @@
   atomic_s32 ReleaseToOsIntervalMs = {};
 
   CachedBlock Entries[Config::SecondaryCacheEntriesArraySize] = {};
-  CachedBlock Quarantine[Config::SecondaryCacheQuarantineSize] = {};
+  NonZeroLengthArray<CachedBlock, Config::SecondaryCacheQuarantineSize>
+      Quarantine = {};
 };
 
 template <typename Config> class MapAllocator {
 public:
-  void initLinkerInitialized(GlobalStats *S, s32 ReleaseToOsInterval = -1) {
-    Cache.initLinkerInitialized(ReleaseToOsInterval);
-    Stats.initLinkerInitialized();
+  void init(GlobalStats *S, s32 ReleaseToOsInterval = -1) {
+    DCHECK_EQ(AllocatedBytes, 0U);
+    DCHECK_EQ(FreedBytes, 0U);
+    Cache.init(ReleaseToOsInterval);
+    Stats.init();
     if (LIKELY(S))
       S->link(&Stats);
   }
-  void init(GlobalStats *S, s32 ReleaseToOsInterval = -1) {
-    memset(this, 0, sizeof(*this));
-    initLinkerInitialized(S, ReleaseToOsInterval);
-  }
 
   void *allocate(Options Options, uptr Size, uptr AlignmentHint = 0,
                  uptr *BlockEnd = nullptr,
@@ -443,7 +459,7 @@
     }
   }
 
-  uptr canCache(uptr Size) { return Cache.canCache(Size); }
+  bool canCache(uptr Size) { return Cache.canCache(Size); }
 
   bool setOption(Option O, sptr Value) { return Cache.setOption(O, Value); }
 
@@ -451,6 +467,8 @@
 
   void disableMemoryTagging() { Cache.disableMemoryTagging(); }
 
+  void unmapTestOnly() { Cache.unmapTestOnly(); }
+
 private:
   typename Config::SecondaryCache Cache;
 
@@ -481,7 +499,7 @@
                                      FillContentsMode FillContents) {
   if (Options.get(OptionBit::AddLargeAllocationSlack))
     Size += 1UL << SCUDO_MIN_ALIGNMENT_LOG;
-  Alignment = Max(Alignment, 1UL << SCUDO_MIN_ALIGNMENT_LOG);
+  Alignment = Max(Alignment, uptr(1U) << SCUDO_MIN_ALIGNMENT_LOG);
   const uptr PageSize = getPageSizeCached();
   uptr RoundedSize =
       roundUpTo(roundUpTo(Size, Alignment) + LargeBlock::getHeaderSize() +
@@ -598,12 +616,11 @@
 
 template <typename Config>
 void MapAllocator<Config>::getStats(ScopedString *Str) const {
-  Str->append(
-      "Stats: MapAllocator: allocated %zu times (%zuK), freed %zu times "
-      "(%zuK), remains %zu (%zuK) max %zuM\n",
-      NumberOfAllocs, AllocatedBytes >> 10, NumberOfFrees, FreedBytes >> 10,
-      NumberOfAllocs - NumberOfFrees, (AllocatedBytes - FreedBytes) >> 10,
-      LargestSize >> 20);
+  Str->append("Stats: MapAllocator: allocated %u times (%zuK), freed %u times "
+              "(%zuK), remains %u (%zuK) max %zuM\n",
+              NumberOfAllocs, AllocatedBytes >> 10, NumberOfFrees,
+              FreedBytes >> 10, NumberOfAllocs - NumberOfFrees,
+              (AllocatedBytes - FreedBytes) >> 10, LargestSize >> 20);
 }
 
 } // namespace scudo
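
A standalone restatement of the NonZeroLengthArray idea above: the Size == 0
specialization keeps a config with SecondaryCacheQuarantineSize == 0
well-formed, where a plain "T Values[0]" member would not be:

#include <cstdio>

template <typename T, unsigned Size> class NonZeroLengthArray {
public:
  T &operator[](unsigned Idx) { return Values[Idx]; }

private:
  T Values[Size];
};
template <typename T> class NonZeroLengthArray<T, 0> {
public:
  T &operator[](unsigned) { __builtin_trap(); } // "Unsupported!"
};

int main() {
  NonZeroLengthArray<int, 4> Quarantine = {};
  Quarantine[0] = 1;
  NonZeroLengthArray<int, 0> Disabled; // fine to instantiate, never indexed
  (void)Disabled;
  printf("%d\n", Quarantine[0]);
  return 0;
}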
diff --git a/standalone/size_class_map.h b/standalone/size_class_map.h
index 1948802..6b06095 100644
--- a/standalone/size_class_map.h
+++ b/standalone/size_class_map.h
@@ -64,12 +64,10 @@
   static const u8 S = Config::NumBits - 1;
   static const uptr M = (1UL << S) - 1;
 
-  static const uptr SizeDelta = Chunk::getHeaderSize();
-
 public:
   static const u32 MaxNumCachedHint = Config::MaxNumCachedHint;
 
-  static const uptr MaxSize = (1UL << Config::MaxSizeLog) + SizeDelta;
+  static const uptr MaxSize = (1UL << Config::MaxSizeLog) + Config::SizeDelta;
   static const uptr NumClasses =
       MidClass + ((Config::MaxSizeLog - Config::MidSizeLog) << S) + 1;
   static_assert(NumClasses <= 256, "");
@@ -79,24 +77,22 @@
   static uptr getSizeByClassId(uptr ClassId) {
     DCHECK_NE(ClassId, BatchClassId);
     if (ClassId <= MidClass)
-      return (ClassId << Config::MinSizeLog) + SizeDelta;
+      return (ClassId << Config::MinSizeLog) + Config::SizeDelta;
     ClassId -= MidClass;
     const uptr T = MidSize << (ClassId >> S);
-    return T + (T >> S) * (ClassId & M) + SizeDelta;
+    return T + (T >> S) * (ClassId & M) + Config::SizeDelta;
   }
 
   static u8 getSizeLSBByClassId(uptr ClassId) {
     return u8(getLeastSignificantSetBitIndex(getSizeByClassId(ClassId)));
   }
 
-  static constexpr bool usesCompressedLSBFormat() {
-    return false;
-  }
+  static constexpr bool usesCompressedLSBFormat() { return false; }
 
   static uptr getClassIdBySize(uptr Size) {
-    if (Size <= SizeDelta + (1 << Config::MinSizeLog))
+    if (Size <= Config::SizeDelta + (1 << Config::MinSizeLog))
       return 1;
-    Size -= SizeDelta;
+    Size -= Config::SizeDelta;
     DCHECK_LE(Size, MaxSize);
     if (Size <= MidSize)
       return (Size + MinSize - 1) >> Config::MinSizeLog;
@@ -227,12 +223,25 @@
   static const uptr MinSizeLog = 5;
   static const uptr MidSizeLog = 8;
   static const uptr MaxSizeLog = 17;
-  static const u32 MaxNumCachedHint = 10;
+  static const u32 MaxNumCachedHint = 14;
   static const uptr MaxBytesCachedLog = 10;
+  static const uptr SizeDelta = 0;
 };
 
 typedef FixedSizeClassMap<DefaultSizeClassConfig> DefaultSizeClassMap;
 
+struct FuchsiaSizeClassConfig {
+  static const uptr NumBits = 3;
+  static const uptr MinSizeLog = 5;
+  static const uptr MidSizeLog = 8;
+  static const uptr MaxSizeLog = 17;
+  static const u32 MaxNumCachedHint = 10;
+  static const uptr MaxBytesCachedLog = 10;
+  static const uptr SizeDelta = Chunk::getHeaderSize();
+};
+
+typedef FixedSizeClassMap<FuchsiaSizeClassConfig> FuchsiaSizeClassMap;
+
 struct AndroidSizeClassConfig {
 #if SCUDO_WORDSIZE == 64U
   static const uptr NumBits = 7;
@@ -285,6 +294,7 @@
   static const uptr MaxSizeLog = 14;
   static const u32 MaxNumCachedHint = 13;
   static const uptr MaxBytesCachedLog = 10;
+  static const uptr SizeDelta = Chunk::getHeaderSize();
 #else
   static const uptr NumBits = 4;
   static const uptr MinSizeLog = 3;
@@ -292,13 +302,28 @@
   static const uptr MaxSizeLog = 14;
   static const u32 MaxNumCachedHint = 14;
   static const uptr MaxBytesCachedLog = 10;
+  static const uptr SizeDelta = Chunk::getHeaderSize();
 #endif
 };
 
 typedef FixedSizeClassMap<SvelteSizeClassConfig> SvelteSizeClassMap;
 
+// Trusty is configured to only have one region containing blocks of size
+// 2^7 bytes.
+struct TrustySizeClassConfig {
+  static const uptr NumBits = 1;
+  static const uptr MinSizeLog = 7;
+  static const uptr MidSizeLog = 7;
+  static const uptr MaxSizeLog = 7;
+  static const u32 MaxNumCachedHint = 8;
+  static const uptr MaxBytesCachedLog = 10;
+  static const uptr SizeDelta = 0;
+};
+
+typedef FixedSizeClassMap<TrustySizeClassConfig> TrustySizeClassMap;
+
 template <typename SCMap> inline void printMap() {
-  ScopedString Buffer(1024);
+  ScopedString Buffer;
   uptr PrevS = 0;
   uptr TotalCached = 0;
   for (uptr I = 0; I < SCMap::NumClasses; I++) {
@@ -310,8 +335,8 @@
     const uptr L = S ? getMostSignificantSetBitIndex(S) : 0;
     const uptr Cached = SCMap::getMaxCachedHint(S) * S;
     Buffer.append(
-        "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %zu %zu; id %zu\n",
-        I, S, D, P, L, SCMap::getMaxCachedHint(S), Cached,
+        "C%02zu => S: %zu diff: +%zu %02zu%% L %zu Cached: %u %zu; id %zu\n", I,
+        S, D, P, L, SCMap::getMaxCachedHint(S), Cached,
         SCMap::getClassIdBySize(S));
     TotalCached += Cached;
     PrevS = S;
@@ -320,7 +345,7 @@
   Buffer.output();
 }
 
-template <typename SCMap> static void validateMap() {
+template <typename SCMap> static UNUSED void validateMap() {
   for (uptr C = 0; C < SCMap::NumClasses; C++) {
     if (C == SCMap::BatchClassId)
       continue;
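
For TrustySizeClassConfig above, the FixedSizeClassMap arithmetic works out to
a single user class (a worked example using the in-tree formulas, assuming
MidClass = MidSize / MinSize):

  MidClass   = (1 << 7) / (1 << 7) = 1
  NumClasses = MidClass + ((MaxSizeLog - MidSizeLog) << (NumBits - 1)) + 1
             = 1 + (0 << 0) + 1 = 2
  getSizeByClassId(1) = (1 << MinSizeLog) + SizeDelta = 128 bytes

That is, one 2^7-byte class plus the reserved batch class.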
diff --git a/standalone/stats.h b/standalone/stats.h
index e15c056..be5bf2d 100644
--- a/standalone/stats.h
+++ b/standalone/stats.h
@@ -29,8 +29,10 @@
 // LocalStats::add'ing, this is OK, we will still get a meaningful number.
 class LocalStats {
 public:
-  void initLinkerInitialized() {}
-  void init() { memset(this, 0, sizeof(*this)); }
+  void init() {
+    for (uptr I = 0; I < StatCount; I++)
+      DCHECK_EQ(get(static_cast<StatType>(I)), 0U);
+  }
 
   void add(StatType I, uptr V) {
     V += atomic_load_relaxed(&StatsArray[I]);
@@ -56,13 +58,7 @@
 // Global stats, used for aggregation and querying.
 class GlobalStats : public LocalStats {
 public:
-  void initLinkerInitialized() {}
-  void init() {
-    LocalStats::init();
-    Mutex.init();
-    StatsList = {};
-    initLinkerInitialized();
-  }
+  void init() { LocalStats::init(); }
 
   void link(LocalStats *S) {
     ScopedLock L(Mutex);
diff --git a/standalone/string_utils.cpp b/standalone/string_utils.cpp
index 25bddbc..13fdb9c 100644
--- a/standalone/string_utils.cpp
+++ b/standalone/string_utils.cpp
@@ -219,7 +219,6 @@
 }
 
 void ScopedString::append(const char *Format, va_list Args) {
-  DCHECK_LT(Length, String.size());
   va_list ArgsCopy;
   va_copy(ArgsCopy, Args);
   // formatString doesn't currently support a null buffer or zero buffer length,
@@ -228,14 +227,15 @@
   char C[1];
   const uptr AdditionalLength =
       static_cast<uptr>(formatString(C, sizeof(C), Format, Args)) + 1;
+  const uptr Length = length();
   String.resize(Length + AdditionalLength);
-  formatString(String.data() + Length, AdditionalLength, Format, ArgsCopy);
+  const uptr FormattedLength = static_cast<uptr>(formatString(
+      String.data() + Length, String.size() - Length, Format, ArgsCopy));
+  RAW_CHECK(data()[length()] == '\0');
+  RAW_CHECK(FormattedLength + 1 == AdditionalLength);
   va_end(ArgsCopy);
-  Length = strlen(String.data());
-  CHECK_LT(Length, String.size());
 }
 
-FORMAT(2, 3)
 void ScopedString::append(const char *Format, ...) {
   va_list Args;
   va_start(Args, Format);
@@ -243,11 +243,10 @@
   va_end(Args);
 }
 
-FORMAT(1, 2)
 void Printf(const char *Format, ...) {
   va_list Args;
   va_start(Args, Format);
-  ScopedString Msg(1024);
+  ScopedString Msg;
   Msg.append(Format, Args);
   outputRaw(Msg.data());
   va_end(Args);
diff --git a/standalone/string_utils.h b/standalone/string_utils.h
index 4880fa1..dd6ff78 100644
--- a/standalone/string_utils.h
+++ b/standalone/string_utils.h
@@ -18,26 +18,24 @@
 
 class ScopedString {
 public:
-  explicit ScopedString(uptr MaxLength) : String(MaxLength), Length(0) {
-    String[0] = '\0';
-  }
-  uptr length() { return Length; }
+  explicit ScopedString() { String.push_back('\0'); }
+  uptr length() { return String.size() - 1; }
   const char *data() { return String.data(); }
   void clear() {
-    String[0] = '\0';
-    Length = 0;
+    String.clear();
+    String.push_back('\0');
   }
   void append(const char *Format, va_list Args);
-  void append(const char *Format, ...);
+  void append(const char *Format, ...) FORMAT(2, 3);
   void output() const { outputRaw(String.data()); }
 
 private:
   Vector<char> String;
-  uptr Length;
 };
 
-int formatString(char *Buffer, uptr BufferLength, const char *Format, ...);
-void Printf(const char *Format, ...);
+int formatString(char *Buffer, uptr BufferLength, const char *Format, ...)
+    FORMAT(3, 4);
+void Printf(const char *Format, ...) FORMAT(1, 2);
 
 } // namespace scudo
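
With the fixed-capacity constructor gone, ScopedString now grows on demand and
keeps the terminating NUL inside the Vector. A short usage sketch under the
new interface:

scudo::ScopedString Str;
Str.append("%d allocation(s)", 42); // the buffer resizes as needed
// length() excludes the trailing NUL: here it returns 16.
Str.output(); // writes "42 allocation(s)" via outputRaw()
Str.clear();  // back to the empty string, still NUL-terminated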
 
diff --git a/standalone/tests/checksum_test.cpp b/standalone/tests/checksum_test.cpp
index 781f990..c5d5b73 100644
--- a/standalone/tests/checksum_test.cpp
+++ b/standalone/tests/checksum_test.cpp
@@ -12,16 +12,16 @@
 
 #include <string.h>
 
-scudo::u16 computeSoftwareChecksum(scudo::u32 Seed, scudo::uptr *Array,
-                                   scudo::uptr ArraySize) {
+static scudo::u16 computeSoftwareChecksum(scudo::u32 Seed, scudo::uptr *Array,
+                                          scudo::uptr ArraySize) {
   scudo::u16 Checksum = static_cast<scudo::u16>(Seed & 0xffff);
   for (scudo::uptr I = 0; I < ArraySize; I++)
     Checksum = scudo::computeBSDChecksum(Checksum, Array[I]);
   return Checksum;
 }
 
-scudo::u16 computeHardwareChecksum(scudo::u32 Seed, scudo::uptr *Array,
-                                   scudo::uptr ArraySize) {
+static scudo::u16 computeHardwareChecksum(scudo::u32 Seed, scudo::uptr *Array,
+                                          scudo::uptr ArraySize) {
   scudo::u32 Crc = Seed;
   for (scudo::uptr I = 0; I < ArraySize; I++)
     Crc = scudo::computeHardwareCRC32(Crc, Array[I]);
@@ -32,7 +32,7 @@
 
 // This verifies that flipping bits in the data being checksummed produces a
 // different checksum. We do not use random data to avoid flakiness.
-template <ComputeChecksum F> void verifyChecksumFunctionBitFlip() {
+template <ComputeChecksum F> static void verifyChecksumFunctionBitFlip() {
   scudo::uptr Array[sizeof(scudo::u64) / sizeof(scudo::uptr)];
   const scudo::uptr ArraySize = ARRAY_SIZE(Array);
   memset(Array, 0xaa, sizeof(Array));
diff --git a/standalone/tests/chunk_test.cpp b/standalone/tests/chunk_test.cpp
index 6458e23..7a29f3c 100644
--- a/standalone/tests/chunk_test.cpp
+++ b/standalone/tests/chunk_test.cpp
@@ -21,7 +21,7 @@
     scudo::HashAlgorithm = scudo::Checksum::HardwareCRC32;
 }
 
-TEST(ScudoChunkTest, ChunkBasic) {
+TEST(ScudoChunkDeathTest, ChunkBasic) {
   initChecksum();
   const scudo::uptr Size = 0x100U;
   scudo::Chunk::UnpackedHeader Header = {};
@@ -60,7 +60,7 @@
   free(Block);
 }
 
-TEST(ScudoChunkTest, CorruptHeader) {
+TEST(ScudoChunkDeathTest, CorruptHeader) {
   initChecksum();
   const scudo::uptr Size = 0x100U;
   scudo::Chunk::UnpackedHeader Header = {};
diff --git a/standalone/tests/combined_test.cpp b/standalone/tests/combined_test.cpp
index 5db249d..94d97df 100644
--- a/standalone/tests/combined_test.cpp
+++ b/standalone/tests/combined_test.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "memtag.h"
 #include "tests/scudo_unit_test.h"
 
 #include "allocator_config.h"
@@ -68,7 +69,6 @@
 
 template <typename Config> struct TestAllocator : scudo::Allocator<Config> {
   TestAllocator() {
-    this->reset();
     this->initThreadMaybe();
     if (scudo::archSupportsMemoryTagging() &&
         !scudo::systemDetectsMemoryTagFaultsTestOnly())
@@ -97,12 +97,14 @@
 
   void RunTest();
 
-  void BasicTest(scudo::uptr SizeLogMin, scudo::uptr SizeLogMax);
+  void BasicTest(scudo::uptr SizeLog);
 
   using AllocatorT = TestAllocator<TypeParam>;
   std::unique_ptr<AllocatorT> Allocator;
 };
 
+template <typename T> using ScudoCombinedDeathTest = ScudoCombinedTest<T>;
+
 #if SCUDO_FUCHSIA
 #define SCUDO_TYPED_TEST_ALL_TYPES(FIXTURE, NAME)                              \
   SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, AndroidSvelteConfig)                    \
@@ -116,7 +118,7 @@
 
 #define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE)                             \
   using FIXTURE##NAME##_##TYPE = FIXTURE##NAME<scudo::TYPE>;                   \
-  TEST_F(FIXTURE##NAME##_##TYPE, NAME) { Run(); }
+  TEST_F(FIXTURE##NAME##_##TYPE, NAME) { FIXTURE##NAME<scudo::TYPE>::Run(); }
 
 #define SCUDO_TYPED_TEST(FIXTURE, NAME)                                        \
   template <class TypeParam>                                                   \
@@ -141,37 +143,56 @@
 }
 
 template <class Config>
-void ScudoCombinedTest<Config>::BasicTest(scudo::uptr SizeLogMin,
-                                          scudo::uptr SizeLogMax) {
+void ScudoCombinedTest<Config>::BasicTest(scudo::uptr SizeLog) {
   auto *Allocator = this->Allocator.get();
 
   // This allocates and deallocates a bunch of chunks, with a wide range of
   // sizes and alignments, with a focus on sizes that could trigger weird
   // behaviors (plus or minus a small delta of a power of two for example).
-  for (scudo::uptr SizeLog = SizeLogMin; SizeLog <= SizeLogMax; SizeLog++) {
-    for (scudo::uptr AlignLog = MinAlignLog; AlignLog <= 16U; AlignLog++) {
-      const scudo::uptr Align = 1U << AlignLog;
-      for (scudo::sptr Delta = -32; Delta <= 32; Delta++) {
-        if (static_cast<scudo::sptr>(1U << SizeLog) + Delta <= 0)
-          continue;
-        const scudo::uptr Size = (1U << SizeLog) + Delta;
-        void *P = Allocator->allocate(Size, Origin, Align);
-        EXPECT_NE(P, nullptr);
-        EXPECT_TRUE(Allocator->isOwned(P));
-        EXPECT_TRUE(scudo::isAligned(reinterpret_cast<scudo::uptr>(P), Align));
-        EXPECT_LE(Size, Allocator->getUsableSize(P));
-        memset(P, 0xaa, Size);
-        checkMemoryTaggingMaybe(Allocator, P, Size, Align);
-        Allocator->deallocate(P, Origin, Size);
-      }
+  for (scudo::uptr AlignLog = MinAlignLog; AlignLog <= 16U; AlignLog++) {
+    const scudo::uptr Align = 1U << AlignLog;
+    for (scudo::sptr Delta = -32; Delta <= 32; Delta++) {
+      if (static_cast<scudo::sptr>(1U << SizeLog) + Delta <= 0)
+        continue;
+      const scudo::uptr Size = (1U << SizeLog) + Delta;
+      void *P = Allocator->allocate(Size, Origin, Align);
+      EXPECT_NE(P, nullptr);
+      EXPECT_TRUE(Allocator->isOwned(P));
+      EXPECT_TRUE(scudo::isAligned(reinterpret_cast<scudo::uptr>(P), Align));
+      EXPECT_LE(Size, Allocator->getUsableSize(P));
+      memset(P, 0xaa, Size);
+      checkMemoryTaggingMaybe(Allocator, P, Size, Align);
+      Allocator->deallocate(P, Origin, Size);
     }
   }
 }
 
-SCUDO_TYPED_TEST(ScudoCombinedTest, BasicCombined0) { this->BasicTest(0, 16); }
-SCUDO_TYPED_TEST(ScudoCombinedTest, BasicCombined1) { this->BasicTest(17, 18); }
-SCUDO_TYPED_TEST(ScudoCombinedTest, BasicCombined2) { this->BasicTest(19, 19); }
-SCUDO_TYPED_TEST(ScudoCombinedTest, BasicCombined3) { this->BasicTest(20, 20); }
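+// Instantiate one BasicCombined test per SizeLog so that each size range
+// runs as its own test.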
+#define SCUDO_MAKE_BASIC_TEST(SizeLog)                                         \
+  SCUDO_TYPED_TEST(ScudoCombinedDeathTest, BasicCombined##SizeLog) {           \
+    this->BasicTest(SizeLog);                                                  \
+  }
+
+SCUDO_MAKE_BASIC_TEST(0)
+SCUDO_MAKE_BASIC_TEST(1)
+SCUDO_MAKE_BASIC_TEST(2)
+SCUDO_MAKE_BASIC_TEST(3)
+SCUDO_MAKE_BASIC_TEST(4)
+SCUDO_MAKE_BASIC_TEST(5)
+SCUDO_MAKE_BASIC_TEST(6)
+SCUDO_MAKE_BASIC_TEST(7)
+SCUDO_MAKE_BASIC_TEST(8)
+SCUDO_MAKE_BASIC_TEST(9)
+SCUDO_MAKE_BASIC_TEST(10)
+SCUDO_MAKE_BASIC_TEST(11)
+SCUDO_MAKE_BASIC_TEST(12)
+SCUDO_MAKE_BASIC_TEST(13)
+SCUDO_MAKE_BASIC_TEST(14)
+SCUDO_MAKE_BASIC_TEST(15)
+SCUDO_MAKE_BASIC_TEST(16)
+SCUDO_MAKE_BASIC_TEST(17)
+SCUDO_MAKE_BASIC_TEST(18)
+SCUDO_MAKE_BASIC_TEST(19)
+SCUDO_MAKE_BASIC_TEST(20)
 
 SCUDO_TYPED_TEST(ScudoCombinedTest, ZeroContents) {
   auto *Allocator = this->Allocator.get();
@@ -193,7 +214,7 @@
 SCUDO_TYPED_TEST(ScudoCombinedTest, ZeroFill) {
   auto *Allocator = this->Allocator.get();
 
-  // Ensure that specifying ZeroContents returns a zero'd out block.
+  // Ensure that specifying ZeroFill returns a zero'd out block.
   Allocator->setFillContents(scudo::ZeroFill);
   for (scudo::uptr SizeLog = 0U; SizeLog <= 20U; SizeLog++) {
     for (scudo::uptr Delta = 0U; Delta <= 4U; Delta++) {
@@ -253,7 +274,28 @@
   EXPECT_TRUE(Found);
 }
 
-SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateLarge) {
+SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateLargeIncreasing) {
+  auto *Allocator = this->Allocator.get();
+
+  // Reallocate a chunk all the way up to a secondary allocation, verifying that
+  // we preserve the data in the process.
+  scudo::uptr Size = 16;
+  void *P = Allocator->allocate(Size, Origin);
+  const char Marker = 0xab;
+  memset(P, Marker, Size);
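+  // Doubling past the primary's MaxSize guarantees the chunk migrates to
+  // the secondary allocator.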
+  while (Size < TypeParam::Primary::SizeClassMap::MaxSize * 4) {
+    void *NewP = Allocator->reallocate(P, Size * 2);
+    EXPECT_NE(NewP, nullptr);
+    for (scudo::uptr J = 0; J < Size; J++)
+      EXPECT_EQ((reinterpret_cast<char *>(NewP))[J], Marker);
+    memset(reinterpret_cast<char *>(NewP) + Size, Marker, Size);
+    Size *= 2U;
+    P = NewP;
+  }
+  Allocator->deallocate(P, Origin);
+}
+
+SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateLargeDecreasing) {
   auto *Allocator = this->Allocator.get();
 
   // Reallocate a large chunk all the way down to a byte, verifying that we
@@ -274,7 +316,7 @@
   Allocator->deallocate(P, Origin);
 }
 
-SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateSame) {
+SCUDO_TYPED_TEST(ScudoCombinedDeathTest, ReallocateSame) {
   auto *Allocator = this->Allocator.get();
 
   // Check that reallocating a chunk to a slightly smaller or larger size
@@ -325,7 +367,7 @@
   }
 }
 
-SCUDO_TYPED_TEST(ScudoCombinedTest, UseAfterFree) {
+SCUDO_TYPED_TEST(ScudoCombinedDeathTest, UseAfterFree) {
   auto *Allocator = this->Allocator.get();
 
   // Check that use-after-free is detected.
@@ -352,14 +394,14 @@
   }
 }
 
-SCUDO_TYPED_TEST(ScudoCombinedTest, DisableMemoryTagging) {
+SCUDO_TYPED_TEST(ScudoCombinedDeathTest, DisableMemoryTagging) {
   auto *Allocator = this->Allocator.get();
 
   if (Allocator->useMemoryTaggingTestOnly()) {
     // Check that disabling memory tagging works correctly.
     void *P = Allocator->allocate(2048, Origin);
     EXPECT_DEATH(reinterpret_cast<char *>(P)[2048] = 0xaa, "");
-    scudo::disableMemoryTagChecksTestOnly();
+    scudo::ScopedDisableMemoryTagChecks NoTagChecks;
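+    // The RAII guard re-enables tag checks when it goes out of scope.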
     Allocator->disableMemoryTagging();
     reinterpret_cast<char *>(P)[2048] = 0xaa;
     Allocator->deallocate(P, Origin);
@@ -370,10 +412,6 @@
     Allocator->deallocate(P, Origin);
 
     Allocator->releaseToOS();
-
-    // Disabling memory tag checks may interfere with subsequent tests.
-    // Re-enable them now.
-    scudo::enableMemoryTagChecksTestOnly();
   }
 }
 
@@ -452,15 +490,9 @@
   Allocator->releaseToOS();
 }
 
-#if SCUDO_FUCHSIA
-#define SKIP_ON_FUCHSIA(T) DISABLED_##T
-#else
-#define SKIP_ON_FUCHSIA(T) T
-#endif
-
 // Test that multiple instantiations of the allocator have not messed up the
 // process's signal handlers (GWP-ASan used to do this).
-TEST(ScudoCombinedTest, SKIP_ON_FUCHSIA(testSEGV)) {
+TEST(ScudoCombinedDeathTest, SKIP_ON_FUCHSIA(testSEGV)) {
   const scudo::uptr Size = 4 * scudo::getPageSizeCached();
   scudo::MapPlatformData Data = {};
   void *P = scudo::map(nullptr, Size, "testSEGV", MAP_NOACCESS, &Data);
@@ -476,6 +508,7 @@
   static const scudo::uptr MaxSizeLog = 13;
   static const scudo::u32 MaxNumCachedHint = 4;
   static const scudo::uptr MaxBytesCachedLog = 12;
+  static const scudo::uptr SizeDelta = 0;
 };
 
 static const scudo::uptr DeathRegionSizeLog = 20U;
@@ -490,12 +523,14 @@
   static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
   typedef scudo::uptr PrimaryCompactPtrT;
   static const scudo::uptr PrimaryCompactPtrScale = 0;
+  static const bool PrimaryEnableRandomOffset = true;
+  static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
 
   typedef scudo::MapAllocatorNoCache SecondaryCache;
   template <class A> using TSDRegistryT = scudo::TSDRegistrySharedT<A, 1U, 1U>;
 };
 
-TEST(ScudoCombinedTest, DeathCombined) {
+TEST(ScudoCombinedDeathTest, DeathCombined) {
   using AllocatorT = TestAllocator<DeathConfig>;
   auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT());
 
@@ -528,15 +563,6 @@
   EXPECT_DEATH(Allocator->getUsableSize(P), "");
 }
 
-// Ensure that releaseToOS can be called prior to any other allocator
-// operation without issue.
-TEST(ScudoCombinedTest, ReleaseToOS) {
-  using AllocatorT = TestAllocator<DeathConfig>;
-  auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT());
-
-  Allocator->releaseToOS();
-}
-
 // Verify that when a region gets full, the allocator will still manage to
 // fulfill the allocation through a larger size class.
 TEST(ScudoCombinedTest, FullRegion) {
@@ -569,10 +595,15 @@
   EXPECT_EQ(FailedAllocationsCount, 0U);
 }
 
-TEST(ScudoCombinedTest, OddEven) {
-  using AllocatorT = TestAllocator<scudo::AndroidConfig>;
-  using SizeClassMap = AllocatorT::PrimaryT::SizeClassMap;
-  auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT());
+// Ensure that releaseToOS can be called prior to any other allocator
+// operation without issue.
+SCUDO_TYPED_TEST(ScudoCombinedTest, ReleaseToOS) {
+  auto *Allocator = this->Allocator.get();
+  Allocator->releaseToOS();
+}
+
+SCUDO_TYPED_TEST(ScudoCombinedTest, OddEven) {
+  auto *Allocator = this->Allocator.get();
 
   if (!Allocator->useMemoryTaggingTestOnly())
     return;
@@ -583,6 +614,7 @@
     EXPECT_NE(Tag1 % 2, Tag2 % 2);
   };
 
+  using SizeClassMap = typename TypeParam::Primary::SizeClassMap;
   for (scudo::uptr ClassId = 1U; ClassId <= SizeClassMap::LargestClassId;
        ClassId++) {
     const scudo::uptr Size = SizeClassMap::getSizeByClassId(ClassId);
@@ -608,12 +640,10 @@
   }
 }
 
-TEST(ScudoCombinedTest, DisableMemInit) {
-  using AllocatorT = TestAllocator<scudo::AndroidConfig>;
-  using SizeClassMap = AllocatorT::PrimaryT::SizeClassMap;
-  auto Allocator = std::unique_ptr<AllocatorT>(new AllocatorT());
+SCUDO_TYPED_TEST(ScudoCombinedTest, DisableMemInit) {
+  auto *Allocator = this->Allocator.get();
 
-  std::vector<void *> Ptrs(65536, nullptr);
+  std::vector<void *> Ptrs(65536);
 
   Allocator->setOption(scudo::Option::ThreadDisableMemInit, 1);
 
@@ -623,6 +653,7 @@
   // expected. This is tricky to ensure when MTE is enabled, so this test tries
   // to exercise the relevant code on our MTE path.
   for (scudo::uptr ClassId = 1U; ClassId <= 8; ClassId++) {
+    using SizeClassMap = typename TypeParam::Primary::SizeClassMap;
     const scudo::uptr Size =
         SizeClassMap::getSizeByClassId(ClassId) - scudo::Chunk::getHeaderSize();
     if (Size < 8)
@@ -648,3 +679,23 @@
 
   Allocator->setOption(scudo::Option::ThreadDisableMemInit, 0);
 }
+
+SCUDO_TYPED_TEST(ScudoCombinedTest, ReallocateInPlaceStress) {
+  auto *Allocator = this->Allocator.get();
+
+  // Regression test: make realloc-in-place happen at the very right end of a
+  // mapped region.
+  constexpr int nPtrs = 10000;
+  for (int i = 1; i < 32; ++i) {
+    scudo::uptr Size = 16 * i - 1;
+    std::vector<void *> Ptrs;
+    for (int i = 0; i < nPtrs; ++i) {
+      void *P = Allocator->allocate(Size, Origin);
+      P = Allocator->reallocate(P, Size + 1);
+      Ptrs.push_back(P);
+    }
+
+    for (int i = 0; i < nPtrs; ++i)
+      Allocator->deallocate(Ptrs[i], Origin);
+  }
+}
diff --git a/standalone/tests/common_test.cpp b/standalone/tests/common_test.cpp
new file mode 100644
index 0000000..711e3b2
--- /dev/null
+++ b/standalone/tests/common_test.cpp
@@ -0,0 +1,72 @@
+//===-- common_test.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "internal_defs.h"
+#include "tests/scudo_unit_test.h"
+
+#include "common.h"
+#include <algorithm>
+#include <fstream>
+
+namespace scudo {
+
+static uptr getResidentMemorySize() {
+  if (!SCUDO_LINUX)
+    UNREACHABLE("Not implemented!");
+  uptr Size;
+  uptr Resident;
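+  // /proc/self/statm reports sizes in pages: total program size first,
+  // then the resident set size.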
+  std::ifstream IFS("/proc/self/statm");
+  IFS >> Size;
+  IFS >> Resident;
+  return Resident * getPageSizeCached();
+}
+
+// Fuchsia needs its own getResidentMemorySize implementation.
+TEST(ScudoCommonTest, SKIP_ON_FUCHSIA(ResidentMemorySize)) {
+  uptr OnStart = getResidentMemorySize();
+  EXPECT_GT(OnStart, 0UL);
+
+  const uptr Size = 1ull << 30;
+  const uptr Threshold = Size >> 3;
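+  // Allow an eighth of the mapping as slack for unrelated RSS changes.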
+
+  MapPlatformData Data = {};
+  void *P = map(nullptr, Size, "ResidentMemorySize", 0, &Data);
+  ASSERT_NE(nullptr, P);
+  EXPECT_LT(getResidentMemorySize(), OnStart + Threshold);
+
+  memset(P, 1, Size);
+  EXPECT_GT(getResidentMemorySize(), OnStart + Size - Threshold);
+
+  releasePagesToOS((uptr)P, 0, Size, &Data);
+  EXPECT_LT(getResidentMemorySize(), OnStart + Threshold);
+
+  memset(P, 1, Size);
+  EXPECT_GT(getResidentMemorySize(), OnStart + Size - Threshold);
+
+  unmap(P, Size, 0, &Data);
+}
+
+TEST(ScudoCommonTest, Zeros) {
+  const uptr Size = 1ull << 20;
+
+  MapPlatformData Data = {};
+  uptr *P = reinterpret_cast<uptr *>(map(nullptr, Size, "Zeros", 0, &Data));
+  const ptrdiff_t N = Size / sizeof(*P);
+  ASSERT_NE(nullptr, P);
+  EXPECT_EQ(std::count(P, P + N, 0), N);
+
+  memset(P, 1, Size);
+  EXPECT_EQ(std::count(P, P + N, 0), 0);
+
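+  // Pages released to the OS must read back as zero-filled.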
+  releasePagesToOS((uptr)P, 0, Size, &Data);
+  EXPECT_EQ(std::count(P, P + N, 0), N);
+
+  unmap(P, Size, 0, &Data);
+}
+
+} // namespace scudo
diff --git a/standalone/tests/map_test.cpp b/standalone/tests/map_test.cpp
index 7c40b73..ff05258 100644
--- a/standalone/tests/map_test.cpp
+++ b/standalone/tests/map_test.cpp
@@ -17,10 +17,10 @@
 
 TEST(ScudoMapTest, PageSize) {
   EXPECT_EQ(scudo::getPageSizeCached(),
-            static_cast<scudo::uptr>(getpagesize()));
+            static_cast<scudo::uptr>(sysconf(_SC_PAGESIZE)));
 }
 
-TEST(ScudoMapTest, MapNoAccessUnmap) {
+TEST(ScudoMapDeathTest, MapNoAccessUnmap) {
   const scudo::uptr Size = 4 * scudo::getPageSizeCached();
   scudo::MapPlatformData Data = {};
   void *P = scudo::map(nullptr, Size, MappingName, MAP_NOACCESS, &Data);
@@ -29,16 +29,24 @@
   scudo::unmap(P, Size, UNMAP_ALL, &Data);
 }
 
-TEST(ScudoMapTest, MapUnmap) {
+TEST(ScudoMapDeathTest, MapUnmap) {
   const scudo::uptr Size = 4 * scudo::getPageSizeCached();
-  void *P = scudo::map(nullptr, Size, MappingName, 0, nullptr);
-  EXPECT_NE(P, nullptr);
-  memset(P, 0xaa, Size);
-  scudo::unmap(P, Size, 0, nullptr);
-  EXPECT_DEATH(memset(P, 0xbb, Size), "");
+  EXPECT_DEATH(
+      {
+        // Repeat a few times to avoid missing the crash if the freed range
+        // gets mmapped by unrelated code.
+        for (int i = 0; i < 10; ++i) {
+          void *P = scudo::map(nullptr, Size, MappingName, 0, nullptr);
+          if (!P)
+            continue;
+          scudo::unmap(P, Size, 0, nullptr);
+          memset(P, 0xbb, Size);
+        }
+      },
+      "");
 }
 
-TEST(ScudoMapTest, MapWithGuardUnmap) {
+TEST(ScudoMapDeathTest, MapWithGuardUnmap) {
   const scudo::uptr PageSize = scudo::getPageSizeCached();
   const scudo::uptr Size = 4 * PageSize;
   scudo::MapPlatformData Data = {};
diff --git a/standalone/tests/memtag_test.cpp b/standalone/tests/memtag_test.cpp
new file mode 100644
index 0000000..283edaa
--- /dev/null
+++ b/standalone/tests/memtag_test.cpp
@@ -0,0 +1,188 @@
+//===-- memtag_test.cpp -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "common.h"
+#include "memtag.h"
+#include "platform.h"
+#include "tests/scudo_unit_test.h"
+
+#if SCUDO_LINUX
+namespace scudo {
+
+TEST(MemtagBasicDeathTest, Unsupported) {
+  if (archSupportsMemoryTagging())
+    GTEST_SKIP();
+
+  EXPECT_DEATH(archMemoryTagGranuleSize(), "not supported");
+  EXPECT_DEATH(untagPointer((uptr)0), "not supported");
+  EXPECT_DEATH(extractTag((uptr)0), "not supported");
+
+  EXPECT_DEATH(systemSupportsMemoryTagging(), "not supported");
+  EXPECT_DEATH(systemDetectsMemoryTagFaultsTestOnly(), "not supported");
+  EXPECT_DEATH(enableSystemMemoryTaggingTestOnly(), "not supported");
+
+  EXPECT_DEATH(selectRandomTag((uptr)0, 0), "not supported");
+  EXPECT_DEATH(addFixedTag((uptr)0, 1), "not supported");
+  EXPECT_DEATH(storeTags((uptr)0, (uptr)0 + sizeof(0)), "not supported");
+  EXPECT_DEATH(storeTag((uptr)0), "not supported");
+  EXPECT_DEATH(loadTag((uptr)0), "not supported");
+
+  EXPECT_DEATH(setRandomTag(nullptr, 64, 0, nullptr, nullptr), "not supported");
+  EXPECT_DEATH(untagPointer(nullptr), "not supported");
+  EXPECT_DEATH(loadTag(nullptr), "not supported");
+  EXPECT_DEATH(addFixedTag(nullptr, 0), "not supported");
+}
+
+class MemtagTest : public Test {
+protected:
+  void SetUp() override {
+    if (!archSupportsMemoryTagging() || !systemDetectsMemoryTagFaultsTestOnly())
+      GTEST_SKIP() << "Memory tagging is not supported";
+
+    BufferSize = getPageSizeCached();
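+    // MAP_MEMTAG requests a mapping with memory tagging enabled.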
+    Buffer = reinterpret_cast<u8 *>(
+        map(nullptr, BufferSize, "MemtagTest", MAP_MEMTAG, &Data));
+    Addr = reinterpret_cast<uptr>(Buffer);
+    EXPECT_TRUE(isAligned(Addr, archMemoryTagGranuleSize()));
+    EXPECT_EQ(Addr, untagPointer(Addr));
+  }
+
+  void TearDown() override {
+    if (Buffer)
+      unmap(Buffer, BufferSize, 0, &Data);
+  }
+
+  uptr BufferSize = 0;
+  MapPlatformData Data = {};
+  u8 *Buffer = nullptr;
+  uptr Addr = 0;
+};
+
+using MemtagDeathTest = MemtagTest;
+
+TEST_F(MemtagTest, ArchMemoryTagGranuleSize) {
+  EXPECT_GT(archMemoryTagGranuleSize(), 1u);
+  EXPECT_TRUE(isPowerOfTwo(archMemoryTagGranuleSize()));
+}
+
+TEST_F(MemtagTest, ExtractTag) {
+  uptr Tags = 0;
+  // Try all values for the top byte and check that the tag values are in
+  // the expected range.
+  for (u64 Top = 0; Top < 0x100; ++Top)
+    Tags = Tags | (1u << extractTag(Addr | (Top << 56)));
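+  // Tags are 4 bits wide, so exactly the low 16 bits should end up set.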
+  EXPECT_EQ(0xffffull, Tags);
+}
+
+TEST_F(MemtagDeathTest, AddFixedTag) {
+  for (uptr Tag = 0; Tag < 0x10; ++Tag)
+    EXPECT_EQ(Tag, extractTag(addFixedTag(Addr, Tag)));
+  if (SCUDO_DEBUG) {
+    EXPECT_DEBUG_DEATH(addFixedTag(Addr, 16), "");
+    EXPECT_DEBUG_DEATH(addFixedTag(~Addr, 0), "");
+  }
+}
+
+TEST_F(MemtagTest, UntagPointer) {
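+  // Untagging an all-ones pointer yields a mask with the tag bits cleared.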
+  uptr UnTagMask = untagPointer(~uptr(0));
+  for (u64 Top = 0; Top < 0x100; ++Top) {
+    uptr Ptr = (Addr | (Top << 56)) & UnTagMask;
+    EXPECT_EQ(addFixedTag(Ptr, 0), untagPointer(Ptr));
+  }
+}
+
+TEST_F(MemtagDeathTest, ScopedDisableMemoryTagChecks) {
+  u8 *P = reinterpret_cast<u8 *>(addFixedTag(Addr, 1));
+  EXPECT_NE(P, Buffer);
+
+  EXPECT_DEATH(*P = 20, "");
+  ScopedDisableMemoryTagChecks Disable;
+  *P = 10;
+}
+
+TEST_F(MemtagTest, SelectRandomTag) {
+  for (uptr SrcTag = 0; SrcTag < 0x10; ++SrcTag) {
+    uptr Ptr = addFixedTag(Addr, SrcTag);
+    uptr Tags = 0;
+    for (uptr I = 0; I < 100000; ++I)
+      Tags = Tags | (1u << extractTag(selectRandomTag(Ptr, 0)));
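+    // Tag 0 is reserved for untagged memory and should never be selected.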
+    EXPECT_EQ(0xfffeull, Tags);
+  }
+}
+
+TEST_F(MemtagTest, SelectRandomTagWithMask) {
+  for (uptr j = 0; j < 32; ++j) {
+    for (uptr i = 0; i < 1000; ++i)
+      EXPECT_NE(j, extractTag(selectRandomTag(Addr, 1ull << j)));
+  }
+}
+
+TEST_F(MemtagDeathTest, SKIP_NO_DEBUG(LoadStoreTagUnaligned)) {
+  for (uptr P = Addr; P < Addr + 4 * archMemoryTagGranuleSize(); ++P) {
+    if (P % archMemoryTagGranuleSize() == 0)
+      continue;
+    EXPECT_DEBUG_DEATH(loadTag(P), "");
+    EXPECT_DEBUG_DEATH(storeTag(P), "");
+  }
+}
+
+TEST_F(MemtagTest, LoadStoreTag) {
+  uptr Base = Addr + 0x100;
+  uptr Tagged = addFixedTag(Base, 7);
+  storeTag(Tagged);
+
+  EXPECT_EQ(Base - archMemoryTagGranuleSize(),
+            loadTag(Base - archMemoryTagGranuleSize()));
+  EXPECT_EQ(Tagged, loadTag(Base));
+  EXPECT_EQ(Base + archMemoryTagGranuleSize(),
+            loadTag(Base + archMemoryTagGranuleSize()));
+}
+
+TEST_F(MemtagDeathTest, SKIP_NO_DEBUG(StoreTagsUnaligned)) {
+  for (uptr P = Addr; P < Addr + 4 * archMemoryTagGranuleSize(); ++P) {
+    uptr Tagged = addFixedTag(P, 5);
+    if (Tagged % archMemoryTagGranuleSize() == 0)
+      continue;
+    EXPECT_DEBUG_DEATH(storeTags(Tagged, Tagged), "");
+  }
+}
+
+TEST_F(MemtagTest, StoreTags) {
+  const uptr MaxTaggedSize = 4 * archMemoryTagGranuleSize();
+  for (uptr Size = 0; Size <= MaxTaggedSize; ++Size) {
+    uptr NoTagBegin = Addr + archMemoryTagGranuleSize();
+    uptr NoTagEnd = NoTagBegin + Size;
+
+    u8 Tag = 5;
+
+    uptr TaggedBegin = addFixedTag(NoTagBegin, Tag);
+    uptr TaggedEnd = addFixedTag(NoTagEnd, Tag);
+
+    EXPECT_EQ(roundUpTo(TaggedEnd, archMemoryTagGranuleSize()),
+              storeTags(TaggedBegin, TaggedEnd));
+
+    uptr LoadPtr = Addr;
+    // Untagged left granule.
+    EXPECT_EQ(LoadPtr, loadTag(LoadPtr));
+
+    for (LoadPtr += archMemoryTagGranuleSize(); LoadPtr < NoTagEnd;
+         LoadPtr += archMemoryTagGranuleSize()) {
+      EXPECT_EQ(addFixedTag(LoadPtr, 5), loadTag(LoadPtr));
+    }
+
+    // Untagged right granule.
+    EXPECT_EQ(LoadPtr, loadTag(LoadPtr));
+
+    // Reset tags without using StoreTags.
+    releasePagesToOS(Addr, 0, BufferSize, &Data);
+  }
+}
+
+} // namespace scudo
+
+#endif
diff --git a/standalone/tests/mutex_test.cpp b/standalone/tests/mutex_test.cpp
index ed56cb5..d3242a3 100644
--- a/standalone/tests/mutex_test.cpp
+++ b/standalone/tests/mutex_test.cpp
@@ -43,7 +43,7 @@
   void backoff() {
     volatile T LocalData[Size] = {};
     for (scudo::u32 I = 0; I < Size; I++) {
-      LocalData[I]++;
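+      // Incrementing a volatile operand is deprecated in C++20; spell out
+      // the read-modify-write instead.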
+      LocalData[I] = LocalData[I] + 1;
       EXPECT_EQ(LocalData[I], 1U);
     }
   }
@@ -82,7 +82,6 @@
 
 TEST(ScudoMutexTest, Mutex) {
   scudo::HybridMutex M;
-  M.init();
   TestData Data(M);
   pthread_t Threads[NumberOfThreads];
   for (scudo::u32 I = 0; I < NumberOfThreads; I++)
@@ -93,7 +92,6 @@
 
 TEST(ScudoMutexTest, MutexTry) {
   scudo::HybridMutex M;
-  M.init();
   TestData Data(M);
   pthread_t Threads[NumberOfThreads];
   for (scudo::u32 I = 0; I < NumberOfThreads; I++)
diff --git a/standalone/tests/primary_test.cpp b/standalone/tests/primary_test.cpp
index e7aa6f7..283e297 100644
--- a/standalone/tests/primary_test.cpp
+++ b/standalone/tests/primary_test.cpp
@@ -29,24 +29,40 @@
   static const bool MaySupportMemoryTagging = false;
   typedef scudo::uptr PrimaryCompactPtrT;
   static const scudo::uptr PrimaryCompactPtrScale = 0;
+  static const bool PrimaryEnableRandomOffset = true;
+  static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
 };
 
 struct TestConfig2 {
+#if defined(__mips__)
+  // Unable to allocate a larger region size on QEMU-user.
+  static const scudo::uptr PrimaryRegionSizeLog = 23U;
+#else
   static const scudo::uptr PrimaryRegionSizeLog = 24U;
+#endif
   static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
   static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
   static const bool MaySupportMemoryTagging = false;
   typedef scudo::uptr PrimaryCompactPtrT;
   static const scudo::uptr PrimaryCompactPtrScale = 0;
+  static const bool PrimaryEnableRandomOffset = true;
+  static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
 };
 
 struct TestConfig3 {
+#if defined(__mips__)
+  // Unable to allocate a larger region size on QEMU-user.
+  static const scudo::uptr PrimaryRegionSizeLog = 23U;
+#else
   static const scudo::uptr PrimaryRegionSizeLog = 24U;
+#endif
   static const scudo::s32 PrimaryMinReleaseToOsIntervalMs = INT32_MIN;
   static const scudo::s32 PrimaryMaxReleaseToOsIntervalMs = INT32_MAX;
   static const bool MaySupportMemoryTagging = true;
   typedef scudo::uptr PrimaryCompactPtrT;
   static const scudo::uptr PrimaryCompactPtrScale = 0;
+  static const bool PrimaryEnableRandomOffset = true;
+  static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
 };
 
 template <typename BaseConfig, typename SizeClassMapT>
@@ -89,7 +105,7 @@
 
 #define SCUDO_TYPED_TEST_TYPE(FIXTURE, NAME, TYPE)                             \
   using FIXTURE##NAME##_##TYPE = FIXTURE##NAME<TYPE>;                          \
-  TEST_F(FIXTURE##NAME##_##TYPE, NAME) { Run(); }
+  TEST_F(FIXTURE##NAME##_##TYPE, NAME) { FIXTURE##NAME<TYPE>::Run(); }
 
 #define SCUDO_TYPED_TEST(FIXTURE, NAME)                                        \
   template <class TypeParam>                                                   \
@@ -122,7 +138,7 @@
   }
   Cache.destroy(nullptr);
   Allocator->releaseToOS();
-  scudo::ScopedString Str(1024);
+  scudo::ScopedString Str;
   Allocator->getStats(&Str);
   Str.output();
 }
@@ -135,6 +151,8 @@
   static const bool MaySupportMemoryTagging = false;
   typedef scudo::uptr PrimaryCompactPtrT;
   static const scudo::uptr PrimaryCompactPtrScale = 0;
+  static const bool PrimaryEnableRandomOffset = true;
+  static const scudo::uptr PrimaryMapSizeIncrement = 1UL << 18;
 };
 
 // The 64-bit SizeClassAllocator can be easily OOM'd with small region sizes.
@@ -168,7 +186,7 @@
   }
   Cache.destroy(nullptr);
   Allocator.releaseToOS();
-  scudo::ScopedString Str(1024);
+  scudo::ScopedString Str;
   Allocator.getStats(&Str);
   Str.output();
   EXPECT_EQ(AllocationFailed, true);
@@ -189,7 +207,7 @@
     V.push_back(std::make_pair(ClassId, P));
   }
   scudo::uptr Found = 0;
-  auto Lambda = [V, &Found](scudo::uptr Block) {
+  auto Lambda = [&V, &Found](scudo::uptr Block) {
     for (const auto &Pair : V) {
       if (Pair.second == reinterpret_cast<void *>(Block))
         Found++;
@@ -206,7 +224,7 @@
   }
   Cache.destroy(nullptr);
   Allocator->releaseToOS();
-  scudo::ScopedString Str(1024);
+  scudo::ScopedString Str;
   Allocator->getStats(&Str);
   Str.output();
 }
@@ -253,7 +271,7 @@
   for (auto &T : Threads)
     T.join();
   Allocator->releaseToOS();
-  scudo::ScopedString Str(1024);
+  scudo::ScopedString Str;
   Allocator->getStats(&Str);
   Str.output();
 }
diff --git a/standalone/tests/quarantine_test.cpp b/standalone/tests/quarantine_test.cpp
index 91de56a..972c98d 100644
--- a/standalone/tests/quarantine_test.cpp
+++ b/standalone/tests/quarantine_test.cpp
@@ -214,7 +214,7 @@
   Quarantine.drainAndRecycle(&Cache, Cb);
   EXPECT_EQ(Cache.getSize(), 0UL);
 
-  scudo::ScopedString Str(1024);
+  scudo::ScopedString Str;
   Quarantine.getStats(&Str);
   Str.output();
 }
@@ -246,7 +246,7 @@
   for (scudo::uptr I = 0; I < NumberOfThreads; I++)
     pthread_join(T[I].Thread, 0);
 
-  scudo::ScopedString Str(1024);
+  scudo::ScopedString Str;
   Quarantine.getStats(&Str);
   Str.output();
 
diff --git a/standalone/tests/report_test.cpp b/standalone/tests/report_test.cpp
index 09f03f1..81587ba 100644
--- a/standalone/tests/report_test.cpp
+++ b/standalone/tests/report_test.cpp
@@ -10,7 +10,14 @@
 
 #include "report.h"
 
-TEST(ScudoReportTest, Generic) {
+TEST(ScudoReportDeathTest, Check) {
+  CHECK_LT(-1, 1);
+  EXPECT_DEATH(CHECK_GT(-1, 1),
+               "\\(-1\\) > \\(1\\) \\(\\(u64\\)op1=18446744073709551615, "
+               "\\(u64\\)op2=1");
+}
+
+TEST(ScudoReportDeathTest, Generic) {
   // Potentially unused if EXPECT_DEATH isn't defined.
   UNUSED void *P = reinterpret_cast<void *>(0x42424242U);
   EXPECT_DEATH(scudo::reportError("TEST123"), "Scudo ERROR.*TEST123");
@@ -38,7 +45,7 @@
                "Scudo ERROR.*42424242.*123.*456");
 }
 
-TEST(ScudoReportTest, CSpecific) {
+TEST(ScudoReportDeathTest, CSpecific) {
   EXPECT_DEATH(scudo::reportAlignmentNotPowerOfTwo(123), "Scudo ERROR.*123");
   EXPECT_DEATH(scudo::reportCallocOverflow(123, 456), "Scudo ERROR.*123.*456");
   EXPECT_DEATH(scudo::reportInvalidPosixMemalignAlignment(789),
diff --git a/standalone/tests/scudo_unit_test.h b/standalone/tests/scudo_unit_test.h
index 555a935..1665fa8 100644
--- a/standalone/tests/scudo_unit_test.h
+++ b/standalone/tests/scudo_unit_test.h
@@ -33,4 +33,16 @@
 #define EXPECT_STREQ(X, Y) EXPECT_EQ(strcmp(X, Y), 0)
 #endif
 
+#if SCUDO_FUCHSIA
+#define SKIP_ON_FUCHSIA(T) DISABLED_##T
+#else
+#define SKIP_ON_FUCHSIA(T) T
+#endif
+
+#if SCUDO_DEBUG
+#define SKIP_NO_DEBUG(T) T
+#else
+#define SKIP_NO_DEBUG(T) DISABLED_##T
+#endif
+
 extern bool UseQuarantine;
diff --git a/standalone/tests/scudo_unit_test_main.cpp b/standalone/tests/scudo_unit_test_main.cpp
index 9bbf6e7..fbfefa5 100644
--- a/standalone/tests/scudo_unit_test_main.cpp
+++ b/standalone/tests/scudo_unit_test_main.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "memtag.h"
 #include "tests/scudo_unit_test.h"
 
 // Match Android's default configuration, which disables Scudo's mismatch
@@ -16,12 +17,27 @@
 #define DEALLOC_TYPE_MISMATCH "true"
 #endif
 
+static void EnableMemoryTaggingIfSupported() {
+  if (!scudo::archSupportsMemoryTagging())
+    return;
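+  // A function-local static ensures the tagging is enabled at most once.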
+  static bool Done = []() {
+    if (!scudo::systemDetectsMemoryTagFaultsTestOnly())
+      scudo::enableSystemMemoryTaggingTestOnly();
+    return true;
+  }();
+  (void)Done;
+}
+
 // This allows us to turn on/off a Quarantine for specific tests. The Quarantine
 // parameters are on the low end, to avoid having to loop excessively in some
 // tests.
 bool UseQuarantine = true;
 extern "C" __attribute__((visibility("default"))) const char *
 __scudo_default_options() {
+  // The wrapper tests initialize the global allocator early, before main(). We
+  // need to have Memory Tagging enabled before that happens or the allocator
+  // will disable the feature entirely.
+  EnableMemoryTaggingIfSupported();
   if (!UseQuarantine)
     return "dealloc_type_mismatch=" DEALLOC_TYPE_MISMATCH;
   return "quarantine_size_kb=256:thread_local_quarantine_size_kb=128:"
@@ -33,6 +49,7 @@
 // for Fuchsia builds.
 #if !SCUDO_FUCHSIA
 int main(int argc, char **argv) {
+  EnableMemoryTaggingIfSupported();
   testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
diff --git a/standalone/tests/secondary_test.cpp b/standalone/tests/secondary_test.cpp
index a557042..e656466 100644
--- a/standalone/tests/secondary_test.cpp
+++ b/standalone/tests/secondary_test.cpp
@@ -6,55 +6,80 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "memtag.h"
 #include "tests/scudo_unit_test.h"
 
 #include "allocator_config.h"
 #include "secondary.h"
 
-#include <stdio.h>
-
+#include <algorithm>
 #include <condition_variable>
+#include <memory>
 #include <mutex>
 #include <random>
+#include <stdio.h>
 #include <thread>
 #include <vector>
 
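+// Build the scudo::Options for a config: set the UseMemoryTagging bit only
+// when the config, the architecture, and the running system all support it.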
+template <typename Config> static scudo::Options getOptionsForConfig() {
+  if (!Config::MaySupportMemoryTagging || !scudo::archSupportsMemoryTagging() ||
+      !scudo::systemSupportsMemoryTagging())
+    return {};
+  scudo::AtomicOptions AO;
+  AO.set(scudo::OptionBit::UseMemoryTagging);
+  return AO.load();
+}
+
 template <typename Config> static void testSecondaryBasic(void) {
   using SecondaryT = scudo::MapAllocator<Config>;
+  scudo::Options Options = getOptionsForConfig<Config>();
 
   scudo::GlobalStats S;
   S.init();
   std::unique_ptr<SecondaryT> L(new SecondaryT);
   L->init(&S);
   const scudo::uptr Size = 1U << 16;
-  void *P = L->allocate(scudo::Options{}, Size);
+  void *P = L->allocate(Options, Size);
   EXPECT_NE(P, nullptr);
   memset(P, 'A', Size);
   EXPECT_GE(SecondaryT::getBlockSize(P), Size);
-  L->deallocate(scudo::Options{}, P);
+  L->deallocate(Options, P);
+
   // If the Secondary can't cache that pointer, it will be unmapped.
-  if (!L->canCache(Size))
-    EXPECT_DEATH(memset(P, 'A', Size), "");
+  if (!L->canCache(Size)) {
+    EXPECT_DEATH(
+        {
+          // Repeat a few times to avoid missing the crash if the freed range
+          // gets mmapped by unrelated code.
+          for (int i = 0; i < 10; ++i) {
+            P = L->allocate(Options, Size);
+            L->deallocate(Options, P);
+            memset(P, 'A', Size);
+          }
+        },
+        "");
+  }
 
   const scudo::uptr Align = 1U << 16;
-  P = L->allocate(scudo::Options{}, Size + Align, Align);
+  P = L->allocate(Options, Size + Align, Align);
   EXPECT_NE(P, nullptr);
   void *AlignedP = reinterpret_cast<void *>(
       scudo::roundUpTo(reinterpret_cast<scudo::uptr>(P), Align));
   memset(AlignedP, 'A', Size);
-  L->deallocate(scudo::Options{}, P);
+  L->deallocate(Options, P);
 
   std::vector<void *> V;
   for (scudo::uptr I = 0; I < 32U; I++)
-    V.push_back(L->allocate(scudo::Options{}, Size));
+    V.push_back(L->allocate(Options, Size));
   std::shuffle(V.begin(), V.end(), std::mt19937(std::random_device()()));
   while (!V.empty()) {
-    L->deallocate(scudo::Options{}, V.back());
+    L->deallocate(Options, V.back());
     V.pop_back();
   }
-  scudo::ScopedString Str(1024);
+  scudo::ScopedString Str;
   L->getStats(&Str);
   Str.output();
+  L->unmapTestOnly();
 }
 
 struct NoCacheConfig {
@@ -79,16 +104,25 @@
   testSecondaryBasic<TestConfig>();
 }
 
-using LargeAllocator = scudo::MapAllocator<scudo::DefaultConfig>;
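+// Fixture that creates and initializes a MapAllocator for each test, and
+// unmaps it afterwards.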
+struct MapAllocatorTest : public Test {
+  using Config = scudo::DefaultConfig;
+  using LargeAllocator = scudo::MapAllocator<Config>;
+
+  void SetUp() override { Allocator->init(nullptr); }
+
+  void TearDown() override { Allocator->unmapTestOnly(); }
+
+  std::unique_ptr<LargeAllocator> Allocator =
+      std::make_unique<LargeAllocator>();
+  scudo::Options Options = getOptionsForConfig<Config>();
+};
 
 // This exercises a variety of combinations of size and alignment for the
 // MapAllocator. The size computation done here mimic the ones done by the
 // combined allocator.
-TEST(ScudoSecondaryTest, SecondaryCombinations) {
+TEST_F(MapAllocatorTest, SecondaryCombinations) {
   constexpr scudo::uptr MinAlign = FIRST_32_SECOND_64(8, 16);
   constexpr scudo::uptr HeaderSize = scudo::roundUpTo(8, MinAlign);
-  std::unique_ptr<LargeAllocator> L(new LargeAllocator);
-  L->init(nullptr);
   for (scudo::uptr SizeLog = 0; SizeLog <= 20; SizeLog++) {
     for (scudo::uptr AlignLog = FIRST_32_SECOND_64(3, 4); AlignLog <= 16;
          AlignLog++) {
@@ -100,100 +134,102 @@
             scudo::roundUpTo((1U << SizeLog) + Delta, MinAlign);
         const scudo::uptr Size =
             HeaderSize + UserSize + (Align > MinAlign ? Align - HeaderSize : 0);
-        void *P = L->allocate(scudo::Options{}, Size, Align);
+        void *P = Allocator->allocate(Options, Size, Align);
         EXPECT_NE(P, nullptr);
         void *AlignedP = reinterpret_cast<void *>(
             scudo::roundUpTo(reinterpret_cast<scudo::uptr>(P), Align));
         memset(AlignedP, 0xff, UserSize);
-        L->deallocate(scudo::Options{}, P);
+        Allocator->deallocate(Options, P);
       }
     }
   }
-  scudo::ScopedString Str(1024);
-  L->getStats(&Str);
+  scudo::ScopedString Str;
+  Allocator->getStats(&Str);
   Str.output();
 }
 
-TEST(ScudoSecondaryTest, SecondaryIterate) {
-  std::unique_ptr<LargeAllocator> L(new LargeAllocator);
-  L->init(nullptr);
+TEST_F(MapAllocatorTest, SecondaryIterate) {
   std::vector<void *> V;
   const scudo::uptr PageSize = scudo::getPageSizeCached();
   for (scudo::uptr I = 0; I < 32U; I++)
-    V.push_back(L->allocate(scudo::Options{}, (std::rand() % 16) * PageSize));
-  auto Lambda = [V](scudo::uptr Block) {
+    V.push_back(Allocator->allocate(Options, (std::rand() % 16) * PageSize));
+  auto Lambda = [&V](scudo::uptr Block) {
     EXPECT_NE(std::find(V.begin(), V.end(), reinterpret_cast<void *>(Block)),
               V.end());
   };
-  L->disable();
-  L->iterateOverBlocks(Lambda);
-  L->enable();
+  Allocator->disable();
+  Allocator->iterateOverBlocks(Lambda);
+  Allocator->enable();
   while (!V.empty()) {
-    L->deallocate(scudo::Options{}, V.back());
+    Allocator->deallocate(Options, V.back());
     V.pop_back();
   }
-  scudo::ScopedString Str(1024);
-  L->getStats(&Str);
+  scudo::ScopedString Str;
+  Allocator->getStats(&Str);
   Str.output();
 }
 
-TEST(ScudoSecondaryTest, SecondaryOptions) {
-  std::unique_ptr<LargeAllocator> L(new LargeAllocator);
-  L->init(nullptr);
+TEST_F(MapAllocatorTest, SecondaryOptions) {
   // Attempt to set a maximum number of entries higher than the array size.
-  EXPECT_FALSE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4096U));
+  EXPECT_FALSE(
+      Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 4096U));
   // A negative number will be cast to a scudo::u32, and fail.
-  EXPECT_FALSE(L->setOption(scudo::Option::MaxCacheEntriesCount, -1));
-  if (L->canCache(0U)) {
+  EXPECT_FALSE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, -1));
+  if (Allocator->canCache(0U)) {
     // Various valid combinations.
-    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4U));
-    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20));
-    EXPECT_TRUE(L->canCache(1UL << 18));
-    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 17));
-    EXPECT_FALSE(L->canCache(1UL << 18));
-    EXPECT_TRUE(L->canCache(1UL << 16));
-    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 0U));
-    EXPECT_FALSE(L->canCache(1UL << 16));
-    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntriesCount, 4U));
-    EXPECT_TRUE(L->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20));
-    EXPECT_TRUE(L->canCache(1UL << 16));
+    EXPECT_TRUE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 4U));
+    EXPECT_TRUE(
+        Allocator->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20));
+    EXPECT_TRUE(Allocator->canCache(1UL << 18));
+    EXPECT_TRUE(
+        Allocator->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 17));
+    EXPECT_FALSE(Allocator->canCache(1UL << 18));
+    EXPECT_TRUE(Allocator->canCache(1UL << 16));
+    EXPECT_TRUE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 0U));
+    EXPECT_FALSE(Allocator->canCache(1UL << 16));
+    EXPECT_TRUE(Allocator->setOption(scudo::Option::MaxCacheEntriesCount, 4U));
+    EXPECT_TRUE(
+        Allocator->setOption(scudo::Option::MaxCacheEntrySize, 1UL << 20));
+    EXPECT_TRUE(Allocator->canCache(1UL << 16));
   }
 }
 
-static std::mutex Mutex;
-static std::condition_variable Cv;
-static bool Ready;
+struct MapAllocatorWithReleaseTest : public MapAllocatorTest {
+  void SetUp() override { Allocator->init(nullptr, /*ReleaseToOsInterval=*/0); }
 
-static void performAllocations(LargeAllocator *L) {
-  std::vector<void *> V;
-  const scudo::uptr PageSize = scudo::getPageSizeCached();
-  {
-    std::unique_lock<std::mutex> Lock(Mutex);
-    while (!Ready)
-      Cv.wait(Lock);
+  void performAllocations() {
+    std::vector<void *> V;
+    const scudo::uptr PageSize = scudo::getPageSizeCached();
+    {
+      std::unique_lock<std::mutex> Lock(Mutex);
+      while (!Ready)
+        Cv.wait(Lock);
+    }
+    for (scudo::uptr I = 0; I < 128U; I++) {
+      // Deallocate 75% of the blocks.
+      const bool Deallocate = (rand() & 3) != 0;
+      void *P = Allocator->allocate(Options, (std::rand() % 16) * PageSize);
+      if (Deallocate)
+        Allocator->deallocate(Options, P);
+      else
+        V.push_back(P);
+    }
+    while (!V.empty()) {
+      Allocator->deallocate(Options, V.back());
+      V.pop_back();
+    }
   }
-  for (scudo::uptr I = 0; I < 128U; I++) {
-    // Deallocate 75% of the blocks.
-    const bool Deallocate = (rand() & 3) != 0;
-    void *P = L->allocate(scudo::Options{}, (std::rand() % 16) * PageSize);
-    if (Deallocate)
-      L->deallocate(scudo::Options{}, P);
-    else
-      V.push_back(P);
-  }
-  while (!V.empty()) {
-    L->deallocate(scudo::Options{}, V.back());
-    V.pop_back();
-  }
-}
 
-TEST(ScudoSecondaryTest, SecondaryThreadsRace) {
-  Ready = false;
-  std::unique_ptr<LargeAllocator> L(new LargeAllocator);
-  L->init(nullptr, /*ReleaseToOsInterval=*/0);
+  std::mutex Mutex;
+  std::condition_variable Cv;
+  bool Ready = false;
+};
+
+TEST_F(MapAllocatorWithReleaseTest, SecondaryThreadsRace) {
   std::thread Threads[16];
   for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++)
-    Threads[I] = std::thread(performAllocations, L.get());
+    Threads[I] =
+        std::thread(&MapAllocatorWithReleaseTest::performAllocations, this);
   {
     std::unique_lock<std::mutex> Lock(Mutex);
     Ready = true;
@@ -201,7 +237,7 @@
   }
   for (auto &T : Threads)
     T.join();
-  scudo::ScopedString Str(1024);
-  L->getStats(&Str);
+  scudo::ScopedString Str;
+  Allocator->getStats(&Str);
   Str.output();
 }
diff --git a/standalone/tests/size_class_map_test.cpp b/standalone/tests/size_class_map_test.cpp
index 88859de..076f36f 100644
--- a/standalone/tests/size_class_map_test.cpp
+++ b/standalone/tests/size_class_map_test.cpp
@@ -35,6 +35,7 @@
   static const scudo::uptr MaxSizeLog = 5;
   static const scudo::u32 MaxNumCachedHint = 0;
   static const scudo::uptr MaxBytesCachedLog = 0;
+  static const scudo::uptr SizeDelta = 0;
 };
 
 TEST(ScudoSizeClassMapTest, OneClassSizeClassMap) {
@@ -49,6 +50,7 @@
   static const scudo::uptr MaxSizeLog = 63;
   static const scudo::u32 MaxNumCachedHint = 128;
   static const scudo::uptr MaxBytesCachedLog = 16;
+  static const scudo::uptr SizeDelta = 0;
 };
 
 TEST(ScudoSizeClassMapTest, LargeMaxSizeClassMap) {
diff --git a/standalone/tests/strings_test.cpp b/standalone/tests/strings_test.cpp
index eed174d..6d7e78a 100644
--- a/standalone/tests/strings_test.cpp
+++ b/standalone/tests/strings_test.cpp
@@ -12,8 +12,14 @@
 
 #include <limits.h>
 
+TEST(ScudoStringsTest, Constructor) {
+  scudo::ScopedString Str;
+  EXPECT_EQ(0ul, Str.length());
+  EXPECT_EQ('\0', *Str.data());
+}
+
 TEST(ScudoStringsTest, Basic) {
-  scudo::ScopedString Str(128);
+  scudo::ScopedString Str;
   Str.append("a%db%zdc%ue%zuf%xh%zxq%pe%sr", static_cast<int>(-1),
              static_cast<scudo::uptr>(-2), static_cast<unsigned>(-4),
              static_cast<scudo::uptr>(5), static_cast<unsigned>(10),
@@ -28,8 +34,25 @@
   EXPECT_STREQ(expectedString.c_str(), Str.data());
 }
 
+TEST(ScudoStringsTest, Clear) {
+  scudo::ScopedString Str;
+  Str.append("123");
+  Str.clear();
+  EXPECT_EQ(0ul, Str.length());
+  EXPECT_EQ('\0', *Str.data());
+}
+
+TEST(ScudoStringsTest, ClearLarge) {
+  scudo::ScopedString Str;
+  for (int i = 0; i < 10000; ++i)
+    Str.append("123");
+  Str.clear();
+  EXPECT_EQ(0ul, Str.length());
+  EXPECT_EQ('\0', *Str.data());
+}
+
 TEST(ScudoStringsTest, Precision) {
-  scudo::ScopedString Str(128);
+  scudo::ScopedString Str;
   Str.append("%.*s", 3, "12345");
   EXPECT_EQ(Str.length(), strlen(Str.data()));
   EXPECT_STREQ("123", Str.data());
@@ -52,7 +75,7 @@
   // Use a ScopedString that spans a page, and attempt to write past the end
   // of it with variations of append. The expectation is for nothing to crash.
   const scudo::uptr PageSize = scudo::getPageSizeCached();
-  scudo::ScopedString Str(PageSize);
+  scudo::ScopedString Str;
   Str.clear();
   fillString(Str, 2 * PageSize);
   Str.clear();
@@ -68,7 +91,7 @@
 
 template <typename T>
 static void testAgainstLibc(const char *Format, T Arg1, T Arg2) {
-  scudo::ScopedString Str(128);
+  scudo::ScopedString Str;
   Str.append(Format, Arg1, Arg2);
   char Buffer[128];
   snprintf(Buffer, sizeof(Buffer), Format, Arg1, Arg2);
diff --git a/standalone/tests/tsd_test.cpp b/standalone/tests/tsd_test.cpp
index 58ac9e7..17387ee 100644
--- a/standalone/tests/tsd_test.cpp
+++ b/standalone/tests/tsd_test.cpp
@@ -11,6 +11,8 @@
 #include "tsd_exclusive.h"
 #include "tsd_shared.h"
 
+#include <stdlib.h>
+
 #include <condition_variable>
 #include <mutex>
 #include <set>
@@ -26,23 +28,29 @@
   using CacheT = struct MockCache { volatile scudo::uptr Canary; };
   using QuarantineCacheT = struct MockQuarantine {};
 
-  void initLinkerInitialized() {
+  void init() {
     // This should only be called once by the registry.
     EXPECT_FALSE(Initialized);
     Initialized = true;
   }
-  void reset() { memset(this, 0, sizeof(*this)); }
 
-  void unmapTestOnly() { TSDRegistry.unmapTestOnly(); }
-  void initCache(CacheT *Cache) { memset(Cache, 0, sizeof(*Cache)); }
+  void unmapTestOnly() { TSDRegistry.unmapTestOnly(this); }
+  void initCache(CacheT *Cache) { *Cache = {}; }
   void commitBack(scudo::TSD<MockAllocator> *TSD) {}
   TSDRegistryT *getTSDRegistry() { return &TSDRegistry; }
   void callPostInitCallback() {}
 
   bool isInitialized() { return Initialized; }
 
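+  // The TSD registry is over-aligned, so the default operator new may not
+  // return suitably aligned storage; use posix_memalign instead.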
+  void *operator new(size_t Size) {
+    void *P = nullptr;
+    EXPECT_EQ(0, posix_memalign(&P, alignof(ThisT), Size));
+    return P;
+  }
+  void operator delete(void *P) { free(P); }
+
 private:
-  bool Initialized;
+  bool Initialized = false;
   TSDRegistryT TSDRegistry;
 };
 
@@ -69,11 +77,10 @@
   };
   std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT,
                                                            Deleter);
-  Allocator->reset();
   EXPECT_FALSE(Allocator->isInitialized());
 
   auto Registry = Allocator->getTSDRegistry();
-  Registry->initLinkerInitialized(Allocator.get());
+  Registry->init(Allocator.get());
   EXPECT_TRUE(Allocator->isInitialized());
 }
 
@@ -84,7 +91,6 @@
   };
   std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT,
                                                            Deleter);
-  Allocator->reset();
   EXPECT_FALSE(Allocator->isInitialized());
 
   auto Registry = Allocator->getTSDRegistry();
@@ -153,7 +159,6 @@
   };
   std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT,
                                                            Deleter);
-  Allocator->reset();
   std::thread Threads[32];
   for (scudo::uptr I = 0; I < ARRAY_SIZE(Threads); I++)
     Threads[I] = std::thread(stressCache<AllocatorT>, Allocator.get());
@@ -209,7 +214,6 @@
   };
   std::unique_ptr<AllocatorT, decltype(Deleter)> Allocator(new AllocatorT,
                                                            Deleter);
-  Allocator->reset();
   // We attempt to use as many TSDs as the shared cache offers by creating a
   // decent amount of threads that will be run concurrently and attempt to get
   // and lock TSDs. We put them all in a set and count the number of entries
diff --git a/standalone/tests/vector_test.cpp b/standalone/tests/vector_test.cpp
index d2c6a9b..dc23c2a 100644
--- a/standalone/tests/vector_test.cpp
+++ b/standalone/tests/vector_test.cpp
@@ -23,14 +23,14 @@
 }
 
 TEST(ScudoVectorTest, Stride) {
-  scudo::Vector<int> V;
-  for (int i = 0; i < 1000; i++) {
-    V.push_back(i);
-    EXPECT_EQ(V.size(), i + 1U);
-    EXPECT_EQ(V[i], i);
+  scudo::Vector<scudo::uptr> V;
+  for (scudo::uptr I = 0; I < 1000; I++) {
+    V.push_back(I);
+    EXPECT_EQ(V.size(), I + 1U);
+    EXPECT_EQ(V[I], I);
   }
-  for (int i = 0; i < 1000; i++)
-    EXPECT_EQ(V[i], i);
+  for (scudo::uptr I = 0; I < 1000; I++)
+    EXPECT_EQ(V[I], I);
 }
 
 TEST(ScudoVectorTest, ResizeReduction) {
diff --git a/standalone/tests/wrappers_c_test.cpp b/standalone/tests/wrappers_c_test.cpp
index eed8f03..616cf54 100644
--- a/standalone/tests/wrappers_c_test.cpp
+++ b/standalone/tests/wrappers_c_test.cpp
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "memtag.h"
 #include "scudo/interface.h"
 #include "tests/scudo_unit_test.h"
 
@@ -15,6 +16,10 @@
 #include <stdlib.h>
 #include <unistd.h>
 
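+// Non-glibc C libraries do not define __GLIBC_PREREQ; default it to 0 so
+// the glibc-only mallinfo2 test below compiles out.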
+#ifndef __GLIBC_PREREQ
+#define __GLIBC_PREREQ(x, y) 0
+#endif
+
 extern "C" {
 void malloc_enable(void);
 void malloc_disable(void);
@@ -37,7 +42,7 @@
 
 static const size_t Size = 100U;
 
-TEST(ScudoWrappersCTest, Malloc) {
+TEST(ScudoWrappersCDeathTest, Malloc) {
   void *P = malloc(Size);
   EXPECT_NE(P, nullptr);
   EXPECT_LE(Size, malloc_usable_size(P));
@@ -153,7 +158,7 @@
   EXPECT_EQ(errno, EINVAL);
 }
 
-TEST(ScudoWrappersCTest, Realloc) {
+TEST(ScudoWrappersCDeathTest, Realloc) {
   // realloc(nullptr, N) is malloc(N)
   void *P = realloc(nullptr, 0U);
   EXPECT_NE(P, nullptr);
@@ -257,8 +262,10 @@
 
 #if !SCUDO_FUCHSIA
 TEST(ScudoWrappersCTest, MallInfo) {
+  // mallinfo is deprecated.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wdeprecated-declarations"
   const size_t BypassQuarantineSize = 1024U;
-
   struct mallinfo MI = mallinfo();
   size_t Allocated = MI.uordblks;
   void *P = malloc(BypassQuarantineSize);
@@ -270,6 +277,24 @@
   free(P);
   MI = mallinfo();
   EXPECT_GE(static_cast<size_t>(MI.fordblks), Free + BypassQuarantineSize);
+#pragma clang diagnostic pop
+}
+#endif
+
+#if __GLIBC_PREREQ(2, 33)
+TEST(ScudoWrappersCTest, MallInfo2) {
+  const size_t BypassQuarantineSize = 1024U;
+  struct mallinfo2 MI = mallinfo2();
+  size_t Allocated = MI.uordblks;
+  void *P = malloc(BypassQuarantineSize);
+  EXPECT_NE(P, nullptr);
+  MI = mallinfo2();
+  EXPECT_GE(MI.uordblks, Allocated + BypassQuarantineSize);
+  EXPECT_GT(MI.hblkhd, 0U);
+  size_t Free = MI.fordblks;
+  free(P);
+  MI = mallinfo2();
+  EXPECT_GE(MI.fordblks, Free + BypassQuarantineSize);
 }
 #endif
 
@@ -277,6 +302,10 @@
 static size_t Count;
 
 static void callback(uintptr_t Base, size_t Size, void *Arg) {
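+  // With memory tagging enabled, the reported addresses may carry tags;
+  // strip them from both pointers before comparing.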
+  if (scudo::archSupportsMemoryTagging()) {
+    Base = scudo::untagPointer(Base);
+    BoundaryP = scudo::untagPointer(BoundaryP);
+  }
   if (Base == BoundaryP)
     Count++;
 }
@@ -328,7 +357,7 @@
 
 // Fuchsia doesn't have alarm, fork or malloc_info.
 #if !SCUDO_FUCHSIA
-TEST(ScudoWrappersCTest, MallocDisableDeadlock) {
+TEST(ScudoWrappersCDeathTest, MallocDisableDeadlock) {
   // We expect heap operations within a disable/enable scope to deadlock.
   EXPECT_DEATH(
       {
@@ -363,10 +392,10 @@
   free(P2);
 }
 
-TEST(ScudoWrappersCTest, Fork) {
+TEST(ScudoWrappersCDeathTest, Fork) {
   void *P;
   pid_t Pid = fork();
-  EXPECT_GE(Pid, 0);
+  EXPECT_GE(Pid, 0) << strerror(errno);
   if (Pid == 0) {
     P = malloc(Size);
     EXPECT_NE(P, nullptr);
diff --git a/standalone/tests/wrappers_cpp_test.cpp b/standalone/tests/wrappers_cpp_test.cpp
index 9df06dc..a88dc4a 100644
--- a/standalone/tests/wrappers_cpp_test.cpp
+++ b/standalone/tests/wrappers_cpp_test.cpp
@@ -6,10 +6,13 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "memtag.h"
 #include "tests/scudo_unit_test.h"
 
 #include <atomic>
 #include <condition_variable>
+#include <fstream>
+#include <memory>
 #include <mutex>
 #include <thread>
 #include <vector>
@@ -65,7 +68,7 @@
   Color C = Color::Red;
 };
 
-TEST(ScudoWrappersCppTest, New) {
+TEST(ScudoWrappersCppDeathTest, New) {
   if (getenv("SKIP_TYPE_MISMATCH")) {
     printf("Skipped type mismatch tests.\n");
     return;
@@ -107,6 +110,13 @@
 }
 
 TEST(ScudoWrappersCppTest, ThreadedNew) {
+  // TODO: Investigate why libc sometimes crashes with a tag mismatch in
+  // __pthread_clockjoin_ex.
+  std::unique_ptr<scudo::ScopedDisableMemoryTagChecks> NoTags;
+  if (!SCUDO_ANDROID && scudo::archSupportsMemoryTagging() &&
+      scudo::systemSupportsMemoryTagging())
+    NoTags = std::make_unique<scudo::ScopedDisableMemoryTagChecks>();
+
   Ready = false;
   std::thread Threads[32];
   for (size_t I = 0U; I < sizeof(Threads) / sizeof(Threads[0]); I++)
@@ -121,11 +131,23 @@
 }
 
 #if !SCUDO_FUCHSIA
-// TODO(kostyak): for me, this test fails in a specific configuration when ran
-//                by itself with some Scudo or GWP-ASan violation. Other people
-//                can't seem to reproduce the failure. Consider skipping this in
-//                the event it fails on the upstream bots.
 TEST(ScudoWrappersCppTest, AllocAfterFork) {
+  // This test can fail flakily when run as part of a large number of other
+  // tests if the maximum number of mappings allowed is low. We tried to
+  // reduce the number of iterations of the loops with moderate success, so
+  // we now skip this test under those circumstances.
+  if (SCUDO_LINUX) {
+    long MaxMapCount = 0;
+    // If the file can't be accessed, we proceed with the test.
+    std::ifstream Stream("/proc/sys/vm/max_map_count");
+    if (Stream.good()) {
+      Stream >> MaxMapCount;
+      if (MaxMapCount < 200000)
+        return;
+    }
+  }
+
   std::atomic_bool Stop;
 
   // Create threads that simply allocate and free different sizes.
@@ -133,7 +155,7 @@
   for (size_t N = 0; N < 5; N++) {
     std::thread *T = new std::thread([&Stop] {
       while (!Stop) {
-        for (size_t SizeLog = 3; SizeLog <= 21; SizeLog++) {
+        for (size_t SizeLog = 3; SizeLog <= 20; SizeLog++) {
           char *P = new char[1UL << SizeLog];
           EXPECT_NE(P, nullptr);
           // Make sure this value is not optimized away.
@@ -146,10 +168,10 @@
   }
 
   // Create a thread to fork and allocate.
-  for (size_t N = 0; N < 100; N++) {
+  for (size_t N = 0; N < 50; N++) {
     pid_t Pid;
     if ((Pid = fork()) == 0) {
-      for (size_t SizeLog = 3; SizeLog <= 21; SizeLog++) {
+      for (size_t SizeLog = 3; SizeLog <= 20; SizeLog++) {
         char *P = new char[1UL << SizeLog];
         EXPECT_NE(P, nullptr);
         // Make sure this value is not optimized away.
diff --git a/standalone/trusty.cpp b/standalone/trusty.cpp
new file mode 100644
index 0000000..81d6bc5
--- /dev/null
+++ b/standalone/trusty.cpp
@@ -0,0 +1,100 @@
+//===-- trusty.cpp ---------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "platform.h"
+
+#if SCUDO_TRUSTY
+
+#include "common.h"
+#include "mutex.h"
+#include "string_utils.h"
+#include "trusty.h"
+
+#include <errno.h>           // for errno
+#include <stdio.h>           // for printf()
+#include <stdlib.h>          // for getenv()
+#include <sys/auxv.h>        // for getauxval()
+#include <time.h>            // for clock_gettime()
+#include <trusty_syscalls.h> // for _trusty_brk()
+
+#define SBRK_ALIGN 32
+
+namespace scudo {
+
+uptr getPageSize() { return getauxval(AT_PAGESZ); }
+
+void NORETURN die() { abort(); }
+
+void *map(UNUSED void *Addr, uptr Size, UNUSED const char *Name, uptr Flags,
+          UNUSED MapPlatformData *Data) {
+  // Calling _trusty_brk(0) returns the current program break.
+  uptr ProgramBreak = reinterpret_cast<uptr>(_trusty_brk(0));
+  uptr Start;
+  uptr End;
+
+  Start = roundUpTo(ProgramBreak, SBRK_ALIGN);
+  // Don't actually extend the heap if the MAP_NOACCESS flag is set, since
+  // this is the case where Scudo tries to reserve a memory region without
+  // mapping physical pages.
+  if (Flags & MAP_NOACCESS)
+    return reinterpret_cast<void *>(Start);
+
+  // Attempt to extend the heap by Size bytes using _trusty_brk.
+  End = roundUpTo(Start + Size, SBRK_ALIGN);
+  ProgramBreak =
+      reinterpret_cast<uptr>(_trusty_brk(reinterpret_cast<void *>(End)));
+  if (ProgramBreak < End) {
+    errno = ENOMEM;
+    dieOnMapUnmapError(Size);
+    return nullptr;
+  }
+  return reinterpret_cast<void *>(Start); // Base of new reserved region.
+}
+
+// Unmap is a no-op since Trusty uses sbrk instead of memory mapping.
+void unmap(UNUSED void *Addr, UNUSED uptr Size, UNUSED uptr Flags,
+           UNUSED MapPlatformData *Data) {}
+
+void setMemoryPermission(UNUSED uptr Addr, UNUSED uptr Size, UNUSED uptr Flags,
+                         UNUSED MapPlatformData *Data) {}
+
+void releasePagesToOS(UNUSED uptr BaseAddress, UNUSED uptr Offset,
+                      UNUSED uptr Size, UNUSED MapPlatformData *Data) {}
+
+const char *getEnv(const char *Name) { return getenv(Name); }
+
+// All mutex operations are a no-op since Trusty doesn't currently support
+// threads.
+bool HybridMutex::tryLock() { return true; }
+
+void HybridMutex::lockSlow() {}
+
+void HybridMutex::unlock() {}
+
+u64 getMonotonicTime() {
+  timespec TS;
+  clock_gettime(CLOCK_MONOTONIC, &TS);
+  return static_cast<u64>(TS.tv_sec) * (1000ULL * 1000 * 1000) +
+         static_cast<u64>(TS.tv_nsec);
+}
+
+u32 getNumberOfCPUs() { return 0; }
+
+u32 getThreadID() { return 0; }
+
+bool getRandom(UNUSED void *Buffer, UNUSED uptr Length, UNUSED bool Blocking) {
+  return false;
+}
+
+void outputRaw(const char *Buffer) { printf("%s", Buffer); }
+
+void setAbortMessage(UNUSED const char *Message) {}
+
+} // namespace scudo
+
+#endif // SCUDO_TRUSTY
diff --git a/standalone/trusty.h b/standalone/trusty.h
new file mode 100644
index 0000000..50edd1c
--- /dev/null
+++ b/standalone/trusty.h
@@ -0,0 +1,24 @@
+//===-- trusty.h -----------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef SCUDO_TRUSTY_H_
+#define SCUDO_TRUSTY_H_
+
+#include "platform.h"
+
+#if SCUDO_TRUSTY
+
+namespace scudo {
+// MapPlatformData is unused on Trusty; define it as a minimally sized
+// structure.
+struct MapPlatformData {};
+} // namespace scudo
+
+#endif // SCUDO_TRUSTY
+
+#endif // SCUDO_TRUSTY_H_
diff --git a/standalone/tsd.h b/standalone/tsd.h
index a6e669b..b400a3b 100644
--- a/standalone/tsd.h
+++ b/standalone/tsd.h
@@ -26,16 +26,15 @@
 template <class Allocator> struct alignas(SCUDO_CACHE_LINE_SIZE) TSD {
   typename Allocator::CacheT Cache;
   typename Allocator::QuarantineCacheT QuarantineCache;
+  using ThisT = TSD<Allocator>;
   u8 DestructorIterations = 0;
 
-  void initLinkerInitialized(Allocator *Instance) {
+  void init(Allocator *Instance) {
+    DCHECK_EQ(DestructorIterations, 0U);
+    DCHECK(isAligned(reinterpret_cast<uptr>(this), alignof(ThisT)));
     Instance->initCache(&Cache);
     DestructorIterations = PTHREAD_DESTRUCTOR_ITERATIONS;
   }
-  void init(Allocator *Instance) {
-    memset(this, 0, sizeof(*this));
-    initLinkerInitialized(Instance);
-  }
 
   void commitBack(Allocator *Instance) { Instance->commitBack(this); }
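
The tsd.h change above is one instance of a pattern this patch applies
throughout: instead of zeroing an object with memset() and a separate
initLinkerInitialized() step, members carry default initializers and a
single init() asserts its preconditions. A minimal sketch of the idea
(names are illustrative, not scudo's):

#include <cassert>
#include <cstring>

struct Before {
  unsigned Iterations; // indeterminate until init() runs
  void init() {
    memset(this, 0, sizeof(*this)); // blunt: clobbers any live state too
  }
};

struct After {
  unsigned Iterations = 0; // zeroed by the default member initializer
  void init() {
    assert(Iterations == 0); // mirrors the DCHECK_EQ in TSD::init above
  }
};

int main() {
  After A;
  A.init();
  return 0;
}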
 
diff --git a/standalone/tsd_exclusive.h b/standalone/tsd_exclusive.h
index a907ed4..d49427b 100644
--- a/standalone/tsd_exclusive.h
+++ b/standalone/tsd_exclusive.h
@@ -15,7 +15,7 @@
 
 struct ThreadState {
   bool DisableMemInit : 1;
-  enum {
+  enum : unsigned {
     NotInitialized = 0,
     Initialized,
     TornDown,
@@ -25,31 +25,35 @@
 template <class Allocator> void teardownThread(void *Ptr);
 
 template <class Allocator> struct TSDRegistryExT {
-  void initLinkerInitialized(Allocator *Instance) {
-    Instance->initLinkerInitialized();
-    CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread<Allocator>), 0);
-    FallbackTSD.initLinkerInitialized(Instance);
-    Initialized = true;
-  }
   void init(Allocator *Instance) {
-    memset(this, 0, sizeof(*this));
-    initLinkerInitialized(Instance);
+    DCHECK(!Initialized);
+    Instance->init();
+    CHECK_EQ(pthread_key_create(&PThreadKey, teardownThread<Allocator>), 0);
+    FallbackTSD.init(Instance);
+    Initialized = true;
   }
 
   void initOnceMaybe(Allocator *Instance) {
     ScopedLock L(Mutex);
     if (LIKELY(Initialized))
       return;
-    initLinkerInitialized(Instance); // Sets Initialized.
+    init(Instance); // Sets Initialized.
   }
 
-  void unmapTestOnly() {
-    Allocator *Instance =
-        reinterpret_cast<Allocator *>(pthread_getspecific(PThreadKey));
-    if (!Instance)
-      return;
-    ThreadTSD.commitBack(Instance);
+  void unmapTestOnly(Allocator *Instance) {
+    DCHECK(Instance);
+    if (reinterpret_cast<Allocator *>(pthread_getspecific(PThreadKey))) {
+      DCHECK_EQ(reinterpret_cast<Allocator *>(pthread_getspecific(PThreadKey)),
+                Instance);
+      ThreadTSD.commitBack(Instance);
+      ThreadTSD = {};
+    }
+    CHECK_EQ(pthread_key_delete(PThreadKey), 0);
+    PThreadKey = {};
+    FallbackTSD.commitBack(Instance);
+    FallbackTSD = {};
     State = {};
+    Initialized = false;
   }
 
   ALWAYS_INLINE void initThreadMaybe(Allocator *Instance, bool MinimalInit) {
@@ -83,7 +87,7 @@
     Mutex.unlock();
   }
 
-  bool setOption(Option O, UNUSED sptr Value) {
+  bool setOption(Option O, sptr Value) {
     if (O == Option::ThreadDisableMemInit)
       State.DisableMemInit = Value;
     if (O == Option::MaxTSDsCount)
@@ -103,7 +107,7 @@
       return;
     CHECK_EQ(
         pthread_setspecific(PThreadKey, reinterpret_cast<void *>(Instance)), 0);
-    ThreadTSD.initLinkerInitialized(Instance);
+    ThreadTSD.init(Instance);
     State.InitState = ThreadState::Initialized;
     Instance->callPostInitCallback();
   }
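
The reworked unmapTestOnly() above pairs the pthread_key_create() done in
init() with an explicit pthread_key_delete(). A minimal sketch of that
lifecycle, with a stand-in destructor in place of teardownThread:

#include <cassert>
#include <pthread.h>

static void teardown(void *Ptr) {
  (void)Ptr; // commitBack() would run here on normal thread exit
}

int main() {
  pthread_key_t Key;
  assert(pthread_key_create(&Key, teardown) == 0);
  int Value = 42;
  assert(pthread_setspecific(Key, &Value) == 0);
  assert(pthread_getspecific(Key) == &Value);
  // Deleting the key does not run the destructor, so state must be
  // committed back first, exactly as unmapTestOnly() does.
  assert(pthread_key_delete(Key) == 0);
  return 0;
}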
diff --git a/standalone/tsd_shared.h b/standalone/tsd_shared.h
index afe3623..1c2a880 100644
--- a/standalone/tsd_shared.h
+++ b/standalone/tsd_shared.h
@@ -24,28 +24,32 @@
 
 template <class Allocator, u32 TSDsArraySize, u32 DefaultTSDCount>
 struct TSDRegistrySharedT {
-  void initLinkerInitialized(Allocator *Instance) {
-    Instance->initLinkerInitialized();
+  void init(Allocator *Instance) {
+    DCHECK(!Initialized);
+    Instance->init();
     for (u32 I = 0; I < TSDsArraySize; I++)
-      TSDs[I].initLinkerInitialized(Instance);
+      TSDs[I].init(Instance);
     const u32 NumberOfCPUs = getNumberOfCPUs();
     setNumberOfTSDs((NumberOfCPUs == 0) ? DefaultTSDCount
                                         : Min(NumberOfCPUs, DefaultTSDCount));
     Initialized = true;
   }
-  void init(Allocator *Instance) {
-    memset(this, 0, sizeof(*this));
-    initLinkerInitialized(Instance);
-  }
 
   void initOnceMaybe(Allocator *Instance) {
     ScopedLock L(Mutex);
     if (LIKELY(Initialized))
       return;
-    initLinkerInitialized(Instance); // Sets Initialized.
+    init(Instance); // Sets Initialized.
   }
 
-  void unmapTestOnly() { setCurrentTSD(nullptr); }
+  void unmapTestOnly(Allocator *Instance) {
+    for (u32 I = 0; I < TSDsArraySize; I++) {
+      TSDs[I].commitBack(Instance);
+      TSDs[I] = {};
+    }
+    setCurrentTSD(nullptr);
+    Initialized = false;
+  }
 
   ALWAYS_INLINE void initThreadMaybe(Allocator *Instance,
                                      UNUSED bool MinimalInit) {
diff --git a/standalone/vector.h b/standalone/vector.h
index 6ca350a..eae774b 100644
--- a/standalone/vector.h
+++ b/standalone/vector.h
@@ -19,14 +19,14 @@
 // small vectors. The current implementation supports only POD types.
 template <typename T> class VectorNoCtor {
 public:
-  void init(uptr InitialCapacity) {
-    CapacityBytes = 0;
-    Size = 0;
-    Data = nullptr;
-    reserve(InitialCapacity);
+  constexpr void init(uptr InitialCapacity = 0) {
+    Data = &LocalData[0];
+    CapacityBytes = sizeof(LocalData);
+    if (InitialCapacity > capacity())
+      reserve(InitialCapacity);
   }
   void destroy() {
-    if (Data)
+    if (Data != &LocalData[0])
       unmap(Data, CapacityBytes);
   }
   T &operator[](uptr I) {
@@ -56,7 +56,7 @@
   uptr size() const { return Size; }
   const T *data() const { return Data; }
   T *data() { return Data; }
-  uptr capacity() const { return CapacityBytes / sizeof(T); }
+  constexpr uptr capacity() const { return CapacityBytes / sizeof(T); }
   void reserve(uptr NewSize) {
     // Never downsize internal buffer.
     if (NewSize > capacity())
@@ -82,26 +82,24 @@
   void reallocate(uptr NewCapacity) {
     DCHECK_GT(NewCapacity, 0);
     DCHECK_LE(Size, NewCapacity);
-    const uptr NewCapacityBytes =
-        roundUpTo(NewCapacity * sizeof(T), getPageSizeCached());
+    NewCapacity = roundUpTo(NewCapacity * sizeof(T), getPageSizeCached());
     T *NewData =
-        reinterpret_cast<T *>(map(nullptr, NewCapacityBytes, "scudo:vector"));
-    if (Data) {
-      memcpy(NewData, Data, Size * sizeof(T));
-      unmap(Data, CapacityBytes);
-    }
+        reinterpret_cast<T *>(map(nullptr, NewCapacity, "scudo:vector"));
+    memcpy(NewData, Data, Size * sizeof(T));
+    destroy();
     Data = NewData;
-    CapacityBytes = NewCapacityBytes;
+    CapacityBytes = NewCapacity;
   }
 
-  T *Data;
-  uptr CapacityBytes;
-  uptr Size;
+  T *Data = nullptr;
+  T LocalData[256 / sizeof(T)] = {};
+  uptr CapacityBytes = 0;
+  uptr Size = 0;
 };
 
 template <typename T> class Vector : public VectorNoCtor<T> {
 public:
-  Vector() { VectorNoCtor<T>::init(1); }
+  constexpr Vector() { VectorNoCtor<T>::init(); }
   explicit Vector(uptr Count) {
     VectorNoCtor<T>::init(Count);
     this->resize(Count);
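
The vector.h change adopts inline ("small buffer") storage: Data starts out
pointing at LocalData, so vectors that stay within 256 bytes never call
map(), and destroy() only unmaps storage that actually left the inline
buffer. A minimal sketch of the technique, using malloc/free in place of
scudo's map/unmap:

#include <cassert>
#include <cstdlib>
#include <cstring>

template <typename T> class SmallVec {
public:
  SmallVec() : Data(Local), Capacity(sizeof(Local) / sizeof(T)) {}
  ~SmallVec() {
    if (Data != Local) // only free storage that spilled to the heap
      free(Data);
  }
  void push_back(const T &E) {
    if (Size == Capacity)
      grow(Capacity * 2);
    Data[Size++] = E;
  }
  bool usesInlineStorage() const { return Data == Local; }

private:
  void grow(size_t NewCapacity) {
    T *NewData = static_cast<T *>(malloc(NewCapacity * sizeof(T)));
    memcpy(NewData, Data, Size * sizeof(T));
    if (Data != Local)
      free(Data);
    Data = NewData;
    Capacity = NewCapacity;
  }
  T Local[256 / sizeof(T)] = {};
  T *Data;
  size_t Capacity;
  size_t Size = 0;
};

int main() {
  SmallVec<int> V;
  for (int I = 0; I < 16; ++I)
    V.push_back(I);
  assert(V.usesInlineStorage()); // 16 ints fit in the 256-byte buffer
  for (int I = 0; I < 256; ++I)
    V.push_back(I);
  assert(!V.usesInlineStorage()); // growth spilled to the heap
  return 0;
}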
diff --git a/standalone/wrappers_c.cpp b/standalone/wrappers_c.cpp
index 81c7dd6..b4d51be 100644
--- a/standalone/wrappers_c.cpp
+++ b/standalone/wrappers_c.cpp
@@ -21,8 +21,6 @@
 #define SCUDO_PREFIX(name) name
 #define SCUDO_ALLOCATOR Allocator
 
-extern "C" void SCUDO_PREFIX(malloc_postinit)();
-
 // Export the static allocator so that the C++ wrappers can access it.
 // Technically we could have a completely separated heap for C & C++ but in
 // reality the amount of cross pollination between the two is staggering.
diff --git a/standalone/wrappers_c.h b/standalone/wrappers_c.h
index 6d0cecd..08dc679 100644
--- a/standalone/wrappers_c.h
+++ b/standalone/wrappers_c.h
@@ -32,6 +32,19 @@
   __scudo_mallinfo_data_t keepcost;
 };
 
+struct __scudo_mallinfo2 {
+  size_t arena;
+  size_t ordblks;
+  size_t smblks;
+  size_t hblks;
+  size_t hblkhd;
+  size_t usmblks;
+  size_t fsmblks;
+  size_t uordblks;
+  size_t fordblks;
+  size_t keepcost;
+};
+
 // Android sometimes includes malloc.h no matter what, which leads to
 // conflicting return types for mallinfo() if we use our own structure. So if
 // struct mallinfo is declared (#define courtesy of malloc.h), use it directly.
@@ -41,4 +54,9 @@
 #define SCUDO_MALLINFO __scudo_mallinfo
 #endif
 
+#if !SCUDO_ANDROID || !_BIONIC
+extern "C" void malloc_postinit();
+extern HIDDEN scudo::Allocator<scudo::Config, malloc_postinit> Allocator;
+#endif
+
 #endif // SCUDO_WRAPPERS_C_H_
diff --git a/standalone/wrappers_c.inc b/standalone/wrappers_c.inc
index 43efb02..bbe3617 100644
--- a/standalone/wrappers_c.inc
+++ b/standalone/wrappers_c.inc
@@ -54,6 +54,23 @@
   return Info;
 }
 
+INTERFACE WEAK struct __scudo_mallinfo2 SCUDO_PREFIX(mallinfo2)(void) {
+  struct __scudo_mallinfo2 Info = {};
+  scudo::StatCounters Stats;
+  SCUDO_ALLOCATOR.getStats(Stats);
+  // Space allocated in mmapped regions (bytes)
+  Info.hblkhd = Stats[scudo::StatMapped];
+  // Maximum total allocated space (bytes)
+  Info.usmblks = Info.hblkhd;
+  // Space in freed fastbin blocks (bytes)
+  Info.fsmblks = Stats[scudo::StatFree];
+  // Total allocated space (bytes)
+  Info.uordblks = Stats[scudo::StatAllocated];
+  // Total free space (bytes)
+  Info.fordblks = Info.fsmblks;
+  return Info;
+}
+
 INTERFACE WEAK void *SCUDO_PREFIX(malloc)(size_t size) {
   return scudo::setErrnoOnNull(SCUDO_ALLOCATOR.allocate(
       size, scudo::Chunk::Origin::Malloc, SCUDO_MALLOC_ALIGNMENT));
@@ -226,7 +243,7 @@
   fputs("<malloc version=\"scudo-1\">\n", stream);
   for (scudo::uptr i = 0; i != max_size; ++i)
     if (sizes[i])
-      fprintf(stream, "<alloc size=\"%lu\" count=\"%lu\"/>\n", i, sizes[i]);
+      fprintf(stream, "<alloc size=\"%zu\" count=\"%zu\"/>\n", i, sizes[i]);
   fputs("</malloc>\n", stream);
   SCUDO_PREFIX(free)(sizes);
   return 0;
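
A hedged usage sketch for the new mallinfo2() wrapper above: callers read
its size_t fields directly instead of the truncating int-typed fields of
the original mallinfo. This assumes the program links against a
scudo-backed malloc; the struct layout is repeated from wrappers_c.h so
the sketch is self-contained.

#include <cstddef>
#include <cstdio>

extern "C" {
struct __scudo_mallinfo2 { // mirrors the declaration in wrappers_c.h
  size_t arena, ordblks, smblks, hblks, hblkhd;
  size_t usmblks, fsmblks, uordblks, fordblks, keepcost;
};
struct __scudo_mallinfo2 mallinfo2(void);
}

int main() {
  const struct __scudo_mallinfo2 Info = mallinfo2();
  // mapped, allocated, and free byte counts, per the wrapper's comments.
  std::printf("mapped: %zu allocated: %zu free: %zu\n", Info.hblkhd,
              Info.uordblks, Info.fordblks);
  return 0;
}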
diff --git a/standalone/wrappers_c_checks.h b/standalone/wrappers_c_checks.h
index 7fc1a96..815d400 100644
--- a/standalone/wrappers_c_checks.h
+++ b/standalone/wrappers_c_checks.h
@@ -46,8 +46,13 @@
 // builtin supported by recent clang & GCC if it exists, otherwise fallback to a
 // costly division.
 inline bool checkForCallocOverflow(uptr Size, uptr N, uptr *Product) {
-#if __has_builtin(__builtin_umull_overflow)
-  return __builtin_umull_overflow(Size, N, Product);
+#if __has_builtin(__builtin_umull_overflow) && (SCUDO_WORDSIZE == 64U)
+  return __builtin_umull_overflow(Size, N,
+                                  reinterpret_cast<unsigned long *>(Product));
+#elif __has_builtin(__builtin_umul_overflow) && (SCUDO_WORDSIZE == 32U)
+  // On, e.g., armv7, uptr/uintptr_t may be defined as unsigned long.
+  return __builtin_umul_overflow(Size, N,
+                                 reinterpret_cast<unsigned int *>(Product));
 #else
   *Product = Size * N;
   if (!Size)
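
The hunk above selects the width-matched builtin per word size. A minimal
sketch of the same check using the type-generic __builtin_mul_overflow
(available in recent clang and GCC), which sidesteps the unsigned long vs.
unsigned int casts entirely; the fallback mirrors the #else branch:

#include <cassert>
#include <cstddef>
#include <cstdint>

static bool mulOverflows(size_t A, size_t B, size_t *Product) {
#if defined(__clang__) || defined(__GNUC__)
  return __builtin_mul_overflow(A, B, Product); // type-generic, any width
#else
  *Product = A * B;
  // Costly division fallback: nonzero A with a diverging quotient wrapped.
  return A != 0 && *Product / A != B;
#endif
}

int main() {
  size_t P;
  assert(!mulOverflows(8, 16, &P) && P == 128);
  assert(mulOverflows(SIZE_MAX, 2, &P)); // must report the wrap
  return 0;
}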
diff --git a/standalone/wrappers_cpp.cpp b/standalone/wrappers_cpp.cpp
index adb1041..16f495b 100644
--- a/standalone/wrappers_cpp.cpp
+++ b/standalone/wrappers_cpp.cpp
@@ -12,12 +12,10 @@
 #if !SCUDO_ANDROID || !_BIONIC
 
 #include "allocator_config.h"
+#include "wrappers_c.h"
 
 #include <stdint.h>
 
-extern "C" void malloc_postinit();
-extern HIDDEN scudo::Allocator<scudo::Config, malloc_postinit> Allocator;
-
 namespace std {
 struct nothrow_t {};
 enum class align_val_t : size_t {};