simpleperf: support event group.

The kernel supports monitoring several events as a group, so that they
are scheduled in and out at the same time. Add a --group option to the
stat command and the record command.
Adjust how the stat command calculates the miss rate: only match events
that are in the same group, or that both have scale == 1.0.

Bug: 29213742

Change-Id: I899aba207f1e3357307541e81f97526f5a2913c3
diff --git a/simpleperf/cmd_record.cpp b/simpleperf/cmd_record.cpp
index a989d37..7bc5ec0 100644
--- a/simpleperf/cmd_record.cpp
+++ b/simpleperf/cmd_record.cpp
@@ -95,6 +95,10 @@
 "             option is -f 4000.\n"
 "-F freq      Same as '-f freq'.\n"
 "-g           Same as '--call-graph dwarf'.\n"
+"--group event1[:modifier],event2[:modifier2],...\n"
+"             Similar to -e option. But events specified in the same --group\n"
+"             option are monitored as a group, and scheduled in and out at the\n"
+"             same time.\n"
 "-j branch_filter1,branch_filter2,...\n"
 "             Enable taken branch stack sampling. Each sample captures a series\n"
 "             of consecutive taken branches.\n"
@@ -152,15 +156,14 @@
  private:
   bool ParseOptions(const std::vector<std::string>& args,
                     std::vector<std::string>* non_option_args);
-  bool AddMeasuredEventType(const std::string& event_type_name);
-  bool SetEventSelection();
+  bool SetEventSelectionFlags();
   bool CreateAndInitRecordFile();
   std::unique_ptr<RecordFileWriter> CreateRecordFile(
       const std::string& filename);
   bool DumpKernelSymbol();
   bool DumpTracingData();
-  bool DumpKernelAndModuleMmaps(const perf_event_attr* attr, uint64_t event_id);
-  bool DumpThreadCommAndMmaps(const perf_event_attr* attr, uint64_t event_id,
+  bool DumpKernelAndModuleMmaps(const perf_event_attr& attr, uint64_t event_id);
+  bool DumpThreadCommAndMmaps(const perf_event_attr& attr, uint64_t event_id,
                               bool all_threads,
                               const std::vector<pid_t>& selected_threads);
   bool ProcessRecord(Record* record);
@@ -190,7 +193,6 @@
   bool dump_symbols_;
   std::vector<pid_t> monitored_threads_;
   std::vector<int> cpus_;
-  std::vector<EventTypeAndModifier> measured_event_types_;
   EventSelectionSet event_selection_set_;
 
   // mmap pages used by each perf event file, should be a power of 2.
@@ -217,12 +219,12 @@
   if (!ParseOptions(args, &workload_args)) {
     return false;
   }
-  if (measured_event_types_.empty()) {
-    if (!AddMeasuredEventType(default_measured_event_type)) {
+  if (event_selection_set_.empty()) {
+    if (!event_selection_set_.AddEventType(default_measured_event_type)) {
       return false;
     }
   }
-  if (!SetEventSelection()) {
+  if (!SetEventSelectionFlags()) {
     return false;
   }
 
@@ -369,7 +371,7 @@
       }
       std::vector<std::string> event_types = android::base::Split(args[i], ",");
       for (auto& event_type : event_types) {
-        if (!AddMeasuredEventType(event_type)) {
+        if (!event_selection_set_.AddEventType(event_type)) {
           return false;
         }
       }
@@ -387,6 +389,14 @@
     } else if (args[i] == "-g") {
       fp_callchain_sampling_ = false;
       dwarf_callchain_sampling_ = true;
+    } else if (args[i] == "--group") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      std::vector<std::string> event_types = android::base::Split(args[i], ",");
+      if (!event_selection_set_.AddEventGroup(event_types)) {
+        return false;
+      }
     } else if (args[i] == "-j") {
       if (!NextArgumentOrError(args, &i)) {
         return false;
@@ -492,39 +502,22 @@
   return true;
 }
 
-bool RecordCommand::AddMeasuredEventType(const std::string& event_type_name) {
-  std::unique_ptr<EventTypeAndModifier> event_type_modifier =
-      ParseEventType(event_type_name);
-  if (event_type_modifier == nullptr) {
-    return false;
-  }
-  for (const auto& type : measured_event_types_) {
-    if (type.name == event_type_modifier->name) {
-      return true;
-    }
-  }
-  measured_event_types_.push_back(*event_type_modifier);
-  return true;
-}
-
-bool RecordCommand::SetEventSelection() {
-  for (auto& event_type : measured_event_types_) {
-    if (!event_selection_set_.AddEventType(event_type)) {
-      return false;
-    }
-  }
-  for (auto& event_type : measured_event_types_) {
-    if (use_sample_freq_) {
-      event_selection_set_.SetSampleFreq(event_type, sample_freq_);
-    } else if (use_sample_period_) {
-      event_selection_set_.SetSamplePeriod(event_type, sample_period_);
-    } else {
-      if (event_type.event_type.type == PERF_TYPE_TRACEPOINT) {
-        event_selection_set_.SetSamplePeriod(
-            event_type, DEFAULT_SAMPLE_PERIOD_FOR_TRACEPOINT_EVENT);
+bool RecordCommand::SetEventSelectionFlags() {
+  for (const auto& group : event_selection_set_.groups()) {
+    for (const auto& selection : group) {
+      if (use_sample_freq_) {
+        event_selection_set_.SetSampleFreq(selection, sample_freq_);
+      } else if (use_sample_period_) {
+        event_selection_set_.SetSamplePeriod(selection, sample_period_);
       } else {
-        event_selection_set_.SetSampleFreq(
-            event_type, DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT);
+        if (selection.event_type_modifier.event_type.type ==
+            PERF_TYPE_TRACEPOINT) {
+          event_selection_set_.SetSamplePeriod(
+              selection, DEFAULT_SAMPLE_PERIOD_FOR_TRACEPOINT_EVENT);
+        } else {
+          event_selection_set_.SetSampleFreq(
+              selection, DEFAULT_SAMPLE_FREQ_FOR_NONTRACEPOINT_EVENT);
+        }
       }
     }
   }
@@ -550,11 +543,10 @@
     return false;
   }
   // Use first perf_event_attr and first event id to dump mmap and comm records.
-  const perf_event_attr* attr =
-      event_selection_set_.FindEventAttrByType(measured_event_types_[0]);
-  const std::vector<std::unique_ptr<EventFd>>* fds =
-      event_selection_set_.FindEventFdsByType(measured_event_types_[0]);
-  uint64_t event_id = (*fds)[0]->Id();
+  const EventSelection& selection = event_selection_set_.groups()[0][0];
+  const perf_event_attr& attr = selection.event_attr;
+  const std::vector<std::unique_ptr<EventFd>>& fds = selection.event_fds;
+  uint64_t event_id = fds[0]->Id();
   if (!DumpKernelSymbol()) {
     return false;
   }
@@ -580,17 +572,17 @@
   }
 
   std::vector<AttrWithId> attr_ids;
-  for (auto& event_type : measured_event_types_) {
-    AttrWithId attr_id;
-    attr_id.attr = event_selection_set_.FindEventAttrByType(event_type);
-    CHECK(attr_id.attr != nullptr);
-    const std::vector<std::unique_ptr<EventFd>>* fds =
-        event_selection_set_.FindEventFdsByType(event_type);
-    CHECK(fds != nullptr);
-    for (auto& fd : *fds) {
-      attr_id.ids.push_back(fd->Id());
+  for (const auto& group : event_selection_set_.groups()) {
+    for (const auto& selection : group) {
+      AttrWithId attr_id;
+      attr_id.attr = &selection.event_attr;
+      CHECK(attr_id.attr != nullptr);
+      const std::vector<std::unique_ptr<EventFd>>& fds = selection.event_fds;
+      for (const auto& fd : fds) {
+        attr_id.ids.push_back(fd->Id());
+      }
+      attr_ids.push_back(attr_id);
     }
-    attr_ids.push_back(attr_id);
   }
   if (!writer->WriteAttrSection(attr_ids)) {
     return nullptr;
@@ -602,10 +594,11 @@
   if (can_dump_kernel_symbols_) {
     std::string kallsyms;
     bool need_kernel_symbol = false;
-    for (const auto& type : measured_event_types_) {
-      if (!type.exclude_kernel) {
-        need_kernel_symbol = true;
-        break;
+    for (const auto& group : event_selection_set_.groups()) {
+      for (const auto& selection : group) {
+        if (!selection.event_type_modifier.exclude_kernel) {
+          need_kernel_symbol = true;
+        }
       }
     }
     if (need_kernel_symbol) {
@@ -626,18 +619,21 @@
 }
 
 bool RecordCommand::DumpTracingData() {
-  bool has_tracepoint = false;
-  for (const auto& type : measured_event_types_) {
-    if (type.event_type.type == PERF_TYPE_TRACEPOINT) {
-      has_tracepoint = true;
-      break;
+  std::vector<const EventType*> tracepoint_event_types;
+  for (const auto& group : event_selection_set_.groups()) {
+    for (const auto& selection : group) {
+      if (selection.event_type_modifier.event_type.type ==
+          PERF_TYPE_TRACEPOINT) {
+        tracepoint_event_types.push_back(
+            &selection.event_type_modifier.event_type);
+      }
     }
   }
-  if (!has_tracepoint) {
+  if (tracepoint_event_types.empty()) {
     return true;  // No need to dump tracing data.
   }
   std::vector<char> tracing_data;
-  if (!GetTracingData(measured_event_types_, &tracing_data)) {
+  if (!GetTracingData(tracepoint_event_types, &tracing_data)) {
     return false;
   }
   TracingDataRecord record = TracingDataRecord::Create(std::move(tracing_data));
@@ -647,21 +643,21 @@
   return true;
 }
 
-bool RecordCommand::DumpKernelAndModuleMmaps(const perf_event_attr* attr,
+bool RecordCommand::DumpKernelAndModuleMmaps(const perf_event_attr& attr,
                                              uint64_t event_id) {
   KernelMmap kernel_mmap;
   std::vector<KernelMmap> module_mmaps;
   GetKernelAndModuleMmaps(&kernel_mmap, &module_mmaps);
 
   MmapRecord mmap_record =
-      MmapRecord::Create(*attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
+      MmapRecord::Create(attr, true, UINT_MAX, 0, kernel_mmap.start_addr,
                          kernel_mmap.len, 0, kernel_mmap.filepath, event_id);
   if (!ProcessRecord(&mmap_record)) {
     return false;
   }
   for (auto& module_mmap : module_mmaps) {
     MmapRecord mmap_record =
-        MmapRecord::Create(*attr, true, UINT_MAX, 0, module_mmap.start_addr,
+        MmapRecord::Create(attr, true, UINT_MAX, 0, module_mmap.start_addr,
                            module_mmap.len, 0, module_mmap.filepath, event_id);
     if (!ProcessRecord(&mmap_record)) {
       return false;
@@ -671,7 +667,7 @@
 }
 
 bool RecordCommand::DumpThreadCommAndMmaps(
-    const perf_event_attr* attr, uint64_t event_id, bool all_threads,
+    const perf_event_attr& attr, uint64_t event_id, bool all_threads,
     const std::vector<pid_t>& selected_threads) {
   std::vector<ThreadComm> thread_comms;
   if (!GetThreadComms(&thread_comms)) {
@@ -698,8 +694,8 @@
         dump_processes.find(thread.pid) == dump_processes.end()) {
       continue;
     }
-    CommRecord record = CommRecord::Create(*attr, thread.pid, thread.tid,
-                                           thread.comm, event_id);
+    CommRecord record =
+        CommRecord::Create(attr, thread.pid, thread.tid, thread.comm, event_id);
     if (!ProcessRecord(&record)) {
       return false;
     }
@@ -713,7 +709,7 @@
         continue;  // No need to dump non-executable mmap info.
       }
       MmapRecord record = MmapRecord::Create(
-          *attr, false, thread.pid, thread.tid, thread_mmap.start_addr,
+          attr, false, thread.pid, thread.tid, thread_mmap.start_addr,
           thread_mmap.len, thread_mmap.pgoff, thread_mmap.name, event_id);
       if (!ProcessRecord(&record)) {
         return false;
@@ -730,12 +726,12 @@
       continue;
     }
     ForkRecord fork_record = ForkRecord::Create(
-        *attr, thread.pid, thread.tid, thread.pid, thread.pid, event_id);
+        attr, thread.pid, thread.tid, thread.pid, thread.pid, event_id);
     if (!ProcessRecord(&fork_record)) {
       return false;
     }
-    CommRecord comm_record = CommRecord::Create(*attr, thread.pid, thread.tid,
-                                                thread.comm, event_id);
+    CommRecord comm_record =
+        CommRecord::Create(attr, thread.pid, thread.tid, thread.comm, event_id);
     if (!ProcessRecord(&comm_record)) {
       return false;
     }
diff --git a/simpleperf/cmd_record_test.cpp b/simpleperf/cmd_record_test.cpp
index 244e683..2306c55 100644
--- a/simpleperf/cmd_record_test.cpp
+++ b/simpleperf/cmd_record_test.cpp
@@ -296,3 +296,10 @@
   CheckDsoSymbolRecords(tmpfile.path, true, &success);
   ASSERT_TRUE(success);
 }
+
+TEST(record_cmd, group_option) {
+  ASSERT_TRUE(RunRecordCmd({"--group", "cpu-cycles,cpu-clock"}));
+  ASSERT_TRUE(RunRecordCmd({"--group", "cpu-cycles,cpu-clock", "--group",
+                            "cpu-cycles:u,cpu-clock:u", "--group",
+                            "cpu-cycles:k,cpu-clock:k"}));
+}
diff --git a/simpleperf/cmd_stat.cpp b/simpleperf/cmd_stat.cpp
index 15e7465..64ed60e 100644
--- a/simpleperf/cmd_stat.cpp
+++ b/simpleperf/cmd_stat.cpp
@@ -38,6 +38,8 @@
 #include "utils.h"
 #include "workload.h"
 
+namespace {
+
 static std::vector<std::string> default_measured_event_types{
     "cpu-cycles",   "stalled-cycles-frontend", "stalled-cycles-backend",
     "instructions", "branch-instructions",     "branch-misses",
@@ -45,33 +47,36 @@
 };
 
 static volatile bool signaled;
-static void signal_handler(int) {
-  signaled = true;
-}
+static void signal_handler(int) { signaled = true; }
 
 class StatCommand : public Command {
  public:
   StatCommand()
-      : Command("stat", "gather performance counter information",
-                "Usage: simpleperf stat [options] [command [command-args]]\n"
-                "    Gather performance counter information of running [command].\n"
-                "    -a           Collect system-wide information.\n"
-                "    --cpu cpu_item1,cpu_item2,...\n"
-                "                 Collect information only on the selected cpus. cpu_item can\n"
-                "                 be a cpu number like 1, or a cpu range like 0-3.\n"
-                "    -e event1[:modifier1],event2[:modifier2],...\n"
-                "                 Select the event list to count. Use `simpleperf list` to find\n"
-                "                 all possible event names. Modifiers can be added to define\n"
-                "                 how the event should be monitored. Possible modifiers are:\n"
-                "                   u - monitor user space events only\n"
-                "                   k - monitor kernel space events only\n"
-                "    --no-inherit\n"
-                "                 Don't stat created child threads/processes.\n"
-                "    -p pid1,pid2,...\n"
-                "                 Stat events on existing processes. Mutually exclusive with -a.\n"
-                "    -t tid1,tid2,...\n"
-                "                 Stat events on existing threads. Mutually exclusive with -a.\n"
-                "    --verbose    Show result in verbose mode.\n"),
+      : Command(
+            "stat", "gather performance counter information",
+            // clang-format off
+"Usage: simpleperf stat [options] [command [command-args]]\n"
+"    Gather performance counter information of running [command].\n"
+"-a           Collect system-wide information.\n"
+"--cpu cpu_item1,cpu_item2,...\n"
+"                 Collect information only on the selected cpus. cpu_item can\n"
+"                 be a cpu number like 1, or a cpu range like 0-3.\n"
+"-e event1[:modifier1],event2[:modifier2],...\n"
+"                 Select the event list to count. Use `simpleperf list` to find\n"
+"                 all possible event names. Modifiers can be added to define\n"
+"                 how the event should be monitored. Possible modifiers are:\n"
+"                   u - monitor user space events only\n"
+"                   k - monitor kernel space events only\n"
+"--group event1[:modifier],event2[:modifier2],...\n"
+"             Similar to -e option. But events specified in the same --group\n"
+"             option are monitored as a group, and scheduled in and out at the\n"
+"             same time.\n"
+"--no-inherit     Don't stat created child threads/processes.\n"
+"-p pid1,pid2,... Stat events on existing processes. Mutually exclusive with -a.\n"
+"-t tid1,tid2,... Stat events on existing threads. Mutually exclusive with -a.\n"
+"--verbose        Show result in verbose mode.\n"
+            // clang-format on
+            ),
         verbose_mode_(false),
         system_wide_collection_(false),
         child_inherit_(true) {
@@ -83,18 +88,18 @@
   bool Run(const std::vector<std::string>& args);
 
  private:
-  bool ParseOptions(const std::vector<std::string>& args, std::vector<std::string>* non_option_args);
-  bool AddMeasuredEventType(const std::string& event_type_name);
+  bool ParseOptions(const std::vector<std::string>& args,
+                    std::vector<std::string>* non_option_args);
   bool AddDefaultMeasuredEventTypes();
-  bool SetEventSelection();
-  bool ShowCounters(const std::vector<CountersInfo>& counters, double duration_in_sec);
+  void SetEventSelectionFlags();
+  bool ShowCounters(const std::vector<CountersInfo>& counters,
+                    double duration_in_sec);
 
   bool verbose_mode_;
   bool system_wide_collection_;
   bool child_inherit_;
   std::vector<pid_t> monitored_threads_;
   std::vector<int> cpus_;
-  std::vector<EventTypeAndModifier> measured_event_types_;
   EventSelectionSet event_selection_set_;
 
   std::unique_ptr<ScopedSignalHandler> scoped_signal_handler_;
@@ -110,14 +115,12 @@
   if (!ParseOptions(args, &workload_args)) {
     return false;
   }
-  if (measured_event_types_.empty()) {
+  if (event_selection_set_.empty()) {
     if (!AddDefaultMeasuredEventTypes()) {
       return false;
     }
   }
-  if (!SetEventSelection()) {
-    return false;
-  }
+  SetEventSelectionFlags();
 
   // 2. Create workload.
   std::unique_ptr<Workload> workload;
@@ -132,7 +135,8 @@
       monitored_threads_.push_back(workload->GetPid());
       event_selection_set_.SetEnableOnExec(true);
     } else {
-      LOG(ERROR) << "No threads to monitor. Try `simpleperf help stat` for help\n";
+      LOG(ERROR)
+          << "No threads to monitor. Try `simpleperf help stat` for help\n";
       return false;
     }
   }
@@ -146,7 +150,8 @@
     if (cpus_.empty()) {
       cpus_ = {-1};
     }
-    if (!event_selection_set_.OpenEventFilesForThreadsOnCpus(monitored_threads_, cpus_)) {
+    if (!event_selection_set_.OpenEventFilesForThreadsOnCpus(monitored_threads_,
+                                                             cpus_)) {
       return false;
     }
   }
@@ -167,7 +172,9 @@
     return false;
   }
   double duration_in_sec =
-      std::chrono::duration_cast<std::chrono::duration<double>>(end_time - start_time).count();
+      std::chrono::duration_cast<std::chrono::duration<double>>(end_time -
+                                                                start_time)
+          .count();
   if (!ShowCounters(counters, duration_in_sec)) {
     return false;
   }
@@ -192,10 +199,18 @@
       }
       std::vector<std::string> event_types = android::base::Split(args[i], ",");
       for (auto& event_type : event_types) {
-        if (!AddMeasuredEventType(event_type)) {
+        if (!event_selection_set_.AddEventType(event_type)) {
           return false;
         }
       }
+    } else if (args[i] == "--group") {
+      if (!NextArgumentOrError(args, &i)) {
+        return false;
+      }
+      std::vector<std::string> event_types = android::base::Split(args[i], ",");
+      if (!event_selection_set_.AddEventGroup(event_types)) {
+        return false;
+      }
     } else if (args[i] == "--no-inherit") {
       child_inherit_ = false;
     } else if (args[i] == "-p") {
@@ -220,9 +235,11 @@
     }
   }
 
-  monitored_threads_.insert(monitored_threads_.end(), tid_set.begin(), tid_set.end());
+  monitored_threads_.insert(monitored_threads_.end(), tid_set.begin(),
+                            tid_set.end());
   if (system_wide_collection_ && !monitored_threads_.empty()) {
-    LOG(ERROR) << "Stat system wide and existing processes/threads can't be used at the same time.";
+    LOG(ERROR) << "Stat system wide and existing processes/threads can't be "
+                  "used at the same time.";
     return false;
   }
 
@@ -235,47 +252,31 @@
   return true;
 }
 
-bool StatCommand::AddMeasuredEventType(const std::string& event_type_name) {
-  std::unique_ptr<EventTypeAndModifier> event_type_modifier = ParseEventType(event_type_name);
-  if (event_type_modifier == nullptr) {
-    return false;
-  }
-  for (const auto& type : measured_event_types_) {
-    if (type.name == event_type_modifier->name) {
-      return true;
-    }
-  }
-  measured_event_types_.push_back(*event_type_modifier);
-  return true;
-}
-
 bool StatCommand::AddDefaultMeasuredEventTypes() {
   for (auto& name : default_measured_event_types) {
-    // It is not an error when some event types in the default list are not supported by the kernel.
+    // It is not an error when some event types in the default list are not
+    // supported by the kernel.
     const EventType* type = FindEventTypeByName(name);
-    if (type != nullptr && IsEventAttrSupportedByKernel(CreateDefaultPerfEventAttr(*type))) {
-      AddMeasuredEventType(name);
+    if (type != nullptr &&
+        IsEventAttrSupportedByKernel(CreateDefaultPerfEventAttr(*type))) {
+      if (!event_selection_set_.AddEventType(name)) {
+        return false;
+      }
     }
   }
-  if (measured_event_types_.empty()) {
+  if (event_selection_set_.empty()) {
     LOG(ERROR) << "Failed to add any supported default measured types";
     return false;
   }
   return true;
 }
 
-bool StatCommand::SetEventSelection() {
-  for (auto& event_type : measured_event_types_) {
-    if (!event_selection_set_.AddEventType(event_type)) {
-      return false;
-    }
-  }
+void StatCommand::SetEventSelectionFlags() {
   event_selection_set_.SetInherit(child_inherit_);
-  return true;
 }
 
-static std::string ReadableCountValue(uint64_t count,
-                                      const EventTypeAndModifier& event_type_modifier) {
+static std::string ReadableCountValue(
+    uint64_t count, const EventTypeAndModifier& event_type_modifier) {
   if (event_type_modifier.event_type.name == "cpu-clock" ||
       event_type_modifier.event_type.name == "task-clock") {
     double value = count / 1e6;
@@ -293,36 +294,57 @@
 }
 
 struct CounterSummary {
-  const EventTypeAndModifier* event_type;
+  const EventSelection* selection;
   uint64_t count;
   double scale;
   std::string readable_count_str;
   std::string comment;
 };
 
-static std::string GetCommentForSummary(const CounterSummary& summary,
-                                        const std::vector<CounterSummary>& summaries,
-                                        double duration_in_sec) {
-  const std::string& type_name = summary.event_type->event_type.name;
-  const std::string& modifier = summary.event_type->modifier;
+static const CounterSummary* FindMatchedSummary(
+    const std::vector<CounterSummary>& summaries, const std::string& type_name,
+    const CounterSummary& summary) {
+  std::string modifier = summary.selection->event_type_modifier.modifier;
+  for (const auto& t : summaries) {
+    if (t.selection->event_type_modifier.event_type.name == type_name &&
+        t.selection->event_type_modifier.modifier == modifier) {
+      // The matched summary should be in the same group as summary,
+      // or both have scale == 1.0 (enabled all the time).
+      if (t.selection->group_id == summary.selection->group_id ||
+          (t.scale == 1.0 && summary.scale == 1.0)) {
+        return &t;
+      }
+    }
+  }
+  return nullptr;
+}
+
+static std::string GetCommentForSummary(
+    const CounterSummary& summary, const std::vector<CounterSummary>& summaries,
+    double duration_in_sec) {
+  const EventTypeAndModifier& type_modifier =
+      summary.selection->event_type_modifier;
+  const std::string& type_name = type_modifier.event_type.name;
   if (type_name == "task-clock") {
     double run_sec = summary.count / 1e9;
-    double cpu_usage = run_sec / duration_in_sec;
-    return android::base::StringPrintf("%lf%% cpu usage", cpu_usage * 100);
+    double used_cpus = run_sec / (duration_in_sec / summary.scale);
+    return android::base::StringPrintf("%lf cpus used", used_cpus);
   }
   if (type_name == "cpu-clock") {
     return "";
   }
   if (type_name == "cpu-cycles") {
-    double hz = summary.count / duration_in_sec;
+    double hz = summary.count / (duration_in_sec / summary.scale);
     return android::base::StringPrintf("%lf GHz", hz / 1e9);
   }
   if (type_name == "instructions" && summary.count != 0) {
-    for (auto& t : summaries) {
-      if (t.event_type->event_type.name == "cpu-cycles" && t.event_type->modifier == modifier) {
-        double cycles_per_instruction = t.count * 1.0 / summary.count;
-        return android::base::StringPrintf("%lf cycles per instruction", cycles_per_instruction);
-      }
+    const CounterSummary* cpu_cycles_summary =
+        FindMatchedSummary(summaries, "cpu-cycles", summary);
+    if (cpu_cycles_summary != nullptr) {
+      double cycles_per_instruction =
+          static_cast<double>(cpu_cycles_summary->count) / summary.count;
+      return android::base::StringPrintf("%lf cycles per instruction",
+                                         cycles_per_instruction);
     }
   }
   if (android::base::EndsWith(type_name, "-misses")) {
@@ -334,14 +356,15 @@
     } else {
       s = type_name.substr(0, type_name.size() - strlen("-misses")) + "s";
     }
-    for (auto& t : summaries) {
-      if (t.event_type->event_type.name == s && t.event_type->modifier == modifier && t.count != 0) {
-        double miss_rate = summary.count * 1.0 / t.count;
-        return android::base::StringPrintf("%lf%% miss rate", miss_rate * 100);
-      }
+    const CounterSummary* matched_summary =
+        FindMatchedSummary(summaries, s, summary);
+    if (matched_summary != nullptr && matched_summary->count != 0) {
+      double miss_rate =
+          static_cast<double>(summary.count) / matched_summary->count;
+      return android::base::StringPrintf("%lf%% miss rate", miss_rate * 100);
     }
   }
-  double rate = summary.count / duration_in_sec;
+  double rate = summary.count / (duration_in_sec / summary.scale);
   if (rate > 1e9) {
     return android::base::StringPrintf("%.3lf G/sec", rate / 1e9);
   }
@@ -354,19 +377,22 @@
   return android::base::StringPrintf("%.3lf /sec", rate);
 }
 
-bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters, double duration_in_sec) {
+bool StatCommand::ShowCounters(const std::vector<CountersInfo>& counters,
+                               double duration_in_sec) {
   printf("Performance counter statistics:\n\n");
 
   if (verbose_mode_) {
     for (auto& counters_info : counters) {
-      const EventTypeAndModifier* event_type = counters_info.event_type;
+      const EventTypeAndModifier& event_type =
+          counters_info.selection->event_type_modifier;
       for (auto& counter_info : counters_info.counters) {
-        printf("%s(tid %d, cpu %d): count %s, time_enabled %" PRIu64 ", time running %" PRIu64
-               ", id %" PRIu64 "\n",
-               event_type->name.c_str(), counter_info.tid, counter_info.cpu,
-               ReadableCountValue(counter_info.counter.value, *event_type).c_str(),
-               counter_info.counter.time_enabled, counter_info.counter.time_running,
-               counter_info.counter.id);
+        printf(
+            "%s(tid %d, cpu %d): count %s, time_enabled %" PRIu64
+            ", time running %" PRIu64 ", id %" PRIu64 "\n",
+            event_type.name.c_str(), counter_info.tid, counter_info.cpu,
+            ReadableCountValue(counter_info.counter.value, event_type).c_str(),
+            counter_info.counter.time_enabled,
+            counter_info.counter.time_running, counter_info.counter.id);
       }
     }
   }
@@ -377,8 +403,8 @@
     uint64_t time_enabled_sum = 0;
     uint64_t time_running_sum = 0;
     for (auto& counter_info : counters_info.counters) {
-      // If time_running is 0, the program has never run on this event and we shouldn't
-      // summarize it.
+      // If time_running is 0, the program has never run on this event and we
+      // shouldn't summarize it.
       if (counter_info.counter.time_running != 0) {
         value_sum += counter_info.counter.value;
         time_enabled_sum += counter_info.counter.time_enabled;
@@ -390,10 +416,11 @@
       scale = static_cast<double>(time_enabled_sum) / time_running_sum;
     }
     CounterSummary summary;
-    summary.event_type = counters_info.event_type;
+    summary.selection = counters_info.selection;
     summary.count = value_sum;
     summary.scale = scale;
-    summary.readable_count_str = ReadableCountValue(summary.count, *summary.event_type);
+    summary.readable_count_str = ReadableCountValue(
+        summary.count, summary.selection->event_type_modifier);
     summaries.push_back(summary);
   }
 
@@ -405,22 +432,31 @@
   size_t name_column_width = 0;
   size_t comment_column_width = 0;
   for (auto& summary : summaries) {
-    count_column_width = std::max(count_column_width, summary.readable_count_str.size());
-    name_column_width = std::max(name_column_width, summary.event_type->name.size());
-    comment_column_width = std::max(comment_column_width, summary.comment.size());
+    count_column_width =
+        std::max(count_column_width, summary.readable_count_str.size());
+    name_column_width = std::max(
+        name_column_width, summary.selection->event_type_modifier.name.size());
+    comment_column_width =
+        std::max(comment_column_width, summary.comment.size());
   }
 
   for (auto& summary : summaries) {
-    printf("  %*s  %-*s   # %-*s   (%.0lf%%)\n", static_cast<int>(count_column_width),
-           summary.readable_count_str.c_str(), static_cast<int>(name_column_width),
-           summary.event_type->name.c_str(), static_cast<int>(comment_column_width),
-           summary.comment.c_str(), 1.0 / summary.scale * 100);
+    printf("  %*s  %-*s   # %-*s   (%.0lf%%)\n",
+           static_cast<int>(count_column_width),
+           summary.readable_count_str.c_str(),
+           static_cast<int>(name_column_width),
+           summary.selection->event_type_modifier.name.c_str(),
+           static_cast<int>(comment_column_width), summary.comment.c_str(),
+           1.0 / summary.scale * 100);
   }
 
   printf("\nTotal test time: %lf seconds.\n", duration_in_sec);
   return true;
 }
 
+}  // namespace
+
 void RegisterStatCommand() {
-  RegisterCommand("stat", [] { return std::unique_ptr<Command>(new StatCommand); });
+  RegisterCommand("stat",
+                  [] { return std::unique_ptr<Command>(new StatCommand); });
 }
diff --git a/simpleperf/cmd_stat_test.cpp b/simpleperf/cmd_stat_test.cpp
index 1935b5f..0bb7d85 100644
--- a/simpleperf/cmd_stat_test.cpp
+++ b/simpleperf/cmd_stat_test.cpp
@@ -26,9 +26,7 @@
   return CreateCommandInstance("stat");
 }
 
-TEST(stat_cmd, no_options) {
-  ASSERT_TRUE(StatCmd()->Run({"sleep", "1"}));
-}
+TEST(stat_cmd, no_options) { ASSERT_TRUE(StatCmd()->Run({"sleep", "1"})); }
 
 TEST(stat_cmd, event_option) {
   ASSERT_TRUE(StatCmd()->Run({"-e", "cpu-clock,task-clock", "sleep", "1"}));
@@ -43,14 +41,17 @@
 }
 
 TEST(stat_cmd, tracepoint_event) {
-  TEST_IN_ROOT(ASSERT_TRUE(StatCmd()->Run({"-a", "-e", "sched:sched_switch", "sleep", "1"})));
+  TEST_IN_ROOT(ASSERT_TRUE(
+      StatCmd()->Run({"-a", "-e", "sched:sched_switch", "sleep", "1"})));
 }
 
 TEST(stat_cmd, event_modifier) {
-  ASSERT_TRUE(StatCmd()->Run({"-e", "cpu-cycles:u,cpu-cycles:k", "sleep", "1"}));
+  ASSERT_TRUE(
+      StatCmd()->Run({"-e", "cpu-cycles:u,cpu-cycles:k", "sleep", "1"}));
 }
 
-void CreateProcesses(size_t count, std::vector<std::unique_ptr<Workload>>* workloads) {
+void CreateProcesses(size_t count,
+                     std::vector<std::unique_ptr<Workload>>* workloads) {
   workloads->clear();
   for (size_t i = 0; i < count; ++i) {
     auto workload = Workload::CreateWorkload({"sleep", "1"});
@@ -63,8 +64,8 @@
 TEST(stat_cmd, existing_processes) {
   std::vector<std::unique_ptr<Workload>> workloads;
   CreateProcesses(2, &workloads);
-  std::string pid_list =
-      android::base::StringPrintf("%d,%d", workloads[0]->GetPid(), workloads[1]->GetPid());
+  std::string pid_list = android::base::StringPrintf(
+      "%d,%d", workloads[0]->GetPid(), workloads[1]->GetPid());
   ASSERT_TRUE(StatCmd()->Run({"-p", pid_list}));
 }
 
@@ -72,16 +73,22 @@
   std::vector<std::unique_ptr<Workload>> workloads;
   CreateProcesses(2, &workloads);
   // Process id can be used as thread id in linux.
-  std::string tid_list =
-      android::base::StringPrintf("%d,%d", workloads[0]->GetPid(), workloads[1]->GetPid());
+  std::string tid_list = android::base::StringPrintf(
+      "%d,%d", workloads[0]->GetPid(), workloads[1]->GetPid());
   ASSERT_TRUE(StatCmd()->Run({"-t", tid_list}));
 }
 
-TEST(stat_cmd, no_monitored_threads) {
-  ASSERT_FALSE(StatCmd()->Run({""}));
-}
+TEST(stat_cmd, no_monitored_threads) { ASSERT_FALSE(StatCmd()->Run({""})); }
 
 TEST(stat_cmd, cpu_option) {
   ASSERT_TRUE(StatCmd()->Run({"--cpu", "0", "sleep", "1"}));
   TEST_IN_ROOT(ASSERT_TRUE(StatCmd()->Run({"--cpu", "0", "-a", "sleep", "1"})));
 }
+
+TEST(stat_cmd, group_option) {
+  ASSERT_TRUE(
+      StatCmd()->Run({"--group", "cpu-cycles,cpu-clock", "sleep", "1"}));
+  ASSERT_TRUE(StatCmd()->Run({"--group", "cpu-cycles,cpu-clock", "--group",
+                              "cpu-cycles:u,cpu-clock:u", "--group",
+                              "cpu-cycles:k,cpu-clock:k", "sleep", "1"}));
+}
diff --git a/simpleperf/cpu_hotplug_test.cpp b/simpleperf/cpu_hotplug_test.cpp
index 2cc9a0e..56962b1 100644
--- a/simpleperf/cpu_hotplug_test.cpp
+++ b/simpleperf/cpu_hotplug_test.cpp
@@ -232,7 +232,7 @@
   size_t iterations = 0;
 
   while (std::chrono::steady_clock::now() < end_time) {
-    std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, -1, test_cpu, false);
+    std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, -1, test_cpu, nullptr, false);
     if (event_fd == nullptr) {
       // Failed to open because the test_cpu is offline.
       continue;
@@ -273,7 +273,7 @@
   size_t iterations = 0;
 
   while (std::chrono::steady_clock::now() < end_time) {
-    std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, -1, test_cpu, false);
+    std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, -1, test_cpu, nullptr, false);
     if (event_fd == nullptr) {
       // Failed to open because the test_cpu is offline.
       continue;
@@ -310,11 +310,11 @@
   for (size_t i = 0; i < TEST_ITERATION_COUNT; ++i) {
     int record_cpu = 0;
     ASSERT_TRUE(SetCpuOnline(test_cpu, true));
-    std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, getpid(), record_cpu);
+    std::unique_ptr<EventFd> event_fd = EventFd::OpenEventFile(attr, getpid(), record_cpu, nullptr);
     ASSERT_TRUE(event_fd != nullptr);
     ASSERT_TRUE(SetCpuOnline(test_cpu, false));
     event_fd = nullptr;
-    event_fd = EventFd::OpenEventFile(attr, getpid(), record_cpu);
+    event_fd = EventFd::OpenEventFile(attr, getpid(), record_cpu, nullptr);
     ASSERT_TRUE(event_fd != nullptr);
   }
 }
diff --git a/simpleperf/event_fd.cpp b/simpleperf/event_fd.cpp
index 705d6f3..58e04dc 100644
--- a/simpleperf/event_fd.cpp
+++ b/simpleperf/event_fd.cpp
@@ -44,27 +44,31 @@
 }
 
 std::unique_ptr<EventFd> EventFd::OpenEventFile(const perf_event_attr& attr, pid_t tid, int cpu,
-                                                bool report_error) {
+                                                EventFd* group_event_fd, bool report_error) {
   perf_event_attr perf_attr = attr;
   std::string event_name = GetEventNameByAttr(attr);
-  int perf_event_fd = perf_event_open(&perf_attr, tid, cpu, -1, 0);
+  int group_fd = -1;
+  if (group_event_fd != nullptr) {
+    group_fd = group_event_fd->perf_event_fd_;
+  }
+  int perf_event_fd = perf_event_open(&perf_attr, tid, cpu, group_fd, 0);
   if (perf_event_fd == -1) {
     if (report_error) {
       PLOG(ERROR) << "open perf_event_file (event " << event_name << ", tid " << tid << ", cpu "
-                  << cpu << ") failed";
+                  << cpu << ", group_fd " << group_fd << ") failed";
     } else {
       PLOG(DEBUG) << "open perf_event_file (event " << event_name << ", tid " << tid << ", cpu "
-                  << cpu << ") failed";
+                  << cpu << ", group_fd " << group_fd << ") failed";
     }
     return nullptr;
   }
   if (fcntl(perf_event_fd, F_SETFD, FD_CLOEXEC) == -1) {
     if (report_error) {
       PLOG(ERROR) << "fcntl(FD_CLOEXEC) for perf_event_file (event " << event_name << ", tid "
-                  << tid << ", cpu " << cpu << ") failed";
+                  << tid << ", cpu " << cpu << ", group_fd " << group_fd << ") failed";
     } else {
       PLOG(DEBUG) << "fcntl(FD_CLOEXEC) for perf_event_file (event " << event_name << ", tid "
-                  << tid << ", cpu " << cpu << ") failed";
+                  << tid << ", cpu " << cpu << ", group_fd " << group_fd << ") failed";
     }
     return nullptr;
   }
@@ -190,6 +194,6 @@
 }
 
 bool IsEventAttrSupportedByKernel(perf_event_attr attr) {
-  auto event_fd = EventFd::OpenEventFile(attr, getpid(), -1, false);
+  auto event_fd = EventFd::OpenEventFile(attr, getpid(), -1, nullptr, false);
   return event_fd != nullptr;
 }
diff --git a/simpleperf/event_fd.h b/simpleperf/event_fd.h
index f06d260..c54c3e6 100644
--- a/simpleperf/event_fd.h
+++ b/simpleperf/event_fd.h
@@ -40,10 +40,13 @@
 class EventFd {
  public:
   static std::unique_ptr<EventFd> OpenEventFile(const perf_event_attr& attr, pid_t tid, int cpu,
-                                                bool report_error = true);
+                                                EventFd* group_event_fd, bool report_error = true);
 
   ~EventFd();
 
+  // Give information about this perf_event_file, like (event_name, tid, cpu).
+  std::string Name() const;
+
   uint64_t Id() const;
 
   pid_t ThreadId() const {
@@ -81,9 +84,6 @@
         mmap_len_(0) {
   }
 
-  // Give information about this perf_event_file, like (event_name, tid, cpu).
-  std::string Name() const;
-
   // Discard how much data we have read, so the kernel can reuse this part of mapped area to store
   // new data.
   void DiscardMmapData(size_t discard_size);
diff --git a/simpleperf/event_selection_set.cpp b/simpleperf/event_selection_set.cpp
index 92e559c..1c83250 100644
--- a/simpleperf/event_selection_set.cpp
+++ b/simpleperf/event_selection_set.cpp
@@ -43,115 +43,170 @@
     return false;
   }
   perf_event_attr attr = CreateDefaultPerfEventAttr(*type);
-  attr.sample_type |= PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER;
+  attr.sample_type |=
+      PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER;
   attr.exclude_callchain_user = 1;
   attr.sample_regs_user = GetSupportedRegMask(GetBuildArch());
   attr.sample_stack_user = 8192;
   return IsEventAttrSupportedByKernel(attr);
 }
 
-bool EventSelectionSet::AddEventType(const EventTypeAndModifier& event_type_modifier) {
-  EventSelection selection;
-  selection.event_type_modifier = event_type_modifier;
-  selection.event_attr = CreateDefaultPerfEventAttr(event_type_modifier.event_type);
-  selection.event_attr.exclude_user = event_type_modifier.exclude_user;
-  selection.event_attr.exclude_kernel = event_type_modifier.exclude_kernel;
-  selection.event_attr.exclude_hv = event_type_modifier.exclude_hv;
-  selection.event_attr.exclude_host = event_type_modifier.exclude_host;
-  selection.event_attr.exclude_guest = event_type_modifier.exclude_guest;
-  selection.event_attr.precise_ip = event_type_modifier.precise_ip;
-  if (!IsEventAttrSupportedByKernel(selection.event_attr)) {
-    LOG(ERROR) << "Event type '" << event_type_modifier.name << "' is not supported by the kernel";
+bool EventSelectionSet::BuildAndCheckEventSelection(
+    const std::string& event_name, EventSelection* selection) {
+  std::unique_ptr<EventTypeAndModifier> event_type = ParseEventType(event_name);
+  if (event_type == nullptr) {
     return false;
   }
-  selections_.push_back(std::move(selection));
+  selection->event_type_modifier = *event_type;
+  selection->event_attr = CreateDefaultPerfEventAttr(event_type->event_type);
+  selection->event_attr.exclude_user = event_type->exclude_user;
+  selection->event_attr.exclude_kernel = event_type->exclude_kernel;
+  selection->event_attr.exclude_hv = event_type->exclude_hv;
+  selection->event_attr.exclude_host = event_type->exclude_host;
+  selection->event_attr.exclude_guest = event_type->exclude_guest;
+  selection->event_attr.precise_ip = event_type->precise_ip;
+  if (!IsEventAttrSupportedByKernel(selection->event_attr)) {
+    LOG(ERROR) << "Event type '" << event_type->name
+               << "' is not supported by the kernel";
+    return false;  // *selection was already partially filled in above.
+  }
+  selection->event_fds.clear();  // Start with no opened event files.
+  // Reject an event name that is already present in groups_.
+  for (const auto& group : groups_) {
+    for (const auto& sel : group) {
+      if (sel.event_type_modifier.name == selection->event_type_modifier.name) {
+        LOG(ERROR) << "Event type '" << sel.event_type_modifier.name
+                   << "' appears more than once";
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+bool EventSelectionSet::AddEventType(const std::string& event_name) {
+  return AddEventGroup({event_name});  // A lone event is a one-element group.
+}
+
+bool EventSelectionSet::AddEventGroup(
+    const std::vector<std::string>& event_names) {
+  groups_.emplace_back();  // Staged so duplicates inside this group are seen.
+  for (const auto& event_name : event_names) {
+    EventSelection selection;
+    if (!BuildAndCheckEventSelection(event_name, &selection)) {
+      groups_.pop_back();  // Roll back the partially built group.
+      return false;
+    }
+    selection.selection_id = groups_.back().size();
+    selection.group_id = groups_.size() - 1;
+    groups_.back().push_back(std::move(selection));
+  }
   UnionSampleType();
   return true;
 }
 
-// Union the sample type of different event attrs can make reading sample records in perf.data
+// Unioning the sample types of different event attrs makes reading sample
+// records in perf.data
 // easier.
 void EventSelectionSet::UnionSampleType() {
   uint64_t sample_type = 0;
-  for (auto& selection : selections_) {
-    sample_type |= selection.event_attr.sample_type;
+  for (const auto& group : groups_) {
+    for (const auto& selection : group) {
+      sample_type |= selection.event_attr.sample_type;
+    }
   }
-  for (auto& selection : selections_) {
-    selection.event_attr.sample_type = sample_type;
+  for (auto& group : groups_) {
+    for (auto& selection : group) {
+      selection.event_attr.sample_type = sample_type;
+    }
   }
 }
 
 void EventSelectionSet::SetEnableOnExec(bool enable) {
-  for (auto& selection : selections_) {
-    // If sampling is enabled on exec, then it is disabled at startup, otherwise
-    // it should be enabled at startup. Don't use ioctl(PERF_EVENT_IOC_ENABLE)
-    // to enable it after perf_event_open(). Because some android kernels can't
-    // handle ioctl() well when cpu-hotplug happens. See http://b/25193162.
-    if (enable) {
-      selection.event_attr.enable_on_exec = 1;
-      selection.event_attr.disabled = 1;
-    } else {
-      selection.event_attr.enable_on_exec = 0;
-      selection.event_attr.disabled = 0;
+  for (auto& group : groups_) {
+    for (auto& selection : group) {
+      // If sampling is enabled on exec, then it is disabled at startup;
+      // otherwise it should be enabled at startup.
+      // Don't use ioctl(PERF_EVENT_IOC_ENABLE) to enable it after
+      // perf_event_open(), because some android kernels can't handle
+      // ioctl() well when cpu-hotplug happens.
+      // See http://b/25193162.
+      if (enable) {
+        selection.event_attr.enable_on_exec = 1;
+        selection.event_attr.disabled = 1;
+      } else {
+        selection.event_attr.enable_on_exec = 0;
+        selection.event_attr.disabled = 0;
+      }
     }
   }
 }
 
 bool EventSelectionSet::GetEnableOnExec() {
-  for (auto& selection : selections_) {
-    if (selection.event_attr.enable_on_exec == 0) {
-      return false;
+  for (const auto& group : groups_) {
+    for (const auto& selection : group) {
+      if (selection.event_attr.enable_on_exec == 0) {
+        return false;
+      }
     }
   }
   return true;
 }
 
 void EventSelectionSet::SampleIdAll() {
-  for (auto& selection : selections_) {
-    selection.event_attr.sample_id_all = 1;
+  for (auto& group : groups_) {
+    for (auto& selection : group) {
+      selection.event_attr.sample_id_all = 1;
+    }
   }
 }
 
-void EventSelectionSet::SetSampleFreq(const EventTypeAndModifier& event_type_modifier, uint64_t sample_freq) {
-  EventSelection* sel = FindSelectionByType(event_type_modifier);
-  CHECK(sel != nullptr);
-  sel->event_attr.freq = 1;
-  sel->event_attr.sample_freq = sample_freq;
+void EventSelectionSet::SetSampleFreq(const EventSelection& selection,
+                                      uint64_t sample_freq) {
+  EventSelection& sel = groups_[selection.group_id][selection.selection_id];
+  sel.event_attr.freq = 1;
+  sel.event_attr.sample_freq = sample_freq;
 }
 
-void EventSelectionSet::SetSamplePeriod(const EventTypeAndModifier& event_type_modifier, uint64_t sample_period) {
-  EventSelection* sel = FindSelectionByType(event_type_modifier);
-  CHECK(sel != nullptr);
-  sel->event_attr.freq = 0;
-  sel->event_attr.sample_period = sample_period;
+void EventSelectionSet::SetSamplePeriod(const EventSelection& selection,
+                                        uint64_t sample_period) {
+  EventSelection& sel = groups_[selection.group_id][selection.selection_id];
+  sel.event_attr.freq = 0;
+  sel.event_attr.sample_period = sample_period;
 }
 
 bool EventSelectionSet::SetBranchSampling(uint64_t branch_sample_type) {
   if (branch_sample_type != 0 &&
-      (branch_sample_type & (PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_ANY_CALL |
-                             PERF_SAMPLE_BRANCH_ANY_RETURN | PERF_SAMPLE_BRANCH_IND_CALL)) == 0) {
-    LOG(ERROR) << "Invalid branch_sample_type: 0x" << std::hex << branch_sample_type;
+      (branch_sample_type &
+       (PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_ANY_CALL |
+        PERF_SAMPLE_BRANCH_ANY_RETURN | PERF_SAMPLE_BRANCH_IND_CALL)) == 0) {
+    LOG(ERROR) << "Invalid branch_sample_type: 0x" << std::hex
+               << branch_sample_type;
     return false;
   }
   if (branch_sample_type != 0 && !IsBranchSamplingSupported()) {
     LOG(ERROR) << "branch stack sampling is not supported on this device.";
     return false;
   }
-  for (auto& selection : selections_) {
-    perf_event_attr& attr = selection.event_attr;
-    if (branch_sample_type != 0) {
-      attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
-    } else {
-      attr.sample_type &= ~PERF_SAMPLE_BRANCH_STACK;
+  for (auto& group : groups_) {
+    for (auto& selection : group) {
+      perf_event_attr& attr = selection.event_attr;
+      if (branch_sample_type != 0) {
+        attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+      } else {
+        attr.sample_type &= ~PERF_SAMPLE_BRANCH_STACK;
+      }
+      attr.branch_sample_type = branch_sample_type;
     }
-    attr.branch_sample_type = branch_sample_type;
   }
   return true;
 }
 
 void EventSelectionSet::EnableFpCallChainSampling() {
-  for (auto& selection : selections_) {
-    selection.event_attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+  for (auto& group : groups_) {
+    for (auto& selection : group) {
+      selection.event_attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+    }
   }
 }
 
@@ -160,26 +215,33 @@
     LOG(ERROR) << "dwarf callchain sampling is not supported on this device.";
     return false;
   }
-  for (auto& selection : selections_) {
-    selection.event_attr.sample_type |=
-        PERF_SAMPLE_CALLCHAIN | PERF_SAMPLE_REGS_USER | PERF_SAMPLE_STACK_USER;
-    selection.event_attr.exclude_callchain_user = 1;
-    selection.event_attr.sample_regs_user = GetSupportedRegMask(GetBuildArch());
-    selection.event_attr.sample_stack_user = dump_stack_size;
+  for (auto& group : groups_) {
+    for (auto& selection : group) {
+      selection.event_attr.sample_type |= PERF_SAMPLE_CALLCHAIN |
+                                          PERF_SAMPLE_REGS_USER |
+                                          PERF_SAMPLE_STACK_USER;
+      selection.event_attr.exclude_callchain_user = 1;
+      selection.event_attr.sample_regs_user =
+          GetSupportedRegMask(GetBuildArch());
+      selection.event_attr.sample_stack_user = dump_stack_size;
+    }
   }
   return true;
 }
 
 void EventSelectionSet::SetInherit(bool enable) {
-  for (auto& selection : selections_) {
-    selection.event_attr.inherit = (enable ? 1 : 0);
+  for (auto& group : groups_) {
+    for (auto& selection : group) {
+      selection.event_attr.inherit = (enable ? 1 : 0);
+    }
   }
 }
 
 static bool CheckIfCpusOnline(const std::vector<int>& cpus) {
   std::vector<int> online_cpus = GetOnlineCpus();
   for (const auto& cpu : cpus) {
-    if (std::find(online_cpus.begin(), online_cpus.end(), cpu) == online_cpus.end()) {
+    if (std::find(online_cpus.begin(), online_cpus.end(), cpu) ==
+        online_cpus.end()) {
       LOG(ERROR) << "cpu " << cpu << " is not online.";
       return false;
     }
@@ -191,8 +253,8 @@
   return OpenEventFilesForThreadsOnCpus({-1}, cpus);
 }
 
-bool EventSelectionSet::OpenEventFilesForThreadsOnCpus(const std::vector<pid_t>& threads,
-                                                       std::vector<int> cpus) {
+bool EventSelectionSet::OpenEventFilesForThreadsOnCpus(
+    const std::vector<pid_t>& threads, std::vector<int> cpus) {
   if (!cpus.empty()) {
     // cpus = {-1} means open an event file for all cpus.
     if (!(cpus.size() == 1 && cpus[0] == -1) && !CheckIfCpusOnline(cpus)) {
@@ -206,23 +268,45 @@
 
 bool EventSelectionSet::OpenEventFiles(const std::vector<pid_t>& threads,
                                        const std::vector<int>& cpus) {
-  for (auto& selection : selections_) {
-    for (auto& tid : threads) {
+  for (auto& group : groups_) {
+    for (const auto& tid : threads) {
       size_t open_per_thread = 0;
-      for (auto& cpu : cpus) {
-        auto event_fd = EventFd::OpenEventFile(selection.event_attr, tid, cpu);
-        if (event_fd != nullptr) {
-          LOG(VERBOSE) << "OpenEventFile for tid " << tid << ", cpu " << cpu;
-          selection.event_fds.push_back(std::move(event_fd));
+      std::string failed_event_type;
+      for (const auto& cpu : cpus) {
+        std::vector<std::unique_ptr<EventFd>> event_fds;
+        // Given a tid and cpu, events on the same group should be all opened
+        // successfully or all failed to open.
+        for (auto& selection : group) {
+          EventFd* group_fd = nullptr;
+          if (selection.selection_id != 0) {
+            group_fd = event_fds[0].get();
+          }
+          std::unique_ptr<EventFd> event_fd =
+              EventFd::OpenEventFile(selection.event_attr, tid, cpu, group_fd);
+          if (event_fd != nullptr) {
+            LOG(VERBOSE) << "OpenEventFile for " << event_fd->Name();
+            event_fds.push_back(std::move(event_fd));
+          } else {
+            failed_event_type = selection.event_type_modifier.name;
+            break;
+          }
+        }
+        if (event_fds.size() == group.size()) {
+          for (size_t i = 0; i < group.size(); ++i) {
+            group[i].event_fds.push_back(std::move(event_fds[i]));
+          }
           ++open_per_thread;
         }
       }
-      // As the online cpus can be enabled or disabled at runtime, we may not open event file for
-      // all cpus successfully. But we should open at least one cpu successfully.
+      // As cpus can be brought online or offline at runtime, we may not be
+      // able to open event files on every cpu successfully. But for each
+      // thread we should open event files on at least one cpu
+      // successfully.
       if (open_per_thread == 0) {
         PLOG(ERROR) << "failed to open perf event file for event_type "
-                    << selection.event_type_modifier.name << " for "
-                    << (tid == -1 ? "all threads" : android::base::StringPrintf(" thread %d", tid));
+                    << failed_event_type << " for "
+                    << (tid == -1 ? "all threads" : android::base::StringPrintf(
+                                                        " thread %d", tid));
         return false;
       }
     }
@@ -232,69 +316,43 @@
 
 bool EventSelectionSet::ReadCounters(std::vector<CountersInfo>* counters) {
   counters->clear();
-  for (auto& selection : selections_) {
-    CountersInfo counters_info;
-    counters_info.event_type = &selection.event_type_modifier;
-    for (auto& event_fd : selection.event_fds) {
-      CountersInfo::CounterInfo counter_info;
-      if (!event_fd->ReadCounter(&counter_info.counter)) {
-        return false;
+  for (auto& group : groups_) {
+    for (auto& selection : group) {
+      CountersInfo counters_info;
+      counters_info.selection = &selection;  // Points into groups_.
+      for (auto& event_fd : selection.event_fds) {
+        CountersInfo::CounterInfo counter_info;
+        if (!event_fd->ReadCounter(&counter_info.counter)) {
+          return false;  // *counters keeps the selections read so far.
+        }
+        counter_info.tid = event_fd->ThreadId();
+        counter_info.cpu = event_fd->Cpu();
+        counters_info.counters.push_back(counter_info);
       }
-      counter_info.tid = event_fd->ThreadId();
-      counter_info.cpu = event_fd->Cpu();
-      counters_info.counters.push_back(counter_info);
+      counters->push_back(std::move(counters_info));  // Loop-local: no copy.
     }
-    counters->push_back(counters_info);
   }
   return true;
 }
 
-void EventSelectionSet::PrepareToPollForEventFiles(std::vector<pollfd>* pollfds) {
-  for (auto& selection : selections_) {
-    for (auto& event_fd : selection.event_fds) {
-      pollfd poll_fd;
-      event_fd->PrepareToPollForMmapData(&poll_fd);
-      pollfds->push_back(poll_fd);
+void EventSelectionSet::PrepareToPollForEventFiles(
+    std::vector<pollfd>* pollfds) {
+  for (auto& group : groups_) {
+    for (auto& selection : group) {
+      for (auto& event_fd : selection.event_fds) {
+        pollfd poll_fd;
+        event_fd->PrepareToPollForMmapData(&poll_fd);
+        pollfds->push_back(poll_fd);
+      }
     }
   }
 }
 
 bool EventSelectionSet::MmapEventFiles(size_t mmap_pages) {
-  for (auto& selection : selections_) {
-    for (auto& event_fd : selection.event_fds) {
-      if (!event_fd->MmapContent(mmap_pages)) {
-        return false;
-      }
-    }
-  }
-  return true;
-}
-
-void EventSelectionSet::PrepareToReadMmapEventData(std::function<bool (Record*)> callback) {
-  record_callback_ = callback;
-  bool has_timestamp = true;
-  for (const auto& selection : selections_) {
-    if (!IsTimestampSupported(selection.event_attr)) {
-      has_timestamp = false;
-      break;
-    }
-  }
-  record_cache_.reset(new RecordCache(has_timestamp));
-
-  for (const auto& selection : selections_) {
-    for (const auto& event_fd : selection.event_fds) {
-      int event_id = event_fd->Id();
-      event_id_to_attr_map_[event_id] = &selection.event_attr;
-    }
-  }
-}
-
-bool EventSelectionSet::ReadMmapEventData() {
-  for (auto& selection : selections_) {
-    for (auto& event_fd : selection.event_fds) {
-      bool has_data = true;
-      while (has_data) {
-        if (!ReadMmapEventDataForFd(event_fd, selection.event_attr, &has_data)) {
+  for (auto& group : groups_) {
+    for (auto& selection : group) {
+      for (auto& event_fd : selection.event_fds) {
+        if (!event_fd->MmapContent(mmap_pages)) {
           return false;
         }
       }
@@ -303,9 +361,50 @@
   return true;
 }
 
-bool EventSelectionSet::ReadMmapEventDataForFd(std::unique_ptr<EventFd>& event_fd,
-                                               const perf_event_attr& attr,
-                                               bool* has_data) {
+// Stores |callback|, creates the record cache, and maps event ids to attrs.
+void EventSelectionSet::PrepareToReadMmapEventData(
+    std::function<bool(Record*)> callback) {
+  record_callback_ = callback;
+  bool has_timestamp = true;
+  for (const auto& group : groups_) {
+    for (const auto& selection : group) {
+      if (!IsTimestampSupported(selection.event_attr)) {
+        has_timestamp = false;
+        break;
+      }
+    }
+  }
+  record_cache_.reset(new RecordCache(has_timestamp));
+  for (const auto& group : groups_) {
+    for (const auto& selection : group) {
+      for (const auto& event_fd : selection.event_fds) {
+        // Id() returns uint64_t; an int temporary would truncate large ids.
+        event_id_to_attr_map_[event_fd->Id()] = &selection.event_attr;
+      }
+    }
+  }
+}
+
+// Reads each opened event file until ReadMmapEventDataForFd reports no data.
+bool EventSelectionSet::ReadMmapEventData() {
+  for (auto& group : groups_) {
+    for (auto& selection : group) {
+      for (auto& event_fd : selection.event_fds) {
+        bool more_data = false;
+        do {
+          const bool ok = ReadMmapEventDataForFd(
+              event_fd, selection.event_attr, &more_data);
+          if (!ok) return false;
+        } while (more_data);
+      }
+    }
+  }
+  return true;
+}
+
+bool EventSelectionSet::ReadMmapEventDataForFd(
+    std::unique_ptr<EventFd>& event_fd, const perf_event_attr& attr,
+    bool* has_data) {
   *has_data = false;
   while (true) {
     char* data;
@@ -313,7 +412,8 @@
     if (size == 0) {
       break;
     }
-    std::vector<std::unique_ptr<Record>> records = ReadRecordsFromBuffer(attr, data, size);
+    std::vector<std::unique_ptr<Record>> records =
+        ReadRecordsFromBuffer(attr, data, size);
     record_cache_->Push(std::move(records));
     std::unique_ptr<Record> r = record_cache_->Pop();
     while (r != nullptr) {
@@ -336,25 +436,3 @@
   }
   return true;
 }
-
-EventSelectionSet::EventSelection* EventSelectionSet::FindSelectionByType(
-    const EventTypeAndModifier& event_type_modifier) {
-  for (auto& selection : selections_) {
-    if (selection.event_type_modifier.name == event_type_modifier.name) {
-      return &selection;
-    }
-  }
-  return nullptr;
-}
-
-const perf_event_attr* EventSelectionSet::FindEventAttrByType(
-    const EventTypeAndModifier& event_type_modifier) {
-  EventSelection* selection = FindSelectionByType(event_type_modifier);
-  return (selection != nullptr) ? &selection->event_attr : nullptr;
-}
-
-const std::vector<std::unique_ptr<EventFd>>* EventSelectionSet::FindEventFdsByType(
-    const EventTypeAndModifier& event_type_modifier) {
-  EventSelection* selection = FindSelectionByType(event_type_modifier);
-  return (selection != nullptr) ? &selection->event_fds : nullptr;
-}
diff --git a/simpleperf/event_selection_set.h b/simpleperf/event_selection_set.h
index 3cfdb46..d393a3b 100644
--- a/simpleperf/event_selection_set.h
+++ b/simpleperf/event_selection_set.h
@@ -29,8 +29,17 @@
 #include "perf_event.h"
 #include "record.h"
 
+struct EventSelection {  // One monitored event and its opened event files.
+  uint32_t group_id;      // Index of the owning group in EventSelectionSet.
+  uint32_t selection_id;  // Index of this selection inside its group.
+  EventTypeAndModifier event_type_modifier;
+  perf_event_attr event_attr;
+  std::vector<std::unique_ptr<EventFd>> event_fds;  // Filled when opened.
+};
+typedef std::vector<EventSelection> EventSelectionGroup;
+
 struct CountersInfo {
-  const EventTypeAndModifier* event_type;
+  const EventSelection* selection;
   struct CounterInfo {
     pid_t tid;
     int cpu;
@@ -55,17 +64,22 @@
   EventSelectionSet() {
   }
 
-  bool Empty() const {
-    return selections_.empty();
+  bool empty() const {
+    return groups_.empty();
   }
 
-  bool AddEventType(const EventTypeAndModifier& event_type_modifier);
+  const std::vector<EventSelectionGroup>& groups() const {
+    return groups_;  // Read-only view; callable on a const EventSelectionSet.
+  }
+
+  bool AddEventType(const std::string& event_name);
+  bool AddEventGroup(const std::vector<std::string>& event_names);
 
   void SetEnableOnExec(bool enable);
   bool GetEnableOnExec();
   void SampleIdAll();
-  void SetSampleFreq(const EventTypeAndModifier& event_type_modifier, uint64_t sample_freq);
-  void SetSamplePeriod(const EventTypeAndModifier& event_type_modifier, uint64_t sample_period);
+  void SetSampleFreq(const EventSelection& selection, uint64_t sample_freq);
+  void SetSamplePeriod(const EventSelection& selection, uint64_t sample_period);
   bool SetBranchSampling(uint64_t branch_sample_type);
   void EnableFpCallChainSampling();
   bool EnableDwarfCallChainSampling(uint32_t dump_stack_size);
@@ -80,24 +94,15 @@
   bool ReadMmapEventData();
   bool FinishReadMmapEventData();
 
-  const perf_event_attr* FindEventAttrByType(const EventTypeAndModifier& event_type_modifier);
-  const std::vector<std::unique_ptr<EventFd>>* FindEventFdsByType(
-      const EventTypeAndModifier& event_type_modifier);
-
  private:
+  bool BuildAndCheckEventSelection(const std::string& event_name,
+                                   EventSelection* selection);
   void UnionSampleType();
   bool OpenEventFiles(const std::vector<pid_t>& threads, const std::vector<int>& cpus);
   bool ReadMmapEventDataForFd(std::unique_ptr<EventFd>& event_fd, const perf_event_attr& attr,
                               bool* has_data);
 
-  struct EventSelection {
-    EventTypeAndModifier event_type_modifier;
-    perf_event_attr event_attr;
-    std::vector<std::unique_ptr<EventFd>> event_fds;
-  };
-  EventSelection* FindSelectionByType(const EventTypeAndModifier& event_type_modifier);
-
-  std::vector<EventSelection> selections_;
+  std::vector<EventSelectionGroup> groups_;
 
   std::function<bool (Record*)> record_callback_;
   std::unique_ptr<RecordCache> record_cache_;
diff --git a/simpleperf/tracing.cpp b/simpleperf/tracing.cpp
index 1757e05..884a883 100644
--- a/simpleperf/tracing.cpp
+++ b/simpleperf/tracing.cpp
@@ -396,18 +396,17 @@
 
 uint32_t Tracing::GetPageSize() const { return tracing_file_->GetPageSize(); }
 
-bool GetTracingData(const std::vector<EventTypeAndModifier>& event_types,
+bool GetTracingData(const std::vector<const EventType*>& event_types,
                     std::vector<char>* data) {
   data->clear();
   std::vector<TraceType> trace_types;
   for (const auto& type : event_types) {
-    if (type.event_type.type == PERF_TYPE_TRACEPOINT) {
-      size_t pos = type.event_type.name.find(':');
-      TraceType trace_type;
-      trace_type.system = type.event_type.name.substr(0, pos);
-      trace_type.name = type.event_type.name.substr(pos + 1);
-      trace_types.push_back(trace_type);
-    }
+    CHECK_EQ(PERF_TYPE_TRACEPOINT, type->type);
+    size_t pos = type->name.find(':');
+    TraceType trace_type;
+    trace_type.system = type->name.substr(0, pos);
+    trace_type.name = type->name.substr(pos + 1);
+    trace_types.push_back(trace_type);
   }
   TracingFile tracing_file;
   if (!tracing_file.RecordHeaderFiles()) {
diff --git a/simpleperf/tracing.h b/simpleperf/tracing.h
index bd3c1bf..9fd9534 100644
--- a/simpleperf/tracing.h
+++ b/simpleperf/tracing.h
@@ -73,7 +73,7 @@
   std::vector<TracingFormat> tracing_formats_;
 };
 
-bool GetTracingData(const std::vector<EventTypeAndModifier>& event_types,
+bool GetTracingData(const std::vector<const EventType*>& event_types,
                     std::vector<char>* data);
 
 #endif  // SIMPLE_PERF_TRACING_H_