blob: 8dd8cbc17e267212925c1c3d875dc6a11cc80acd [file] [log] [blame]
Brent Austinba3052e2015-04-21 16:08:23 -07001// Copyright 2009 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package testing
6
7import (
8 "flag"
9 "fmt"
Dan Willemsenebae3022017-01-13 23:01:08 -080010 "internal/race"
Brent Austinba3052e2015-04-21 16:08:23 -070011 "os"
12 "runtime"
Colin Crossd9c6b802019-03-19 21:10:31 -070013 "strconv"
14 "strings"
Brent Austinba3052e2015-04-21 16:08:23 -070015 "sync"
16 "sync/atomic"
17 "time"
18)
19
Dan Willemsenebae3022017-01-13 23:01:08 -080020var matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
Colin Crossd9c6b802019-03-19 21:10:31 -070021var benchTime = benchTimeFlag{d: 1 * time.Second}
Brent Austinba3052e2015-04-21 16:08:23 -070022var benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
23
Colin Crossd9c6b802019-03-19 21:10:31 -070024func init() {
25 flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d`")
26}
27
// benchTimeFlag is the value stored for -test.benchtime. Set fills in exactly
// one field: n for an iteration count such as "100x", or d for a running time
// such as "2s".
type benchTimeFlag struct {
	d time.Duration
	n int
}

// String formats the flag in the syntax Set accepts: "<n>x" when an iteration
// count is set, otherwise the duration.
func (f *benchTimeFlag) String() string {
	if f.n <= 0 {
		return f.d.String()
	}
	return strconv.Itoa(f.n) + "x"
}

// Set parses s and replaces *f with the result. A value ending in "x" is read
// as a decimal iteration count; any other value is handed to
// time.ParseDuration. Values that fail to parse, or that are not strictly
// positive, are rejected and leave *f untouched.
func (f *benchTimeFlag) Set(s string) error {
	if !strings.HasSuffix(s, "x") {
		d, err := time.ParseDuration(s)
		if err != nil || d <= 0 {
			return fmt.Errorf("invalid duration")
		}
		*f = benchTimeFlag{d: d}
		return nil
	}

	// Everything before the trailing "x" must be a positive integer that
	// fits in an int (bit size 0).
	digits := s[:len(s)-1]
	n, err := strconv.ParseInt(digits, 10, 0)
	if err != nil || n <= 0 {
		return fmt.Errorf("invalid count")
	}
	*f = benchTimeFlag{n: int(n)}
	return nil
}
56
var (
	// benchmarkLock is the global lock that ensures only one benchmark
	// runs at a time.
	benchmarkLock sync.Mutex

	// memStats is the buffer every benchmark reads memory statistics into.
	memStats runtime.MemStats
)
62
63// An internal type but exported because it is cross-package; part of the implementation
64// of the "go test" command.
65type InternalBenchmark struct {
66 Name string
67 F func(b *B)
68}
69
70// B is a type passed to Benchmark functions to manage benchmark
71// timing and to specify the number of iterations to run.
Dan Willemsen38f2dba2016-07-08 14:54:35 -070072//
73// A benchmark ends when its Benchmark function returns or calls any of the methods
74// FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called
75// only from the goroutine running the Benchmark function.
76// The other reporting methods, such as the variations of Log and Error,
77// may be called simultaneously from multiple goroutines.
78//
79// Like in tests, benchmark logs are accumulated during execution
80// and dumped to standard error when done. Unlike in tests, benchmark logs
81// are always printed, so as not to hide output whose existence may be
82// affecting benchmark results.
Brent Austinba3052e2015-04-21 16:08:23 -070083type B struct {
84 common
Dan Willemsend2797482017-07-26 13:13:13 -070085 importPath string // import path of the package containing the benchmark
Dan Willemsen38f2dba2016-07-08 14:54:35 -070086 context *benchContext
Brent Austinba3052e2015-04-21 16:08:23 -070087 N int
88 previousN int // number of iterations in the previous run
89 previousDuration time.Duration // total duration of the previous run
Dan Willemsen38f2dba2016-07-08 14:54:35 -070090 benchFunc func(b *B)
Colin Crossd9c6b802019-03-19 21:10:31 -070091 benchTime benchTimeFlag
Brent Austinba3052e2015-04-21 16:08:23 -070092 bytes int64
Dan Willemsen38f2dba2016-07-08 14:54:35 -070093 missingBytes bool // one of the subbenchmarks does not have bytes set.
Brent Austinba3052e2015-04-21 16:08:23 -070094 timerOn bool
95 showAllocResult bool
96 result BenchmarkResult
97 parallelism int // RunParallel creates parallelism*GOMAXPROCS goroutines
98 // The initial states of memStats.Mallocs and memStats.TotalAlloc.
99 startAllocs uint64
100 startBytes uint64
101 // The net total of this test after being run.
102 netAllocs uint64
103 netBytes uint64
104}
105
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700106// StartTimer starts timing a test. This function is called automatically
Brent Austinba3052e2015-04-21 16:08:23 -0700107// before a benchmark starts, but it can also used to resume timing after
108// a call to StopTimer.
109func (b *B) StartTimer() {
110 if !b.timerOn {
111 runtime.ReadMemStats(&memStats)
112 b.startAllocs = memStats.Mallocs
113 b.startBytes = memStats.TotalAlloc
114 b.start = time.Now()
115 b.timerOn = true
116 }
117}
118
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700119// StopTimer stops timing a test. This can be used to pause the timer
Brent Austinba3052e2015-04-21 16:08:23 -0700120// while performing complex initialization that you don't
121// want to measure.
122func (b *B) StopTimer() {
123 if b.timerOn {
Dan Willemsena3223282018-02-27 19:41:43 -0800124 b.duration += time.Since(b.start)
Brent Austinba3052e2015-04-21 16:08:23 -0700125 runtime.ReadMemStats(&memStats)
126 b.netAllocs += memStats.Mallocs - b.startAllocs
127 b.netBytes += memStats.TotalAlloc - b.startBytes
128 b.timerOn = false
129 }
130}
131
132// ResetTimer zeros the elapsed benchmark time and memory allocation counters.
133// It does not affect whether the timer is running.
134func (b *B) ResetTimer() {
135 if b.timerOn {
136 runtime.ReadMemStats(&memStats)
137 b.startAllocs = memStats.Mallocs
138 b.startBytes = memStats.TotalAlloc
139 b.start = time.Now()
140 }
141 b.duration = 0
142 b.netAllocs = 0
143 b.netBytes = 0
144}
145
146// SetBytes records the number of bytes processed in a single operation.
147// If this is called, the benchmark will report ns/op and MB/s.
148func (b *B) SetBytes(n int64) { b.bytes = n }
149
150// ReportAllocs enables malloc statistics for this benchmark.
151// It is equivalent to setting -test.benchmem, but it only affects the
152// benchmark function that calls ReportAllocs.
153func (b *B) ReportAllocs() {
154 b.showAllocResult = true
155}
156
157func (b *B) nsPerOp() int64 {
158 if b.N <= 0 {
159 return 0
160 }
161 return b.duration.Nanoseconds() / int64(b.N)
162}
163
164// runN runs a single benchmark for the specified number of iterations.
165func (b *B) runN(n int) {
166 benchmarkLock.Lock()
167 defer benchmarkLock.Unlock()
168 // Try to get a comparable environment for each run
169 // by clearing garbage from previous runs.
170 runtime.GC()
Dan Willemsenebae3022017-01-13 23:01:08 -0800171 b.raceErrors = -race.Errors()
Brent Austinba3052e2015-04-21 16:08:23 -0700172 b.N = n
173 b.parallelism = 1
174 b.ResetTimer()
175 b.StartTimer()
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700176 b.benchFunc(b)
Brent Austinba3052e2015-04-21 16:08:23 -0700177 b.StopTimer()
178 b.previousN = n
179 b.previousDuration = b.duration
Dan Willemsenebae3022017-01-13 23:01:08 -0800180 b.raceErrors += race.Errors()
181 if b.raceErrors > 0 {
182 b.Errorf("race detected during execution of benchmark")
183 }
Brent Austinba3052e2015-04-21 16:08:23 -0700184}
185
// min returns the smaller of x and y.
func min(x, y int) int {
	if y < x {
		return y
	}
	return x
}
192
// max returns the larger of x and y.
func max(x, y int) int {
	if y > x {
		return y
	}
	return x
}
199
// roundDown10 rounds n down to the nearest power of 10.
// Any n below 10, including zero and negative values, yields 1.
func roundDown10(n int) int {
	pow := 1
	// Each digit removed from n contributes one factor of ten.
	for ; n >= 10; n /= 10 {
		pow *= 10
	}
	return pow
}
215
216// roundUp rounds x up to a number of the form [1eX, 2eX, 3eX, 5eX].
217func roundUp(n int) int {
218 base := roundDown10(n)
219 switch {
220 case n <= base:
221 return base
222 case n <= (2 * base):
223 return 2 * base
224 case n <= (3 * base):
225 return 3 * base
226 case n <= (5 * base):
227 return 5 * base
228 default:
229 return 10 * base
230 }
231}
232
Colin Crossd9c6b802019-03-19 21:10:31 -0700233// run1 runs the first iteration of benchFunc. It reports whether more
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700234// iterations of this benchmarks should be run.
235func (b *B) run1() bool {
236 if ctx := b.context; ctx != nil {
237 // Extend maxLen, if needed.
238 if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen {
239 ctx.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
240 }
241 }
242 go func() {
243 // Signal that we're done whether we return normally
244 // or by FailNow's runtime.Goexit.
245 defer func() {
246 b.signal <- true
247 }()
248
249 b.runN(1)
250 }()
251 <-b.signal
252 if b.failed {
253 fmt.Fprintf(b.w, "--- FAIL: %s\n%s", b.name, b.output)
254 return false
255 }
256 // Only print the output if we know we are not going to proceed.
257 // Otherwise it is printed in processBench.
Dan Willemsenb57d8522017-01-19 15:07:53 -0800258 if atomic.LoadInt32(&b.hasSub) != 0 || b.finished {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700259 tag := "BENCH"
260 if b.skipped {
261 tag = "SKIP"
262 }
263 if b.chatty && (len(b.output) > 0 || b.finished) {
264 b.trimOutput()
265 fmt.Fprintf(b.w, "--- %s: %s\n%s", tag, b.name, b.output)
266 }
267 return false
268 }
269 return true
270}
271
Dan Willemsend2797482017-07-26 13:13:13 -0700272var labelsOnce sync.Once
273
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700274// run executes the benchmark in a separate goroutine, including all of its
275// subbenchmarks. b must not have subbenchmarks.
Dan Willemsena3223282018-02-27 19:41:43 -0800276func (b *B) run() {
Dan Willemsend2797482017-07-26 13:13:13 -0700277 labelsOnce.Do(func() {
278 fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
279 fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
280 if b.importPath != "" {
281 fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
282 }
283 })
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700284 if b.context != nil {
285 // Running go test --test.bench
286 b.context.processBench(b) // Must call doBench.
287 } else {
288 // Running func Benchmark.
289 b.doBench()
290 }
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700291}
292
293func (b *B) doBench() BenchmarkResult {
Brent Austinba3052e2015-04-21 16:08:23 -0700294 go b.launch()
295 <-b.signal
296 return b.result
297}
298
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700299// launch launches the benchmark function. It gradually increases the number
Brent Austinba3052e2015-04-21 16:08:23 -0700300// of benchmark iterations until the benchmark runs for the requested benchtime.
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700301// launch is run by the doBench function as a separate goroutine.
302// run1 must have been called on b.
Brent Austinba3052e2015-04-21 16:08:23 -0700303func (b *B) launch() {
Brent Austinba3052e2015-04-21 16:08:23 -0700304 // Signal that we're done whether we return normally
305 // or by FailNow's runtime.Goexit.
306 defer func() {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700307 b.signal <- true
Brent Austinba3052e2015-04-21 16:08:23 -0700308 }()
309
Brent Austinba3052e2015-04-21 16:08:23 -0700310 // Run the benchmark for at least the specified amount of time.
Colin Crossd9c6b802019-03-19 21:10:31 -0700311 if b.benchTime.n > 0 {
312 b.runN(b.benchTime.n)
313 } else {
314 d := b.benchTime.d
315 for n := 1; !b.failed && b.duration < d && n < 1e9; {
316 last := n
317 // Predict required iterations.
318 n = int(d.Nanoseconds())
319 if nsop := b.nsPerOp(); nsop != 0 {
320 n /= int(nsop)
321 }
322 // Run more iterations than we think we'll need (1.2x).
323 // Don't grow too fast in case we had timing errors previously.
324 // Be sure to run at least one more than last time.
325 n = max(min(n+n/5, 100*last), last+1)
326 // Round up to something easy to read.
327 n = roundUp(n)
328 b.runN(n)
Brent Austinba3052e2015-04-21 16:08:23 -0700329 }
Brent Austinba3052e2015-04-21 16:08:23 -0700330 }
331 b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes}
332}
333
// BenchmarkResult holds the results of a benchmark run.
type BenchmarkResult struct {
	N         int           // The number of iterations.
	T         time.Duration // The total time taken.
	Bytes     int64         // Bytes processed in one iteration.
	MemAllocs uint64        // The total number of memory allocations.
	MemBytes  uint64        // The total number of bytes allocated.
}

// NsPerOp returns r.T in nanoseconds divided by r.N, using integer
// division. It returns 0 when r.N is not positive.
func (r BenchmarkResult) NsPerOp() int64 {
	if r.N > 0 {
		return r.T.Nanoseconds() / int64(r.N)
	}
	return 0
}

// mbPerSec returns the throughput in units of 1e6 bytes per second.
// It returns 0 unless Bytes, T and N are all positive.
func (r BenchmarkResult) mbPerSec() float64 {
	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
		return 0
	}
	megabytes := float64(r.Bytes) * float64(r.N) / 1e6
	return megabytes / r.T.Seconds()
}

// AllocsPerOp returns r.MemAllocs / r.N, or 0 when r.N is not positive.
func (r BenchmarkResult) AllocsPerOp() int64 {
	if r.N > 0 {
		return int64(r.MemAllocs) / int64(r.N)
	}
	return 0
}

// AllocedBytesPerOp returns r.MemBytes / r.N, or 0 when r.N is not positive.
func (r BenchmarkResult) AllocedBytesPerOp() int64 {
	if r.N > 0 {
		return int64(r.MemBytes) / int64(r.N)
	}
	return 0
}

// String formats the result as the iteration count followed by the time per
// operation and, when throughput is available, MB/s.
func (r BenchmarkResult) String() string {
	var mb string
	if mbs := r.mbPerSec(); mbs != 0 {
		mb = fmt.Sprintf("\t%7.2f MB/s", mbs)
	}

	// The format specifiers here make sure that the ones digits line up
	// for all three possible formats.
	var ns string
	nsop := r.NsPerOp()
	switch {
	case r.N > 0 && nsop < 10:
		ns = fmt.Sprintf("%13.2f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
	case r.N > 0 && nsop < 100:
		ns = fmt.Sprintf("%12.1f ns/op", float64(r.T.Nanoseconds())/float64(r.N))
	default:
		ns = fmt.Sprintf("%10d ns/op", nsop)
	}
	return fmt.Sprintf("%8d\t%s%s", r.N, ns, mb)
}

// MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'.
func (r BenchmarkResult) MemString() string {
	bytesPerOp, allocsPerOp := r.AllocedBytesPerOp(), r.AllocsPerOp()
	return fmt.Sprintf("%8d B/op\t%8d allocs/op", bytesPerOp, allocsPerOp)
}
398
// benchmarkName returns the full name of a benchmark, including the procs
// suffix. The "-n" suffix is left off when n is 1.
func benchmarkName(name string, n int) string {
	if n == 1 {
		return name
	}
	return name + "-" + strconv.Itoa(n)
}
406
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700407type benchContext struct {
408 match *matcher
409
410 maxLen int // The largest recorded benchmark name.
411 extLen int // Maximum extension length.
412}
413
Brent Austinba3052e2015-04-21 16:08:23 -0700414// An internal function but exported because it is cross-package; part of the implementation
415// of the "go test" command.
416func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
Dan Willemsend2797482017-07-26 13:13:13 -0700417 runBenchmarks("", matchString, benchmarks)
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700418}
419
Dan Willemsend2797482017-07-26 13:13:13 -0700420func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
Brent Austinba3052e2015-04-21 16:08:23 -0700421 // If no flag was specified, don't run benchmarks.
422 if len(*matchBenchmarks) == 0 {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700423 return true
Brent Austinba3052e2015-04-21 16:08:23 -0700424 }
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700425 // Collect matching benchmarks and determine longest name.
426 maxprocs := 1
427 for _, procs := range cpuList {
428 if procs > maxprocs {
429 maxprocs = procs
430 }
431 }
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700432 ctx := &benchContext{
433 match: newMatcher(matchString, *matchBenchmarks, "-test.bench"),
434 extLen: len(benchmarkName("", maxprocs)),
435 }
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700436 var bs []InternalBenchmark
Brent Austinba3052e2015-04-21 16:08:23 -0700437 for _, Benchmark := range benchmarks {
Dan Willemsend2797482017-07-26 13:13:13 -0700438 if _, matched, _ := ctx.match.fullName(nil, Benchmark.Name); matched {
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700439 bs = append(bs, Benchmark)
440 benchName := benchmarkName(Benchmark.Name, maxprocs)
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700441 if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen {
442 ctx.maxLen = l
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700443 }
Brent Austinba3052e2015-04-21 16:08:23 -0700444 }
Dan Willemsen09eb3b12015-09-16 14:34:17 -0700445 }
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700446 main := &B{
447 common: common{
448 name: "Main",
449 w: os.Stdout,
450 chatty: *chatty,
451 },
Dan Willemsend2797482017-07-26 13:13:13 -0700452 importPath: importPath,
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700453 benchFunc: func(b *B) {
454 for _, Benchmark := range bs {
455 b.Run(Benchmark.Name, Benchmark.F)
456 }
457 },
Colin Crossd9c6b802019-03-19 21:10:31 -0700458 benchTime: benchTime,
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700459 context: ctx,
460 }
461 main.runN(1)
462 return !main.failed
463}
464
465// processBench runs bench b for the configured CPU counts and prints the results.
466func (ctx *benchContext) processBench(b *B) {
467 for i, procs := range cpuList {
Dan Willemsena3223282018-02-27 19:41:43 -0800468 for j := uint(0); j < *count; j++ {
469 runtime.GOMAXPROCS(procs)
470 benchName := benchmarkName(b.name, procs)
471 fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
472 // Recompute the running time for all but the first iteration.
473 if i > 0 || j > 0 {
474 b = &B{
475 common: common{
476 signal: make(chan bool),
477 name: b.name,
478 w: b.w,
479 chatty: b.chatty,
480 },
481 benchFunc: b.benchFunc,
482 benchTime: b.benchTime,
483 }
484 b.run1()
Brent Austinba3052e2015-04-21 16:08:23 -0700485 }
Dan Willemsena3223282018-02-27 19:41:43 -0800486 r := b.doBench()
487 if b.failed {
488 // The output could be very long here, but probably isn't.
489 // We print it all, regardless, because we don't want to trim the reason
490 // the benchmark failed.
491 fmt.Fprintf(b.w, "--- FAIL: %s\n%s", benchName, b.output)
492 continue
493 }
494 results := r.String()
495 if *benchmarkMemory || b.showAllocResult {
496 results += "\t" + r.MemString()
497 }
498 fmt.Fprintln(b.w, results)
499 // Unlike with tests, we ignore the -chatty flag and always print output for
500 // benchmarks since the output generation time will skew the results.
501 if len(b.output) > 0 {
502 b.trimOutput()
503 fmt.Fprintf(b.w, "--- BENCH: %s\n%s", benchName, b.output)
504 }
505 if p := runtime.GOMAXPROCS(-1); p != procs {
506 fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
507 }
Brent Austinba3052e2015-04-21 16:08:23 -0700508 }
509 }
510}
511
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700512// Run benchmarks f as a subbenchmark with the given name. It reports
513// whether there were any failures.
514//
515// A subbenchmark is like any other benchmark. A benchmark that calls Run at
516// least once will not be measured itself and will be called once with N=1.
517func (b *B) Run(name string, f func(b *B)) bool {
518 // Since b has subbenchmarks, we will no longer run it as a benchmark itself.
519 // Release the lock and acquire it on exit to ensure locks stay paired.
Dan Willemsenb57d8522017-01-19 15:07:53 -0800520 atomic.StoreInt32(&b.hasSub, 1)
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700521 benchmarkLock.Unlock()
522 defer benchmarkLock.Lock()
523
Dan Willemsend2797482017-07-26 13:13:13 -0700524 benchName, ok, partial := b.name, true, false
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700525 if b.context != nil {
Dan Willemsend2797482017-07-26 13:13:13 -0700526 benchName, ok, partial = b.context.match.fullName(&b.common, name)
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700527 }
528 if !ok {
529 return true
530 }
Dan Willemsenc7413322018-08-27 23:21:26 -0700531 var pc [maxStackLen]uintptr
532 n := runtime.Callers(2, pc[:])
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700533 sub := &B{
534 common: common{
Dan Willemsenc7413322018-08-27 23:21:26 -0700535 signal: make(chan bool),
536 name: benchName,
537 parent: &b.common,
538 level: b.level + 1,
539 creator: pc[:n],
540 w: b.w,
541 chatty: b.chatty,
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700542 },
Dan Willemsend2797482017-07-26 13:13:13 -0700543 importPath: b.importPath,
544 benchFunc: f,
545 benchTime: b.benchTime,
546 context: b.context,
547 }
548 if partial {
549 // Partial name match, like -bench=X/Y matching BenchmarkX.
550 // Only process sub-benchmarks, if any.
551 atomic.StoreInt32(&sub.hasSub, 1)
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700552 }
553 if sub.run1() {
554 sub.run()
555 }
556 b.add(sub.result)
557 return !sub.failed
558}
559
560// add simulates running benchmarks in sequence in a single iteration. It is
561// used to give some meaningful results in case func Benchmark is used in
562// combination with Run.
563func (b *B) add(other BenchmarkResult) {
564 r := &b.result
565 // The aggregated BenchmarkResults resemble running all subbenchmarks as
566 // in sequence in a single benchmark.
567 r.N = 1
568 r.T += time.Duration(other.NsPerOp())
569 if other.Bytes == 0 {
570 // Summing Bytes is meaningless in aggregate if not all subbenchmarks
571 // set it.
572 b.missingBytes = true
573 r.Bytes = 0
574 }
575 if !b.missingBytes {
576 r.Bytes += other.Bytes
577 }
578 r.MemAllocs += uint64(other.AllocsPerOp())
579 r.MemBytes += uint64(other.AllocedBytesPerOp())
580}
581
Brent Austinba3052e2015-04-21 16:08:23 -0700582// trimOutput shortens the output from a benchmark, which can be very long.
583func (b *B) trimOutput() {
584 // The output is likely to appear multiple times because the benchmark
585 // is run multiple times, but at least it will be seen. This is not a big deal
586 // because benchmarks rarely print, but just in case, we trim it if it's too long.
587 const maxNewlines = 10
588 for nlCount, j := 0, 0; j < len(b.output); j++ {
589 if b.output[j] == '\n' {
590 nlCount++
591 if nlCount >= maxNewlines {
592 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
593 break
594 }
595 }
596 }
597}
598
// A PB is used by RunParallel for running parallel benchmarks.
type PB struct {
	globalN *uint64 // iteration counter shared between all worker goroutines
	grain   uint64  // number of iterations acquired from globalN at once
	cache   uint64  // locally held iterations that have not been run yet
	bN      uint64  // total number of iterations to execute (b.N)
}

// Next reports whether there are more iterations to execute.
func (pb *PB) Next() bool {
	if pb.cache == 0 {
		// The local batch is used up: claim another grain from the shared
		// counter and see how much of it lies within bN.
		claimed := atomic.AddUint64(pb.globalN, pb.grain)
		switch {
		case claimed <= pb.bN:
			pb.cache = pb.grain
		case claimed < pb.bN+pb.grain:
			// Only the front part of this grain is within bN.
			pb.cache = pb.bN + pb.grain - claimed
		default:
			return false
		}
	}
	pb.cache--
	return true
}
622
623// RunParallel runs a benchmark in parallel.
624// It creates multiple goroutines and distributes b.N iterations among them.
625// The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
626// non-CPU-bound benchmarks, call SetParallelism before RunParallel.
627// RunParallel is usually used with the go test -cpu flag.
628//
629// The body function will be run in each goroutine. It should set up any
630// goroutine-local state and then iterate until pb.Next returns false.
631// It should not use the StartTimer, StopTimer, or ResetTimer functions,
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700632// because they have global effect. It should also not call Run.
Brent Austinba3052e2015-04-21 16:08:23 -0700633func (b *B) RunParallel(body func(*PB)) {
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700634 if b.N == 0 {
635 return // Nothing to do when probing.
636 }
Brent Austinba3052e2015-04-21 16:08:23 -0700637 // Calculate grain size as number of iterations that take ~100µs.
638 // 100µs is enough to amortize the overhead and provide sufficient
639 // dynamic load balancing.
640 grain := uint64(0)
641 if b.previousN > 0 && b.previousDuration > 0 {
642 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
643 }
644 if grain < 1 {
645 grain = 1
646 }
647 // We expect the inner loop and function call to take at least 10ns,
648 // so do not do more than 100µs/10ns=1e4 iterations.
649 if grain > 1e4 {
650 grain = 1e4
651 }
652
653 n := uint64(0)
654 numProcs := b.parallelism * runtime.GOMAXPROCS(0)
655 var wg sync.WaitGroup
656 wg.Add(numProcs)
657 for p := 0; p < numProcs; p++ {
658 go func() {
659 defer wg.Done()
660 pb := &PB{
661 globalN: &n,
662 grain: grain,
663 bN: uint64(b.N),
664 }
665 body(pb)
666 }()
667 }
668 wg.Wait()
669 if n <= uint64(b.N) && !b.Failed() {
670 b.Fatal("RunParallel: body exited without pb.Next() == false")
671 }
672}
673
674// SetParallelism sets the number of goroutines used by RunParallel to p*GOMAXPROCS.
675// There is usually no need to call SetParallelism for CPU-bound benchmarks.
676// If p is less than 1, this call will have no effect.
677func (b *B) SetParallelism(p int) {
678 if p >= 1 {
679 b.parallelism = p
680 }
681}
682
683// Benchmark benchmarks a single function. Useful for creating
684// custom benchmarks that do not use the "go test" command.
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700685//
686// If f calls Run, the result will be an estimate of running all its
687// subbenchmarks that don't call Run in sequence in a single benchmark.
Brent Austinba3052e2015-04-21 16:08:23 -0700688func Benchmark(f func(b *B)) BenchmarkResult {
689 b := &B{
690 common: common{
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700691 signal: make(chan bool),
692 w: discard{},
Brent Austinba3052e2015-04-21 16:08:23 -0700693 },
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700694 benchFunc: f,
Colin Crossd9c6b802019-03-19 21:10:31 -0700695 benchTime: benchTime,
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700696 }
Dan Willemsend2797482017-07-26 13:13:13 -0700697 if b.run1() {
698 b.run()
Brent Austinba3052e2015-04-21 16:08:23 -0700699 }
Dan Willemsend2797482017-07-26 13:13:13 -0700700 return b.result
Brent Austinba3052e2015-04-21 16:08:23 -0700701}
Dan Willemsen38f2dba2016-07-08 14:54:35 -0700702
// discard is a writer that throws away everything written to it.
type discard struct{}

// Write reports all of b as written, with a nil error, and stores nothing.
func (discard) Write(b []byte) (n int, err error) {
	return len(b), nil
}