testing-bench-diff
diff --git a/src/testing/benchmark.go b/src/testing/benchmark.go
index 2c7083db02..e92ca8e5b1 100644
@@ -116,6 +210,7 @@ type B struct {
    // Remaining iterations of Loop() to be executed in benchFunc.
    // See issue #61515.
    loopN int
+   H     *DDSketch
 }
 
 // StartTimer starts timing a test. This function is called automatically
@@ -136,7 +231,9 @@ func (b *B) StartTimer() {
 // want to measure.
 func (b *B) StopTimer() {
    if b.timerOn {
-       b.duration += highPrecisionTimeSince(b.start)
+       delta := highPrecisionTimeSince(b.start)
+       b.duration += delta
+       b.H.Add(uint64(delta))
        runtime.ReadMemStats(&memStats)
        b.netAllocs += memStats.Mallocs - b.startAllocs
        b.netBytes += memStats.TotalAlloc - b.startBytes
@@ -320,7 +417,7 @@ func (b *B) launch() {
            b.runN(int(n))
        }
    }
-   b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
+   b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra, b.H}
 }
 
 // Elapsed returns the measured elapsed time of the benchmark.
@@ -375,9 +472,9 @@ type BenchmarkResult struct {
    Bytes     int64         // Bytes processed in one iteration.
    MemAllocs uint64        // The total number of memory allocations.
    MemBytes  uint64        // The total number of bytes allocated.
    // Extra records additional metrics reported by ReportMetric.
    Extra map[string]float64
+   H     *DDSketch
 }
 
 // NsPerOp returns the "ns/op" metric.
@@ -445,6 +542,9 @@ func (r BenchmarkResult) String() string {
    if ns != 0 {
        buf.WriteByte('\t')
        prettyPrint(buf, ns, "ns/op")
+       buf.WriteByte('\t')
+       prettyPrint(buf, r.H.Quantile(0.99), "p99")
    }
 
    if mbs := r.mbPerSec(); mbs != 0 {
@@ -456,7 +556,7 @@ func (r BenchmarkResult) String() string {
    var extraKeys []string
    for k := range r.Extra {
        switch k {
-       case "ns/op", "MB/s", "B/op", "allocs/op":
+       case "ns/op", "MB/s", "B/op", "allocs/op", "p99":
            // Built-in metrics reported elsewhere.
            continue
        }
@@ -564,6 +664,7 @@ func runBenchmarks(importPath string, matchString func(pat, str string) (bool, e
        },
        benchTime: benchTime,
        bstate:    bstate,
+       H:         NewDDSketch(0.01),
    }
    if Verbose() {
        main.chatty = newChattyPrinter(main.w)
@@ -595,6 +696,7 @@ func (s *benchState) processBench(b *B) {
                    },
                    benchFunc: b.benchFunc,
                    benchTime: b.benchTime,
+                   H:         NewDDSketch(0.01),
                }
                b.run1()
            }
@@ -671,6 +773,7 @@ func (b *B) Run(name string, f func(b *B)) bool {
        benchFunc:  f,
        benchTime:  b.benchTime,
        bstate:     b.bstate,
+       H:          NewDDSketch(0.01),
    }
    if partial {
        // Partial name match, like -bench=X/Y matching BenchmarkX.
@@ -847,6 +950,7 @@ func Benchmark(f func(b *B)) BenchmarkResult {
        },
        benchFunc: f,
        benchTime: benchTime,
+       H:         NewDDSketch(0.01),
    }
    if b.run1() {
        b.run()