histogram.go 9.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. // Copyright 2015 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. package trace
  5. // This file implements histogramming for RPC statistics collection.
  6. import (
  7. "bytes"
  8. "fmt"
  9. "html/template"
  10. "log"
  11. "math"
  12. "golang.org/x/net/internal/timeseries"
  13. )
  14. const (
  15. bucketCount = 38
  16. )
  17. // histogram keeps counts of values in buckets that are spaced
  18. // out in powers of 2: 0-1, 2-3, 4-7...
  19. // histogram implements timeseries.Observable
  20. type histogram struct {
  21. sum int64 // running total of measurements
  22. sumOfSquares float64 // square of running total
  23. buckets []int64 // bucketed values for histogram
  24. value int // holds a single value as an optimization
  25. valueCount int64 // number of values recorded for single value
  26. }
  27. // AddMeasurement records a value measurement observation to the histogram.
  28. func (h *histogram) addMeasurement(value int64) {
  29. // TODO: assert invariant
  30. h.sum += value
  31. h.sumOfSquares += float64(value) * float64(value)
  32. bucketIndex := getBucket(value)
  33. if h.valueCount == 0 || (h.valueCount > 0 && h.value == bucketIndex) {
  34. h.value = bucketIndex
  35. h.valueCount++
  36. } else {
  37. h.allocateBuckets()
  38. h.buckets[bucketIndex]++
  39. }
  40. }
  41. func (h *histogram) allocateBuckets() {
  42. if h.buckets == nil {
  43. h.buckets = make([]int64, bucketCount)
  44. h.buckets[h.value] = h.valueCount
  45. h.value = 0
  46. h.valueCount = -1
  47. }
  48. }
  49. func log2(i int64) int {
  50. n := 0
  51. for ; i >= 0x100; i >>= 8 {
  52. n += 8
  53. }
  54. for ; i > 0; i >>= 1 {
  55. n += 1
  56. }
  57. return n
  58. }
  59. func getBucket(i int64) (index int) {
  60. index = log2(i) - 1
  61. if index < 0 {
  62. index = 0
  63. }
  64. if index >= bucketCount {
  65. index = bucketCount - 1
  66. }
  67. return
  68. }
  69. // Total returns the number of recorded observations.
  70. func (h *histogram) total() (total int64) {
  71. if h.valueCount >= 0 {
  72. total = h.valueCount
  73. }
  74. for _, val := range h.buckets {
  75. total += int64(val)
  76. }
  77. return
  78. }
  79. // Average returns the average value of recorded observations.
  80. func (h *histogram) average() float64 {
  81. t := h.total()
  82. if t == 0 {
  83. return 0
  84. }
  85. return float64(h.sum) / float64(t)
  86. }
  87. // Variance returns the variance of recorded observations.
  88. func (h *histogram) variance() float64 {
  89. t := float64(h.total())
  90. if t == 0 {
  91. return 0
  92. }
  93. s := float64(h.sum) / t
  94. return h.sumOfSquares/t - s*s
  95. }
  96. // StandardDeviation returns the standard deviation of recorded observations.
  97. func (h *histogram) standardDeviation() float64 {
  98. return math.Sqrt(h.variance())
  99. }
  100. // PercentileBoundary estimates the value that the given fraction of recorded
  101. // observations are less than.
  102. func (h *histogram) percentileBoundary(percentile float64) int64 {
  103. total := h.total()
  104. // Corner cases (make sure result is strictly less than Total())
  105. if total == 0 {
  106. return 0
  107. } else if total == 1 {
  108. return int64(h.average())
  109. }
  110. percentOfTotal := round(float64(total) * percentile)
  111. var runningTotal int64
  112. for i := range h.buckets {
  113. value := h.buckets[i]
  114. runningTotal += value
  115. if runningTotal == percentOfTotal {
  116. // We hit an exact bucket boundary. If the next bucket has data, it is a
  117. // good estimate of the value. If the bucket is empty, we interpolate the
  118. // midpoint between the next bucket's boundary and the next non-zero
  119. // bucket. If the remaining buckets are all empty, then we use the
  120. // boundary for the next bucket as the estimate.
  121. j := uint8(i + 1)
  122. min := bucketBoundary(j)
  123. if runningTotal < total {
  124. for h.buckets[j] == 0 {
  125. j++
  126. }
  127. }
  128. max := bucketBoundary(j)
  129. return min + round(float64(max-min)/2)
  130. } else if runningTotal > percentOfTotal {
  131. // The value is in this bucket. Interpolate the value.
  132. delta := runningTotal - percentOfTotal
  133. percentBucket := float64(value-delta) / float64(value)
  134. bucketMin := bucketBoundary(uint8(i))
  135. nextBucketMin := bucketBoundary(uint8(i + 1))
  136. bucketSize := nextBucketMin - bucketMin
  137. return bucketMin + round(percentBucket*float64(bucketSize))
  138. }
  139. }
  140. return bucketBoundary(bucketCount - 1)
  141. }
  142. // Median returns the estimated median of the observed values.
  143. func (h *histogram) median() int64 {
  144. return h.percentileBoundary(0.5)
  145. }
  146. // Add adds other to h.
  147. func (h *histogram) Add(other timeseries.Observable) {
  148. o := other.(*histogram)
  149. if o.valueCount == 0 {
  150. // Other histogram is empty
  151. } else if h.valueCount >= 0 && o.valueCount > 0 && h.value == o.value {
  152. // Both have a single bucketed value, aggregate them
  153. h.valueCount += o.valueCount
  154. } else {
  155. // Two different values necessitate buckets in this histogram
  156. h.allocateBuckets()
  157. if o.valueCount >= 0 {
  158. h.buckets[o.value] += o.valueCount
  159. } else {
  160. for i := range h.buckets {
  161. h.buckets[i] += o.buckets[i]
  162. }
  163. }
  164. }
  165. h.sumOfSquares += o.sumOfSquares
  166. h.sum += o.sum
  167. }
  168. // Clear resets the histogram to an empty state, removing all observed values.
  169. func (h *histogram) Clear() {
  170. h.buckets = nil
  171. h.value = 0
  172. h.valueCount = 0
  173. h.sum = 0
  174. h.sumOfSquares = 0
  175. }
  176. // CopyFrom copies from other, which must be a *histogram, into h.
  177. func (h *histogram) CopyFrom(other timeseries.Observable) {
  178. o := other.(*histogram)
  179. if o.valueCount == -1 {
  180. h.allocateBuckets()
  181. copy(h.buckets, o.buckets)
  182. }
  183. h.sum = o.sum
  184. h.sumOfSquares = o.sumOfSquares
  185. h.value = o.value
  186. h.valueCount = o.valueCount
  187. }
  188. // Multiply scales the histogram by the specified ratio.
  189. func (h *histogram) Multiply(ratio float64) {
  190. if h.valueCount == -1 {
  191. for i := range h.buckets {
  192. h.buckets[i] = int64(float64(h.buckets[i]) * ratio)
  193. }
  194. } else {
  195. h.valueCount = int64(float64(h.valueCount) * ratio)
  196. }
  197. h.sum = int64(float64(h.sum) * ratio)
  198. h.sumOfSquares = h.sumOfSquares * ratio
  199. }
  200. // New creates a new histogram.
  201. func (h *histogram) New() timeseries.Observable {
  202. r := new(histogram)
  203. r.Clear()
  204. return r
  205. }
  206. func (h *histogram) String() string {
  207. return fmt.Sprintf("%d, %f, %d, %d, %v",
  208. h.sum, h.sumOfSquares, h.value, h.valueCount, h.buckets)
  209. }
  210. // round returns the closest int64 to the argument
  211. func round(in float64) int64 {
  212. return int64(math.Floor(in + 0.5))
  213. }
  214. // bucketBoundary returns the first value in the bucket.
  215. func bucketBoundary(bucket uint8) int64 {
  216. if bucket == 0 {
  217. return 0
  218. }
  219. return 1 << bucket
  220. }
  221. // bucketData holds data about a specific bucket for use in distTmpl.
  222. type bucketData struct {
  223. Lower, Upper int64
  224. N int64
  225. Pct, CumulativePct float64
  226. GraphWidth int
  227. }
  228. // data holds data about a Distribution for use in distTmpl.
  229. type data struct {
  230. Buckets []*bucketData
  231. Count, Median int64
  232. Mean, StandardDeviation float64
  233. }
  234. // maxHTMLBarWidth is the maximum width of the HTML bar for visualizing buckets.
  235. const maxHTMLBarWidth = 350.0
  236. // newData returns data representing h for use in distTmpl.
  237. func (h *histogram) newData() *data {
  238. // Force the allocation of buckets to simplify the rendering implementation
  239. h.allocateBuckets()
  240. // We scale the bars on the right so that the largest bar is
  241. // maxHTMLBarWidth pixels in width.
  242. maxBucket := int64(0)
  243. for _, n := range h.buckets {
  244. if n > maxBucket {
  245. maxBucket = n
  246. }
  247. }
  248. total := h.total()
  249. barsizeMult := maxHTMLBarWidth / float64(maxBucket)
  250. var pctMult float64
  251. if total == 0 {
  252. pctMult = 1.0
  253. } else {
  254. pctMult = 100.0 / float64(total)
  255. }
  256. buckets := make([]*bucketData, len(h.buckets))
  257. runningTotal := int64(0)
  258. for i, n := range h.buckets {
  259. if n == 0 {
  260. continue
  261. }
  262. runningTotal += n
  263. var upperBound int64
  264. if i < bucketCount-1 {
  265. upperBound = bucketBoundary(uint8(i + 1))
  266. } else {
  267. upperBound = math.MaxInt64
  268. }
  269. buckets[i] = &bucketData{
  270. Lower: bucketBoundary(uint8(i)),
  271. Upper: upperBound,
  272. N: n,
  273. Pct: float64(n) * pctMult,
  274. CumulativePct: float64(runningTotal) * pctMult,
  275. GraphWidth: int(float64(n) * barsizeMult),
  276. }
  277. }
  278. return &data{
  279. Buckets: buckets,
  280. Count: total,
  281. Median: h.median(),
  282. Mean: h.average(),
  283. StandardDeviation: h.standardDeviation(),
  284. }
  285. }
  286. func (h *histogram) html() template.HTML {
  287. buf := new(bytes.Buffer)
  288. if err := distTmpl.Execute(buf, h.newData()); err != nil {
  289. buf.Reset()
  290. log.Printf("net/trace: couldn't execute template: %v", err)
  291. }
  292. return template.HTML(buf.String())
  293. }
  294. // Input: data
  295. var distTmpl = template.Must(template.New("distTmpl").Parse(`
  296. <table>
  297. <tr>
  298. <td style="padding:0.25em">Count: {{.Count}}</td>
  299. <td style="padding:0.25em">Mean: {{printf "%.0f" .Mean}}</td>
  300. <td style="padding:0.25em">StdDev: {{printf "%.0f" .StandardDeviation}}</td>
  301. <td style="padding:0.25em">Median: {{.Median}}</td>
  302. </tr>
  303. </table>
  304. <hr>
  305. <table>
  306. {{range $b := .Buckets}}
  307. {{if $b}}
  308. <tr>
  309. <td style="padding:0 0 0 0.25em">[</td>
  310. <td style="text-align:right;padding:0 0.25em">{{.Lower}},</td>
  311. <td style="text-align:right;padding:0 0.25em">{{.Upper}})</td>
  312. <td style="text-align:right;padding:0 0.25em">{{.N}}</td>
  313. <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .Pct}}%</td>
  314. <td style="text-align:right;padding:0 0.25em">{{printf "%#.3f" .CumulativePct}}%</td>
  315. <td><div style="background-color: blue; height: 1em; width: {{.GraphWidth}};"></div></td>
  316. </tr>
  317. {{end}}
  318. {{end}}
  319. </table>
  320. `))