malloc_bench.cc 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294
  1. // -*- Mode: C++; c-basic-offset: 2; indent-tabs-mode: nil -*-
  2. // Redistribution and use in source and binary forms, with or without
  3. // modification, are permitted provided that the following conditions are
  4. // met:
  5. //
  6. // * Redistributions of source code must retain the above copyright
  7. // notice, this list of conditions and the following disclaimer.
  8. // * Redistributions in binary form must reproduce the above
  9. // copyright notice, this list of conditions and the following disclaimer
  10. // in the documentation and/or other materials provided with the
  11. // distribution.
  12. // * Neither the name of Google Inc. nor the names of its
  13. // contributors may be used to endorse or promote products derived from
  14. // this software without specific prior written permission.
  15. //
  16. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17. // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18. // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19. // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20. // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21. // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22. // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23. // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24. // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25. // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27. #include <stdlib.h>
  28. #include <stdio.h>
  29. #include <stdint.h>
  30. #include <algorithm>
  31. #include "run_benchmark.h"
  32. static void bench_fastpath_throughput(long iterations,
  33. uintptr_t param)
  34. {
  35. size_t sz = 32;
  36. for (; iterations>0; iterations--) {
  37. void *p = malloc(sz);
  38. if (!p) {
  39. abort();
  40. }
  41. free(p);
  42. // this makes next iteration use different free list. So
  43. // subsequent iterations may actually overlap in time.
  44. sz = ((sz * 8191) & 511) + 16;
  45. }
  46. }
  47. static void bench_fastpath_dependent(long iterations,
  48. uintptr_t param)
  49. {
  50. size_t sz = 32;
  51. for (; iterations>0; iterations--) {
  52. void *p = malloc(sz);
  53. if (!p) {
  54. abort();
  55. }
  56. free(p);
  57. // this makes next iteration depend on current iteration. But this
  58. // iteration's free may still overlap with next iteration's malloc
  59. sz = ((sz | reinterpret_cast<size_t>(p)) & 511) + 16;
  60. }
  61. }
  62. static void bench_fastpath_simple(long iterations,
  63. uintptr_t param)
  64. {
  65. size_t sz = static_cast<size_t>(param);
  66. for (; iterations>0; iterations--) {
  67. void *p = malloc(sz);
  68. if (!p) {
  69. abort();
  70. }
  71. free(p);
  72. // next iteration will use same free list as this iteration. So it
  73. // should be prevent next iterations malloc to go too far before
  74. // free done. But using same size will make free "too fast" since
  75. // we'll hit size class cache.
  76. }
  77. }
  78. #ifdef __GNUC__
  79. #define HAVE_SIZED_FREE_OPTION
  80. extern "C" void tc_free_sized(void *ptr, size_t size) __attribute__((weak));
  81. extern "C" void *tc_memalign(size_t align, size_t size) __attribute__((weak));
  82. static bool is_sized_free_available(void)
  83. {
  84. return tc_free_sized != NULL;
  85. }
  86. static bool is_memalign_available(void)
  87. {
  88. return tc_memalign != NULL;
  89. }
  90. static void bench_fastpath_simple_sized(long iterations,
  91. uintptr_t param)
  92. {
  93. size_t sz = static_cast<size_t>(param);
  94. for (; iterations>0; iterations--) {
  95. void *p = malloc(sz);
  96. if (!p) {
  97. abort();
  98. }
  99. tc_free_sized(p, sz);
  100. // next iteration will use same free list as this iteration. So it
  101. // should be prevent next iterations malloc to go too far before
  102. // free done. But using same size will make free "too fast" since
  103. // we'll hit size class cache.
  104. }
  105. }
  106. static void bench_fastpath_memalign(long iterations,
  107. uintptr_t param)
  108. {
  109. size_t sz = static_cast<size_t>(param);
  110. for (; iterations>0; iterations--) {
  111. void *p = tc_memalign(32, sz);
  112. if (!p) {
  113. abort();
  114. }
  115. free(p);
  116. // next iteration will use same free list as this iteration. So it
  117. // should be prevent next iterations malloc to go too far before
  118. // free done. But using same size will make free "too fast" since
  119. // we'll hit size class cache.
  120. }
  121. }
  122. #endif // __GNUC__
  123. #define STACKSZ (1 << 16)
  124. static void bench_fastpath_stack(long iterations,
  125. uintptr_t _param)
  126. {
  127. void *stack[STACKSZ];
  128. size_t sz = 64;
  129. long param = static_cast<long>(_param);
  130. param &= STACKSZ - 1;
  131. param = param ? param : 1;
  132. for (; iterations>0; iterations -= param) {
  133. for (long k = param-1; k >= 0; k--) {
  134. void *p = malloc(sz);
  135. if (!p) {
  136. abort();
  137. }
  138. stack[k] = p;
  139. // this makes next iteration depend on result of this iteration
  140. sz = ((sz | reinterpret_cast<size_t>(p)) & 511) + 16;
  141. }
  142. for (long k = 0; k < param; k++) {
  143. free(stack[k]);
  144. }
  145. }
  146. }
  147. static void bench_fastpath_stack_simple(long iterations,
  148. uintptr_t _param)
  149. {
  150. void *stack[STACKSZ];
  151. size_t sz = 128;
  152. long param = static_cast<long>(_param);
  153. param &= STACKSZ - 1;
  154. param = param ? param : 1;
  155. for (; iterations>0; iterations -= param) {
  156. for (long k = param-1; k >= 0; k--) {
  157. void *p = malloc(sz);
  158. if (!p) {
  159. abort();
  160. }
  161. stack[k] = p;
  162. }
  163. for (long k = 0; k < param; k++) {
  164. free(stack[k]);
  165. }
  166. }
  167. }
  168. static void bench_fastpath_rnd_dependent(long iterations,
  169. uintptr_t _param)
  170. {
  171. static const uintptr_t rnd_c = 1013904223;
  172. static const uintptr_t rnd_a = 1664525;
  173. void *ptrs[STACKSZ];
  174. size_t sz = 128;
  175. if ((_param & (_param - 1))) {
  176. abort();
  177. }
  178. if (_param > STACKSZ) {
  179. abort();
  180. }
  181. int param = static_cast<int>(_param);
  182. for (; iterations>0; iterations -= param) {
  183. for (int k = param-1; k >= 0; k--) {
  184. void *p = malloc(sz);
  185. if (!p) {
  186. abort();
  187. }
  188. ptrs[k] = p;
  189. sz = ((sz | reinterpret_cast<size_t>(p)) & 511) + 16;
  190. }
  191. // this will iterate through all objects in order that is
  192. // unpredictable to processor's prefetchers
  193. uint32_t rnd = 0;
  194. uint32_t free_idx = 0;
  195. do {
  196. free(ptrs[free_idx]);
  197. rnd = rnd * rnd_a + rnd_c;
  198. free_idx = rnd & (param - 1);
  199. } while (free_idx != 0);
  200. }
  201. }
  202. static void *randomize_buffer[13<<20];
  203. void randomize_one_size_class(size_t size) {
  204. int count = (100<<20) / size;
  205. if (count * sizeof(randomize_buffer[0]) > sizeof(randomize_buffer)) {
  206. abort();
  207. }
  208. for (int i = 0; i < count; i++) {
  209. randomize_buffer[i] = malloc(size);
  210. }
  211. std::random_shuffle(randomize_buffer, randomize_buffer + count);
  212. for (int i = 0; i < count; i++) {
  213. free(randomize_buffer[i]);
  214. }
  215. }
  216. void randomize_size_classes() {
  217. randomize_one_size_class(8);
  218. int i;
  219. for (i = 16; i < 256; i += 16) {
  220. randomize_one_size_class(i);
  221. }
  222. for (; i < 512; i += 32) {
  223. randomize_one_size_class(i);
  224. }
  225. for (; i < 1024; i += 64) {
  226. randomize_one_size_class(i);
  227. }
  228. for (; i < (4 << 10); i += 128) {
  229. randomize_one_size_class(i);
  230. }
  231. for (; i < (32 << 10); i += 1024) {
  232. randomize_one_size_class(i);
  233. }
  234. }
  235. int main(void)
  236. {
  237. randomize_size_classes();
  238. report_benchmark("bench_fastpath_throughput", bench_fastpath_throughput, 0);
  239. report_benchmark("bench_fastpath_dependent", bench_fastpath_dependent, 0);
  240. report_benchmark("bench_fastpath_simple", bench_fastpath_simple, 64);
  241. report_benchmark("bench_fastpath_simple", bench_fastpath_simple, 2048);
  242. report_benchmark("bench_fastpath_simple", bench_fastpath_simple, 16384);
  243. #ifdef HAVE_SIZED_FREE_OPTION
  244. if (is_sized_free_available()) {
  245. report_benchmark("bench_fastpath_simple_sized", bench_fastpath_simple_sized, 64);
  246. report_benchmark("bench_fastpath_simple_sized", bench_fastpath_simple_sized, 2048);
  247. }
  248. if (is_memalign_available()) {
  249. report_benchmark("bench_fastpath_memalign", bench_fastpath_memalign, 64);
  250. report_benchmark("bench_fastpath_memalign", bench_fastpath_memalign, 2048);
  251. }
  252. #endif
  253. for (int i = 8; i <= 512; i <<= 1) {
  254. report_benchmark("bench_fastpath_stack", bench_fastpath_stack, i);
  255. }
  256. report_benchmark("bench_fastpath_stack_simple", bench_fastpath_stack_simple, 32);
  257. report_benchmark("bench_fastpath_stack_simple", bench_fastpath_stack_simple, 8192);
  258. report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 32);
  259. report_benchmark("bench_fastpath_rnd_dependent", bench_fastpath_rnd_dependent, 8192);
  260. return 0;
  261. }