mem-functions.c 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387
/*
 * mem-functions.c
 *
 * Simple memcpy() and memset() benchmarks
 *
 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */
  8. #include "debug.h"
  9. #include "../perf.h"
  10. #include "../util/util.h"
  11. #include <subcmd/parse-options.h>
  12. #include "../util/header.h"
  13. #include "../util/cloexec.h"
  14. #include "bench.h"
  15. #include "mem-memcpy-arch.h"
  16. #include "mem-memset-arch.h"
  17. #include <stdio.h>
  18. #include <stdlib.h>
  19. #include <string.h>
  20. #include <sys/time.h>
  21. #include <errno.h>
  22. #define K 1024
  23. static const char *size_str = "1MB";
  24. static const char *function_str = "all";
  25. static int nr_loops = 1;
  26. static bool use_cycles;
  27. static int cycles_fd;
  28. static const struct option options[] = {
  29. OPT_STRING('s', "size", &size_str, "1MB",
  30. "Specify the size of the memory buffers. "
  31. "Available units: B, KB, MB, GB and TB (case insensitive)"),
  32. OPT_STRING('f', "function", &function_str, "all",
  33. "Specify the function to run, \"all\" runs all available functions, \"help\" lists them"),
  34. OPT_INTEGER('l', "nr_loops", &nr_loops,
  35. "Specify the number of loops to run. (default: 1)"),
  36. OPT_BOOLEAN('c', "cycles", &use_cycles,
  37. "Use a cycles event instead of gettimeofday() to measure performance"),
  38. OPT_END()
  39. };
  40. typedef void *(*memcpy_t)(void *, const void *, size_t);
  41. typedef void *(*memset_t)(void *, int, size_t);
  42. struct function {
  43. const char *name;
  44. const char *desc;
  45. union {
  46. memcpy_t memcpy;
  47. memset_t memset;
  48. } fn;
  49. };
  50. static struct perf_event_attr cycle_attr = {
  51. .type = PERF_TYPE_HARDWARE,
  52. .config = PERF_COUNT_HW_CPU_CYCLES
  53. };
  54. static int init_cycles(void)
  55. {
  56. cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
  57. if (cycles_fd < 0 && errno == ENOSYS) {
  58. pr_debug("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
  59. return -1;
  60. }
  61. return cycles_fd;
  62. }
  63. static u64 get_cycles(void)
  64. {
  65. int ret;
  66. u64 clk;
  67. ret = read(cycles_fd, &clk, sizeof(u64));
  68. BUG_ON(ret != sizeof(u64));
  69. return clk;
  70. }
  71. static double timeval2double(struct timeval *ts)
  72. {
  73. return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000;
  74. }
  75. #define print_bps(x) do { \
  76. if (x < K) \
  77. printf(" %14lf bytes/sec\n", x); \
  78. else if (x < K * K) \
  79. printf(" %14lfd KB/sec\n", x / K); \
  80. else if (x < K * K * K) \
  81. printf(" %14lf MB/sec\n", x / K / K); \
  82. else \
  83. printf(" %14lf GB/sec\n", x / K / K / K); \
  84. } while (0)
  85. struct bench_mem_info {
  86. const struct function *functions;
  87. u64 (*do_cycles)(const struct function *r, size_t size);
  88. double (*do_gettimeofday)(const struct function *r, size_t size);
  89. const char *const *usage;
  90. };
  91. static void __bench_mem_function(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
  92. {
  93. const struct function *r = &info->functions[r_idx];
  94. double result_bps = 0.0;
  95. u64 result_cycles = 0;
  96. printf("# function '%s' (%s)\n", r->name, r->desc);
  97. if (bench_format == BENCH_FORMAT_DEFAULT)
  98. printf("# Copying %s bytes ...\n\n", size_str);
  99. if (use_cycles) {
  100. result_cycles = info->do_cycles(r, size);
  101. } else {
  102. result_bps = info->do_gettimeofday(r, size);
  103. }
  104. switch (bench_format) {
  105. case BENCH_FORMAT_DEFAULT:
  106. if (use_cycles) {
  107. printf(" %14lf cycles/byte\n", (double)result_cycles/size_total);
  108. } else {
  109. print_bps(result_bps);
  110. }
  111. break;
  112. case BENCH_FORMAT_SIMPLE:
  113. if (use_cycles) {
  114. printf("%lf\n", (double)result_cycles/size_total);
  115. } else {
  116. printf("%lf\n", result_bps);
  117. }
  118. break;
  119. default:
  120. BUG_ON(1);
  121. break;
  122. }
  123. }
  124. static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
  125. {
  126. int i;
  127. size_t size;
  128. double size_total;
  129. argc = parse_options(argc, argv, options, info->usage, 0);
  130. if (use_cycles) {
  131. i = init_cycles();
  132. if (i < 0) {
  133. fprintf(stderr, "Failed to open cycles counter\n");
  134. return i;
  135. }
  136. }
  137. size = (size_t)perf_atoll((char *)size_str);
  138. size_total = (double)size * nr_loops;
  139. if ((s64)size <= 0) {
  140. fprintf(stderr, "Invalid size:%s\n", size_str);
  141. return 1;
  142. }
  143. if (!strncmp(function_str, "all", 3)) {
  144. for (i = 0; info->functions[i].name; i++)
  145. __bench_mem_function(info, i, size, size_total);
  146. return 0;
  147. }
  148. for (i = 0; info->functions[i].name; i++) {
  149. if (!strcmp(info->functions[i].name, function_str))
  150. break;
  151. }
  152. if (!info->functions[i].name) {
  153. if (strcmp(function_str, "help") && strcmp(function_str, "h"))
  154. printf("Unknown function: %s\n", function_str);
  155. printf("Available functions:\n");
  156. for (i = 0; info->functions[i].name; i++) {
  157. printf("\t%s ... %s\n",
  158. info->functions[i].name, info->functions[i].desc);
  159. }
  160. return 1;
  161. }
  162. __bench_mem_function(info, i, size, size_total);
  163. return 0;
  164. }
  165. static void memcpy_alloc_mem(void **dst, void **src, size_t size)
  166. {
  167. *dst = zalloc(size);
  168. if (!*dst)
  169. die("memory allocation failed - maybe size is too large?\n");
  170. *src = zalloc(size);
  171. if (!*src)
  172. die("memory allocation failed - maybe size is too large?\n");
  173. /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
  174. memset(*src, 0, size);
  175. }
  176. static u64 do_memcpy_cycles(const struct function *r, size_t size)
  177. {
  178. u64 cycle_start = 0ULL, cycle_end = 0ULL;
  179. void *src = NULL, *dst = NULL;
  180. memcpy_t fn = r->fn.memcpy;
  181. int i;
  182. memcpy_alloc_mem(&dst, &src, size);
  183. /*
  184. * We prefault the freshly allocated memory range here,
  185. * to not measure page fault overhead:
  186. */
  187. fn(dst, src, size);
  188. cycle_start = get_cycles();
  189. for (i = 0; i < nr_loops; ++i)
  190. fn(dst, src, size);
  191. cycle_end = get_cycles();
  192. free(src);
  193. free(dst);
  194. return cycle_end - cycle_start;
  195. }
  196. static double do_memcpy_gettimeofday(const struct function *r, size_t size)
  197. {
  198. struct timeval tv_start, tv_end, tv_diff;
  199. memcpy_t fn = r->fn.memcpy;
  200. void *src = NULL, *dst = NULL;
  201. int i;
  202. memcpy_alloc_mem(&dst, &src, size);
  203. /*
  204. * We prefault the freshly allocated memory range here,
  205. * to not measure page fault overhead:
  206. */
  207. fn(dst, src, size);
  208. BUG_ON(gettimeofday(&tv_start, NULL));
  209. for (i = 0; i < nr_loops; ++i)
  210. fn(dst, src, size);
  211. BUG_ON(gettimeofday(&tv_end, NULL));
  212. timersub(&tv_end, &tv_start, &tv_diff);
  213. free(src);
  214. free(dst);
  215. return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
  216. }
  217. struct function memcpy_functions[] = {
  218. { .name = "default",
  219. .desc = "Default memcpy() provided by glibc",
  220. .fn.memcpy = memcpy },
  221. #ifdef HAVE_ARCH_X86_64_SUPPORT
  222. # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
  223. # include "mem-memcpy-x86-64-asm-def.h"
  224. # undef MEMCPY_FN
  225. #endif
  226. { .name = NULL, }
  227. };
  228. static const char * const bench_mem_memcpy_usage[] = {
  229. "perf bench mem memcpy <options>",
  230. NULL
  231. };
  232. int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused)
  233. {
  234. struct bench_mem_info info = {
  235. .functions = memcpy_functions,
  236. .do_cycles = do_memcpy_cycles,
  237. .do_gettimeofday = do_memcpy_gettimeofday,
  238. .usage = bench_mem_memcpy_usage,
  239. };
  240. return bench_mem_common(argc, argv, &info);
  241. }
  242. static void memset_alloc_mem(void **dst, size_t size)
  243. {
  244. *dst = zalloc(size);
  245. if (!*dst)
  246. die("memory allocation failed - maybe size is too large?\n");
  247. }
  248. static u64 do_memset_cycles(const struct function *r, size_t size)
  249. {
  250. u64 cycle_start = 0ULL, cycle_end = 0ULL;
  251. memset_t fn = r->fn.memset;
  252. void *dst = NULL;
  253. int i;
  254. memset_alloc_mem(&dst, size);
  255. /*
  256. * We prefault the freshly allocated memory range here,
  257. * to not measure page fault overhead:
  258. */
  259. fn(dst, -1, size);
  260. cycle_start = get_cycles();
  261. for (i = 0; i < nr_loops; ++i)
  262. fn(dst, i, size);
  263. cycle_end = get_cycles();
  264. free(dst);
  265. return cycle_end - cycle_start;
  266. }
  267. static double do_memset_gettimeofday(const struct function *r, size_t size)
  268. {
  269. struct timeval tv_start, tv_end, tv_diff;
  270. memset_t fn = r->fn.memset;
  271. void *dst = NULL;
  272. int i;
  273. memset_alloc_mem(&dst, size);
  274. /*
  275. * We prefault the freshly allocated memory range here,
  276. * to not measure page fault overhead:
  277. */
  278. fn(dst, -1, size);
  279. BUG_ON(gettimeofday(&tv_start, NULL));
  280. for (i = 0; i < nr_loops; ++i)
  281. fn(dst, i, size);
  282. BUG_ON(gettimeofday(&tv_end, NULL));
  283. timersub(&tv_end, &tv_start, &tv_diff);
  284. free(dst);
  285. return (double)(((double)size * nr_loops) / timeval2double(&tv_diff));
  286. }
  287. static const char * const bench_mem_memset_usage[] = {
  288. "perf bench mem memset <options>",
  289. NULL
  290. };
  291. static const struct function memset_functions[] = {
  292. { .name = "default",
  293. .desc = "Default memset() provided by glibc",
  294. .fn.memset = memset },
  295. #ifdef HAVE_ARCH_X86_64_SUPPORT
  296. # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
  297. # include "mem-memset-x86-64-asm-def.h"
  298. # undef MEMSET_FN
  299. #endif
  300. { .name = NULL, }
  301. };
  302. int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused)
  303. {
  304. struct bench_mem_info info = {
  305. .functions = memset_functions,
  306. .do_cycles = do_memset_cycles,
  307. .do_gettimeofday = do_memset_gettimeofday,
  308. .usage = bench_mem_memset_usage,
  309. };
  310. return bench_mem_common(argc, argv, &info);
  311. }