mem-functions.c 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374
/*
 * mem-functions.c
 *
 * Simple memcpy() and memset() benchmarks
 *
 * Written by Hitoshi Mitake <mitake@dcl.info.waseda.ac.jp>
 */
  8. #include "../perf.h"
  9. #include "../util/util.h"
  10. #include "../util/parse-options.h"
  11. #include "../util/header.h"
  12. #include "../util/cloexec.h"
  13. #include "bench.h"
  14. #include "mem-memcpy-arch.h"
  15. #include "mem-memset-arch.h"
  16. #include <stdio.h>
  17. #include <stdlib.h>
  18. #include <string.h>
  19. #include <sys/time.h>
  20. #include <errno.h>
  21. #define K 1024
  22. static const char *size_str = "1MB";
  23. static const char *routine_str = "all";
  24. static int iterations = 1;
  25. static bool use_cycles;
  26. static int cycles_fd;
  27. static const struct option options[] = {
  28. OPT_STRING('l', "size", &size_str, "1MB",
  29. "Specify the size of the memory buffers. "
  30. "Available units: B, KB, MB, GB and TB (upper and lower)"),
  31. OPT_STRING('r', "routine", &routine_str, "all",
  32. "Specify the routine to run, \"all\" runs all available routines"),
  33. OPT_INTEGER('i', "iterations", &iterations,
  34. "repeat memcpy() invocation this number of times"),
  35. OPT_BOOLEAN('c', "cycles", &use_cycles,
  36. "Use a cycles event instead of gettimeofday() to measure performance"),
  37. OPT_END()
  38. };
  39. typedef void *(*memcpy_t)(void *, const void *, size_t);
  40. typedef void *(*memset_t)(void *, int, size_t);
  41. struct routine {
  42. const char *name;
  43. const char *desc;
  44. union {
  45. memcpy_t memcpy;
  46. memset_t memset;
  47. } fn;
  48. };
  49. struct routine memcpy_routines[] = {
  50. { .name = "default",
  51. .desc = "Default memcpy() provided by glibc",
  52. .fn.memcpy = memcpy },
  53. #ifdef HAVE_ARCH_X86_64_SUPPORT
  54. # define MEMCPY_FN(_fn, _name, _desc) {.name = _name, .desc = _desc, .fn.memcpy = _fn},
  55. # include "mem-memcpy-x86-64-asm-def.h"
  56. # undef MEMCPY_FN
  57. #endif
  58. { NULL, }
  59. };
  60. static const char * const bench_mem_memcpy_usage[] = {
  61. "perf bench mem memcpy <options>",
  62. NULL
  63. };
  64. static struct perf_event_attr cycle_attr = {
  65. .type = PERF_TYPE_HARDWARE,
  66. .config = PERF_COUNT_HW_CPU_CYCLES
  67. };
  68. static void init_cycles(void)
  69. {
  70. cycles_fd = sys_perf_event_open(&cycle_attr, getpid(), -1, -1, perf_event_open_cloexec_flag());
  71. if (cycles_fd < 0 && errno == ENOSYS)
  72. die("No CONFIG_PERF_EVENTS=y kernel support configured?\n");
  73. else
  74. BUG_ON(cycles_fd < 0);
  75. }
  76. static u64 get_cycles(void)
  77. {
  78. int ret;
  79. u64 clk;
  80. ret = read(cycles_fd, &clk, sizeof(u64));
  81. BUG_ON(ret != sizeof(u64));
  82. return clk;
  83. }
  84. static double timeval2double(struct timeval *ts)
  85. {
  86. return (double)ts->tv_sec + (double)ts->tv_usec / (double)1000000;
  87. }
  88. #define print_bps(x) do { \
  89. if (x < K) \
  90. printf(" %14lf B/Sec\n", x); \
  91. else if (x < K * K) \
  92. printf(" %14lfd KB/Sec\n", x / K); \
  93. else if (x < K * K * K) \
  94. printf(" %14lf MB/Sec\n", x / K / K); \
  95. else \
  96. printf(" %14lf GB/Sec\n", x / K / K / K); \
  97. } while (0)
  98. struct bench_mem_info {
  99. const struct routine *routines;
  100. u64 (*do_cycles)(const struct routine *r, size_t size);
  101. double (*do_gettimeofday)(const struct routine *r, size_t size);
  102. const char *const *usage;
  103. };
  104. static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t size, double size_total)
  105. {
  106. const struct routine *r = &info->routines[r_idx];
  107. double result_bps = 0.0;
  108. u64 result_cycles = 0;
  109. printf("Routine %s (%s)\n", r->name, r->desc);
  110. if (bench_format == BENCH_FORMAT_DEFAULT)
  111. printf("# Copying %s Bytes ...\n\n", size_str);
  112. if (use_cycles) {
  113. result_cycles = info->do_cycles(r, size);
  114. } else {
  115. result_bps = info->do_gettimeofday(r, size);
  116. }
  117. switch (bench_format) {
  118. case BENCH_FORMAT_DEFAULT:
  119. if (use_cycles) {
  120. printf(" %14lf cycles/Byte\n", (double)result_cycles/size_total);
  121. } else {
  122. print_bps(result_bps);
  123. }
  124. break;
  125. case BENCH_FORMAT_SIMPLE:
  126. if (use_cycles) {
  127. printf("%lf\n", (double)result_cycles/size_total);
  128. } else {
  129. printf("%lf\n", result_bps);
  130. }
  131. break;
  132. default:
  133. BUG_ON(1);
  134. break;
  135. }
  136. }
  137. static int bench_mem_common(int argc, const char **argv, struct bench_mem_info *info)
  138. {
  139. int i;
  140. size_t size;
  141. double size_total;
  142. argc = parse_options(argc, argv, options, info->usage, 0);
  143. if (use_cycles)
  144. init_cycles();
  145. size = (size_t)perf_atoll((char *)size_str);
  146. size_total = (double)size * iterations;
  147. if ((s64)size <= 0) {
  148. fprintf(stderr, "Invalid size:%s\n", size_str);
  149. return 1;
  150. }
  151. if (!strncmp(routine_str, "all", 3)) {
  152. for (i = 0; info->routines[i].name; i++)
  153. __bench_mem_routine(info, i, size, size_total);
  154. return 0;
  155. }
  156. for (i = 0; info->routines[i].name; i++) {
  157. if (!strcmp(info->routines[i].name, routine_str))
  158. break;
  159. }
  160. if (!info->routines[i].name) {
  161. printf("Unknown routine: %s\n", routine_str);
  162. printf("Available routines...\n");
  163. for (i = 0; info->routines[i].name; i++) {
  164. printf("\t%s ... %s\n",
  165. info->routines[i].name, info->routines[i].desc);
  166. }
  167. return 1;
  168. }
  169. __bench_mem_routine(info, i, size, size_total);
  170. return 0;
  171. }
  172. static void memcpy_alloc_mem(void **dst, void **src, size_t size)
  173. {
  174. *dst = zalloc(size);
  175. if (!*dst)
  176. die("memory allocation failed - maybe size is too large?\n");
  177. *src = zalloc(size);
  178. if (!*src)
  179. die("memory allocation failed - maybe size is too large?\n");
  180. /* Make sure to always prefault zero pages even if MMAP_THRESH is crossed: */
  181. memset(*src, 0, size);
  182. }
  183. static u64 do_memcpy_cycles(const struct routine *r, size_t size)
  184. {
  185. u64 cycle_start = 0ULL, cycle_end = 0ULL;
  186. void *src = NULL, *dst = NULL;
  187. memcpy_t fn = r->fn.memcpy;
  188. int i;
  189. memcpy_alloc_mem(&dst, &src, size);
  190. /*
  191. * We prefault the freshly allocated memory range here,
  192. * to not measure page fault overhead:
  193. */
  194. fn(dst, src, size);
  195. cycle_start = get_cycles();
  196. for (i = 0; i < iterations; ++i)
  197. fn(dst, src, size);
  198. cycle_end = get_cycles();
  199. free(src);
  200. free(dst);
  201. return cycle_end - cycle_start;
  202. }
  203. static double do_memcpy_gettimeofday(const struct routine *r, size_t size)
  204. {
  205. struct timeval tv_start, tv_end, tv_diff;
  206. memcpy_t fn = r->fn.memcpy;
  207. void *src = NULL, *dst = NULL;
  208. int i;
  209. memcpy_alloc_mem(&dst, &src, size);
  210. /*
  211. * We prefault the freshly allocated memory range here,
  212. * to not measure page fault overhead:
  213. */
  214. fn(dst, src, size);
  215. BUG_ON(gettimeofday(&tv_start, NULL));
  216. for (i = 0; i < iterations; ++i)
  217. fn(dst, src, size);
  218. BUG_ON(gettimeofday(&tv_end, NULL));
  219. timersub(&tv_end, &tv_start, &tv_diff);
  220. free(src);
  221. free(dst);
  222. return (double)(((double)size * iterations) / timeval2double(&tv_diff));
  223. }
  224. int bench_mem_memcpy(int argc, const char **argv, const char *prefix __maybe_unused)
  225. {
  226. struct bench_mem_info info = {
  227. .routines = memcpy_routines,
  228. .do_cycles = do_memcpy_cycles,
  229. .do_gettimeofday = do_memcpy_gettimeofday,
  230. .usage = bench_mem_memcpy_usage,
  231. };
  232. return bench_mem_common(argc, argv, &info);
  233. }
  234. static void memset_alloc_mem(void **dst, size_t size)
  235. {
  236. *dst = zalloc(size);
  237. if (!*dst)
  238. die("memory allocation failed - maybe size is too large?\n");
  239. }
  240. static u64 do_memset_cycles(const struct routine *r, size_t size)
  241. {
  242. u64 cycle_start = 0ULL, cycle_end = 0ULL;
  243. memset_t fn = r->fn.memset;
  244. void *dst = NULL;
  245. int i;
  246. memset_alloc_mem(&dst, size);
  247. /*
  248. * We prefault the freshly allocated memory range here,
  249. * to not measure page fault overhead:
  250. */
  251. fn(dst, -1, size);
  252. cycle_start = get_cycles();
  253. for (i = 0; i < iterations; ++i)
  254. fn(dst, i, size);
  255. cycle_end = get_cycles();
  256. free(dst);
  257. return cycle_end - cycle_start;
  258. }
  259. static double do_memset_gettimeofday(const struct routine *r, size_t size)
  260. {
  261. struct timeval tv_start, tv_end, tv_diff;
  262. memset_t fn = r->fn.memset;
  263. void *dst = NULL;
  264. int i;
  265. memset_alloc_mem(&dst, size);
  266. /*
  267. * We prefault the freshly allocated memory range here,
  268. * to not measure page fault overhead:
  269. */
  270. fn(dst, -1, size);
  271. BUG_ON(gettimeofday(&tv_start, NULL));
  272. for (i = 0; i < iterations; ++i)
  273. fn(dst, i, size);
  274. BUG_ON(gettimeofday(&tv_end, NULL));
  275. timersub(&tv_end, &tv_start, &tv_diff);
  276. free(dst);
  277. return (double)(((double)size * iterations) / timeval2double(&tv_diff));
  278. }
  279. static const char * const bench_mem_memset_usage[] = {
  280. "perf bench mem memset <options>",
  281. NULL
  282. };
  283. static const struct routine memset_routines[] = {
  284. { .name = "default",
  285. .desc = "Default memset() provided by glibc",
  286. .fn.memset = memset },
  287. #ifdef HAVE_ARCH_X86_64_SUPPORT
  288. # define MEMSET_FN(_fn, _name, _desc) { .name = _name, .desc = _desc, .fn.memset = _fn },
  289. # include "mem-memset-x86-64-asm-def.h"
  290. # undef MEMSET_FN
  291. #endif
  292. { NULL, }
  293. };
  294. int bench_mem_memset(int argc, const char **argv, const char *prefix __maybe_unused)
  295. {
  296. struct bench_mem_info info = {
  297. .routines = memset_routines,
  298. .do_cycles = do_memset_cycles,
  299. .do_gettimeofday = do_memset_gettimeofday,
  300. .usage = bench_mem_memset_usage,
  301. };
  302. return bench_mem_common(argc, argv, &info);
  303. }