init.c 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531
  1. /*
  2. * S390 Version
  3. * Copyright IBM Corp. 2002, 2011
  4. * Author(s): Thomas Spatzier (tspat@de.ibm.com)
  5. * Author(s): Mahesh Salgaonkar (mahesh@linux.vnet.ibm.com)
  6. * Author(s): Heinz Graalfs (graalfs@linux.vnet.ibm.com)
  7. * Author(s): Andreas Krebbel (krebbel@linux.vnet.ibm.com)
  8. *
  9. * @remark Copyright 2002-2011 OProfile authors
  10. */
  11. #include <linux/oprofile.h>
  12. #include <linux/perf_event.h>
  13. #include <linux/init.h>
  14. #include <linux/errno.h>
  15. #include <linux/fs.h>
  16. #include <linux/module.h>
  17. #include <asm/processor.h>
  18. #include <asm/perf_event.h>
  19. #include "../../../drivers/oprofile/oprof.h"
  20. #include "hwsampler.h"
  21. #include "op_counter.h"
  22. #define DEFAULT_INTERVAL 4127518
  23. #define DEFAULT_SDBT_BLOCKS 1
  24. #define DEFAULT_SDB_BLOCKS 511
  25. static unsigned long oprofile_hw_interval = DEFAULT_INTERVAL;
  26. static unsigned long oprofile_min_interval;
  27. static unsigned long oprofile_max_interval;
  28. static unsigned long oprofile_sdbt_blocks = DEFAULT_SDBT_BLOCKS;
  29. static unsigned long oprofile_sdb_blocks = DEFAULT_SDB_BLOCKS;
  30. static int hwsampler_enabled;
  31. static int hwsampler_running; /* start_mutex must be held to change */
  32. static int hwsampler_available;
  33. static struct oprofile_operations timer_ops;
  34. struct op_counter_config counter_config;
  /*
   * Possible values of force_cpu_type. "timer" is selected when the module
   * is loaded with cpu_type=timer; anything else leaves it at "reserved".
   */
  enum __force_cpu_type {
  	reserved = 0,	/* do not force */
  	timer = 1,	/* always report "timer" as cpu type */
  };

  /* Current override, written by set_cpu_type(). */
  static int force_cpu_type;
  40. static int set_cpu_type(const char *str, struct kernel_param *kp)
  41. {
  42. if (!strcmp(str, "timer")) {
  43. force_cpu_type = timer;
  44. printk(KERN_INFO "oprofile: forcing timer to be returned "
  45. "as cpu type\n");
  46. } else {
  47. force_cpu_type = 0;
  48. }
  49. return 0;
  50. }
  51. module_param_call(cpu_type, set_cpu_type, NULL, NULL, 0);
  52. MODULE_PARM_DESC(cpu_type, "Force legacy basic mode sampling"
  53. "(report cpu_type \"timer\"");
  54. static int __oprofile_hwsampler_start(void)
  55. {
  56. int retval;
  57. retval = hwsampler_allocate(oprofile_sdbt_blocks, oprofile_sdb_blocks);
  58. if (retval)
  59. return retval;
  60. retval = hwsampler_start_all(oprofile_hw_interval);
  61. if (retval)
  62. hwsampler_deallocate();
  63. return retval;
  64. }
  65. static int oprofile_hwsampler_start(void)
  66. {
  67. int retval;
  68. hwsampler_running = hwsampler_enabled;
  69. if (!hwsampler_running)
  70. return timer_ops.start();
  71. retval = perf_reserve_sampling();
  72. if (retval)
  73. return retval;
  74. retval = __oprofile_hwsampler_start();
  75. if (retval)
  76. perf_release_sampling();
  77. return retval;
  78. }
  79. static void oprofile_hwsampler_stop(void)
  80. {
  81. if (!hwsampler_running) {
  82. timer_ops.stop();
  83. return;
  84. }
  85. hwsampler_stop_all();
  86. hwsampler_deallocate();
  87. perf_release_sampling();
  88. return;
  89. }
  90. /*
  91. * File ops used for:
  92. * /dev/oprofile/0/enabled
  93. * /dev/oprofile/hwsampling/hwsampler (cpu_type = timer)
  94. */
  95. static ssize_t hwsampler_read(struct file *file, char __user *buf,
  96. size_t count, loff_t *offset)
  97. {
  98. return oprofilefs_ulong_to_user(hwsampler_enabled, buf, count, offset);
  99. }
  100. static ssize_t hwsampler_write(struct file *file, char const __user *buf,
  101. size_t count, loff_t *offset)
  102. {
  103. unsigned long val;
  104. int retval;
  105. if (*offset)
  106. return -EINVAL;
  107. retval = oprofilefs_ulong_from_user(&val, buf, count);
  108. if (retval <= 0)
  109. return retval;
  110. if (val != 0 && val != 1)
  111. return -EINVAL;
  112. if (oprofile_started)
  113. /*
  114. * save to do without locking as we set
  115. * hwsampler_running in start() when start_mutex is
  116. * held
  117. */
  118. return -EBUSY;
  119. hwsampler_enabled = val;
  120. return count;
  121. }
  122. static const struct file_operations hwsampler_fops = {
  123. .read = hwsampler_read,
  124. .write = hwsampler_write,
  125. };
  126. /*
  127. * File ops used for:
  128. * /dev/oprofile/0/count
  129. * /dev/oprofile/hwsampling/hw_interval (cpu_type = timer)
  130. *
  131. * Make sure that the value is within the hardware range.
  132. */
  133. static ssize_t hw_interval_read(struct file *file, char __user *buf,
  134. size_t count, loff_t *offset)
  135. {
  136. return oprofilefs_ulong_to_user(oprofile_hw_interval, buf,
  137. count, offset);
  138. }
  139. static ssize_t hw_interval_write(struct file *file, char const __user *buf,
  140. size_t count, loff_t *offset)
  141. {
  142. unsigned long val;
  143. int retval;
  144. if (*offset)
  145. return -EINVAL;
  146. retval = oprofilefs_ulong_from_user(&val, buf, count);
  147. if (retval <= 0)
  148. return retval;
  149. if (val < oprofile_min_interval)
  150. oprofile_hw_interval = oprofile_min_interval;
  151. else if (val > oprofile_max_interval)
  152. oprofile_hw_interval = oprofile_max_interval;
  153. else
  154. oprofile_hw_interval = val;
  155. return count;
  156. }
  157. static const struct file_operations hw_interval_fops = {
  158. .read = hw_interval_read,
  159. .write = hw_interval_write,
  160. };
  161. /*
  162. * File ops used for:
  163. * /dev/oprofile/0/event
  164. * Only a single event with number 0 is supported with this counter.
  165. *
  166. * /dev/oprofile/0/unit_mask
  167. * This is a dummy file needed by the user space tools.
  168. * No value other than 0 is accepted or returned.
  169. */
  170. static ssize_t hwsampler_zero_read(struct file *file, char __user *buf,
  171. size_t count, loff_t *offset)
  172. {
  173. return oprofilefs_ulong_to_user(0, buf, count, offset);
  174. }
  175. static ssize_t hwsampler_zero_write(struct file *file, char const __user *buf,
  176. size_t count, loff_t *offset)
  177. {
  178. unsigned long val;
  179. int retval;
  180. if (*offset)
  181. return -EINVAL;
  182. retval = oprofilefs_ulong_from_user(&val, buf, count);
  183. if (retval <= 0)
  184. return retval;
  185. if (val != 0)
  186. return -EINVAL;
  187. return count;
  188. }
  189. static const struct file_operations zero_fops = {
  190. .read = hwsampler_zero_read,
  191. .write = hwsampler_zero_write,
  192. };
  193. /* /dev/oprofile/0/kernel file ops. */
  194. static ssize_t hwsampler_kernel_read(struct file *file, char __user *buf,
  195. size_t count, loff_t *offset)
  196. {
  197. return oprofilefs_ulong_to_user(counter_config.kernel,
  198. buf, count, offset);
  199. }
  200. static ssize_t hwsampler_kernel_write(struct file *file, char const __user *buf,
  201. size_t count, loff_t *offset)
  202. {
  203. unsigned long val;
  204. int retval;
  205. if (*offset)
  206. return -EINVAL;
  207. retval = oprofilefs_ulong_from_user(&val, buf, count);
  208. if (retval <= 0)
  209. return retval;
  210. if (val != 0 && val != 1)
  211. return -EINVAL;
  212. counter_config.kernel = val;
  213. return count;
  214. }
  215. static const struct file_operations kernel_fops = {
  216. .read = hwsampler_kernel_read,
  217. .write = hwsampler_kernel_write,
  218. };
  219. /* /dev/oprofile/0/user file ops. */
  220. static ssize_t hwsampler_user_read(struct file *file, char __user *buf,
  221. size_t count, loff_t *offset)
  222. {
  223. return oprofilefs_ulong_to_user(counter_config.user,
  224. buf, count, offset);
  225. }
  226. static ssize_t hwsampler_user_write(struct file *file, char const __user *buf,
  227. size_t count, loff_t *offset)
  228. {
  229. unsigned long val;
  230. int retval;
  231. if (*offset)
  232. return -EINVAL;
  233. retval = oprofilefs_ulong_from_user(&val, buf, count);
  234. if (retval <= 0)
  235. return retval;
  236. if (val != 0 && val != 1)
  237. return -EINVAL;
  238. counter_config.user = val;
  239. return count;
  240. }
  241. static const struct file_operations user_fops = {
  242. .read = hwsampler_user_read,
  243. .write = hwsampler_user_write,
  244. };
  245. /*
  246. * File ops used for: /dev/oprofile/timer/enabled
  247. * The value always has to be the inverted value of hwsampler_enabled. So
  248. * no separate variable is created. That way we do not need locking.
  249. */
  250. static ssize_t timer_enabled_read(struct file *file, char __user *buf,
  251. size_t count, loff_t *offset)
  252. {
  253. return oprofilefs_ulong_to_user(!hwsampler_enabled, buf, count, offset);
  254. }
  255. static ssize_t timer_enabled_write(struct file *file, char const __user *buf,
  256. size_t count, loff_t *offset)
  257. {
  258. unsigned long val;
  259. int retval;
  260. if (*offset)
  261. return -EINVAL;
  262. retval = oprofilefs_ulong_from_user(&val, buf, count);
  263. if (retval <= 0)
  264. return retval;
  265. if (val != 0 && val != 1)
  266. return -EINVAL;
  267. /* Timer cannot be disabled without having hardware sampling. */
  268. if (val == 0 && !hwsampler_available)
  269. return -EINVAL;
  270. if (oprofile_started)
  271. /*
  272. * save to do without locking as we set
  273. * hwsampler_running in start() when start_mutex is
  274. * held
  275. */
  276. return -EBUSY;
  277. hwsampler_enabled = !val;
  278. return count;
  279. }
  280. static const struct file_operations timer_enabled_fops = {
  281. .read = timer_enabled_read,
  282. .write = timer_enabled_write,
  283. };
  284. static int oprofile_create_hwsampling_files(struct dentry *root)
  285. {
  286. struct dentry *dir;
  287. dir = oprofilefs_mkdir(root, "timer");
  288. if (!dir)
  289. return -EINVAL;
  290. oprofilefs_create_file(dir, "enabled", &timer_enabled_fops);
  291. if (!hwsampler_available)
  292. return 0;
  293. /* reinitialize default values */
  294. hwsampler_enabled = 1;
  295. counter_config.kernel = 1;
  296. counter_config.user = 1;
  297. if (!force_cpu_type) {
  298. /*
  299. * Create the counter file system. A single virtual
  300. * counter is created which can be used to
  301. * enable/disable hardware sampling dynamically from
  302. * user space. The user space will configure a single
  303. * counter with a single event. The value of 'event'
  304. * and 'unit_mask' are not evaluated by the kernel code
  305. * and can only be set to 0.
  306. */
  307. dir = oprofilefs_mkdir(root, "0");
  308. if (!dir)
  309. return -EINVAL;
  310. oprofilefs_create_file(dir, "enabled", &hwsampler_fops);
  311. oprofilefs_create_file(dir, "event", &zero_fops);
  312. oprofilefs_create_file(dir, "count", &hw_interval_fops);
  313. oprofilefs_create_file(dir, "unit_mask", &zero_fops);
  314. oprofilefs_create_file(dir, "kernel", &kernel_fops);
  315. oprofilefs_create_file(dir, "user", &user_fops);
  316. oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
  317. &oprofile_sdbt_blocks);
  318. } else {
  319. /*
  320. * Hardware sampling can be used but the cpu_type is
  321. * forced to timer in order to deal with legacy user
  322. * space tools. The /dev/oprofile/hwsampling fs is
  323. * provided in that case.
  324. */
  325. dir = oprofilefs_mkdir(root, "hwsampling");
  326. if (!dir)
  327. return -EINVAL;
  328. oprofilefs_create_file(dir, "hwsampler",
  329. &hwsampler_fops);
  330. oprofilefs_create_file(dir, "hw_interval",
  331. &hw_interval_fops);
  332. oprofilefs_create_ro_ulong(dir, "hw_min_interval",
  333. &oprofile_min_interval);
  334. oprofilefs_create_ro_ulong(dir, "hw_max_interval",
  335. &oprofile_max_interval);
  336. oprofilefs_create_ulong(dir, "hw_sdbt_blocks",
  337. &oprofile_sdbt_blocks);
  338. }
  339. return 0;
  340. }
  341. static int oprofile_hwsampler_init(struct oprofile_operations *ops)
  342. {
  343. /*
  344. * Initialize the timer mode infrastructure as well in order
  345. * to be able to switch back dynamically. oprofile_timer_init
  346. * is not supposed to fail.
  347. */
  348. if (oprofile_timer_init(ops))
  349. BUG();
  350. memcpy(&timer_ops, ops, sizeof(timer_ops));
  351. ops->create_files = oprofile_create_hwsampling_files;
  352. /*
  353. * If the user space tools do not support newer cpu types,
  354. * the force_cpu_type module parameter
  355. * can be used to always return \"timer\" as cpu type.
  356. */
  357. if (force_cpu_type != timer) {
  358. struct cpuid id;
  359. get_cpu_id (&id);
  360. switch (id.machine) {
  361. case 0x2097: case 0x2098: ops->cpu_type = "s390/z10"; break;
  362. case 0x2817: case 0x2818: ops->cpu_type = "s390/z196"; break;
  363. case 0x2827: case 0x2828: ops->cpu_type = "s390/zEC12"; break;
  364. case 0x2964: case 0x2965: ops->cpu_type = "s390/z13"; break;
  365. default: return -ENODEV;
  366. }
  367. }
  368. if (hwsampler_setup())
  369. return -ENODEV;
  370. /*
  371. * Query the range for the sampling interval from the
  372. * hardware.
  373. */
  374. oprofile_min_interval = hwsampler_query_min_interval();
  375. if (oprofile_min_interval == 0)
  376. return -ENODEV;
  377. oprofile_max_interval = hwsampler_query_max_interval();
  378. if (oprofile_max_interval == 0)
  379. return -ENODEV;
  380. /* The initial value should be sane */
  381. if (oprofile_hw_interval < oprofile_min_interval)
  382. oprofile_hw_interval = oprofile_min_interval;
  383. if (oprofile_hw_interval > oprofile_max_interval)
  384. oprofile_hw_interval = oprofile_max_interval;
  385. printk(KERN_INFO "oprofile: System z hardware sampling "
  386. "facility found.\n");
  387. ops->start = oprofile_hwsampler_start;
  388. ops->stop = oprofile_hwsampler_stop;
  389. return 0;
  390. }
  /* Tear down the hardware sampler by delegating to hwsampler_shutdown(). */
  static void oprofile_hwsampler_exit(void)
  {
  	hwsampler_shutdown();
  }
  /*
   * Callback handed to dump_trace() by s390_backtrace().
   *
   * @data points to the number of frames that may still be recorded.
   * While that budget is non-zero it is decremented, @address is passed
   * to oprofile_add_trace() and 0 is returned; once it reaches zero the
   * function returns 1 without recording anything.
   */
  static int __s390_backtrace(void *data, unsigned long address)
  {
  	unsigned int *remaining = data;

  	if (*remaining != 0) {
  		--*remaining;
  		oprofile_add_trace(address);
  		return 0;
  	}

  	return 1;
  }
  404. static void s390_backtrace(struct pt_regs *regs, unsigned int depth)
  405. {
  406. if (user_mode(regs))
  407. return;
  408. dump_trace(__s390_backtrace, &depth, NULL, regs->gprs[15]);
  409. }
  410. int __init oprofile_arch_init(struct oprofile_operations *ops)
  411. {
  412. ops->backtrace = s390_backtrace;
  413. /*
  414. * -ENODEV is not reported to the caller. The module itself
  415. * will use the timer mode sampling as fallback and this is
  416. * always available.
  417. */
  418. hwsampler_available = oprofile_hwsampler_init(ops) == 0;
  419. return 0;
  420. }
  /* Architecture exit hook; forwards to oprofile_hwsampler_exit(). */
  void oprofile_arch_exit(void)
  {
  	oprofile_hwsampler_exit();
  }