fault.c 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375
  1. /*
  2. * Copyright(c) 2018 Intel Corporation.
  3. *
  4. * This file is provided under a dual BSD/GPLv2 license. When using or
  5. * redistributing this file, you may do so under either license.
  6. *
  7. * GPL LICENSE SUMMARY
  8. *
  9. * This program is free software; you can redistribute it and/or modify
  10. * it under the terms of version 2 of the GNU General Public License as
  11. * published by the Free Software Foundation.
  12. *
  13. * This program is distributed in the hope that it will be useful, but
  14. * WITHOUT ANY WARRANTY; without even the implied warranty of
  15. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  16. * General Public License for more details.
  17. *
  18. * BSD LICENSE
  19. *
  20. * Redistribution and use in source and binary forms, with or without
  21. * modification, are permitted provided that the following conditions
  22. * are met:
  23. *
  24. * - Redistributions of source code must retain the above copyright
  25. * notice, this list of conditions and the following disclaimer.
  26. * - Redistributions in binary form must reproduce the above copyright
  27. * notice, this list of conditions and the following disclaimer in
  28. * the documentation and/or other materials provided with the
  29. * distribution.
  30. * - Neither the name of Intel Corporation nor the names of its
  31. * contributors may be used to endorse or promote products derived
  32. * from this software without specific prior written permission.
  33. *
  34. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  35. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  36. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  37. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  38. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  39. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  40. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  41. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  42. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  43. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  44. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  45. *
  46. */
  47. #include <linux/debugfs.h>
  48. #include <linux/seq_file.h>
  49. #include <linux/kernel.h>
  50. #include <linux/module.h>
  51. #include <linux/types.h>
  52. #include <linux/bitmap.h>
  53. #include "debugfs.h"
  54. #include "fault.h"
  55. #include "trace.h"
/*
 * Direction bit flags.  They are tested against fault->direction in
 * __hfi1_should_fault(): the tx hook passes HFI1_FAULT_DIR_TX and the rx hook
 * passes HFI1_FAULT_DIR_RX.
 */
#define HFI1_FAULT_DIR_TX BIT(0)
#define HFI1_FAULT_DIR_RX BIT(1)
/* Both directions; this is the value hfi1_fault_init_debugfs() starts with. */
#define HFI1_FAULT_DIR_TXRX (HFI1_FAULT_DIR_TX | HFI1_FAULT_DIR_RX)
  59. static void *_fault_stats_seq_start(struct seq_file *s, loff_t *pos)
  60. {
  61. struct hfi1_opcode_stats_perctx *opstats;
  62. if (*pos >= ARRAY_SIZE(opstats->stats))
  63. return NULL;
  64. return pos;
  65. }
  66. static void *_fault_stats_seq_next(struct seq_file *s, void *v, loff_t *pos)
  67. {
  68. struct hfi1_opcode_stats_perctx *opstats;
  69. ++*pos;
  70. if (*pos >= ARRAY_SIZE(opstats->stats))
  71. return NULL;
  72. return pos;
  73. }
/*
 * seq_file stop callback.  The body is empty: the start/next callbacks in
 * this file hand out the position pointer itself and acquire nothing.
 */
static void _fault_stats_seq_stop(struct seq_file *s, void *v)
{
}
  77. static int _fault_stats_seq_show(struct seq_file *s, void *v)
  78. {
  79. loff_t *spos = v;
  80. loff_t i = *spos, j;
  81. u64 n_packets = 0, n_bytes = 0;
  82. struct hfi1_ibdev *ibd = (struct hfi1_ibdev *)s->private;
  83. struct hfi1_devdata *dd = dd_from_dev(ibd);
  84. struct hfi1_ctxtdata *rcd;
  85. for (j = 0; j < dd->first_dyn_alloc_ctxt; j++) {
  86. rcd = hfi1_rcd_get_by_index(dd, j);
  87. if (rcd) {
  88. n_packets += rcd->opstats->stats[i].n_packets;
  89. n_bytes += rcd->opstats->stats[i].n_bytes;
  90. }
  91. hfi1_rcd_put(rcd);
  92. }
  93. for_each_possible_cpu(j) {
  94. struct hfi1_opcode_stats_perctx *sp =
  95. per_cpu_ptr(dd->tx_opstats, j);
  96. n_packets += sp->stats[i].n_packets;
  97. n_bytes += sp->stats[i].n_bytes;
  98. }
  99. if (!n_packets && !n_bytes)
  100. return SEQ_SKIP;
  101. if (!ibd->fault->n_rxfaults[i] && !ibd->fault->n_txfaults[i])
  102. return SEQ_SKIP;
  103. seq_printf(s, "%02llx %llu/%llu (faults rx:%llu faults: tx:%llu)\n", i,
  104. (unsigned long long)n_packets,
  105. (unsigned long long)n_bytes,
  106. (unsigned long long)ibd->fault->n_rxfaults[i],
  107. (unsigned long long)ibd->fault->n_txfaults[i]);
  108. return 0;
  109. }
/*
 * NOTE(review): these macros are defined outside this file (presumably in
 * "debugfs.h").  They appear to generate the seq_file and file operations for
 * the "fault_stats" entry from the _fault_stats_seq_{start,next,stop,show}
 * callbacks, for use by DEBUGFS_SEQ_FILE_CREATE(fault_stats, ...) - confirm
 * against the macro definitions.
 */
DEBUGFS_SEQ_FILE_OPS(fault_stats);
DEBUGFS_SEQ_FILE_OPEN(fault_stats);
DEBUGFS_FILE_OPS(fault_stats);
  113. static int fault_opcodes_open(struct inode *inode, struct file *file)
  114. {
  115. file->private_data = inode->i_private;
  116. return nonseekable_open(inode, file);
  117. }
  118. static ssize_t fault_opcodes_write(struct file *file, const char __user *buf,
  119. size_t len, loff_t *pos)
  120. {
  121. ssize_t ret = 0;
  122. /* 1280 = 256 opcodes * 4 chars/opcode + 255 commas + NULL */
  123. size_t copy, datalen = 1280;
  124. char *data, *token, *ptr, *end;
  125. struct fault *fault = file->private_data;
  126. data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
  127. if (!data)
  128. return -ENOMEM;
  129. copy = min(len, datalen - 1);
  130. if (copy_from_user(data, buf, copy))
  131. return -EFAULT;
  132. ret = debugfs_file_get(file->f_path.dentry);
  133. if (unlikely(ret))
  134. return ret;
  135. ptr = data;
  136. token = ptr;
  137. for (ptr = data; *ptr; ptr = end + 1, token = ptr) {
  138. char *dash;
  139. unsigned long range_start, range_end, i;
  140. bool remove = false;
  141. end = strchr(ptr, ',');
  142. if (end)
  143. *end = '\0';
  144. if (token[0] == '-') {
  145. remove = true;
  146. token++;
  147. }
  148. dash = strchr(token, '-');
  149. if (dash)
  150. *dash = '\0';
  151. if (kstrtoul(token, 0, &range_start))
  152. break;
  153. if (dash) {
  154. token = dash + 1;
  155. if (kstrtoul(token, 0, &range_end))
  156. break;
  157. } else {
  158. range_end = range_start;
  159. }
  160. if (range_start == range_end && range_start == -1UL) {
  161. bitmap_zero(fault->opcodes, sizeof(fault->opcodes) *
  162. BITS_PER_BYTE);
  163. break;
  164. }
  165. for (i = range_start; i <= range_end; i++) {
  166. if (remove)
  167. clear_bit(i, fault->opcodes);
  168. else
  169. set_bit(i, fault->opcodes);
  170. }
  171. if (!end)
  172. break;
  173. }
  174. ret = len;
  175. debugfs_file_put(file->f_path.dentry);
  176. kfree(data);
  177. return ret;
  178. }
  179. static ssize_t fault_opcodes_read(struct file *file, char __user *buf,
  180. size_t len, loff_t *pos)
  181. {
  182. ssize_t ret = 0;
  183. char *data;
  184. size_t datalen = 1280, size = 0; /* see fault_opcodes_write() */
  185. unsigned long bit = 0, zero = 0;
  186. struct fault *fault = file->private_data;
  187. size_t bitsize = sizeof(fault->opcodes) * BITS_PER_BYTE;
  188. data = kcalloc(datalen, sizeof(*data), GFP_KERNEL);
  189. if (!data)
  190. return -ENOMEM;
  191. ret = debugfs_file_get(file->f_path.dentry);
  192. if (unlikely(ret))
  193. return ret;
  194. bit = find_first_bit(fault->opcodes, bitsize);
  195. while (bit < bitsize) {
  196. zero = find_next_zero_bit(fault->opcodes, bitsize, bit);
  197. if (zero - 1 != bit)
  198. size += snprintf(data + size,
  199. datalen - size - 1,
  200. "0x%lx-0x%lx,", bit, zero - 1);
  201. else
  202. size += snprintf(data + size,
  203. datalen - size - 1, "0x%lx,",
  204. bit);
  205. bit = find_next_bit(fault->opcodes, bitsize, zero);
  206. }
  207. debugfs_file_put(file->f_path.dentry);
  208. data[size - 1] = '\n';
  209. data[size] = '\0';
  210. ret = simple_read_from_buffer(buf, len, pos, data, size);
  211. kfree(data);
  212. return ret;
  213. }
  214. static const struct file_operations __fault_opcodes_fops = {
  215. .owner = THIS_MODULE,
  216. .open = fault_opcodes_open,
  217. .read = fault_opcodes_read,
  218. .write = fault_opcodes_write,
  219. .llseek = no_llseek
  220. };
  221. void hfi1_fault_exit_debugfs(struct hfi1_ibdev *ibd)
  222. {
  223. if (ibd->fault)
  224. debugfs_remove_recursive(ibd->fault->dir);
  225. kfree(ibd->fault);
  226. ibd->fault = NULL;
  227. }
  228. int hfi1_fault_init_debugfs(struct hfi1_ibdev *ibd)
  229. {
  230. struct dentry *parent = ibd->hfi1_ibdev_dbg;
  231. ibd->fault = kzalloc(sizeof(*ibd->fault), GFP_KERNEL);
  232. if (!ibd->fault)
  233. return -ENOMEM;
  234. ibd->fault->attr.interval = 1;
  235. ibd->fault->attr.require_end = ULONG_MAX;
  236. ibd->fault->attr.stacktrace_depth = 32;
  237. ibd->fault->attr.dname = NULL;
  238. ibd->fault->attr.verbose = 0;
  239. ibd->fault->enable = false;
  240. ibd->fault->opcode = false;
  241. ibd->fault->fault_skip = 0;
  242. ibd->fault->skip = 0;
  243. ibd->fault->direction = HFI1_FAULT_DIR_TXRX;
  244. ibd->fault->suppress_err = false;
  245. bitmap_zero(ibd->fault->opcodes,
  246. sizeof(ibd->fault->opcodes) * BITS_PER_BYTE);
  247. ibd->fault->dir =
  248. fault_create_debugfs_attr("fault", parent,
  249. &ibd->fault->attr);
  250. if (IS_ERR(ibd->fault->dir)) {
  251. kfree(ibd->fault);
  252. ibd->fault = NULL;
  253. return -ENOENT;
  254. }
  255. DEBUGFS_SEQ_FILE_CREATE(fault_stats, ibd->fault->dir, ibd);
  256. if (!debugfs_create_bool("enable", 0600, ibd->fault->dir,
  257. &ibd->fault->enable))
  258. goto fail;
  259. if (!debugfs_create_bool("suppress_err", 0600,
  260. ibd->fault->dir,
  261. &ibd->fault->suppress_err))
  262. goto fail;
  263. if (!debugfs_create_bool("opcode_mode", 0600, ibd->fault->dir,
  264. &ibd->fault->opcode))
  265. goto fail;
  266. if (!debugfs_create_file("opcodes", 0600, ibd->fault->dir,
  267. ibd->fault, &__fault_opcodes_fops))
  268. goto fail;
  269. if (!debugfs_create_u64("skip_pkts", 0600,
  270. ibd->fault->dir,
  271. &ibd->fault->fault_skip))
  272. goto fail;
  273. if (!debugfs_create_u64("skip_usec", 0600,
  274. ibd->fault->dir,
  275. &ibd->fault->fault_skip_usec))
  276. goto fail;
  277. if (!debugfs_create_u8("direction", 0600, ibd->fault->dir,
  278. &ibd->fault->direction))
  279. goto fail;
  280. return 0;
  281. fail:
  282. hfi1_fault_exit_debugfs(ibd);
  283. return -ENOMEM;
  284. }
  285. bool hfi1_dbg_fault_suppress_err(struct hfi1_ibdev *ibd)
  286. {
  287. if (ibd->fault)
  288. return ibd->fault->suppress_err;
  289. return false;
  290. }
  291. static bool __hfi1_should_fault(struct hfi1_ibdev *ibd, u32 opcode,
  292. u8 direction)
  293. {
  294. bool ret = false;
  295. if (!ibd->fault || !ibd->fault->enable)
  296. return false;
  297. if (!(ibd->fault->direction & direction))
  298. return false;
  299. if (ibd->fault->opcode) {
  300. if (bitmap_empty(ibd->fault->opcodes,
  301. (sizeof(ibd->fault->opcodes) *
  302. BITS_PER_BYTE)))
  303. return false;
  304. if (!(test_bit(opcode, ibd->fault->opcodes)))
  305. return false;
  306. }
  307. if (ibd->fault->fault_skip_usec &&
  308. time_before(jiffies, ibd->fault->skip_usec))
  309. return false;
  310. if (ibd->fault->fault_skip && ibd->fault->skip) {
  311. ibd->fault->skip--;
  312. return false;
  313. }
  314. ret = should_fail(&ibd->fault->attr, 1);
  315. if (ret) {
  316. ibd->fault->skip = ibd->fault->fault_skip;
  317. ibd->fault->skip_usec = jiffies +
  318. usecs_to_jiffies(ibd->fault->fault_skip_usec);
  319. }
  320. return ret;
  321. }
  322. bool hfi1_dbg_should_fault_tx(struct rvt_qp *qp, u32 opcode)
  323. {
  324. struct hfi1_ibdev *ibd = to_idev(qp->ibqp.device);
  325. if (__hfi1_should_fault(ibd, opcode, HFI1_FAULT_DIR_TX)) {
  326. trace_hfi1_fault_opcode(qp, opcode);
  327. ibd->fault->n_txfaults[opcode]++;
  328. return true;
  329. }
  330. return false;
  331. }
  332. bool hfi1_dbg_should_fault_rx(struct hfi1_packet *packet)
  333. {
  334. struct hfi1_ibdev *ibd = &packet->rcd->dd->verbs_dev;
  335. if (__hfi1_should_fault(ibd, packet->opcode, HFI1_FAULT_DIR_RX)) {
  336. trace_hfi1_fault_packet(packet);
  337. ibd->fault->n_rxfaults[packet->opcode]++;
  338. return true;
  339. }
  340. return false;
  341. }