ntb_perf.c 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515
  1. /*
  2. * This file is provided under a dual BSD/GPLv2 license. When using or
  3. * redistributing this file, you may do so under either license.
  4. *
  5. * GPL LICENSE SUMMARY
  6. *
  7. * Copyright(c) 2015 Intel Corporation. All rights reserved.
  8. * Copyright(c) 2017 T-Platforms. All Rights Reserved.
  9. *
  10. * This program is free software; you can redistribute it and/or modify
  11. * it under the terms of version 2 of the GNU General Public License as
  12. * published by the Free Software Foundation.
  13. *
  14. * BSD LICENSE
  15. *
  16. * Copyright(c) 2015 Intel Corporation. All rights reserved.
  17. * Copyright(c) 2017 T-Platforms. All Rights Reserved.
  18. *
  19. * Redistribution and use in source and binary forms, with or without
  20. * modification, are permitted provided that the following conditions
  21. * are met:
  22. *
  23. * * Redistributions of source code must retain the above copyright
  24. * notice, this list of conditions and the following disclaimer.
  25. * * Redistributions in binary form must reproduce the above copy
  26. * notice, this list of conditions and the following disclaimer in
  27. * the documentation and/or other materials provided with the
  28. * distribution.
  29. * * Neither the name of Intel Corporation nor the names of its
  30. * contributors may be used to endorse or promote products derived
  31. * from this software without specific prior written permission.
  32. *
  33. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  34. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  35. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  36. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  37. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  38. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  39. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  40. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  41. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  42. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  43. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  44. *
  45. * PCIe NTB Perf Linux driver
  46. */
  47. /*
  48. * How to use this tool, by example.
  49. *
  50. * Assuming $DBG_DIR is something like:
  51. * '/sys/kernel/debug/ntb_perf/0000:00:03.0'
  52. * Suppose aside from local device there is at least one remote device
  53. * connected to NTB with index 0.
  54. *-----------------------------------------------------------------------------
  55. * Eg: install driver with specified chunk/total orders and dma-enabled flag
  56. *
  57. * root@self# insmod ntb_perf.ko chunk_order=19 total_order=28 use_dma
  58. *-----------------------------------------------------------------------------
  59. * Eg: check NTB ports (index) and MW mapping information
  60. *
  61. * root@self# cat $DBG_DIR/info
  62. *-----------------------------------------------------------------------------
  63. * Eg: start performance test with peer (index 0) and get the test metrics
  64. *
  65. * root@self# echo 0 > $DBG_DIR/run
  66. * root@self# cat $DBG_DIR/run
  67. */
  68. #include <linux/init.h>
  69. #include <linux/kernel.h>
  70. #include <linux/module.h>
  71. #include <linux/sched.h>
  72. #include <linux/wait.h>
  73. #include <linux/dma-mapping.h>
  74. #include <linux/dmaengine.h>
  75. #include <linux/pci.h>
  76. #include <linux/ktime.h>
  77. #include <linux/slab.h>
  78. #include <linux/delay.h>
  79. #include <linux/sizes.h>
  80. #include <linux/workqueue.h>
  81. #include <linux/debugfs.h>
  82. #include <linux/random.h>
  83. #include <linux/ntb.h>
  84. #define DRIVER_NAME "ntb_perf"
  85. #define DRIVER_VERSION "2.0"
  86. MODULE_LICENSE("Dual BSD/GPL");
  87. MODULE_VERSION(DRIVER_VERSION);
  88. MODULE_AUTHOR("Dave Jiang <dave.jiang@intel.com>");
  89. MODULE_DESCRIPTION("PCIe NTB Performance Measurement Tool");
  90. #define MAX_THREADS_CNT 32
  91. #define DEF_THREADS_CNT 1
  92. #define MAX_CHUNK_SIZE SZ_1M
  93. #define MAX_CHUNK_ORDER 20 /* no larger than 1M */
  94. #define DMA_TRIES 100
  95. #define DMA_MDELAY 10
  96. #define MSG_TRIES 500
  97. #define MSG_UDELAY_LOW 1000
  98. #define MSG_UDELAY_HIGH 2000
  99. #define PERF_BUF_LEN 1024
  100. static unsigned long max_mw_size;
  101. module_param(max_mw_size, ulong, 0644);
  102. MODULE_PARM_DESC(max_mw_size, "Upper limit of memory window size");
  103. static unsigned char chunk_order = 19; /* 512K */
  104. module_param(chunk_order, byte, 0644);
  105. MODULE_PARM_DESC(chunk_order, "Data chunk order [2^n] to transfer");
  106. static unsigned char total_order = 30; /* 1G */
  107. module_param(total_order, byte, 0644);
  108. MODULE_PARM_DESC(total_order, "Total data order [2^n] to transfer");
  109. static bool use_dma; /* default to 0 */
  110. module_param(use_dma, bool, 0644);
  111. MODULE_PARM_DESC(use_dma, "Use DMA engine to measure performance");
  112. /*==============================================================================
  113. * Perf driver data definition
  114. *==============================================================================
  115. */
  116. enum perf_cmd {
  117. PERF_CMD_INVAL = -1,/* invalid spad command */
  118. PERF_CMD_SSIZE = 0, /* send out buffer size */
  119. PERF_CMD_RSIZE = 1, /* recv in buffer size */
  120. PERF_CMD_SXLAT = 2, /* send in buffer xlat */
  121. PERF_CMD_RXLAT = 3, /* recv out buffer xlat */
  122. PERF_CMD_CLEAR = 4, /* clear allocated memory */
  123. PERF_STS_DONE = 5, /* init is done */
  124. PERF_STS_LNKUP = 6, /* link up state flag */
  125. };
  126. struct perf_ctx;
  127. struct perf_peer {
  128. struct perf_ctx *perf;
  129. int pidx;
  130. int gidx;
  131. /* Outbound MW params */
  132. u64 outbuf_xlat;
  133. resource_size_t outbuf_size;
  134. void __iomem *outbuf;
  135. /* Inbound MW params */
  136. dma_addr_t inbuf_xlat;
  137. resource_size_t inbuf_size;
  138. void *inbuf;
  139. /* NTB connection setup service */
  140. struct work_struct service;
  141. unsigned long sts;
  142. };
  143. #define to_peer_service(__work) \
  144. container_of(__work, struct perf_peer, service)
  145. struct perf_thread {
  146. struct perf_ctx *perf;
  147. int tidx;
  148. /* DMA-based test sync parameters */
  149. atomic_t dma_sync;
  150. wait_queue_head_t dma_wait;
  151. struct dma_chan *dma_chan;
  152. /* Data source and measured statistics */
  153. void *src;
  154. u64 copied;
  155. ktime_t duration;
  156. int status;
  157. struct work_struct work;
  158. };
  159. #define to_thread_work(__work) \
  160. container_of(__work, struct perf_thread, work)
  161. struct perf_ctx {
  162. struct ntb_dev *ntb;
  163. /* Global device index and peers descriptors */
  164. int gidx;
  165. int pcnt;
  166. struct perf_peer *peers;
  167. /* Performance measuring work-threads interface */
  168. unsigned long busy_flag;
  169. wait_queue_head_t twait;
  170. atomic_t tsync;
  171. u8 tcnt;
  172. struct perf_peer *test_peer;
  173. struct perf_thread threads[MAX_THREADS_CNT];
  174. /* Scratchpad/Message IO operations */
  175. int (*cmd_send)(struct perf_peer *peer, enum perf_cmd cmd, u64 data);
  176. int (*cmd_recv)(struct perf_ctx *perf, int *pidx, enum perf_cmd *cmd,
  177. u64 *data);
  178. struct dentry *dbgfs_dir;
  179. };
  180. /*
  181. * Scratchpads-base commands interface
  182. */
  183. #define PERF_SPAD_CNT(_pcnt) \
  184. (3*((_pcnt) + 1))
  185. #define PERF_SPAD_CMD(_gidx) \
  186. (3*(_gidx))
  187. #define PERF_SPAD_LDATA(_gidx) \
  188. (3*(_gidx) + 1)
  189. #define PERF_SPAD_HDATA(_gidx) \
  190. (3*(_gidx) + 2)
  191. #define PERF_SPAD_NOTIFY(_gidx) \
  192. (BIT_ULL(_gidx))
  193. /*
  194. * Messages-base commands interface
  195. */
  196. #define PERF_MSG_CNT 3
  197. #define PERF_MSG_CMD 0
  198. #define PERF_MSG_LDATA 1
  199. #define PERF_MSG_HDATA 2
  200. /*==============================================================================
  201. * Static data declarations
  202. *==============================================================================
  203. */
  204. static struct dentry *perf_dbgfs_topdir;
  205. static struct workqueue_struct *perf_wq __read_mostly;
  206. /*==============================================================================
  207. * NTB cross-link commands execution service
  208. *==============================================================================
  209. */
  210. static void perf_terminate_test(struct perf_ctx *perf);
  211. static inline bool perf_link_is_up(struct perf_peer *peer)
  212. {
  213. u64 link;
  214. link = ntb_link_is_up(peer->perf->ntb, NULL, NULL);
  215. return !!(link & BIT_ULL_MASK(peer->pidx));
  216. }
  217. static int perf_spad_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
  218. u64 data)
  219. {
  220. struct perf_ctx *perf = peer->perf;
  221. int try;
  222. u32 sts;
  223. dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);
  224. /*
  225. * Perform predefined number of attempts before give up.
  226. * We are sending the data to the port specific scratchpad, so
  227. * to prevent a multi-port access race-condition. Additionally
  228. * there is no need in local locking since only thread-safe
  229. * service work is using this method.
  230. */
  231. for (try = 0; try < MSG_TRIES; try++) {
  232. if (!perf_link_is_up(peer))
  233. return -ENOLINK;
  234. sts = ntb_peer_spad_read(perf->ntb, peer->pidx,
  235. PERF_SPAD_CMD(perf->gidx));
  236. if (sts != PERF_CMD_INVAL) {
  237. usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
  238. continue;
  239. }
  240. ntb_peer_spad_write(perf->ntb, peer->pidx,
  241. PERF_SPAD_LDATA(perf->gidx),
  242. lower_32_bits(data));
  243. ntb_peer_spad_write(perf->ntb, peer->pidx,
  244. PERF_SPAD_HDATA(perf->gidx),
  245. upper_32_bits(data));
  246. mmiowb();
  247. ntb_peer_spad_write(perf->ntb, peer->pidx,
  248. PERF_SPAD_CMD(perf->gidx),
  249. cmd);
  250. mmiowb();
  251. ntb_peer_db_set(perf->ntb, PERF_SPAD_NOTIFY(peer->gidx));
  252. dev_dbg(&perf->ntb->dev, "DB ring peer %#llx\n",
  253. PERF_SPAD_NOTIFY(peer->gidx));
  254. break;
  255. }
  256. return try < MSG_TRIES ? 0 : -EAGAIN;
  257. }
  258. static int perf_spad_cmd_recv(struct perf_ctx *perf, int *pidx,
  259. enum perf_cmd *cmd, u64 *data)
  260. {
  261. struct perf_peer *peer;
  262. u32 val;
  263. ntb_db_clear(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
  264. /*
  265. * We start scanning all over, since cleared DB may have been set
  266. * by any peer. Yes, it makes peer with smaller index being
  267. * serviced with greater priority, but it's convenient for spad
  268. * and message code unification and simplicity.
  269. */
  270. for (*pidx = 0; *pidx < perf->pcnt; (*pidx)++) {
  271. peer = &perf->peers[*pidx];
  272. if (!perf_link_is_up(peer))
  273. continue;
  274. val = ntb_spad_read(perf->ntb, PERF_SPAD_CMD(peer->gidx));
  275. if (val == PERF_CMD_INVAL)
  276. continue;
  277. *cmd = val;
  278. val = ntb_spad_read(perf->ntb, PERF_SPAD_LDATA(peer->gidx));
  279. *data = val;
  280. val = ntb_spad_read(perf->ntb, PERF_SPAD_HDATA(peer->gidx));
  281. *data |= (u64)val << 32;
  282. /* Next command can be retrieved from now */
  283. ntb_spad_write(perf->ntb, PERF_SPAD_CMD(peer->gidx),
  284. PERF_CMD_INVAL);
  285. dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);
  286. return 0;
  287. }
  288. return -ENODATA;
  289. }
  290. static int perf_msg_cmd_send(struct perf_peer *peer, enum perf_cmd cmd,
  291. u64 data)
  292. {
  293. struct perf_ctx *perf = peer->perf;
  294. int try, ret;
  295. u64 outbits;
  296. dev_dbg(&perf->ntb->dev, "CMD send: %d 0x%llx\n", cmd, data);
  297. /*
  298. * Perform predefined number of attempts before give up. Message
  299. * registers are free of race-condition problem when accessed
  300. * from different ports, so we don't need splitting registers
  301. * by global device index. We also won't have local locking,
  302. * since the method is used from service work only.
  303. */
  304. outbits = ntb_msg_outbits(perf->ntb);
  305. for (try = 0; try < MSG_TRIES; try++) {
  306. if (!perf_link_is_up(peer))
  307. return -ENOLINK;
  308. ret = ntb_msg_clear_sts(perf->ntb, outbits);
  309. if (ret)
  310. return ret;
  311. ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_LDATA,
  312. lower_32_bits(data));
  313. if (ntb_msg_read_sts(perf->ntb) & outbits) {
  314. usleep_range(MSG_UDELAY_LOW, MSG_UDELAY_HIGH);
  315. continue;
  316. }
  317. ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_HDATA,
  318. upper_32_bits(data));
  319. mmiowb();
  320. /* This call shall trigger peer message event */
  321. ntb_peer_msg_write(perf->ntb, peer->pidx, PERF_MSG_CMD, cmd);
  322. break;
  323. }
  324. return try < MSG_TRIES ? 0 : -EAGAIN;
  325. }
  326. static int perf_msg_cmd_recv(struct perf_ctx *perf, int *pidx,
  327. enum perf_cmd *cmd, u64 *data)
  328. {
  329. u64 inbits;
  330. u32 val;
  331. inbits = ntb_msg_inbits(perf->ntb);
  332. if (hweight64(ntb_msg_read_sts(perf->ntb) & inbits) < 3)
  333. return -ENODATA;
  334. val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_CMD);
  335. *cmd = val;
  336. val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_LDATA);
  337. *data = val;
  338. val = ntb_msg_read(perf->ntb, pidx, PERF_MSG_HDATA);
  339. *data |= (u64)val << 32;
  340. /* Next command can be retrieved from now */
  341. ntb_msg_clear_sts(perf->ntb, inbits);
  342. dev_dbg(&perf->ntb->dev, "CMD recv: %d 0x%llx\n", *cmd, *data);
  343. return 0;
  344. }
  345. static int perf_cmd_send(struct perf_peer *peer, enum perf_cmd cmd, u64 data)
  346. {
  347. struct perf_ctx *perf = peer->perf;
  348. if (cmd == PERF_CMD_SSIZE || cmd == PERF_CMD_SXLAT)
  349. return perf->cmd_send(peer, cmd, data);
  350. dev_err(&perf->ntb->dev, "Send invalid command\n");
  351. return -EINVAL;
  352. }
  353. static int perf_cmd_exec(struct perf_peer *peer, enum perf_cmd cmd)
  354. {
  355. switch (cmd) {
  356. case PERF_CMD_SSIZE:
  357. case PERF_CMD_RSIZE:
  358. case PERF_CMD_SXLAT:
  359. case PERF_CMD_RXLAT:
  360. case PERF_CMD_CLEAR:
  361. break;
  362. default:
  363. dev_err(&peer->perf->ntb->dev, "Exec invalid command\n");
  364. return -EINVAL;
  365. }
  366. /* No need of memory barrier, since bit ops have invernal lock */
  367. set_bit(cmd, &peer->sts);
  368. dev_dbg(&peer->perf->ntb->dev, "CMD exec: %d\n", cmd);
  369. (void)queue_work(system_highpri_wq, &peer->service);
  370. return 0;
  371. }
  372. static int perf_cmd_recv(struct perf_ctx *perf)
  373. {
  374. struct perf_peer *peer;
  375. int ret, pidx, cmd;
  376. u64 data;
  377. while (!(ret = perf->cmd_recv(perf, &pidx, &cmd, &data))) {
  378. peer = &perf->peers[pidx];
  379. switch (cmd) {
  380. case PERF_CMD_SSIZE:
  381. peer->inbuf_size = data;
  382. return perf_cmd_exec(peer, PERF_CMD_RSIZE);
  383. case PERF_CMD_SXLAT:
  384. peer->outbuf_xlat = data;
  385. return perf_cmd_exec(peer, PERF_CMD_RXLAT);
  386. default:
  387. dev_err(&perf->ntb->dev, "Recv invalid command\n");
  388. return -EINVAL;
  389. }
  390. }
  391. /* Return 0 if no data left to process, otherwise an error */
  392. return ret == -ENODATA ? 0 : ret;
  393. }
  394. static void perf_link_event(void *ctx)
  395. {
  396. struct perf_ctx *perf = ctx;
  397. struct perf_peer *peer;
  398. bool lnk_up;
  399. int pidx;
  400. for (pidx = 0; pidx < perf->pcnt; pidx++) {
  401. peer = &perf->peers[pidx];
  402. lnk_up = perf_link_is_up(peer);
  403. if (lnk_up &&
  404. !test_and_set_bit(PERF_STS_LNKUP, &peer->sts)) {
  405. perf_cmd_exec(peer, PERF_CMD_SSIZE);
  406. } else if (!lnk_up &&
  407. test_and_clear_bit(PERF_STS_LNKUP, &peer->sts)) {
  408. perf_cmd_exec(peer, PERF_CMD_CLEAR);
  409. }
  410. }
  411. }
  412. static void perf_db_event(void *ctx, int vec)
  413. {
  414. struct perf_ctx *perf = ctx;
  415. dev_dbg(&perf->ntb->dev, "DB vec %d mask %#llx bits %#llx\n", vec,
  416. ntb_db_vector_mask(perf->ntb, vec), ntb_db_read(perf->ntb));
  417. /* Just receive all available commands */
  418. (void)perf_cmd_recv(perf);
  419. }
  420. static void perf_msg_event(void *ctx)
  421. {
  422. struct perf_ctx *perf = ctx;
  423. dev_dbg(&perf->ntb->dev, "Msg status bits %#llx\n",
  424. ntb_msg_read_sts(perf->ntb));
  425. /* Messages are only sent one-by-one */
  426. (void)perf_cmd_recv(perf);
  427. }
  428. static const struct ntb_ctx_ops perf_ops = {
  429. .link_event = perf_link_event,
  430. .db_event = perf_db_event,
  431. .msg_event = perf_msg_event
  432. };
  433. static void perf_free_outbuf(struct perf_peer *peer)
  434. {
  435. (void)ntb_peer_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
  436. }
  437. static int perf_setup_outbuf(struct perf_peer *peer)
  438. {
  439. struct perf_ctx *perf = peer->perf;
  440. int ret;
  441. /* Outbuf size can be unaligned due to custom max_mw_size */
  442. ret = ntb_peer_mw_set_trans(perf->ntb, peer->pidx, peer->gidx,
  443. peer->outbuf_xlat, peer->outbuf_size);
  444. if (ret) {
  445. dev_err(&perf->ntb->dev, "Failed to set outbuf translation\n");
  446. return ret;
  447. }
  448. /* Initialization is finally done */
  449. set_bit(PERF_STS_DONE, &peer->sts);
  450. return 0;
  451. }
  452. static void perf_free_inbuf(struct perf_peer *peer)
  453. {
  454. if (!peer->inbuf)
  455. return;
  456. (void)ntb_mw_clear_trans(peer->perf->ntb, peer->pidx, peer->gidx);
  457. dma_free_coherent(&peer->perf->ntb->dev, peer->inbuf_size,
  458. peer->inbuf, peer->inbuf_xlat);
  459. peer->inbuf = NULL;
  460. }
  461. static int perf_setup_inbuf(struct perf_peer *peer)
  462. {
  463. resource_size_t xlat_align, size_align, size_max;
  464. struct perf_ctx *perf = peer->perf;
  465. int ret;
  466. /* Get inbound MW parameters */
  467. ret = ntb_mw_get_align(perf->ntb, peer->pidx, perf->gidx,
  468. &xlat_align, &size_align, &size_max);
  469. if (ret) {
  470. dev_err(&perf->ntb->dev, "Couldn't get inbuf restrictions\n");
  471. return ret;
  472. }
  473. if (peer->inbuf_size > size_max) {
  474. dev_err(&perf->ntb->dev, "Too big inbuf size %pa > %pa\n",
  475. &peer->inbuf_size, &size_max);
  476. return -EINVAL;
  477. }
  478. peer->inbuf_size = round_up(peer->inbuf_size, size_align);
  479. perf_free_inbuf(peer);
  480. peer->inbuf = dma_alloc_coherent(&perf->ntb->dev, peer->inbuf_size,
  481. &peer->inbuf_xlat, GFP_KERNEL);
  482. if (!peer->inbuf) {
  483. dev_err(&perf->ntb->dev, "Failed to alloc inbuf of %pa\n",
  484. &peer->inbuf_size);
  485. return -ENOMEM;
  486. }
  487. if (!IS_ALIGNED(peer->inbuf_xlat, xlat_align)) {
  488. dev_err(&perf->ntb->dev, "Unaligned inbuf allocated\n");
  489. goto err_free_inbuf;
  490. }
  491. ret = ntb_mw_set_trans(perf->ntb, peer->pidx, peer->gidx,
  492. peer->inbuf_xlat, peer->inbuf_size);
  493. if (ret) {
  494. dev_err(&perf->ntb->dev, "Failed to set inbuf translation\n");
  495. goto err_free_inbuf;
  496. }
  497. /*
  498. * We submit inbuf xlat transmission cmd for execution here to follow
  499. * the code architecture, even though this method is called from service
  500. * work itself so the command will be executed right after it returns.
  501. */
  502. (void)perf_cmd_exec(peer, PERF_CMD_SXLAT);
  503. return 0;
  504. err_free_inbuf:
  505. perf_free_inbuf(peer);
  506. return ret;
  507. }
  508. static void perf_service_work(struct work_struct *work)
  509. {
  510. struct perf_peer *peer = to_peer_service(work);
  511. if (test_and_clear_bit(PERF_CMD_SSIZE, &peer->sts))
  512. perf_cmd_send(peer, PERF_CMD_SSIZE, peer->outbuf_size);
  513. if (test_and_clear_bit(PERF_CMD_RSIZE, &peer->sts))
  514. perf_setup_inbuf(peer);
  515. if (test_and_clear_bit(PERF_CMD_SXLAT, &peer->sts))
  516. perf_cmd_send(peer, PERF_CMD_SXLAT, peer->inbuf_xlat);
  517. if (test_and_clear_bit(PERF_CMD_RXLAT, &peer->sts))
  518. perf_setup_outbuf(peer);
  519. if (test_and_clear_bit(PERF_CMD_CLEAR, &peer->sts)) {
  520. clear_bit(PERF_STS_DONE, &peer->sts);
  521. if (test_bit(0, &peer->perf->busy_flag) &&
  522. peer == peer->perf->test_peer) {
  523. dev_warn(&peer->perf->ntb->dev,
  524. "Freeing while test on-fly\n");
  525. perf_terminate_test(peer->perf);
  526. }
  527. perf_free_outbuf(peer);
  528. perf_free_inbuf(peer);
  529. }
  530. }
  531. static int perf_init_service(struct perf_ctx *perf)
  532. {
  533. u64 mask;
  534. if (ntb_peer_mw_count(perf->ntb) < perf->pcnt + 1) {
  535. dev_err(&perf->ntb->dev, "Not enough memory windows\n");
  536. return -EINVAL;
  537. }
  538. if (ntb_msg_count(perf->ntb) >= PERF_MSG_CNT) {
  539. perf->cmd_send = perf_msg_cmd_send;
  540. perf->cmd_recv = perf_msg_cmd_recv;
  541. dev_dbg(&perf->ntb->dev, "Message service initialized\n");
  542. return 0;
  543. }
  544. dev_dbg(&perf->ntb->dev, "Message service unsupported\n");
  545. mask = GENMASK_ULL(perf->pcnt, 0);
  546. if (ntb_spad_count(perf->ntb) >= PERF_SPAD_CNT(perf->pcnt) &&
  547. (ntb_db_valid_mask(perf->ntb) & mask) == mask) {
  548. perf->cmd_send = perf_spad_cmd_send;
  549. perf->cmd_recv = perf_spad_cmd_recv;
  550. dev_dbg(&perf->ntb->dev, "Scratchpad service initialized\n");
  551. return 0;
  552. }
  553. dev_dbg(&perf->ntb->dev, "Scratchpad service unsupported\n");
  554. dev_err(&perf->ntb->dev, "Command services unsupported\n");
  555. return -EINVAL;
  556. }
  557. static int perf_enable_service(struct perf_ctx *perf)
  558. {
  559. u64 mask, incmd_bit;
  560. int ret, sidx, scnt;
  561. mask = ntb_db_valid_mask(perf->ntb);
  562. (void)ntb_db_set_mask(perf->ntb, mask);
  563. ret = ntb_set_ctx(perf->ntb, perf, &perf_ops);
  564. if (ret)
  565. return ret;
  566. if (perf->cmd_send == perf_msg_cmd_send) {
  567. u64 inbits, outbits;
  568. inbits = ntb_msg_inbits(perf->ntb);
  569. outbits = ntb_msg_outbits(perf->ntb);
  570. (void)ntb_msg_set_mask(perf->ntb, inbits | outbits);
  571. incmd_bit = BIT_ULL(__ffs64(inbits));
  572. ret = ntb_msg_clear_mask(perf->ntb, incmd_bit);
  573. dev_dbg(&perf->ntb->dev, "MSG sts unmasked %#llx\n", incmd_bit);
  574. } else {
  575. scnt = ntb_spad_count(perf->ntb);
  576. for (sidx = 0; sidx < scnt; sidx++)
  577. ntb_spad_write(perf->ntb, sidx, PERF_CMD_INVAL);
  578. incmd_bit = PERF_SPAD_NOTIFY(perf->gidx);
  579. ret = ntb_db_clear_mask(perf->ntb, incmd_bit);
  580. dev_dbg(&perf->ntb->dev, "DB bits unmasked %#llx\n", incmd_bit);
  581. }
  582. if (ret) {
  583. ntb_clear_ctx(perf->ntb);
  584. return ret;
  585. }
  586. ntb_link_enable(perf->ntb, NTB_SPEED_AUTO, NTB_WIDTH_AUTO);
  587. /* Might be not necessary */
  588. ntb_link_event(perf->ntb);
  589. return 0;
  590. }
  591. static void perf_disable_service(struct perf_ctx *perf)
  592. {
  593. int pidx;
  594. ntb_link_disable(perf->ntb);
  595. if (perf->cmd_send == perf_msg_cmd_send) {
  596. u64 inbits;
  597. inbits = ntb_msg_inbits(perf->ntb);
  598. (void)ntb_msg_set_mask(perf->ntb, inbits);
  599. } else {
  600. (void)ntb_db_set_mask(perf->ntb, PERF_SPAD_NOTIFY(perf->gidx));
  601. }
  602. ntb_clear_ctx(perf->ntb);
  603. for (pidx = 0; pidx < perf->pcnt; pidx++)
  604. perf_cmd_exec(&perf->peers[pidx], PERF_CMD_CLEAR);
  605. for (pidx = 0; pidx < perf->pcnt; pidx++)
  606. flush_work(&perf->peers[pidx].service);
  607. }
  608. /*==============================================================================
  609. * Performance measuring work-thread
  610. *==============================================================================
  611. */
  612. static void perf_dma_copy_callback(void *data)
  613. {
  614. struct perf_thread *pthr = data;
  615. atomic_dec(&pthr->dma_sync);
  616. wake_up(&pthr->dma_wait);
  617. }
  618. static int perf_copy_chunk(struct perf_thread *pthr,
  619. void __iomem *dst, void *src, size_t len)
  620. {
  621. struct dma_async_tx_descriptor *tx;
  622. struct dmaengine_unmap_data *unmap;
  623. struct device *dma_dev;
  624. int try = 0, ret = 0;
  625. if (!use_dma) {
  626. memcpy_toio(dst, src, len);
  627. goto ret_check_tsync;
  628. }
  629. dma_dev = pthr->dma_chan->device->dev;
  630. if (!is_dma_copy_aligned(pthr->dma_chan->device, offset_in_page(src),
  631. offset_in_page(dst), len))
  632. return -EIO;
  633. unmap = dmaengine_get_unmap_data(dma_dev, 2, GFP_NOWAIT);
  634. if (!unmap)
  635. return -ENOMEM;
  636. unmap->len = len;
  637. unmap->addr[0] = dma_map_page(dma_dev, virt_to_page(src),
  638. offset_in_page(src), len, DMA_TO_DEVICE);
  639. if (dma_mapping_error(dma_dev, unmap->addr[0])) {
  640. ret = -EIO;
  641. goto err_free_resource;
  642. }
  643. unmap->to_cnt = 1;
  644. unmap->addr[1] = dma_map_page(dma_dev, virt_to_page(dst),
  645. offset_in_page(dst), len, DMA_FROM_DEVICE);
  646. if (dma_mapping_error(dma_dev, unmap->addr[1])) {
  647. ret = -EIO;
  648. goto err_free_resource;
  649. }
  650. unmap->from_cnt = 1;
  651. do {
  652. tx = dmaengine_prep_dma_memcpy(pthr->dma_chan, unmap->addr[1],
  653. unmap->addr[0], len, DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
  654. if (!tx)
  655. msleep(DMA_MDELAY);
  656. } while (!tx && (try++ < DMA_TRIES));
  657. if (!tx) {
  658. ret = -EIO;
  659. goto err_free_resource;
  660. }
  661. tx->callback = perf_dma_copy_callback;
  662. tx->callback_param = pthr;
  663. dma_set_unmap(tx, unmap);
  664. ret = dma_submit_error(dmaengine_submit(tx));
  665. if (ret) {
  666. dmaengine_unmap_put(unmap);
  667. goto err_free_resource;
  668. }
  669. dmaengine_unmap_put(unmap);
  670. atomic_inc(&pthr->dma_sync);
  671. dma_async_issue_pending(pthr->dma_chan);
  672. ret_check_tsync:
  673. return likely(atomic_read(&pthr->perf->tsync) > 0) ? 0 : -EINTR;
  674. err_free_resource:
  675. dmaengine_unmap_put(unmap);
  676. return ret;
  677. }
  678. static bool perf_dma_filter(struct dma_chan *chan, void *data)
  679. {
  680. struct perf_ctx *perf = data;
  681. int node;
  682. node = dev_to_node(&perf->ntb->dev);
  683. return node == NUMA_NO_NODE || node == dev_to_node(chan->device->dev);
  684. }
  685. static int perf_init_test(struct perf_thread *pthr)
  686. {
  687. struct perf_ctx *perf = pthr->perf;
  688. dma_cap_mask_t dma_mask;
  689. pthr->src = kmalloc_node(perf->test_peer->outbuf_size, GFP_KERNEL,
  690. dev_to_node(&perf->ntb->dev));
  691. if (!pthr->src)
  692. return -ENOMEM;
  693. get_random_bytes(pthr->src, perf->test_peer->outbuf_size);
  694. if (!use_dma)
  695. return 0;
  696. dma_cap_zero(dma_mask);
  697. dma_cap_set(DMA_MEMCPY, dma_mask);
  698. pthr->dma_chan = dma_request_channel(dma_mask, perf_dma_filter, perf);
  699. if (!pthr->dma_chan) {
  700. dev_err(&perf->ntb->dev, "%d: Failed to get DMA channel\n",
  701. pthr->tidx);
  702. atomic_dec(&perf->tsync);
  703. wake_up(&perf->twait);
  704. kfree(pthr->src);
  705. return -ENODEV;
  706. }
  707. atomic_set(&pthr->dma_sync, 0);
  708. return 0;
  709. }
  710. static int perf_run_test(struct perf_thread *pthr)
  711. {
  712. struct perf_peer *peer = pthr->perf->test_peer;
  713. struct perf_ctx *perf = pthr->perf;
  714. void __iomem *flt_dst, *bnd_dst;
  715. u64 total_size, chunk_size;
  716. void *flt_src;
  717. int ret = 0;
  718. total_size = 1ULL << total_order;
  719. chunk_size = 1ULL << chunk_order;
  720. chunk_size = min_t(u64, peer->outbuf_size, chunk_size);
  721. flt_src = pthr->src;
  722. bnd_dst = peer->outbuf + peer->outbuf_size;
  723. flt_dst = peer->outbuf;
  724. pthr->duration = ktime_get();
  725. /* Copied field is cleared on test launch stage */
  726. while (pthr->copied < total_size) {
  727. ret = perf_copy_chunk(pthr, flt_dst, flt_src, chunk_size);
  728. if (ret) {
  729. dev_err(&perf->ntb->dev, "%d: Got error %d on test\n",
  730. pthr->tidx, ret);
  731. return ret;
  732. }
  733. pthr->copied += chunk_size;
  734. flt_dst += chunk_size;
  735. flt_src += chunk_size;
  736. if (flt_dst >= bnd_dst || flt_dst < peer->outbuf) {
  737. flt_dst = peer->outbuf;
  738. flt_src = pthr->src;
  739. }
  740. /* Give up CPU to give a chance for other threads to use it */
  741. schedule();
  742. }
  743. return 0;
  744. }
  745. static int perf_sync_test(struct perf_thread *pthr)
  746. {
  747. struct perf_ctx *perf = pthr->perf;
  748. if (!use_dma)
  749. goto no_dma_ret;
  750. wait_event(pthr->dma_wait,
  751. (atomic_read(&pthr->dma_sync) == 0 ||
  752. atomic_read(&perf->tsync) < 0));
  753. if (atomic_read(&perf->tsync) < 0)
  754. return -EINTR;
  755. no_dma_ret:
  756. pthr->duration = ktime_sub(ktime_get(), pthr->duration);
  757. dev_dbg(&perf->ntb->dev, "%d: copied %llu bytes\n",
  758. pthr->tidx, pthr->copied);
  759. dev_dbg(&perf->ntb->dev, "%d: lasted %llu usecs\n",
  760. pthr->tidx, ktime_to_us(pthr->duration));
  761. dev_dbg(&perf->ntb->dev, "%d: %llu MBytes/s\n", pthr->tidx,
  762. div64_u64(pthr->copied, ktime_to_us(pthr->duration)));
  763. return 0;
  764. }
  765. static void perf_clear_test(struct perf_thread *pthr)
  766. {
  767. struct perf_ctx *perf = pthr->perf;
  768. if (!use_dma)
  769. goto no_dma_notify;
  770. /*
  771. * If test finished without errors, termination isn't needed.
  772. * We call it anyway just to be sure of the transfers completion.
  773. */
  774. (void)dmaengine_terminate_sync(pthr->dma_chan);
  775. dma_release_channel(pthr->dma_chan);
  776. no_dma_notify:
  777. atomic_dec(&perf->tsync);
  778. wake_up(&perf->twait);
  779. kfree(pthr->src);
  780. }
  781. static void perf_thread_work(struct work_struct *work)
  782. {
  783. struct perf_thread *pthr = to_thread_work(work);
  784. int ret;
  785. /*
  786. * Perform stages in compliance with use_dma flag value.
  787. * Test status is changed only if error happened, otherwise
  788. * status -ENODATA is kept while test is on-fly. Results
  789. * synchronization is performed only if test fininshed
  790. * without an error or interruption.
  791. */
  792. ret = perf_init_test(pthr);
  793. if (ret) {
  794. pthr->status = ret;
  795. return;
  796. }
  797. ret = perf_run_test(pthr);
  798. if (ret) {
  799. pthr->status = ret;
  800. goto err_clear_test;
  801. }
  802. pthr->status = perf_sync_test(pthr);
  803. err_clear_test:
  804. perf_clear_test(pthr);
  805. }
  806. static int perf_set_tcnt(struct perf_ctx *perf, u8 tcnt)
  807. {
  808. if (tcnt == 0 || tcnt > MAX_THREADS_CNT)
  809. return -EINVAL;
  810. if (test_and_set_bit_lock(0, &perf->busy_flag))
  811. return -EBUSY;
  812. perf->tcnt = tcnt;
  813. clear_bit_unlock(0, &perf->busy_flag);
  814. return 0;
  815. }
  816. static void perf_terminate_test(struct perf_ctx *perf)
  817. {
  818. int tidx;
  819. atomic_set(&perf->tsync, -1);
  820. wake_up(&perf->twait);
  821. for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
  822. wake_up(&perf->threads[tidx].dma_wait);
  823. cancel_work_sync(&perf->threads[tidx].work);
  824. }
  825. }
  826. static int perf_submit_test(struct perf_peer *peer)
  827. {
  828. struct perf_ctx *perf = peer->perf;
  829. struct perf_thread *pthr;
  830. int tidx, ret;
  831. if (!test_bit(PERF_STS_DONE, &peer->sts))
  832. return -ENOLINK;
  833. if (test_and_set_bit_lock(0, &perf->busy_flag))
  834. return -EBUSY;
  835. perf->test_peer = peer;
  836. atomic_set(&perf->tsync, perf->tcnt);
  837. for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
  838. pthr = &perf->threads[tidx];
  839. pthr->status = -ENODATA;
  840. pthr->copied = 0;
  841. pthr->duration = ktime_set(0, 0);
  842. if (tidx < perf->tcnt)
  843. (void)queue_work(perf_wq, &pthr->work);
  844. }
  845. ret = wait_event_interruptible(perf->twait,
  846. atomic_read(&perf->tsync) <= 0);
  847. if (ret == -ERESTARTSYS) {
  848. perf_terminate_test(perf);
  849. ret = -EINTR;
  850. }
  851. clear_bit_unlock(0, &perf->busy_flag);
  852. return ret;
  853. }
  854. static int perf_read_stats(struct perf_ctx *perf, char *buf,
  855. size_t size, ssize_t *pos)
  856. {
  857. struct perf_thread *pthr;
  858. int tidx;
  859. if (test_and_set_bit_lock(0, &perf->busy_flag))
  860. return -EBUSY;
  861. (*pos) += scnprintf(buf + *pos, size - *pos,
  862. " Peer %d test statistics:\n", perf->test_peer->pidx);
  863. for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
  864. pthr = &perf->threads[tidx];
  865. if (pthr->status == -ENODATA)
  866. continue;
  867. if (pthr->status) {
  868. (*pos) += scnprintf(buf + *pos, size - *pos,
  869. "%d: error status %d\n", tidx, pthr->status);
  870. continue;
  871. }
  872. (*pos) += scnprintf(buf + *pos, size - *pos,
  873. "%d: copied %llu bytes in %llu usecs, %llu MBytes/s\n",
  874. tidx, pthr->copied, ktime_to_us(pthr->duration),
  875. div64_u64(pthr->copied, ktime_to_us(pthr->duration)));
  876. }
  877. clear_bit_unlock(0, &perf->busy_flag);
  878. return 0;
  879. }
  880. static void perf_init_threads(struct perf_ctx *perf)
  881. {
  882. struct perf_thread *pthr;
  883. int tidx;
  884. perf->tcnt = DEF_THREADS_CNT;
  885. perf->test_peer = &perf->peers[0];
  886. init_waitqueue_head(&perf->twait);
  887. for (tidx = 0; tidx < MAX_THREADS_CNT; tidx++) {
  888. pthr = &perf->threads[tidx];
  889. pthr->perf = perf;
  890. pthr->tidx = tidx;
  891. pthr->status = -ENODATA;
  892. init_waitqueue_head(&pthr->dma_wait);
  893. INIT_WORK(&pthr->work, perf_thread_work);
  894. }
  895. }
  /* Counterpart of perf_init_threads(): terminates the test and its work items */
  static void perf_clear_threads(struct perf_ctx *perf)
  {
  	perf_terminate_test(perf);
  }
  900. /*==============================================================================
  901. * DebugFS nodes
  902. *==============================================================================
  903. */
  904. static ssize_t perf_dbgfs_read_info(struct file *filep, char __user *ubuf,
  905. size_t size, loff_t *offp)
  906. {
  907. struct perf_ctx *perf = filep->private_data;
  908. struct perf_peer *peer;
  909. size_t buf_size;
  910. ssize_t pos = 0;
  911. int ret, pidx;
  912. char *buf;
  913. buf_size = min_t(size_t, size, 0x1000U);
  914. buf = kmalloc(buf_size, GFP_KERNEL);
  915. if (!buf)
  916. return -ENOMEM;
  917. pos += scnprintf(buf + pos, buf_size - pos,
  918. " Performance measuring tool info:\n\n");
  919. pos += scnprintf(buf + pos, buf_size - pos,
  920. "Local port %d, Global index %d\n", ntb_port_number(perf->ntb),
  921. perf->gidx);
  922. pos += scnprintf(buf + pos, buf_size - pos, "Test status: ");
  923. if (test_bit(0, &perf->busy_flag)) {
  924. pos += scnprintf(buf + pos, buf_size - pos,
  925. "on-fly with port %d (%d)\n",
  926. ntb_peer_port_number(perf->ntb, perf->test_peer->pidx),
  927. perf->test_peer->pidx);
  928. } else {
  929. pos += scnprintf(buf + pos, buf_size - pos, "idle\n");
  930. }
  931. for (pidx = 0; pidx < perf->pcnt; pidx++) {
  932. peer = &perf->peers[pidx];
  933. pos += scnprintf(buf + pos, buf_size - pos,
  934. "Port %d (%d), Global index %d:\n",
  935. ntb_peer_port_number(perf->ntb, peer->pidx), peer->pidx,
  936. peer->gidx);
  937. pos += scnprintf(buf + pos, buf_size - pos,
  938. "\tLink status: %s\n",
  939. test_bit(PERF_STS_LNKUP, &peer->sts) ? "up" : "down");
  940. pos += scnprintf(buf + pos, buf_size - pos,
  941. "\tOut buffer addr 0x%pK\n", peer->outbuf);
  942. pos += scnprintf(buf + pos, buf_size - pos,
  943. "\tOut buffer size %pa\n", &peer->outbuf_size);
  944. pos += scnprintf(buf + pos, buf_size - pos,
  945. "\tOut buffer xlat 0x%016llx[p]\n", peer->outbuf_xlat);
  946. if (!peer->inbuf) {
  947. pos += scnprintf(buf + pos, buf_size - pos,
  948. "\tIn buffer addr: unallocated\n");
  949. continue;
  950. }
  951. pos += scnprintf(buf + pos, buf_size - pos,
  952. "\tIn buffer addr 0x%pK\n", peer->inbuf);
  953. pos += scnprintf(buf + pos, buf_size - pos,
  954. "\tIn buffer size %pa\n", &peer->inbuf_size);
  955. pos += scnprintf(buf + pos, buf_size - pos,
  956. "\tIn buffer xlat %pad[p]\n", &peer->inbuf_xlat);
  957. }
  958. ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
  959. kfree(buf);
  960. return ret;
  961. }
  962. static const struct file_operations perf_dbgfs_info = {
  963. .open = simple_open,
  964. .read = perf_dbgfs_read_info
  965. };
  966. static ssize_t perf_dbgfs_read_run(struct file *filep, char __user *ubuf,
  967. size_t size, loff_t *offp)
  968. {
  969. struct perf_ctx *perf = filep->private_data;
  970. ssize_t ret, pos = 0;
  971. char *buf;
  972. buf = kmalloc(PERF_BUF_LEN, GFP_KERNEL);
  973. if (!buf)
  974. return -ENOMEM;
  975. ret = perf_read_stats(perf, buf, PERF_BUF_LEN, &pos);
  976. if (ret)
  977. goto err_free;
  978. ret = simple_read_from_buffer(ubuf, size, offp, buf, pos);
  979. err_free:
  980. kfree(buf);
  981. return ret;
  982. }
  983. static ssize_t perf_dbgfs_write_run(struct file *filep, const char __user *ubuf,
  984. size_t size, loff_t *offp)
  985. {
  986. struct perf_ctx *perf = filep->private_data;
  987. struct perf_peer *peer;
  988. int pidx, ret;
  989. ret = kstrtoint_from_user(ubuf, size, 0, &pidx);
  990. if (ret)
  991. return ret;
  992. if (pidx < 0 || pidx >= perf->pcnt)
  993. return -EINVAL;
  994. peer = &perf->peers[pidx];
  995. ret = perf_submit_test(peer);
  996. if (ret)
  997. return ret;
  998. return size;
  999. }
  1000. static const struct file_operations perf_dbgfs_run = {
  1001. .open = simple_open,
  1002. .read = perf_dbgfs_read_run,
  1003. .write = perf_dbgfs_write_run
  1004. };
  1005. static ssize_t perf_dbgfs_read_tcnt(struct file *filep, char __user *ubuf,
  1006. size_t size, loff_t *offp)
  1007. {
  1008. struct perf_ctx *perf = filep->private_data;
  1009. char buf[8];
  1010. ssize_t pos;
  1011. pos = scnprintf(buf, sizeof(buf), "%hhu\n", perf->tcnt);
  1012. return simple_read_from_buffer(ubuf, size, offp, buf, pos);
  1013. }
  1014. static ssize_t perf_dbgfs_write_tcnt(struct file *filep,
  1015. const char __user *ubuf,
  1016. size_t size, loff_t *offp)
  1017. {
  1018. struct perf_ctx *perf = filep->private_data;
  1019. int ret;
  1020. u8 val;
  1021. ret = kstrtou8_from_user(ubuf, size, 0, &val);
  1022. if (ret)
  1023. return ret;
  1024. ret = perf_set_tcnt(perf, val);
  1025. if (ret)
  1026. return ret;
  1027. return size;
  1028. }
  1029. static const struct file_operations perf_dbgfs_tcnt = {
  1030. .open = simple_open,
  1031. .read = perf_dbgfs_read_tcnt,
  1032. .write = perf_dbgfs_write_tcnt
  1033. };
  1034. static void perf_setup_dbgfs(struct perf_ctx *perf)
  1035. {
  1036. struct pci_dev *pdev = perf->ntb->pdev;
  1037. perf->dbgfs_dir = debugfs_create_dir(pci_name(pdev), perf_dbgfs_topdir);
  1038. if (!perf->dbgfs_dir) {
  1039. dev_warn(&perf->ntb->dev, "DebugFS unsupported\n");
  1040. return;
  1041. }
  1042. debugfs_create_file("info", 0600, perf->dbgfs_dir, perf,
  1043. &perf_dbgfs_info);
  1044. debugfs_create_file("run", 0600, perf->dbgfs_dir, perf,
  1045. &perf_dbgfs_run);
  1046. debugfs_create_file("threads_count", 0600, perf->dbgfs_dir, perf,
  1047. &perf_dbgfs_tcnt);
  1048. /* They are made read-only for test exec safety and integrity */
  1049. debugfs_create_u8("chunk_order", 0500, perf->dbgfs_dir, &chunk_order);
  1050. debugfs_create_u8("total_order", 0500, perf->dbgfs_dir, &total_order);
  1051. debugfs_create_bool("use_dma", 0500, perf->dbgfs_dir, &use_dma);
  1052. }
  1053. static void perf_clear_dbgfs(struct perf_ctx *perf)
  1054. {
  1055. debugfs_remove_recursive(perf->dbgfs_dir);
  1056. }
  1057. /*==============================================================================
  1058. * Basic driver initialization
  1059. *==============================================================================
  1060. */
  1061. static struct perf_ctx *perf_create_data(struct ntb_dev *ntb)
  1062. {
  1063. struct perf_ctx *perf;
  1064. perf = devm_kzalloc(&ntb->dev, sizeof(*perf), GFP_KERNEL);
  1065. if (!perf)
  1066. return ERR_PTR(-ENOMEM);
  1067. perf->pcnt = ntb_peer_port_count(ntb);
  1068. perf->peers = devm_kcalloc(&ntb->dev, perf->pcnt, sizeof(*perf->peers),
  1069. GFP_KERNEL);
  1070. if (!perf->peers)
  1071. return ERR_PTR(-ENOMEM);
  1072. perf->ntb = ntb;
  1073. return perf;
  1074. }
  1075. static int perf_setup_peer_mw(struct perf_peer *peer)
  1076. {
  1077. struct perf_ctx *perf = peer->perf;
  1078. phys_addr_t phys_addr;
  1079. int ret;
  1080. /* Get outbound MW parameters and map it */
  1081. ret = ntb_peer_mw_get_addr(perf->ntb, peer->gidx, &phys_addr,
  1082. &peer->outbuf_size);
  1083. if (ret)
  1084. return ret;
  1085. peer->outbuf = devm_ioremap_wc(&perf->ntb->dev, phys_addr,
  1086. peer->outbuf_size);
  1087. if (!peer->outbuf)
  1088. return -ENOMEM;
  1089. if (max_mw_size && peer->outbuf_size > max_mw_size) {
  1090. peer->outbuf_size = max_mw_size;
  1091. dev_warn(&peer->perf->ntb->dev,
  1092. "Peer %d outbuf reduced to %pa\n", peer->pidx,
  1093. &peer->outbuf_size);
  1094. }
  1095. return 0;
  1096. }
  1097. static int perf_init_peers(struct perf_ctx *perf)
  1098. {
  1099. struct perf_peer *peer;
  1100. int pidx, lport, ret;
  1101. lport = ntb_port_number(perf->ntb);
  1102. perf->gidx = -1;
  1103. for (pidx = 0; pidx < perf->pcnt; pidx++) {
  1104. peer = &perf->peers[pidx];
  1105. peer->perf = perf;
  1106. peer->pidx = pidx;
  1107. if (lport < ntb_peer_port_number(perf->ntb, pidx)) {
  1108. if (perf->gidx == -1)
  1109. perf->gidx = pidx;
  1110. peer->gidx = pidx + 1;
  1111. } else {
  1112. peer->gidx = pidx;
  1113. }
  1114. INIT_WORK(&peer->service, perf_service_work);
  1115. }
  1116. if (perf->gidx == -1)
  1117. perf->gidx = pidx;
  1118. for (pidx = 0; pidx < perf->pcnt; pidx++) {
  1119. ret = perf_setup_peer_mw(&perf->peers[pidx]);
  1120. if (ret)
  1121. return ret;
  1122. }
  1123. dev_dbg(&perf->ntb->dev, "Global port index %d\n", perf->gidx);
  1124. return 0;
  1125. }
  /*
   * NTB client probe callback: creates the driver context, then initializes
   * the peers, the test threads and the service subsystem, enables the
   * service and finally exposes the DebugFS nodes.
   *
   * Return: 0 on success, otherwise the error of the first failed step.
   */
  static int perf_probe(struct ntb_client *client, struct ntb_dev *ntb)
  {
  	struct perf_ctx *perf = perf_create_data(ntb);
  	int err;

  	if (IS_ERR(perf))
  		return PTR_ERR(perf);

  	err = perf_init_peers(perf);
  	if (err)
  		return err;

  	perf_init_threads(perf);

  	err = perf_init_service(perf);
  	if (err)
  		return err;

  	err = perf_enable_service(perf);
  	if (err)
  		return err;

  	/* DebugFS is optional, so its setup reports no error */
  	perf_setup_dbgfs(perf);

  	return 0;
  }
  1146. static void perf_remove(struct ntb_client *client, struct ntb_dev *ntb)
  1147. {
  1148. struct perf_ctx *perf = ntb->ctx;
  1149. perf_clear_dbgfs(perf);
  1150. perf_disable_service(perf);
  1151. perf_clear_threads(perf);
  1152. }
  1153. static struct ntb_client perf_client = {
  1154. .ops = {
  1155. .probe = perf_probe,
  1156. .remove = perf_remove
  1157. }
  1158. };
  1159. static int __init perf_init(void)
  1160. {
  1161. int ret;
  1162. if (chunk_order > MAX_CHUNK_ORDER) {
  1163. chunk_order = MAX_CHUNK_ORDER;
  1164. pr_info("Chunk order reduced to %hhu\n", chunk_order);
  1165. }
  1166. if (total_order < chunk_order) {
  1167. total_order = chunk_order;
  1168. pr_info("Total data order reduced to %hhu\n", total_order);
  1169. }
  1170. perf_wq = alloc_workqueue("perf_wq", WQ_UNBOUND | WQ_SYSFS, 0);
  1171. if (!perf_wq)
  1172. return -ENOMEM;
  1173. if (debugfs_initialized())
  1174. perf_dbgfs_topdir = debugfs_create_dir(KBUILD_MODNAME, NULL);
  1175. ret = ntb_register_client(&perf_client);
  1176. if (ret) {
  1177. debugfs_remove_recursive(perf_dbgfs_topdir);
  1178. destroy_workqueue(perf_wq);
  1179. }
  1180. return ret;
  1181. }
  1182. module_init(perf_init);
  1183. static void __exit perf_exit(void)
  1184. {
  1185. ntb_unregister_client(&perf_client);
  1186. debugfs_remove_recursive(perf_dbgfs_topdir);
  1187. destroy_workqueue(perf_wq);
  1188. }
  1189. module_exit(perf_exit);