cxgb4_ppm.c 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464
  1. /*
  2. * cxgb4_ppm.c: Chelsio common library for T4/T5 iSCSI PagePod Manager
  3. *
  4. * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved.
  5. *
  6. * This program is free software; you can redistribute it and/or modify
  7. * it under the terms of the GNU General Public License version 2 as
  8. * published by the Free Software Foundation.
  9. *
  10. * Written by: Karen Xie (kxie@chelsio.com)
  11. */
  12. #include <linux/kernel.h>
  13. #include <linux/version.h>
  14. #include <linux/module.h>
  15. #include <linux/errno.h>
  16. #include <linux/types.h>
  17. #include <linux/debugfs.h>
  18. #include <linux/export.h>
  19. #include <linux/list.h>
  20. #include <linux/skbuff.h>
  21. #include <linux/pci.h>
  22. #include <linux/scatterlist.h>
  23. #include "cxgb4_ppm.h"
  24. /* Direct Data Placement -
  25. * Directly place the iSCSI Data-In or Data-Out PDU's payload into
  26. * pre-posted final destination host-memory buffers based on the
  27. * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT)
  28. * in Data-Out PDUs. The host memory address is programmed into
  29. * h/w in the format of pagepod entries. The location of the
  30. * pagepod entry is encoded into ddp tag which is used as the base
  31. * for ITT/TTT.
  32. */
  33. /* Direct-Data Placement page size adjustment
  34. */
  35. int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz)
  36. {
  37. struct cxgbi_tag_format *tformat = &ppm->tformat;
  38. int i;
  39. for (i = 0; i < DDP_PGIDX_MAX; i++) {
  40. if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT +
  41. tformat->pgsz_order[i])) {
  42. pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n",
  43. __func__, ppm->ndev->name, pgsz, i);
  44. return i;
  45. }
  46. }
  47. pr_info("ippm: ddp page size %lu not supported.\n", pgsz);
  48. return DDP_PGIDX_MAX;
  49. }
  50. /* DDP setup & teardown
  51. */
  52. static int ppm_find_unused_entries(unsigned long *bmap,
  53. unsigned int max_ppods,
  54. unsigned int start,
  55. unsigned int nr,
  56. unsigned int align_mask)
  57. {
  58. unsigned long i;
  59. i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask);
  60. if (unlikely(i >= max_ppods) && (start > nr))
  61. i = bitmap_find_next_zero_area(bmap, max_ppods, 0, start - 1,
  62. align_mask);
  63. if (unlikely(i >= max_ppods))
  64. return -ENOSPC;
  65. bitmap_set(bmap, i, nr);
  66. return (int)i;
  67. }
  68. static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count,
  69. unsigned long caller_data)
  70. {
  71. struct cxgbi_ppod_data *pdata = ppm->ppod_data + i;
  72. pdata->caller_data = caller_data;
  73. pdata->npods = count;
  74. if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1))
  75. pdata->color = 0;
  76. else
  77. pdata->color++;
  78. }
  79. static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count,
  80. unsigned long caller_data)
  81. {
  82. struct cxgbi_ppm_pool *pool;
  83. unsigned int cpu;
  84. int i;
  85. cpu = get_cpu();
  86. pool = per_cpu_ptr(ppm->pool, cpu);
  87. spin_lock_bh(&pool->lock);
  88. put_cpu();
  89. i = ppm_find_unused_entries(pool->bmap, ppm->pool_index_max,
  90. pool->next, count, 0);
  91. if (i < 0) {
  92. pool->next = 0;
  93. spin_unlock_bh(&pool->lock);
  94. return -ENOSPC;
  95. }
  96. pool->next = i + count;
  97. if (pool->next >= ppm->pool_index_max)
  98. pool->next = 0;
  99. spin_unlock_bh(&pool->lock);
  100. pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n",
  101. __func__, cpu, i, count, i + cpu * ppm->pool_index_max,
  102. pool->next);
  103. i += cpu * ppm->pool_index_max;
  104. ppm_mark_entries(ppm, i, count, caller_data);
  105. return i;
  106. }
  107. static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count,
  108. unsigned long caller_data)
  109. {
  110. int i;
  111. spin_lock_bh(&ppm->map_lock);
  112. i = ppm_find_unused_entries(ppm->ppod_bmap, ppm->bmap_index_max,
  113. ppm->next, count, 0);
  114. if (i < 0) {
  115. ppm->next = 0;
  116. spin_unlock_bh(&ppm->map_lock);
  117. pr_debug("ippm: NO suitable entries %u available.\n",
  118. count);
  119. return -ENOSPC;
  120. }
  121. ppm->next = i + count;
  122. if (ppm->next >= ppm->bmap_index_max)
  123. ppm->next = 0;
  124. spin_unlock_bh(&ppm->map_lock);
  125. pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n",
  126. __func__, i, count, i + ppm->pool_rsvd, ppm->next,
  127. caller_data);
  128. i += ppm->pool_rsvd;
  129. ppm_mark_entries(ppm, i, count, caller_data);
  130. return i;
  131. }
  132. static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count)
  133. {
  134. pr_debug("%s: idx %d + %d.\n", __func__, i, count);
  135. if (i < ppm->pool_rsvd) {
  136. unsigned int cpu;
  137. struct cxgbi_ppm_pool *pool;
  138. cpu = i / ppm->pool_index_max;
  139. i %= ppm->pool_index_max;
  140. pool = per_cpu_ptr(ppm->pool, cpu);
  141. spin_lock_bh(&pool->lock);
  142. bitmap_clear(pool->bmap, i, count);
  143. if (i < pool->next)
  144. pool->next = i;
  145. spin_unlock_bh(&pool->lock);
  146. pr_debug("%s: cpu %u, idx %d, next %u.\n",
  147. __func__, cpu, i, pool->next);
  148. } else {
  149. spin_lock_bh(&ppm->map_lock);
  150. i -= ppm->pool_rsvd;
  151. bitmap_clear(ppm->ppod_bmap, i, count);
  152. if (i < ppm->next)
  153. ppm->next = i;
  154. spin_unlock_bh(&ppm->map_lock);
  155. pr_debug("%s: idx %d, next %u.\n", __func__, i, ppm->next);
  156. }
  157. }
  158. void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx)
  159. {
  160. struct cxgbi_ppod_data *pdata;
  161. if (idx >= ppm->ppmax) {
  162. pr_warn("ippm: idx too big %u > %u.\n", idx, ppm->ppmax);
  163. return;
  164. }
  165. pdata = ppm->ppod_data + idx;
  166. if (!pdata->npods) {
  167. pr_warn("ippm: idx %u, npods 0.\n", idx);
  168. return;
  169. }
  170. pr_debug("release idx %u, npods %u.\n", idx, pdata->npods);
  171. ppm_unmark_entries(ppm, idx, pdata->npods);
  172. }
  173. EXPORT_SYMBOL(cxgbi_ppm_ppod_release);
  174. int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages,
  175. u32 per_tag_pg_idx, u32 *ppod_idx,
  176. u32 *ddp_tag, unsigned long caller_data)
  177. {
  178. struct cxgbi_ppod_data *pdata;
  179. unsigned int npods;
  180. int idx = -1;
  181. unsigned int hwidx;
  182. u32 tag;
  183. npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT;
  184. if (!npods) {
  185. pr_warn("%s: pages %u -> npods %u, full.\n",
  186. __func__, nr_pages, npods);
  187. return -EINVAL;
  188. }
  189. /* grab from cpu pool first */
  190. idx = ppm_get_cpu_entries(ppm, npods, caller_data);
  191. /* try the general pool */
  192. if (idx < 0)
  193. idx = ppm_get_entries(ppm, npods, caller_data);
  194. if (idx < 0) {
  195. pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n",
  196. nr_pages, npods, ppm->next, caller_data);
  197. return idx;
  198. }
  199. pdata = ppm->ppod_data + idx;
  200. hwidx = ppm->base_idx + idx;
  201. tag = cxgbi_ppm_make_ddp_tag(hwidx, pdata->color);
  202. if (per_tag_pg_idx)
  203. tag |= (per_tag_pg_idx << 30) & 0xC0000000;
  204. *ppod_idx = idx;
  205. *ddp_tag = tag;
  206. pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n",
  207. nr_pages, tag, idx, npods, caller_data);
  208. return npods;
  209. }
  210. EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve);
  211. void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag,
  212. unsigned int tid, unsigned int offset,
  213. unsigned int length,
  214. struct cxgbi_pagepod_hdr *hdr)
  215. {
  216. /* The ddp tag in pagepod should be with bit 31:30 set to 0.
  217. * The ddp Tag on the wire should be with non-zero 31:30 to the peer
  218. */
  219. tag &= 0x3FFFFFFF;
  220. hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid));
  221. hdr->rsvd = 0;
  222. hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask);
  223. hdr->max_offset = htonl(length);
  224. hdr->page_offset = htonl(offset);
  225. pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n",
  226. tag, tid, length, offset);
  227. }
  228. EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr);
  /* Free the manager allocation itself; cxgbi_ppm_init() obtains it with
   * vmalloc(), so it has to go back through vfree().
   */
  static void ppm_free(struct cxgbi_ppm *ppm)
  {
  	vfree(ppm);
  }
  233. static void ppm_destroy(struct kref *kref)
  234. {
  235. struct cxgbi_ppm *ppm = container_of(kref,
  236. struct cxgbi_ppm,
  237. refcnt);
  238. pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n",
  239. ppm->ndev->name, ppm);
  240. *ppm->ppm_pp = NULL;
  241. free_percpu(ppm->pool);
  242. ppm_free(ppm);
  243. }
  244. int cxgbi_ppm_release(struct cxgbi_ppm *ppm)
  245. {
  246. if (ppm) {
  247. int rv;
  248. rv = kref_put(&ppm->refcnt, ppm_destroy);
  249. return rv;
  250. }
  251. return 1;
  252. }
  253. static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total,
  254. unsigned int *pcpu_ppmax)
  255. {
  256. struct cxgbi_ppm_pool *pools;
  257. unsigned int ppmax = (*total) / num_possible_cpus();
  258. unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3;
  259. unsigned int bmap;
  260. unsigned int alloc_sz;
  261. unsigned int count = 0;
  262. unsigned int cpu;
  263. /* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */
  264. if (ppmax > max)
  265. ppmax = max;
  266. /* pool size must be multiple of unsigned long */
  267. bmap = BITS_TO_LONGS(ppmax);
  268. ppmax = (bmap * sizeof(unsigned long)) << 3;
  269. alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap;
  270. pools = __alloc_percpu(alloc_sz, __alignof__(struct cxgbi_ppm_pool));
  271. if (!pools)
  272. return NULL;
  273. for_each_possible_cpu(cpu) {
  274. struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu);
  275. memset(ppool, 0, alloc_sz);
  276. spin_lock_init(&ppool->lock);
  277. count += ppmax;
  278. }
  279. *total = count;
  280. *pcpu_ppmax = ppmax;
  281. return pools;
  282. }
  283. int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev,
  284. struct pci_dev *pdev, void *lldev,
  285. struct cxgbi_tag_format *tformat,
  286. unsigned int ppmax,
  287. unsigned int llimit,
  288. unsigned int start,
  289. unsigned int reserve_factor)
  290. {
  291. struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp);
  292. struct cxgbi_ppm_pool *pool = NULL;
  293. unsigned int ppmax_pool = 0;
  294. unsigned int pool_index_max = 0;
  295. unsigned int alloc_sz;
  296. unsigned int ppod_bmap_size;
  297. if (ppm) {
  298. pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
  299. ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax);
  300. kref_get(&ppm->refcnt);
  301. return 1;
  302. }
  303. if (reserve_factor) {
  304. ppmax_pool = ppmax / reserve_factor;
  305. pool = ppm_alloc_cpu_pool(&ppmax_pool, &pool_index_max);
  306. pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n",
  307. ndev->name, ppmax, ppmax_pool, pool_index_max);
  308. }
  309. ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool);
  310. alloc_sz = sizeof(struct cxgbi_ppm) +
  311. ppmax * (sizeof(struct cxgbi_ppod_data)) +
  312. ppod_bmap_size * sizeof(unsigned long);
  313. ppm = vmalloc(alloc_sz);
  314. if (!ppm)
  315. goto release_ppm_pool;
  316. memset(ppm, 0, alloc_sz);
  317. ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]);
  318. if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) {
  319. unsigned int start = ppmax - ppmax_pool;
  320. unsigned int end = ppod_bmap_size >> 3;
  321. bitmap_set(ppm->ppod_bmap, ppmax, end - start);
  322. pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n",
  323. __func__, ppmax, ppmax_pool, ppod_bmap_size, start,
  324. end);
  325. }
  326. spin_lock_init(&ppm->map_lock);
  327. kref_init(&ppm->refcnt);
  328. memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format));
  329. ppm->ppm_pp = ppm_pp;
  330. ppm->ndev = ndev;
  331. ppm->pdev = pdev;
  332. ppm->lldev = lldev;
  333. ppm->ppmax = ppmax;
  334. ppm->next = 0;
  335. ppm->llimit = llimit;
  336. ppm->base_idx = start > llimit ?
  337. (start - llimit + 1) >> PPOD_SIZE_SHIFT : 0;
  338. ppm->bmap_index_max = ppmax - ppmax_pool;
  339. ppm->pool = pool;
  340. ppm->pool_rsvd = ppmax_pool;
  341. ppm->pool_index_max = pool_index_max;
  342. /* check one more time */
  343. if (*ppm_pp) {
  344. ppm_free(ppm);
  345. ppm = (struct cxgbi_ppm *)(*ppm_pp);
  346. pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n",
  347. ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax);
  348. kref_get(&ppm->refcnt);
  349. return 1;
  350. }
  351. *ppm_pp = ppm;
  352. ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE);
  353. pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n",
  354. ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE,
  355. ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd,
  356. ppm->pool_index_max);
  357. return 0;
  358. release_ppm_pool:
  359. free_percpu(pool);
  360. return -ENOMEM;
  361. }
  362. EXPORT_SYMBOL(cxgbi_ppm_init);
  363. unsigned int cxgbi_tagmask_set(unsigned int ppmax)
  364. {
  365. unsigned int bits = fls(ppmax);
  366. if (bits > PPOD_IDX_MAX_SIZE)
  367. bits = PPOD_IDX_MAX_SIZE;
  368. pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n",
  369. ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT));
  370. return 1 << (bits + PPOD_IDX_SHIFT);
  371. }