/*
 * lib/iov_iter.c — iov_iter primitives: copying to/from iovec, kvec,
 * bvec and pipe-backed iterators.
 */
  1. #include <linux/export.h>
  2. #include <linux/uio.h>
  3. #include <linux/pagemap.h>
  4. #include <linux/slab.h>
  5. #include <linux/vmalloc.h>
  6. #include <linux/splice.h>
  7. #include <net/checksum.h>
  8. #define PIPE_PARANOIA /* for now */
/*
 * Walk up to @n bytes of the user-space iovec array of iterator @i,
 * running STEP on each non-empty chunk (exposed as __v).  STEP must
 * evaluate to the number of bytes it failed to process ("left"); a
 * non-zero result (a partial user copy, i.e. a fault) stops the walk
 * early.  On exit @n holds the number of bytes actually processed and
 * @skip the offset into the segment that __p points at.
 */
#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}
/*
 * Same walk as iterate_iovec(), but over kernel-space kvec segments.
 * Kernel-to-kernel copies cannot fault, so STEP's result is discarded
 * and all @n bytes are always consumed (note "n = wanted" at the end).
 */
#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}
/*
 * Walk up to @n bytes of the bio_vec array of iterator @i via
 * for_each_bvec(), running STEP on each non-empty chunk __v.
 * __bi is left holding the final bvec_iter position so callers
 * (iterate_and_advance) can update the iterator from it.
 */
#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}
/*
 * Dispatch on the iterator flavour and run the matching step expression:
 * I for user iovecs, B for bio_vecs, K for kernel kvecs.  Does NOT
 * advance the iterator; see iterate_and_advance() for that.  Pipe
 * iterators must be handled by the caller before using this.
 */
#define iterate_all_kinds(i, n, v, I, B, K) {		\
	size_t skip = i->iov_offset;			\
	if (unlikely(i->type & ITER_BVEC)) {		\
		struct bio_vec v;			\
		struct bvec_iter __bi;			\
		iterate_bvec(i, n, v, __bi, skip, (B))	\
	} else if (unlikely(i->type & ITER_KVEC)) {	\
		const struct kvec *kvec;		\
		struct kvec v;				\
		iterate_kvec(i, n, v, kvec, skip, (K))	\
	} else {					\
		const struct iovec *iov;		\
		struct iovec v;				\
		iterate_iovec(i, n, v, iov, skip, (I))	\
	}						\
}
/*
 * Like iterate_all_kinds(), but afterwards advances the iterator past
 * the bytes actually processed: updates the segment pointer, nr_segs,
 * iov_offset and count.  @n is clamped to i->count first, and on exit
 * holds the number of bytes actually consumed.
 */
#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi); \
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}
/*
 * Copy up to @bytes from @page (starting at @offset) into the user
 * iovecs of @i, advancing the iterator.  Returns the number of bytes
 * copied (short on user-memory fault).
 *
 * Fast path: if the destination is already faulted in, use
 * kmap_atomic() + __copy_to_user_inatomic().  If that still faults,
 * fall back to a sleeping kmap() + __copy_to_user().
 */
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_to_user_inatomic(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_to_user_inatomic(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		/* atomic copy faulted: remember where we were, drop the
		 * atomic mapping and retry below with a sleeping kmap */
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}

	/* Too bad - revert to non-atomic kmap */
	kaddr = kmap(page);
	from = kaddr + offset;
	left = __copy_to_user(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_to_user(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	/* advance the iterator past what we managed to copy */
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}
/*
 * Mirror image of copy_page_to_iter_iovec(): copy up to @bytes from
 * the user iovecs of @i into @page at @offset, advancing the iterator.
 * Returns the number of bytes copied (short on user-memory fault).
 * Same two-phase strategy: atomic kmap first, sleeping kmap fallback.
 */
static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = __copy_from_user_inatomic(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = __copy_from_user_inatomic(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		/* atomic copy faulted: fall back to sleeping kmap below */
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}

	/* Too bad - revert to non-atomic kmap */
	kaddr = kmap(page);
	to = kaddr + offset;
	left = __copy_from_user(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = __copy_from_user(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);

done:
	/* advance the iterator past what we managed to copy */
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}
#ifdef PIPE_PARANOIA
/*
 * Debug-only consistency check for a pipe-backed iterator: verify that
 * (idx, iov_offset) really points just past the data already produced
 * into the pipe.  Dumps the pipe state and WARNs on violation.
 * Returns true if the iterator looks consistent.
 */
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;
	if (i->iov_offset) {
		struct pipe_buffer *p;
		if (unlikely(!pipe->nrbufs))
			goto Bad;	// pipe must be non-empty
		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
			goto Bad;	// must be at the last buffer...
		p = &pipe->bufs[idx];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (idx != (next & (pipe->buffers - 1)))
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
#else
#define sanity(i) true
#endif
  307. static inline int next_idx(int idx, struct pipe_inode_info *pipe)
  308. {
  309. return (idx + 1) & (pipe->buffers - 1);
  310. }
/*
 * "Copy" @page into a pipe-backed iterator by taking a reference on the
 * page and installing it as a pipe buffer (zero-copy).  If the page and
 * offset continue the last buffer, just extend it.  Returns the number
 * of bytes accepted, or 0 if the pipe is full / iterator inconsistent.
 */
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	size_t off;
	int idx;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	idx = i->idx;
	buf = &pipe->bufs[idx];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		idx = next_idx(idx, pipe);
		buf = &pipe->bufs[idx];
	}
	/* pipe full? */
	if (idx == pipe->curbuf && pipe->nrbufs)
		return 0;
	pipe->nrbufs++;
	buf->ops = &page_cache_pipe_buf_ops;
	get_page(buf->page = page);	/* pipe now holds a page reference */
	buf->offset = offset;
	buf->len = bytes;
	i->iov_offset = offset + bytes;
	i->idx = idx;
out:
	i->count -= bytes;
	return bytes;
}
/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	/* only user-space iovecs can fault; kvec/bvec need no pre-faulting */
	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
/*
 * Initialize @i as an iovec-backed iterator over @iov.  If the caller
 * is running under KERNEL_DS (set_fs(KERNEL_DS)), the "user" pointers
 * are really kernel pointers, so the iterator is silently turned into
 * a kvec one.
 */
void iov_iter_init(struct iov_iter *i, int direction,
			const struct iovec *iov, unsigned long nr_segs,
			size_t count)
{
	/* It will get better.  Eventually... */
	if (segment_eq(get_fs(), KERNEL_DS)) {
		direction |= ITER_KVEC;
		i->type = direction;
		i->kvec = (struct kvec *)iov;
	} else {
		i->type = direction;
		i->iov = iov;
	}
	i->nr_segs = nr_segs;
	i->iov_offset = 0;
	i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);
  391. static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
  392. {
  393. char *from = kmap_atomic(page);
  394. memcpy(to, from + offset, len);
  395. kunmap_atomic(from);
  396. }
  397. static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
  398. {
  399. char *to = kmap_atomic(page);
  400. memcpy(to + offset, from, len);
  401. kunmap_atomic(to);
  402. }
  403. static void memzero_page(struct page *page, size_t offset, size_t len)
  404. {
  405. char *addr = kmap_atomic(page);
  406. memset(addr + offset, 0, len);
  407. kunmap_atomic(addr);
  408. }
  409. static inline bool allocated(struct pipe_buffer *buf)
  410. {
  411. return buf->ops == &default_pipe_buf_ops;
  412. }
/*
 * Find where new data may be appended to the pipe: returns (via @idxp,
 * @offp) the buffer index and offset to write at.  If the current
 * buffer cannot be appended to (not one of ours, or already full),
 * start at the beginning of the next slot.
 */
static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
{
	size_t off = i->iov_offset;
	int idx = i->idx;
	if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
		idx = next_idx(idx, i->pipe);
		off = 0;
	}
	*idxp = idx;
	*offp = off;
}
/*
 * Make room in the pipe for up to @size bytes: extend the current
 * buffer if possible, then allocate fresh pages for additional slots
 * until the pipe is full or allocation fails.  Returns the number of
 * bytes of capacity secured and sets (*idxp, *offp) to the position
 * where the caller should start writing.
 */
static size_t push_pipe(struct iov_iter *i, size_t size,
			int *idxp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t off;
	int idx;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &idx, &off);
	*idxp = idx;
	*offp = off;
	if (off) {
		/* tail of the current buffer can take some of it */
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[idx].len += size;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	/* allocate new pages until the ring wraps back to curbuf */
	while (idx != pipe->curbuf || !pipe->nrbufs) {
		struct page *page = alloc_page(GFP_USER);
		if (!page)
			break;
		pipe->nrbufs++;
		pipe->bufs[idx].ops = &default_pipe_buf_ops;
		pipe->bufs[idx].page = page;
		pipe->bufs[idx].offset = 0;
		if (left <= PAGE_SIZE) {
			pipe->bufs[idx].len = left;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		left -= PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	return size - left;
}
/*
 * Copy @bytes from kernel buffer @addr into a pipe-backed iterator,
 * page by page, advancing the iterator.  Returns the number of bytes
 * actually copied (limited by pipe capacity / i->count).
 */
static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
	}
	i->count -= bytes;
	return bytes;
}
/*
 * Copy @bytes from kernel buffer @addr to wherever the iterator points
 * (user iovec, bvec page, kvec, or pipe), advancing it.  Returns the
 * number of bytes copied (may be short on user fault).
 */
size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	if (unlikely(i->type & ITER_PIPE))
		return copy_pipe_to_iter(addr, bytes, i);
	iterate_and_advance(i, bytes, v,
		__copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
			       v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_to_iter);
/*
 * Copy @bytes from the iterator into kernel buffer @addr, advancing
 * the iterator.  Pipe iterators are read-side only here, so they are
 * rejected with a WARN.  Returns the number of bytes copied.
 */
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
				 v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter);
/*
 * Same as copy_from_iter(), but the user-space copy uses the
 * cache-bypassing variant (__copy_from_user_nocache) to avoid
 * polluting the CPU cache with data that won't be re-read soon.
 */
size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(copy_from_iter_nocache);
/*
 * Copy @bytes of @page (from @offset) into the iterator, dispatching
 * on its flavour: kmap + copy_to_iter() for kernel-backed iterators,
 * the faulting iovec path for user iovecs, or zero-copy insertion for
 * pipes.  Returns the number of bytes copied.
 */
size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else if (likely(!(i->type & ITER_PIPE)))
		return copy_page_to_iter_iovec(page, offset, bytes, i);
	else
		return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);
/*
 * Copy @bytes from the iterator into @page at @offset.  Pipe iterators
 * are write-only destinations, so they are rejected with a WARN.
 * Returns the number of bytes copied.
 */
size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	if (i->type & (ITER_BVEC|ITER_KVEC)) {
		void *kaddr = kmap_atomic(page);
		size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
		kunmap_atomic(kaddr);
		return wanted;
	} else
		return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);
/*
 * Write @bytes of zeroes into a pipe-backed iterator, page by page,
 * advancing it.  Returns the number of bytes zeroed (limited by pipe
 * capacity / i->count).
 */
static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;

	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		memzero_page(pipe->bufs[idx].page, off, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
	}
	i->count -= bytes;
	return bytes;
}
/*
 * Fill the next @bytes of the iterator with zeroes, advancing it.
 * Returns the number of bytes zeroed (may be short on user fault).
 */
size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(i->type & ITER_PIPE))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		__clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);
/*
 * Copy @bytes from the iterator into @page at @offset without sleeping
 * (atomic kmap, inatomic user copy).  Does NOT advance the iterator —
 * callers recheck and advance themselves.  Returns @bytes; pipes are
 * rejected with a WARN.
 */
size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;
	if (unlikely(i->type & ITER_PIPE)) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		__copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
					  v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
/*
 * Advance a pipe-backed iterator by @size bytes: trim the producing
 * position back to the new end and release any pipe buffers that now
 * lie wholly past it.
 */
static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	int idx = i->idx;
	size_t off = i->iov_offset, orig_sz;

	if (unlikely(i->count < size))
		size = i->count;
	orig_sz = size;

	if (size) {
		if (off) /* make it relative to the beginning of buffer */
			size += off - pipe->bufs[idx].offset;
		/* walk forward to the buffer that contains the new end */
		while (1) {
			buf = &pipe->bufs[idx];
			if (size <= buf->len)
				break;
			size -= buf->len;
			idx = next_idx(idx, pipe);
		}
		buf->len = size;
		i->idx = idx;
		off = i->iov_offset = buf->offset + size;
	}
	if (off)
		idx = next_idx(idx, pipe);
	if (pipe->nrbufs) {
		int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
		/* [curbuf,unused) is in use.  Free [idx,unused) */
		while (idx != unused) {
			pipe_buf_release(pipe, &pipe->bufs[idx]);
			idx = next_idx(idx, pipe);
			pipe->nrbufs--;
		}
	}
	i->count -= orig_sz;
}
/*
 * Advance the iterator by @size bytes without copying anything
 * (step expressions are all 0 — only the bookkeeping side of
 * iterate_and_advance runs).
 */
void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(i->type & ITER_PIPE)) {
		pipe_advance(i, size);
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);
  664. /*
  665. * Return the count of just the current iov_iter segment.
  666. */
  667. size_t iov_iter_single_seg_count(const struct iov_iter *i)
  668. {
  669. if (unlikely(i->type & ITER_PIPE))
  670. return i->count; // it is a silly place, anyway
  671. if (i->nr_segs == 1)
  672. return i->count;
  673. else if (i->type & ITER_BVEC)
  674. return min(i->count, i->bvec->bv_len - i->iov_offset);
  675. else
  676. return min(i->count, i->iov->iov_len - i->iov_offset);
  677. }
  678. EXPORT_SYMBOL(iov_iter_single_seg_count);
  679. void iov_iter_kvec(struct iov_iter *i, int direction,
  680. const struct kvec *kvec, unsigned long nr_segs,
  681. size_t count)
  682. {
  683. BUG_ON(!(direction & ITER_KVEC));
  684. i->type = direction;
  685. i->kvec = kvec;
  686. i->nr_segs = nr_segs;
  687. i->iov_offset = 0;
  688. i->count = count;
  689. }
  690. EXPORT_SYMBOL(iov_iter_kvec);
  691. void iov_iter_bvec(struct iov_iter *i, int direction,
  692. const struct bio_vec *bvec, unsigned long nr_segs,
  693. size_t count)
  694. {
  695. BUG_ON(!(direction & ITER_BVEC));
  696. i->type = direction;
  697. i->bvec = bvec;
  698. i->nr_segs = nr_segs;
  699. i->iov_offset = 0;
  700. i->count = count;
  701. }
  702. EXPORT_SYMBOL(iov_iter_bvec);
  703. void iov_iter_pipe(struct iov_iter *i, int direction,
  704. struct pipe_inode_info *pipe,
  705. size_t count)
  706. {
  707. BUG_ON(direction != ITER_PIPE);
  708. i->type = direction;
  709. i->pipe = pipe;
  710. i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
  711. i->iov_offset = 0;
  712. i->count = count;
  713. }
  714. EXPORT_SYMBOL(iov_iter_pipe);
/*
 * Return the OR of all segment addresses and lengths (and the total
 * count), so callers can test alignment of the whole iterator with a
 * single mask check.  0 means an empty iterator.
 */
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;
	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);
/*
 * Like iov_iter_alignment(), but accounts only for gaps between
 * segments: the start address of every segment after the first and the
 * end of every segment before the last.  Used to decide whether an
 * iterator can be mapped as one contiguous range.  Pipes are rejected
 * with a WARN (returns ~0U).
 */
unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;
	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return ~0U;
	}
	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
		);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);
/*
 * Grab references to the pipe pages backing up to @maxsize bytes
 * starting at slot @idx, storing them in @pages.  push_pipe() has
 * already ensured the slots exist.  Returns the number of bytes
 * covered, or -EFAULT if no capacity could be secured.
 */
static inline size_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int idx,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	ssize_t n = push_pipe(i, maxsize, &idx, start);
	if (!n)
		return -EFAULT;

	maxsize = n;
	n += *start;
	while (n > 0) {
		get_page(*pages++ = pipe->bufs[idx].page);
		idx = next_idx(idx, pipe);
		n -= PAGE_SIZE;
	}

	return maxsize;
}
/*
 * iov_iter_get_pages() backend for pipe iterators: compute how many
 * pipe slots are still free, clamp @maxsize to that capacity and the
 * caller's @maxpages, then hand off to __pipe_get_pages().
 */
static ssize_t pipe_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	unsigned npages;
	size_t capacity;
	int idx;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	capacity = min(npages,maxpages) * PAGE_SIZE - *start;

	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
}
/*
 * Pin up to @maxpages pages covering the next @maxsize bytes of the
 * iterator into @pages; *start receives the offset into the first
 * page.  Does not advance the iterator.  Returns the number of bytes
 * covered, 0 for an empty iterator, or a negative errno (kvec-backed
 * iterators have no pages to pin: -EFAULT).
 */
ssize_t iov_iter_get_pages(struct iov_iter *i,
		   struct page **pages, size_t maxsize, unsigned maxpages,
		   size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
		if (unlikely(res < 0))
			return res;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);
  824. static struct page **get_pages_array(size_t n)
  825. {
  826. struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
  827. if (!p)
  828. p = vmalloc(n * sizeof(struct page *));
  829. return p;
  830. }
/*
 * iov_iter_get_pages_alloc() backend for pipe iterators: size and
 * allocate the page array (caller frees with kvfree), then fill it via
 * __pipe_get_pages().  Returns bytes covered or a negative errno.
 */
static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;
	size_t n;
	int idx;
	int npages;

	if (!sanity(i))
		return -EFAULT;

	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		maxsize = n;
	else
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, idx, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}
/*
 * Like iov_iter_get_pages(), but allocates the page-pointer array
 * itself (caller frees *pages with kvfree()).  Returns the number of
 * bytes covered, 0 for an empty iterator, or a negative errno.
 */
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		   struct page ***pages, size_t maxsize,
		   size_t *start)
{
	struct page **p;

	if (maxsize > i->count)
		maxsize = i->count;

	if (!maxsize)
		return 0;

	if (unlikely(i->type & ITER_PIPE))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;

		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
		if (unlikely(res < 0)) {
			kvfree(p);
			return res;
		}
		*pages = p;
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);
/*
 * Copy @bytes from the iterator into @addr while folding the data into
 * the running Internet checksum *@csum, advancing the iterator.  The
 * user-space step aborts the walk on fault (err ? v.iov_len : 0).
 * Pipes are rejected with a WARN.  Returns the number of bytes copied.
 */
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			       struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;
	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					       (to += v.iov_len) - v.iov_len,
					       v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);
/*
 * Mirror of csum_and_copy_from_iter(): copy @bytes from @addr out to
 * iterator @i, folding an Internet checksum of the copied data into
 * *csum.  @off tracks the bytes accumulated so far for csum_block_add().
 *
 * Not supported for pipe-backed iterators (WARN and return 0).
 */
size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
		 struct iov_iter *i)
{
	const char *from = addr;
	__wsum sum, next;
	size_t off = 0;

	sum = *csum;
	if (unlikely(i->type & ITER_PIPE)) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		/* iovec: checksum while copying out to userspace. */
		int err = 0;
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					     v.iov_base,
					     v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		/* Non-zero step result tells the macro a fault cut us short. */
		err ? v.iov_len : 0;
	}), ({
		/* bvec: map the page, then checksum+copy kernel-to-kernel. */
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
						 p + v.bv_offset,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		/* kvec: plain kernel memory, no mapping needed. */
		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
						 v.iov_base,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);
/*
 * Return how many pages the remaining i->count bytes of the iterator
 * span, capped at @maxpages.  Used by callers sizing page arrays before
 * calling iov_iter_get_pages() and friends.
 */
int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;

	if (!size)
		return 0;

	if (unlikely(i->type & ITER_PIPE)) {
		struct pipe_inode_info *pipe = i->pipe;
		size_t off;
		int idx;

		/* sanity() validates pipe-iterator invariants; bail if broken. */
		if (!sanity(i))
			return 0;

		data_start(i, &idx, &off);
		/* some of this one + all after this one */
		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		/* iovec: count the pages touched by [base, base + len). */
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		/* bvec: each segment lies within a single page. */
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		/* kvec: same page arithmetic as the iovec case. */
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);
  1022. const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
  1023. {
  1024. *new = *old;
  1025. if (unlikely(new->type & ITER_PIPE)) {
  1026. WARN_ON(1);
  1027. return NULL;
  1028. }
  1029. if (new->type & ITER_BVEC)
  1030. return new->bvec = kmemdup(new->bvec,
  1031. new->nr_segs * sizeof(struct bio_vec),
  1032. flags);
  1033. else
  1034. /* iovec and kvec have identical layout */
  1035. return new->iov = kmemdup(new->iov,
  1036. new->nr_segs * sizeof(struct iovec),
  1037. flags);
  1038. }
  1039. EXPORT_SYMBOL(dup_iter);
  1040. /**
  1041. * import_iovec() - Copy an array of &struct iovec from userspace
  1042. * into the kernel, check that it is valid, and initialize a new
  1043. * &struct iov_iter iterator to access it.
  1044. *
  1045. * @type: One of %READ or %WRITE.
  1046. * @uvector: Pointer to the userspace array.
  1047. * @nr_segs: Number of elements in userspace array.
  1048. * @fast_segs: Number of elements in @iov.
  1049. * @iov: (input and output parameter) Pointer to pointer to (usually small
  1050. * on-stack) kernel array.
  1051. * @i: Pointer to iterator that will be initialized on success.
  1052. *
  1053. * If the array pointed to by *@iov is large enough to hold all @nr_segs,
  1054. * then this function places %NULL in *@iov on return. Otherwise, a new
  1055. * array will be allocated and the result placed in *@iov. This means that
  1056. * the caller may call kfree() on *@iov regardless of whether the small
  1057. * on-stack array was used or not (and regardless of whether this function
  1058. * returns an error or not).
  1059. *
  1060. * Return: 0 on success or negative error code on error.
  1061. */
  1062. int import_iovec(int type, const struct iovec __user * uvector,
  1063. unsigned nr_segs, unsigned fast_segs,
  1064. struct iovec **iov, struct iov_iter *i)
  1065. {
  1066. ssize_t n;
  1067. struct iovec *p;
  1068. n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
  1069. *iov, &p);
  1070. if (n < 0) {
  1071. if (p != *iov)
  1072. kfree(p);
  1073. *iov = NULL;
  1074. return n;
  1075. }
  1076. iov_iter_init(i, type, p, nr_segs, n);
  1077. *iov = p == *iov ? NULL : p;
  1078. return 0;
  1079. }
  1080. EXPORT_SYMBOL(import_iovec);
  1081. #ifdef CONFIG_COMPAT
  1082. #include <linux/compat.h>
  1083. int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
  1084. unsigned nr_segs, unsigned fast_segs,
  1085. struct iovec **iov, struct iov_iter *i)
  1086. {
  1087. ssize_t n;
  1088. struct iovec *p;
  1089. n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
  1090. *iov, &p);
  1091. if (n < 0) {
  1092. if (p != *iov)
  1093. kfree(p);
  1094. *iov = NULL;
  1095. return n;
  1096. }
  1097. iov_iter_init(i, type, p, nr_segs, n);
  1098. *iov = p == *iov ? NULL : p;
  1099. return 0;
  1100. }
  1101. #endif
  1102. int import_single_range(int rw, void __user *buf, size_t len,
  1103. struct iovec *iov, struct iov_iter *i)
  1104. {
  1105. if (len > MAX_RW_COUNT)
  1106. len = MAX_RW_COUNT;
  1107. if (unlikely(!access_ok(!rw, buf, len)))
  1108. return -EFAULT;
  1109. iov->iov_base = buf;
  1110. iov->iov_len = len;
  1111. iov_iter_init(i, rw, iov, 1, len);
  1112. return 0;
  1113. }
  1114. EXPORT_SYMBOL(import_single_range);