iov_iter.c
#include <linux/export.h>
#include <linux/uio.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/splice.h>
#include <net/checksum.h>

#define PIPE_PARANOIA /* for now */
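/*
 * PIPE_PARANOIA keeps the sanity() checks below compiled in: every pipe
 * operation re-validates the iterator against the pipe's ring state and
 * warns loudly on any mismatch.  Without it, sanity() collapses to a
 * constant "true".
 */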
#define iterate_iovec(i, n, __v, __p, skip, STEP) {     \
        size_t left;                                    \
        size_t wanted = n;                              \
        __p = i->iov;                                   \
        __v.iov_len = min(n, __p->iov_len - skip);      \
        if (likely(__v.iov_len)) {                      \
                __v.iov_base = __p->iov_base + skip;    \
                left = (STEP);                          \
                __v.iov_len -= left;                    \
                skip += __v.iov_len;                    \
                n -= __v.iov_len;                       \
        } else {                                        \
                left = 0;                               \
        }                                               \
        while (unlikely(!left && n)) {                  \
                __p++;                                  \
                __v.iov_len = min(n, __p->iov_len);     \
                if (unlikely(!__v.iov_len))             \
                        continue;                       \
                __v.iov_base = __p->iov_base;           \
                left = (STEP);                          \
                __v.iov_len -= left;                    \
                skip = __v.iov_len;                     \
                n -= __v.iov_len;                       \
        }                                               \
        n = wanted - n;                                 \
}
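/*
 * Note on the STEP contract for iterate_iovec(): STEP is an expression that
 * consumes __v and evaluates to the number of bytes it could NOT process
 * (as __copy_*_user() does).  A non-zero result marks a short copy and
 * terminates the walk; on exit n holds the number of bytes actually
 * processed.  The kvec and bvec variants below cannot fault, so their STEP
 * results are discarded.
 */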
#define iterate_kvec(i, n, __v, __p, skip, STEP) {      \
        size_t wanted = n;                              \
        __p = i->kvec;                                  \
        __v.iov_len = min(n, __p->iov_len - skip);      \
        if (likely(__v.iov_len)) {                      \
                __v.iov_base = __p->iov_base + skip;    \
                (void)(STEP);                           \
                skip += __v.iov_len;                    \
                n -= __v.iov_len;                       \
        }                                               \
        while (unlikely(n)) {                           \
                __p++;                                  \
                __v.iov_len = min(n, __p->iov_len);     \
                if (unlikely(!__v.iov_len))             \
                        continue;                       \
                __v.iov_base = __p->iov_base;           \
                (void)(STEP);                           \
                skip = __v.iov_len;                     \
                n -= __v.iov_len;                       \
        }                                               \
        n = wanted;                                     \
}

#define iterate_bvec(i, n, __v, __bi, skip, STEP) {     \
        struct bvec_iter __start;                       \
        __start.bi_size = n;                            \
        __start.bi_bvec_done = skip;                    \
        __start.bi_idx = 0;                             \
        for_each_bvec(__v, i->bvec, __bi, __start) {    \
                if (!__v.bv_len)                        \
                        continue;                       \
                (void)(STEP);                           \
        }                                               \
}

#define iterate_all_kinds(i, n, v, I, B, K) {           \
        size_t skip = i->iov_offset;                    \
        if (unlikely(i->type & ITER_BVEC)) {            \
                struct bio_vec v;                       \
                struct bvec_iter __bi;                  \
                iterate_bvec(i, n, v, __bi, skip, (B))  \
        } else if (unlikely(i->type & ITER_KVEC)) {     \
                const struct kvec *kvec;                \
                struct kvec v;                          \
                iterate_kvec(i, n, v, kvec, skip, (K))  \
        } else {                                        \
                const struct iovec *iov;                \
                struct iovec v;                         \
                iterate_iovec(i, n, v, iov, skip, (I))  \
        }                                               \
}

#define iterate_and_advance(i, n, v, I, B, K) {                 \
        if (unlikely(i->count < n))                             \
                n = i->count;                                   \
        if (i->count) {                                         \
                size_t skip = i->iov_offset;                    \
                if (unlikely(i->type & ITER_BVEC)) {            \
                        const struct bio_vec *bvec = i->bvec;   \
                        struct bio_vec v;                       \
                        struct bvec_iter __bi;                  \
                        iterate_bvec(i, n, v, __bi, skip, (B))  \
                        i->bvec = __bvec_iter_bvec(i->bvec, __bi); \
                        i->nr_segs -= i->bvec - bvec;           \
                        skip = __bi.bi_bvec_done;               \
                } else if (unlikely(i->type & ITER_KVEC)) {     \
                        const struct kvec *kvec;                \
                        struct kvec v;                          \
                        iterate_kvec(i, n, v, kvec, skip, (K))  \
                        if (skip == kvec->iov_len) {            \
                                kvec++;                         \
                                skip = 0;                       \
                        }                                       \
                        i->nr_segs -= kvec - i->kvec;           \
                        i->kvec = kvec;                         \
                } else {                                        \
                        const struct iovec *iov;                \
                        struct iovec v;                         \
                        iterate_iovec(i, n, v, iov, skip, (I))  \
                        if (skip == iov->iov_len) {             \
                                iov++;                          \
                                skip = 0;                       \
                        }                                       \
                        i->nr_segs -= iov - i->iov;             \
                        i->iov = iov;                           \
                }                                               \
                i->count -= n;                                  \
                i->iov_offset = skip;                           \
        }                                                       \
}
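/*
 * Illustrative sketch (hypothetical, not part of this file): a caller of
 * iterate_and_advance() supplies one STEP expression per iterator flavour,
 * each seeing the current segment as `v'.  A byte-counting walk could look
 * like this, with `seen' being a local the caller declares:
 *
 *	iterate_and_advance(i, bytes, v,
 *		(seen += v.iov_len, 0),		/- user iovec; 0 = no fault -/
 *		seen += v.bv_len,		/- bio_vec page segment -/
 *		seen += v.iov_len		/- kernel kvec -/
 *	)
 */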
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        size_t skip, copy, left, wanted;
        const struct iovec *iov;
        char __user *buf;
        void *kaddr, *from;

        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        wanted = bytes;
        iov = i->iov;
        skip = i->iov_offset;
        buf = iov->iov_base + skip;
        copy = min(bytes, iov->iov_len - skip);

        if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
                kaddr = kmap_atomic(page);
                from = kaddr + offset;

                /* first chunk, usually the only one */
                left = __copy_to_user_inatomic(buf, from, copy);
                copy -= left;
                skip += copy;
                from += copy;
                bytes -= copy;

                while (unlikely(!left && bytes)) {
                        iov++;
                        buf = iov->iov_base;
                        copy = min(bytes, iov->iov_len);
                        left = __copy_to_user_inatomic(buf, from, copy);
                        copy -= left;
                        skip = copy;
                        from += copy;
                        bytes -= copy;
                }
                if (likely(!bytes)) {
                        kunmap_atomic(kaddr);
                        goto done;
                }
                offset = from - kaddr;
                buf += copy;
                kunmap_atomic(kaddr);
                copy = min(bytes, iov->iov_len - skip);
        }
        /* Too bad - revert to non-atomic kmap */

        kaddr = kmap(page);
        from = kaddr + offset;
        left = __copy_to_user(buf, from, copy);
        copy -= left;
        skip += copy;
        from += copy;
        bytes -= copy;
        while (unlikely(!left && bytes)) {
                iov++;
                buf = iov->iov_base;
                copy = min(bytes, iov->iov_len);
                left = __copy_to_user(buf, from, copy);
                copy -= left;
                skip = copy;
                from += copy;
                bytes -= copy;
        }
        kunmap(page);

done:
        if (skip == iov->iov_len) {
                iov++;
                skip = 0;
        }
        i->count -= wanted - bytes;
        i->nr_segs -= iov - i->iov;
        i->iov = iov;
        i->iov_offset = skip;
        return wanted - bytes;
}
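/*
 * The mirror image of copy_page_to_iter_iovec(): pull user data described by
 * the iovec-backed iterator into @page.  The same two-phase scheme is used -
 * try kmap_atomic() after pre-faulting the user range, and fall back to a
 * sleeping kmap() if an atomic copy comes up short.
 */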
static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        size_t skip, copy, left, wanted;
        const struct iovec *iov;
        char __user *buf;
        void *kaddr, *to;

        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        wanted = bytes;
        iov = i->iov;
        skip = i->iov_offset;
        buf = iov->iov_base + skip;
        copy = min(bytes, iov->iov_len - skip);

        if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
                kaddr = kmap_atomic(page);
                to = kaddr + offset;

                /* first chunk, usually the only one */
                left = __copy_from_user_inatomic(to, buf, copy);
                copy -= left;
                skip += copy;
                to += copy;
                bytes -= copy;

                while (unlikely(!left && bytes)) {
                        iov++;
                        buf = iov->iov_base;
                        copy = min(bytes, iov->iov_len);
                        left = __copy_from_user_inatomic(to, buf, copy);
                        copy -= left;
                        skip = copy;
                        to += copy;
                        bytes -= copy;
                }
                if (likely(!bytes)) {
                        kunmap_atomic(kaddr);
                        goto done;
                }
                offset = to - kaddr;
                buf += copy;
                kunmap_atomic(kaddr);
                copy = min(bytes, iov->iov_len - skip);
        }
        /* Too bad - revert to non-atomic kmap */

        kaddr = kmap(page);
        to = kaddr + offset;
        left = __copy_from_user(to, buf, copy);
        copy -= left;
        skip += copy;
        to += copy;
        bytes -= copy;
        while (unlikely(!left && bytes)) {
                iov++;
                buf = iov->iov_base;
                copy = min(bytes, iov->iov_len);
                left = __copy_from_user(to, buf, copy);
                copy -= left;
                skip = copy;
                to += copy;
                bytes -= copy;
        }
        kunmap(page);

done:
        if (skip == iov->iov_len) {
                iov++;
                skip = 0;
        }
        i->count -= wanted - bytes;
        i->nr_segs -= iov - i->iov;
        i->iov = iov;
        i->iov_offset = skip;
        return wanted - bytes;
}

#ifdef PIPE_PARANOIA
static bool sanity(const struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        int idx = i->idx;
        int next = pipe->curbuf + pipe->nrbufs;
        if (i->iov_offset) {
                struct pipe_buffer *p;
                if (unlikely(!pipe->nrbufs))
                        goto Bad;       // pipe must be non-empty
                if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
                        goto Bad;       // must be at the last buffer...

                p = &pipe->bufs[idx];
                if (unlikely(p->offset + p->len != i->iov_offset))
                        goto Bad;       // ... at the end of segment
        } else {
                if (idx != (next & (pipe->buffers - 1)))
                        goto Bad;       // must be right after the last buffer
        }
        return true;
Bad:
        printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
        printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
                        pipe->curbuf, pipe->nrbufs, pipe->buffers);
        for (idx = 0; idx < pipe->buffers; idx++)
                printk(KERN_ERR "[%p %p %d %d]\n",
                        pipe->bufs[idx].ops,
                        pipe->bufs[idx].page,
                        pipe->bufs[idx].offset,
                        pipe->bufs[idx].len);
        WARN_ON(1);
        return false;
}
#else
#define sanity(i) true
#endif

static inline int next_idx(int idx, struct pipe_inode_info *pipe)
{
        return (idx + 1) & (pipe->buffers - 1);
}
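/*
 * Pipe rings are sized in powers of two, so masking with (pipe->buffers - 1)
 * in next_idx() wraps an index around the ring without a division.
 */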
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        struct pipe_buffer *buf;
        size_t off;
        int idx;

        if (unlikely(bytes > i->count))
                bytes = i->count;

        if (unlikely(!bytes))
                return 0;

        if (!sanity(i))
                return 0;

        off = i->iov_offset;
        idx = i->idx;
        buf = &pipe->bufs[idx];
        if (off) {
                if (offset == off && buf->page == page) {
                        /* merge with the last one */
                        buf->len += bytes;
                        i->iov_offset += bytes;
                        goto out;
                }
                idx = next_idx(idx, pipe);
                buf = &pipe->bufs[idx];
        }
        if (idx == pipe->curbuf && pipe->nrbufs)
                return 0;
        pipe->nrbufs++;
        buf->ops = &page_cache_pipe_buf_ops;
        get_page(buf->page = page);
        buf->offset = offset;
        buf->len = bytes;
        i->iov_offset = offset + bytes;
        i->idx = idx;
out:
        i->count -= bytes;
        return bytes;
}
/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes.  For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (e.g.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
        size_t skip = i->iov_offset;
        const struct iovec *iov;
        int err;
        struct iovec v;

        if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
                iterate_iovec(i, bytes, v, iov, skip, ({
                        err = fault_in_multipages_readable(v.iov_base,
                                                           v.iov_len);
                        if (unlikely(err))
                                return err;
                0;}))
        }
        return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);

void iov_iter_init(struct iov_iter *i, int direction,
                        const struct iovec *iov, unsigned long nr_segs,
                        size_t count)
{
        /* It will get better.  Eventually... */
        if (segment_eq(get_fs(), KERNEL_DS)) {
                direction |= ITER_KVEC;
                i->type = direction;
                i->kvec = (struct kvec *)iov;
        } else {
                i->type = direction;
                i->iov = iov;
        }
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
        i->count = count;
}
EXPORT_SYMBOL(iov_iter_init);
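/*
 * Example (an illustrative sketch, not part of this file): a write(2)-style
 * path wraps the user buffer description in an iterator, pre-faults it, and
 * then works purely in terms of the iterator.  `uvec', `nr', `kbuf' and
 * `len' are hypothetical:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_init(&iter, WRITE, uvec, nr, len);
 *	if (iov_iter_fault_in_readable(&iter, len))
 *		return -EFAULT;
 *	copied = copy_from_iter(kbuf, len, &iter);
 */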
static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
{
        char *from = kmap_atomic(page);
        memcpy(to, from + offset, len);
        kunmap_atomic(from);
}

static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
{
        char *to = kmap_atomic(page);
        memcpy(to + offset, from, len);
        kunmap_atomic(to);
}

static void memzero_page(struct page *page, size_t offset, size_t len)
{
        char *addr = kmap_atomic(page);
        memset(addr + offset, 0, len);
        kunmap_atomic(addr);
}

static inline bool allocated(struct pipe_buffer *buf)
{
        return buf->ops == &default_pipe_buf_ops;
}

static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
{
        size_t off = i->iov_offset;
        int idx = i->idx;
        if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
                idx = next_idx(idx, i->pipe);
                off = 0;
        }
        *idxp = idx;
        *offp = off;
}

static size_t push_pipe(struct iov_iter *i, size_t size,
                        int *idxp, size_t *offp)
{
        struct pipe_inode_info *pipe = i->pipe;
        size_t off;
        int idx;
        ssize_t left;

        if (unlikely(size > i->count))
                size = i->count;
        if (unlikely(!size))
                return 0;

        left = size;
        data_start(i, &idx, &off);
        *idxp = idx;
        *offp = off;
        if (off) {
                left -= PAGE_SIZE - off;
                if (left <= 0) {
                        pipe->bufs[idx].len += size;
                        return size;
                }
                pipe->bufs[idx].len = PAGE_SIZE;
                idx = next_idx(idx, pipe);
        }
        while (idx != pipe->curbuf || !pipe->nrbufs) {
                struct page *page = alloc_page(GFP_USER);
                if (!page)
                        break;
                pipe->nrbufs++;
                pipe->bufs[idx].ops = &default_pipe_buf_ops;
                pipe->bufs[idx].page = page;
                pipe->bufs[idx].offset = 0;
                if (left <= PAGE_SIZE) {
                        pipe->bufs[idx].len = left;
                        return size;
                }
                pipe->bufs[idx].len = PAGE_SIZE;
                left -= PAGE_SIZE;
                idx = next_idx(idx, pipe);
        }
        return size - left;
}
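/*
 * Note: push_pipe() returns how many bytes of capacity it could arrange,
 * which may be less than the requested size if the ring fills up or a page
 * allocation fails; callers treat a 0 return as "no room at all".
 */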
static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
                                struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        size_t n, off;
        int idx;

        if (!sanity(i))
                return 0;

        bytes = n = push_pipe(i, bytes, &idx, &off);
        if (unlikely(!n))
                return 0;
        for ( ; n; idx = next_idx(idx, pipe), off = 0) {
                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
                memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
                i->idx = idx;
                i->iov_offset = off + chunk;
                n -= chunk;
                addr += chunk;
        }
        i->count -= bytes;
        return bytes;
}

size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
        const char *from = addr;
        if (unlikely(i->type & ITER_PIPE))
                return copy_pipe_to_iter(addr, bytes, i);
        iterate_and_advance(i, bytes, v,
                __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len,
                               v.iov_len),
                memcpy_to_page(v.bv_page, v.bv_offset,
                               (from += v.bv_len) - v.bv_len, v.bv_len),
                memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(copy_to_iter);
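/*
 * Example (a sketch under assumed names, not part of this file): a driver
 * with a kernel buffer `kbuf' of `len' bytes can serve a ->read_iter()
 * request without caring whether `to' is iovec-, kvec-, bvec- or
 * pipe-backed:
 *
 *	static ssize_t foo_read_iter(struct kiocb *iocb, struct iov_iter *to)
 *	{
 *		size_t copied = copy_to_iter(kbuf, len, to);
 *
 *		if (!copied && len)
 *			return -EFAULT;
 *		return copied;
 *	}
 */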
size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(i->type & ITER_PIPE)) {
                WARN_ON(1);
                return 0;
        }
        iterate_and_advance(i, bytes, v,
                __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base,
                                 v.iov_len),
                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(copy_from_iter);

size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
        char *to = addr;
        if (unlikely(i->type & ITER_PIPE)) {
                WARN_ON(1);
                return 0;
        }
        iterate_and_advance(i, bytes, v,
                __copy_from_user_nocache((to += v.iov_len) - v.iov_len,
                                         v.iov_base, v.iov_len),
                memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(copy_from_iter_nocache);

size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        if (i->type & (ITER_BVEC|ITER_KVEC)) {
                void *kaddr = kmap_atomic(page);
                size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
        } else if (likely(!(i->type & ITER_PIPE)))
                return copy_page_to_iter_iovec(page, offset, bytes, i);
        else
                return copy_page_to_iter_pipe(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_to_iter);

size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
                         struct iov_iter *i)
{
        if (unlikely(i->type & ITER_PIPE)) {
                WARN_ON(1);
                return 0;
        }
        if (i->type & (ITER_BVEC|ITER_KVEC)) {
                void *kaddr = kmap_atomic(page);
                size_t wanted = copy_from_iter(kaddr + offset, bytes, i);
                kunmap_atomic(kaddr);
                return wanted;
        } else
                return copy_page_from_iter_iovec(page, offset, bytes, i);
}
EXPORT_SYMBOL(copy_page_from_iter);

static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
        struct pipe_inode_info *pipe = i->pipe;
        size_t n, off;
        int idx;

        if (!sanity(i))
                return 0;

        bytes = n = push_pipe(i, bytes, &idx, &off);
        if (unlikely(!n))
                return 0;

        for ( ; n; idx = next_idx(idx, pipe), off = 0) {
                size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
                memzero_page(pipe->bufs[idx].page, off, chunk);
                i->idx = idx;
                i->iov_offset = off + chunk;
                n -= chunk;
        }
        i->count -= bytes;
        return bytes;
}

size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
        if (unlikely(i->type & ITER_PIPE))
                return pipe_zero(bytes, i);
        iterate_and_advance(i, bytes, v,
                __clear_user(v.iov_base, v.iov_len),
                memzero_page(v.bv_page, v.bv_offset, v.bv_len),
                memset(v.iov_base, 0, v.iov_len)
        )

        return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);

size_t iov_iter_copy_from_user_atomic(struct page *page,
                struct iov_iter *i, unsigned long offset, size_t bytes)
{
        char *kaddr = kmap_atomic(page), *p = kaddr + offset;
        if (unlikely(i->type & ITER_PIPE)) {
                kunmap_atomic(kaddr);
                WARN_ON(1);
                return 0;
        }
        iterate_all_kinds(i, bytes, v,
                __copy_from_user_inatomic((p += v.iov_len) - v.iov_len,
                                          v.iov_base, v.iov_len),
                memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
                                 v.bv_offset, v.bv_len),
                memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
        )
        kunmap_atomic(kaddr);
        return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);

static void pipe_advance(struct iov_iter *i, size_t size)
{
        struct pipe_inode_info *pipe = i->pipe;
        struct pipe_buffer *buf;
        int idx = i->idx;
        size_t off = i->iov_offset;

        if (unlikely(i->count < size))
                size = i->count;

        if (size) {
                if (off) /* make it relative to the beginning of buffer */
                        size += off - pipe->bufs[idx].offset;
                while (1) {
                        buf = &pipe->bufs[idx];
                        if (size <= buf->len)
                                break;
                        size -= buf->len;
                        idx = next_idx(idx, pipe);
                }
                buf->len = size;
                i->idx = idx;
                off = i->iov_offset = buf->offset + size;
        }
        if (off)
                idx = next_idx(idx, pipe);
        if (pipe->nrbufs) {
                int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
                /* [curbuf,unused) is in use.  Free [idx,unused) */
                while (idx != unused) {
                        pipe_buf_release(pipe, &pipe->bufs[idx]);
                        idx = next_idx(idx, pipe);
                        pipe->nrbufs--;
                }
        }
}

void iov_iter_advance(struct iov_iter *i, size_t size)
{
        if (unlikely(i->type & ITER_PIPE)) {
                pipe_advance(i, size);
                return;
        }
        iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);

/*
 * Return the count of just the current iov_iter segment.
 */
size_t iov_iter_single_seg_count(const struct iov_iter *i)
{
        if (unlikely(i->type & ITER_PIPE))
                return i->count;        // it is a silly place, anyway
        if (i->nr_segs == 1)
                return i->count;
        else if (i->type & ITER_BVEC)
                return min(i->count, i->bvec->bv_len - i->iov_offset);
        else
                return min(i->count, i->iov->iov_len - i->iov_offset);
}
EXPORT_SYMBOL(iov_iter_single_seg_count);

void iov_iter_kvec(struct iov_iter *i, int direction,
                        const struct kvec *kvec, unsigned long nr_segs,
                        size_t count)
{
        BUG_ON(!(direction & ITER_KVEC));
        i->type = direction;
        i->kvec = kvec;
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
        i->count = count;
}
EXPORT_SYMBOL(iov_iter_kvec);

void iov_iter_bvec(struct iov_iter *i, int direction,
                        const struct bio_vec *bvec, unsigned long nr_segs,
                        size_t count)
{
        BUG_ON(!(direction & ITER_BVEC));
        i->type = direction;
        i->bvec = bvec;
        i->nr_segs = nr_segs;
        i->iov_offset = 0;
        i->count = count;
}
EXPORT_SYMBOL(iov_iter_bvec);

void iov_iter_pipe(struct iov_iter *i, int direction,
                        struct pipe_inode_info *pipe,
                        size_t count)
{
        BUG_ON(direction != ITER_PIPE);
        i->type = direction;
        i->pipe = pipe;
        i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
        i->iov_offset = 0;
        i->count = count;
}
EXPORT_SYMBOL(iov_iter_pipe);
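/*
 * Example (an illustrative sketch): code that already holds a kernel
 * scatter list can build an iterator over it directly; `bv', `nr', `src'
 * and `total' are hypothetical:
 *
 *	struct iov_iter iter;
 *
 *	iov_iter_bvec(&iter, ITER_BVEC | READ, bv, nr, total);
 *	n = copy_to_iter(src, total, &iter);
 */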
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
        unsigned long res = 0;
        size_t size = i->count;

        if (!size)
                return 0;

        if (unlikely(i->type & ITER_PIPE)) {
                if (i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
                        return size | i->iov_offset;
                return size;
        }
        iterate_all_kinds(i, size, v,
                (res |= (unsigned long)v.iov_base | v.iov_len, 0),
                res |= v.bv_offset | v.bv_len,
                res |= (unsigned long)v.iov_base | v.iov_len
        )
        return res;
}
EXPORT_SYMBOL(iov_iter_alignment);
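/*
 * The value returned above is the OR of every segment's address and length
 * (or of the residual count for a pipe), so a caller checking for, say,
 * 512-byte alignment only needs "iov_iter_alignment(i) & 511".
 */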
unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
        unsigned long res = 0;
        size_t size = i->count;
        if (!size)
                return 0;

        if (unlikely(i->type & ITER_PIPE)) {
                WARN_ON(1);
                return ~0U;
        }

        iterate_all_kinds(i, size, v,
                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
                        (size != v.iov_len ? size : 0), 0),
                (res |= (!res ? 0 : (unsigned long)v.bv_offset) |
                        (size != v.bv_len ? size : 0)),
                (res |= (!res ? 0 : (unsigned long)v.iov_base) |
                        (size != v.iov_len ? size : 0))
                );
        return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);

static inline size_t __pipe_get_pages(struct iov_iter *i,
                                size_t maxsize,
                                struct page **pages,
                                int idx,
                                size_t *start)
{
        struct pipe_inode_info *pipe = i->pipe;
        size_t n = push_pipe(i, maxsize, &idx, start);
        if (!n)
                return -EFAULT;

        maxsize = n;
        n += *start;
        while (n >= PAGE_SIZE) {
                get_page(*pages++ = pipe->bufs[idx].page);
                idx = next_idx(idx, pipe);
                n -= PAGE_SIZE;
        }

        return maxsize;
}

static ssize_t pipe_get_pages(struct iov_iter *i,
                   struct page **pages, size_t maxsize, unsigned maxpages,
                   size_t *start)
{
        unsigned npages;
        size_t capacity;
        int idx;

        if (!sanity(i))
                return -EFAULT;

        data_start(i, &idx, start);
        /* some of this one + all after this one */
        npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
        capacity = min(npages, maxpages) * PAGE_SIZE - *start;

        return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
}

ssize_t iov_iter_get_pages(struct iov_iter *i,
                   struct page **pages, size_t maxsize, unsigned maxpages,
                   size_t *start)
{
        if (maxsize > i->count)
                maxsize = i->count;
        if (!maxsize)
                return 0;

        if (unlikely(i->type & ITER_PIPE))
                return pipe_get_pages(i, pages, maxsize, maxpages, start);
        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
                int n;
                int res;

                if (len > maxpages * PAGE_SIZE)
                        len = maxpages * PAGE_SIZE;
                addr &= ~(PAGE_SIZE - 1);
                n = DIV_ROUND_UP(len, PAGE_SIZE);
                res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages);
                if (unlikely(res < 0))
                        return res;
                return (res == n ? len : res * PAGE_SIZE) - *start;
        0;}),({
                /* can't be more than PAGE_SIZE */
                *start = v.bv_offset;
                get_page(*pages = v.bv_page);
                return v.bv_len;
        }),({
                return -EFAULT;
        })
        )
        return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);
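/*
 * Example (a sketch with assumed names): direct-I/O style code pins the
 * pages behind an iterator `i' before handing them to hardware; the pinned
 * range covers `got' bytes starting at offset `off' into pages[0], i.e.
 * DIV_ROUND_UP(off + got, PAGE_SIZE) pages:
 *
 *	struct page *pages[16];
 *	size_t off;
 *	ssize_t got;
 *
 *	got = iov_iter_get_pages(i, pages, SIZE_MAX, 16, &off);
 *	if (got < 0)
 *		return got;
 */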
static struct page **get_pages_array(size_t n)
{
        struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
        if (!p)
                p = vmalloc(n * sizeof(struct page *));
        return p;
}

static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
                   struct page ***pages, size_t maxsize,
                   size_t *start)
{
        struct page **p;
        size_t n;
        int idx;
        int npages;

        if (!sanity(i))
                return -EFAULT;

        data_start(i, &idx, start);
        /* some of this one + all after this one */
        npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
        n = npages * PAGE_SIZE - *start;
        if (maxsize > n)
                maxsize = n;
        else
                npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
        p = get_pages_array(npages);
        if (!p)
                return -ENOMEM;
        n = __pipe_get_pages(i, maxsize, p, idx, start);
        if (n > 0)
                *pages = p;
        else
                kvfree(p);
        return n;
}

ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
                   struct page ***pages, size_t maxsize,
                   size_t *start)
{
        struct page **p;

        if (maxsize > i->count)
                maxsize = i->count;
        if (!maxsize)
                return 0;

        if (unlikely(i->type & ITER_PIPE))
                return pipe_get_pages_alloc(i, pages, maxsize, start);
        iterate_all_kinds(i, maxsize, v, ({
                unsigned long addr = (unsigned long)v.iov_base;
                size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
                int n;
                int res;

                addr &= ~(PAGE_SIZE - 1);
                n = DIV_ROUND_UP(len, PAGE_SIZE);
                p = get_pages_array(n);
                if (!p)
                        return -ENOMEM;
                res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
                if (unlikely(res < 0)) {
                        kvfree(p);
                        return res;
                }
                *pages = p;
                return (res == n ? len : res * PAGE_SIZE) - *start;
        0;}),({
                /* can't be more than PAGE_SIZE */
                *start = v.bv_offset;
                *pages = p = get_pages_array(1);
                if (!p)
                        return -ENOMEM;
                get_page(*p = v.bv_page);
                return v.bv_len;
        }),({
                return -EFAULT;
        })
        )
        return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);

size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
                               struct iov_iter *i)
{
        char *to = addr;
        __wsum sum, next;
        size_t off = 0;
        sum = *csum;
        if (unlikely(i->type & ITER_PIPE)) {
                WARN_ON(1);
                return 0;
        }
        iterate_and_advance(i, bytes, v, ({
                int err = 0;
                next = csum_and_copy_from_user(v.iov_base,
                                               (to += v.iov_len) - v.iov_len,
                                               v.iov_len, 0, &err);
                if (!err) {
                        sum = csum_block_add(sum, next, off);
                        off += v.iov_len;
                }
                err ? v.iov_len : 0;
        }), ({
                char *p = kmap_atomic(v.bv_page);
                next = csum_partial_copy_nocheck(p + v.bv_offset,
                                                 (to += v.bv_len) - v.bv_len,
                                                 v.bv_len, 0);
                kunmap_atomic(p);
                sum = csum_block_add(sum, next, off);
                off += v.bv_len;
        }),({
                next = csum_partial_copy_nocheck(v.iov_base,
                                                 (to += v.iov_len) - v.iov_len,
                                                 v.iov_len, 0);
                sum = csum_block_add(sum, next, off);
                off += v.iov_len;
        })
        )
        *csum = sum;
        return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);

size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
                             struct iov_iter *i)
{
        const char *from = addr;
        __wsum sum, next;
        size_t off = 0;
        sum = *csum;
        if (unlikely(i->type & ITER_PIPE)) {
                WARN_ON(1);     /* for now */
                return 0;
        }
        iterate_and_advance(i, bytes, v, ({
                int err = 0;
                next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
                                             v.iov_base,
                                             v.iov_len, 0, &err);
                if (!err) {
                        sum = csum_block_add(sum, next, off);
                        off += v.iov_len;
                }
                err ? v.iov_len : 0;
        }), ({
                char *p = kmap_atomic(v.bv_page);
                next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
                                                 p + v.bv_offset,
                                                 v.bv_len, 0);
                kunmap_atomic(p);
                sum = csum_block_add(sum, next, off);
                off += v.bv_len;
        }),({
                next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
                                                 v.iov_base,
                                                 v.iov_len, 0);
                sum = csum_block_add(sum, next, off);
                off += v.iov_len;
        })
        )
        *csum = sum;
        return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);

int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
        size_t size = i->count;
        int npages = 0;

        if (!size)
                return 0;

        if (unlikely(i->type & ITER_PIPE)) {
                struct pipe_inode_info *pipe = i->pipe;
                size_t off;
                int idx;

                if (!sanity(i))
                        return 0;

                data_start(i, &idx, &off);
                /* some of this one + all after this one */
                npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
                if (npages >= maxpages)
                        return maxpages;
        } else iterate_all_kinds(i, size, v, ({
                unsigned long p = (unsigned long)v.iov_base;
                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
                        - p / PAGE_SIZE;
                if (npages >= maxpages)
                        return maxpages;
        0;}),({
                npages++;
                if (npages >= maxpages)
                        return maxpages;
        }),({
                unsigned long p = (unsigned long)v.iov_base;
                npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
                        - p / PAGE_SIZE;
                if (npages >= maxpages)
                        return maxpages;
        })
        )
        return npages;
}
EXPORT_SYMBOL(iov_iter_npages);

const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
{
        *new = *old;
        if (unlikely(new->type & ITER_PIPE)) {
                WARN_ON(1);
                return NULL;
        }
        if (new->type & ITER_BVEC)
                return new->bvec = kmemdup(new->bvec,
                                    new->nr_segs * sizeof(struct bio_vec),
                                    flags);
        else
                /* iovec and kvec have identical layout */
                return new->iov = kmemdup(new->iov,
                                   new->nr_segs * sizeof(struct iovec),
                                   flags);
}
EXPORT_SYMBOL(dup_iter);

int import_iovec(int type, const struct iovec __user * uvector,
                 unsigned nr_segs, unsigned fast_segs,
                 struct iovec **iov, struct iov_iter *i)
{
        ssize_t n;
        struct iovec *p;
        n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
                                  *iov, &p);
        if (n < 0) {
                if (p != *iov)
                        kfree(p);
                *iov = NULL;
                return n;
        }
        iov_iter_init(i, type, p, nr_segs, n);
        *iov = p == *iov ? NULL : p;
        return 0;
}
EXPORT_SYMBOL(import_iovec);
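/*
 * Example (an illustrative sketch): a readv(2)-style syscall imports the
 * user's vector and gets back an initialised iterator plus a buffer to
 * free; `uvec' and `nr' are hypothetical.  On return *iov is NULL when the
 * on-stack array was big enough, so the unconditional kfree() is safe:
 *
 *	struct iovec iovstack[UIO_FASTIOV], *iov = iovstack;
 *	struct iov_iter iter;
 *	int ret;
 *
 *	ret = import_iovec(READ, uvec, nr, UIO_FASTIOV, &iov, &iter);
 *	if (ret < 0)
 *		return ret;
 *	...
 *	kfree(iov);
 */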
#ifdef CONFIG_COMPAT
#include <linux/compat.h>

int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
                 unsigned nr_segs, unsigned fast_segs,
                 struct iovec **iov, struct iov_iter *i)
{
        ssize_t n;
        struct iovec *p;
        n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
                                         *iov, &p);
        if (n < 0) {
                if (p != *iov)
                        kfree(p);
                *iov = NULL;
                return n;
        }
        iov_iter_init(i, type, p, nr_segs, n);
        *iov = p == *iov ? NULL : p;
        return 0;
}
#endif

int import_single_range(int rw, void __user *buf, size_t len,
                 struct iovec *iov, struct iov_iter *i)
{
        if (len > MAX_RW_COUNT)
                len = MAX_RW_COUNT;
        if (unlikely(!access_ok(!rw, buf, len)))
                return -EFAULT;

        iov->iov_base = buf;
        iov->iov_len = len;
        iov_iter_init(i, rw, iov, 1, len);
        return 0;
}
EXPORT_SYMBOL(import_single_range);