iov_iter.c 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602
  1. #include <linux/export.h>
  2. #include <linux/bvec.h>
  3. #include <linux/uio.h>
  4. #include <linux/pagemap.h>
  5. #include <linux/slab.h>
  6. #include <linux/vmalloc.h>
  7. #include <linux/splice.h>
  8. #include <net/checksum.h>
  9. #define PIPE_PARANOIA /* for now */
/*
 * Walk up to @n bytes of an ITER_IOVEC iterator, presenting each
 * contiguous user-space chunk to STEP via __v (base/len).  STEP must
 * evaluate to the number of bytes it failed to process (e.g. a faulting
 * uaccess copy); a non-zero result terminates the walk early.  @skip is
 * the starting offset into the first segment and, on exit, the offset
 * into the segment __p points at.  On exit @n holds the number of bytes
 * actually processed (wanted - remaining).
 */
#define iterate_iovec(i, n, __v, __p, skip, STEP) {	\
	size_t left;					\
	size_t wanted = n;				\
	__p = i->iov;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	} else {					\
		left = 0;				\
	}						\
	while (unlikely(!left && n)) {			\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		left = (STEP);				\
		__v.iov_len -= left;			\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted - n;					\
}
/*
 * As iterate_iovec(), but over kernel-space kvec segments.  Copies
 * to/from kernel memory cannot fault, so STEP's result is discarded
 * ((void)(STEP)), the walk always consumes all @n bytes, and @n is
 * simply restored to the full amount at the end.
 */
#define iterate_kvec(i, n, __v, __p, skip, STEP) {	\
	size_t wanted = n;				\
	__p = i->kvec;					\
	__v.iov_len = min(n, __p->iov_len - skip);	\
	if (likely(__v.iov_len)) {			\
		__v.iov_base = __p->iov_base + skip;	\
		(void)(STEP);				\
		skip += __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	while (unlikely(n)) {				\
		__p++;					\
		__v.iov_len = min(n, __p->iov_len);	\
		if (unlikely(!__v.iov_len))		\
			continue;			\
		__v.iov_base = __p->iov_base;		\
		(void)(STEP);				\
		skip = __v.iov_len;			\
		n -= __v.iov_len;			\
	}						\
	n = wanted;					\
}
/*
 * Walk @n bytes of an ITER_BVEC iterator using the generic bvec
 * machinery: a temporary bvec_iter seeded with @n/@skip drives
 * for_each_bvec(), which hands page-sized-or-smaller chunks to STEP
 * via __v.  Empty segments are skipped; STEP cannot fail here either.
 * __bi is left positioned past the processed bytes for the caller.
 */
#define iterate_bvec(i, n, __v, __bi, skip, STEP) {	\
	struct bvec_iter __start;			\
	__start.bi_size = n;				\
	__start.bi_bvec_done = skip;			\
	__start.bi_idx = 0;				\
	for_each_bvec(__v, i->bvec, __bi, __start) {	\
		if (!__v.bv_len)			\
			continue;			\
		(void)(STEP);				\
	}						\
}
/*
 * Dispatch on the iterator flavour and run the matching step expression
 * over up to @n bytes: I for user iovecs, B for bvec pages, K for
 * kernel kvecs.  Does NOT advance the iterator - @i is left untouched
 * (compare iterate_and_advance()).  ITER_PIPE must be excluded by the
 * caller; it would fall into the iovec branch here.
 */
#define iterate_all_kinds(i, n, v, I, B, K) {			\
	if (likely(n)) {					\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
		}						\
	}							\
}
/*
 * Like iterate_all_kinds(), but also consumes the processed bytes:
 * after the walk, i->count, i->iov_offset, i->nr_segs and the segment
 * pointer are updated so @i points just past the data handled.  @n is
 * first clamped to i->count.  A fully-consumed final segment rolls the
 * segment pointer forward with skip reset to 0.
 */
#define iterate_and_advance(i, n, v, I, B, K) {			\
	if (unlikely(i->count < n))				\
		n = i->count;					\
	if (i->count) {						\
		size_t skip = i->iov_offset;			\
		if (unlikely(i->type & ITER_BVEC)) {		\
			const struct bio_vec *bvec = i->bvec;	\
			struct bio_vec v;			\
			struct bvec_iter __bi;			\
			iterate_bvec(i, n, v, __bi, skip, (B))	\
			i->bvec = __bvec_iter_bvec(i->bvec, __bi); \
			i->nr_segs -= i->bvec - bvec;		\
			skip = __bi.bi_bvec_done;		\
		} else if (unlikely(i->type & ITER_KVEC)) {	\
			const struct kvec *kvec;		\
			struct kvec v;				\
			iterate_kvec(i, n, v, kvec, skip, (K))	\
			if (skip == kvec->iov_len) {		\
				kvec++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= kvec - i->kvec;		\
			i->kvec = kvec;				\
		} else {					\
			const struct iovec *iov;		\
			struct iovec v;				\
			iterate_iovec(i, n, v, iov, skip, (I))	\
			if (skip == iov->iov_len) {		\
				iov++;				\
				skip = 0;			\
			}					\
			i->nr_segs -= iov - i->iov;		\
			i->iov = iov;				\
		}						\
		i->count -= n;					\
		i->iov_offset = skip;				\
	}							\
}
  126. static int copyout(void __user *to, const void *from, size_t n)
  127. {
  128. if (access_ok(VERIFY_WRITE, to, n)) {
  129. kasan_check_read(from, n);
  130. n = raw_copy_to_user(to, from, n);
  131. }
  132. return n;
  133. }
  134. static int copyin(void *to, const void __user *from, size_t n)
  135. {
  136. if (access_ok(VERIFY_READ, from, n)) {
  137. kasan_check_write(to, n);
  138. n = raw_copy_from_user(to, from, n);
  139. }
  140. return n;
  141. }
/*
 * Copy @bytes from @page (starting at @offset) into the user iovecs of
 * @i, advancing the iterator by the amount actually copied.  Tries the
 * fast kmap_atomic() path first when the destination pages are already
 * faulted in; falls back to sleeping kmap() otherwise.  Returns the
 * number of bytes copied (may be short on a user fault).
 */
static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *from;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_writeable(buf, copy)) {
		/* atomic path: destination is present, copyout() won't sleep */
		kaddr = kmap_atomic(page);
		from = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyout(buf, from, copy);
		copy -= left;
		skip += copy;
		from += copy;
		bytes -= copy;

		/* keep going through further iovecs while nothing faults */
		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyout(buf, from, copy);
			copy -= left;
			skip = copy;
			from += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		/* faulted mid-way: remember position, retry non-atomically */
		offset = from - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */
	kaddr = kmap(page);
	from = kaddr + offset;
	left = copyout(buf, from, copy);
	copy -= left;
	skip += copy;
	from += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyout(buf, from, copy);
		copy -= left;
		skip = copy;
		from += copy;
		bytes -= copy;
	}
	kunmap(page);
done:
	/* advance the iterator past whatever was copied */
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}
/*
 * Mirror image of copy_page_to_iter_iovec(): copy @bytes from the user
 * iovecs of @i into @page at @offset, advancing the iterator by the
 * amount actually copied.  Fast kmap_atomic() path when the source is
 * already faulted in, sleeping kmap() fallback otherwise.  Returns the
 * number of bytes copied (may be short on a user fault).
 */
static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	size_t skip, copy, left, wanted;
	const struct iovec *iov;
	char __user *buf;
	void *kaddr, *to;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	might_fault();
	wanted = bytes;
	iov = i->iov;
	skip = i->iov_offset;
	buf = iov->iov_base + skip;
	copy = min(bytes, iov->iov_len - skip);

	if (IS_ENABLED(CONFIG_HIGHMEM) && !fault_in_pages_readable(buf, copy)) {
		/* atomic path: source is present, copyin() won't sleep */
		kaddr = kmap_atomic(page);
		to = kaddr + offset;

		/* first chunk, usually the only one */
		left = copyin(to, buf, copy);
		copy -= left;
		skip += copy;
		to += copy;
		bytes -= copy;

		while (unlikely(!left && bytes)) {
			iov++;
			buf = iov->iov_base;
			copy = min(bytes, iov->iov_len);
			left = copyin(to, buf, copy);
			copy -= left;
			skip = copy;
			to += copy;
			bytes -= copy;
		}
		if (likely(!bytes)) {
			kunmap_atomic(kaddr);
			goto done;
		}
		/* faulted mid-way: remember position, retry non-atomically */
		offset = to - kaddr;
		buf += copy;
		kunmap_atomic(kaddr);
		copy = min(bytes, iov->iov_len - skip);
	}
	/* Too bad - revert to non-atomic kmap */
	kaddr = kmap(page);
	to = kaddr + offset;
	left = copyin(to, buf, copy);
	copy -= left;
	skip += copy;
	to += copy;
	bytes -= copy;
	while (unlikely(!left && bytes)) {
		iov++;
		buf = iov->iov_base;
		copy = min(bytes, iov->iov_len);
		left = copyin(to, buf, copy);
		copy -= left;
		skip = copy;
		to += copy;
		bytes -= copy;
	}
	kunmap(page);
done:
	/* advance the iterator past whatever was copied */
	if (skip == iov->iov_len) {
		iov++;
		skip = 0;
	}
	i->count -= wanted - bytes;
	i->nr_segs -= iov - i->iov;
	i->iov = iov;
	i->iov_offset = skip;
	return wanted - bytes;
}
  292. #ifdef PIPE_PARANOIA
/*
 * PIPE_PARANOIA consistency check: verify that the iterator's position
 * (i->idx, i->iov_offset) agrees with the pipe ring's curbuf/nrbufs
 * state.  A non-zero iov_offset means we are appending to the last
 * occupied buffer and must sit exactly at its end; a zero offset means
 * idx must name the first free slot.  On mismatch, dump the ring and
 * WARN.  Returns true when consistent.
 */
static bool sanity(const struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	int idx = i->idx;
	int next = pipe->curbuf + pipe->nrbufs;

	if (i->iov_offset) {
		struct pipe_buffer *p;

		if (unlikely(!pipe->nrbufs))
			goto Bad;	// pipe must be non-empty
		if (unlikely(idx != ((next - 1) & (pipe->buffers - 1))))
			goto Bad;	// must be at the last buffer...
		p = &pipe->bufs[idx];
		if (unlikely(p->offset + p->len != i->iov_offset))
			goto Bad;	// ... at the end of segment
	} else {
		if (idx != (next & (pipe->buffers - 1)))
			goto Bad;	// must be right after the last buffer
	}
	return true;
Bad:
	printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset);
	printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n",
			pipe->curbuf, pipe->nrbufs, pipe->buffers);
	for (idx = 0; idx < pipe->buffers; idx++)
		printk(KERN_ERR "[%p %p %d %d]\n",
			pipe->bufs[idx].ops,
			pipe->bufs[idx].page,
			pipe->bufs[idx].offset,
			pipe->bufs[idx].len);
	WARN_ON(1);
	return false;
}
  325. #else
  326. #define sanity(i) true
  327. #endif
  328. static inline int next_idx(int idx, struct pipe_inode_info *pipe)
  329. {
  330. return (idx + 1) & (pipe->buffers - 1);
  331. }
/*
 * Zero-copy "copy" of @page into a pipe-backed iterator: instead of
 * moving data, insert a reference to @page as a new pipe buffer (or
 * extend the last buffer when the new range is contiguous with it).
 * Returns bytes accounted, 0 if the ring is full or the iterator is
 * inconsistent.
 */
static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	struct pipe_buffer *buf;
	size_t off;
	int idx;

	if (unlikely(bytes > i->count))
		bytes = i->count;

	if (unlikely(!bytes))
		return 0;

	if (!sanity(i))
		return 0;

	off = i->iov_offset;
	idx = i->idx;
	buf = &pipe->bufs[idx];
	if (off) {
		if (offset == off && buf->page == page) {
			/* merge with the last one */
			buf->len += bytes;
			i->iov_offset += bytes;
			goto out;
		}
		idx = next_idx(idx, pipe);
		buf = &pipe->bufs[idx];
	}
	/* ring full - nothing we can do */
	if (idx == pipe->curbuf && pipe->nrbufs)
		return 0;
	pipe->nrbufs++;
	buf->ops = &page_cache_pipe_buf_ops;
	/* the pipe buffer holds its own page reference */
	get_page(buf->page = page);
	buf->offset = offset;
	buf->len = bytes;
	i->iov_offset = offset + bytes;
	i->idx = idx;
out:
	i->count -= bytes;
	return bytes;
}
/*
 * Fault in one or more iovecs of the given iov_iter, to a maximum length of
 * bytes. For each iovec, fault in each page that constitutes the iovec.
 *
 * Return 0 on success, or non-zero if the memory could not be accessed (i.e.
 * because it is an invalid address).
 */
int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes)
{
	size_t skip = i->iov_offset;
	const struct iovec *iov;
	int err;
	struct iovec v;

	/* only user-backed (iovec) memory can need faulting in */
	if (!(i->type & (ITER_BVEC|ITER_KVEC))) {
		iterate_iovec(i, bytes, v, iov, skip, ({
			/* returns straight out of the caller on failure */
			err = fault_in_pages_readable(v.iov_base, v.iov_len);
			if (unlikely(err))
				return err;
		0;}))
	}
	return 0;
}
EXPORT_SYMBOL(iov_iter_fault_in_readable);
  394. void iov_iter_init(struct iov_iter *i, unsigned int direction,
  395. const struct iovec *iov, unsigned long nr_segs,
  396. size_t count)
  397. {
  398. WARN_ON(direction & ~(READ | WRITE));
  399. direction &= READ | WRITE;
  400. /* It will get better. Eventually... */
  401. if (uaccess_kernel()) {
  402. i->type = ITER_KVEC | direction;
  403. i->kvec = (struct kvec *)iov;
  404. } else {
  405. i->type = ITER_IOVEC | direction;
  406. i->iov = iov;
  407. }
  408. i->nr_segs = nr_segs;
  409. i->iov_offset = 0;
  410. i->count = count;
  411. }
  412. EXPORT_SYMBOL(iov_iter_init);
  413. static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
  414. {
  415. char *from = kmap_atomic(page);
  416. memcpy(to, from + offset, len);
  417. kunmap_atomic(from);
  418. }
  419. static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
  420. {
  421. char *to = kmap_atomic(page);
  422. memcpy(to + offset, from, len);
  423. kunmap_atomic(to);
  424. }
  425. static void memzero_page(struct page *page, size_t offset, size_t len)
  426. {
  427. char *addr = kmap_atomic(page);
  428. memset(addr + offset, 0, len);
  429. kunmap_atomic(addr);
  430. }
  431. static inline bool allocated(struct pipe_buffer *buf)
  432. {
  433. return buf->ops == &default_pipe_buf_ops;
  434. }
  435. static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp)
  436. {
  437. size_t off = i->iov_offset;
  438. int idx = i->idx;
  439. if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) {
  440. idx = next_idx(idx, i->pipe);
  441. off = 0;
  442. }
  443. *idxp = idx;
  444. *offp = off;
  445. }
/*
 * Make room in the pipe for up to @size bytes, allocating fresh pages
 * for new buffers as needed.  Reports the starting slot and in-page
 * offset through @idxp/@offp and sets the .len of every buffer touched.
 * Returns the number of bytes of capacity actually secured - short if
 * the ring fills up or alloc_page() fails, 0 if nothing fits.
 */
static size_t push_pipe(struct iov_iter *i, size_t size,
			int *idxp, size_t *offp)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t off;
	int idx;
	ssize_t left;

	if (unlikely(size > i->count))
		size = i->count;
	if (unlikely(!size))
		return 0;

	left = size;
	data_start(i, &idx, &off);
	*idxp = idx;
	*offp = off;
	if (off) {
		/* room left in the current (appendable) buffer */
		left -= PAGE_SIZE - off;
		if (left <= 0) {
			pipe->bufs[idx].len += size;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	/* allocate new buffers until done or the ring is full */
	while (idx != pipe->curbuf || !pipe->nrbufs) {
		struct page *page = alloc_page(GFP_USER);

		if (!page)
			break;
		pipe->nrbufs++;
		pipe->bufs[idx].ops = &default_pipe_buf_ops;
		pipe->bufs[idx].page = page;
		pipe->bufs[idx].offset = 0;
		if (left <= PAGE_SIZE) {
			pipe->bufs[idx].len = left;
			return size;
		}
		pipe->bufs[idx].len = PAGE_SIZE;
		left -= PAGE_SIZE;
		idx = next_idx(idx, pipe);
	}
	return size - left;
}
/*
 * Copy @bytes from kernel buffer @addr into a pipe-backed iterator,
 * page by page, after reserving capacity with push_pipe().  Updates
 * the iterator position as it goes.  Returns bytes copied (short if
 * push_pipe() could not secure the full amount).
 */
static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
			 struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);

		memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
		addr += chunk;
	}
	i->count -= bytes;
	return bytes;
}
/*
 * Copy @bytes from kernel buffer @addr into @i, dispatching on the
 * iterator flavour (pipe / user iovec / bvec page / kernel kvec) and
 * advancing the iterator.  The "(from += len) - len" idiom passes the
 * pre-advance source pointer while stepping it for the next chunk.
 * Returns bytes copied; only the user-copy paths can fall short.
 */
size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		memcpy_to_page(v.bv_page, v.bv_offset,
			       (from += v.bv_len) - v.bv_len, v.bv_len),
		memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_to_iter);
  526. #ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
  527. static int copyout_mcsafe(void __user *to, const void *from, size_t n)
  528. {
  529. if (access_ok(VERIFY_WRITE, to, n)) {
  530. kasan_check_read(from, n);
  531. n = copy_to_user_mcsafe((__force void *) to, from, n);
  532. }
  533. return n;
  534. }
  535. static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
  536. const char *from, size_t len)
  537. {
  538. unsigned long ret;
  539. char *to;
  540. to = kmap_atomic(page);
  541. ret = memcpy_mcsafe(to + offset, from, len);
  542. kunmap_atomic(to);
  543. return ret;
  544. }
/*
 * Pipe flavour of the machine-check-safe copy: like copy_pipe_to_iter()
 * but stops at the first poisoned source cacheline, accounting only the
 * @xfer bytes that made it across.  Returns @xfer (possibly 0).
 */
static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
				struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off, xfer = 0;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;
	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
		unsigned long rem;

		rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr,
				chunk);
		i->idx = idx;
		/* only count the bytes that landed before the poison */
		i->iov_offset = off + chunk - rem;
		xfer += chunk - rem;
		if (rem)
			break;
		n -= chunk;
		addr += chunk;
	}
	i->count -= xfer;
	return xfer;
}
  572. /**
  573. * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
  574. * @addr: source kernel address
  575. * @bytes: total transfer length
  576. * @iter: destination iterator
  577. *
  578. * The pmem driver arranges for filesystem-dax to use this facility via
  579. * dax_copy_to_iter() for protecting read/write to persistent memory.
  580. * Unless / until an architecture can guarantee identical performance
  581. * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
  582. * performance regression to switch more users to the mcsafe version.
  583. *
  584. * Otherwise, the main differences between this and typical _copy_to_iter().
  585. *
  586. * * Typical tail/residue handling after a fault retries the copy
  587. * byte-by-byte until the fault happens again. Re-triggering machine
  588. * checks is potentially fatal so the implementation uses source
  589. * alignment and poison alignment assumptions to avoid re-triggering
  590. * hardware exceptions.
  591. *
  592. * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
  593. * Compare to copy_to_iter() where only ITER_IOVEC attempts might return
  594. * a short copy.
  595. *
  596. * See MCSAFE_TEST for self-test.
  597. */
size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
{
	const char *from = addr;
	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;

	if (unlikely(iov_iter_is_pipe(i)))
		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
		({
		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
				(from += v.bv_len) - v.bv_len, v.bv_len);
		if (rem) {
			/* poison hit mid-copy: report bytes done so far */
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		}),
		({
		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
				v.iov_len);
		if (rem) {
			/* poison hit mid-copy: report bytes done so far */
			curr_addr = (unsigned long) from;
			bytes = curr_addr - s_addr - rem;
			return bytes;
		}
		})
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
  630. #endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
/*
 * Copy @bytes from @i into kernel buffer @addr, advancing the iterator.
 * Pipes are read-side only here, so a pipe iterator is a caller bug
 * (WARN and return 0).  Returns bytes copied; only the user-copy path
 * can fall short.
 */
size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;

	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	if (iter_is_iovec(i))
		might_fault();
	iterate_and_advance(i, bytes, v,
		copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter);
/*
 * All-or-nothing variant of _copy_from_iter(): returns true and
 * advances @i only if the entire @bytes could be copied.  On any
 * failure (short iterator, faulting user copy) it returns false with
 * the iterator unadvanced - iterate_all_kinds() does not modify @i,
 * and iov_iter_advance() runs only on full success.
 */
bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;

	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;

	if (iter_is_iovec(i))
		might_fault();
	iterate_all_kinds(i, bytes, v, ({
		/* bail out before touching the iterator on a fault */
		if (copyin((to += v.iov_len) - v.iov_len,
				      v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full);
/*
 * Like _copy_from_iter(), but uses the non-temporal (cache-bypassing)
 * uaccess copy for the user-iovec case.  bvec/kvec segments still use
 * plain memcpy.  Returns bytes copied.
 */
size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;

	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	iterate_and_advance(i, bytes, v,
		__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(_copy_from_iter_nocache);
  690. #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
  691. /**
  692. * _copy_from_iter_flushcache - write destination through cpu cache
  693. * @addr: destination kernel address
  694. * @bytes: total transfer length
  695. * @iter: source iterator
  696. *
  697. * The pmem driver arranges for filesystem-dax to use this facility via
  698. * dax_copy_from_iter() for ensuring that writes to persistent memory
  699. * are flushed through the CPU cache. It is differentiated from
  700. * _copy_from_iter_nocache() in that guarantees all data is flushed for
  701. * all iterator types. The _copy_from_iter_nocache() only attempts to
  702. * bypass the cache for the ITER_IOVEC case, and on some archs may use
  703. * instructions that strand dirty-data in the cache.
  704. */
size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;

	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return 0;
	}
	/* every branch uses a flushcache copy - see kernel-doc above */
	iterate_and_advance(i, bytes, v,
		__copy_from_user_flushcache((to += v.iov_len) - v.iov_len,
					 v.iov_base, v.iov_len),
		memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy_flushcache((to += v.iov_len) - v.iov_len, v.iov_base,
			v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache);
  723. #endif
/*
 * All-or-nothing nocache copy: combination of _copy_from_iter_full()
 * semantics (advance only on complete success) with the non-temporal
 * user copy of _copy_from_iter_nocache().
 */
bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i)
{
	char *to = addr;

	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		/* bail out before touching the iterator on a fault */
		if (__copy_from_user_inatomic_nocache((to += v.iov_len) - v.iov_len,
					     v.iov_base, v.iov_len))
			return false;
		0;}),
		memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)

	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(_copy_from_iter_full_nocache);
/*
 * Check that [offset, offset+n) stays inside the (possibly compound)
 * page.  @v is the end offset relative to the compound head; the
 * "n <= v" half of the test also catches wraparound of the sum.
 * WARNs and returns false on violation.
 */
static inline bool page_copy_sane(struct page *page, size_t offset, size_t n)
{
	struct page *head = compound_head(page);
	size_t v = n + offset + page_address(page) - page_address(head);

	if (likely(n <= v && v <= (PAGE_SIZE << compound_order(head))))
		return true;
	WARN_ON(1);
	return false;
}
  755. size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
  756. struct iov_iter *i)
  757. {
  758. if (unlikely(!page_copy_sane(page, offset, bytes)))
  759. return 0;
  760. if (i->type & (ITER_BVEC|ITER_KVEC)) {
  761. void *kaddr = kmap_atomic(page);
  762. size_t wanted = copy_to_iter(kaddr + offset, bytes, i);
  763. kunmap_atomic(kaddr);
  764. return wanted;
  765. } else if (likely(!iov_iter_is_pipe(i)))
  766. return copy_page_to_iter_iovec(page, offset, bytes, i);
  767. else
  768. return copy_page_to_iter_pipe(page, offset, bytes, i);
  769. }
  770. EXPORT_SYMBOL(copy_page_to_iter);
  771. size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes,
  772. struct iov_iter *i)
  773. {
  774. if (unlikely(!page_copy_sane(page, offset, bytes)))
  775. return 0;
  776. if (unlikely(iov_iter_is_pipe(i))) {
  777. WARN_ON(1);
  778. return 0;
  779. }
  780. if (i->type & (ITER_BVEC|ITER_KVEC)) {
  781. void *kaddr = kmap_atomic(page);
  782. size_t wanted = _copy_from_iter(kaddr + offset, bytes, i);
  783. kunmap_atomic(kaddr);
  784. return wanted;
  785. } else
  786. return copy_page_from_iter_iovec(page, offset, bytes, i);
  787. }
  788. EXPORT_SYMBOL(copy_page_from_iter);
/*
 * Zero-fill @bytes of a pipe-backed iterator, reserving capacity with
 * push_pipe() and clearing page by page.  Returns bytes zeroed (short
 * if the pipe could not absorb the full amount).
 */
static size_t pipe_zero(size_t bytes, struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	size_t n, off;
	int idx;

	if (!sanity(i))
		return 0;

	bytes = n = push_pipe(i, bytes, &idx, &off);
	if (unlikely(!n))
		return 0;

	for ( ; n; idx = next_idx(idx, pipe), off = 0) {
		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);

		memzero_page(pipe->bufs[idx].page, off, chunk);
		i->idx = idx;
		i->iov_offset = off + chunk;
		n -= chunk;
	}
	i->count -= bytes;
	return bytes;
}
/*
 * Write @bytes of zeroes into @i, dispatching on iterator flavour and
 * advancing it.  Returns bytes zeroed; clear_user() on the iovec path
 * may fall short on a fault.
 */
size_t iov_iter_zero(size_t bytes, struct iov_iter *i)
{
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_zero(bytes, i);
	iterate_and_advance(i, bytes, v,
		clear_user(v.iov_base, v.iov_len),
		memzero_page(v.bv_page, v.bv_offset, v.bv_len),
		memset(v.iov_base, 0, v.iov_len)
	)

	return bytes;
}
EXPORT_SYMBOL(iov_iter_zero);
/*
 * Copy @bytes from @i into @page at @offset while holding a kmap_atomic
 * mapping.  Does NOT advance the iterator (uses iterate_all_kinds);
 * callers advance separately based on the return value.  Returns bytes
 * requested on the non-faulting paths, 0 on a bad range or a (bogus)
 * pipe iterator.
 */
size_t iov_iter_copy_from_user_atomic(struct page *page,
		struct iov_iter *i, unsigned long offset, size_t bytes)
{
	char *kaddr = kmap_atomic(page), *p = kaddr + offset;

	if (unlikely(!page_copy_sane(page, offset, bytes))) {
		kunmap_atomic(kaddr);
		return 0;
	}
	if (unlikely(iov_iter_is_pipe(i))) {
		kunmap_atomic(kaddr);
		WARN_ON(1);
		return 0;
	}
	iterate_all_kinds(i, bytes, v,
		copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len),
		memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page,
				 v.bv_offset, v.bv_len),
		memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)
	)
	kunmap_atomic(kaddr);
	return bytes;
}
EXPORT_SYMBOL(iov_iter_copy_from_user_atomic);
/*
 * Trim the pipe so nothing remains past the iterator's current position
 * (i->idx / i->iov_offset): clip the partially-used buffer, then release
 * every buffer after it.
 */
static inline void pipe_truncate(struct iov_iter *i)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (pipe->nrbufs) {
		size_t off = i->iov_offset;
		int idx = i->idx;
		/* buffers preceding i->idx in the ring (power-of-2 wraparound) */
		int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1);
		if (off) {
			/* current buffer partially used: shorten it to @off */
			pipe->bufs[idx].len = off - pipe->bufs[idx].offset;
			idx = next_idx(idx, pipe);
			nrbufs++;
		}
		while (pipe->nrbufs > nrbufs) {
			/* everything from @idx onward is past the end of data */
			pipe_buf_release(pipe, &pipe->bufs[idx]);
			idx = next_idx(idx, pipe);
			pipe->nrbufs--;
		}
	}
}
/*
 * Advance a pipe-backed iterator by @size bytes (clamped to i->count),
 * then discard any pipe contents past the new position.
 */
static void pipe_advance(struct iov_iter *i, size_t size)
{
	struct pipe_inode_info *pipe = i->pipe;
	if (unlikely(i->count < size))
		size = i->count;
	if (size) {
		struct pipe_buffer *buf;
		size_t off = i->iov_offset, left = size;
		int idx = i->idx;
		if (off) /* make it relative to the beginning of buffer */
			left += off - pipe->bufs[idx].offset;
		/* walk forward until @left fits inside one buffer */
		while (1) {
			buf = &pipe->bufs[idx];
			if (left <= buf->len)
				break;
			left -= buf->len;
			idx = next_idx(idx, pipe);
		}
		i->idx = idx;
		i->iov_offset = buf->offset + left;
	}
	i->count -= size;
	/* ... and discard everything past that point */
	pipe_truncate(i);
}
/*
 * iov_iter_advance - move the iterator forward by @size bytes
 *
 * For pipes this also truncates the pipe past the new position; for the
 * other flavours the per-segment step bodies are empty — only the
 * bookkeeping in iterate_and_advance() moves the cursor.
 */
void iov_iter_advance(struct iov_iter *i, size_t size)
{
	if (unlikely(iov_iter_is_pipe(i))) {
		pipe_advance(i, size);
		return;
	}
	iterate_and_advance(i, size, v, 0, 0, 0)
}
EXPORT_SYMBOL(iov_iter_advance);
/*
 * iov_iter_revert - undo the last @unroll bytes of advancement of @i
 *
 * Restores the iterator position so the data can be consumed again.
 * Callers may only revert over ground the iterator actually covered.
 */
void iov_iter_revert(struct iov_iter *i, size_t unroll)
{
	if (!unroll)
		return;
	if (WARN_ON(unroll > MAX_RW_COUNT))
		return;
	i->count += unroll;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		int idx = i->idx;
		size_t off = i->iov_offset;
		/* walk backwards through the pipe buffer ring */
		while (1) {
			/* bytes consumed from the buffer at @idx */
			size_t n = off - pipe->bufs[idx].offset;
			if (unroll < n) {
				off -= unroll;
				break;
			}
			unroll -= n;
			if (!unroll && idx == i->start_idx) {
				/* rewound fully to where the iterator began */
				off = 0;
				break;
			}
			/* step to the previous buffer (ring wraparound) */
			if (!idx--)
				idx = pipe->buffers - 1;
			off = pipe->bufs[idx].offset + pipe->bufs[idx].len;
		}
		i->iov_offset = off;
		i->idx = idx;
		/* release everything past the restored position */
		pipe_truncate(i);
		return;
	}
	if (unroll <= i->iov_offset) {
		/* still within the current segment */
		i->iov_offset -= unroll;
		return;
	}
	unroll -= i->iov_offset;
	if (iov_iter_is_bvec(i)) {
		const struct bio_vec *bvec = i->bvec;
		/* walk back over whole segments until @unroll fits in one */
		while (1) {
			size_t n = (--bvec)->bv_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->bvec = bvec;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	} else { /* same logics for iovec and kvec */
		const struct iovec *iov = i->iov;
		while (1) {
			size_t n = (--iov)->iov_len;
			i->nr_segs++;
			if (unroll <= n) {
				i->iov = iov;
				i->iov_offset = n - unroll;
				return;
			}
			unroll -= n;
		}
	}
}
EXPORT_SYMBOL(iov_iter_revert);
  960. /*
  961. * Return the count of just the current iov_iter segment.
  962. */
  963. size_t iov_iter_single_seg_count(const struct iov_iter *i)
  964. {
  965. if (unlikely(iov_iter_is_pipe(i)))
  966. return i->count; // it is a silly place, anyway
  967. if (i->nr_segs == 1)
  968. return i->count;
  969. else if (iov_iter_is_bvec(i))
  970. return min(i->count, i->bvec->bv_len - i->iov_offset);
  971. else
  972. return min(i->count, i->iov->iov_len - i->iov_offset);
  973. }
  974. EXPORT_SYMBOL(iov_iter_single_seg_count);
  975. void iov_iter_kvec(struct iov_iter *i, unsigned int direction,
  976. const struct kvec *kvec, unsigned long nr_segs,
  977. size_t count)
  978. {
  979. WARN_ON(direction & ~(READ | WRITE));
  980. i->type = ITER_KVEC | (direction & (READ | WRITE));
  981. i->kvec = kvec;
  982. i->nr_segs = nr_segs;
  983. i->iov_offset = 0;
  984. i->count = count;
  985. }
  986. EXPORT_SYMBOL(iov_iter_kvec);
  987. void iov_iter_bvec(struct iov_iter *i, unsigned int direction,
  988. const struct bio_vec *bvec, unsigned long nr_segs,
  989. size_t count)
  990. {
  991. WARN_ON(direction & ~(READ | WRITE));
  992. i->type = ITER_BVEC | (direction & (READ | WRITE));
  993. i->bvec = bvec;
  994. i->nr_segs = nr_segs;
  995. i->iov_offset = 0;
  996. i->count = count;
  997. }
  998. EXPORT_SYMBOL(iov_iter_bvec);
  999. void iov_iter_pipe(struct iov_iter *i, unsigned int direction,
  1000. struct pipe_inode_info *pipe,
  1001. size_t count)
  1002. {
  1003. BUG_ON(direction != READ);
  1004. WARN_ON(pipe->nrbufs == pipe->buffers);
  1005. i->type = ITER_PIPE | READ;
  1006. i->pipe = pipe;
  1007. i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1);
  1008. i->iov_offset = 0;
  1009. i->count = count;
  1010. i->start_idx = i->idx;
  1011. }
  1012. EXPORT_SYMBOL(iov_iter_pipe);
/*
 * iov_iter_alignment - report the worst-case address/length alignment of @i
 *
 * ORs together every segment's base address and length (plus the total
 * count), so callers can test the result against an alignment mask.
 */
unsigned long iov_iter_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;
	if (unlikely(iov_iter_is_pipe(i))) {
		/* only a partially-filled existing buffer can be misaligned */
		if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx]))
			return size | i->iov_offset;
		return size;
	}
	iterate_all_kinds(i, size, v,
		(res |= (unsigned long)v.iov_base | v.iov_len, 0),
		res |= v.bv_offset | v.bv_len,
		res |= (unsigned long)v.iov_base | v.iov_len
	)
	return res;
}
EXPORT_SYMBOL(iov_iter_alignment);
/*
 * iov_iter_gap_alignment - report alignment of the gaps between segments
 *
 * A "gap" exists where one segment ends mid-buffer or the next starts at
 * a nonzero offset; the first segment's start and the last one's end are
 * exempt (hence the !res and size != len guards).
 */
unsigned long iov_iter_gap_alignment(const struct iov_iter *i)
{
	unsigned long res = 0;
	size_t size = i->count;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);	/* meaningless for pipes */
		/*
		 * NOTE(review): returns ~0U although the return type is
		 * unsigned long — on 64-bit this is 0x00000000ffffffff, not
		 * all-ones; confirm whether ~0UL was intended.
		 */
		return ~0U;
	}
	iterate_all_kinds(i, size, v,
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0), 0),
		(res |= (!res ? 0 : (unsigned long)v.bv_offset) |
			(size != v.bv_len ? size : 0)),
		(res |= (!res ? 0 : (unsigned long)v.iov_base) |
			(size != v.iov_len ? size : 0))
	);
	return res;
}
EXPORT_SYMBOL(iov_iter_gap_alignment);
/*
 * Allocate up to @maxsize bytes of pipe buffer space starting at @idx
 * and store a referenced page pointer for each buffer touched into
 * @pages.  *@start receives the offset within the first page.
 * Returns the number of bytes covered, or -EFAULT if no space could be
 * pushed into the pipe.
 */
static inline ssize_t __pipe_get_pages(struct iov_iter *i,
				size_t maxsize,
				struct page **pages,
				int idx,
				size_t *start)
{
	struct pipe_inode_info *pipe = i->pipe;
	ssize_t n = push_pipe(i, maxsize, &idx, start);
	if (!n)
		return -EFAULT;
	maxsize = n;
	/* count from the beginning of the first page, not from *start */
	n += *start;
	while (n > 0) {
		/* grab a reference for the caller on each page handed out */
		get_page(*pages++ = pipe->bufs[idx].page);
		idx = next_idx(idx, pipe);
		n -= PAGE_SIZE;
	}
	return maxsize;
}
/*
 * Pipe flavour of iov_iter_get_pages(): cap the request by both
 * @maxpages and the space left in the pipe ring, then delegate to
 * __pipe_get_pages().
 */
static ssize_t pipe_get_pages(struct iov_iter *i,
		 struct page **pages, size_t maxsize, unsigned maxpages,
		 size_t *start)
{
	unsigned npages;
	size_t capacity;
	int idx;
	if (!maxsize)
		return 0;
	if (!sanity(i))
		return -EFAULT;
	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	capacity = min(npages,maxpages) * PAGE_SIZE - *start;
	return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start);
}
/*
 * iov_iter_get_pages - obtain referenced pages covering the iterator's data
 * @i:        iterator (not advanced)
 * @pages:    caller-supplied array to fill with page pointers
 * @maxsize:  maximum number of bytes to cover
 * @maxpages: capacity of @pages
 * @start:    out: offset of the data within the first page
 *
 * Only the first segment is examined (the macro bodies return directly).
 * Returns bytes covered, 0 for an empty request, or a negative errno;
 * ITER_KVEC has no backing pages and yields -EFAULT.
 */
ssize_t iov_iter_get_pages(struct iov_iter *i,
		 struct page **pages, size_t maxsize, unsigned maxpages,
		 size_t *start)
{
	if (maxsize > i->count)
		maxsize = i->count;
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages(i, pages, maxsize, maxpages, start);
	iterate_all_kinds(i, maxsize, v, ({
		/* user segment: pin with get_user_pages_fast() */
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;
		if (len > maxpages * PAGE_SIZE)
			len = maxpages * PAGE_SIZE;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		/* reading INTO the iter means writing to these user pages */
		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, pages);
		if (unlikely(res < 0))
			return res;
		/* account for a short pin and the initial in-page offset */
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		get_page(*pages = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages);
/* Allocate an array of @n page pointers (kvmalloc: may fall back to vmalloc). */
static struct page **get_pages_array(size_t n)
{
	return kvmalloc_array(n, sizeof(struct page *), GFP_KERNEL);
}
/*
 * Pipe flavour of iov_iter_get_pages_alloc(): size the request against
 * the pipe ring, allocate a page-pointer array of the right size, and
 * fill it via __pipe_get_pages().  On success the caller owns *@pages
 * (kvfree() it); on failure the array is freed here.
 */
static ssize_t pipe_get_pages_alloc(struct iov_iter *i,
		 struct page ***pages, size_t maxsize,
		 size_t *start)
{
	struct page **p;
	ssize_t n;
	int idx;
	int npages;
	if (!maxsize)
		return 0;
	if (!sanity(i))
		return -EFAULT;
	data_start(i, &idx, start);
	/* some of this one + all after this one */
	npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1;
	n = npages * PAGE_SIZE - *start;
	if (maxsize > n)
		/* pipe space is the limiting factor */
		maxsize = n;
	else
		/* request is the limiting factor: shrink the array to fit */
		npages = DIV_ROUND_UP(maxsize + *start, PAGE_SIZE);
	p = get_pages_array(npages);
	if (!p)
		return -ENOMEM;
	n = __pipe_get_pages(i, maxsize, p, idx, start);
	if (n > 0)
		*pages = p;
	else
		kvfree(p);
	return n;
}
/*
 * iov_iter_get_pages_alloc - like iov_iter_get_pages(), but allocates
 * the page-pointer array itself.
 * @i:       iterator (not advanced)
 * @pages:   out: newly allocated array of referenced pages (kvfree() it)
 * @maxsize: maximum number of bytes to cover
 * @start:   out: offset of the data within the first page
 *
 * Only the first segment is examined.  Returns bytes covered, 0 for an
 * empty request, or a negative errno (-EFAULT for ITER_KVEC, which has
 * no backing pages; -ENOMEM if the array allocation fails).
 */
ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
		 struct page ***pages, size_t maxsize,
		 size_t *start)
{
	struct page **p;
	if (maxsize > i->count)
		maxsize = i->count;
	if (unlikely(iov_iter_is_pipe(i)))
		return pipe_get_pages_alloc(i, pages, maxsize, start);
	iterate_all_kinds(i, maxsize, v, ({
		unsigned long addr = (unsigned long)v.iov_base;
		size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1));
		int n;
		int res;
		addr &= ~(PAGE_SIZE - 1);
		n = DIV_ROUND_UP(len, PAGE_SIZE);
		p = get_pages_array(n);
		if (!p)
			return -ENOMEM;
		res = get_user_pages_fast(addr, n, iov_iter_rw(i) != WRITE, p);
		if (unlikely(res < 0)) {
			/* nothing pinned: free the array before bailing */
			kvfree(p);
			return res;
		}
		*pages = p;
		/* account for a short pin and the initial in-page offset */
		return (res == n ? len : res * PAGE_SIZE) - *start;
	0;}),({
		/* can't be more than PAGE_SIZE */
		*start = v.bv_offset;
		*pages = p = get_pages_array(1);
		if (!p)
			return -ENOMEM;
		get_page(*p = v.bv_page);
		return v.bv_len;
	}),({
		return -EFAULT;
	})
	)
	return 0;
}
EXPORT_SYMBOL(iov_iter_get_pages_alloc);
/*
 * csum_and_copy_from_iter - copy from @i into @addr, folding an Internet
 * checksum of the copied data into *@csum.
 *
 * Advances the iterator.  Returns the number of bytes copied; a fault
 * in a user segment cuts the copy short (the err ? v.iov_len : 0 value
 * tells iterate_and_advance() how much of the segment was NOT copied).
 */
size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
			 struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;	/* running byte offset, needed by csum_block_add() */
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);	/* pipes cannot be a copy source */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					 (to += v.iov_len) - v.iov_len,
					 v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_from_iter);
/*
 * csum_and_copy_from_iter_full - all-or-nothing variant of
 * csum_and_copy_from_iter().
 *
 * Copies exactly @bytes or nothing: the iterator is only advanced after
 * the whole copy succeeds (iterate_all_kinds does not advance; the
 * explicit iov_iter_advance() at the end does).  Returns true on full
 * success, false if the iterator is too short or a user copy faults.
 */
bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
			 struct iov_iter *i)
{
	char *to = addr;
	__wsum sum, next;
	size_t off = 0;	/* running byte offset for csum_block_add() */
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);	/* pipes cannot be a copy source */
		return false;
	}
	if (unlikely(i->count < bytes))
		return false;
	iterate_all_kinds(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_from_user(v.iov_base,
					 (to += v.iov_len) - v.iov_len,
					 v.iov_len, 0, &err);
		if (err)
			/* fault: bail without advancing the iterator */
			return false;
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
		0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck(p + v.bv_offset,
						 (to += v.bv_len) - v.bv_len,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck(v.iov_base,
						 (to += v.iov_len) - v.iov_len,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	iov_iter_advance(i, bytes);
	return true;
}
EXPORT_SYMBOL(csum_and_copy_from_iter_full);
/*
 * csum_and_copy_to_iter - copy from @addr into @i, folding an Internet
 * checksum of the copied data into *@csum.
 *
 * Advances the iterator.  Returns the number of bytes copied; a fault
 * in a user segment cuts the copy short.
 */
size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum,
			 struct iov_iter *i)
{
	const char *from = addr;
	__wsum sum, next;
	size_t off = 0;	/* running byte offset for csum_block_add() */
	sum = *csum;
	if (unlikely(iov_iter_is_pipe(i))) {
		WARN_ON(1);	/* for now */
		return 0;
	}
	iterate_and_advance(i, bytes, v, ({
		int err = 0;
		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
					 v.iov_base,
					 v.iov_len, 0, &err);
		if (!err) {
			sum = csum_block_add(sum, next, off);
			off += v.iov_len;
		}
		err ? v.iov_len : 0;
	}), ({
		char *p = kmap_atomic(v.bv_page);
		next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len,
						 p + v.bv_offset,
						 v.bv_len, 0);
		kunmap_atomic(p);
		sum = csum_block_add(sum, next, off);
		off += v.bv_len;
	}),({
		next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len,
						 v.iov_base,
						 v.iov_len, 0);
		sum = csum_block_add(sum, next, off);
		off += v.iov_len;
	})
	)
	*csum = sum;
	return bytes;
}
EXPORT_SYMBOL(csum_and_copy_to_iter);
/*
 * iov_iter_npages - count the pages the iterator's remaining data spans
 * @i:        iterator (not modified)
 * @maxpages: cap on the answer
 *
 * Returns at most @maxpages; segments are counted page-by-page including
 * any partial first/last pages.
 */
int iov_iter_npages(const struct iov_iter *i, int maxpages)
{
	size_t size = i->count;
	int npages = 0;
	if (!size)
		return 0;
	if (unlikely(iov_iter_is_pipe(i))) {
		struct pipe_inode_info *pipe = i->pipe;
		size_t off;
		int idx;
		if (!sanity(i))
			return 0;
		data_start(i, &idx, &off);
		/* some of this one + all after this one */
		npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1;
		if (npages >= maxpages)
			return maxpages;
	} else iterate_all_kinds(i, size, v, ({
		/* user segment: pages touched between base and base+len */
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	0;}),({
		/* each bvec entry fits within a single page */
		npages++;
		if (npages >= maxpages)
			return maxpages;
	}),({
		/* kvec segment: same page arithmetic as the user case */
		unsigned long p = (unsigned long)v.iov_base;
		npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE)
			- p / PAGE_SIZE;
		if (npages >= maxpages)
			return maxpages;
	})
	)
	return npages;
}
EXPORT_SYMBOL(iov_iter_npages);
  1357. const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
  1358. {
  1359. *new = *old;
  1360. if (unlikely(iov_iter_is_pipe(new))) {
  1361. WARN_ON(1);
  1362. return NULL;
  1363. }
  1364. if (iov_iter_is_bvec(new))
  1365. return new->bvec = kmemdup(new->bvec,
  1366. new->nr_segs * sizeof(struct bio_vec),
  1367. flags);
  1368. else
  1369. /* iovec and kvec have identical layout */
  1370. return new->iov = kmemdup(new->iov,
  1371. new->nr_segs * sizeof(struct iovec),
  1372. flags);
  1373. }
  1374. EXPORT_SYMBOL(dup_iter);
  1375. /**
  1376. * import_iovec() - Copy an array of &struct iovec from userspace
  1377. * into the kernel, check that it is valid, and initialize a new
  1378. * &struct iov_iter iterator to access it.
  1379. *
  1380. * @type: One of %READ or %WRITE.
  1381. * @uvector: Pointer to the userspace array.
  1382. * @nr_segs: Number of elements in userspace array.
  1383. * @fast_segs: Number of elements in @iov.
  1384. * @iov: (input and output parameter) Pointer to pointer to (usually small
  1385. * on-stack) kernel array.
  1386. * @i: Pointer to iterator that will be initialized on success.
  1387. *
  1388. * If the array pointed to by *@iov is large enough to hold all @nr_segs,
  1389. * then this function places %NULL in *@iov on return. Otherwise, a new
  1390. * array will be allocated and the result placed in *@iov. This means that
  1391. * the caller may call kfree() on *@iov regardless of whether the small
  1392. * on-stack array was used or not (and regardless of whether this function
  1393. * returns an error or not).
  1394. *
  1395. * Return: 0 on success or negative error code on error.
  1396. */
  1397. int import_iovec(int type, const struct iovec __user * uvector,
  1398. unsigned nr_segs, unsigned fast_segs,
  1399. struct iovec **iov, struct iov_iter *i)
  1400. {
  1401. ssize_t n;
  1402. struct iovec *p;
  1403. n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
  1404. *iov, &p);
  1405. if (n < 0) {
  1406. if (p != *iov)
  1407. kfree(p);
  1408. *iov = NULL;
  1409. return n;
  1410. }
  1411. iov_iter_init(i, type, p, nr_segs, n);
  1412. *iov = p == *iov ? NULL : p;
  1413. return 0;
  1414. }
  1415. EXPORT_SYMBOL(import_iovec);
#ifdef CONFIG_COMPAT
#include <linux/compat.h>
/*
 * Compat (32-bit userspace on 64-bit kernel) counterpart of
 * import_iovec(): copies a compat_iovec array in, validates it, and
 * initializes @i.  Ownership semantics of *@iov match import_iovec().
 */
int compat_import_iovec(int type, const struct compat_iovec __user * uvector,
		 unsigned nr_segs, unsigned fast_segs,
		 struct iovec **iov, struct iov_iter *i)
{
	ssize_t n;
	struct iovec *p;
	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
				 *iov, &p);
	if (n < 0) {
		/* free only if a heap array was allocated */
		if (p != *iov)
			kfree(p);
		*iov = NULL;
		return n;
	}
	iov_iter_init(i, type, p, nr_segs, n);
	/* hand any heap array to the caller; NULL means on-stack was used */
	*iov = p == *iov ? NULL : p;
	return 0;
}
#endif
  1437. int import_single_range(int rw, void __user *buf, size_t len,
  1438. struct iovec *iov, struct iov_iter *i)
  1439. {
  1440. if (len > MAX_RW_COUNT)
  1441. len = MAX_RW_COUNT;
  1442. if (unlikely(!access_ok(!rw, buf, len)))
  1443. return -EFAULT;
  1444. iov->iov_base = buf;
  1445. iov->iov_len = len;
  1446. iov_iter_init(i, rw, iov, 1, len);
  1447. return 0;
  1448. }
  1449. EXPORT_SYMBOL(import_single_range);
/*
 * iov_iter_for_each_range - invoke @f on each segment of @i as a kvec
 * @i:       iterator (not advanced; iterate_all_kinds only walks it)
 * @bytes:   number of bytes to cover
 * @f:       callback; a nonzero return stops the walk
 * @context: opaque cookie passed through to @f
 *
 * bvec segments are temporarily mapped with kmap() so @f always sees a
 * kernel virtual address; user-space (iovec) segments are unsupported
 * and yield -EINVAL.  Returns 0 for @bytes == 0, otherwise the last
 * callback's return value.
 */
int iov_iter_for_each_range(struct iov_iter *i, size_t bytes,
			 int (*f)(struct kvec *vec, void *context),
			 void *context)
{
	struct kvec w;
	int err = -EINVAL;
	if (!bytes)
		return 0;
	iterate_all_kinds(i, bytes, v, -EINVAL, ({
		/* map the page so the callback gets a kernel pointer */
		w.iov_base = kmap(v.bv_page) + v.bv_offset;
		w.iov_len = v.bv_len;
		err = f(&w, context);
		kunmap(v.bv_page);
		err;}), ({
		w = v;
		err = f(&w, context);})
	)
	return err;
}
EXPORT_SYMBOL(iov_iter_for_each_range);