read_write.c 29 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329
  1. /*
  2. * linux/fs/read_write.c
  3. *
  4. * Copyright (C) 1991, 1992 Linus Torvalds
  5. */
  6. #include <linux/slab.h>
  7. #include <linux/stat.h>
  8. #include <linux/fcntl.h>
  9. #include <linux/file.h>
  10. #include <linux/uio.h>
  11. #include <linux/fsnotify.h>
  12. #include <linux/security.h>
  13. #include <linux/export.h>
  14. #include <linux/syscalls.h>
  15. #include <linux/pagemap.h>
  16. #include <linux/splice.h>
  17. #include <linux/compat.h>
  18. #include "internal.h"
  19. #include <asm/uaccess.h>
  20. #include <asm/unistd.h>
  21. typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *);
  22. typedef ssize_t (*iter_fn_t)(struct kiocb *, struct iov_iter *);
  23. const struct file_operations generic_ro_fops = {
  24. .llseek = generic_file_llseek,
  25. .read_iter = generic_file_read_iter,
  26. .mmap = generic_file_readonly_mmap,
  27. .splice_read = generic_file_splice_read,
  28. };
  29. EXPORT_SYMBOL(generic_ro_fops);
  30. static inline int unsigned_offsets(struct file *file)
  31. {
  32. return file->f_mode & FMODE_UNSIGNED_OFFSET;
  33. }
  34. /**
  35. * vfs_setpos - update the file offset for lseek
  36. * @file: file structure in question
  37. * @offset: file offset to seek to
  38. * @maxsize: maximum file size
  39. *
  40. * This is a low-level filesystem helper for updating the file offset to
  41. * the value specified by @offset if the given offset is valid and it is
  42. * not equal to the current file offset.
  43. *
  44. * Return the specified offset on success and -EINVAL on invalid offset.
  45. */
  46. loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
  47. {
  48. if (offset < 0 && !unsigned_offsets(file))
  49. return -EINVAL;
  50. if (offset > maxsize)
  51. return -EINVAL;
  52. if (offset != file->f_pos) {
  53. file->f_pos = offset;
  54. file->f_version = 0;
  55. }
  56. return offset;
  57. }
  58. EXPORT_SYMBOL(vfs_setpos);
  59. /**
  60. * generic_file_llseek_size - generic llseek implementation for regular files
  61. * @file: file structure to seek on
  62. * @offset: file offset to seek to
  63. * @whence: type of seek
  64. * @size: max size of this file in file system
  65. * @eof: offset used for SEEK_END position
  66. *
  67. * This is a variant of generic_file_llseek that allows passing in a custom
  68. * maximum file size and a custom EOF position, for e.g. hashed directories
  69. *
  70. * Synchronization:
  71. * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
  72. * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
  73. * read/writes behave like SEEK_SET against seeks.
  74. */
  75. loff_t
  76. generic_file_llseek_size(struct file *file, loff_t offset, int whence,
  77. loff_t maxsize, loff_t eof)
  78. {
  79. switch (whence) {
  80. case SEEK_END:
  81. offset += eof;
  82. break;
  83. case SEEK_CUR:
  84. /*
  85. * Here we special-case the lseek(fd, 0, SEEK_CUR)
  86. * position-querying operation. Avoid rewriting the "same"
  87. * f_pos value back to the file because a concurrent read(),
  88. * write() or lseek() might have altered it
  89. */
  90. if (offset == 0)
  91. return file->f_pos;
  92. /*
  93. * f_lock protects against read/modify/write race with other
  94. * SEEK_CURs. Note that parallel writes and reads behave
  95. * like SEEK_SET.
  96. */
  97. spin_lock(&file->f_lock);
  98. offset = vfs_setpos(file, file->f_pos + offset, maxsize);
  99. spin_unlock(&file->f_lock);
  100. return offset;
  101. case SEEK_DATA:
  102. /*
  103. * In the generic case the entire file is data, so as long as
  104. * offset isn't at the end of the file then the offset is data.
  105. */
  106. if (offset >= eof)
  107. return -ENXIO;
  108. break;
  109. case SEEK_HOLE:
  110. /*
  111. * There is a virtual hole at the end of the file, so as long as
  112. * offset isn't i_size or larger, return i_size.
  113. */
  114. if (offset >= eof)
  115. return -ENXIO;
  116. offset = eof;
  117. break;
  118. }
  119. return vfs_setpos(file, offset, maxsize);
  120. }
  121. EXPORT_SYMBOL(generic_file_llseek_size);
  122. /**
  123. * generic_file_llseek - generic llseek implementation for regular files
  124. * @file: file structure to seek on
  125. * @offset: file offset to seek to
  126. * @whence: type of seek
  127. *
  128. * This is a generic implemenation of ->llseek useable for all normal local
  129. * filesystems. It just updates the file offset to the value specified by
  130. * @offset and @whence.
  131. */
  132. loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
  133. {
  134. struct inode *inode = file->f_mapping->host;
  135. return generic_file_llseek_size(file, offset, whence,
  136. inode->i_sb->s_maxbytes,
  137. i_size_read(inode));
  138. }
  139. EXPORT_SYMBOL(generic_file_llseek);
  140. /**
  141. * fixed_size_llseek - llseek implementation for fixed-sized devices
  142. * @file: file structure to seek on
  143. * @offset: file offset to seek to
  144. * @whence: type of seek
  145. * @size: size of the file
  146. *
  147. */
  148. loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
  149. {
  150. switch (whence) {
  151. case SEEK_SET: case SEEK_CUR: case SEEK_END:
  152. return generic_file_llseek_size(file, offset, whence,
  153. size, size);
  154. default:
  155. return -EINVAL;
  156. }
  157. }
  158. EXPORT_SYMBOL(fixed_size_llseek);
  159. /**
  160. * noop_llseek - No Operation Performed llseek implementation
  161. * @file: file structure to seek on
  162. * @offset: file offset to seek to
  163. * @whence: type of seek
  164. *
  165. * This is an implementation of ->llseek useable for the rare special case when
  166. * userspace expects the seek to succeed but the (device) file is actually not
  167. * able to perform the seek. In this case you use noop_llseek() instead of
  168. * falling back to the default implementation of ->llseek.
  169. */
  170. loff_t noop_llseek(struct file *file, loff_t offset, int whence)
  171. {
  172. return file->f_pos;
  173. }
  174. EXPORT_SYMBOL(noop_llseek);
  175. loff_t no_llseek(struct file *file, loff_t offset, int whence)
  176. {
  177. return -ESPIPE;
  178. }
  179. EXPORT_SYMBOL(no_llseek);
  180. loff_t default_llseek(struct file *file, loff_t offset, int whence)
  181. {
  182. struct inode *inode = file_inode(file);
  183. loff_t retval;
  184. mutex_lock(&inode->i_mutex);
  185. switch (whence) {
  186. case SEEK_END:
  187. offset += i_size_read(inode);
  188. break;
  189. case SEEK_CUR:
  190. if (offset == 0) {
  191. retval = file->f_pos;
  192. goto out;
  193. }
  194. offset += file->f_pos;
  195. break;
  196. case SEEK_DATA:
  197. /*
  198. * In the generic case the entire file is data, so as
  199. * long as offset isn't at the end of the file then the
  200. * offset is data.
  201. */
  202. if (offset >= inode->i_size) {
  203. retval = -ENXIO;
  204. goto out;
  205. }
  206. break;
  207. case SEEK_HOLE:
  208. /*
  209. * There is a virtual hole at the end of the file, so
  210. * as long as offset isn't i_size or larger, return
  211. * i_size.
  212. */
  213. if (offset >= inode->i_size) {
  214. retval = -ENXIO;
  215. goto out;
  216. }
  217. offset = inode->i_size;
  218. break;
  219. }
  220. retval = -EINVAL;
  221. if (offset >= 0 || unsigned_offsets(file)) {
  222. if (offset != file->f_pos) {
  223. file->f_pos = offset;
  224. file->f_version = 0;
  225. }
  226. retval = offset;
  227. }
  228. out:
  229. mutex_unlock(&inode->i_mutex);
  230. return retval;
  231. }
  232. EXPORT_SYMBOL(default_llseek);
  233. loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
  234. {
  235. loff_t (*fn)(struct file *, loff_t, int);
  236. fn = no_llseek;
  237. if (file->f_mode & FMODE_LSEEK) {
  238. if (file->f_op->llseek)
  239. fn = file->f_op->llseek;
  240. }
  241. return fn(file, offset, whence);
  242. }
  243. EXPORT_SYMBOL(vfs_llseek);
  244. static inline struct fd fdget_pos(int fd)
  245. {
  246. return __to_fd(__fdget_pos(fd));
  247. }
  248. static inline void fdput_pos(struct fd f)
  249. {
  250. if (f.flags & FDPUT_POS_UNLOCK)
  251. mutex_unlock(&f.file->f_pos_lock);
  252. fdput(f);
  253. }
  254. SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
  255. {
  256. off_t retval;
  257. struct fd f = fdget_pos(fd);
  258. if (!f.file)
  259. return -EBADF;
  260. retval = -EINVAL;
  261. if (whence <= SEEK_MAX) {
  262. loff_t res = vfs_llseek(f.file, offset, whence);
  263. retval = res;
  264. if (res != (loff_t)retval)
  265. retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
  266. }
  267. fdput_pos(f);
  268. return retval;
  269. }
  270. #ifdef CONFIG_COMPAT
  271. COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
  272. {
  273. return sys_lseek(fd, offset, whence);
  274. }
  275. #endif
  276. #ifdef __ARCH_WANT_SYS_LLSEEK
  277. SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
  278. unsigned long, offset_low, loff_t __user *, result,
  279. unsigned int, whence)
  280. {
  281. int retval;
  282. struct fd f = fdget_pos(fd);
  283. loff_t offset;
  284. if (!f.file)
  285. return -EBADF;
  286. retval = -EINVAL;
  287. if (whence > SEEK_MAX)
  288. goto out_putf;
  289. offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
  290. whence);
  291. retval = (int)offset;
  292. if (offset >= 0) {
  293. retval = -EFAULT;
  294. if (!copy_to_user(result, &offset, sizeof(offset)))
  295. retval = 0;
  296. }
  297. out_putf:
  298. fdput_pos(f);
  299. return retval;
  300. }
  301. #endif
  302. ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos)
  303. {
  304. struct kiocb kiocb;
  305. ssize_t ret;
  306. if (!file->f_op->read_iter)
  307. return -EINVAL;
  308. init_sync_kiocb(&kiocb, file);
  309. kiocb.ki_pos = *ppos;
  310. iter->type |= READ;
  311. ret = file->f_op->read_iter(&kiocb, iter);
  312. BUG_ON(ret == -EIOCBQUEUED);
  313. if (ret > 0)
  314. *ppos = kiocb.ki_pos;
  315. return ret;
  316. }
  317. EXPORT_SYMBOL(vfs_iter_read);
  318. ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos)
  319. {
  320. struct kiocb kiocb;
  321. ssize_t ret;
  322. if (!file->f_op->write_iter)
  323. return -EINVAL;
  324. init_sync_kiocb(&kiocb, file);
  325. kiocb.ki_pos = *ppos;
  326. iter->type |= WRITE;
  327. ret = file->f_op->write_iter(&kiocb, iter);
  328. BUG_ON(ret == -EIOCBQUEUED);
  329. if (ret > 0)
  330. *ppos = kiocb.ki_pos;
  331. return ret;
  332. }
  333. EXPORT_SYMBOL(vfs_iter_write);
  334. /*
  335. * rw_verify_area doesn't like huge counts. We limit
  336. * them to something that fits in "int" so that others
  337. * won't have to do range checks all the time.
  338. */
  339. int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
  340. {
  341. struct inode *inode;
  342. loff_t pos;
  343. int retval = -EINVAL;
  344. inode = file_inode(file);
  345. if (unlikely((ssize_t) count < 0))
  346. return retval;
  347. pos = *ppos;
  348. if (unlikely(pos < 0)) {
  349. if (!unsigned_offsets(file))
  350. return retval;
  351. if (count >= -pos) /* both values are in 0..LLONG_MAX */
  352. return -EOVERFLOW;
  353. } else if (unlikely((loff_t) (pos + count) < 0)) {
  354. if (!unsigned_offsets(file))
  355. return retval;
  356. }
  357. if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
  358. retval = locks_mandatory_area(
  359. read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
  360. inode, file, pos, count);
  361. if (retval < 0)
  362. return retval;
  363. }
  364. retval = security_file_permission(file,
  365. read_write == READ ? MAY_READ : MAY_WRITE);
  366. if (retval)
  367. return retval;
  368. return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
  369. }
  370. static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
  371. {
  372. struct iovec iov = { .iov_base = buf, .iov_len = len };
  373. struct kiocb kiocb;
  374. struct iov_iter iter;
  375. ssize_t ret;
  376. init_sync_kiocb(&kiocb, filp);
  377. kiocb.ki_pos = *ppos;
  378. iov_iter_init(&iter, READ, &iov, 1, len);
  379. ret = filp->f_op->read_iter(&kiocb, &iter);
  380. BUG_ON(ret == -EIOCBQUEUED);
  381. *ppos = kiocb.ki_pos;
  382. return ret;
  383. }
  384. ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
  385. loff_t *pos)
  386. {
  387. if (file->f_op->read)
  388. return file->f_op->read(file, buf, count, pos);
  389. else if (file->f_op->read_iter)
  390. return new_sync_read(file, buf, count, pos);
  391. else
  392. return -EINVAL;
  393. }
  394. EXPORT_SYMBOL(__vfs_read);
  395. ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
  396. {
  397. ssize_t ret;
  398. if (!(file->f_mode & FMODE_READ))
  399. return -EBADF;
  400. if (!(file->f_mode & FMODE_CAN_READ))
  401. return -EINVAL;
  402. if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
  403. return -EFAULT;
  404. ret = rw_verify_area(READ, file, pos, count);
  405. if (ret >= 0) {
  406. count = ret;
  407. ret = __vfs_read(file, buf, count, pos);
  408. if (ret > 0) {
  409. fsnotify_access(file);
  410. add_rchar(current, ret);
  411. }
  412. inc_syscr(current);
  413. }
  414. return ret;
  415. }
  416. EXPORT_SYMBOL(vfs_read);
  417. static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
  418. {
  419. struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
  420. struct kiocb kiocb;
  421. struct iov_iter iter;
  422. ssize_t ret;
  423. init_sync_kiocb(&kiocb, filp);
  424. kiocb.ki_pos = *ppos;
  425. iov_iter_init(&iter, WRITE, &iov, 1, len);
  426. ret = filp->f_op->write_iter(&kiocb, &iter);
  427. BUG_ON(ret == -EIOCBQUEUED);
  428. if (ret > 0)
  429. *ppos = kiocb.ki_pos;
  430. return ret;
  431. }
  432. ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
  433. loff_t *pos)
  434. {
  435. if (file->f_op->write)
  436. return file->f_op->write(file, p, count, pos);
  437. else if (file->f_op->write_iter)
  438. return new_sync_write(file, p, count, pos);
  439. else
  440. return -EINVAL;
  441. }
  442. EXPORT_SYMBOL(__vfs_write);
  443. ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos)
  444. {
  445. mm_segment_t old_fs;
  446. const char __user *p;
  447. ssize_t ret;
  448. if (!(file->f_mode & FMODE_CAN_WRITE))
  449. return -EINVAL;
  450. old_fs = get_fs();
  451. set_fs(get_ds());
  452. p = (__force const char __user *)buf;
  453. if (count > MAX_RW_COUNT)
  454. count = MAX_RW_COUNT;
  455. ret = __vfs_write(file, p, count, pos);
  456. set_fs(old_fs);
  457. if (ret > 0) {
  458. fsnotify_modify(file);
  459. add_wchar(current, ret);
  460. }
  461. inc_syscw(current);
  462. return ret;
  463. }
  464. EXPORT_SYMBOL(__kernel_write);
  465. ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
  466. {
  467. ssize_t ret;
  468. if (!(file->f_mode & FMODE_WRITE))
  469. return -EBADF;
  470. if (!(file->f_mode & FMODE_CAN_WRITE))
  471. return -EINVAL;
  472. if (unlikely(!access_ok(VERIFY_READ, buf, count)))
  473. return -EFAULT;
  474. ret = rw_verify_area(WRITE, file, pos, count);
  475. if (ret >= 0) {
  476. count = ret;
  477. file_start_write(file);
  478. ret = __vfs_write(file, buf, count, pos);
  479. if (ret > 0) {
  480. fsnotify_modify(file);
  481. add_wchar(current, ret);
  482. }
  483. inc_syscw(current);
  484. file_end_write(file);
  485. }
  486. return ret;
  487. }
  488. EXPORT_SYMBOL(vfs_write);
  489. static inline loff_t file_pos_read(struct file *file)
  490. {
  491. return file->f_pos;
  492. }
  493. static inline void file_pos_write(struct file *file, loff_t pos)
  494. {
  495. file->f_pos = pos;
  496. }
  497. SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
  498. {
  499. struct fd f = fdget_pos(fd);
  500. ssize_t ret = -EBADF;
  501. if (f.file) {
  502. loff_t pos = file_pos_read(f.file);
  503. ret = vfs_read(f.file, buf, count, &pos);
  504. if (ret >= 0)
  505. file_pos_write(f.file, pos);
  506. fdput_pos(f);
  507. }
  508. return ret;
  509. }
  510. SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
  511. size_t, count)
  512. {
  513. struct fd f = fdget_pos(fd);
  514. ssize_t ret = -EBADF;
  515. if (f.file) {
  516. loff_t pos = file_pos_read(f.file);
  517. ret = vfs_write(f.file, buf, count, &pos);
  518. if (ret >= 0)
  519. file_pos_write(f.file, pos);
  520. fdput_pos(f);
  521. }
  522. return ret;
  523. }
  524. SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
  525. size_t, count, loff_t, pos)
  526. {
  527. struct fd f;
  528. ssize_t ret = -EBADF;
  529. if (pos < 0)
  530. return -EINVAL;
  531. f = fdget(fd);
  532. if (f.file) {
  533. ret = -ESPIPE;
  534. if (f.file->f_mode & FMODE_PREAD)
  535. ret = vfs_read(f.file, buf, count, &pos);
  536. fdput(f);
  537. }
  538. return ret;
  539. }
  540. SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
  541. size_t, count, loff_t, pos)
  542. {
  543. struct fd f;
  544. ssize_t ret = -EBADF;
  545. if (pos < 0)
  546. return -EINVAL;
  547. f = fdget(fd);
  548. if (f.file) {
  549. ret = -ESPIPE;
  550. if (f.file->f_mode & FMODE_PWRITE)
  551. ret = vfs_write(f.file, buf, count, &pos);
  552. fdput(f);
  553. }
  554. return ret;
  555. }
  556. /*
  557. * Reduce an iovec's length in-place. Return the resulting number of segments
  558. */
  559. unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
  560. {
  561. unsigned long seg = 0;
  562. size_t len = 0;
  563. while (seg < nr_segs) {
  564. seg++;
  565. if (len + iov->iov_len >= to) {
  566. iov->iov_len = to - len;
  567. break;
  568. }
  569. len += iov->iov_len;
  570. iov++;
  571. }
  572. return seg;
  573. }
  574. EXPORT_SYMBOL(iov_shorten);
  575. static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
  576. loff_t *ppos, iter_fn_t fn)
  577. {
  578. struct kiocb kiocb;
  579. ssize_t ret;
  580. init_sync_kiocb(&kiocb, filp);
  581. kiocb.ki_pos = *ppos;
  582. ret = fn(&kiocb, iter);
  583. BUG_ON(ret == -EIOCBQUEUED);
  584. *ppos = kiocb.ki_pos;
  585. return ret;
  586. }
  587. /* Do it by hand, with file-ops */
  588. static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
  589. loff_t *ppos, io_fn_t fn)
  590. {
  591. ssize_t ret = 0;
  592. while (iov_iter_count(iter)) {
  593. struct iovec iovec = iov_iter_iovec(iter);
  594. ssize_t nr;
  595. nr = fn(filp, iovec.iov_base, iovec.iov_len, ppos);
  596. if (nr < 0) {
  597. if (!ret)
  598. ret = nr;
  599. break;
  600. }
  601. ret += nr;
  602. if (nr != iovec.iov_len)
  603. break;
  604. iov_iter_advance(iter, nr);
  605. }
  606. return ret;
  607. }
  608. /* A write operation does a read from user space and vice versa */
  609. #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
  610. ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
  611. unsigned long nr_segs, unsigned long fast_segs,
  612. struct iovec *fast_pointer,
  613. struct iovec **ret_pointer)
  614. {
  615. unsigned long seg;
  616. ssize_t ret;
  617. struct iovec *iov = fast_pointer;
  618. /*
  619. * SuS says "The readv() function *may* fail if the iovcnt argument
  620. * was less than or equal to 0, or greater than {IOV_MAX}. Linux has
  621. * traditionally returned zero for zero segments, so...
  622. */
  623. if (nr_segs == 0) {
  624. ret = 0;
  625. goto out;
  626. }
  627. /*
  628. * First get the "struct iovec" from user memory and
  629. * verify all the pointers
  630. */
  631. if (nr_segs > UIO_MAXIOV) {
  632. ret = -EINVAL;
  633. goto out;
  634. }
  635. if (nr_segs > fast_segs) {
  636. iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
  637. if (iov == NULL) {
  638. ret = -ENOMEM;
  639. goto out;
  640. }
  641. }
  642. if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
  643. ret = -EFAULT;
  644. goto out;
  645. }
  646. /*
  647. * According to the Single Unix Specification we should return EINVAL
  648. * if an element length is < 0 when cast to ssize_t or if the
  649. * total length would overflow the ssize_t return value of the
  650. * system call.
  651. *
  652. * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
  653. * overflow case.
  654. */
  655. ret = 0;
  656. for (seg = 0; seg < nr_segs; seg++) {
  657. void __user *buf = iov[seg].iov_base;
  658. ssize_t len = (ssize_t)iov[seg].iov_len;
  659. /* see if we we're about to use an invalid len or if
  660. * it's about to overflow ssize_t */
  661. if (len < 0) {
  662. ret = -EINVAL;
  663. goto out;
  664. }
  665. if (type >= 0
  666. && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
  667. ret = -EFAULT;
  668. goto out;
  669. }
  670. if (len > MAX_RW_COUNT - ret) {
  671. len = MAX_RW_COUNT - ret;
  672. iov[seg].iov_len = len;
  673. }
  674. ret += len;
  675. }
  676. out:
  677. *ret_pointer = iov;
  678. return ret;
  679. }
  680. static ssize_t do_readv_writev(int type, struct file *file,
  681. const struct iovec __user * uvector,
  682. unsigned long nr_segs, loff_t *pos)
  683. {
  684. size_t tot_len;
  685. struct iovec iovstack[UIO_FASTIOV];
  686. struct iovec *iov = iovstack;
  687. struct iov_iter iter;
  688. ssize_t ret;
  689. io_fn_t fn;
  690. iter_fn_t iter_fn;
  691. ret = import_iovec(type, uvector, nr_segs,
  692. ARRAY_SIZE(iovstack), &iov, &iter);
  693. if (ret < 0)
  694. return ret;
  695. tot_len = iov_iter_count(&iter);
  696. if (!tot_len)
  697. goto out;
  698. ret = rw_verify_area(type, file, pos, tot_len);
  699. if (ret < 0)
  700. goto out;
  701. if (type == READ) {
  702. fn = file->f_op->read;
  703. iter_fn = file->f_op->read_iter;
  704. } else {
  705. fn = (io_fn_t)file->f_op->write;
  706. iter_fn = file->f_op->write_iter;
  707. file_start_write(file);
  708. }
  709. if (iter_fn)
  710. ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
  711. else
  712. ret = do_loop_readv_writev(file, &iter, pos, fn);
  713. if (type != READ)
  714. file_end_write(file);
  715. out:
  716. kfree(iov);
  717. if ((ret + (type == READ)) > 0) {
  718. if (type == READ)
  719. fsnotify_access(file);
  720. else
  721. fsnotify_modify(file);
  722. }
  723. return ret;
  724. }
  725. ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
  726. unsigned long vlen, loff_t *pos)
  727. {
  728. if (!(file->f_mode & FMODE_READ))
  729. return -EBADF;
  730. if (!(file->f_mode & FMODE_CAN_READ))
  731. return -EINVAL;
  732. return do_readv_writev(READ, file, vec, vlen, pos);
  733. }
  734. EXPORT_SYMBOL(vfs_readv);
  735. ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
  736. unsigned long vlen, loff_t *pos)
  737. {
  738. if (!(file->f_mode & FMODE_WRITE))
  739. return -EBADF;
  740. if (!(file->f_mode & FMODE_CAN_WRITE))
  741. return -EINVAL;
  742. return do_readv_writev(WRITE, file, vec, vlen, pos);
  743. }
  744. EXPORT_SYMBOL(vfs_writev);
  745. SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
  746. unsigned long, vlen)
  747. {
  748. struct fd f = fdget_pos(fd);
  749. ssize_t ret = -EBADF;
  750. if (f.file) {
  751. loff_t pos = file_pos_read(f.file);
  752. ret = vfs_readv(f.file, vec, vlen, &pos);
  753. if (ret >= 0)
  754. file_pos_write(f.file, pos);
  755. fdput_pos(f);
  756. }
  757. if (ret > 0)
  758. add_rchar(current, ret);
  759. inc_syscr(current);
  760. return ret;
  761. }
  762. SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
  763. unsigned long, vlen)
  764. {
  765. struct fd f = fdget_pos(fd);
  766. ssize_t ret = -EBADF;
  767. if (f.file) {
  768. loff_t pos = file_pos_read(f.file);
  769. ret = vfs_writev(f.file, vec, vlen, &pos);
  770. if (ret >= 0)
  771. file_pos_write(f.file, pos);
  772. fdput_pos(f);
  773. }
  774. if (ret > 0)
  775. add_wchar(current, ret);
  776. inc_syscw(current);
  777. return ret;
  778. }
  779. static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
  780. {
  781. #define HALF_LONG_BITS (BITS_PER_LONG / 2)
  782. return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
  783. }
  784. SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
  785. unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  786. {
  787. loff_t pos = pos_from_hilo(pos_h, pos_l);
  788. struct fd f;
  789. ssize_t ret = -EBADF;
  790. if (pos < 0)
  791. return -EINVAL;
  792. f = fdget(fd);
  793. if (f.file) {
  794. ret = -ESPIPE;
  795. if (f.file->f_mode & FMODE_PREAD)
  796. ret = vfs_readv(f.file, vec, vlen, &pos);
  797. fdput(f);
  798. }
  799. if (ret > 0)
  800. add_rchar(current, ret);
  801. inc_syscr(current);
  802. return ret;
  803. }
  804. SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
  805. unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
  806. {
  807. loff_t pos = pos_from_hilo(pos_h, pos_l);
  808. struct fd f;
  809. ssize_t ret = -EBADF;
  810. if (pos < 0)
  811. return -EINVAL;
  812. f = fdget(fd);
  813. if (f.file) {
  814. ret = -ESPIPE;
  815. if (f.file->f_mode & FMODE_PWRITE)
  816. ret = vfs_writev(f.file, vec, vlen, &pos);
  817. fdput(f);
  818. }
  819. if (ret > 0)
  820. add_wchar(current, ret);
  821. inc_syscw(current);
  822. return ret;
  823. }
  824. #ifdef CONFIG_COMPAT
  825. static ssize_t compat_do_readv_writev(int type, struct file *file,
  826. const struct compat_iovec __user *uvector,
  827. unsigned long nr_segs, loff_t *pos)
  828. {
  829. compat_ssize_t tot_len;
  830. struct iovec iovstack[UIO_FASTIOV];
  831. struct iovec *iov = iovstack;
  832. struct iov_iter iter;
  833. ssize_t ret;
  834. io_fn_t fn;
  835. iter_fn_t iter_fn;
  836. ret = compat_import_iovec(type, uvector, nr_segs,
  837. UIO_FASTIOV, &iov, &iter);
  838. if (ret < 0)
  839. return ret;
  840. tot_len = iov_iter_count(&iter);
  841. if (!tot_len)
  842. goto out;
  843. ret = rw_verify_area(type, file, pos, tot_len);
  844. if (ret < 0)
  845. goto out;
  846. if (type == READ) {
  847. fn = file->f_op->read;
  848. iter_fn = file->f_op->read_iter;
  849. } else {
  850. fn = (io_fn_t)file->f_op->write;
  851. iter_fn = file->f_op->write_iter;
  852. file_start_write(file);
  853. }
  854. if (iter_fn)
  855. ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
  856. else
  857. ret = do_loop_readv_writev(file, &iter, pos, fn);
  858. if (type != READ)
  859. file_end_write(file);
  860. out:
  861. kfree(iov);
  862. if ((ret + (type == READ)) > 0) {
  863. if (type == READ)
  864. fsnotify_access(file);
  865. else
  866. fsnotify_modify(file);
  867. }
  868. return ret;
  869. }
  870. static size_t compat_readv(struct file *file,
  871. const struct compat_iovec __user *vec,
  872. unsigned long vlen, loff_t *pos)
  873. {
  874. ssize_t ret = -EBADF;
  875. if (!(file->f_mode & FMODE_READ))
  876. goto out;
  877. ret = -EINVAL;
  878. if (!(file->f_mode & FMODE_CAN_READ))
  879. goto out;
  880. ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
  881. out:
  882. if (ret > 0)
  883. add_rchar(current, ret);
  884. inc_syscr(current);
  885. return ret;
  886. }
  887. COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
  888. const struct compat_iovec __user *,vec,
  889. compat_ulong_t, vlen)
  890. {
  891. struct fd f = fdget_pos(fd);
  892. ssize_t ret;
  893. loff_t pos;
  894. if (!f.file)
  895. return -EBADF;
  896. pos = f.file->f_pos;
  897. ret = compat_readv(f.file, vec, vlen, &pos);
  898. if (ret >= 0)
  899. f.file->f_pos = pos;
  900. fdput_pos(f);
  901. return ret;
  902. }
  903. static long __compat_sys_preadv64(unsigned long fd,
  904. const struct compat_iovec __user *vec,
  905. unsigned long vlen, loff_t pos)
  906. {
  907. struct fd f;
  908. ssize_t ret;
  909. if (pos < 0)
  910. return -EINVAL;
  911. f = fdget(fd);
  912. if (!f.file)
  913. return -EBADF;
  914. ret = -ESPIPE;
  915. if (f.file->f_mode & FMODE_PREAD)
  916. ret = compat_readv(f.file, vec, vlen, &pos);
  917. fdput(f);
  918. return ret;
  919. }
  #ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
  /*
   * Compat preadv64 entry point, built only when the architecture defines
   * __ARCH_WANT_COMPAT_SYS_PREADV64. The offset arrives as a single loff_t,
   * so the arguments are forwarded to __compat_sys_preadv64() unchanged.
   */
  COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
                  const struct compat_iovec __user *,vec,
                  unsigned long, vlen, loff_t, pos)
  {
          long result = __compat_sys_preadv64(fd, vec, vlen, pos);

          return result;
  }
  #endif
  928. COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
  929. const struct compat_iovec __user *,vec,
  930. compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
  931. {
  932. loff_t pos = ((loff_t)pos_high << 32) | pos_low;
  933. return __compat_sys_preadv64(fd, vec, vlen, pos);
  934. }
  935. static size_t compat_writev(struct file *file,
  936. const struct compat_iovec __user *vec,
  937. unsigned long vlen, loff_t *pos)
  938. {
  939. ssize_t ret = -EBADF;
  940. if (!(file->f_mode & FMODE_WRITE))
  941. goto out;
  942. ret = -EINVAL;
  943. if (!(file->f_mode & FMODE_CAN_WRITE))
  944. goto out;
  945. ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
  946. out:
  947. if (ret > 0)
  948. add_wchar(current, ret);
  949. inc_syscw(current);
  950. return ret;
  951. }
  952. COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
  953. const struct compat_iovec __user *, vec,
  954. compat_ulong_t, vlen)
  955. {
  956. struct fd f = fdget_pos(fd);
  957. ssize_t ret;
  958. loff_t pos;
  959. if (!f.file)
  960. return -EBADF;
  961. pos = f.file->f_pos;
  962. ret = compat_writev(f.file, vec, vlen, &pos);
  963. if (ret >= 0)
  964. f.file->f_pos = pos;
  965. fdput_pos(f);
  966. return ret;
  967. }
  968. static long __compat_sys_pwritev64(unsigned long fd,
  969. const struct compat_iovec __user *vec,
  970. unsigned long vlen, loff_t pos)
  971. {
  972. struct fd f;
  973. ssize_t ret;
  974. if (pos < 0)
  975. return -EINVAL;
  976. f = fdget(fd);
  977. if (!f.file)
  978. return -EBADF;
  979. ret = -ESPIPE;
  980. if (f.file->f_mode & FMODE_PWRITE)
  981. ret = compat_writev(f.file, vec, vlen, &pos);
  982. fdput(f);
  983. return ret;
  984. }
  #ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
  /*
   * Compat pwritev64 entry point, built only when the architecture defines
   * __ARCH_WANT_COMPAT_SYS_PWRITEV64. The offset arrives as a single loff_t,
   * so the arguments are forwarded to __compat_sys_pwritev64() unchanged.
   */
  COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
                  const struct compat_iovec __user *,vec,
                  unsigned long, vlen, loff_t, pos)
  {
          long result = __compat_sys_pwritev64(fd, vec, vlen, pos);

          return result;
  }
  #endif
  993. COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
  994. const struct compat_iovec __user *,vec,
  995. compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
  996. {
  997. loff_t pos = ((loff_t)pos_high << 32) | pos_low;
  998. return __compat_sys_pwritev64(fd, vec, vlen, pos);
  999. }
  1000. #endif
  1001. static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
  1002. size_t count, loff_t max)
  1003. {
  1004. struct fd in, out;
  1005. struct inode *in_inode, *out_inode;
  1006. loff_t pos;
  1007. loff_t out_pos;
  1008. ssize_t retval;
  1009. int fl;
  1010. /*
  1011. * Get input file, and verify that it is ok..
  1012. */
  1013. retval = -EBADF;
  1014. in = fdget(in_fd);
  1015. if (!in.file)
  1016. goto out;
  1017. if (!(in.file->f_mode & FMODE_READ))
  1018. goto fput_in;
  1019. retval = -ESPIPE;
  1020. if (!ppos) {
  1021. pos = in.file->f_pos;
  1022. } else {
  1023. pos = *ppos;
  1024. if (!(in.file->f_mode & FMODE_PREAD))
  1025. goto fput_in;
  1026. }
  1027. retval = rw_verify_area(READ, in.file, &pos, count);
  1028. if (retval < 0)
  1029. goto fput_in;
  1030. count = retval;
  1031. /*
  1032. * Get output file, and verify that it is ok..
  1033. */
  1034. retval = -EBADF;
  1035. out = fdget(out_fd);
  1036. if (!out.file)
  1037. goto fput_in;
  1038. if (!(out.file->f_mode & FMODE_WRITE))
  1039. goto fput_out;
  1040. retval = -EINVAL;
  1041. in_inode = file_inode(in.file);
  1042. out_inode = file_inode(out.file);
  1043. out_pos = out.file->f_pos;
  1044. retval = rw_verify_area(WRITE, out.file, &out_pos, count);
  1045. if (retval < 0)
  1046. goto fput_out;
  1047. count = retval;
  1048. if (!max)
  1049. max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);
  1050. if (unlikely(pos + count > max)) {
  1051. retval = -EOVERFLOW;
  1052. if (pos >= max)
  1053. goto fput_out;
  1054. count = max - pos;
  1055. }
  1056. fl = 0;
  1057. #if 0
  1058. /*
  1059. * We need to debate whether we can enable this or not. The
  1060. * man page documents EAGAIN return for the output at least,
  1061. * and the application is arguably buggy if it doesn't expect
  1062. * EAGAIN on a non-blocking file descriptor.
  1063. */
  1064. if (in.file->f_flags & O_NONBLOCK)
  1065. fl = SPLICE_F_NONBLOCK;
  1066. #endif
  1067. file_start_write(out.file);
  1068. retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
  1069. file_end_write(out.file);
  1070. if (retval > 0) {
  1071. add_rchar(current, retval);
  1072. add_wchar(current, retval);
  1073. fsnotify_access(in.file);
  1074. fsnotify_modify(out.file);
  1075. out.file->f_pos = out_pos;
  1076. if (ppos)
  1077. *ppos = pos;
  1078. else
  1079. in.file->f_pos = pos;
  1080. }
  1081. inc_syscr(current);
  1082. inc_syscw(current);
  1083. if (pos > max)
  1084. retval = -EOVERFLOW;
  1085. fput_out:
  1086. fdput(out);
  1087. fput_in:
  1088. fdput(in);
  1089. out:
  1090. return retval;
  1091. }
  1092. SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
  1093. {
  1094. loff_t pos;
  1095. off_t off;
  1096. ssize_t ret;
  1097. if (offset) {
  1098. if (unlikely(get_user(off, offset)))
  1099. return -EFAULT;
  1100. pos = off;
  1101. ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
  1102. if (unlikely(put_user(pos, offset)))
  1103. return -EFAULT;
  1104. return ret;
  1105. }
  1106. return do_sendfile(out_fd, in_fd, NULL, count, 0);
  1107. }
  1108. SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
  1109. {
  1110. loff_t pos;
  1111. ssize_t ret;
  1112. if (offset) {
  1113. if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
  1114. return -EFAULT;
  1115. ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
  1116. if (unlikely(put_user(pos, offset)))
  1117. return -EFAULT;
  1118. return ret;
  1119. }
  1120. return do_sendfile(out_fd, in_fd, NULL, count, 0);
  1121. }
  1122. #ifdef CONFIG_COMPAT
  1123. COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
  1124. compat_off_t __user *, offset, compat_size_t, count)
  1125. {
  1126. loff_t pos;
  1127. off_t off;
  1128. ssize_t ret;
  1129. if (offset) {
  1130. if (unlikely(get_user(off, offset)))
  1131. return -EFAULT;
  1132. pos = off;
  1133. ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
  1134. if (unlikely(put_user(pos, offset)))
  1135. return -EFAULT;
  1136. return ret;
  1137. }
  1138. return do_sendfile(out_fd, in_fd, NULL, count, 0);
  1139. }
  1140. COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
  1141. compat_loff_t __user *, offset, compat_size_t, count)
  1142. {
  1143. loff_t pos;
  1144. ssize_t ret;
  1145. if (offset) {
  1146. if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
  1147. return -EFAULT;
  1148. ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
  1149. if (unlikely(put_user(pos, offset)))
  1150. return -EFAULT;
  1151. return ret;
  1152. }
  1153. return do_sendfile(out_fd, in_fd, NULL, count, 0);
  1154. }
  1155. #endif