read_write.c 47 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049
  1. // SPDX-License-Identifier: GPL-2.0
  2. /*
  3. * linux/fs/read_write.c
  4. *
  5. * Copyright (C) 1991, 1992 Linus Torvalds
  6. */
  7. #include <linux/slab.h>
  8. #include <linux/stat.h>
  9. #include <linux/sched/xacct.h>
  10. #include <linux/fcntl.h>
  11. #include <linux/file.h>
  12. #include <linux/uio.h>
  13. #include <linux/fsnotify.h>
  14. #include <linux/security.h>
  15. #include <linux/export.h>
  16. #include <linux/syscalls.h>
  17. #include <linux/pagemap.h>
  18. #include <linux/splice.h>
  19. #include <linux/compat.h>
  20. #include <linux/mount.h>
  21. #include <linux/fs.h>
  22. #include "internal.h"
  23. #include <linux/uaccess.h>
  24. #include <asm/unistd.h>
/*
 * Default file operations for read-only files served from the page
 * cache: generic llseek/read_iter/splice_read plus a read-only mmap.
 */
const struct file_operations generic_ro_fops = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.mmap		= generic_file_readonly_mmap,
	.splice_read	= generic_file_splice_read,
};
EXPORT_SYMBOL(generic_ro_fops);
  32. static inline bool unsigned_offsets(struct file *file)
  33. {
  34. return file->f_mode & FMODE_UNSIGNED_OFFSET;
  35. }
  36. /**
  37. * vfs_setpos - update the file offset for lseek
  38. * @file: file structure in question
  39. * @offset: file offset to seek to
  40. * @maxsize: maximum file size
  41. *
  42. * This is a low-level filesystem helper for updating the file offset to
  43. * the value specified by @offset if the given offset is valid and it is
  44. * not equal to the current file offset.
  45. *
  46. * Return the specified offset on success and -EINVAL on invalid offset.
  47. */
  48. loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
  49. {
  50. if (offset < 0 && !unsigned_offsets(file))
  51. return -EINVAL;
  52. if (offset > maxsize)
  53. return -EINVAL;
  54. if (offset != file->f_pos) {
  55. file->f_pos = offset;
  56. file->f_version = 0;
  57. }
  58. return offset;
  59. }
  60. EXPORT_SYMBOL(vfs_setpos);
/**
 * generic_file_llseek_size - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @maxsize:	max size of this file in file system
 * @eof:	offset used for SEEK_END position
 *
 * This is a variant of generic_file_llseek that allows passing in a custom
 * maximum file size and a custom EOF position, for e.g. hashed directories
 *
 * Synchronization:
 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
 * read/writes behave like SEEK_SET against seeks.
 */
loff_t
generic_file_llseek_size(struct file *file, loff_t offset, int whence,
		loff_t maxsize, loff_t eof)
{
	switch (whence) {
	case SEEK_END:
		offset += eof;
		break;
	case SEEK_CUR:
		/*
		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
		 * position-querying operation.  Avoid rewriting the "same"
		 * f_pos value back to the file because a concurrent read(),
		 * write() or lseek() might have altered it
		 */
		if (offset == 0)
			return file->f_pos;
		/*
		 * f_lock protects against read/modify/write race with other
		 * SEEK_CURs. Note that parallel writes and reads behave
		 * like SEEK_SET.
		 */
		spin_lock(&file->f_lock);
		offset = vfs_setpos(file, file->f_pos + offset, maxsize);
		spin_unlock(&file->f_lock);
		return offset;
	case SEEK_DATA:
		/*
		 * In the generic case the entire file is data, so as long as
		 * offset isn't at the end of the file then the offset is data.
		 */
		if ((unsigned long long)offset >= eof)
			return -ENXIO;
		break;
	case SEEK_HOLE:
		/*
		 * There is a virtual hole at the end of the file, so as long as
		 * offset isn't i_size or larger, return i_size.
		 */
		if ((unsigned long long)offset >= eof)
			return -ENXIO;
		offset = eof;
		break;
	}

	return vfs_setpos(file, offset, maxsize);
}
EXPORT_SYMBOL(generic_file_llseek_size);
  124. /**
  125. * generic_file_llseek - generic llseek implementation for regular files
  126. * @file: file structure to seek on
  127. * @offset: file offset to seek to
  128. * @whence: type of seek
  129. *
  130. * This is a generic implemenation of ->llseek useable for all normal local
  131. * filesystems. It just updates the file offset to the value specified by
  132. * @offset and @whence.
  133. */
  134. loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
  135. {
  136. struct inode *inode = file->f_mapping->host;
  137. return generic_file_llseek_size(file, offset, whence,
  138. inode->i_sb->s_maxbytes,
  139. i_size_read(inode));
  140. }
  141. EXPORT_SYMBOL(generic_file_llseek);
  142. /**
  143. * fixed_size_llseek - llseek implementation for fixed-sized devices
  144. * @file: file structure to seek on
  145. * @offset: file offset to seek to
  146. * @whence: type of seek
  147. * @size: size of the file
  148. *
  149. */
  150. loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
  151. {
  152. switch (whence) {
  153. case SEEK_SET: case SEEK_CUR: case SEEK_END:
  154. return generic_file_llseek_size(file, offset, whence,
  155. size, size);
  156. default:
  157. return -EINVAL;
  158. }
  159. }
  160. EXPORT_SYMBOL(fixed_size_llseek);
  161. /**
  162. * no_seek_end_llseek - llseek implementation for fixed-sized devices
  163. * @file: file structure to seek on
  164. * @offset: file offset to seek to
  165. * @whence: type of seek
  166. *
  167. */
  168. loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
  169. {
  170. switch (whence) {
  171. case SEEK_SET: case SEEK_CUR:
  172. return generic_file_llseek_size(file, offset, whence,
  173. OFFSET_MAX, 0);
  174. default:
  175. return -EINVAL;
  176. }
  177. }
  178. EXPORT_SYMBOL(no_seek_end_llseek);
  179. /**
  180. * no_seek_end_llseek_size - llseek implementation for fixed-sized devices
  181. * @file: file structure to seek on
  182. * @offset: file offset to seek to
  183. * @whence: type of seek
  184. * @size: maximal offset allowed
  185. *
  186. */
  187. loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
  188. {
  189. switch (whence) {
  190. case SEEK_SET: case SEEK_CUR:
  191. return generic_file_llseek_size(file, offset, whence,
  192. size, 0);
  193. default:
  194. return -EINVAL;
  195. }
  196. }
  197. EXPORT_SYMBOL(no_seek_end_llseek_size);
  198. /**
  199. * noop_llseek - No Operation Performed llseek implementation
  200. * @file: file structure to seek on
  201. * @offset: file offset to seek to
  202. * @whence: type of seek
  203. *
  204. * This is an implementation of ->llseek useable for the rare special case when
  205. * userspace expects the seek to succeed but the (device) file is actually not
  206. * able to perform the seek. In this case you use noop_llseek() instead of
  207. * falling back to the default implementation of ->llseek.
  208. */
  209. loff_t noop_llseek(struct file *file, loff_t offset, int whence)
  210. {
  211. return file->f_pos;
  212. }
  213. EXPORT_SYMBOL(noop_llseek);
  214. loff_t no_llseek(struct file *file, loff_t offset, int whence)
  215. {
  216. return -ESPIPE;
  217. }
  218. EXPORT_SYMBOL(no_llseek);
/*
 * default_llseek - fallback llseek, fully serialized by the inode lock
 * (unlike generic_file_llseek_size(), which relies on f_pos atomicity
 * and f_lock for SEEK_CUR).
 */
loff_t default_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);
	loff_t retval;

	inode_lock(inode);
	switch (whence) {
	case SEEK_END:
		offset += i_size_read(inode);
		break;
	case SEEK_CUR:
		/* lseek(fd, 0, SEEK_CUR) merely queries the position */
		if (offset == 0) {
			retval = file->f_pos;
			goto out;
		}
		offset += file->f_pos;
		break;
	case SEEK_DATA:
		/*
		 * In the generic case the entire file is data, so as
		 * long as offset isn't at the end of the file then the
		 * offset is data.
		 */
		if (offset >= inode->i_size) {
			retval = -ENXIO;
			goto out;
		}
		break;
	case SEEK_HOLE:
		/*
		 * There is a virtual hole at the end of the file, so
		 * as long as offset isn't i_size or larger, return
		 * i_size.
		 */
		if (offset >= inode->i_size) {
			retval = -ENXIO;
			goto out;
		}
		offset = inode->i_size;
		break;
	}
	retval = -EINVAL;
	/* negative results are only valid for FMODE_UNSIGNED_OFFSET files */
	if (offset >= 0 || unsigned_offsets(file)) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
		}
		retval = offset;
	}
out:
	inode_unlock(inode);
	return retval;
}
EXPORT_SYMBOL(default_llseek);
  272. loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
  273. {
  274. loff_t (*fn)(struct file *, loff_t, int);
  275. fn = no_llseek;
  276. if (file->f_mode & FMODE_LSEEK) {
  277. if (file->f_op->llseek)
  278. fn = file->f_op->llseek;
  279. }
  280. return fn(file, offset, whence);
  281. }
  282. EXPORT_SYMBOL(vfs_llseek);
/*
 * lseek() syscall: reposition the file offset of @fd.  fdget_pos()
 * serializes against concurrent f_pos users.  The loff_t result is
 * narrowed to userspace's off_t; if it doesn't round-trip the offset
 * doesn't fit, so -EOVERFLOW is reported instead.
 */
SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
{
	off_t retval;
	struct fd f = fdget_pos(fd);

	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (whence <= SEEK_MAX) {
		loff_t res = vfs_llseek(f.file, offset, whence);
		retval = res;
		if (res != (loff_t)retval)
			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
	}
	fdput_pos(f);
	return retval;
}
#ifdef CONFIG_COMPAT
/* 32-bit lseek(): compat_off_t widens losslessly into the native call. */
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
{
	return sys_lseek(fd, offset, whence);
}
#endif
#ifdef __ARCH_WANT_SYS_LLSEEK
/*
 * llseek() for 32-bit ABIs: the 64-bit offset arrives as two 32-bit
 * halves, and the resulting position is stored through @result rather
 * than returned, since it wouldn't fit in the syscall return value.
 */
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, loff_t __user *, result,
		unsigned int, whence)
{
	int retval;
	struct fd f = fdget_pos(fd);
	loff_t offset;

	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (whence > SEEK_MAX)
		goto out_putf;

	/* reassemble the 64-bit offset from its two halves */
	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
			whence);

	retval = (int)offset;
	if (offset >= 0) {
		retval = -EFAULT;
		if (!copy_to_user(result, &offset, sizeof(offset)))
			retval = 0;
	}
out_putf:
	fdput_pos(f);
	return retval;
}
#endif
/*
 * rw_verify_area - common sanity checks before a read or write
 * @read_write:	READ or WRITE
 * @file:	file being accessed
 * @ppos:	position to access at
 * @count:	number of bytes
 *
 * Rejects negative counts, rejects negative or wrapping positions
 * (unless the file uses unsigned offsets), honours mandatory locks,
 * and asks the security layer for permission.  Returns 0 when the
 * access may proceed, otherwise a negative errno.
 */
int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
	struct inode *inode;
	loff_t pos;
	int retval = -EINVAL;

	inode = file_inode(file);
	if (unlikely((ssize_t) count < 0))
		return retval;
	pos = *ppos;
	if (unlikely(pos < 0)) {
		if (!unsigned_offsets(file))
			return retval;
		if (count >= -pos) /* both values are in 0..LLONG_MAX */
			return -EOVERFLOW;
	} else if (unlikely((loff_t) (pos + count) < 0)) {
		/* pos + count wrapped past LLONG_MAX */
		if (!unsigned_offsets(file))
			return retval;
	}

	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
		retval = locks_mandatory_area(inode, file, pos, pos + count - 1,
				read_write == READ ? F_RDLCK : F_WRLCK);
		if (retval < 0)
			return retval;
	}
	return security_file_permission(file,
				read_write == READ ? MAY_READ : MAY_WRITE);
}
/*
 * Emulate the classic ->read() interface on top of ->read_iter():
 * wrap the user buffer in a one-segment iov_iter and issue a
 * synchronous kiocb.  *@ppos is updated to the kiocb's final position.
 */
static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	iov_iter_init(&iter, READ, &iov, 1, len);

	ret = call_read_iter(filp, &kiocb, &iter);
	/* a sync kiocb must complete synchronously */
	BUG_ON(ret == -EIOCBQUEUED);
	*ppos = kiocb.ki_pos;
	return ret;
}
  372. ssize_t __vfs_read(struct file *file, char __user *buf, size_t count,
  373. loff_t *pos)
  374. {
  375. if (file->f_op->read)
  376. return file->f_op->read(file, buf, count, pos);
  377. else if (file->f_op->read_iter)
  378. return new_sync_read(file, buf, count, pos);
  379. else
  380. return -EINVAL;
  381. }
  382. ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
  383. {
  384. mm_segment_t old_fs;
  385. ssize_t result;
  386. old_fs = get_fs();
  387. set_fs(get_ds());
  388. /* The cast to a user pointer is valid due to the set_fs() */
  389. result = vfs_read(file, (void __user *)buf, count, pos);
  390. set_fs(old_fs);
  391. return result;
  392. }
  393. EXPORT_SYMBOL(kernel_read);
/*
 * vfs_read - read up to @count bytes from @file at *@pos into the user
 * buffer @buf, advancing *@pos by the amount read.
 *
 * Verifies the open mode (FMODE_READ / FMODE_CAN_READ), the user
 * buffer, and rw_verify_area() before dispatching to __vfs_read().
 * Transfers are capped at MAX_RW_COUNT.  On a successful transfer an
 * fsnotify access event is generated and I/O accounting is updated.
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;
	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(READ, file, pos, count);
	if (!ret) {
		if (count > MAX_RW_COUNT)
			count = MAX_RW_COUNT;
		ret = __vfs_read(file, buf, count, pos);
		if (ret > 0) {
			fsnotify_access(file);
			add_rchar(current, ret);
		}
		inc_syscr(current);
	}
	return ret;
}
/*
 * Emulate the classic ->write() interface on top of ->write_iter():
 * wrap the user buffer in a one-segment iov_iter and issue a
 * synchronous kiocb.  *@ppos is only advanced when bytes were written.
 */
static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	iov_iter_init(&iter, WRITE, &iov, 1, len);

	ret = call_write_iter(filp, &kiocb, &iter);
	/* a sync kiocb must complete synchronously */
	BUG_ON(ret == -EIOCBQUEUED);
	if (ret > 0)
		*ppos = kiocb.ki_pos;
	return ret;
}
  431. ssize_t __vfs_write(struct file *file, const char __user *p, size_t count,
  432. loff_t *pos)
  433. {
  434. if (file->f_op->write)
  435. return file->f_op->write(file, p, count, pos);
  436. else if (file->f_op->write_iter)
  437. return new_sync_write(file, p, count, pos);
  438. else
  439. return -EINVAL;
  440. }
/*
 * __kernel_write - write a kernel-space buffer to @file.
 *
 * Unlike vfs_write() this skips rw_verify_area() (and with it the
 * security hooks) and checks only FMODE_CAN_WRITE, not FMODE_WRITE.
 * The address limit is widened so the kernel pointer passes user-copy
 * checks in the low-level write path.
 */
ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
{
	mm_segment_t old_fs;
	const char __user *p;
	ssize_t ret;

	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;

	old_fs = get_fs();
	set_fs(get_ds());
	p = (__force const char __user *)buf;
	if (count > MAX_RW_COUNT)
		count = MAX_RW_COUNT;
	ret = __vfs_write(file, p, count, pos);
	set_fs(old_fs);
	if (ret > 0) {
		fsnotify_modify(file);
		add_wchar(current, ret);
	}
	inc_syscw(current);
	return ret;
}
EXPORT_SYMBOL(__kernel_write);
  463. ssize_t kernel_write(struct file *file, const void *buf, size_t count,
  464. loff_t *pos)
  465. {
  466. mm_segment_t old_fs;
  467. ssize_t res;
  468. old_fs = get_fs();
  469. set_fs(get_ds());
  470. /* The cast to a user pointer is valid due to the set_fs() */
  471. res = vfs_write(file, (__force const char __user *)buf, count, pos);
  472. set_fs(old_fs);
  473. return res;
  474. }
  475. EXPORT_SYMBOL(kernel_write);
/*
 * vfs_write - write up to @count bytes from user buffer @buf to @file
 * at *@pos, advancing *@pos by the amount written.
 *
 * Verifies the open mode (FMODE_WRITE / FMODE_CAN_WRITE), the user
 * buffer, and rw_verify_area() before dispatching to __vfs_write().
 * Transfers are capped at MAX_RW_COUNT.  On a successful transfer an
 * fsnotify modify event is generated and I/O accounting is updated.
 */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;
	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(WRITE, file, pos, count);
	if (!ret) {
		if (count > MAX_RW_COUNT)
			count = MAX_RW_COUNT;
		/* announce the write to the fs; pairs with file_end_write() */
		file_start_write(file);
		ret = __vfs_write(file, buf, count, pos);
		if (ret > 0) {
			fsnotify_modify(file);
			add_wchar(current, ret);
		}
		inc_syscw(current);
		file_end_write(file);
	}
	return ret;
}
  500. static inline loff_t file_pos_read(struct file *file)
  501. {
  502. return file->f_pos;
  503. }
  504. static inline void file_pos_write(struct file *file, loff_t pos)
  505. {
  506. file->f_pos = pos;
  507. }
/*
 * read() syscall: read from the file's current position.  fdget_pos()
 * serializes f_pos access for shared descriptors; the position is
 * written back only when the read did not fail outright.
 */
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_read(f.file, buf, count, &pos);
		if (ret >= 0)
			file_pos_write(f.file, pos);
		fdput_pos(f);
	}
	return ret;
}
/*
 * ksys_write - in-kernel entry point for the write() syscall: write
 * @count bytes from @buf to @fd at its current position.  fdget_pos()
 * serializes f_pos access; the position is written back only when the
 * write did not fail outright.
 */
ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_write(f.file, buf, count, &pos);
		if (ret >= 0)
			file_pos_write(f.file, pos);
		fdput_pos(f);
	}

	return ret;
}
/* write() syscall: thin wrapper around ksys_write(). */
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
		size_t, count)
{
	return ksys_write(fd, buf, count);
}
/*
 * pread64() syscall: read at an explicit position without moving the
 * file's f_pos (@pos is a local copy).  Negative positions are
 * rejected with -EINVAL; files opened without FMODE_PREAD fail with
 * -ESPIPE.
 */
SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
		size_t, count, loff_t, pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_read(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}
/*
 * pwrite64() syscall: write at an explicit position without moving the
 * file's f_pos (@pos is a local copy).  Negative positions are
 * rejected with -EINVAL; files opened without FMODE_PWRITE fail with
 * -ESPIPE.
 */
SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
		size_t, count, loff_t, pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_write(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}
/*
 * Issue one synchronous iter-based transfer for the whole @iter via
 * the file's ->read_iter or ->write_iter (selected by @type), honouring
 * the RWF_* @flags.  *@ppos is updated to the kiocb's final position.
 */
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, int type, rwf_t flags)
{
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	ret = kiocb_set_rw_flags(&kiocb, flags);
	if (ret)
		return ret;
	kiocb.ki_pos = *ppos;

	if (type == READ)
		ret = call_read_iter(filp, &kiocb, iter);
	else
		ret = call_write_iter(filp, &kiocb, iter);
	/* a sync kiocb must complete synchronously */
	BUG_ON(ret == -EIOCBQUEUED);
	*ppos = kiocb.ki_pos;
	return ret;
}
/*
 * Do it by hand, with file-ops: fall back to one classic ->read/->write
 * call per iovec segment when the file lacks an iter method.  The legacy
 * methods can't express rw flags, so anything beyond RWF_HIPRI is
 * refused.  A short or failed transfer ends the loop; an error is only
 * reported when nothing was transferred at all.
 */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, int type, rwf_t flags)
{
	ssize_t ret = 0;

	if (flags & ~RWF_HIPRI)
		return -EOPNOTSUPP;

	while (iov_iter_count(iter)) {
		struct iovec iovec = iov_iter_iovec(iter);
		ssize_t nr;

		if (type == READ) {
			nr = filp->f_op->read(filp, iovec.iov_base,
					      iovec.iov_len, ppos);
		} else {
			nr = filp->f_op->write(filp, iovec.iov_base,
					       iovec.iov_len, ppos);
		}

		if (nr < 0) {
			/* only report the error if nothing was transferred */
			if (!ret)
				ret = nr;
			break;
		}
		ret += nr;
		/* a short transfer ends the loop */
		if (nr != iovec.iov_len)
			break;
		iov_iter_advance(iter, nr);
	}

	return ret;
}
/*
 * Map a transfer direction to the access_ok() verification mode:
 * a write operation does a read from user space and vice versa.
 */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)
/**
 * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
 *     into the kernel and check that it is valid.
 *
 * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
 * @uvector: Pointer to the userspace array.
 * @nr_segs: Number of elements in userspace array.
 * @fast_segs: Number of elements in @fast_pointer.
 * @fast_pointer: Pointer to (usually small on-stack) kernel array.
 * @ret_pointer: (output parameter) Pointer to a variable that will point to
 *     either @fast_pointer, a newly allocated kernel array, or NULL,
 *     depending on which array was used.
 *
 * This function copies an array of &struct iovec of @nr_segs from
 * userspace into the kernel and checks that each element is valid (e.g.
 * it does not point to a kernel address or cause overflow by being too
 * large, etc.).
 *
 * As an optimization, the caller may provide a pointer to a small
 * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
 * (the size of this array, or 0 if unused, should be given in @fast_segs).
 *
 * @ret_pointer will always point to the array that was used, so the
 * caller must take care not to call kfree() on it e.g. in case the
 * @fast_pointer array was used and it was allocated on the stack.
 *
 * Return: The total number of bytes covered by the iovec array on success
 *   or a negative error code on error.
 */
ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
			      unsigned long nr_segs, unsigned long fast_segs,
			      struct iovec *fast_pointer,
			      struct iovec **ret_pointer)
{
	unsigned long seg;
	ssize_t ret;
	struct iovec *iov = fast_pointer;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument
	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0) {
		ret = 0;
		goto out;
	}

	/*
	 * First get the "struct iovec" from user memory and
	 * verify all the pointers
	 */
	if (nr_segs > UIO_MAXIOV) {
		ret = -EINVAL;
		goto out;
	}
	if (nr_segs > fast_segs) {
		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
		if (iov == NULL) {
			ret = -ENOMEM;
			goto out;
		}
	}
	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
		ret = -EFAULT;
		goto out;
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL
	 * if an element length is < 0 when cast to ssize_t or if the
	 * total length would overflow the ssize_t return value of the
	 * system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	ret = 0;
	for (seg = 0; seg < nr_segs; seg++) {
		void __user *buf = iov[seg].iov_base;
		ssize_t len = (ssize_t)iov[seg].iov_len;

		/* see if we're about to use an invalid len or if
		 * it's about to overflow ssize_t */
		if (len < 0) {
			ret = -EINVAL;
			goto out;
		}
		if (type >= 0
		    && unlikely(!access_ok(vrfy_dir(type), buf, len))) {
			ret = -EFAULT;
			goto out;
		}
		/* silently truncate the segment to the MAX_RW_COUNT cap */
		if (len > MAX_RW_COUNT - ret) {
			len = MAX_RW_COUNT - ret;
			iov[seg].iov_len = len;
		}
		ret += len;
	}
out:
	/* always published, even on error, so the caller can kfree() it */
	*ret_pointer = iov;
	return ret;
}
#ifdef CONFIG_COMPAT
/*
 * Compat (32-bit) counterpart of rw_copy_check_uvector(): copies an
 * array of struct compat_iovec from userspace, validates each entry,
 * and converts it into native struct iovec in *ret_pointer.
 */
ssize_t compat_rw_copy_check_uvector(int type,
		const struct compat_iovec __user *uvector, unsigned long nr_segs,
		unsigned long fast_segs, struct iovec *fast_pointer,
		struct iovec **ret_pointer)
{
	compat_ssize_t tot_len;
	struct iovec *iov = *ret_pointer = fast_pointer;
	ssize_t ret = 0;
	int seg;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument
	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0)
		goto out;

	ret = -EINVAL;
	if (nr_segs > UIO_MAXIOV)
		goto out;
	if (nr_segs > fast_segs) {
		ret = -ENOMEM;
		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
		if (iov == NULL)
			goto out;
	}
	/* publish before any early exit so the caller can kfree() it */
	*ret_pointer = iov;

	ret = -EFAULT;
	if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector)))
		goto out;

	/*
	 * Single unix specification:
	 * We should -EINVAL if an element length is not >= 0 and fitting an
	 * ssize_t.
	 *
	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
	 * no overflow possibility.
	 */
	tot_len = 0;
	ret = -EINVAL;
	for (seg = 0; seg < nr_segs; seg++) {
		compat_uptr_t buf;
		compat_ssize_t len;

		if (__get_user(len, &uvector->iov_len) ||
		    __get_user(buf, &uvector->iov_base)) {
			ret = -EFAULT;
			goto out;
		}
		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
			goto out;
		if (type >= 0 &&
		    !access_ok(vrfy_dir(type), compat_ptr(buf), len)) {
			ret = -EFAULT;
			goto out;
		}
		/* silently truncate to the MAX_RW_COUNT cap */
		if (len > MAX_RW_COUNT - tot_len)
			len = MAX_RW_COUNT - tot_len;
		tot_len += len;
		iov->iov_base = compat_ptr(buf);
		iov->iov_len = (compat_size_t) len;
		uvector++;
		iov++;
	}
	ret = tot_len;
out:
	return ret;
}
#endif
/*
 * Common implementation for all iovec-based reads: validate the file
 * mode and range, then dispatch to ->read_iter or fall back to looping
 * plain reads.  On success (ret >= 0, including a zero-length request)
 * an fsnotify access event is generated.
 */
static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
		loff_t *pos, rwf_t flags)
{
	size_t tot_len;
	ssize_t ret = 0;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;

	tot_len = iov_iter_count(iter);
	if (!tot_len)
		goto out;	/* zero-length read: still notifies below */
	ret = rw_verify_area(READ, file, pos, tot_len);
	if (ret < 0)
		return ret;

	if (file->f_op->read_iter)
		ret = do_iter_readv_writev(file, iter, pos, READ, flags);
	else
		ret = do_loop_readv_writev(file, iter, pos, READ, flags);
out:
	if (ret >= 0)
		fsnotify_access(file);
	return ret;
}
  811. ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
  812. rwf_t flags)
  813. {
  814. if (!file->f_op->read_iter)
  815. return -EINVAL;
  816. return do_iter_read(file, iter, ppos, flags);
  817. }
  818. EXPORT_SYMBOL(vfs_iter_read);
/*
 * Common implementation for all iovec-based writes: validate the file
 * mode and range, then dispatch to ->write_iter or fall back to looping
 * plain writes.  Generates an fsnotify modify event only when bytes
 * were actually written (ret > 0).
 */
static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
		loff_t *pos, rwf_t flags)
{
	size_t tot_len;
	ssize_t ret = 0;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;

	tot_len = iov_iter_count(iter);
	if (!tot_len)
		return 0;
	ret = rw_verify_area(WRITE, file, pos, tot_len);
	if (ret < 0)
		return ret;

	if (file->f_op->write_iter)
		ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
	else
		ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
	if (ret > 0)
		fsnotify_modify(file);
	return ret;
}
  842. ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
  843. rwf_t flags)
  844. {
  845. if (!file->f_op->write_iter)
  846. return -EINVAL;
  847. return do_iter_write(file, iter, ppos, flags);
  848. }
  849. EXPORT_SYMBOL(vfs_iter_write);
  850. ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
  851. unsigned long vlen, loff_t *pos, rwf_t flags)
  852. {
  853. struct iovec iovstack[UIO_FASTIOV];
  854. struct iovec *iov = iovstack;
  855. struct iov_iter iter;
  856. ssize_t ret;
  857. ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
  858. if (ret >= 0) {
  859. ret = do_iter_read(file, &iter, pos, flags);
  860. kfree(iov);
  861. }
  862. return ret;
  863. }
  864. static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
  865. unsigned long vlen, loff_t *pos, rwf_t flags)
  866. {
  867. struct iovec iovstack[UIO_FASTIOV];
  868. struct iovec *iov = iovstack;
  869. struct iov_iter iter;
  870. ssize_t ret;
  871. ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
  872. if (ret >= 0) {
  873. file_start_write(file);
  874. ret = do_iter_write(file, &iter, pos, flags);
  875. file_end_write(file);
  876. kfree(iov);
  877. }
  878. return ret;
  879. }
/*
 * readv() backend: resolve the fd with f_pos protection, read at the
 * file position, and write the position back on success.  Accounting
 * (rchar/syscr) is updated even when the fd lookup fails, matching the
 * other read paths in this file.
 */
static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
			unsigned long vlen, rwf_t flags)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_readv(f.file, vec, vlen, &pos, flags);
		if (ret >= 0)
			file_pos_write(f.file, pos);
		fdput_pos(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}
/*
 * writev() backend: resolve the fd with f_pos protection, write at the
 * file position, and store the updated position back on success.
 */
static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
			 unsigned long vlen, rwf_t flags)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos = file_pos_read(f.file);
		ret = vfs_writev(f.file, vec, vlen, &pos, flags);
		if (ret >= 0)
			file_pos_write(f.file, pos);
		fdput_pos(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}
/*
 * Reassemble a 64-bit file offset from the two unsigned-long halves the
 * p{read,write}v ABI passes in.  Shifting twice by half the word width
 * (instead of once by BITS_PER_LONG) keeps the expression well defined
 * on 64-bit, where a single shift by 64 would exceed the type width.
 */
static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
{
#define HALF_LONG_BITS (BITS_PER_LONG / 2)
	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
}
/*
 * preadv() backend: positional read at @pos; the file's f_pos is never
 * touched, so plain fdget() (no f_pos serialization) is sufficient.
 * Requires FMODE_PREAD, i.e. fails with -ESPIPE on pipes and the like.
 */
static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
			 unsigned long vlen, loff_t pos, rwf_t flags)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_readv(f.file, vec, vlen, &pos, flags);
		fdput(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}
/*
 * pwritev() backend: positional write at @pos; f_pos is never touched.
 * Requires FMODE_PWRITE, i.e. fails with -ESPIPE on pipes and the like.
 */
static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
			  unsigned long vlen, loff_t pos, rwf_t flags)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_writev(f.file, vec, vlen, &pos, flags);
		fdput(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}
/* readv(2): iovec read at the current file position, no flags. */
SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	return do_readv(fd, vec, vlen, 0);
}
/* writev(2): iovec write at the current file position, no flags. */
SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	return do_writev(fd, vec, vlen, 0);
}
/* preadv(2): positional read; offset arrives split into two longs. */
SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	return do_preadv(fd, vec, vlen, pos, 0);
}
/*
 * preadv2(2): like preadv() plus RWF_* flags; the sentinel offset -1
 * means "use and update the current file position" (plain readv path).
 */
SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
		rwf_t, flags)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	if (pos == -1)
		return do_readv(fd, vec, vlen, flags);

	return do_preadv(fd, vec, vlen, pos, flags);
}
/* pwritev(2): positional write; offset arrives split into two longs. */
SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	return do_pwritev(fd, vec, vlen, pos, 0);
}
/*
 * pwritev2(2): like pwritev() plus RWF_* flags; the sentinel offset -1
 * means "use and update the current file position" (plain writev path).
 */
SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
		rwf_t, flags)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	if (pos == -1)
		return do_writev(fd, vec, vlen, flags);

	return do_pwritev(fd, vec, vlen, pos, flags);
}
  997. #ifdef CONFIG_COMPAT
  998. static size_t compat_readv(struct file *file,
  999. const struct compat_iovec __user *vec,
  1000. unsigned long vlen, loff_t *pos, rwf_t flags)
  1001. {
  1002. struct iovec iovstack[UIO_FASTIOV];
  1003. struct iovec *iov = iovstack;
  1004. struct iov_iter iter;
  1005. ssize_t ret;
  1006. ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
  1007. if (ret >= 0) {
  1008. ret = do_iter_read(file, &iter, pos, flags);
  1009. kfree(iov);
  1010. }
  1011. if (ret > 0)
  1012. add_rchar(current, ret);
  1013. inc_syscr(current);
  1014. return ret;
  1015. }
  1016. static size_t do_compat_readv(compat_ulong_t fd,
  1017. const struct compat_iovec __user *vec,
  1018. compat_ulong_t vlen, rwf_t flags)
  1019. {
  1020. struct fd f = fdget_pos(fd);
  1021. ssize_t ret;
  1022. loff_t pos;
  1023. if (!f.file)
  1024. return -EBADF;
  1025. pos = f.file->f_pos;
  1026. ret = compat_readv(f.file, vec, vlen, &pos, flags);
  1027. if (ret >= 0)
  1028. f.file->f_pos = pos;
  1029. fdput_pos(f);
  1030. return ret;
  1031. }
/* 32-bit readv(2) entry point: no flags. */
COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
		const struct compat_iovec __user *,vec,
		compat_ulong_t, vlen)
{
	return do_compat_readv(fd, vec, vlen, 0);
}
/*
 * Shared backend for the compat positional-read entry points: read at
 * @pos without touching f_pos.  Requires FMODE_PREAD (-ESPIPE otherwise)
 * and rejects negative offsets with -EINVAL.
 */
static long do_compat_preadv64(unsigned long fd,
			       const struct compat_iovec __user *vec,
			       unsigned long vlen, loff_t pos, rwf_t flags)
{
	struct fd f;
	ssize_t ret;

	if (pos < 0)
		return -EINVAL;
	f = fdget(fd);
	if (!f.file)
		return -EBADF;
	ret = -ESPIPE;
	if (f.file->f_mode & FMODE_PREAD)
		ret = compat_readv(f.file, vec, vlen, &pos, flags);
	fdput(f);
	return ret;
}
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
/* Arch-specific variant taking the full 64-bit offset in one argument. */
COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos)
{
	return do_compat_preadv64(fd, vec, vlen, pos, 0);
}
#endif
/* 32-bit preadv(2): offset split into low/high 32-bit halves. */
COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
		const struct compat_iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	return do_compat_preadv64(fd, vec, vlen, pos, 0);
}
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
/* Arch-specific preadv2 variant with a single 64-bit offset argument. */
COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	return do_compat_preadv64(fd, vec, vlen, pos, flags);
}
#endif
/*
 * 32-bit preadv2(2): flags supported; an offset of -1 selects the
 * current-position (readv) path.
 */
COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
		const struct compat_iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
		rwf_t, flags)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	if (pos == -1)
		return do_compat_readv(fd, vec, vlen, flags);

	return do_compat_preadv64(fd, vec, vlen, pos, flags);
}
  1088. static size_t compat_writev(struct file *file,
  1089. const struct compat_iovec __user *vec,
  1090. unsigned long vlen, loff_t *pos, rwf_t flags)
  1091. {
  1092. struct iovec iovstack[UIO_FASTIOV];
  1093. struct iovec *iov = iovstack;
  1094. struct iov_iter iter;
  1095. ssize_t ret;
  1096. ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
  1097. if (ret >= 0) {
  1098. file_start_write(file);
  1099. ret = do_iter_write(file, &iter, pos, flags);
  1100. file_end_write(file);
  1101. kfree(iov);
  1102. }
  1103. if (ret > 0)
  1104. add_wchar(current, ret);
  1105. inc_syscw(current);
  1106. return ret;
  1107. }
  1108. static size_t do_compat_writev(compat_ulong_t fd,
  1109. const struct compat_iovec __user* vec,
  1110. compat_ulong_t vlen, rwf_t flags)
  1111. {
  1112. struct fd f = fdget_pos(fd);
  1113. ssize_t ret;
  1114. loff_t pos;
  1115. if (!f.file)
  1116. return -EBADF;
  1117. pos = f.file->f_pos;
  1118. ret = compat_writev(f.file, vec, vlen, &pos, flags);
  1119. if (ret >= 0)
  1120. f.file->f_pos = pos;
  1121. fdput_pos(f);
  1122. return ret;
  1123. }
/* 32-bit writev(2) entry point: no flags. */
COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
		const struct compat_iovec __user *, vec,
		compat_ulong_t, vlen)
{
	return do_compat_writev(fd, vec, vlen, 0);
}
/*
 * Shared backend for the compat positional-write entry points: write at
 * @pos without touching f_pos.  Requires FMODE_PWRITE (-ESPIPE
 * otherwise) and rejects negative offsets with -EINVAL.
 */
static long do_compat_pwritev64(unsigned long fd,
				const struct compat_iovec __user *vec,
				unsigned long vlen, loff_t pos, rwf_t flags)
{
	struct fd f;
	ssize_t ret;

	if (pos < 0)
		return -EINVAL;
	f = fdget(fd);
	if (!f.file)
		return -EBADF;
	ret = -ESPIPE;
	if (f.file->f_mode & FMODE_PWRITE)
		ret = compat_writev(f.file, vec, vlen, &pos, flags);
	fdput(f);
	return ret;
}
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
/* Arch-specific variant taking the full 64-bit offset in one argument. */
COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos)
{
	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
}
#endif
/* 32-bit pwritev(2): offset split into low/high 32-bit halves. */
COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
		const struct compat_iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	return do_compat_pwritev64(fd, vec, vlen, pos, 0);
}
#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
/* Arch-specific pwritev2 variant with a single 64-bit offset argument. */
COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
		const struct compat_iovec __user *,vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	return do_compat_pwritev64(fd, vec, vlen, pos, flags);
}
#endif
/*
 * 32-bit pwritev2(2): flags supported; an offset of -1 selects the
 * current-position (writev) path.
 */
COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
		const struct compat_iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	if (pos == -1)
		return do_compat_writev(fd, vec, vlen, flags);

	return do_compat_pwritev64(fd, vec, vlen, pos, flags);
}
  1179. #endif
/*
 * Common implementation of sendfile(2)/sendfile64(2): copy up to @count
 * bytes from @in_fd to @out_fd via do_splice_direct().
 *
 * @ppos:	if non-NULL, read starts at *ppos (requires FMODE_PREAD)
 *		and *ppos is updated; if NULL, the input file's f_pos is
 *		used and updated.
 * @max:	upper bound on pos+count; 0 means "derive from the smaller
 *		of the two filesystems' s_maxbytes".
 *
 * Positions of both files are only written back when bytes were moved
 * (retval > 0).  Returns bytes copied or a negative errno.
 */
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
			   size_t count, loff_t max)
{
	struct fd in, out;
	struct inode *in_inode, *out_inode;
	loff_t pos;
	loff_t out_pos;
	ssize_t retval;
	int fl;

	/*
	 * Get input file, and verify that it is ok..
	 */
	retval = -EBADF;
	in = fdget(in_fd);
	if (!in.file)
		goto out;
	if (!(in.file->f_mode & FMODE_READ))
		goto fput_in;
	retval = -ESPIPE;
	if (!ppos) {
		pos = in.file->f_pos;
	} else {
		pos = *ppos;
		if (!(in.file->f_mode & FMODE_PREAD))
			goto fput_in;
	}
	retval = rw_verify_area(READ, in.file, &pos, count);
	if (retval < 0)
		goto fput_in;
	if (count > MAX_RW_COUNT)
		count = MAX_RW_COUNT;

	/*
	 * Get output file, and verify that it is ok..
	 */
	retval = -EBADF;
	out = fdget(out_fd);
	if (!out.file)
		goto fput_in;
	if (!(out.file->f_mode & FMODE_WRITE))
		goto fput_out;
	retval = -EINVAL;
	in_inode = file_inode(in.file);
	out_inode = file_inode(out.file);
	out_pos = out.file->f_pos;
	retval = rw_verify_area(WRITE, out.file, &out_pos, count);
	if (retval < 0)
		goto fput_out;

	if (!max)
		max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);

	/* clamp the transfer so it cannot run past @max */
	if (unlikely(pos + count > max)) {
		retval = -EOVERFLOW;
		if (pos >= max)
			goto fput_out;
		count = max - pos;
	}

	fl = 0;
#if 0
	/*
	 * We need to debate whether we can enable this or not. The
	 * man page documents EAGAIN return for the output at least,
	 * and the application is arguably buggy if it doesn't expect
	 * EAGAIN on a non-blocking file descriptor.
	 */
	if (in.file->f_flags & O_NONBLOCK)
		fl = SPLICE_F_NONBLOCK;
#endif
	file_start_write(out.file);
	retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl);
	file_end_write(out.file);

	if (retval > 0) {
		add_rchar(current, retval);
		add_wchar(current, retval);
		fsnotify_access(in.file);
		fsnotify_modify(out.file);
		out.file->f_pos = out_pos;
		if (ppos)
			*ppos = pos;
		else
			in.file->f_pos = pos;
	}

	inc_syscr(current);
	inc_syscw(current);
	/* splice may have advanced pos past max; report that as overflow */
	if (pos > max)
		retval = -EOVERFLOW;

fput_out:
	fdput(out);
fput_in:
	fdput(in);
out:
	return retval;
}
/*
 * sendfile(2) with a 32-bit off_t offset: copy the user offset in/out
 * and cap the transfer at MAX_NON_LFS.  With a NULL offset the input
 * file's position is used (max of 0 = derive from s_maxbytes).
 */
SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
/* sendfile64(2): full 64-bit loff_t offset, no LFS cap. */
SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
  1301. #ifdef CONFIG_COMPAT
/* 32-bit sendfile(2): compat_off_t offset, capped at MAX_NON_LFS. */
COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
		compat_off_t __user *, offset, compat_size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
/* 32-bit sendfile64(2): full 64-bit compat_loff_t offset, no LFS cap. */
COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
		compat_loff_t __user *, offset, compat_size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
  1334. #endif
/*
 * copy_file_range() differs from regular file read and write in that it
 * specifically allows returning partial success.  When it does so is up to
 * the copy_file_range method of the filesystem involved.
 */
/*
 * Copy @len bytes from @file_in at @pos_in to @file_out at @pos_out.
 *
 * Strategy, in order of preference:
 *   1. ->clone_file_range (reflink) — cheapest when both sides support it;
 *   2. ->copy_file_range — unless it reports -EOPNOTSUPP;
 *   3. do_splice_direct() as the universal fallback (clamped to
 *      MAX_RW_COUNT per call).
 *
 * Both files must be regular files on the same superblock; @flags must
 * currently be 0.  Returns bytes copied (possibly fewer than @len) or a
 * negative errno.
 */
ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in,
			    struct file *file_out, loff_t pos_out,
			    size_t len, unsigned int flags)
{
	struct inode *inode_in = file_inode(file_in);
	struct inode *inode_out = file_inode(file_out);
	ssize_t ret;

	if (flags != 0)
		return -EINVAL;

	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
		return -EISDIR;
	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
		return -EINVAL;

	/* NOTE(review): treats any non-zero rw_verify_area() result as
	 * failure, unlike do_sendfile() which only checks < 0 — assumes
	 * rw_verify_area() returns 0 on success here; confirm. */
	ret = rw_verify_area(READ, file_in, &pos_in, len);
	if (unlikely(ret))
		return ret;

	ret = rw_verify_area(WRITE, file_out, &pos_out, len);
	if (unlikely(ret))
		return ret;

	if (!(file_in->f_mode & FMODE_READ) ||
	    !(file_out->f_mode & FMODE_WRITE) ||
	    (file_out->f_flags & O_APPEND))
		return -EBADF;

	/* this could be relaxed once a method supports cross-fs copies */
	if (inode_in->i_sb != inode_out->i_sb)
		return -EXDEV;

	if (len == 0)
		return 0;

	file_start_write(file_out);

	/*
	 * Try cloning first, this is supported by more file systems, and
	 * more efficient if both clone and copy are supported (e.g. NFS).
	 */
	if (file_in->f_op->clone_file_range) {
		ret = file_in->f_op->clone_file_range(file_in, pos_in,
				file_out, pos_out, len);
		if (ret == 0) {
			/* a successful clone transferred the full length */
			ret = len;
			goto done;
		}
	}

	if (file_out->f_op->copy_file_range) {
		ret = file_out->f_op->copy_file_range(file_in, pos_in, file_out,
						      pos_out, len, flags);
		if (ret != -EOPNOTSUPP)
			goto done;
	}

	ret = do_splice_direct(file_in, &pos_in, file_out, &pos_out,
			len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0);

done:
	if (ret > 0) {
		fsnotify_access(file_in);
		add_rchar(current, ret);
		fsnotify_modify(file_out);
		add_wchar(current, ret);
	}

	inc_syscr(current);
	inc_syscw(current);

	file_end_write(file_out);

	return ret;
}
EXPORT_SYMBOL(vfs_copy_file_range);
/*
 * copy_file_range(2): resolve both fds, take the starting positions
 * either from the user-supplied offset pointers or from each file's
 * f_pos, perform the copy, and write the advanced positions back the
 * same way they came in.
 */
SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in,
		int, fd_out, loff_t __user *, off_out,
		size_t, len, unsigned int, flags)
{
	loff_t pos_in;
	loff_t pos_out;
	struct fd f_in;
	struct fd f_out;
	ssize_t ret = -EBADF;

	f_in = fdget(fd_in);
	if (!f_in.file)
		goto out2;

	f_out = fdget(fd_out);
	if (!f_out.file)
		goto out1;

	ret = -EFAULT;
	if (off_in) {
		if (copy_from_user(&pos_in, off_in, sizeof(loff_t)))
			goto out;
	} else {
		pos_in = f_in.file->f_pos;
	}

	if (off_out) {
		if (copy_from_user(&pos_out, off_out, sizeof(loff_t)))
			goto out;
	} else {
		pos_out = f_out.file->f_pos;
	}

	ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len,
				  flags);
	if (ret > 0) {
		pos_in += ret;
		pos_out += ret;

		/* report the new positions via the same channel they came in */
		if (off_in) {
			if (copy_to_user(off_in, &pos_in, sizeof(loff_t)))
				ret = -EFAULT;
		} else {
			f_in.file->f_pos = pos_in;
		}

		if (off_out) {
			if (copy_to_user(off_out, &pos_out, sizeof(loff_t)))
				ret = -EFAULT;
		} else {
			f_out.file->f_pos = pos_out;
		}
	}

out:
	fdput(f_out);
out1:
	fdput(f_in);
out2:
	return ret;
}
/*
 * Validate a clone/dedupe range on one side of the operation: reject
 * negative offsets and ranges that wrap past the top of loff_t, honour
 * mandatory locking, and run the LSM file-permission hook.
 *
 * Returns 0 if the range may be used, otherwise a negative errno.
 */
static int clone_verify_area(struct file *file, loff_t pos, u64 len, bool write)
{
	struct inode *inode = file_inode(file);

	if (unlikely(pos < 0))
		return -EINVAL;

	/* wrap check: kernel builds with -fno-strict-overflow, so the
	 * signed addition here is well defined */
	if (unlikely((loff_t) (pos + len) < 0))
		return -EINVAL;

	if (unlikely(inode->i_flctx && mandatory_lock(inode))) {
		loff_t end = len ? pos + len - 1 : OFFSET_MAX;
		int retval;

		retval = locks_mandatory_area(inode, file, pos, end,
				write ? F_WRLCK : F_RDLCK);
		if (retval < 0)
			return retval;
	}

	return security_file_permission(file, write ? MAY_WRITE : MAY_READ);
}
/*
 * Check that the two inodes are eligible for cloning, the ranges make
 * sense, and then flush all dirty data. Caller must ensure that the
 * inodes have been locked against any other modifications.
 *
 * A zero @*len means "to EOF" for reflink (and is expanded in place),
 * but "nothing to do" for dedupe.  For dedupe the source and
 * destination extents are also compared byte-for-byte.
 *
 * Returns: 0 for "nothing to clone", 1 for "something to clone", or
 * the usual negative error code.
 */
int vfs_clone_file_prep_inodes(struct inode *inode_in, loff_t pos_in,
			       struct inode *inode_out, loff_t pos_out,
			       u64 *len, bool is_dedupe)
{
	loff_t bs = inode_out->i_sb->s_blocksize;
	loff_t blen;
	loff_t isize;
	bool same_inode = (inode_in == inode_out);
	int ret;

	/* Don't touch certain kinds of inodes */
	if (IS_IMMUTABLE(inode_out))
		return -EPERM;

	if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out))
		return -ETXTBSY;

	/* Don't reflink dirs, pipes, sockets... */
	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
		return -EISDIR;
	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
		return -EINVAL;

	/* Are we going all the way to the end? */
	isize = i_size_read(inode_in);
	if (isize == 0)
		return 0;

	/* Zero length dedupe exits immediately; reflink goes to EOF. */
	if (*len == 0) {
		if (is_dedupe || pos_in == isize)
			return 0;
		if (pos_in > isize)
			return -EINVAL;
		*len = isize - pos_in;
	}

	/* Ensure offsets don't wrap and the input is inside i_size */
	if (pos_in + *len < pos_in || pos_out + *len < pos_out ||
	    pos_in + *len > isize)
		return -EINVAL;

	/* Don't allow dedupe past EOF in the dest file */
	if (is_dedupe) {
		loff_t	disize;

		disize = i_size_read(inode_out);
		if (pos_out >= disize || pos_out + *len > disize)
			return -EINVAL;
	}

	/* If we're linking to EOF, continue to the block boundary. */
	if (pos_in + *len == isize)
		blen = ALIGN(isize, bs) - pos_in;
	else
		blen = *len;

	/* Only reflink if we're aligned to block boundaries */
	if (!IS_ALIGNED(pos_in, bs) || !IS_ALIGNED(pos_in + blen, bs) ||
	    !IS_ALIGNED(pos_out, bs) || !IS_ALIGNED(pos_out + blen, bs))
		return -EINVAL;

	/* Don't allow overlapped reflink within the same file */
	if (same_inode) {
		if (pos_out + blen > pos_in && pos_out < pos_in + blen)
			return -EINVAL;
	}

	/* Wait for the completion of any pending IOs on both files */
	inode_dio_wait(inode_in);
	if (!same_inode)
		inode_dio_wait(inode_out);

	ret = filemap_write_and_wait_range(inode_in->i_mapping,
			pos_in, pos_in + *len - 1);
	if (ret)
		return ret;

	ret = filemap_write_and_wait_range(inode_out->i_mapping,
			pos_out, pos_out + *len - 1);
	if (ret)
		return ret;

	/*
	 * Check that the extents are the same.
	 */
	if (is_dedupe) {
		bool		is_same = false;

		ret = vfs_dedupe_file_range_compare(inode_in, pos_in,
				inode_out, pos_out, *len, &is_same);
		if (ret)
			return ret;
		if (!is_same)
			return -EBADE;
	}

	return 1;
}
EXPORT_SYMBOL(vfs_clone_file_prep_inodes);
/*
 * Reflink @len bytes from @file_in/@pos_in into @file_out/@pos_out via
 * the filesystem's ->clone_file_range method.  Both files must be
 * regular, on the same superblock, with the source readable and the
 * destination writable (and not O_APPEND).  Returns 0 on success or a
 * negative errno.
 */
int vfs_clone_file_range(struct file *file_in, loff_t pos_in,
		struct file *file_out, loff_t pos_out, u64 len)
{
	struct inode *inode_in = file_inode(file_in);
	struct inode *inode_out = file_inode(file_out);
	int ret;

	if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode))
		return -EISDIR;
	if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode))
		return -EINVAL;

	/*
	 * FICLONE/FICLONERANGE ioctls enforce that src and dest files are on
	 * the same mount. Practically, they only need to be on the same file
	 * system.
	 */
	if (inode_in->i_sb != inode_out->i_sb)
		return -EXDEV;

	if (!(file_in->f_mode & FMODE_READ) ||
	    !(file_out->f_mode & FMODE_WRITE) ||
	    (file_out->f_flags & O_APPEND))
		return -EBADF;

	if (!file_in->f_op->clone_file_range)
		return -EOPNOTSUPP;

	ret = clone_verify_area(file_in, pos_in, len, false);
	if (ret)
		return ret;

	ret = clone_verify_area(file_out, pos_out, len, true);
	if (ret)
		return ret;

	/* source range must lie entirely within the source file */
	if (pos_in + len > i_size_read(inode_in))
		return -EINVAL;

	ret = file_in->f_op->clone_file_range(file_in, pos_in,
			file_out, pos_out, len);
	if (!ret) {
		fsnotify_access(file_in);
		fsnotify_modify(file_out);
	}

	return ret;
}
EXPORT_SYMBOL(vfs_clone_file_range);
/*
 * Read a page's worth of file data into the page cache. Return the page
 * locked.
 *
 * NOTE(review): PageUptodate() is tested before lock_page() here, so the
 * flag is read without holding the page lock — presumably acceptable for
 * this read-only comparison path, but worth confirming against the
 * locking rules for the uptodate bit.
 */
static struct page *vfs_dedupe_get_page(struct inode *inode, loff_t offset)
{
	struct address_space *mapping;
	struct page *page;
	pgoff_t n;

	n = offset >> PAGE_SHIFT;
	mapping = inode->i_mapping;
	page = read_mapping_page(mapping, n, NULL);
	if (IS_ERR(page))
		return page;
	if (!PageUptodate(page)) {
		/* read failed to bring the page fully uptodate */
		put_page(page);
		return ERR_PTR(-EIO);
	}
	lock_page(page);
	return page;
}
/*
 * Compare extents of two files to see if they are the same.
 * Caller must have locked both inodes to prevent write races.
 *
 * Walks the two ranges a page-overlap at a time, memcmp()ing the
 * intersecting spans.  On success *@is_same reports whether every byte
 * matched; returns 0, or a negative errno if a page could not be read.
 */
int vfs_dedupe_file_range_compare(struct inode *src, loff_t srcoff,
				  struct inode *dest, loff_t destoff,
				  loff_t len, bool *is_same)
{
	loff_t src_poff;
	loff_t dest_poff;
	void *src_addr;
	void *dest_addr;
	struct page *src_page;
	struct page *dest_page;
	loff_t cmp_len;
	bool same;
	int error;

	error = -EINVAL;
	same = true;
	while (len) {
		/* compare only the span where both pages overlap */
		src_poff = srcoff & (PAGE_SIZE - 1);
		dest_poff = destoff & (PAGE_SIZE - 1);
		cmp_len = min(PAGE_SIZE - src_poff,
			      PAGE_SIZE - dest_poff);
		cmp_len = min(cmp_len, len);
		if (cmp_len <= 0)
			goto out_error;

		src_page = vfs_dedupe_get_page(src, srcoff);
		if (IS_ERR(src_page)) {
			error = PTR_ERR(src_page);
			goto out_error;
		}
		dest_page = vfs_dedupe_get_page(dest, destoff);
		if (IS_ERR(dest_page)) {
			error = PTR_ERR(dest_page);
			unlock_page(src_page);
			put_page(src_page);
			goto out_error;
		}
		src_addr = kmap_atomic(src_page);
		dest_addr = kmap_atomic(dest_page);

		flush_dcache_page(src_page);
		flush_dcache_page(dest_page);

		if (memcmp(src_addr + src_poff, dest_addr + dest_poff, cmp_len))
			same = false;

		kunmap_atomic(dest_addr);
		kunmap_atomic(src_addr);
		unlock_page(dest_page);
		unlock_page(src_page);
		put_page(dest_page);
		put_page(src_page);

		if (!same)
			break;

		srcoff += cmp_len;
		destoff += cmp_len;
		len -= cmp_len;
	}

	*is_same = same;
	return 0;

out_error:
	return error;
}
EXPORT_SYMBOL(vfs_dedupe_file_range_compare);
/*
 * FIDEDUPERANGE backend: attempt to dedupe the source range of @file
 * against every destination described in @same->info[].
 *
 * Per-destination outcomes are reported in info[i].status /
 * info[i].bytes_deduped; a failure for one destination does not stop
 * the others.  The function's own return value covers only the source
 * validation and fatal conditions (e.g. a pending fatal signal).
 */
int vfs_dedupe_file_range(struct file *file, struct file_dedupe_range *same)
{
	struct file_dedupe_range_info *info;
	struct inode *src = file_inode(file);
	u64 off;
	u64 len;
	int i;
	int ret;
	bool is_admin = capable(CAP_SYS_ADMIN);
	u16 count = same->dest_count;
	struct file *dst_file;
	loff_t dst_off;
	ssize_t deduped;

	if (!(file->f_mode & FMODE_READ))
		return -EINVAL;

	if (same->reserved1 || same->reserved2)
		return -EINVAL;

	off = same->src_offset;
	len = same->src_length;

	ret = -EISDIR;
	if (S_ISDIR(src->i_mode))
		goto out;

	ret = -EINVAL;
	if (!S_ISREG(src->i_mode))
		goto out;

	ret = clone_verify_area(file, off, len, false);
	if (ret < 0)
		goto out;
	ret = 0;

	if (off + len > i_size_read(src))
		return -EINVAL;

	/* pre-format output fields to sane values */
	for (i = 0; i < count; i++) {
		same->info[i].bytes_deduped = 0ULL;
		same->info[i].status = FILE_DEDUPE_RANGE_SAME;
	}

	for (i = 0, info = same->info; i < count; i++, info++) {
		struct inode *dst;
		struct fd dst_fd = fdget(info->dest_fd);

		dst_file = dst_fd.file;
		if (!dst_file) {
			info->status = -EBADF;
			goto next_loop;
		}
		dst = file_inode(dst_file);

		ret = mnt_want_write_file(dst_file);
		if (ret) {
			info->status = ret;
			goto next_loop;
		}

		dst_off = info->dest_offset;
		ret = clone_verify_area(dst_file, dst_off, len, true);
		if (ret < 0) {
			info->status = ret;
			goto next_file;
		}
		ret = 0;

		if (info->reserved) {
			info->status = -EINVAL;
		} else if (!(is_admin || (dst_file->f_mode & FMODE_WRITE))) {
			info->status = -EINVAL;
		} else if (file->f_path.mnt != dst_file->f_path.mnt) {
			info->status = -EXDEV;
		} else if (S_ISDIR(dst->i_mode)) {
			info->status = -EISDIR;
		} else if (dst_file->f_op->dedupe_file_range == NULL) {
			info->status = -EINVAL;
		} else {
			deduped = dst_file->f_op->dedupe_file_range(file, off,
							len, dst_file,
							info->dest_offset);
			if (deduped == -EBADE)
				info->status = FILE_DEDUPE_RANGE_DIFFERS;
			else if (deduped < 0)
				info->status = deduped;
			else
				info->bytes_deduped += deduped;
		}

next_file:
		mnt_drop_write_file(dst_file);
next_loop:
		fdput(dst_fd);

		if (fatal_signal_pending(current))
			goto out;
	}

out:
	return ret;
}
EXPORT_SYMBOL(vfs_dedupe_file_range);