data.c 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866
  1. /*
  2. * fs/f2fs/data.c
  3. *
  4. * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  5. * http://www.samsung.com/
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License version 2 as
  9. * published by the Free Software Foundation.
  10. */
  11. #include <linux/fs.h>
  12. #include <linux/f2fs_fs.h>
  13. #include <linux/buffer_head.h>
  14. #include <linux/mpage.h>
  15. #include <linux/writeback.h>
  16. #include <linux/backing-dev.h>
  17. #include <linux/pagevec.h>
  18. #include <linux/blkdev.h>
  19. #include <linux/bio.h>
  20. #include <linux/prefetch.h>
  21. #include <linux/uio.h>
  22. #include <linux/cleancache.h>
  23. #include "f2fs.h"
  24. #include "node.h"
  25. #include "segment.h"
  26. #include "trace.h"
  27. #include <trace/events/f2fs.h>
  28. static void f2fs_read_end_io(struct bio *bio)
  29. {
  30. struct bio_vec *bvec;
  31. int i;
  32. if (f2fs_bio_encrypted(bio)) {
  33. if (bio->bi_error) {
  34. fscrypt_release_ctx(bio->bi_private);
  35. } else {
  36. fscrypt_decrypt_bio_pages(bio->bi_private, bio);
  37. return;
  38. }
  39. }
  40. bio_for_each_segment_all(bvec, bio, i) {
  41. struct page *page = bvec->bv_page;
  42. if (!bio->bi_error) {
  43. SetPageUptodate(page);
  44. } else {
  45. ClearPageUptodate(page);
  46. SetPageError(page);
  47. }
  48. unlock_page(page);
  49. }
  50. bio_put(bio);
  51. }
  52. static void f2fs_write_end_io(struct bio *bio)
  53. {
  54. struct f2fs_sb_info *sbi = bio->bi_private;
  55. struct bio_vec *bvec;
  56. int i;
  57. bio_for_each_segment_all(bvec, bio, i) {
  58. struct page *page = bvec->bv_page;
  59. fscrypt_pullback_bio_page(&page, true);
  60. if (unlikely(bio->bi_error)) {
  61. set_bit(AS_EIO, &page->mapping->flags);
  62. f2fs_stop_checkpoint(sbi, true);
  63. }
  64. end_page_writeback(page);
  65. }
  66. if (atomic_dec_and_test(&sbi->nr_wb_bios) &&
  67. wq_has_sleeper(&sbi->cp_wait))
  68. wake_up(&sbi->cp_wait);
  69. bio_put(bio);
  70. }
  71. /*
  72. * Low-level block read/write IO operations.
  73. */
  74. static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
  75. int npages, bool is_read)
  76. {
  77. struct bio *bio;
  78. bio = f2fs_bio_alloc(npages);
  79. bio->bi_bdev = sbi->sb->s_bdev;
  80. bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
  81. bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
  82. bio->bi_private = is_read ? NULL : sbi;
  83. return bio;
  84. }
  85. static inline void __submit_bio(struct f2fs_sb_info *sbi, int rw,
  86. struct bio *bio)
  87. {
  88. if (!is_read_io(rw))
  89. atomic_inc(&sbi->nr_wb_bios);
  90. submit_bio(rw, bio);
  91. }
  92. static void __submit_merged_bio(struct f2fs_bio_info *io)
  93. {
  94. struct f2fs_io_info *fio = &io->fio;
  95. if (!io->bio)
  96. return;
  97. if (is_read_io(fio->rw))
  98. trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
  99. else
  100. trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
  101. __submit_bio(io->sbi, fio->rw, io->bio);
  102. io->bio = NULL;
  103. }
  104. static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
  105. struct page *page, nid_t ino)
  106. {
  107. struct bio_vec *bvec;
  108. struct page *target;
  109. int i;
  110. if (!io->bio)
  111. return false;
  112. if (!inode && !page && !ino)
  113. return true;
  114. bio_for_each_segment_all(bvec, io->bio, i) {
  115. if (bvec->bv_page->mapping)
  116. target = bvec->bv_page;
  117. else
  118. target = fscrypt_control_page(bvec->bv_page);
  119. if (inode && inode == target->mapping->host)
  120. return true;
  121. if (page && page == target)
  122. return true;
  123. if (ino && ino == ino_of_node(target))
  124. return true;
  125. }
  126. return false;
  127. }
  128. static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
  129. struct page *page, nid_t ino,
  130. enum page_type type)
  131. {
  132. enum page_type btype = PAGE_TYPE_OF_BIO(type);
  133. struct f2fs_bio_info *io = &sbi->write_io[btype];
  134. bool ret;
  135. down_read(&io->io_rwsem);
  136. ret = __has_merged_page(io, inode, page, ino);
  137. up_read(&io->io_rwsem);
  138. return ret;
  139. }
  140. static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
  141. struct inode *inode, struct page *page,
  142. nid_t ino, enum page_type type, int rw)
  143. {
  144. enum page_type btype = PAGE_TYPE_OF_BIO(type);
  145. struct f2fs_bio_info *io;
  146. io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
  147. down_write(&io->io_rwsem);
  148. if (!__has_merged_page(io, inode, page, ino))
  149. goto out;
  150. /* change META to META_FLUSH in the checkpoint procedure */
  151. if (type >= META_FLUSH) {
  152. io->fio.type = META_FLUSH;
  153. if (test_opt(sbi, NOBARRIER))
  154. io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
  155. else
  156. io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
  157. }
  158. __submit_merged_bio(io);
  159. out:
  160. up_write(&io->io_rwsem);
  161. }
  162. void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
  163. int rw)
  164. {
  165. __f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw);
  166. }
  167. void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
  168. struct inode *inode, struct page *page,
  169. nid_t ino, enum page_type type, int rw)
  170. {
  171. if (has_merged_page(sbi, inode, page, ino, type))
  172. __f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw);
  173. }
  174. void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
  175. {
  176. f2fs_submit_merged_bio(sbi, DATA, WRITE);
  177. f2fs_submit_merged_bio(sbi, NODE, WRITE);
  178. f2fs_submit_merged_bio(sbi, META, WRITE);
  179. }
  180. /*
  181. * Fill the locked page with data located in the block address.
  182. * Return unlocked page.
  183. */
  184. int f2fs_submit_page_bio(struct f2fs_io_info *fio)
  185. {
  186. struct bio *bio;
  187. struct page *page = fio->encrypted_page ?
  188. fio->encrypted_page : fio->page;
  189. trace_f2fs_submit_page_bio(page, fio);
  190. f2fs_trace_ios(fio, 0);
  191. /* Allocate a new bio */
  192. bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->rw));
  193. if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
  194. bio_put(bio);
  195. return -EFAULT;
  196. }
  197. __submit_bio(fio->sbi, fio->rw, bio);
  198. return 0;
  199. }
  200. void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
  201. {
  202. struct f2fs_sb_info *sbi = fio->sbi;
  203. enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
  204. struct f2fs_bio_info *io;
  205. bool is_read = is_read_io(fio->rw);
  206. struct page *bio_page;
  207. io = is_read ? &sbi->read_io : &sbi->write_io[btype];
  208. if (fio->old_blkaddr != NEW_ADDR)
  209. verify_block_addr(sbi, fio->old_blkaddr);
  210. verify_block_addr(sbi, fio->new_blkaddr);
  211. down_write(&io->io_rwsem);
  212. if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
  213. io->fio.rw != fio->rw))
  214. __submit_merged_bio(io);
  215. alloc_new:
  216. if (io->bio == NULL) {
  217. int bio_blocks = MAX_BIO_BLOCKS(sbi);
  218. io->bio = __bio_alloc(sbi, fio->new_blkaddr,
  219. bio_blocks, is_read);
  220. io->fio = *fio;
  221. }
  222. bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
  223. if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
  224. PAGE_SIZE) {
  225. __submit_merged_bio(io);
  226. goto alloc_new;
  227. }
  228. io->last_block_in_bio = fio->new_blkaddr;
  229. f2fs_trace_ios(fio, 0);
  230. up_write(&io->io_rwsem);
  231. trace_f2fs_submit_page_mbio(fio->page, fio);
  232. }
  233. static void __set_data_blkaddr(struct dnode_of_data *dn)
  234. {
  235. struct f2fs_node *rn = F2FS_NODE(dn->node_page);
  236. __le32 *addr_array;
  237. /* Get physical address of data block */
  238. addr_array = blkaddr_in_node(rn);
  239. addr_array[dn->ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
  240. }
  241. /*
  242. * Lock ordering for the change of data block address:
  243. * ->data_page
  244. * ->node_page
  245. * update block addresses in the node page
  246. */
  247. void set_data_blkaddr(struct dnode_of_data *dn)
  248. {
  249. f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
  250. __set_data_blkaddr(dn);
  251. if (set_page_dirty(dn->node_page))
  252. dn->node_changed = true;
  253. }
  254. void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
  255. {
  256. dn->data_blkaddr = blkaddr;
  257. set_data_blkaddr(dn);
  258. f2fs_update_extent_cache(dn);
  259. }
  260. /* dn->ofs_in_node will be returned with up-to-date last block pointer */
  261. int reserve_new_blocks(struct dnode_of_data *dn, blkcnt_t count)
  262. {
  263. struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
  264. if (!count)
  265. return 0;
  266. if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
  267. return -EPERM;
  268. if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
  269. return -ENOSPC;
  270. trace_f2fs_reserve_new_blocks(dn->inode, dn->nid,
  271. dn->ofs_in_node, count);
  272. f2fs_wait_on_page_writeback(dn->node_page, NODE, true);
  273. for (; count > 0; dn->ofs_in_node++) {
  274. block_t blkaddr =
  275. datablock_addr(dn->node_page, dn->ofs_in_node);
  276. if (blkaddr == NULL_ADDR) {
  277. dn->data_blkaddr = NEW_ADDR;
  278. __set_data_blkaddr(dn);
  279. count--;
  280. }
  281. }
  282. if (set_page_dirty(dn->node_page))
  283. dn->node_changed = true;
  284. mark_inode_dirty(dn->inode);
  285. sync_inode_page(dn);
  286. return 0;
  287. }
  288. /* Should keep dn->ofs_in_node unchanged */
  289. int reserve_new_block(struct dnode_of_data *dn)
  290. {
  291. unsigned int ofs_in_node = dn->ofs_in_node;
  292. int ret;
  293. ret = reserve_new_blocks(dn, 1);
  294. dn->ofs_in_node = ofs_in_node;
  295. return ret;
  296. }
  297. int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
  298. {
  299. bool need_put = dn->inode_page ? false : true;
  300. int err;
  301. err = get_dnode_of_data(dn, index, ALLOC_NODE);
  302. if (err)
  303. return err;
  304. if (dn->data_blkaddr == NULL_ADDR)
  305. err = reserve_new_block(dn);
  306. if (err || need_put)
  307. f2fs_put_dnode(dn);
  308. return err;
  309. }
  310. int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
  311. {
  312. struct extent_info ei;
  313. struct inode *inode = dn->inode;
  314. if (f2fs_lookup_extent_cache(inode, index, &ei)) {
  315. dn->data_blkaddr = ei.blk + index - ei.fofs;
  316. return 0;
  317. }
  318. return f2fs_reserve_block(dn, index);
  319. }
  320. struct page *get_read_data_page(struct inode *inode, pgoff_t index,
  321. int rw, bool for_write)
  322. {
  323. struct address_space *mapping = inode->i_mapping;
  324. struct dnode_of_data dn;
  325. struct page *page;
  326. struct extent_info ei;
  327. int err;
  328. struct f2fs_io_info fio = {
  329. .sbi = F2FS_I_SB(inode),
  330. .type = DATA,
  331. .rw = rw,
  332. .encrypted_page = NULL,
  333. };
  334. if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
  335. return read_mapping_page(mapping, index, NULL);
  336. page = f2fs_grab_cache_page(mapping, index, for_write);
  337. if (!page)
  338. return ERR_PTR(-ENOMEM);
  339. if (f2fs_lookup_extent_cache(inode, index, &ei)) {
  340. dn.data_blkaddr = ei.blk + index - ei.fofs;
  341. goto got_it;
  342. }
  343. set_new_dnode(&dn, inode, NULL, NULL, 0);
  344. err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
  345. if (err)
  346. goto put_err;
  347. f2fs_put_dnode(&dn);
  348. if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
  349. err = -ENOENT;
  350. goto put_err;
  351. }
  352. got_it:
  353. if (PageUptodate(page)) {
  354. unlock_page(page);
  355. return page;
  356. }
  357. /*
  358. * A new dentry page is allocated but not able to be written, since its
  359. * new inode page couldn't be allocated due to -ENOSPC.
  360. * In such the case, its blkaddr can be remained as NEW_ADDR.
  361. * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
  362. */
  363. if (dn.data_blkaddr == NEW_ADDR) {
  364. zero_user_segment(page, 0, PAGE_SIZE);
  365. SetPageUptodate(page);
  366. unlock_page(page);
  367. return page;
  368. }
  369. fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
  370. fio.page = page;
  371. err = f2fs_submit_page_bio(&fio);
  372. if (err)
  373. goto put_err;
  374. return page;
  375. put_err:
  376. f2fs_put_page(page, 1);
  377. return ERR_PTR(err);
  378. }
  379. struct page *find_data_page(struct inode *inode, pgoff_t index)
  380. {
  381. struct address_space *mapping = inode->i_mapping;
  382. struct page *page;
  383. page = find_get_page(mapping, index);
  384. if (page && PageUptodate(page))
  385. return page;
  386. f2fs_put_page(page, 0);
  387. page = get_read_data_page(inode, index, READ_SYNC, false);
  388. if (IS_ERR(page))
  389. return page;
  390. if (PageUptodate(page))
  391. return page;
  392. wait_on_page_locked(page);
  393. if (unlikely(!PageUptodate(page))) {
  394. f2fs_put_page(page, 0);
  395. return ERR_PTR(-EIO);
  396. }
  397. return page;
  398. }
  399. /*
  400. * If it tries to access a hole, return an error.
  401. * Because, the callers, functions in dir.c and GC, should be able to know
  402. * whether this page exists or not.
  403. */
  404. struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
  405. bool for_write)
  406. {
  407. struct address_space *mapping = inode->i_mapping;
  408. struct page *page;
  409. repeat:
  410. page = get_read_data_page(inode, index, READ_SYNC, for_write);
  411. if (IS_ERR(page))
  412. return page;
  413. /* wait for read completion */
  414. lock_page(page);
  415. if (unlikely(!PageUptodate(page))) {
  416. f2fs_put_page(page, 1);
  417. return ERR_PTR(-EIO);
  418. }
  419. if (unlikely(page->mapping != mapping)) {
  420. f2fs_put_page(page, 1);
  421. goto repeat;
  422. }
  423. return page;
  424. }
  425. /*
  426. * Caller ensures that this data page is never allocated.
  427. * A new zero-filled data page is allocated in the page cache.
  428. *
  429. * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
  430. * f2fs_unlock_op().
  431. * Note that, ipage is set only by make_empty_dir, and if any error occur,
  432. * ipage should be released by this function.
  433. */
  434. struct page *get_new_data_page(struct inode *inode,
  435. struct page *ipage, pgoff_t index, bool new_i_size)
  436. {
  437. struct address_space *mapping = inode->i_mapping;
  438. struct page *page;
  439. struct dnode_of_data dn;
  440. int err;
  441. page = f2fs_grab_cache_page(mapping, index, true);
  442. if (!page) {
  443. /*
  444. * before exiting, we should make sure ipage will be released
  445. * if any error occur.
  446. */
  447. f2fs_put_page(ipage, 1);
  448. return ERR_PTR(-ENOMEM);
  449. }
  450. set_new_dnode(&dn, inode, ipage, NULL, 0);
  451. err = f2fs_reserve_block(&dn, index);
  452. if (err) {
  453. f2fs_put_page(page, 1);
  454. return ERR_PTR(err);
  455. }
  456. if (!ipage)
  457. f2fs_put_dnode(&dn);
  458. if (PageUptodate(page))
  459. goto got_it;
  460. if (dn.data_blkaddr == NEW_ADDR) {
  461. zero_user_segment(page, 0, PAGE_SIZE);
  462. SetPageUptodate(page);
  463. } else {
  464. f2fs_put_page(page, 1);
  465. /* if ipage exists, blkaddr should be NEW_ADDR */
  466. f2fs_bug_on(F2FS_I_SB(inode), ipage);
  467. page = get_lock_data_page(inode, index, true);
  468. if (IS_ERR(page))
  469. return page;
  470. }
  471. got_it:
  472. if (new_i_size && i_size_read(inode) <
  473. ((loff_t)(index + 1) << PAGE_SHIFT)) {
  474. i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
  475. /* Only the directory inode sets new_i_size */
  476. set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
  477. }
  478. return page;
  479. }
  480. static int __allocate_data_block(struct dnode_of_data *dn)
  481. {
  482. struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
  483. struct f2fs_summary sum;
  484. struct node_info ni;
  485. int seg = CURSEG_WARM_DATA;
  486. pgoff_t fofs;
  487. blkcnt_t count = 1;
  488. if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
  489. return -EPERM;
  490. dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
  491. if (dn->data_blkaddr == NEW_ADDR)
  492. goto alloc;
  493. if (unlikely(!inc_valid_block_count(sbi, dn->inode, &count)))
  494. return -ENOSPC;
  495. alloc:
  496. get_node_info(sbi, dn->nid, &ni);
  497. set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
  498. if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
  499. seg = CURSEG_DIRECT_IO;
  500. allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
  501. &sum, seg);
  502. set_data_blkaddr(dn);
  503. /* update i_size */
  504. fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
  505. dn->ofs_in_node;
  506. if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
  507. i_size_write(dn->inode,
  508. ((loff_t)(fofs + 1) << PAGE_SHIFT));
  509. return 0;
  510. }
  511. ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
  512. {
  513. struct inode *inode = file_inode(iocb->ki_filp);
  514. struct f2fs_map_blocks map;
  515. ssize_t ret = 0;
  516. map.m_lblk = F2FS_BLK_ALIGN(iocb->ki_pos);
  517. map.m_len = F2FS_BYTES_TO_BLK(iov_iter_count(from));
  518. map.m_next_pgofs = NULL;
  519. if (f2fs_encrypted_inode(inode))
  520. return 0;
  521. if (iocb->ki_flags & IOCB_DIRECT) {
  522. ret = f2fs_convert_inline_inode(inode);
  523. if (ret)
  524. return ret;
  525. return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
  526. }
  527. if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
  528. ret = f2fs_convert_inline_inode(inode);
  529. if (ret)
  530. return ret;
  531. }
  532. if (!f2fs_has_inline_data(inode))
  533. return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
  534. return ret;
  535. }
  536. /*
  537. * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
  538. * f2fs_map_blocks structure.
  539. * If original data blocks are allocated, then give them to blockdev.
  540. * Otherwise,
  541. * a. preallocate requested block addresses
  542. * b. do not use extent cache for better performance
  543. * c. give the block addresses to blockdev
  544. */
  /*
   * @inode:  file whose logical blocks are being mapped
   * @map:    in:  m_lblk (first logical block), m_len (maximum number of
   *               blocks), m_next_pgofs (optional, may be NULL)
   *          out: m_pblk, m_len and m_flags describe the extent found
   * @create: non-zero looks nodes up with ALLOC_NODE and brackets the work
   *          with f2fs_lock_op()/f2fs_unlock_op(); zero uses LOOKUP_NODE_RA
   * @flag:   one of the F2FS_GET_BLOCK_* values tested in the body
   *
   * Returns 0 or a negative errno.  -ENOENT from get_dnode_of_data() is
   * converted to 0, leaving the map without F2FS_MAP_MAPPED.
   */
  545. int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
  546. int create, int flag)
  547. {
  548. unsigned int maxblocks = map->m_len;
  549. struct dnode_of_data dn;
  550. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  551. int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
  552. pgoff_t pgofs, end_offset, end;
  553. int err = 0, ofs = 1;
  554. unsigned int ofs_in_node, last_ofs_in_node;
  555. blkcnt_t prealloc;
  556. struct extent_info ei;
  557. bool allocated = false;
  558. block_t blkaddr;
  559. map->m_len = 0;
  560. map->m_flags = 0;
  561. /* it only supports block size == page size */
  562. pgofs = (pgoff_t)map->m_lblk;
  563. end = pgofs + maxblocks;
  /* lookup only: answer from the extent cache when it covers pgofs */
  564. if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
  565. map->m_pblk = ei.blk + pgofs - ei.fofs;
  566. map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
  567. map->m_flags = F2FS_MAP_MAPPED;
  568. goto out;
  569. }
  /* each pass from here handles the slots of one dnode */
  570. next_dnode:
  571. if (create)
  572. f2fs_lock_op(sbi);
  573. /* When reading holes, we need its node page */
  574. set_new_dnode(&dn, inode, NULL, NULL, 0);
  575. err = get_dnode_of_data(&dn, pgofs, mode);
  576. if (err) {
  577. if (flag == F2FS_GET_BLOCK_BMAP)
  578. map->m_pblk = 0;
  /* a missing node is reported as success with a hint for the next offset */
  579. if (err == -ENOENT) {
  580. err = 0;
  581. if (map->m_next_pgofs)
  582. *map->m_next_pgofs =
  583. get_next_page_offset(&dn, pgofs);
  584. }
  585. goto unlock_out;
  586. }
  /* remember where this dnode's scan starts; PRE_AIO rewinds to it below */
  587. prealloc = 0;
  588. ofs_in_node = dn.ofs_in_node;
  589. end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
  590. next_block:
  591. blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
  592. if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
  593. if (create) {
  594. if (unlikely(f2fs_cp_error(sbi))) {
  595. err = -EIO;
  596. goto sync_out;
  597. }
  /* PRE_AIO only counts holes here; they are reserved in one batch below */
  598. if (flag == F2FS_GET_BLOCK_PRE_AIO) {
  599. if (blkaddr == NULL_ADDR) {
  600. prealloc++;
  601. last_ofs_in_node = dn.ofs_in_node;
  602. }
  603. } else {
  604. err = __allocate_data_block(&dn);
  605. if (!err) {
  606. set_inode_flag(F2FS_I(inode),
  607. FI_APPEND_WRITE);
  608. allocated = true;
  609. }
  610. }
  611. if (err)
  612. goto sync_out;
  613. map->m_flags = F2FS_MAP_NEW;
  614. blkaddr = dn.data_blkaddr;
  615. } else {
  616. if (flag == F2FS_GET_BLOCK_BMAP) {
  617. map->m_pblk = 0;
  618. goto sync_out;
  619. }
  620. if (flag == F2FS_GET_BLOCK_FIEMAP &&
  621. blkaddr == NULL_ADDR) {
  622. if (map->m_next_pgofs)
  623. *map->m_next_pgofs = pgofs + 1;
  624. }
  /* only FIEMAP on a NEW_ADDR block continues; every other lookup stops here */
  625. if (flag != F2FS_GET_BLOCK_FIEMAP ||
  626. blkaddr != NEW_ADDR)
  627. goto sync_out;
  628. }
  629. }
  630. if (flag == F2FS_GET_BLOCK_PRE_AIO)
  631. goto skip;
  /* first block starts the extent; later blocks must extend it or end the scan */
  632. if (map->m_len == 0) {
  633. /* preallocated unwritten block should be mapped for fiemap. */
  634. if (blkaddr == NEW_ADDR)
  635. map->m_flags |= F2FS_MAP_UNWRITTEN;
  636. map->m_flags |= F2FS_MAP_MAPPED;
  637. map->m_pblk = blkaddr;
  638. map->m_len = 1;
  /* extend when physically contiguous, when both are NEW_ADDR, or always for PRE_DIO */
  639. } else if ((map->m_pblk != NEW_ADDR &&
  640. blkaddr == (map->m_pblk + ofs)) ||
  641. (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
  642. flag == F2FS_GET_BLOCK_PRE_DIO) {
  643. ofs++;
  644. map->m_len++;
  645. } else {
  646. goto sync_out;
  647. }
  648. skip:
  649. dn.ofs_in_node++;
  650. pgofs++;
  651. /* preallocate blocks in batch for one dnode page */
  652. if (flag == F2FS_GET_BLOCK_PRE_AIO &&
  653. (pgofs == end || dn.ofs_in_node == end_offset)) {
  654. dn.ofs_in_node = ofs_in_node;
  655. err = reserve_new_blocks(&dn, prealloc);
  656. if (err)
  657. goto sync_out;
  /* reserve_new_blocks() advanced dn.ofs_in_node; the distance is added to m_len */
  658. map->m_len += dn.ofs_in_node - ofs_in_node;
  /*
   * NOTE(review): a stop short of the last counted hole presumably means
   * fewer blocks were granted than requested; confirm against
   * inc_valid_block_count().
   */
  659. if (prealloc && dn.ofs_in_node != last_ofs_in_node + 1) {
  660. err = -ENOSPC;
  661. goto sync_out;
  662. }
  663. dn.ofs_in_node = end_offset;
  664. }
  665. if (pgofs >= end)
  666. goto sync_out;
  667. else if (dn.ofs_in_node < end_offset)
  668. goto next_block;
  /* this dnode is exhausted: release it and the op lock, then continue with the next one */
  669. if (allocated)
  670. sync_inode_page(&dn);
  671. f2fs_put_dnode(&dn);
  672. if (create) {
  673. f2fs_unlock_op(sbi);
  674. f2fs_balance_fs(sbi, allocated);
  675. }
  676. allocated = false;
  677. goto next_dnode;
  /* exit paths: sync_out still holds the dnode, unlock_out only the op lock, out neither */
  678. sync_out:
  679. if (allocated)
  680. sync_inode_page(&dn);
  681. f2fs_put_dnode(&dn);
  682. unlock_out:
  683. if (create) {
  684. f2fs_unlock_op(sbi);
  685. f2fs_balance_fs(sbi, allocated);
  686. }
  687. out:
  688. trace_f2fs_map_blocks(inode, map, err);
  689. return err;
  690. }
  691. static int __get_data_block(struct inode *inode, sector_t iblock,
  692. struct buffer_head *bh, int create, int flag,
  693. pgoff_t *next_pgofs)
  694. {
  695. struct f2fs_map_blocks map;
  696. int ret;
  697. map.m_lblk = iblock;
  698. map.m_len = bh->b_size >> inode->i_blkbits;
  699. map.m_next_pgofs = next_pgofs;
  700. ret = f2fs_map_blocks(inode, &map, create, flag);
  701. if (!ret) {
  702. map_bh(bh, inode->i_sb, map.m_pblk);
  703. bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
  704. bh->b_size = map.m_len << inode->i_blkbits;
  705. }
  706. return ret;
  707. }
  708. static int get_data_block(struct inode *inode, sector_t iblock,
  709. struct buffer_head *bh_result, int create, int flag,
  710. pgoff_t *next_pgofs)
  711. {
  712. return __get_data_block(inode, iblock, bh_result, create,
  713. flag, next_pgofs);
  714. }
  715. static int get_data_block_dio(struct inode *inode, sector_t iblock,
  716. struct buffer_head *bh_result, int create)
  717. {
  718. return __get_data_block(inode, iblock, bh_result, create,
  719. F2FS_GET_BLOCK_DIO, NULL);
  720. }
  721. static int get_data_block_bmap(struct inode *inode, sector_t iblock,
  722. struct buffer_head *bh_result, int create)
  723. {
  724. /* Block number less than F2FS MAX BLOCKS */
  725. if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
  726. return -EFBIG;
  727. return __get_data_block(inode, iblock, bh_result, create,
  728. F2FS_GET_BLOCK_BMAP, NULL);
  729. }
  730. static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
  731. {
  732. return (offset >> inode->i_blkbits);
  733. }
  734. static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
  735. {
  736. return (blk << inode->i_blkbits);
  737. }
  /*
   * Report the extents of @inode that overlap the byte range
   * [@start, @start + @len) into @fieinfo.
   *
   * Only FIEMAP_FLAG_SYNC is accepted.  Inline-data inodes are handled by
   * f2fs_inline_data_fiemap() unless it returns -EAGAIN.  The range is
   * clamped to i_size and walked under inode_lock().  Returns 0 or a
   * negative errno; -EINTR if a fatal signal is pending between mappings.
   */
  738. int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
  739. u64 start, u64 len)
  740. {
  741. struct buffer_head map_bh;
  742. sector_t start_blk, last_blk;
  743. pgoff_t next_pgofs;
  744. loff_t isize;
  745. u64 logical = 0, phys = 0, size = 0;
  746. u32 flags = 0;
  747. int ret = 0;
  748. ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
  749. if (ret)
  750. return ret;
  751. if (f2fs_has_inline_data(inode)) {
  752. ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
  753. if (ret != -EAGAIN)
  754. return ret;
  755. }
  756. inode_lock(inode);
  757. isize = i_size_read(inode);
  758. if (start >= isize)
  759. goto out;
  760. if (start + len > isize)
  761. len = isize - start;
  /* a range shorter than one block is widened to exactly one block */
  762. if (logical_to_blk(inode, len) == 0)
  763. len = blk_to_logical(inode, 1);
  764. start_blk = logical_to_blk(inode, start);
  765. last_blk = logical_to_blk(inode, start + len - 1);
  766. next:
  767. memset(&map_bh, 0, sizeof(struct buffer_head));
  768. map_bh.b_size = len;
  769. ret = get_data_block(inode, start_blk, &map_bh, 0,
  770. F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
  771. if (ret)
  772. goto out;
  773. /* HOLE */
  774. if (!buffer_mapped(&map_bh)) {
  /* NOTE(review): next_pgofs has no initializer; this relies on the mapping call having set it whenever the buffer is unmapped */
  775. start_blk = next_pgofs;
  776. /* Go through holes until passing the EOF */
  777. if (blk_to_logical(inode, start_blk) < isize)
  778. goto prep_next;
  779. /* Found a hole beyond isize means no more extents.
  780. * Note that the premise is that filesystems don't
  781. * punch holes beyond isize and keep size unchanged.
  782. */
  783. flags |= FIEMAP_EXTENT_LAST;
  784. }
  /*
   * Extents are reported one iteration late: logical/phys/size/flags
   * describe the previous mapping, so it can still be tagged
   * FIEMAP_EXTENT_LAST by the hole check above.
   */
  785. if (size) {
  786. if (f2fs_encrypted_inode(inode))
  787. flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
  788. ret = fiemap_fill_next_extent(fieinfo, logical,
  789. phys, size, flags);
  790. }
  791. if (start_blk > last_blk || ret)
  792. goto out;
  /* stash the current mapping; it is reported on the next pass */
  793. logical = blk_to_logical(inode, start_blk);
  794. phys = blk_to_logical(inode, map_bh.b_blocknr);
  795. size = map_bh.b_size;
  796. flags = 0;
  797. if (buffer_unwritten(&map_bh))
  798. flags = FIEMAP_EXTENT_UNWRITTEN;
  799. start_blk += logical_to_blk(inode, size);
  800. prep_next:
  801. cond_resched();
  802. if (fatal_signal_pending(current))
  803. ret = -EINTR;
  804. else
  805. goto next;
  806. out:
  /* a result of 1 (presumably fiemap_fill_next_extent() signalling the final extent) is reported as success */
  807. if (ret == 1)
  808. ret = 0;
  809. inode_unlock(inode);
  810. return ret;
  811. }
  812. /*
  813. * This function was originally taken from fs/mpage.c, and customized for f2fs.
  814. * Major change was from block_size == page_size in f2fs by default.
  815. */
  816. static int f2fs_mpage_readpages(struct address_space *mapping,
  817. struct list_head *pages, struct page *page,
  818. unsigned nr_pages)
  819. {
  820. struct bio *bio = NULL;
  821. unsigned page_idx;
  822. sector_t last_block_in_bio = 0;
  823. struct inode *inode = mapping->host;
  824. const unsigned blkbits = inode->i_blkbits;
  825. const unsigned blocksize = 1 << blkbits;
  826. sector_t block_in_file;
  827. sector_t last_block;
  828. sector_t last_block_in_file;
  829. sector_t block_nr;
  830. struct block_device *bdev = inode->i_sb->s_bdev;
  831. struct f2fs_map_blocks map;
  832. map.m_pblk = 0;
  833. map.m_lblk = 0;
  834. map.m_len = 0;
  835. map.m_flags = 0;
  836. map.m_next_pgofs = NULL;
  837. for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
  838. prefetchw(&page->flags);
  839. if (pages) {
  840. page = list_entry(pages->prev, struct page, lru);
  841. list_del(&page->lru);
  842. if (add_to_page_cache_lru(page, mapping,
  843. page->index, GFP_KERNEL))
  844. goto next_page;
  845. }
  846. block_in_file = (sector_t)page->index;
  847. last_block = block_in_file + nr_pages;
  848. last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
  849. blkbits;
  850. if (last_block > last_block_in_file)
  851. last_block = last_block_in_file;
  852. /*
  853. * Map blocks using the previous result first.
  854. */
  855. if ((map.m_flags & F2FS_MAP_MAPPED) &&
  856. block_in_file > map.m_lblk &&
  857. block_in_file < (map.m_lblk + map.m_len))
  858. goto got_it;
  859. /*
  860. * Then do more f2fs_map_blocks() calls until we are
  861. * done with this page.
  862. */
  863. map.m_flags = 0;
  864. if (block_in_file < last_block) {
  865. map.m_lblk = block_in_file;
  866. map.m_len = last_block - block_in_file;
  867. if (f2fs_map_blocks(inode, &map, 0,
  868. F2FS_GET_BLOCK_READ))
  869. goto set_error_page;
  870. }
  871. got_it:
  872. if ((map.m_flags & F2FS_MAP_MAPPED)) {
  873. block_nr = map.m_pblk + block_in_file - map.m_lblk;
  874. SetPageMappedToDisk(page);
  875. if (!PageUptodate(page) && !cleancache_get_page(page)) {
  876. SetPageUptodate(page);
  877. goto confused;
  878. }
  879. } else {
  880. zero_user_segment(page, 0, PAGE_SIZE);
  881. SetPageUptodate(page);
  882. unlock_page(page);
  883. goto next_page;
  884. }
  885. /*
  886. * This page will go to BIO. Do we need to send this
  887. * BIO off first?
  888. */
  889. if (bio && (last_block_in_bio != block_nr - 1)) {
  890. submit_and_realloc:
  891. __submit_bio(F2FS_I_SB(inode), READ, bio);
  892. bio = NULL;
  893. }
  894. if (bio == NULL) {
  895. struct fscrypt_ctx *ctx = NULL;
  896. if (f2fs_encrypted_inode(inode) &&
  897. S_ISREG(inode->i_mode)) {
  898. ctx = fscrypt_get_ctx(inode, GFP_NOFS);
  899. if (IS_ERR(ctx))
  900. goto set_error_page;
  901. /* wait the page to be moved by cleaning */
  902. f2fs_wait_on_encrypted_page_writeback(
  903. F2FS_I_SB(inode), block_nr);
  904. }
  905. bio = bio_alloc(GFP_KERNEL,
  906. min_t(int, nr_pages, BIO_MAX_PAGES));
  907. if (!bio) {
  908. if (ctx)
  909. fscrypt_release_ctx(ctx);
  910. goto set_error_page;
  911. }
  912. bio->bi_bdev = bdev;
  913. bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
  914. bio->bi_end_io = f2fs_read_end_io;
  915. bio->bi_private = ctx;
  916. }
  917. if (bio_add_page(bio, page, blocksize, 0) < blocksize)
  918. goto submit_and_realloc;
  919. last_block_in_bio = block_nr;
  920. goto next_page;
  921. set_error_page:
  922. SetPageError(page);
  923. zero_user_segment(page, 0, PAGE_SIZE);
  924. unlock_page(page);
  925. goto next_page;
  926. confused:
  927. if (bio) {
  928. __submit_bio(F2FS_I_SB(inode), READ, bio);
  929. bio = NULL;
  930. }
  931. unlock_page(page);
  932. next_page:
  933. if (pages)
  934. put_page(page);
  935. }
  936. BUG_ON(pages && !list_empty(pages));
  937. if (bio)
  938. __submit_bio(F2FS_I_SB(inode), READ, bio);
  939. return 0;
  940. }
  941. static int f2fs_read_data_page(struct file *file, struct page *page)
  942. {
  943. struct inode *inode = page->mapping->host;
  944. int ret = -EAGAIN;
  945. trace_f2fs_readpage(page, DATA);
  946. /* If the file has inline data, try to read it directly */
  947. if (f2fs_has_inline_data(inode))
  948. ret = f2fs_read_inline_data(inode, page);
  949. if (ret == -EAGAIN)
  950. ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
  951. return ret;
  952. }
  953. static int f2fs_read_data_pages(struct file *file,
  954. struct address_space *mapping,
  955. struct list_head *pages, unsigned nr_pages)
  956. {
  957. struct inode *inode = file->f_mapping->host;
  958. struct page *page = list_entry(pages->prev, struct page, lru);
  959. trace_f2fs_readpages(inode, page, nr_pages);
  960. /* If the file has inline data, skip readpages */
  961. if (f2fs_has_inline_data(inode))
  962. return 0;
  963. return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
  964. }
  965. int do_write_data_page(struct f2fs_io_info *fio)
  966. {
  967. struct page *page = fio->page;
  968. struct inode *inode = page->mapping->host;
  969. struct dnode_of_data dn;
  970. int err = 0;
  971. set_new_dnode(&dn, inode, NULL, NULL, 0);
  972. err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
  973. if (err)
  974. return err;
  975. fio->old_blkaddr = dn.data_blkaddr;
  976. /* This page is already truncated */
  977. if (fio->old_blkaddr == NULL_ADDR) {
  978. ClearPageUptodate(page);
  979. goto out_writepage;
  980. }
  981. if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
  982. gfp_t gfp_flags = GFP_NOFS;
  983. /* wait for GCed encrypted page writeback */
  984. f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode),
  985. fio->old_blkaddr);
  986. retry_encrypt:
  987. fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
  988. gfp_flags);
  989. if (IS_ERR(fio->encrypted_page)) {
  990. err = PTR_ERR(fio->encrypted_page);
  991. if (err == -ENOMEM) {
  992. /* flush pending ios and wait for a while */
  993. f2fs_flush_merged_bios(F2FS_I_SB(inode));
  994. congestion_wait(BLK_RW_ASYNC, HZ/50);
  995. gfp_flags |= __GFP_NOFAIL;
  996. err = 0;
  997. goto retry_encrypt;
  998. }
  999. goto out_writepage;
  1000. }
  1001. }
  1002. set_page_writeback(page);
  1003. /*
  1004. * If current allocation needs SSR,
  1005. * it had better in-place writes for updated data.
  1006. */
  1007. if (unlikely(fio->old_blkaddr != NEW_ADDR &&
  1008. !is_cold_data(page) &&
  1009. !IS_ATOMIC_WRITTEN_PAGE(page) &&
  1010. need_inplace_update(inode))) {
  1011. rewrite_data_page(fio);
  1012. set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
  1013. trace_f2fs_do_write_data_page(page, IPU);
  1014. } else {
  1015. write_data_page(&dn, fio);
  1016. trace_f2fs_do_write_data_page(page, OPU);
  1017. set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
  1018. if (page->index == 0)
  1019. set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
  1020. }
  1021. out_writepage:
  1022. f2fs_put_dnode(&dn);
  1023. return err;
  1024. }
  1025. static int f2fs_write_data_page(struct page *page,
  1026. struct writeback_control *wbc)
  1027. {
  1028. struct inode *inode = page->mapping->host;
  1029. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1030. loff_t i_size = i_size_read(inode);
  1031. const pgoff_t end_index = ((unsigned long long) i_size)
  1032. >> PAGE_SHIFT;
  1033. unsigned offset = 0;
  1034. bool need_balance_fs = false;
  1035. int err = 0;
  1036. struct f2fs_io_info fio = {
  1037. .sbi = sbi,
  1038. .type = DATA,
  1039. .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
  1040. .page = page,
  1041. .encrypted_page = NULL,
  1042. };
  1043. trace_f2fs_writepage(page, DATA);
  1044. if (page->index < end_index)
  1045. goto write;
  1046. /*
  1047. * If the offset is out-of-range of file size,
  1048. * this page does not have to be written to disk.
  1049. */
  1050. offset = i_size & (PAGE_SIZE - 1);
  1051. if ((page->index >= end_index + 1) || !offset)
  1052. goto out;
  1053. zero_user_segment(page, offset, PAGE_SIZE);
  1054. write:
  1055. if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
  1056. goto redirty_out;
  1057. if (f2fs_is_drop_cache(inode))
  1058. goto out;
  1059. /* we should not write 0'th page having journal header */
  1060. if (f2fs_is_volatile_file(inode) && (!page->index ||
  1061. (!wbc->for_reclaim &&
  1062. available_free_memory(sbi, BASE_CHECK))))
  1063. goto redirty_out;
  1064. /* Dentry blocks are controlled by checkpoint */
  1065. if (S_ISDIR(inode->i_mode)) {
  1066. if (unlikely(f2fs_cp_error(sbi)))
  1067. goto redirty_out;
  1068. err = do_write_data_page(&fio);
  1069. goto done;
  1070. }
  1071. /* we should bypass data pages to proceed the kworkder jobs */
  1072. if (unlikely(f2fs_cp_error(sbi))) {
  1073. SetPageError(page);
  1074. goto out;
  1075. }
  1076. if (!wbc->for_reclaim)
  1077. need_balance_fs = true;
  1078. else if (has_not_enough_free_secs(sbi, 0))
  1079. goto redirty_out;
  1080. err = -EAGAIN;
  1081. f2fs_lock_op(sbi);
  1082. if (f2fs_has_inline_data(inode))
  1083. err = f2fs_write_inline_data(inode, page);
  1084. if (err == -EAGAIN)
  1085. err = do_write_data_page(&fio);
  1086. f2fs_unlock_op(sbi);
  1087. done:
  1088. if (err && err != -ENOENT)
  1089. goto redirty_out;
  1090. clear_cold_data(page);
  1091. out:
  1092. inode_dec_dirty_pages(inode);
  1093. if (err)
  1094. ClearPageUptodate(page);
  1095. if (wbc->for_reclaim) {
  1096. f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE);
  1097. remove_dirty_inode(inode);
  1098. }
  1099. unlock_page(page);
  1100. f2fs_balance_fs(sbi, need_balance_fs);
  1101. if (unlikely(f2fs_cp_error(sbi)))
  1102. f2fs_submit_merged_bio(sbi, DATA, WRITE);
  1103. return 0;
  1104. redirty_out:
  1105. redirty_page_for_writepage(wbc, page);
  1106. return AOP_WRITEPAGE_ACTIVATE;
  1107. }
  1108. static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
  1109. void *data)
  1110. {
  1111. struct address_space *mapping = data;
  1112. int ret = mapping->a_ops->writepage(page, wbc);
  1113. mapping_set_error(mapping, ret);
  1114. return ret;
  1115. }
  1116. /*
  1117. * This function was copied from write_cche_pages from mm/page-writeback.c.
  1118. * The major change is making write step of cold data page separately from
  1119. * warm/hot data page.
  1120. */
  1121. static int f2fs_write_cache_pages(struct address_space *mapping,
  1122. struct writeback_control *wbc, writepage_t writepage,
  1123. void *data)
  1124. {
  1125. int ret = 0;
  1126. int done = 0;
  1127. struct pagevec pvec;
  1128. int nr_pages;
  1129. pgoff_t uninitialized_var(writeback_index);
  1130. pgoff_t index;
  1131. pgoff_t end; /* Inclusive */
  1132. pgoff_t done_index;
  1133. int cycled;
  1134. int range_whole = 0;
  1135. int tag;
  1136. int step = 0;
  1137. pagevec_init(&pvec, 0);
  1138. next:
  1139. if (wbc->range_cyclic) {
  1140. writeback_index = mapping->writeback_index; /* prev offset */
  1141. index = writeback_index;
  1142. if (index == 0)
  1143. cycled = 1;
  1144. else
  1145. cycled = 0;
  1146. end = -1;
  1147. } else {
  1148. index = wbc->range_start >> PAGE_SHIFT;
  1149. end = wbc->range_end >> PAGE_SHIFT;
  1150. if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
  1151. range_whole = 1;
  1152. cycled = 1; /* ignore range_cyclic tests */
  1153. }
  1154. if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
  1155. tag = PAGECACHE_TAG_TOWRITE;
  1156. else
  1157. tag = PAGECACHE_TAG_DIRTY;
  1158. retry:
  1159. if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
  1160. tag_pages_for_writeback(mapping, index, end);
  1161. done_index = index;
  1162. while (!done && (index <= end)) {
  1163. int i;
  1164. nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
  1165. min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
  1166. if (nr_pages == 0)
  1167. break;
  1168. for (i = 0; i < nr_pages; i++) {
  1169. struct page *page = pvec.pages[i];
  1170. if (page->index > end) {
  1171. done = 1;
  1172. break;
  1173. }
  1174. done_index = page->index;
  1175. lock_page(page);
  1176. if (unlikely(page->mapping != mapping)) {
  1177. continue_unlock:
  1178. unlock_page(page);
  1179. continue;
  1180. }
  1181. if (!PageDirty(page)) {
  1182. /* someone wrote it for us */
  1183. goto continue_unlock;
  1184. }
  1185. if (step == is_cold_data(page))
  1186. goto continue_unlock;
  1187. if (PageWriteback(page)) {
  1188. if (wbc->sync_mode != WB_SYNC_NONE)
  1189. f2fs_wait_on_page_writeback(page,
  1190. DATA, true);
  1191. else
  1192. goto continue_unlock;
  1193. }
  1194. BUG_ON(PageWriteback(page));
  1195. if (!clear_page_dirty_for_io(page))
  1196. goto continue_unlock;
  1197. ret = (*writepage)(page, wbc, data);
  1198. if (unlikely(ret)) {
  1199. if (ret == AOP_WRITEPAGE_ACTIVATE) {
  1200. unlock_page(page);
  1201. ret = 0;
  1202. } else {
  1203. done_index = page->index + 1;
  1204. done = 1;
  1205. break;
  1206. }
  1207. }
  1208. if (--wbc->nr_to_write <= 0 &&
  1209. wbc->sync_mode == WB_SYNC_NONE) {
  1210. done = 1;
  1211. break;
  1212. }
  1213. }
  1214. pagevec_release(&pvec);
  1215. cond_resched();
  1216. }
  1217. if (step < 1) {
  1218. step++;
  1219. goto next;
  1220. }
  1221. if (!cycled && !done) {
  1222. cycled = 1;
  1223. index = 0;
  1224. end = writeback_index - 1;
  1225. goto retry;
  1226. }
  1227. if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
  1228. mapping->writeback_index = done_index;
  1229. return ret;
  1230. }
  1231. static int f2fs_write_data_pages(struct address_space *mapping,
  1232. struct writeback_control *wbc)
  1233. {
  1234. struct inode *inode = mapping->host;
  1235. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1236. bool locked = false;
  1237. int ret;
  1238. long diff;
  1239. /* deal with chardevs and other special file */
  1240. if (!mapping->a_ops->writepage)
  1241. return 0;
  1242. /* skip writing if there is no dirty page in this inode */
  1243. if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
  1244. return 0;
  1245. if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
  1246. get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
  1247. available_free_memory(sbi, DIRTY_DENTS))
  1248. goto skip_write;
  1249. /* skip writing during file defragment */
  1250. if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG))
  1251. goto skip_write;
  1252. /* during POR, we don't need to trigger writepage at all. */
  1253. if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
  1254. goto skip_write;
  1255. trace_f2fs_writepages(mapping->host, wbc, DATA);
  1256. diff = nr_pages_to_write(sbi, DATA, wbc);
  1257. if (!S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_ALL) {
  1258. mutex_lock(&sbi->writepages);
  1259. locked = true;
  1260. }
  1261. ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
  1262. f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
  1263. if (locked)
  1264. mutex_unlock(&sbi->writepages);
  1265. remove_dirty_inode(inode);
  1266. wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
  1267. return ret;
  1268. skip_write:
  1269. wbc->pages_skipped += get_dirty_pages(inode);
  1270. trace_f2fs_writepages(mapping->host, wbc, DATA);
  1271. return 0;
  1272. }
  1273. static void f2fs_write_failed(struct address_space *mapping, loff_t to)
  1274. {
  1275. struct inode *inode = mapping->host;
  1276. loff_t i_size = i_size_read(inode);
  1277. if (to > i_size) {
  1278. truncate_pagecache(inode, i_size);
  1279. truncate_blocks(inode, i_size, true);
  1280. }
  1281. }
  1282. static int prepare_write_begin(struct f2fs_sb_info *sbi,
  1283. struct page *page, loff_t pos, unsigned len,
  1284. block_t *blk_addr, bool *node_changed)
  1285. {
  1286. struct inode *inode = page->mapping->host;
  1287. pgoff_t index = page->index;
  1288. struct dnode_of_data dn;
  1289. struct page *ipage;
  1290. bool locked = false;
  1291. struct extent_info ei;
  1292. int err = 0;
  1293. /*
  1294. * we already allocated all the blocks, so we don't need to get
  1295. * the block addresses when there is no need to fill the page.
  1296. */
  1297. if (!f2fs_has_inline_data(inode) && !f2fs_encrypted_inode(inode) &&
  1298. len == PAGE_SIZE)
  1299. return 0;
  1300. if (f2fs_has_inline_data(inode) ||
  1301. (pos & PAGE_MASK) >= i_size_read(inode)) {
  1302. f2fs_lock_op(sbi);
  1303. locked = true;
  1304. }
  1305. restart:
  1306. /* check inline_data */
  1307. ipage = get_node_page(sbi, inode->i_ino);
  1308. if (IS_ERR(ipage)) {
  1309. err = PTR_ERR(ipage);
  1310. goto unlock_out;
  1311. }
  1312. set_new_dnode(&dn, inode, ipage, ipage, 0);
  1313. if (f2fs_has_inline_data(inode)) {
  1314. if (pos + len <= MAX_INLINE_DATA) {
  1315. read_inline_data(page, ipage);
  1316. set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
  1317. if (inode->i_nlink)
  1318. set_inline_node(ipage);
  1319. } else {
  1320. err = f2fs_convert_inline_page(&dn, page);
  1321. if (err)
  1322. goto out;
  1323. if (dn.data_blkaddr == NULL_ADDR)
  1324. err = f2fs_get_block(&dn, index);
  1325. }
  1326. } else if (locked) {
  1327. err = f2fs_get_block(&dn, index);
  1328. } else {
  1329. if (f2fs_lookup_extent_cache(inode, index, &ei)) {
  1330. dn.data_blkaddr = ei.blk + index - ei.fofs;
  1331. } else {
  1332. /* hole case */
  1333. err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
  1334. if (err || dn.data_blkaddr == NULL_ADDR) {
  1335. f2fs_put_dnode(&dn);
  1336. f2fs_lock_op(sbi);
  1337. locked = true;
  1338. goto restart;
  1339. }
  1340. }
  1341. }
  1342. /* convert_inline_page can make node_changed */
  1343. *blk_addr = dn.data_blkaddr;
  1344. *node_changed = dn.node_changed;
  1345. out:
  1346. f2fs_put_dnode(&dn);
  1347. unlock_out:
  1348. if (locked)
  1349. f2fs_unlock_op(sbi);
  1350. return err;
  1351. }
  1352. static int f2fs_write_begin(struct file *file, struct address_space *mapping,
  1353. loff_t pos, unsigned len, unsigned flags,
  1354. struct page **pagep, void **fsdata)
  1355. {
  1356. struct inode *inode = mapping->host;
  1357. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1358. struct page *page = NULL;
  1359. pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
  1360. bool need_balance = false;
  1361. block_t blkaddr = NULL_ADDR;
  1362. int err = 0;
  1363. trace_f2fs_write_begin(inode, pos, len, flags);
  1364. /*
  1365. * We should check this at this moment to avoid deadlock on inode page
  1366. * and #0 page. The locking rule for inline_data conversion should be:
  1367. * lock_page(page #0) -> lock_page(inode_page)
  1368. */
  1369. if (index != 0) {
  1370. err = f2fs_convert_inline_inode(inode);
  1371. if (err)
  1372. goto fail;
  1373. }
  1374. repeat:
  1375. page = grab_cache_page_write_begin(mapping, index, flags);
  1376. if (!page) {
  1377. err = -ENOMEM;
  1378. goto fail;
  1379. }
  1380. *pagep = page;
  1381. err = prepare_write_begin(sbi, page, pos, len,
  1382. &blkaddr, &need_balance);
  1383. if (err)
  1384. goto fail;
  1385. if (need_balance && has_not_enough_free_secs(sbi, 0)) {
  1386. unlock_page(page);
  1387. f2fs_balance_fs(sbi, true);
  1388. lock_page(page);
  1389. if (page->mapping != mapping) {
  1390. /* The page got truncated from under us */
  1391. f2fs_put_page(page, 1);
  1392. goto repeat;
  1393. }
  1394. }
  1395. f2fs_wait_on_page_writeback(page, DATA, false);
  1396. /* wait for GCed encrypted page writeback */
  1397. if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
  1398. f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
  1399. if (len == PAGE_SIZE)
  1400. goto out_update;
  1401. if (PageUptodate(page))
  1402. goto out_clear;
  1403. if ((pos & PAGE_MASK) >= i_size_read(inode)) {
  1404. unsigned start = pos & (PAGE_SIZE - 1);
  1405. unsigned end = start + len;
  1406. /* Reading beyond i_size is simple: memset to zero */
  1407. zero_user_segments(page, 0, start, end, PAGE_SIZE);
  1408. goto out_update;
  1409. }
  1410. if (blkaddr == NEW_ADDR) {
  1411. zero_user_segment(page, 0, PAGE_SIZE);
  1412. } else {
  1413. struct f2fs_io_info fio = {
  1414. .sbi = sbi,
  1415. .type = DATA,
  1416. .rw = READ_SYNC,
  1417. .old_blkaddr = blkaddr,
  1418. .new_blkaddr = blkaddr,
  1419. .page = page,
  1420. .encrypted_page = NULL,
  1421. };
  1422. err = f2fs_submit_page_bio(&fio);
  1423. if (err)
  1424. goto fail;
  1425. lock_page(page);
  1426. if (unlikely(!PageUptodate(page))) {
  1427. err = -EIO;
  1428. goto fail;
  1429. }
  1430. if (unlikely(page->mapping != mapping)) {
  1431. f2fs_put_page(page, 1);
  1432. goto repeat;
  1433. }
  1434. /* avoid symlink page */
  1435. if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
  1436. err = fscrypt_decrypt_page(page);
  1437. if (err)
  1438. goto fail;
  1439. }
  1440. }
  1441. out_update:
  1442. SetPageUptodate(page);
  1443. out_clear:
  1444. clear_cold_data(page);
  1445. return 0;
  1446. fail:
  1447. f2fs_put_page(page, 1);
  1448. f2fs_write_failed(mapping, pos + len);
  1449. return err;
  1450. }
  1451. static int f2fs_write_end(struct file *file,
  1452. struct address_space *mapping,
  1453. loff_t pos, unsigned len, unsigned copied,
  1454. struct page *page, void *fsdata)
  1455. {
  1456. struct inode *inode = page->mapping->host;
  1457. trace_f2fs_write_end(inode, pos, len, copied);
  1458. set_page_dirty(page);
  1459. if (pos + copied > i_size_read(inode)) {
  1460. i_size_write(inode, pos + copied);
  1461. mark_inode_dirty(inode);
  1462. }
  1463. f2fs_put_page(page, 1);
  1464. f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
  1465. return copied;
  1466. }
  1467. static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
  1468. loff_t offset)
  1469. {
  1470. unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
  1471. if (offset & blocksize_mask)
  1472. return -EINVAL;
  1473. if (iov_iter_alignment(iter) & blocksize_mask)
  1474. return -EINVAL;
  1475. return 0;
  1476. }
  1477. static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
  1478. {
  1479. struct address_space *mapping = iocb->ki_filp->f_mapping;
  1480. struct inode *inode = mapping->host;
  1481. size_t count = iov_iter_count(iter);
  1482. loff_t offset = iocb->ki_pos;
  1483. int err;
  1484. err = check_direct_IO(inode, iter, offset);
  1485. if (err)
  1486. return err;
  1487. if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
  1488. return 0;
  1489. trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
  1490. err = blockdev_direct_IO(iocb, inode, iter, get_data_block_dio);
  1491. if (iov_iter_rw(iter) == WRITE) {
  1492. if (err > 0)
  1493. set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
  1494. else if (err < 0)
  1495. f2fs_write_failed(mapping, offset + count);
  1496. }
  1497. trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err);
  1498. return err;
  1499. }
  1500. void f2fs_invalidate_page(struct page *page, unsigned int offset,
  1501. unsigned int length)
  1502. {
  1503. struct inode *inode = page->mapping->host;
  1504. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1505. if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
  1506. (offset % PAGE_SIZE || length != PAGE_SIZE))
  1507. return;
  1508. if (PageDirty(page)) {
  1509. if (inode->i_ino == F2FS_META_INO(sbi))
  1510. dec_page_count(sbi, F2FS_DIRTY_META);
  1511. else if (inode->i_ino == F2FS_NODE_INO(sbi))
  1512. dec_page_count(sbi, F2FS_DIRTY_NODES);
  1513. else
  1514. inode_dec_dirty_pages(inode);
  1515. }
  1516. /* This is atomic written page, keep Private */
  1517. if (IS_ATOMIC_WRITTEN_PAGE(page))
  1518. return;
  1519. set_page_private(page, 0);
  1520. ClearPagePrivate(page);
  1521. }
  1522. int f2fs_release_page(struct page *page, gfp_t wait)
  1523. {
  1524. /* If this is dirty page, keep PagePrivate */
  1525. if (PageDirty(page))
  1526. return 0;
  1527. /* This is atomic written page, keep Private */
  1528. if (IS_ATOMIC_WRITTEN_PAGE(page))
  1529. return 0;
  1530. set_page_private(page, 0);
  1531. ClearPagePrivate(page);
  1532. return 1;
  1533. }
  1534. static int f2fs_set_data_page_dirty(struct page *page)
  1535. {
  1536. struct address_space *mapping = page->mapping;
  1537. struct inode *inode = mapping->host;
  1538. trace_f2fs_set_page_dirty(page, DATA);
  1539. SetPageUptodate(page);
  1540. if (f2fs_is_atomic_file(inode)) {
  1541. if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
  1542. register_inmem_page(inode, page);
  1543. return 1;
  1544. }
  1545. /*
  1546. * Previously, this page has been registered, we just
  1547. * return here.
  1548. */
  1549. return 0;
  1550. }
  1551. if (!PageDirty(page)) {
  1552. __set_page_dirty_nobuffers(page);
  1553. update_dirty_page(inode, page);
  1554. return 1;
  1555. }
  1556. return 0;
  1557. }
  1558. static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
  1559. {
  1560. struct inode *inode = mapping->host;
  1561. if (f2fs_has_inline_data(inode))
  1562. return 0;
  1563. /* make sure allocating whole blocks */
  1564. if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
  1565. filemap_write_and_wait(mapping);
  1566. return generic_block_bmap(mapping, block, get_data_block_bmap);
  1567. }
  1568. const struct address_space_operations f2fs_dblock_aops = {
  1569. .readpage = f2fs_read_data_page,
  1570. .readpages = f2fs_read_data_pages,
  1571. .writepage = f2fs_write_data_page,
  1572. .writepages = f2fs_write_data_pages,
  1573. .write_begin = f2fs_write_begin,
  1574. .write_end = f2fs_write_end,
  1575. .set_page_dirty = f2fs_set_data_page_dirty,
  1576. .invalidatepage = f2fs_invalidate_page,
  1577. .releasepage = f2fs_release_page,
  1578. .direct_IO = f2fs_direct_IO,
  1579. .bmap = f2fs_bmap,
  1580. };