data.c 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718171917201721172217231724172517261727172817291730173117321733173417351736173717381739174017411742174317441745174617471748174917501751
  1. /*
  2. * fs/f2fs/data.c
  3. *
  4. * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  5. * http://www.samsung.com/
  6. *
  7. * This program is free software; you can redistribute it and/or modify
  8. * it under the terms of the GNU General Public License version 2 as
  9. * published by the Free Software Foundation.
  10. */
  11. #include <linux/fs.h>
  12. #include <linux/f2fs_fs.h>
  13. #include <linux/buffer_head.h>
  14. #include <linux/mpage.h>
  15. #include <linux/writeback.h>
  16. #include <linux/backing-dev.h>
  17. #include <linux/pagevec.h>
  18. #include <linux/blkdev.h>
  19. #include <linux/bio.h>
  20. #include <linux/prefetch.h>
  21. #include <linux/uio.h>
  22. #include <linux/cleancache.h>
  23. #include "f2fs.h"
  24. #include "node.h"
  25. #include "segment.h"
  26. #include "trace.h"
  27. #include <trace/events/f2fs.h>
  28. static void f2fs_read_end_io(struct bio *bio)
  29. {
  30. struct bio_vec *bvec;
  31. int i;
  32. if (f2fs_bio_encrypted(bio)) {
  33. if (bio->bi_error) {
  34. f2fs_release_crypto_ctx(bio->bi_private);
  35. } else {
  36. f2fs_end_io_crypto_work(bio->bi_private, bio);
  37. return;
  38. }
  39. }
  40. bio_for_each_segment_all(bvec, bio, i) {
  41. struct page *page = bvec->bv_page;
  42. if (!bio->bi_error) {
  43. SetPageUptodate(page);
  44. } else {
  45. ClearPageUptodate(page);
  46. SetPageError(page);
  47. }
  48. unlock_page(page);
  49. }
  50. bio_put(bio);
  51. }
  52. static void f2fs_write_end_io(struct bio *bio)
  53. {
  54. struct f2fs_sb_info *sbi = bio->bi_private;
  55. struct bio_vec *bvec;
  56. int i;
  57. bio_for_each_segment_all(bvec, bio, i) {
  58. struct page *page = bvec->bv_page;
  59. f2fs_restore_and_release_control_page(&page);
  60. if (unlikely(bio->bi_error)) {
  61. set_page_dirty(page);
  62. set_bit(AS_EIO, &page->mapping->flags);
  63. f2fs_stop_checkpoint(sbi);
  64. }
  65. end_page_writeback(page);
  66. dec_page_count(sbi, F2FS_WRITEBACK);
  67. }
  68. if (!get_pages(sbi, F2FS_WRITEBACK) &&
  69. !list_empty(&sbi->cp_wait.task_list))
  70. wake_up(&sbi->cp_wait);
  71. bio_put(bio);
  72. }
  73. /*
  74. * Low-level block read/write IO operations.
  75. */
  76. static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
  77. int npages, bool is_read)
  78. {
  79. struct bio *bio;
  80. bio = f2fs_bio_alloc(npages);
  81. bio->bi_bdev = sbi->sb->s_bdev;
  82. bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
  83. bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
  84. bio->bi_private = is_read ? NULL : sbi;
  85. return bio;
  86. }
  87. static void __submit_merged_bio(struct f2fs_bio_info *io)
  88. {
  89. struct f2fs_io_info *fio = &io->fio;
  90. if (!io->bio)
  91. return;
  92. if (is_read_io(fio->rw))
  93. trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
  94. else
  95. trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
  96. submit_bio(fio->rw, io->bio);
  97. io->bio = NULL;
  98. }
  99. void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
  100. enum page_type type, int rw)
  101. {
  102. enum page_type btype = PAGE_TYPE_OF_BIO(type);
  103. struct f2fs_bio_info *io;
  104. io = is_read_io(rw) ? &sbi->read_io : &sbi->write_io[btype];
  105. down_write(&io->io_rwsem);
  106. /* change META to META_FLUSH in the checkpoint procedure */
  107. if (type >= META_FLUSH) {
  108. io->fio.type = META_FLUSH;
  109. if (test_opt(sbi, NOBARRIER))
  110. io->fio.rw = WRITE_FLUSH | REQ_META | REQ_PRIO;
  111. else
  112. io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
  113. }
  114. __submit_merged_bio(io);
  115. up_write(&io->io_rwsem);
  116. }
  117. /*
  118. * Fill the locked page with data located in the block address.
  119. * Return unlocked page.
  120. */
  121. int f2fs_submit_page_bio(struct f2fs_io_info *fio)
  122. {
  123. struct bio *bio;
  124. struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page;
  125. trace_f2fs_submit_page_bio(page, fio);
  126. f2fs_trace_ios(fio, 0);
  127. /* Allocate a new bio */
  128. bio = __bio_alloc(fio->sbi, fio->blk_addr, 1, is_read_io(fio->rw));
  129. if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
  130. bio_put(bio);
  131. return -EFAULT;
  132. }
  133. submit_bio(fio->rw, bio);
  134. return 0;
  135. }
  136. void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
  137. {
  138. struct f2fs_sb_info *sbi = fio->sbi;
  139. enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
  140. struct f2fs_bio_info *io;
  141. bool is_read = is_read_io(fio->rw);
  142. struct page *bio_page;
  143. io = is_read ? &sbi->read_io : &sbi->write_io[btype];
  144. verify_block_addr(sbi, fio->blk_addr);
  145. down_write(&io->io_rwsem);
  146. if (!is_read)
  147. inc_page_count(sbi, F2FS_WRITEBACK);
  148. if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 ||
  149. io->fio.rw != fio->rw))
  150. __submit_merged_bio(io);
  151. alloc_new:
  152. if (io->bio == NULL) {
  153. int bio_blocks = MAX_BIO_BLOCKS(sbi);
  154. io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read);
  155. io->fio = *fio;
  156. }
  157. bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
  158. if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) <
  159. PAGE_CACHE_SIZE) {
  160. __submit_merged_bio(io);
  161. goto alloc_new;
  162. }
  163. io->last_block_in_bio = fio->blk_addr;
  164. f2fs_trace_ios(fio, 0);
  165. up_write(&io->io_rwsem);
  166. trace_f2fs_submit_page_mbio(fio->page, fio);
  167. }
  168. /*
  169. * Lock ordering for the change of data block address:
  170. * ->data_page
  171. * ->node_page
  172. * update block addresses in the node page
  173. */
  174. void set_data_blkaddr(struct dnode_of_data *dn)
  175. {
  176. struct f2fs_node *rn;
  177. __le32 *addr_array;
  178. struct page *node_page = dn->node_page;
  179. unsigned int ofs_in_node = dn->ofs_in_node;
  180. f2fs_wait_on_page_writeback(node_page, NODE);
  181. rn = F2FS_NODE(node_page);
  182. /* Get physical address of data block */
  183. addr_array = blkaddr_in_node(rn);
  184. addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
  185. if (set_page_dirty(node_page))
  186. dn->node_changed = true;
  187. }
  188. int reserve_new_block(struct dnode_of_data *dn)
  189. {
  190. struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
  191. if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
  192. return -EPERM;
  193. if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
  194. return -ENOSPC;
  195. trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);
  196. dn->data_blkaddr = NEW_ADDR;
  197. set_data_blkaddr(dn);
  198. mark_inode_dirty(dn->inode);
  199. sync_inode_page(dn);
  200. return 0;
  201. }
  202. int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
  203. {
  204. bool need_put = dn->inode_page ? false : true;
  205. int err;
  206. err = get_dnode_of_data(dn, index, ALLOC_NODE);
  207. if (err)
  208. return err;
  209. if (dn->data_blkaddr == NULL_ADDR)
  210. err = reserve_new_block(dn);
  211. if (err || need_put)
  212. f2fs_put_dnode(dn);
  213. return err;
  214. }
  215. int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
  216. {
  217. struct extent_info ei;
  218. struct inode *inode = dn->inode;
  219. if (f2fs_lookup_extent_cache(inode, index, &ei)) {
  220. dn->data_blkaddr = ei.blk + index - ei.fofs;
  221. return 0;
  222. }
  223. return f2fs_reserve_block(dn, index);
  224. }
  225. struct page *get_read_data_page(struct inode *inode, pgoff_t index,
  226. int rw, bool for_write)
  227. {
  228. struct address_space *mapping = inode->i_mapping;
  229. struct dnode_of_data dn;
  230. struct page *page;
  231. struct extent_info ei;
  232. int err;
  233. struct f2fs_io_info fio = {
  234. .sbi = F2FS_I_SB(inode),
  235. .type = DATA,
  236. .rw = rw,
  237. .encrypted_page = NULL,
  238. };
  239. if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
  240. return read_mapping_page(mapping, index, NULL);
  241. page = f2fs_grab_cache_page(mapping, index, for_write);
  242. if (!page)
  243. return ERR_PTR(-ENOMEM);
  244. if (f2fs_lookup_extent_cache(inode, index, &ei)) {
  245. dn.data_blkaddr = ei.blk + index - ei.fofs;
  246. goto got_it;
  247. }
  248. set_new_dnode(&dn, inode, NULL, NULL, 0);
  249. err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
  250. if (err)
  251. goto put_err;
  252. f2fs_put_dnode(&dn);
  253. if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
  254. err = -ENOENT;
  255. goto put_err;
  256. }
  257. got_it:
  258. if (PageUptodate(page)) {
  259. unlock_page(page);
  260. return page;
  261. }
  262. /*
  263. * A new dentry page is allocated but not able to be written, since its
  264. * new inode page couldn't be allocated due to -ENOSPC.
  265. * In such the case, its blkaddr can be remained as NEW_ADDR.
  266. * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
  267. */
  268. if (dn.data_blkaddr == NEW_ADDR) {
  269. zero_user_segment(page, 0, PAGE_CACHE_SIZE);
  270. SetPageUptodate(page);
  271. unlock_page(page);
  272. return page;
  273. }
  274. fio.blk_addr = dn.data_blkaddr;
  275. fio.page = page;
  276. err = f2fs_submit_page_bio(&fio);
  277. if (err)
  278. goto put_err;
  279. return page;
  280. put_err:
  281. f2fs_put_page(page, 1);
  282. return ERR_PTR(err);
  283. }
  284. struct page *find_data_page(struct inode *inode, pgoff_t index)
  285. {
  286. struct address_space *mapping = inode->i_mapping;
  287. struct page *page;
  288. page = find_get_page(mapping, index);
  289. if (page && PageUptodate(page))
  290. return page;
  291. f2fs_put_page(page, 0);
  292. page = get_read_data_page(inode, index, READ_SYNC, false);
  293. if (IS_ERR(page))
  294. return page;
  295. if (PageUptodate(page))
  296. return page;
  297. wait_on_page_locked(page);
  298. if (unlikely(!PageUptodate(page))) {
  299. f2fs_put_page(page, 0);
  300. return ERR_PTR(-EIO);
  301. }
  302. return page;
  303. }
  304. /*
  305. * If it tries to access a hole, return an error.
  306. * Because, the callers, functions in dir.c and GC, should be able to know
  307. * whether this page exists or not.
  308. */
  309. struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
  310. bool for_write)
  311. {
  312. struct address_space *mapping = inode->i_mapping;
  313. struct page *page;
  314. repeat:
  315. page = get_read_data_page(inode, index, READ_SYNC, for_write);
  316. if (IS_ERR(page))
  317. return page;
  318. /* wait for read completion */
  319. lock_page(page);
  320. if (unlikely(!PageUptodate(page))) {
  321. f2fs_put_page(page, 1);
  322. return ERR_PTR(-EIO);
  323. }
  324. if (unlikely(page->mapping != mapping)) {
  325. f2fs_put_page(page, 1);
  326. goto repeat;
  327. }
  328. return page;
  329. }
  330. /*
  331. * Caller ensures that this data page is never allocated.
  332. * A new zero-filled data page is allocated in the page cache.
  333. *
  334. * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
  335. * f2fs_unlock_op().
  336. * Note that, ipage is set only by make_empty_dir, and if any error occur,
  337. * ipage should be released by this function.
  338. */
  339. struct page *get_new_data_page(struct inode *inode,
  340. struct page *ipage, pgoff_t index, bool new_i_size)
  341. {
  342. struct address_space *mapping = inode->i_mapping;
  343. struct page *page;
  344. struct dnode_of_data dn;
  345. int err;
  346. page = f2fs_grab_cache_page(mapping, index, true);
  347. if (!page) {
  348. /*
  349. * before exiting, we should make sure ipage will be released
  350. * if any error occur.
  351. */
  352. f2fs_put_page(ipage, 1);
  353. return ERR_PTR(-ENOMEM);
  354. }
  355. set_new_dnode(&dn, inode, ipage, NULL, 0);
  356. err = f2fs_reserve_block(&dn, index);
  357. if (err) {
  358. f2fs_put_page(page, 1);
  359. return ERR_PTR(err);
  360. }
  361. if (!ipage)
  362. f2fs_put_dnode(&dn);
  363. if (PageUptodate(page))
  364. goto got_it;
  365. if (dn.data_blkaddr == NEW_ADDR) {
  366. zero_user_segment(page, 0, PAGE_CACHE_SIZE);
  367. SetPageUptodate(page);
  368. } else {
  369. f2fs_put_page(page, 1);
  370. /* if ipage exists, blkaddr should be NEW_ADDR */
  371. f2fs_bug_on(F2FS_I_SB(inode), ipage);
  372. page = get_lock_data_page(inode, index, true);
  373. if (IS_ERR(page))
  374. return page;
  375. }
  376. got_it:
  377. if (new_i_size && i_size_read(inode) <
  378. ((loff_t)(index + 1) << PAGE_CACHE_SHIFT)) {
  379. i_size_write(inode, ((loff_t)(index + 1) << PAGE_CACHE_SHIFT));
  380. /* Only the directory inode sets new_i_size */
  381. set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
  382. }
  383. return page;
  384. }
  385. static int __allocate_data_block(struct dnode_of_data *dn)
  386. {
  387. struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
  388. struct f2fs_inode_info *fi = F2FS_I(dn->inode);
  389. struct f2fs_summary sum;
  390. struct node_info ni;
  391. int seg = CURSEG_WARM_DATA;
  392. pgoff_t fofs;
  393. if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
  394. return -EPERM;
  395. dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
  396. if (dn->data_blkaddr == NEW_ADDR)
  397. goto alloc;
  398. if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
  399. return -ENOSPC;
  400. alloc:
  401. get_node_info(sbi, dn->nid, &ni);
  402. set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
  403. if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
  404. seg = CURSEG_DIRECT_IO;
  405. allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
  406. &sum, seg);
  407. set_data_blkaddr(dn);
  408. /* update i_size */
  409. fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
  410. dn->ofs_in_node;
  411. if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
  412. i_size_write(dn->inode,
  413. ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT));
  414. return 0;
  415. }
  416. static int __allocate_data_blocks(struct inode *inode, loff_t offset,
  417. size_t count)
  418. {
  419. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  420. struct dnode_of_data dn;
  421. u64 start = F2FS_BYTES_TO_BLK(offset);
  422. u64 len = F2FS_BYTES_TO_BLK(count);
  423. bool allocated;
  424. u64 end_offset;
  425. int err = 0;
  426. while (len) {
  427. f2fs_lock_op(sbi);
  428. /* When reading holes, we need its node page */
  429. set_new_dnode(&dn, inode, NULL, NULL, 0);
  430. err = get_dnode_of_data(&dn, start, ALLOC_NODE);
  431. if (err)
  432. goto out;
  433. allocated = false;
  434. end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
  435. while (dn.ofs_in_node < end_offset && len) {
  436. block_t blkaddr;
  437. if (unlikely(f2fs_cp_error(sbi))) {
  438. err = -EIO;
  439. goto sync_out;
  440. }
  441. blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
  442. if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
  443. err = __allocate_data_block(&dn);
  444. if (err)
  445. goto sync_out;
  446. allocated = true;
  447. }
  448. len--;
  449. start++;
  450. dn.ofs_in_node++;
  451. }
  452. if (allocated)
  453. sync_inode_page(&dn);
  454. f2fs_put_dnode(&dn);
  455. f2fs_unlock_op(sbi);
  456. f2fs_balance_fs(sbi, dn.node_changed);
  457. }
  458. return err;
  459. sync_out:
  460. if (allocated)
  461. sync_inode_page(&dn);
  462. f2fs_put_dnode(&dn);
  463. out:
  464. f2fs_unlock_op(sbi);
  465. f2fs_balance_fs(sbi, dn.node_changed);
  466. return err;
  467. }
  468. /*
  469. * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
  470. * f2fs_map_blocks structure.
  471. * If original data blocks are allocated, then give them to blockdev.
  472. * Otherwise,
  473. * a. preallocate requested block addresses
  474. * b. do not use extent cache for better performance
  475. * c. give the block addresses to blockdev
  476. */
  477. int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
  478. int create, int flag)
  479. {
  480. unsigned int maxblocks = map->m_len;
  481. struct dnode_of_data dn;
  482. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  483. int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
  484. pgoff_t pgofs, end_offset;
  485. int err = 0, ofs = 1;
  486. struct extent_info ei;
  487. bool allocated = false;
  488. block_t blkaddr;
  489. map->m_len = 0;
  490. map->m_flags = 0;
  491. /* it only supports block size == page size */
  492. pgofs = (pgoff_t)map->m_lblk;
  493. if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
  494. map->m_pblk = ei.blk + pgofs - ei.fofs;
  495. map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
  496. map->m_flags = F2FS_MAP_MAPPED;
  497. goto out;
  498. }
  499. if (create)
  500. f2fs_lock_op(sbi);
  501. /* When reading holes, we need its node page */
  502. set_new_dnode(&dn, inode, NULL, NULL, 0);
  503. err = get_dnode_of_data(&dn, pgofs, mode);
  504. if (err) {
  505. if (err == -ENOENT)
  506. err = 0;
  507. goto unlock_out;
  508. }
  509. if (dn.data_blkaddr == NEW_ADDR || dn.data_blkaddr == NULL_ADDR) {
  510. if (create) {
  511. if (unlikely(f2fs_cp_error(sbi))) {
  512. err = -EIO;
  513. goto put_out;
  514. }
  515. err = __allocate_data_block(&dn);
  516. if (err)
  517. goto put_out;
  518. allocated = true;
  519. map->m_flags = F2FS_MAP_NEW;
  520. } else {
  521. if (flag != F2FS_GET_BLOCK_FIEMAP ||
  522. dn.data_blkaddr != NEW_ADDR) {
  523. if (flag == F2FS_GET_BLOCK_BMAP)
  524. err = -ENOENT;
  525. goto put_out;
  526. }
  527. /*
  528. * preallocated unwritten block should be mapped
  529. * for fiemap.
  530. */
  531. if (dn.data_blkaddr == NEW_ADDR)
  532. map->m_flags = F2FS_MAP_UNWRITTEN;
  533. }
  534. }
  535. map->m_flags |= F2FS_MAP_MAPPED;
  536. map->m_pblk = dn.data_blkaddr;
  537. map->m_len = 1;
  538. end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
  539. dn.ofs_in_node++;
  540. pgofs++;
  541. get_next:
  542. if (map->m_len >= maxblocks)
  543. goto sync_out;
  544. if (dn.ofs_in_node >= end_offset) {
  545. if (allocated)
  546. sync_inode_page(&dn);
  547. allocated = false;
  548. f2fs_put_dnode(&dn);
  549. if (create) {
  550. f2fs_unlock_op(sbi);
  551. f2fs_balance_fs(sbi, dn.node_changed);
  552. f2fs_lock_op(sbi);
  553. }
  554. set_new_dnode(&dn, inode, NULL, NULL, 0);
  555. err = get_dnode_of_data(&dn, pgofs, mode);
  556. if (err) {
  557. if (err == -ENOENT)
  558. err = 0;
  559. goto unlock_out;
  560. }
  561. end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
  562. }
  563. blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
  564. if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
  565. if (create) {
  566. if (unlikely(f2fs_cp_error(sbi))) {
  567. err = -EIO;
  568. goto sync_out;
  569. }
  570. err = __allocate_data_block(&dn);
  571. if (err)
  572. goto sync_out;
  573. allocated = true;
  574. map->m_flags |= F2FS_MAP_NEW;
  575. blkaddr = dn.data_blkaddr;
  576. } else {
  577. /*
  578. * we only merge preallocated unwritten blocks
  579. * for fiemap.
  580. */
  581. if (flag != F2FS_GET_BLOCK_FIEMAP ||
  582. blkaddr != NEW_ADDR)
  583. goto sync_out;
  584. }
  585. }
  586. /* Give more consecutive addresses for the readahead */
  587. if ((map->m_pblk != NEW_ADDR &&
  588. blkaddr == (map->m_pblk + ofs)) ||
  589. (map->m_pblk == NEW_ADDR &&
  590. blkaddr == NEW_ADDR)) {
  591. ofs++;
  592. dn.ofs_in_node++;
  593. pgofs++;
  594. map->m_len++;
  595. goto get_next;
  596. }
  597. sync_out:
  598. if (allocated)
  599. sync_inode_page(&dn);
  600. put_out:
  601. f2fs_put_dnode(&dn);
  602. unlock_out:
  603. if (create) {
  604. f2fs_unlock_op(sbi);
  605. f2fs_balance_fs(sbi, dn.node_changed);
  606. }
  607. out:
  608. trace_f2fs_map_blocks(inode, map, err);
  609. return err;
  610. }
  611. static int __get_data_block(struct inode *inode, sector_t iblock,
  612. struct buffer_head *bh, int create, int flag)
  613. {
  614. struct f2fs_map_blocks map;
  615. int ret;
  616. map.m_lblk = iblock;
  617. map.m_len = bh->b_size >> inode->i_blkbits;
  618. ret = f2fs_map_blocks(inode, &map, create, flag);
  619. if (!ret) {
  620. map_bh(bh, inode->i_sb, map.m_pblk);
  621. bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
  622. bh->b_size = map.m_len << inode->i_blkbits;
  623. }
  624. return ret;
  625. }
  626. static int get_data_block(struct inode *inode, sector_t iblock,
  627. struct buffer_head *bh_result, int create, int flag)
  628. {
  629. return __get_data_block(inode, iblock, bh_result, create, flag);
  630. }
  631. static int get_data_block_dio(struct inode *inode, sector_t iblock,
  632. struct buffer_head *bh_result, int create)
  633. {
  634. return __get_data_block(inode, iblock, bh_result, create,
  635. F2FS_GET_BLOCK_DIO);
  636. }
  637. static int get_data_block_bmap(struct inode *inode, sector_t iblock,
  638. struct buffer_head *bh_result, int create)
  639. {
  640. /* Block number less than F2FS MAX BLOCKS */
  641. if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
  642. return -EFBIG;
  643. return __get_data_block(inode, iblock, bh_result, create,
  644. F2FS_GET_BLOCK_BMAP);
  645. }
  646. static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
  647. {
  648. return (offset >> inode->i_blkbits);
  649. }
  650. static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
  651. {
  652. return (blk << inode->i_blkbits);
  653. }
  654. int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
  655. u64 start, u64 len)
  656. {
  657. struct buffer_head map_bh;
  658. sector_t start_blk, last_blk;
  659. loff_t isize;
  660. u64 logical = 0, phys = 0, size = 0;
  661. u32 flags = 0;
  662. int ret = 0;
  663. ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
  664. if (ret)
  665. return ret;
  666. if (f2fs_has_inline_data(inode)) {
  667. ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
  668. if (ret != -EAGAIN)
  669. return ret;
  670. }
  671. inode_lock(inode);
  672. isize = i_size_read(inode);
  673. if (start >= isize)
  674. goto out;
  675. if (start + len > isize)
  676. len = isize - start;
  677. if (logical_to_blk(inode, len) == 0)
  678. len = blk_to_logical(inode, 1);
  679. start_blk = logical_to_blk(inode, start);
  680. last_blk = logical_to_blk(inode, start + len - 1);
  681. next:
  682. memset(&map_bh, 0, sizeof(struct buffer_head));
  683. map_bh.b_size = len;
  684. ret = get_data_block(inode, start_blk, &map_bh, 0,
  685. F2FS_GET_BLOCK_FIEMAP);
  686. if (ret)
  687. goto out;
  688. /* HOLE */
  689. if (!buffer_mapped(&map_bh)) {
  690. /* Go through holes util pass the EOF */
  691. if (blk_to_logical(inode, start_blk++) < isize)
  692. goto prep_next;
  693. /* Found a hole beyond isize means no more extents.
  694. * Note that the premise is that filesystems don't
  695. * punch holes beyond isize and keep size unchanged.
  696. */
  697. flags |= FIEMAP_EXTENT_LAST;
  698. }
  699. if (size) {
  700. if (f2fs_encrypted_inode(inode))
  701. flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;
  702. ret = fiemap_fill_next_extent(fieinfo, logical,
  703. phys, size, flags);
  704. }
  705. if (start_blk > last_blk || ret)
  706. goto out;
  707. logical = blk_to_logical(inode, start_blk);
  708. phys = blk_to_logical(inode, map_bh.b_blocknr);
  709. size = map_bh.b_size;
  710. flags = 0;
  711. if (buffer_unwritten(&map_bh))
  712. flags = FIEMAP_EXTENT_UNWRITTEN;
  713. start_blk += logical_to_blk(inode, size);
  714. prep_next:
  715. cond_resched();
  716. if (fatal_signal_pending(current))
  717. ret = -EINTR;
  718. else
  719. goto next;
  720. out:
  721. if (ret == 1)
  722. ret = 0;
  723. inode_unlock(inode);
  724. return ret;
  725. }
  726. /*
  727. * This function was originally taken from fs/mpage.c, and customized for f2fs.
  728. * Major change was from block_size == page_size in f2fs by default.
  729. */
  730. static int f2fs_mpage_readpages(struct address_space *mapping,
  731. struct list_head *pages, struct page *page,
  732. unsigned nr_pages)
  733. {
  734. struct bio *bio = NULL;
  735. unsigned page_idx;
  736. sector_t last_block_in_bio = 0;
  737. struct inode *inode = mapping->host;
  738. const unsigned blkbits = inode->i_blkbits;
  739. const unsigned blocksize = 1 << blkbits;
  740. sector_t block_in_file;
  741. sector_t last_block;
  742. sector_t last_block_in_file;
  743. sector_t block_nr;
  744. struct block_device *bdev = inode->i_sb->s_bdev;
  745. struct f2fs_map_blocks map;
  746. map.m_pblk = 0;
  747. map.m_lblk = 0;
  748. map.m_len = 0;
  749. map.m_flags = 0;
  750. for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
  751. prefetchw(&page->flags);
  752. if (pages) {
  753. page = list_entry(pages->prev, struct page, lru);
  754. list_del(&page->lru);
  755. if (add_to_page_cache_lru(page, mapping,
  756. page->index, GFP_KERNEL))
  757. goto next_page;
  758. }
  759. block_in_file = (sector_t)page->index;
  760. last_block = block_in_file + nr_pages;
  761. last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
  762. blkbits;
  763. if (last_block > last_block_in_file)
  764. last_block = last_block_in_file;
  765. /*
  766. * Map blocks using the previous result first.
  767. */
  768. if ((map.m_flags & F2FS_MAP_MAPPED) &&
  769. block_in_file > map.m_lblk &&
  770. block_in_file < (map.m_lblk + map.m_len))
  771. goto got_it;
  772. /*
  773. * Then do more f2fs_map_blocks() calls until we are
  774. * done with this page.
  775. */
  776. map.m_flags = 0;
  777. if (block_in_file < last_block) {
  778. map.m_lblk = block_in_file;
  779. map.m_len = last_block - block_in_file;
  780. if (f2fs_map_blocks(inode, &map, 0,
  781. F2FS_GET_BLOCK_READ))
  782. goto set_error_page;
  783. }
  784. got_it:
  785. if ((map.m_flags & F2FS_MAP_MAPPED)) {
  786. block_nr = map.m_pblk + block_in_file - map.m_lblk;
  787. SetPageMappedToDisk(page);
  788. if (!PageUptodate(page) && !cleancache_get_page(page)) {
  789. SetPageUptodate(page);
  790. goto confused;
  791. }
  792. } else {
  793. zero_user_segment(page, 0, PAGE_CACHE_SIZE);
  794. SetPageUptodate(page);
  795. unlock_page(page);
  796. goto next_page;
  797. }
  798. /*
  799. * This page will go to BIO. Do we need to send this
  800. * BIO off first?
  801. */
  802. if (bio && (last_block_in_bio != block_nr - 1)) {
  803. submit_and_realloc:
  804. submit_bio(READ, bio);
  805. bio = NULL;
  806. }
  807. if (bio == NULL) {
  808. struct f2fs_crypto_ctx *ctx = NULL;
  809. if (f2fs_encrypted_inode(inode) &&
  810. S_ISREG(inode->i_mode)) {
  811. ctx = f2fs_get_crypto_ctx(inode);
  812. if (IS_ERR(ctx))
  813. goto set_error_page;
  814. /* wait the page to be moved by cleaning */
  815. f2fs_wait_on_encrypted_page_writeback(
  816. F2FS_I_SB(inode), block_nr);
  817. }
  818. bio = bio_alloc(GFP_KERNEL,
  819. min_t(int, nr_pages, BIO_MAX_PAGES));
  820. if (!bio) {
  821. if (ctx)
  822. f2fs_release_crypto_ctx(ctx);
  823. goto set_error_page;
  824. }
  825. bio->bi_bdev = bdev;
  826. bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
  827. bio->bi_end_io = f2fs_read_end_io;
  828. bio->bi_private = ctx;
  829. }
  830. if (bio_add_page(bio, page, blocksize, 0) < blocksize)
  831. goto submit_and_realloc;
  832. last_block_in_bio = block_nr;
  833. goto next_page;
  834. set_error_page:
  835. SetPageError(page);
  836. zero_user_segment(page, 0, PAGE_CACHE_SIZE);
  837. unlock_page(page);
  838. goto next_page;
  839. confused:
  840. if (bio) {
  841. submit_bio(READ, bio);
  842. bio = NULL;
  843. }
  844. unlock_page(page);
  845. next_page:
  846. if (pages)
  847. page_cache_release(page);
  848. }
  849. BUG_ON(pages && !list_empty(pages));
  850. if (bio)
  851. submit_bio(READ, bio);
  852. return 0;
  853. }
  854. static int f2fs_read_data_page(struct file *file, struct page *page)
  855. {
  856. struct inode *inode = page->mapping->host;
  857. int ret = -EAGAIN;
  858. trace_f2fs_readpage(page, DATA);
  859. /* If the file has inline data, try to read it directly */
  860. if (f2fs_has_inline_data(inode))
  861. ret = f2fs_read_inline_data(inode, page);
  862. if (ret == -EAGAIN)
  863. ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
  864. return ret;
  865. }
  866. static int f2fs_read_data_pages(struct file *file,
  867. struct address_space *mapping,
  868. struct list_head *pages, unsigned nr_pages)
  869. {
  870. struct inode *inode = file->f_mapping->host;
  871. struct page *page = list_entry(pages->prev, struct page, lru);
  872. trace_f2fs_readpages(inode, page, nr_pages);
  873. /* If the file has inline data, skip readpages */
  874. if (f2fs_has_inline_data(inode))
  875. return 0;
  876. return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
  877. }
  878. int do_write_data_page(struct f2fs_io_info *fio)
  879. {
  880. struct page *page = fio->page;
  881. struct inode *inode = page->mapping->host;
  882. struct dnode_of_data dn;
  883. int err = 0;
  884. set_new_dnode(&dn, inode, NULL, NULL, 0);
  885. err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
  886. if (err)
  887. return err;
  888. fio->blk_addr = dn.data_blkaddr;
  889. /* This page is already truncated */
  890. if (fio->blk_addr == NULL_ADDR) {
  891. ClearPageUptodate(page);
  892. goto out_writepage;
  893. }
  894. if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
  895. /* wait for GCed encrypted page writeback */
  896. f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode),
  897. fio->blk_addr);
  898. fio->encrypted_page = f2fs_encrypt(inode, fio->page);
  899. if (IS_ERR(fio->encrypted_page)) {
  900. err = PTR_ERR(fio->encrypted_page);
  901. goto out_writepage;
  902. }
  903. }
  904. set_page_writeback(page);
  905. /*
  906. * If current allocation needs SSR,
  907. * it had better in-place writes for updated data.
  908. */
  909. if (unlikely(fio->blk_addr != NEW_ADDR &&
  910. !is_cold_data(page) &&
  911. !IS_ATOMIC_WRITTEN_PAGE(page) &&
  912. need_inplace_update(inode))) {
  913. rewrite_data_page(fio);
  914. set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
  915. trace_f2fs_do_write_data_page(page, IPU);
  916. } else {
  917. write_data_page(&dn, fio);
  918. set_data_blkaddr(&dn);
  919. f2fs_update_extent_cache(&dn);
  920. trace_f2fs_do_write_data_page(page, OPU);
  921. set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
  922. if (page->index == 0)
  923. set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
  924. }
  925. out_writepage:
  926. f2fs_put_dnode(&dn);
  927. return err;
  928. }
  929. static int f2fs_write_data_page(struct page *page,
  930. struct writeback_control *wbc)
  931. {
  932. struct inode *inode = page->mapping->host;
  933. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  934. loff_t i_size = i_size_read(inode);
  935. const pgoff_t end_index = ((unsigned long long) i_size)
  936. >> PAGE_CACHE_SHIFT;
  937. unsigned offset = 0;
  938. bool need_balance_fs = false;
  939. int err = 0;
  940. struct f2fs_io_info fio = {
  941. .sbi = sbi,
  942. .type = DATA,
  943. .rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
  944. .page = page,
  945. .encrypted_page = NULL,
  946. };
  947. trace_f2fs_writepage(page, DATA);
  948. if (page->index < end_index)
  949. goto write;
  950. /*
  951. * If the offset is out-of-range of file size,
  952. * this page does not have to be written to disk.
  953. */
  954. offset = i_size & (PAGE_CACHE_SIZE - 1);
  955. if ((page->index >= end_index + 1) || !offset)
  956. goto out;
  957. zero_user_segment(page, offset, PAGE_CACHE_SIZE);
  958. write:
  959. if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
  960. goto redirty_out;
  961. if (f2fs_is_drop_cache(inode))
  962. goto out;
  963. if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim &&
  964. available_free_memory(sbi, BASE_CHECK))
  965. goto redirty_out;
  966. /* Dentry blocks are controlled by checkpoint */
  967. if (S_ISDIR(inode->i_mode)) {
  968. if (unlikely(f2fs_cp_error(sbi)))
  969. goto redirty_out;
  970. err = do_write_data_page(&fio);
  971. goto done;
  972. }
  973. /* we should bypass data pages to proceed the kworkder jobs */
  974. if (unlikely(f2fs_cp_error(sbi))) {
  975. SetPageError(page);
  976. goto out;
  977. }
  978. if (!wbc->for_reclaim)
  979. need_balance_fs = true;
  980. else if (has_not_enough_free_secs(sbi, 0))
  981. goto redirty_out;
  982. err = -EAGAIN;
  983. f2fs_lock_op(sbi);
  984. if (f2fs_has_inline_data(inode))
  985. err = f2fs_write_inline_data(inode, page);
  986. if (err == -EAGAIN)
  987. err = do_write_data_page(&fio);
  988. f2fs_unlock_op(sbi);
  989. done:
  990. if (err && err != -ENOENT)
  991. goto redirty_out;
  992. clear_cold_data(page);
  993. out:
  994. inode_dec_dirty_pages(inode);
  995. if (err)
  996. ClearPageUptodate(page);
  997. unlock_page(page);
  998. f2fs_balance_fs(sbi, need_balance_fs);
  999. if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
  1000. f2fs_submit_merged_bio(sbi, DATA, WRITE);
  1001. remove_dirty_inode(inode);
  1002. }
  1003. return 0;
  1004. redirty_out:
  1005. redirty_page_for_writepage(wbc, page);
  1006. return AOP_WRITEPAGE_ACTIVATE;
  1007. }
  1008. static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
  1009. void *data)
  1010. {
  1011. struct address_space *mapping = data;
  1012. int ret = mapping->a_ops->writepage(page, wbc);
  1013. mapping_set_error(mapping, ret);
  1014. return ret;
  1015. }
  1016. /*
  1017. * This function was copied from write_cche_pages from mm/page-writeback.c.
  1018. * The major change is making write step of cold data page separately from
  1019. * warm/hot data page.
  1020. */
  1021. static int f2fs_write_cache_pages(struct address_space *mapping,
  1022. struct writeback_control *wbc, writepage_t writepage,
  1023. void *data)
  1024. {
  1025. int ret = 0;
  1026. int done = 0;
  1027. struct pagevec pvec;
  1028. int nr_pages;
  1029. pgoff_t uninitialized_var(writeback_index);
  1030. pgoff_t index;
  1031. pgoff_t end; /* Inclusive */
  1032. pgoff_t done_index;
  1033. int cycled;
  1034. int range_whole = 0;
  1035. int tag;
  1036. int step = 0;
  1037. pagevec_init(&pvec, 0);
  1038. next:
  1039. if (wbc->range_cyclic) {
  1040. writeback_index = mapping->writeback_index; /* prev offset */
  1041. index = writeback_index;
  1042. if (index == 0)
  1043. cycled = 1;
  1044. else
  1045. cycled = 0;
  1046. end = -1;
  1047. } else {
  1048. index = wbc->range_start >> PAGE_CACHE_SHIFT;
  1049. end = wbc->range_end >> PAGE_CACHE_SHIFT;
  1050. if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
  1051. range_whole = 1;
  1052. cycled = 1; /* ignore range_cyclic tests */
  1053. }
  1054. if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
  1055. tag = PAGECACHE_TAG_TOWRITE;
  1056. else
  1057. tag = PAGECACHE_TAG_DIRTY;
  1058. retry:
  1059. if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
  1060. tag_pages_for_writeback(mapping, index, end);
  1061. done_index = index;
  1062. while (!done && (index <= end)) {
  1063. int i;
  1064. nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
  1065. min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
  1066. if (nr_pages == 0)
  1067. break;
  1068. for (i = 0; i < nr_pages; i++) {
  1069. struct page *page = pvec.pages[i];
  1070. if (page->index > end) {
  1071. done = 1;
  1072. break;
  1073. }
  1074. done_index = page->index;
  1075. lock_page(page);
  1076. if (unlikely(page->mapping != mapping)) {
  1077. continue_unlock:
  1078. unlock_page(page);
  1079. continue;
  1080. }
  1081. if (!PageDirty(page)) {
  1082. /* someone wrote it for us */
  1083. goto continue_unlock;
  1084. }
  1085. if (step == is_cold_data(page))
  1086. goto continue_unlock;
  1087. if (PageWriteback(page)) {
  1088. if (wbc->sync_mode != WB_SYNC_NONE)
  1089. f2fs_wait_on_page_writeback(page, DATA);
  1090. else
  1091. goto continue_unlock;
  1092. }
  1093. BUG_ON(PageWriteback(page));
  1094. if (!clear_page_dirty_for_io(page))
  1095. goto continue_unlock;
  1096. ret = (*writepage)(page, wbc, data);
  1097. if (unlikely(ret)) {
  1098. if (ret == AOP_WRITEPAGE_ACTIVATE) {
  1099. unlock_page(page);
  1100. ret = 0;
  1101. } else {
  1102. done_index = page->index + 1;
  1103. done = 1;
  1104. break;
  1105. }
  1106. }
  1107. if (--wbc->nr_to_write <= 0 &&
  1108. wbc->sync_mode == WB_SYNC_NONE) {
  1109. done = 1;
  1110. break;
  1111. }
  1112. }
  1113. pagevec_release(&pvec);
  1114. cond_resched();
  1115. }
  1116. if (step < 1) {
  1117. step++;
  1118. goto next;
  1119. }
  1120. if (!cycled && !done) {
  1121. cycled = 1;
  1122. index = 0;
  1123. end = writeback_index - 1;
  1124. goto retry;
  1125. }
  1126. if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
  1127. mapping->writeback_index = done_index;
  1128. return ret;
  1129. }
  1130. static int f2fs_write_data_pages(struct address_space *mapping,
  1131. struct writeback_control *wbc)
  1132. {
  1133. struct inode *inode = mapping->host;
  1134. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1135. bool locked = false;
  1136. int ret;
  1137. long diff;
  1138. trace_f2fs_writepages(mapping->host, wbc, DATA);
  1139. /* deal with chardevs and other special file */
  1140. if (!mapping->a_ops->writepage)
  1141. return 0;
  1142. /* skip writing if there is no dirty page in this inode */
  1143. if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
  1144. return 0;
  1145. if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
  1146. get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
  1147. available_free_memory(sbi, DIRTY_DENTS))
  1148. goto skip_write;
  1149. /* skip writing during file defragment */
  1150. if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG))
  1151. goto skip_write;
  1152. /* during POR, we don't need to trigger writepage at all. */
  1153. if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
  1154. goto skip_write;
  1155. diff = nr_pages_to_write(sbi, DATA, wbc);
  1156. if (!S_ISDIR(inode->i_mode)) {
  1157. mutex_lock(&sbi->writepages);
  1158. locked = true;
  1159. }
  1160. ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
  1161. f2fs_submit_merged_bio(sbi, DATA, WRITE);
  1162. if (locked)
  1163. mutex_unlock(&sbi->writepages);
  1164. remove_dirty_inode(inode);
  1165. wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
  1166. return ret;
  1167. skip_write:
  1168. wbc->pages_skipped += get_dirty_pages(inode);
  1169. return 0;
  1170. }
  1171. static void f2fs_write_failed(struct address_space *mapping, loff_t to)
  1172. {
  1173. struct inode *inode = mapping->host;
  1174. loff_t i_size = i_size_read(inode);
  1175. if (to > i_size) {
  1176. truncate_pagecache(inode, i_size);
  1177. truncate_blocks(inode, i_size, true);
  1178. }
  1179. }
  1180. static int prepare_write_begin(struct f2fs_sb_info *sbi,
  1181. struct page *page, loff_t pos, unsigned len,
  1182. block_t *blk_addr, bool *node_changed)
  1183. {
  1184. struct inode *inode = page->mapping->host;
  1185. pgoff_t index = page->index;
  1186. struct dnode_of_data dn;
  1187. struct page *ipage;
  1188. bool locked = false;
  1189. struct extent_info ei;
  1190. int err = 0;
  1191. if (f2fs_has_inline_data(inode) ||
  1192. (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
  1193. f2fs_lock_op(sbi);
  1194. locked = true;
  1195. }
  1196. restart:
  1197. /* check inline_data */
  1198. ipage = get_node_page(sbi, inode->i_ino);
  1199. if (IS_ERR(ipage)) {
  1200. err = PTR_ERR(ipage);
  1201. goto unlock_out;
  1202. }
  1203. set_new_dnode(&dn, inode, ipage, ipage, 0);
  1204. if (f2fs_has_inline_data(inode)) {
  1205. if (pos + len <= MAX_INLINE_DATA) {
  1206. read_inline_data(page, ipage);
  1207. set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
  1208. sync_inode_page(&dn);
  1209. } else {
  1210. err = f2fs_convert_inline_page(&dn, page);
  1211. if (err)
  1212. goto out;
  1213. if (dn.data_blkaddr == NULL_ADDR)
  1214. err = f2fs_get_block(&dn, index);
  1215. }
  1216. } else if (locked) {
  1217. err = f2fs_get_block(&dn, index);
  1218. } else {
  1219. if (f2fs_lookup_extent_cache(inode, index, &ei)) {
  1220. dn.data_blkaddr = ei.blk + index - ei.fofs;
  1221. } else {
  1222. bool restart = false;
  1223. /* hole case */
  1224. err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
  1225. if (err || (!err && dn.data_blkaddr == NULL_ADDR))
  1226. restart = true;
  1227. if (restart) {
  1228. f2fs_put_dnode(&dn);
  1229. f2fs_lock_op(sbi);
  1230. locked = true;
  1231. goto restart;
  1232. }
  1233. }
  1234. }
  1235. /* convert_inline_page can make node_changed */
  1236. *blk_addr = dn.data_blkaddr;
  1237. *node_changed = dn.node_changed;
  1238. out:
  1239. f2fs_put_dnode(&dn);
  1240. unlock_out:
  1241. if (locked)
  1242. f2fs_unlock_op(sbi);
  1243. return err;
  1244. }
  1245. static int f2fs_write_begin(struct file *file, struct address_space *mapping,
  1246. loff_t pos, unsigned len, unsigned flags,
  1247. struct page **pagep, void **fsdata)
  1248. {
  1249. struct inode *inode = mapping->host;
  1250. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1251. struct page *page = NULL;
  1252. pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
  1253. bool need_balance = false;
  1254. block_t blkaddr = NULL_ADDR;
  1255. int err = 0;
  1256. trace_f2fs_write_begin(inode, pos, len, flags);
  1257. /*
  1258. * We should check this at this moment to avoid deadlock on inode page
  1259. * and #0 page. The locking rule for inline_data conversion should be:
  1260. * lock_page(page #0) -> lock_page(inode_page)
  1261. */
  1262. if (index != 0) {
  1263. err = f2fs_convert_inline_inode(inode);
  1264. if (err)
  1265. goto fail;
  1266. }
  1267. repeat:
  1268. page = grab_cache_page_write_begin(mapping, index, flags);
  1269. if (!page) {
  1270. err = -ENOMEM;
  1271. goto fail;
  1272. }
  1273. *pagep = page;
  1274. err = prepare_write_begin(sbi, page, pos, len,
  1275. &blkaddr, &need_balance);
  1276. if (err)
  1277. goto fail;
  1278. if (need_balance && has_not_enough_free_secs(sbi, 0)) {
  1279. unlock_page(page);
  1280. f2fs_balance_fs(sbi, true);
  1281. lock_page(page);
  1282. if (page->mapping != mapping) {
  1283. /* The page got truncated from under us */
  1284. f2fs_put_page(page, 1);
  1285. goto repeat;
  1286. }
  1287. }
  1288. f2fs_wait_on_page_writeback(page, DATA);
  1289. /* wait for GCed encrypted page writeback */
  1290. if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
  1291. f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
  1292. if (len == PAGE_CACHE_SIZE)
  1293. goto out_update;
  1294. if (PageUptodate(page))
  1295. goto out_clear;
  1296. if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
  1297. unsigned start = pos & (PAGE_CACHE_SIZE - 1);
  1298. unsigned end = start + len;
  1299. /* Reading beyond i_size is simple: memset to zero */
  1300. zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
  1301. goto out_update;
  1302. }
  1303. if (blkaddr == NEW_ADDR) {
  1304. zero_user_segment(page, 0, PAGE_CACHE_SIZE);
  1305. } else {
  1306. struct f2fs_io_info fio = {
  1307. .sbi = sbi,
  1308. .type = DATA,
  1309. .rw = READ_SYNC,
  1310. .blk_addr = blkaddr,
  1311. .page = page,
  1312. .encrypted_page = NULL,
  1313. };
  1314. err = f2fs_submit_page_bio(&fio);
  1315. if (err)
  1316. goto fail;
  1317. lock_page(page);
  1318. if (unlikely(!PageUptodate(page))) {
  1319. err = -EIO;
  1320. goto fail;
  1321. }
  1322. if (unlikely(page->mapping != mapping)) {
  1323. f2fs_put_page(page, 1);
  1324. goto repeat;
  1325. }
  1326. /* avoid symlink page */
  1327. if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
  1328. err = f2fs_decrypt_one(inode, page);
  1329. if (err)
  1330. goto fail;
  1331. }
  1332. }
  1333. out_update:
  1334. SetPageUptodate(page);
  1335. out_clear:
  1336. clear_cold_data(page);
  1337. return 0;
  1338. fail:
  1339. f2fs_put_page(page, 1);
  1340. f2fs_write_failed(mapping, pos + len);
  1341. return err;
  1342. }
  1343. static int f2fs_write_end(struct file *file,
  1344. struct address_space *mapping,
  1345. loff_t pos, unsigned len, unsigned copied,
  1346. struct page *page, void *fsdata)
  1347. {
  1348. struct inode *inode = page->mapping->host;
  1349. trace_f2fs_write_end(inode, pos, len, copied);
  1350. set_page_dirty(page);
  1351. if (pos + copied > i_size_read(inode)) {
  1352. i_size_write(inode, pos + copied);
  1353. mark_inode_dirty(inode);
  1354. update_inode_page(inode);
  1355. }
  1356. f2fs_put_page(page, 1);
  1357. f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
  1358. return copied;
  1359. }
  1360. static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
  1361. loff_t offset)
  1362. {
  1363. unsigned blocksize_mask = inode->i_sb->s_blocksize - 1;
  1364. if (offset & blocksize_mask)
  1365. return -EINVAL;
  1366. if (iov_iter_alignment(iter) & blocksize_mask)
  1367. return -EINVAL;
  1368. return 0;
  1369. }
  1370. static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
  1371. loff_t offset)
  1372. {
  1373. struct file *file = iocb->ki_filp;
  1374. struct address_space *mapping = file->f_mapping;
  1375. struct inode *inode = mapping->host;
  1376. size_t count = iov_iter_count(iter);
  1377. int err;
  1378. /* we don't need to use inline_data strictly */
  1379. err = f2fs_convert_inline_inode(inode);
  1380. if (err)
  1381. return err;
  1382. if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
  1383. return 0;
  1384. err = check_direct_IO(inode, iter, offset);
  1385. if (err)
  1386. return err;
  1387. trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
  1388. if (iov_iter_rw(iter) == WRITE) {
  1389. err = __allocate_data_blocks(inode, offset, count);
  1390. if (err)
  1391. goto out;
  1392. }
  1393. err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
  1394. out:
  1395. if (err < 0 && iov_iter_rw(iter) == WRITE)
  1396. f2fs_write_failed(mapping, offset + count);
  1397. trace_f2fs_direct_IO_exit(inode, offset, count, iov_iter_rw(iter), err);
  1398. return err;
  1399. }
  1400. void f2fs_invalidate_page(struct page *page, unsigned int offset,
  1401. unsigned int length)
  1402. {
  1403. struct inode *inode = page->mapping->host;
  1404. struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
  1405. if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
  1406. (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE))
  1407. return;
  1408. if (PageDirty(page)) {
  1409. if (inode->i_ino == F2FS_META_INO(sbi))
  1410. dec_page_count(sbi, F2FS_DIRTY_META);
  1411. else if (inode->i_ino == F2FS_NODE_INO(sbi))
  1412. dec_page_count(sbi, F2FS_DIRTY_NODES);
  1413. else
  1414. inode_dec_dirty_pages(inode);
  1415. }
  1416. /* This is atomic written page, keep Private */
  1417. if (IS_ATOMIC_WRITTEN_PAGE(page))
  1418. return;
  1419. ClearPagePrivate(page);
  1420. }
  1421. int f2fs_release_page(struct page *page, gfp_t wait)
  1422. {
  1423. /* If this is dirty page, keep PagePrivate */
  1424. if (PageDirty(page))
  1425. return 0;
  1426. /* This is atomic written page, keep Private */
  1427. if (IS_ATOMIC_WRITTEN_PAGE(page))
  1428. return 0;
  1429. ClearPagePrivate(page);
  1430. return 1;
  1431. }
  1432. static int f2fs_set_data_page_dirty(struct page *page)
  1433. {
  1434. struct address_space *mapping = page->mapping;
  1435. struct inode *inode = mapping->host;
  1436. trace_f2fs_set_page_dirty(page, DATA);
  1437. SetPageUptodate(page);
  1438. if (f2fs_is_atomic_file(inode)) {
  1439. if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
  1440. register_inmem_page(inode, page);
  1441. return 1;
  1442. }
  1443. /*
  1444. * Previously, this page has been registered, we just
  1445. * return here.
  1446. */
  1447. return 0;
  1448. }
  1449. if (!PageDirty(page)) {
  1450. __set_page_dirty_nobuffers(page);
  1451. update_dirty_page(inode, page);
  1452. return 1;
  1453. }
  1454. return 0;
  1455. }
  1456. static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
  1457. {
  1458. struct inode *inode = mapping->host;
  1459. if (f2fs_has_inline_data(inode))
  1460. return 0;
  1461. /* make sure allocating whole blocks */
  1462. if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
  1463. filemap_write_and_wait(mapping);
  1464. return generic_block_bmap(mapping, block, get_data_block_bmap);
  1465. }
  1466. const struct address_space_operations f2fs_dblock_aops = {
  1467. .readpage = f2fs_read_data_page,
  1468. .readpages = f2fs_read_data_pages,
  1469. .writepage = f2fs_write_data_page,
  1470. .writepages = f2fs_write_data_pages,
  1471. .write_begin = f2fs_write_begin,
  1472. .write_end = f2fs_write_end,
  1473. .set_page_dirty = f2fs_set_data_page_dirty,
  1474. .invalidatepage = f2fs_invalidate_page,
  1475. .releasepage = f2fs_release_page,
  1476. .direct_IO = f2fs_direct_IO,
  1477. .bmap = f2fs_bmap,
  1478. };